Ephraimmm committed on
Commit
893184e
·
verified ·
1 Parent(s): ea8adbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -65
app.py CHANGED
@@ -1,104 +1,110 @@
1
import torch
import gc
import json
from threading import Thread

import gradio as gr
from unsloth import FastLanguageModel
from transformers import TextIteratorStreamer

# -------------------------------------------------------------------
# Load model with Unsloth (quantized 4-bit safe!)
# -------------------------------------------------------------------
print("Clearing memory...")
torch.cuda.empty_cache()
gc.collect()

print("Loading Unsloth quantized model...")
model_name = "Ephraimmm/PIDGIN_gemma-3"

# 4-bit quantized load via Unsloth; device_map places layers on the GPU(s).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,      # adjust if a longer context is needed
    dtype=torch.float16,      # half precision
    load_in_4bit=True,        # quantized weights
    device_map="auto",
)

model.eval()
print("βœ… Model loaded!")
30
-
31
- # -------------------------------------------------------------------
32
- # Chat function with streaming
33
- # -------------------------------------------------------------------
34
def stream_chat(message, history):
    """Stream a Pidgin-English reply token-by-token for the Gradio chatbot.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns from the Gradio Chatbot component.

    Yields
    ------
    str
        The accumulated response text so far (Gradio streaming convention).
    """
    # Force a Pidgin instruction at the start via a seeded exchange.
    messages = [
        {"role": "user", "content": "You be Naija assistant. Always reply for Pidgin English."},
        {"role": "assistant", "content": "Ok, I go dey reply for Pidgin."},
    ]

    for human, bot in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": bot})

    messages.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # FIX: skip_prompt=True so the prompt tokens are not echoed back into
    # the streamed chat output (without it the whole conversation prefix
    # appears at the start of every reply).
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        input_ids=inputs,
        streamer=streamer,
        max_new_tokens=256,
        temperature=0.8,
        top_p=0.9,
        do_sample=True,
    )

    # Run generation on a background thread so this generator can consume
    # the streamer as tokens arrive.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
 
72
- # -------------------------------------------------------------------
73
- # Save chat as JSON
74
- # -------------------------------------------------------------------
75
def save_chat(history):
    """Dump the (user, assistant) history to conversation.json and return the path."""
    records = []
    for human, bot in history:
        records.extend((
            {"role": "user", "content": human},
            {"role": "assistant", "content": bot},
        ))

    out_path = "conversation.json"
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(records, fh, ensure_ascii=False, indent=4)
    return out_path
-
87
- # -------------------------------------------------------------------
88
- # Gradio UI
89
- # -------------------------------------------------------------------
90
- with gr.Blocks(title="πŸ‡³πŸ‡¬ Pidgin English Chatbot") as demo:
91
- gr.HTML("<h1 style='text-align: center;'>πŸ‡³πŸ‡¬ Pidgin English Chatbot</h1>")
92
-
93
- chatbot = gr.Chatbot(height=400, show_label=False)
94
 
95
  with gr.Row():
96
- msg = gr.Textbox(placeholder="Type your message...", scale=4)
97
- send = gr.Button("Send", variant="primary", scale=1)
98
 
99
  with gr.Row():
100
- clear = gr.Button("Clear Chat")
101
- save_btn = gr.Button("πŸ’Ύ Save Conversation")
102
  download_file = gr.File()
103
 
104
  def respond(message, history):
@@ -116,8 +122,5 @@ with gr.Blocks(title="πŸ‡³πŸ‡¬ Pidgin English Chatbot") as demo:
116
  clear.click(lambda: ([], ""), outputs=[chatbot, msg])
117
  save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
118
 
119
- # -------------------------------------------------------------------
120
- # Launch
121
- # -------------------------------------------------------------------
122
  if __name__ == "__main__":
123
  demo.launch(share=True, debug=True)
 
1
import torch
import gc
import json
import time
from threading import Thread

import gradio as gr
from unsloth import FastLanguageModel
from transformers import TextIteratorStreamer

# ---------------------
# Setup + Model Load
# ---------------------

# Free any leftover GPU/host memory before loading the model.
torch.cuda.empty_cache()
gc.collect()

MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
CONTEXT_LEN = 128000  # Gemma-3 default context window as per blog

print("Using Unsloth Gemma-3 model with 128K context window...")

# Requires up-to-date packages:
#   pip install -U unsloth unsloth_zoo transformers

# Load the quantized model with Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_ID,
    max_seq_length=CONTEXT_LEN,
    dtype=None,               # let Unsloth pick an appropriate dtype
    load_in_4bit=True,
    trust_remote_code=True,   # NOTE(review): executes repo code — trusted source only
)
FastLanguageModel.for_inference(model)
print("βœ… Model loaded (4-bit dynamic if available)")
37
 
38
+ # ---------------------
39
+ # Chat Streaming Function
40
+ # ---------------------
 
 
 
41
def stream_chat(message, history):
    """Stream a Pidgin-English chat completion token-by-token.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]] | None
        Prior (user, assistant) turns from the Gradio Chatbot component.

    Yields
    ------
    str
        The accumulated response text so far (Gradio streaming convention).
    """
    # Build the message list for the chat template.
    # NOTE(review): some Gemma chat templates reject a "system" role —
    # confirm this fine-tune's template accepts it.
    messages = [
        {"role": "system", "content": "You be Naija assistant. You must always reply for Pidgin English."}
    ]
    if history:
        for human, bot in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    # apply_chat_template handles the model-specific prompt formatting.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # FIX: skip_prompt=True so the prompt tokens are not echoed back into
    # the streamed chat output (without it the whole conversation prefix
    # appears at the start of every reply).
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        input_ids=inputs,
        streamer=streamer,
        max_new_tokens=512,
        temperature=0.8,
        do_sample=True,
        top_p=0.9,
    )

    # Run generation in a background thread so we can stream from here.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    output = ""
    for partial in streamer:
        output += partial
        yield output
78
 
79
+ # ---------------------
80
+ # Save chat to file (JSON format)
81
+ # ---------------------
82
def save_chat(history):
    """Serialize the (user, assistant) history to a timestamped JSON file.

    Returns the generated filename so Gradio can offer it for download.
    """
    records = []
    for human, bot in history:
        records.extend((
            {"role": "user", "content": human},
            {"role": "assistant", "content": bot},
        ))

    stamp = time.strftime("%Y%m%d-%H%M%S")
    out_name = f"conversation_{stamp}.json"
    with open(out_name, "w", encoding="utf-8") as fh:
        json.dump(records, fh, ensure_ascii=False, indent=2)
    return out_name
93
+
94
+ # ---------------------
95
+ # UI with Gradio
96
+ # ---------------------
97
+ with gr.Blocks(title="πŸ‡³πŸ‡¬ PIDGIN Gemma-3 Chatbot") as demo:
98
+ gr.HTML("<h1><center>πŸ‡³πŸ‡¬ PIDGIN Gemma-3 Chatbot</center></h1>")
99
+ chatbot = gr.Chatbot(height=450, show_label=False)
 
100
 
101
  with gr.Row():
102
+ msg = gr.Textbox(placeholder="Type your message here...", lines=2, scale=4)
103
+ send = gr.Button("Send", variant="primary", scale=1, size="lg")
104
 
105
  with gr.Row():
106
+ clear = gr.Button("Clear Chat", variant="secondary", scale=1)
107
+ save_btn = gr.Button("πŸ’Ύ Save Conversation", variant="secondary", scale=1)
108
  download_file = gr.File()
109
 
110
  def respond(message, history):
 
122
  clear.click(lambda: ([], ""), outputs=[chatbot, msg])
123
  save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
124
 
 
 
 
125
  if __name__ == "__main__":
126
  demo.launch(share=True, debug=True)