Spaces:

Ephraimmm
/

Pidgin_0.1

Runtime error

App Files Files Community

Ephraimmm committed on Sep 17

Commit

4d743b7

verified ·

1 Parent(s): 893184e

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -111

app.py CHANGED Viewed

@@ -1,126 +1,92 @@
-import torch
-import gc
-import json
-import time
-from threading import Thread
 import gradio as gr
 from unsloth import FastLanguageModel
 from transformers import TextIteratorStreamer
-# ---------------------
-# Setup + Model Load
-# ---------------------
-# Clear out memory before loading
-torch.cuda.empty_cache()
-gc.collect()
-MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
-CONTEXT_LEN = 128000  # Gemma-3 default context window as per blog
-print("Using Unsloth Gemma-3 model with 128K context window...")
-# Make sure your environment has updated versions:
-# pip install -U unsloth unsloth_zoo transformers
-# Load the quantized model with Unsloth
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = MODEL_ID,
-    max_seq_length = CONTEXT_LEN,
-    dtype = None,            # Let Unsloth pick appropriate dtype
-    load_in_4bit = True,
-    trust_remote_code = True,
 )
-FastLanguageModel.for_inference(model)
-print("✅ Model loaded (4-bit dynamic if available)")
-# ---------------------
-# Chat Streaming Function
-# ---------------------
-def stream_chat(message, history):
-    # Build message list as required by Unsloth
     messages = [
-        {"role": "system", "content": "You be Naija assistant. You must always reply for Pidgin English."}
     ]
-    if history:
-        for human, bot in history:
-            messages.append({"role": "user", "content": human})
-            messages.append({"role": "assistant", "content": bot})
-    messages.append({"role": "user", "content": message})
-    # Using apply_chat_template (supported by Unsloth) to handle the formatting
     inputs = tokenizer.apply_chat_template(
         messages,
-        add_generation_prompt = True,
-        return_tensors = "pt"
-    ).to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
-    generate_kwargs = dict(
-        input_ids = inputs,
-        streamer = streamer,
-        max_new_tokens = 512,
-        temperature = 0.8,
-        do_sample = True,
-        top_p = 0.9,
     )
-    # Run in background thread to stream
-    thread = Thread(target = model.generate, kwargs = generate_kwargs)
-    thread.start()
-    output = ""
-    for partial in streamer:
-        output += partial
-        yield output
-# ---------------------
-# Save chat to file (JSON format)
-# ---------------------
-def save_chat(history):
-    export = []
-    for human, bot in history:
-        export.append({"role": "user", "content": human})
-        export.append({"role": "assistant", "content": bot})
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-    fname = f"conversation_{timestamp}.json"
-    with open(fname, "w", encoding="utf-8") as f:
-        json.dump(export, f, ensure_ascii=False, indent=2)
-    return fname
-# ---------------------
-# UI with Gradio
-# ---------------------
-with gr.Blocks(title="🇳🇬 PIDGIN Gemma-3 Chatbot") as demo:
-    gr.HTML("<h1><center>🇳🇬 PIDGIN Gemma-3 Chatbot</center></h1>")
-    chatbot = gr.Chatbot(height=450, show_label=False)
-    with gr.Row():
-        msg = gr.Textbox(placeholder="Type your message here...", lines=2, scale=4)
-        send = gr.Button("Send", variant="primary", scale=1, size="lg")
-    with gr.Row():
-        clear = gr.Button("Clear Chat", variant="secondary", scale=1)
-        save_btn = gr.Button("💾 Save Conversation", variant="secondary", scale=1)
-        download_file = gr.File()
-    def respond(message, history):
-        if history is None:
-            history = []
-        stream = stream_chat(message, history)
-        response = ""
-        for partial in stream:
-            response = partial
-            yield history + [(message, response)], ""
-        yield history + [(message, response)], ""
-    msg.submit(respond, [msg, chatbot], [chatbot, msg])
-    send.click(respond, [msg, chatbot], [chatbot, msg])
-    clear.click(lambda: ([], ""), outputs=[chatbot, msg])
-    save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
-if __name__ == "__main__":
-    demo.launch(share=True, debug=True)

 import gradio as gr
+import torch
 from unsloth import FastLanguageModel
 from transformers import TextIteratorStreamer
+import threading
+from peft import PeftModel
+# -----------------------------
+# 1️⃣ Set device: use CUDA when torch reports it as available, otherwise CPU
+# -----------------------------
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print("Using device:", device)
+# -----------------------------
+# 2️⃣ Load base model (skip compilation): float16, no 4-bit quantization, max_seq_length 2048
+# -----------------------------
+base_model_name = "Ephraimmm/PIDGIN_gemma-3"
+base_model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=base_model_name,
+    max_seq_length=2048,
+    dtype=torch.float16,  # NOTE(review): float16 is requested even when `device` is "cpu" - confirm this is intended
+    load_in_4bit=False,
+    disable_compile=True  # <- avoids unsloth compilation errors
 )
+# -----------------------------
+# 3️⃣ Load LoRA adapter on top of the base model and prepare it for inference
+# -----------------------------
+lora_repo = "Ephraimmm/PIDGIN_gemma-3"  # NOTE(review): same repo id as base_model_name - confirm it holds adapter weights
+lora_model = PeftModel.from_pretrained(base_model, lora_repo, adapter_name="adapter_model")
+FastLanguageModel.for_inference(lora_model)  # presumably switches the wrapped model to Unsloth's inference mode - verify
+# -----------------------------
+# 4️⃣ Streaming generation function with Nigerian Pidgin system prompt
+# -----------------------------
+def generate_response(user_message):
     messages = [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "You be Nigerian assistant wey sabi Pidgin English only. No speak any other language."}]
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": user_message}]
+        }
     ]
     inputs = tokenizer.apply_chat_template(
         messages,
+        add_generation_prompt=True,
+        return_tensors="pt",
+        tokenize=True,
+        return_dict=True
+    ).to(device)
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(
+        **inputs,
+        streamer=streamer,
+        max_new_tokens=256,
+        temperature=0.7,
+        top_p=0.9,
+        top_k=40,
+        use_cache=False
     )
+    def generate():
+        lora_model.generate(**generation_kwargs)
+    thread = threading.Thread(target=generate)
+    thread.start()
+    full_response = ""
+    for new_token in streamer:
+        if new_token:
+            full_response += new_token
+    thread.join()
+    return full_response
+# -----------------------------
+# 5️⃣ Gradio interface
+# -----------------------------
+iface = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your message..."),
+    outputs=gr.Textbox(label="PIDGIN Assistant"),
+    title="Nigerian PIDGIN Assistant",
+    description="Chat with a Nigerian assistant that only speaks Pidgin English."
+)
+iface.launch()