Spaces:

Ephraimmm
/

Pidgin_0.1

Runtime error

App Files Files Community

Ephraimmm committed on Sep 17

Commit

890a412

verified ·

1 Parent(s): 60cedc0

Update app.py

Browse files

Files changed (1) hide show

app.py +219 -83

app.py CHANGED Viewed

@@ -1,115 +1,251 @@
-from threading import Thread
-import json
-import time
-import torch
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 # ---------------------
 # Model + Tokenizer
 # ---------------------
-from transformers import AutoModelForCausalLM, AutoTokenizer
-MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
-print("Loading quantized model...")
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="auto",              # Let HF handle GPU placement
-    torch_dtype="auto",             # Match the quantization dtype
-    trust_remote_code=True          # Required for Unsloth models
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-print("Loading model...")
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="auto",
-    quantization_config=bnb_config,
-    trust_remote_code=True,
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-# ---------------------
-# Chat Streaming
-# ---------------------
-def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
-    conversation = [{"role": "system", "content": system or "You are a helpful assistant. Always reply in Pidgin english"}]
-    for prompt, answer in history:
-        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-    conversation.append({"role": "user", "content": message})
-    input_ids = tokenizer.apply_chat_template(
-        conversation,
         add_generation_prompt=True,
         return_tensors="pt"
     ).to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        input_ids=input_ids,
         streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        do_sample=(temperature > 0),
     )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
-    output = ""
-    for new_token in streamer:
-        output += new_token
-        yield output
-# ---------------------
-# Save Chat
-# ---------------------
 def save_chat(history):
-    conversation = []
-    for prompt, answer in history:
-        conversation.append({"role": "user", "content": prompt})
-        conversation.append({"role": "assistant", "content": answer})
-    filename = f"chat_{int(time.time())}.json"
-    with open(filename, "w", encoding="utf-8") as f:
-        json.dump(conversation, f, ensure_ascii=False, indent=2)
-    return filename
-# ---------------------
 # Gradio UI
-# ---------------------
-chatbot = gr.Chatbot(height=450)
-with gr.Blocks(css=".duplicate-button {margin: auto !important;}") as demo:
-    gr.HTML("<h1><center>Chat with PIDGIN Gemma-3</center></h1>")
-    chat_interface = gr.ChatInterface(
-        fn=stream_chat,
-        chatbot=chatbot,
-        fill_height=True,
-        additional_inputs=[
-            gr.Text(value="", label="System Prompt"),
-            gr.Slider(0, 1, value=0.8, step=0.1, label="Temperature"),
-            gr.Slider(128, 4096, value=1024, step=1, label="Max New Tokens"),
-        ],
-    )
-    save_btn = gr.Button("💾 Save Chat")
-    download = gr.File(label="Download Chat")
-    save_btn.click(fn=save_chat, inputs=[chat_interface.chatbot], outputs=[download])
 if __name__ == "__main__":
-    demo.launch()

+# from threading import Thread
+# import json
+# import time
+# import torch
+# import gradio as gr
+# from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 # ---------------------
 # Model + Tokenizer
 # ---------------------
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
+# print("Loading quantized model...")
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_ID,
+#     device_map="auto",              # Let HF handle GPU placement
+#     torch_dtype="auto",             # Match the quantization dtype
+#     trust_remote_code=True          # Required for Unsloth models
+# )
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+# from unsloth import FastLanguageModel
+# model, tokenizer = FastLanguageModel.from_pretrained(
+#     model_name="Ephraimmm/PIDGIN_gemma-3",
+#     max_seq_length=2048,
+#     dtype=None,             # Unsloth will pick the right dtype
+#     load_in_4bit=True,      # because it’s a 4-bit model
+#     trust_remote_code=True,
+# )
+# FastLanguageModel.for_inference(model)
+# print("Loading model...")
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_ID,
+#     device_map="auto",
+#     quantization_config=bnb_config,
+#     trust_remote_code=True,
+# )
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+# # ---------------------
+# # Chat Streaming
+# # ---------------------
+# def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
+#     conversation = [{"role": "system", "content": system or "You are a helpful assistant. Always reply in Pidgin english"}]
+#     for prompt, answer in history:
+#         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+#     conversation.append({"role": "user", "content": message})
+#     input_ids = tokenizer.apply_chat_template(
+#         conversation,
+#         add_generation_prompt=True,
+#         return_tensors="pt"
+#     ).to(model.device)
+#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+#     generate_kwargs = dict(
+#         input_ids=input_ids,
+#         streamer=streamer,
+#         max_new_tokens=max_new_tokens,
+#         temperature=temperature,
+#         do_sample=(temperature > 0),
+#     )
+#     t = Thread(target=model.generate, kwargs=generate_kwargs)
+#     t.start()
+#     output = ""
+#     for new_token in streamer:
+#         output += new_token
+#         yield output
+# # ---------------------
+# # Save Chat
+# # ---------------------
+# def save_chat(history):
+#     conversation = []
+#     for prompt, answer in history:
+#         conversation.append({"role": "user", "content": prompt})
+#         conversation.append({"role": "assistant", "content": answer})
+#     filename = f"chat_{int(time.time())}.json"
+#     with open(filename, "w", encoding="utf-8") as f:
+#         json.dump(conversation, f, ensure_ascii=False, indent=2)
+#     return filename
+# # ---------------------
+# # Gradio UI
+# # ---------------------
+# chatbot = gr.Chatbot(height=450)
+# with gr.Blocks(css=".duplicate-button {margin: auto !important;}") as demo:
+#     gr.HTML("<h1><center>Chat with PIDGIN Gemma-3</center></h1>")
+#     chat_interface = gr.ChatInterface(
+#         fn=stream_chat,
+#         chatbot=chatbot,
+#         fill_height=True,
+#         additional_inputs=[
+#             gr.Text(value="", label="System Prompt"),
+#             gr.Slider(0, 1, value=0.8, step=0.1, label="Temperature"),
+#             gr.Slider(128, 4096, value=1024, step=1, label="Max New Tokens"),
+#         ],
+#     )
+#     save_btn = gr.Button("💾 Save Chat")
+#     download = gr.File(label="Download Chat")
+#     save_btn.click(fn=save_chat, inputs=[chat_interface.chatbot], outputs=[download])
+# if __name__ == "__main__":
+#     demo.launch()
+import torch
+import gc
+import json
+from threading import Thread
+import gradio as gr
+from unsloth import FastLanguageModel
+from transformers import TextIteratorStreamer
+# -------------------------------------------------------------------
+# Load model with Unsloth (since it's already quantized 4-bit)
+# -------------------------------------------------------------------
+# Startup sequence: free cached CUDA memory and run a garbage-collection pass,
+# then load the model/tokenizer pair once at import time so every request
+# handler below can share the module-level `model` and `tokenizer`.
+print("Clearing memory...")
+torch.cuda.empty_cache()
+gc.collect()
+print("Loading Unsloth quantized model...")
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="Ephraimmm/PIDGIN_gemma-3",  # Your fine-tuned model
+    max_seq_length=2048,
+    dtype=None,  # presumably lets Unsloth choose the dtype — confirm against Unsloth docs
+    load_in_4bit=True,  # the checkpoint is already quantized to 4-bit
+    trust_remote_code=True,
+)
+# NOTE(review): judging by the name, this switches the model to Unsloth's
+# inference mode before any generation happens — confirm against Unsloth docs.
+FastLanguageModel.for_inference(model)
+print("✅ Model loaded!")
+# -------------------------------------------------------------------
+# Chat function with streaming
+# -------------------------------------------------------------------
+def stream_chat(message, history):
+    """Stream the assistant's reply to ``message`` as it is generated.
+
+    Args:
+        message: The latest user message.
+        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
+            ``None`` is treated as an empty conversation.
+
+    Yields:
+        The reply accumulated so far, growing with every chunk the streamer
+        emits.
+    """
+    # Build conversation in the right format
+    messages = [
+        {"role": "system", "content": "You be Naija assistant. Always reply for Pidgin English."}
+    ]
+    for human, bot in history or []:
+        messages.append({"role": "user", "content": human})
+        messages.append({"role": "assistant", "content": bot})
+    messages.append({"role": "user", "content": message})
+    # Apply chat template
+    inputs = tokenizer.apply_chat_template(
+        messages,
         add_generation_prompt=True,
         return_tensors="pt"
     ).to(model.device)
+    # skip_prompt=True keeps the prompt (system text and earlier turns) out of
+    # the streamed output, so only the newly generated reply reaches the UI.
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
+        input_ids=inputs,
         streamer=streamer,
+        max_new_tokens=256,
+        temperature=0.8,
+        top_p=0.9,
+        do_sample=True,
     )
+    # Run generation in a background thread so this generator can consume the
+    # streamer while tokens are still being produced.
+    thread = Thread(target=model.generate, kwargs=generate_kwargs)
+    thread.start()
+    partial_text = ""
+    for new_text in streamer:
+        partial_text += new_text
+        yield partial_text
+# -------------------------------------------------------------------
+# Save chat as JSON file
+# -------------------------------------------------------------------
 def save_chat(history):
+    """Export the conversation to a JSON file and return the file's path.
+
+    Each call writes to its own temporary file, so simultaneous users do not
+    overwrite one another's export.
+
+    Args:
+        history: Chat turns as ``(user_text, assistant_text)`` pairs;
+            ``None`` is exported as an empty list.
+
+    Returns:
+        Path of the JSON file that was written.
+    """
+    import tempfile  # local import keeps this block self-contained
+
+    export_data = []
+    for human, bot in history or []:
+        export_data.append({"role": "user", "content": human})
+        export_data.append({"role": "assistant", "content": bot})
+    # delete=False: the file must outlive this function so it can be downloaded.
+    with tempfile.NamedTemporaryFile(
+        "w", encoding="utf-8", prefix="conversation_", suffix=".json", delete=False
+    ) as f:
+        json.dump(export_data, f, ensure_ascii=False, indent=4)
+    return f.name
+# -------------------------------------------------------------------
 # Gradio UI
+# -------------------------------------------------------------------
+with gr.Blocks(title="🇳🇬 Pidgin English Chatbot") as demo:
+    gr.HTML("<h1 style='text-align: center;'>🇳🇬 Pidgin English Chatbot</h1>")
+    chatbot = gr.Chatbot(height=400, show_label=False)
+    with gr.Row():
+        msg = gr.Textbox(placeholder="Type your message...", scale=4)
+        send = gr.Button("Send", variant="primary", scale=1)
+    with gr.Row():
+        clear = gr.Button("Clear Chat")
+        save_btn = gr.Button("💾 Save Conversation")
+        download_file = gr.File()
+    # Connect events
+    def respond(message, history):
+        """Stream the bot's reply into the chat window and clear the textbox.
+
+        Args:
+            message: Text currently in the input box.
+            history: Current chatbot value as ``(user, bot)`` pairs, or ``None``.
+
+        Yields:
+            ``(updated_history, "")`` — the chat history including the reply
+            generated so far, and an empty string that resets the textbox.
+        """
+        history = history or []
+        if not message or not message.strip():
+            # Nothing to send: leave the chat untouched and do not call the model.
+            yield history, ""
+            return
+        response = ""
+        for response in stream_chat(message, history):
+            yield history + [(message, response)], ""
+        if not response:
+            # No text was generated; still show the user's turn in the chat.
+            yield history + [(message, response)], ""
+    msg.submit(respond, [msg, chatbot], [chatbot, msg])
+    send.click(respond, [msg, chatbot], [chatbot, msg])
+    clear.click(lambda: ([], ""), outputs=[chatbot, msg])
+    save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
+# -------------------------------------------------------------------
+# Launch
+# -------------------------------------------------------------------
 if __name__ == "__main__":
+    # Start the Gradio app only when this file is executed as a script.
+    # NOTE(review): share=True asks Gradio for a public share link and
+    # debug=True keeps the process attached for error output — confirm both
+    # are wanted (and supported) in the hosting environment.
+    demo.launch(share=True, debug=True)