Ephraimmm committed on
Commit
893184e
·
verified ·
1 Parent(s): ea8adbd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -65
app.py CHANGED
@@ -1,104 +1,110 @@
1
import torch
import gc
import json
from threading import Thread

import gradio as gr
from unsloth import FastLanguageModel
from transformers import TextIteratorStreamer

# -------------------------------------------------------------------
# Load model with Unsloth (quantized 4-bit safe!)
# -------------------------------------------------------------------
print("Clearing memory...")
torch.cuda.empty_cache()
gc.collect()

print("Loading Unsloth quantized model...")
model_name = "Ephraimmm/PIDGIN_gemma-3"

# 4-bit quantized load via Unsloth; device_map places layers on the GPU(s).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,      # adjust if a longer context is needed
    dtype=torch.float16,      # half precision
    load_in_4bit=True,        # quantized weights
    device_map="auto",
)

model.eval()
print("βœ… Model loaded!")
30
-
31
- # -------------------------------------------------------------------
32
- # Chat function with streaming
33
- # -------------------------------------------------------------------
34
def stream_chat(message, history):
    """Stream a Pidgin-English reply token-by-token for the Gradio chatbot.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns from the Gradio Chatbot component.

    Yields
    ------
    str
        The accumulated response text so far (Gradio streaming convention).
    """
    # Force a Pidgin instruction at the start via a seeded exchange.
    messages = [
        {"role": "user", "content": "You be Naija assistant. Always reply for Pidgin English."},
        {"role": "assistant", "content": "Ok, I go dey reply for Pidgin."},
    ]

    for human, bot in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": bot})

    messages.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # FIX: skip_prompt=True so the prompt tokens are not echoed back into
    # the streamed chat output (without it the whole conversation prefix
    # appears at the start of every reply).
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        input_ids=inputs,
        streamer=streamer,
        max_new_tokens=256,
        temperature=0.8,
        top_p=0.9,
        do_sample=True,
    )

    # Run generation on a background thread so this generator can consume
    # the streamer as tokens arrive.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
 
72
- # -------------------------------------------------------------------
73
- # Save chat as JSON
74
- # -------------------------------------------------------------------
75
def save_chat(history):
    """Dump the (user, assistant) history to conversation.json and return the path."""
    records = []
    for human, bot in history:
        records.extend((
            {"role": "user", "content": human},
            {"role": "assistant", "content": bot},
        ))

    out_path = "conversation.json"
    with open(out_path, "w", encoding="utf-8") as fh:
        json.dump(records, fh, ensure_ascii=False, indent=4)
    return out_path
-
87
- # -------------------------------------------------------------------
88
- # Gradio UI
89
- # -------------------------------------------------------------------
90
- with gr.Blocks(title="πŸ‡³πŸ‡¬ Pidgin English Chatbot") as demo:
91
- gr.HTML("<h1 style='text-align: center;'>πŸ‡³πŸ‡¬ Pidgin English Chatbot</h1>")
92
-
93
- chatbot = gr.Chatbot(height=400, show_label=False)
94
 
95
  with gr.Row():
96
- msg = gr.Textbox(placeholder="Type your message...", scale=4)
97
- send = gr.Button("Send", variant="primary", scale=1)
98
 
99
  with gr.Row():
100
- clear = gr.Button("Clear Chat")
101
- save_btn = gr.Button("πŸ’Ύ Save Conversation")
102
  download_file = gr.File()
103
 
104
  def respond(message, history):
@@ -116,8 +122,5 @@ with gr.Blocks(title="πŸ‡³πŸ‡¬ Pidgin English Chatbot") as demo:
116
  clear.click(lambda: ([], ""), outputs=[chatbot, msg])
117
  save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
118
 
119
- # -------------------------------------------------------------------
120
- # Launch
121
- # -------------------------------------------------------------------
122
  if __name__ == "__main__":
123
  demo.launch(share=True, debug=True)
 
1
import torch
import gc
import json
import time
from threading import Thread

import gradio as gr
from unsloth import FastLanguageModel
from transformers import TextIteratorStreamer

# ---------------------
# Setup + Model Load
# ---------------------

# Free any leftover GPU/host memory before loading the model.
torch.cuda.empty_cache()
gc.collect()

MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
CONTEXT_LEN = 128000  # Gemma-3 default context window as per blog

print("Using Unsloth Gemma-3 model with 128K context window...")

# Requires up-to-date packages:
#   pip install -U unsloth unsloth_zoo transformers

# Load the quantized model with Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_ID,
    max_seq_length=CONTEXT_LEN,
    dtype=None,               # let Unsloth pick an appropriate dtype
    load_in_4bit=True,
    trust_remote_code=True,   # NOTE(review): executes repo code — trusted source only
)
FastLanguageModel.for_inference(model)
print("βœ… Model loaded (4-bit dynamic if available)")
37
 
38
+ # ---------------------
39
+ # Chat Streaming Function
40
+ # ---------------------
 
 
 
41
def stream_chat(message, history):
    """Stream a Pidgin-English chat completion token-by-token.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]] | None
        Prior (user, assistant) turns from the Gradio Chatbot component.

    Yields
    ------
    str
        The accumulated response text so far (Gradio streaming convention).
    """
    # Build the message list for the chat template.
    # NOTE(review): some Gemma chat templates reject a "system" role —
    # confirm this fine-tune's template accepts it.
    messages = [
        {"role": "system", "content": "You be Naija assistant. You must always reply for Pidgin English."}
    ]
    if history:
        for human, bot in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    # apply_chat_template handles the model-specific prompt formatting.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # FIX: skip_prompt=True so the prompt tokens are not echoed back into
    # the streamed chat output (without it the whole conversation prefix
    # appears at the start of every reply).
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        input_ids=inputs,
        streamer=streamer,
        max_new_tokens=512,
        temperature=0.8,
        do_sample=True,
        top_p=0.9,
    )

    # Run generation in a background thread so we can stream from here.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    output = ""
    for partial in streamer:
        output += partial
        yield output
78
 
79
+ # ---------------------
80
+ # Save chat to file (JSON format)
81
+ # ---------------------
82
def save_chat(history):
    """Serialize the (user, assistant) history to a timestamped JSON file.

    Returns the generated filename so Gradio can offer it for download.
    """
    records = []
    for human, bot in history:
        records.extend((
            {"role": "user", "content": human},
            {"role": "assistant", "content": bot},
        ))

    stamp = time.strftime("%Y%m%d-%H%M%S")
    out_name = f"conversation_{stamp}.json"
    with open(out_name, "w", encoding="utf-8") as fh:
        json.dump(records, fh, ensure_ascii=False, indent=2)
    return out_name
93
+
94
+ # ---------------------
95
+ # UI with Gradio
96
+ # ---------------------
97
+ with gr.Blocks(title="πŸ‡³πŸ‡¬ PIDGIN Gemma-3 Chatbot") as demo:
98
+ gr.HTML("<h1><center>πŸ‡³πŸ‡¬ PIDGIN Gemma-3 Chatbot</center></h1>")
99
+ chatbot = gr.Chatbot(height=450, show_label=False)
 
100
 
101
  with gr.Row():
102
+ msg = gr.Textbox(placeholder="Type your message here...", lines=2, scale=4)
103
+ send = gr.Button("Send", variant="primary", scale=1, size="lg")
104
 
105
  with gr.Row():
106
+ clear = gr.Button("Clear Chat", variant="secondary", scale=1)
107
+ save_btn = gr.Button("πŸ’Ύ Save Conversation", variant="secondary", scale=1)
108
  download_file = gr.File()
109
 
110
  def respond(message, history):
 
122
  clear.click(lambda: ([], ""), outputs=[chatbot, msg])
123
  save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
124
 
 
 
 
125
  if __name__ == "__main__":
126
  demo.launch(share=True, debug=True)