Ephraimmm committed
Commit 890a412 · verified · 1 Parent(s): 60cedc0

Update app.py

Files changed (1)
  1. app.py +219 -83
app.py CHANGED
@@ -1,115 +1,251 @@
- from threading import Thread
- import json
- import time
-
- import torch
- import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig


  # ---------------------
  # Model + Tokenizer
  # ---------------------
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
-
- print("Loading quantized model...")
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_ID,
-     device_map="auto",       # Let HF handle GPU placement
-     torch_dtype="auto",      # Match the quantization dtype
-     trust_remote_code=True   # Required for Unsloth models
- )
-
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-
- print("Loading model...")
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_ID,
-     device_map="auto",
-     quantization_config=bnb_config,
-     trust_remote_code=True,
- )
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-
-
- # ---------------------
- # Chat Streaming
- # ---------------------
- def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
-     conversation = [{"role": "system", "content": system or "You are a helpful assistant. Always reply in Pidgin english"}]
-     for prompt, answer in history:
-         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-
-     conversation.append({"role": "user", "content": message})
-
-     input_ids = tokenizer.apply_chat_template(
-         conversation,
          add_generation_prompt=True,
          return_tensors="pt"
      ).to(model.device)

-     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

      generate_kwargs = dict(
-         input_ids=input_ids,
          streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         temperature=temperature,
-         do_sample=(temperature > 0),
      )

-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()

-     output = ""
-     for new_token in streamer:
-         output += new_token
-         yield output

-
- # ---------------------
- # Save Chat
- # ---------------------
  def save_chat(history):
-     conversation = []
-     for prompt, answer in history:
-         conversation.append({"role": "user", "content": prompt})
-         conversation.append({"role": "assistant", "content": answer})

-     filename = f"chat_{int(time.time())}.json"
-     with open(filename, "w", encoding="utf-8") as f:
-         json.dump(conversation, f, ensure_ascii=False, indent=2)

-     return filename

-
- # ---------------------
  # Gradio UI
- # ---------------------
- chatbot = gr.Chatbot(height=450)
-
- with gr.Blocks(css=".duplicate-button {margin: auto !important;}") as demo:
-     gr.HTML("<h1><center>Chat with PIDGIN Gemma-3</center></h1>")
-
-     chat_interface = gr.ChatInterface(
-         fn=stream_chat,
-         chatbot=chatbot,
-         fill_height=True,
-         additional_inputs=[
-             gr.Text(value="", label="System Prompt"),
-             gr.Slider(0, 1, value=0.8, step=0.1, label="Temperature"),
-             gr.Slider(128, 4096, value=1024, step=1, label="Max New Tokens"),
-         ],
-     )
-
-     save_btn = gr.Button("💾 Save Chat")
-     download = gr.File(label="Download Chat")
-
-     save_btn.click(fn=save_chat, inputs=[chat_interface.chatbot], outputs=[download])
-
-
  if __name__ == "__main__":
-     demo.launch()
 
+ # from threading import Thread
+ # import json
+ # import time

+ # import torch
+ # import gradio as gr
+ # from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig


  # ---------------------
  # Model + Tokenizer
  # ---------------------
+ # from transformers import AutoModelForCausalLM, AutoTokenizer

+ # MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
+
+ # print("Loading quantized model...")
+ # model = AutoModelForCausalLM.from_pretrained(
+ #     MODEL_ID,
+ #     device_map="auto",       # Let HF handle GPU placement
+ #     torch_dtype="auto",      # Match the quantization dtype
+ #     trust_remote_code=True   # Required for Unsloth models
+ # )
+
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

+ # from unsloth import FastLanguageModel

+ # model, tokenizer = FastLanguageModel.from_pretrained(
+ #     model_name="Ephraimmm/PIDGIN_gemma-3",
+ #     max_seq_length=2048,
+ #     dtype=None,          # Unsloth will pick the right dtype
+ #     load_in_4bit=True,   # because it’s a 4-bit model
+ #     trust_remote_code=True,
+ # )

+ # FastLanguageModel.for_inference(model)



+ # print("Loading model...")
+ # model = AutoModelForCausalLM.from_pretrained(
+ #     MODEL_ID,
+ #     device_map="auto",
+ #     quantization_config=bnb_config,
+ #     trust_remote_code=True,
+ # )
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+
+ # # ---------------------
+ # # Chat Streaming
+ # # ---------------------
+ # def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
+ #     conversation = [{"role": "system", "content": system or "You are a helpful assistant. Always reply in Pidgin english"}]
+ #     for prompt, answer in history:
+ #         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+
+ #     conversation.append({"role": "user", "content": message})
+
+ #     input_ids = tokenizer.apply_chat_template(
+ #         conversation,
+ #         add_generation_prompt=True,
+ #         return_tensors="pt"
+ #     ).to(model.device)
+
+ #     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+ #     generate_kwargs = dict(
+ #         input_ids=input_ids,
+ #         streamer=streamer,
+ #         max_new_tokens=max_new_tokens,
+ #         temperature=temperature,
+ #         do_sample=(temperature > 0),
+ #     )
+
+ #     t = Thread(target=model.generate, kwargs=generate_kwargs)
+ #     t.start()
+
+ #     output = ""
+ #     for new_token in streamer:
+ #         output += new_token
+ #         yield output
+
+
+ # # ---------------------
+ # # Save Chat
+ # # ---------------------
+ # def save_chat(history):
+ #     conversation = []
+ #     for prompt, answer in history:
+ #         conversation.append({"role": "user", "content": prompt})
+ #         conversation.append({"role": "assistant", "content": answer})
+
+ #     filename = f"chat_{int(time.time())}.json"
+ #     with open(filename, "w", encoding="utf-8") as f:
+ #         json.dump(conversation, f, ensure_ascii=False, indent=2)
+
+ #     return filename
+
+
+ # # ---------------------
+ # # Gradio UI
+ # # ---------------------
+ # chatbot = gr.Chatbot(height=450)
+
+ # with gr.Blocks(css=".duplicate-button {margin: auto !important;}") as demo:
+ #     gr.HTML("<h1><center>Chat with PIDGIN Gemma-3</center></h1>")

+ #     chat_interface = gr.ChatInterface(
+ #         fn=stream_chat,
+ #         chatbot=chatbot,
+ #         fill_height=True,
+ #         additional_inputs=[
+ #             gr.Text(value="", label="System Prompt"),
+ #             gr.Slider(0, 1, value=0.8, step=0.1, label="Temperature"),
+ #             gr.Slider(128, 4096, value=1024, step=1, label="Max New Tokens"),
+ #         ],
+ #     )

+ #     save_btn = gr.Button("💾 Save Chat")
+ #     download = gr.File(label="Download Chat")
+
+ #     save_btn.click(fn=save_chat, inputs=[chat_interface.chatbot], outputs=[download])
+
+
+ # if __name__ == "__main__":
+ #     demo.launch()
+
+
+ import torch
+ import gc
+ import json
+ from threading import Thread
+
+ import gradio as gr
+ from unsloth import FastLanguageModel
+ from transformers import TextIteratorStreamer
+
+ # -------------------------------------------------------------------
+ # Load model with Unsloth (since it's already quantized 4-bit)
+ # -------------------------------------------------------------------
+ print("Clearing memory...")
+ torch.cuda.empty_cache()
+ gc.collect()
+
+ print("Loading Unsloth quantized model...")
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name="Ephraimmm/PIDGIN_gemma-3",  # Your fine-tuned model
+     max_seq_length=2048,
+     dtype=None,
+     load_in_4bit=True,
+     trust_remote_code=True,
+ )
+ FastLanguageModel.for_inference(model)
+ print("✅ Model loaded!")
+
+ # -------------------------------------------------------------------
+ # Chat function with streaming
+ # -------------------------------------------------------------------
+ def stream_chat(message, history):
+     # Build conversation in the right format
+     messages = [
+         {"role": "system", "content": "You be Naija assistant. Always reply for Pidgin English."}
+     ]
+     for human, bot in history:
+         messages.append({"role": "user", "content": human})
+         messages.append({"role": "assistant", "content": bot})
+
+     messages.append({"role": "user", "content": message})
+
+     # Apply chat template
+     inputs = tokenizer.apply_chat_template(
+         messages,
          add_generation_prompt=True,
          return_tensors="pt"
      ).to(model.device)

+     # skip_prompt=True keeps the echoed prompt out of the streamed reply
+     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

      generate_kwargs = dict(
+         input_ids=inputs,
          streamer=streamer,
+         max_new_tokens=256,
+         temperature=0.8,
+         top_p=0.9,
+         do_sample=True,
      )

+     # Run generation in a background thread
+     thread = Thread(target=model.generate, kwargs=generate_kwargs)
+     thread.start()

+     partial_text = ""
+     for new_text in streamer:
+         partial_text += new_text
+         yield partial_text
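+
+ # Usage sketch (illustration only, not executed here): each value the
+ # generator yields is the full reply so far, so a caller just keeps the
+ # latest string, e.g.
+ #   for partial in stream_chat("How you dey?", []):
+ #       print(partial)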
+
+ # -------------------------------------------------------------------
+ # Save chat as JSON file
+ # -------------------------------------------------------------------
  def save_chat(history):
+     export_data = []
+     for human, bot in history:
+         export_data.append({"role": "user", "content": human})
+         export_data.append({"role": "assistant", "content": bot})
+
+     file_path = "conversation.json"
+     with open(file_path, "w", encoding="utf-8") as f:
+         json.dump(export_data, f, ensure_ascii=False, indent=4)
+
+     return file_path
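+
+ # Note: the fixed path means each save overwrites the previous export. A
+ # timestamped name such as f"chat_{int(time.time())}.json" (as in the earlier
+ # version, which would need `import time` again) would keep every export.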
+
+ # -------------------------------------------------------------------
  # Gradio UI
+ # -------------------------------------------------------------------
+ with gr.Blocks(title="🇳🇬 Pidgin English Chatbot") as demo:
+     gr.HTML("<h1 style='text-align: center;'>🇳🇬 Pidgin English Chatbot</h1>")
+
+     chatbot = gr.Chatbot(height=400, show_label=False)
+
+     with gr.Row():
+         msg = gr.Textbox(placeholder="Type your message...", scale=4)
+         send = gr.Button("Send", variant="primary", scale=1)
+
+     with gr.Row():
+         clear = gr.Button("Clear Chat")
+         save_btn = gr.Button("💾 Save Conversation")
+         download_file = gr.File()
+
+     # Connect events
+     def respond(message, history):
+         if history is None:
+             history = []
+         stream = stream_chat(message, history)
+         response = ""
+         for partial in stream:
+             response = partial
+             yield history + [(message, response)], ""
+         yield history + [(message, response)], ""
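+         # The trailing yield repeats the final state, so the message still
+         # lands in the chat and the textbox clears even if the streamer
+         # produced no tokens.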
+
+     msg.submit(respond, [msg, chatbot], [chatbot, msg])
+     send.click(respond, [msg, chatbot], [chatbot, msg])
+     clear.click(lambda: ([], ""), outputs=[chatbot, msg])
+     save_btn.click(save_chat, inputs=[chatbot], outputs=[download_file])
+
+ # -------------------------------------------------------------------
+ # Launch
+ # -------------------------------------------------------------------
  if __name__ == "__main__":
+     demo.launch(share=True, debug=True)
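+     # Note: on Hugging Face Spaces the Space itself serves the app, so
+     # share=True should be unnecessary there; a plain demo.launch() works too.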