Ephraimmm committed on
Commit
56bfd7b
·
verified ·
1 Parent(s): f484739

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -11
app.py CHANGED
@@ -1,3 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
  from unsloth import FastLanguageModel
@@ -6,6 +144,7 @@ import threading
6
  from peft import PeftModel
7
  import json
8
  import time
 
9
 
10
  # -----------------------------
11
  # 1️⃣ Set device
@@ -22,7 +161,6 @@ base_model, tokenizer = FastLanguageModel.from_pretrained(
22
  max_seq_length=2048,
23
  dtype=torch.float16,
24
  load_in_4bit=False,
25
- # <- avoids unsloth compilation errors
26
  )
27
 
28
  # -----------------------------
@@ -33,7 +171,7 @@ lora_model = PeftModel.from_pretrained(base_model, lora_repo, adapter_name="adap
33
  FastLanguageModel.for_inference(lora_model)
34
 
35
  # -----------------------------
36
- # 4️⃣ Streaming generation function with Nigerian Pidgin system prompt
37
  # -----------------------------
38
  def generate_response(user_message):
39
  messages = [
@@ -92,7 +230,11 @@ def chat(user_message):
92
 
93
  def save_conversation():
94
  if not chat_history:
95
- return None
 
 
 
 
96
 
97
  conversation = []
98
  for user_msg, bot_msg in chat_history:
@@ -126,12 +268,4 @@ with gr.Blocks() as demo:
126
  save_button.click(save_conversation, outputs=download_file)
127
 
128
  demo.launch()
129
- # iface = gr.Interface(
130
- # fn=generate_response,
131
- # inputs=gr.Textbox(lines=2, placeholder="Enter your message..."),
132
- # outputs=gr.Textbox(label="PIDGIN Assistant"),
133
- # title="Nigerian PIDGIN Assistant",
134
- # description="Chat with a Nigerian assistant that only speaks Pidgin English."
135
- # )
136
 
137
- # iface.launch()
 
1
+ # import gradio as gr
2
+ # import torch
3
+ # from unsloth import FastLanguageModel
4
+ # from transformers import TextIteratorStreamer
5
+ # import threading
6
+ # from peft import PeftModel
7
+ # import json
8
+ # import time
9
+
10
+ # # -----------------------------
11
+ # # 1️⃣ Set device
12
+ # # -----------------------------
13
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
14
+ # print("Using device:", device)
15
+
16
+ # # -----------------------------
17
+ # # 2️⃣ Load base model (skip compilation)
18
+ # # -----------------------------
19
+ # base_model_name = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
20
+ # base_model, tokenizer = FastLanguageModel.from_pretrained(
21
+ # model_name=base_model_name,
22
+ # max_seq_length=2048,
23
+ # dtype=torch.float16,
24
+ # load_in_4bit=False,
25
+ # # <- avoids unsloth compilation errors
26
+ # )
27
+
28
+ # # -----------------------------
29
+ # # 3️⃣ Load LoRA
30
+ # # -----------------------------
31
+ # lora_repo = "Ephraimmm/PIDGIN_gemma-3"
32
+ # lora_model = PeftModel.from_pretrained(base_model, lora_repo, adapter_name="adapter_model")
33
+ # FastLanguageModel.for_inference(lora_model)
34
+
35
+ # # -----------------------------
36
+ # # 4️⃣ Streaming generation function with Nigerian Pidgin system prompt
37
+ # # -----------------------------
38
+ # def generate_response(user_message):
39
+ # messages = [
40
+ # {
41
+ # "role": "system",
42
+ # "content": [{"type": "text", "text": "You be Nigerian assistant wey sabi Pidgin English only. No speak any other language. Reply friendly and in short sentences"}]
43
+ # },
44
+ # {
45
+ # "role": "user",
46
+ # "content": [{"type": "text", "text": user_message}]
47
+ # }
48
+ # ]
49
+
50
+ # inputs = tokenizer.apply_chat_template(
51
+ # messages,
52
+ # add_generation_prompt=True,
53
+ # return_tensors="pt",
54
+ # tokenize=True,
55
+ # return_dict=True
56
+ # ).to(device)
57
+
58
+ # streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
59
+
60
+ # generation_kwargs = dict(
61
+ # **inputs,
62
+ # streamer=streamer,
63
+ # max_new_tokens=100,
64
+ # temperature=0.7,
65
+ # top_p=0.7,
66
+ # top_k=40,
67
+ # use_cache=False
68
+ # )
69
+
70
+ # def generate():
71
+ # lora_model.generate(**generation_kwargs)
72
+
73
+ # thread = threading.Thread(target=generate)
74
+ # thread.start()
75
+
76
+ # full_response = ""
77
+ # for new_token in streamer:
78
+ # if new_token:
79
+ # full_response += new_token
80
+ # thread.join()
81
+ # return full_response
82
+
83
+ # # -----------------------------
84
+ # # 5️⃣ Chat + Save
85
+ # # -----------------------------
86
+ # chat_history = []
87
+
88
+ # def chat(user_message):
89
+ # bot_response = generate_response(user_message)
90
+ # chat_history.append((user_message, bot_response))
91
+ # return chat_history, "" # also clears input box
92
+
93
+ # def save_conversation():
94
+ # if not chat_history:
95
+ # return None
96
+
97
+ # conversation = []
98
+ # for user_msg, bot_msg in chat_history:
99
+ # conversation.append({"role": "user", "content": str(user_msg)})
100
+ # conversation.append({"role": "assistant", "content": str(bot_msg)})
101
+
102
+ # timestamp = time.strftime("%Y%m%d-%H%M%S")
103
+ # file_path = f"conversation_{timestamp}.txt" # saved with a .txt extension; the contents are still JSON
104
+
105
+ # with open(file_path, "w", encoding="utf-8") as f:
106
+ # json.dump(conversation, f, indent=4, ensure_ascii=False)
107
+
108
+ # return file_path
109
+
110
+ # # -----------------------------
111
+ # # 6️⃣ Gradio interface
112
+ # # -----------------------------
113
+ # with gr.Blocks() as demo:
114
+ # gr.Markdown("# Nigerian PIDGIN Assistant")
115
+ # gr.Markdown("Chat with a Nigerian assistant that only speaks Pidgin English.")
116
+
117
+ # chatbot = gr.Chatbot(label="Conversation")
118
+ # user_input = gr.Textbox(label="Your message", placeholder="Type your message here...")
119
+
120
+ # with gr.Row():
121
+ # send_button = gr.Button("Send")
122
+ # save_button = gr.Button("Save Conversation")
123
+ # download_file = gr.File(label="Download Conversation")
124
+
125
+ # send_button.click(chat, inputs=user_input, outputs=[chatbot, user_input])
126
+ # save_button.click(save_conversation, outputs=download_file)
127
+
128
+ # demo.launch()
129
+ # # iface = gr.Interface(
130
+ # # fn=generate_response,
131
+ # # inputs=gr.Textbox(lines=2, placeholder="Enter your message..."),
132
+ # # outputs=gr.Textbox(label="PIDGIN Assistant"),
133
+ # # title="Nigerian PIDGIN Assistant",
134
+ # # description="Chat with a Nigerian assistant that only speaks Pidgin English."
135
+ # # )
136
+
137
+ # # iface.launch()
138
+
139
  import gradio as gr
140
  import torch
141
  from unsloth import FastLanguageModel
 
144
  from peft import PeftModel
145
  import json
146
  import time
147
+ import os
148
 
149
  # -----------------------------
150
  # 1️⃣ Set device
 
161
  max_seq_length=2048,
162
  dtype=torch.float16,
163
  load_in_4bit=False,
 
164
  )
165
 
166
  # -----------------------------
 
171
  FastLanguageModel.for_inference(lora_model)
172
 
173
  # -----------------------------
174
+ # 4️⃣ Streaming generation function
175
  # -----------------------------
176
  def generate_response(user_message):
177
  messages = [
 
230
 
231
  def save_conversation():
232
  if not chat_history:
233
+ # Return a placeholder .txt file containing an empty JSON list instead of None (to avoid a Gradio error)
234
+ file_path = "conversation_empty.txt"
235
+ with open(file_path, "w", encoding="utf-8") as f:
236
+ f.write("[]")
237
+ return file_path
238
 
239
  conversation = []
240
  for user_msg, bot_msg in chat_history:
 
268
  save_button.click(save_conversation, outputs=download_file)
269
 
270
  demo.launch()
 
 
 
 
 
 
 
271