nroggendorff committed
Commit 2b46203 · 1 Parent(s): 5e16e26

initial commit

Files changed (3):
  1. README.md +6 -8
  2. app.py +54 -17
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,15 +1,13 @@
 ---
-title: Gradio Chatbot
+title: Smallama
 emoji: 💬
-colorFrom: yellow
-colorTo: purple
+colorFrom: red
+colorTo: blue
 sdk: gradio
-sdk_version: 5.42.0
+sdk_version: 5.49.1
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_scopes:
-  - inference-api
+license: mit
 ---
 
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
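
The metadata change retitles the Space to Smallama, swaps the theme colors, bumps the Gradio SDK to 5.49.1, adds an MIT license, and drops the hf_oauth keys: inference now runs locally (see app.py below) rather than through the Inference API, so no user login or inference-api scope is needed. Applying the hunk, the resulting front matter reads:

---
title: Smallama
emoji: 💬
colorFrom: red
colorTo: blue
sdk: gradio
sdk_version: 5.49.1
app_file: app.py
pinned: false
license: mit
---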
app.py CHANGED
@@ -1,5 +1,56 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from spaces import GPU as gpu
+
+
+class Delta:
+    def __init__(self, content):
+        self.content = content
+
+
+class Choice:
+    def __init__(self, delta):
+        self.delta = delta
+
+
+class InferenceClient:
+    def __init__(self, model_id="nroggendorff/smallama-it"):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+
+    class ModelOutput:
+        def __init__(self, client, inputs):
+            self.client = client
+            self.inputs = inputs
+            self.choices = []
+
+        def decode(self, output):
+            decoded_output = self.client.tokenizer.decode(
+                output[0][self.inputs["input_ids"].shape[-1] :],
+                skip_special_tokens=True,
+            )
+            self.choices = [Choice(Delta(decoded_output))]
+            return self
+
+    @gpu
+    def chat_completion(
+        self, messages, max_tokens=256, stream=True, temperature=0.2, top_p=0.95
+    ):
+        inputs = self.tokenizer.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(self.model.device)
+
+        model_output = self.ModelOutput(self, inputs)
+
+        for _ in range(max_tokens):
+            output = self.model.generate(
+                **inputs, max_new_tokens=1, temperature=temperature, top_p=top_p
+            )
+            yield model_output.decode(output)
 
 
 def respond(
@@ -9,17 +60,10 @@ def respond(
     max_tokens,
     temperature,
     top_p,
-    hf_token: gr.OAuthToken,
 ):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
+    client = InferenceClient()
    messages = [{"role": "system", "content": system_message}]
-
     messages.extend(history)
-
     messages.append({"role": "user", "content": message})
 
     response = ""
@@ -35,21 +79,16 @@ def respond(
         token = ""
         if len(choices) and choices[0].delta.content:
             token = choices[0].delta.content
-
         response += token
         yield response
 
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.2, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
@@ -61,8 +100,6 @@ chatbot = gr.ChatInterface(
 )
 
 with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
     chatbot.render()
 
 
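The new app.py swaps the hosted huggingface_hub.InferenceClient (and the OAuth login it required) for a local shim of the same name: Delta, Choice, and InferenceClient.ModelOutput reproduce just enough of the chat_completion streaming interface (chunks exposing choices[0].delta.content) that respond() is left almost untouched, while generation now runs nroggendorff/smallama-it through transformers. The spaces.GPU decorator (imported as gpu) requests ZeroGPU hardware for the duration of each chat_completion call.

One caveat in the committed loop: generate() is called with max_new_tokens=1 on the same inputs every iteration, so each step re-runs the prompt from scratch instead of feeding the sampled token back, and temperature/top_p have no effect unless do_sample=True is passed. A more conventional streaming sketch (not part of this commit; it reuses the model id from above, and stream_chat is a hypothetical helper) would hand generate() a TextIteratorStreamer:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "nroggendorff/smallama-it"  # same model as the commit
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)


def stream_chat(messages, max_tokens=256, temperature=0.2, top_p=0.95):
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() runs once in a background thread; the streamer yields each
    # decoded chunk as it is produced, instead of re-running the prompt.
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_tokens,
            do_sample=True,  # without this, temperature/top_p are ignored
            temperature=temperature,
            top_p=top_p,
        ),
    ).start()
    response = ""
    for chunk in streamer:
        response += chunk
        yield response  # cumulative text, matching what respond() yields

With this shape, the ChatInterface could consume stream_chat directly and the Delta/Choice/ModelOutput adapter classes would no longer be needed.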
requirements.txt ADDED
@@ -0,0 +1,2 @@
+git+https://github.com/huggingface/transformers
+accelerate
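
The new requirements.txt installs transformers from the main branch on GitHub (unpinned, so builds track whatever main is at build time) plus accelerate for device placement. Gradio itself comes from the Space image, pinned by sdk_version in the README, and the spaces package is preinstalled on ZeroGPU hardware. To mirror the environment locally, something like the following should work (outside a Space, spaces.GPU degrades to a pass-through decorator):

pip install "git+https://github.com/huggingface/transformers" accelerate gradio spaces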