Spaces: Running on Zero
Disables the static cache implementation (comments out `cache_implementation="static"` in `generate_response`)
Browse files
app.py
CHANGED
|
@@ -218,7 +218,7 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 218 |
attention_mask=inputs['attention_mask'],
|
| 219 |
pad_token_id=tokenizer.eos_token_id,
|
| 220 |
eos_token_id=tokenizer.eos_token_id,
|
| 221 |
-
cache_implementation="static"
|
| 222 |
)
|
| 223 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 224 |
assistant_response = response[len(full_prompt):].strip()
|
|
|
|
| 218 |
attention_mask=inputs['attention_mask'],
|
| 219 |
pad_token_id=tokenizer.eos_token_id,
|
| 220 |
eos_token_id=tokenizer.eos_token_id,
|
| 221 |
+
# cache_implementation="static"
|
| 222 |
)
|
| 223 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 224 |
assistant_response = response[len(full_prompt):].strip()
|