Spaces: Running on Zero
Commit: adds minor changes to the README
Browse files
README.md
CHANGED
|
@@ -28,7 +28,7 @@ A complete Gradio application for the [Petite Elle L'Aime 3](https://huggingface
|
|
| 28 |
|
| 29 |
## 📋 Model Information
|
| 30 |
|
| 31 |
-
- **Base Model**: SmolLM3-3B
|
| 32 |
- **Parameters**: ~3B
|
| 33 |
- **Context Length**: 128k
|
| 34 |
- **Precision**: Full fine-tuned model (float16/float32)
|
|
|
|
| 28 |
|
| 29 |
## 📋 Model Information
|
| 30 |
|
| 31 |
+
- **Base Model**: HuggingFaceTB/SmolLM3-3B
|
| 32 |
- **Parameters**: ~3B
|
| 33 |
- **Context Length**: 128k
|
| 34 |
- **Precision**: Full fine-tuned model (float16/float32)
|
app.py
CHANGED
|
@@ -224,35 +224,35 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 224 |
response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
|
| 225 |
|
| 226 |
# Debug: Print the full raw response with tokens
|
| 227 |
-
logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
|
| 228 |
-
logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
|
| 229 |
-
logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
|
| 230 |
|
| 231 |
# More robust response extraction - look for assistant marker
|
| 232 |
-
logger.info(f"Looking for assistant marker in response...")
|
| 233 |
if "<|im_start|>assistant" in response_with_tokens:
|
| 234 |
-
logger.info(f"Found assistant marker in response")
|
| 235 |
# Find the start of assistant response
|
| 236 |
assistant_start = response_with_tokens.find("<|im_start|>assistant")
|
| 237 |
-
logger.info(f"Assistant marker found at position: {assistant_start}")
|
| 238 |
if assistant_start != -1:
|
| 239 |
# Find the end of the assistant marker
|
| 240 |
marker_end = response_with_tokens.find("\n", assistant_start)
|
| 241 |
-
logger.info(f"Marker end found at position: {marker_end}")
|
| 242 |
if marker_end != -1:
|
| 243 |
assistant_response = response_with_tokens[marker_end + 1:].strip()
|
| 244 |
-
logger.info(f"Using marker-based extraction")
|
| 245 |
else:
|
| 246 |
assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
|
| 247 |
-
logger.info(f"Using fallback marker extraction")
|
| 248 |
else:
|
| 249 |
# Fallback to prompt-based extraction
|
| 250 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 251 |
assistant_response = response[len(full_prompt):].strip()
|
| 252 |
-
logger.info(f"Using prompt-based extraction (marker not found)")
|
| 253 |
else:
|
| 254 |
# Fallback to original method
|
| 255 |
-
logger.info(f"No assistant marker found, using prompt-based extraction")
|
| 256 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 257 |
assistant_response = response[len(full_prompt):].strip()
|
| 258 |
|
|
@@ -262,34 +262,34 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 262 |
assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
|
| 263 |
|
| 264 |
# Debug: Print the extracted assistant response after cleanup
|
| 265 |
-
logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
|
| 266 |
-
logger.info(f"Extracted response length: {len(assistant_response)}")
|
| 267 |
-
logger.info(f"Extracted response: {repr(assistant_response)}")
|
| 268 |
|
| 269 |
# Debug: Print before cleanup
|
| 270 |
-
logger.info(f"=== BEFORE CLEANUP DEBUG ===")
|
| 271 |
-
logger.info(f"Before cleanup length: {len(assistant_response)}")
|
| 272 |
-
logger.info(f"Before cleanup: {repr(assistant_response)}")
|
| 273 |
|
| 274 |
assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
|
| 275 |
|
| 276 |
# Debug: Print after first cleanup
|
| 277 |
-
logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
|
| 278 |
-
logger.info(f"After first cleanup length: {len(assistant_response)}")
|
| 279 |
-
logger.info(f"After first cleanup: {repr(assistant_response)}")
|
| 280 |
|
| 281 |
if not enable_thinking:
|
| 282 |
assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
|
| 283 |
|
| 284 |
# Debug: Print after thinking cleanup
|
| 285 |
-
logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
|
| 286 |
-
logger.info(f"After thinking cleanup length: {len(assistant_response)}")
|
| 287 |
-
logger.info(f"After thinking cleanup: {repr(assistant_response)}")
|
| 288 |
|
| 289 |
# Debug: Print before tool call handling
|
| 290 |
-
logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
|
| 291 |
-
logger.info(f"Before tool call handling length: {len(assistant_response)}")
|
| 292 |
-
logger.info(f"Before tool call handling: {repr(assistant_response)}")
|
| 293 |
|
| 294 |
# Handle tool calls if present
|
| 295 |
if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
|
|
@@ -305,17 +305,17 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 305 |
assistant_response += f"\n\nπ Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
|
| 306 |
|
| 307 |
# Debug: Print after tool call handling
|
| 308 |
-
logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
|
| 309 |
-
logger.info(f"After tool call handling length: {len(assistant_response)}")
|
| 310 |
-
logger.info(f"After tool call handling: {repr(assistant_response)}")
|
| 311 |
|
| 312 |
assistant_response = assistant_response.strip()
|
| 313 |
|
| 314 |
# Debug: Print final response
|
| 315 |
-
logger.info(f"=== FINAL RESPONSE DEBUG ===")
|
| 316 |
-
logger.info(f"Final response length: {len(assistant_response)}")
|
| 317 |
-
logger.info(f"Final response: {repr(assistant_response)}")
|
| 318 |
-
logger.info(f"=== END DEBUG ===")
|
| 319 |
|
| 320 |
return assistant_response
|
| 321 |
|
|
|
|
| 224 |
response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
|
| 225 |
|
| 226 |
# Debug: Print the full raw response with tokens
|
| 227 |
+
# logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
|
| 228 |
+
# logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
|
| 229 |
+
# logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
|
| 230 |
|
| 231 |
# More robust response extraction - look for assistant marker
|
| 232 |
+
# logger.info(f"Looking for assistant marker in response...")
|
| 233 |
if "<|im_start|>assistant" in response_with_tokens:
|
| 234 |
+
# logger.info(f"Found assistant marker in response")
|
| 235 |
# Find the start of assistant response
|
| 236 |
assistant_start = response_with_tokens.find("<|im_start|>assistant")
|
| 237 |
+
# logger.info(f"Assistant marker found at position: {assistant_start}")
|
| 238 |
if assistant_start != -1:
|
| 239 |
# Find the end of the assistant marker
|
| 240 |
marker_end = response_with_tokens.find("\n", assistant_start)
|
| 241 |
+
# logger.info(f"Marker end found at position: {marker_end}")
|
| 242 |
if marker_end != -1:
|
| 243 |
assistant_response = response_with_tokens[marker_end + 1:].strip()
|
| 244 |
+
# logger.info(f"Using marker-based extraction")
|
| 245 |
else:
|
| 246 |
assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
|
| 247 |
+
# logger.info(f"Using fallback marker extraction")
|
| 248 |
else:
|
| 249 |
# Fallback to prompt-based extraction
|
| 250 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 251 |
assistant_response = response[len(full_prompt):].strip()
|
| 252 |
+
# logger.info(f"Using prompt-based extraction (marker not found)")
|
| 253 |
else:
|
| 254 |
# Fallback to original method
|
| 255 |
+
# logger.info(f"No assistant marker found, using prompt-based extraction")
|
| 256 |
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 257 |
assistant_response = response[len(full_prompt):].strip()
|
| 258 |
|
|
|
|
| 262 |
assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
|
| 263 |
|
| 264 |
# Debug: Print the extracted assistant response after cleanup
|
| 265 |
+
# logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
|
| 266 |
+
# logger.info(f"Extracted response length: {len(assistant_response)}")
|
| 267 |
+
# logger.info(f"Extracted response: {repr(assistant_response)}")
|
| 268 |
|
| 269 |
# Debug: Print before cleanup
|
| 270 |
+
# logger.info(f"=== BEFORE CLEANUP DEBUG ===")
|
| 271 |
+
# logger.info(f"Before cleanup length: {len(assistant_response)}")
|
| 272 |
+
# logger.info(f"Before cleanup: {repr(assistant_response)}")
|
| 273 |
|
| 274 |
assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
|
| 275 |
|
| 276 |
# Debug: Print after first cleanup
|
| 277 |
+
# logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
|
| 278 |
+
# logger.info(f"After first cleanup length: {len(assistant_response)}")
|
| 279 |
+
# logger.info(f"After first cleanup: {repr(assistant_response)}")
|
| 280 |
|
| 281 |
if not enable_thinking:
|
| 282 |
assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
|
| 283 |
|
| 284 |
# Debug: Print after thinking cleanup
|
| 285 |
+
# logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
|
| 286 |
+
# logger.info(f"After thinking cleanup length: {len(assistant_response)}")
|
| 287 |
+
# logger.info(f"After thinking cleanup: {repr(assistant_response)}")
|
| 288 |
|
| 289 |
# Debug: Print before tool call handling
|
| 290 |
+
# logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
|
| 291 |
+
# logger.info(f"Before tool call handling length: {len(assistant_response)}")
|
| 292 |
+
# logger.info(f"Before tool call handling: {repr(assistant_response)}")
|
| 293 |
|
| 294 |
# Handle tool calls if present
|
| 295 |
if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
|
|
|
|
| 305 |
assistant_response += f"\n\nπ Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
|
| 306 |
|
| 307 |
# Debug: Print after tool call handling
|
| 308 |
+
# logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
|
| 309 |
+
# logger.info(f"After tool call handling length: {len(assistant_response)}")
|
| 310 |
+
# logger.info(f"After tool call handling: {repr(assistant_response)}")
|
| 311 |
|
| 312 |
assistant_response = assistant_response.strip()
|
| 313 |
|
| 314 |
# Debug: Print final response
|
| 315 |
+
# logger.info(f"=== FINAL RESPONSE DEBUG ===")
|
| 316 |
+
# logger.info(f"Final response length: {len(assistant_response)}")
|
| 317 |
+
# logger.info(f"Final response: {repr(assistant_response)}")
|
| 318 |
+
# logger.info(f"=== END DEBUG ===")
|
| 319 |
|
| 320 |
return assistant_response
|
| 321 |
|