Seth0330 committed on
Commit
5aa4351
·
verified ·
1 Parent(s): 93fcaaf

Update backend/app/openrouter_client.py

Browse files
Files changed (1) hide show
  1. backend/app/openrouter_client.py +57 -64
backend/app/openrouter_client.py CHANGED
@@ -325,10 +325,21 @@ async def _extract_with_openrouter_single(image_bytes: bytes, page_num: int, tot
325
 
326
 
327
  async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
328
- """Extract from a single page using HuggingFace Inference API (standard endpoint)."""
329
  if not HF_TOKEN:
330
  raise RuntimeError("HF_TOKEN environment variable is not set")
331
 
 
 
 
 
 
 
 
 
 
 
 
332
  prompt = (
333
  f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
334
  "Extract every word, number, and piece of information, including any non-English text. "
@@ -338,74 +349,50 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
338
  print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
339
 
340
  try:
341
- # Use standard HuggingFace Inference API endpoint (not chat_completion/router)
342
- api_url = f"{HF_INFERENCE_API_URL}/{HF_MODEL_NAME}"
343
-
344
- # Convert image to base64
345
  image_base64 = base64.b64encode(image_bytes).decode('utf-8')
 
346
 
347
- # For Qwen3-VL models, use the chat format through standard API
348
- # The standard API accepts chat-completion format for compatible models
349
- payload = {
350
- "inputs": {
351
- "messages": [
 
 
 
 
 
352
  {
353
  "role": "user",
354
  "content": [
355
- {"type": "text", "text": prompt},
356
- {"type": "image", "image": f"data:image/jpeg;base64,{image_base64}"}
 
 
 
 
 
 
 
 
357
  ]
358
  }
359
- ]
360
- },
361
- "parameters": {
362
- "max_new_tokens": 2048,
363
- "temperature": 0.1
364
- }
365
- }
366
-
367
- headers = {
368
- "Authorization": f"Bearer {HF_TOKEN}",
369
- "Content-Type": "application/json"
370
- }
371
-
372
- timeout = httpx.Timeout(180.0, connect=30.0)
373
- async with httpx.AsyncClient(timeout=timeout) as client:
374
- print(f"[INFO] Making POST request to {api_url}...")
375
- resp = await client.post(api_url, headers=headers, json=payload)
376
- print(f"[INFO] Received response: Status {resp.status_code}")
377
- resp.raise_for_status()
378
- result = resp.json()
379
 
380
- # Extract response text - format depends on model response
381
- response_text = None
382
- if isinstance(result, list) and len(result) > 0:
383
- # Standard API often returns list with generated_text
384
- response_text = result[0].get("generated_text", str(result[0]))
385
- elif isinstance(result, dict):
386
- # Check for different response formats
387
- if "generated_text" in result:
388
- response_text = result["generated_text"]
389
- elif "text" in result:
390
- response_text = result["text"]
391
- elif "choices" in result and len(result["choices"]) > 0:
392
- # Chat completion format
393
- message = result["choices"][0].get("message", {})
394
- content = message.get("content", "")
395
- if isinstance(content, list):
396
- response_text = "".join(
397
- item.get("text", "")
398
- for item in content
399
- if item.get("type") == "text"
400
- )
401
- else:
402
- response_text = content
403
  else:
404
- response_text = str(result)
405
- elif isinstance(result, str):
406
- response_text = result
407
  else:
408
- response_text = str(result)
409
 
410
  if not response_text:
411
  raise ValueError("Empty response from HuggingFace API")
@@ -413,12 +400,18 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
413
  print(f"[DEBUG] HuggingFace response preview: {response_text[:500]}")
414
 
415
  return _parse_model_response(response_text, page_num)
416
- except httpx.HTTPStatusError as e:
417
- print(f"[ERROR] HuggingFace API HTTP error: {e.response.status_code} - {e.response.text[:500]}")
418
- raise RuntimeError(f"HuggingFace API error for page {page_num}: {e.response.status_code} - {str(e)}")
419
  except Exception as e:
420
- print(f"[ERROR] HuggingFace API error details: {type(e).__name__}: {str(e)}")
421
- raise RuntimeError(f"HuggingFace API error for page {page_num}: {str(e)}")
 
 
 
 
 
 
 
 
 
422
 
423
 
424
  def _parse_model_response(text: str, page_num: int = None) -> Dict[str, Any]:
 
325
 
326
 
327
  async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
328
+ """Extract from a single page using HuggingFace Inference API (router endpoint)."""
329
  if not HF_TOKEN:
330
  raise RuntimeError("HF_TOKEN environment variable is not set")
331
 
332
+ try:
333
+ from huggingface_hub import InferenceClient
334
+ except ImportError:
335
+ raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
336
+
337
+ # Use InferenceClient with router endpoint (required for newer models)
338
+ client = InferenceClient(
339
+ api_key=HF_TOKEN,
340
+ timeout=180.0
341
+ )
342
+
343
  prompt = (
344
  f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
345
  "Extract every word, number, and piece of information, including any non-English text. "
 
349
  print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
350
 
351
  try:
352
+ # Convert image bytes to base64 data URL
 
 
 
353
  image_base64 = base64.b64encode(image_bytes).decode('utf-8')
354
+ image_data_url = f"data:image/jpeg;base64,{image_base64}"
355
 
356
+ # Use chat.completions.create() as shown in HuggingFace documentation
357
+ # This uses the router endpoint which is now required
358
+ # Run in executor since it's a blocking synchronous call
359
+ import asyncio
360
+ loop = asyncio.get_event_loop()
361
+ completion = await loop.run_in_executor(
362
+ None,
363
+ lambda: client.chat.completions.create(
364
+ model=HF_MODEL_NAME,
365
+ messages=[
366
  {
367
  "role": "user",
368
  "content": [
369
+ {
370
+ "type": "text",
371
+ "text": prompt
372
+ },
373
+ {
374
+ "type": "image_url",
375
+ "image_url": {
376
+ "url": image_data_url
377
+ }
378
+ }
379
  ]
380
  }
381
+ ],
382
+ max_tokens=2048,
383
+ temperature=0.1
384
+ )
385
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
 
387
+ # Extract response text from completion
388
+ if hasattr(completion, 'choices') and len(completion.choices) > 0:
389
+ message = completion.choices[0].message
390
+ if hasattr(message, 'content'):
391
+ response_text = message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  else:
393
+ response_text = str(message)
 
 
394
  else:
395
+ response_text = str(completion)
396
 
397
  if not response_text:
398
  raise ValueError("Empty response from HuggingFace API")
 
400
  print(f"[DEBUG] HuggingFace response preview: {response_text[:500]}")
401
 
402
  return _parse_model_response(response_text, page_num)
 
 
 
403
  except Exception as e:
404
+ error_msg = str(e)
405
+ print(f"[ERROR] HuggingFace API error details: {type(e).__name__}: {error_msg}")
406
+
407
+ # Check if it's a permissions error
408
+ if "403" in error_msg or "permissions" in error_msg.lower() or "Forbidden" in error_msg:
409
+ raise RuntimeError(
410
+ f"HuggingFace API error for page {page_num}: Insufficient permissions. "
411
+ "Your HF_TOKEN may need to be a token with 'read' access to Inference API. "
412
+ "Check your HuggingFace account settings and token permissions."
413
+ )
414
+ raise RuntimeError(f"HuggingFace API error for page {page_num}: {error_msg}")
415
 
416
 
417
  def _parse_model_response(text: str, page_num: int = None) -> Dict[str, Any]: