Spaces:

Seth0330
/

AIEXTRACT1

Sleeping

App Files Community

Seth0330 committed 14 days ago

Commit

5aa4351

verified ·

1 Parent(s): 93fcaaf

Update backend/app/openrouter_client.py

Browse files

Files changed (1) hide show

backend/app/openrouter_client.py +57 -64

backend/app/openrouter_client.py CHANGED Viewed

@@ -325,10 +325,21 @@ async def _extract_with_openrouter_single(image_bytes: bytes, page_num: int, tot
 async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
-    """Extract from a single page using HuggingFace Inference API (standard endpoint)."""
     if not HF_TOKEN:
         raise RuntimeError("HF_TOKEN environment variable is not set")
     prompt = (
         f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
         "Extract every word, number, and piece of information, including any non-English text. "
@@ -338,74 +349,50 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
     print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
     try:
-        # Use standard HuggingFace Inference API endpoint (not chat_completion/router)
-        api_url = f"{HF_INFERENCE_API_URL}/{HF_MODEL_NAME}"
-        # Convert image to base64
         image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-        # For Qwen3-VL models, use the chat format through standard API
-        # The standard API accepts chat-completion format for compatible models
-        payload = {
-            "inputs": {
-                "messages": [
                     {
                         "role": "user",
                         "content": [
-                            {"type": "text", "text": prompt},
-                            {"type": "image", "image": f"data:image/jpeg;base64,{image_base64}"}
                         ]
                     }
-                ]
-            },
-            "parameters": {
-                "max_new_tokens": 2048,
-                "temperature": 0.1
-            }
-        }
-        headers = {
-            "Authorization": f"Bearer {HF_TOKEN}",
-            "Content-Type": "application/json"
-        }
-        timeout = httpx.Timeout(180.0, connect=30.0)
-        async with httpx.AsyncClient(timeout=timeout) as client:
-            print(f"[INFO] Making POST request to {api_url}...")
-            resp = await client.post(api_url, headers=headers, json=payload)
-            print(f"[INFO] Received response: Status {resp.status_code}")
-            resp.raise_for_status()
-            result = resp.json()
-        # Extract response text - format depends on model response
-        response_text = None
-        if isinstance(result, list) and len(result) > 0:
-            # Standard API often returns list with generated_text
-            response_text = result[0].get("generated_text", str(result[0]))
-        elif isinstance(result, dict):
-            # Check for different response formats
-            if "generated_text" in result:
-                response_text = result["generated_text"]
-            elif "text" in result:
-                response_text = result["text"]
-            elif "choices" in result and len(result["choices"]) > 0:
-                # Chat completion format
-                message = result["choices"][0].get("message", {})
-                content = message.get("content", "")
-                if isinstance(content, list):
-                    response_text = "".join(
-                        item.get("text", "")
-                        for item in content
-                        if item.get("type") == "text"
-                    )
-                else:
-                    response_text = content
             else:
-                response_text = str(result)
-        elif isinstance(result, str):
-            response_text = result
         else:
-            response_text = str(result)
         if not response_text:
             raise ValueError("Empty response from HuggingFace API")
@@ -413,12 +400,18 @@ async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int)
         print(f"[DEBUG] HuggingFace response preview: {response_text[:500]}")
         return _parse_model_response(response_text, page_num)
-    except httpx.HTTPStatusError as e:
-        print(f"[ERROR] HuggingFace API HTTP error: {e.response.status_code} - {e.response.text[:500]}")
-        raise RuntimeError(f"HuggingFace API error for page {page_num}: {e.response.status_code} - {str(e)}")
     except Exception as e:
-        print(f"[ERROR] HuggingFace API error details: {type(e).__name__}: {str(e)}")
-        raise RuntimeError(f"HuggingFace API error for page {page_num}: {str(e)}")
 def _parse_model_response(text: str, page_num: int = None) -> Dict[str, Any]:

 async def _extract_with_hf(image_bytes: bytes, page_num: int, total_pages: int) -> Dict[str, Any]:
+    """Extract from a single page using HuggingFace Inference API (router endpoint)."""
     if not HF_TOKEN:
         raise RuntimeError("HF_TOKEN environment variable is not set")
+    try:
+        from huggingface_hub import InferenceClient
+    except ImportError:
+        raise RuntimeError("huggingface_hub not installed. Add it to requirements.txt")
+    # Use InferenceClient with router endpoint (required for newer models)
+    client = InferenceClient(
+        api_key=HF_TOKEN,
+        timeout=180.0
+    )
     prompt = (
         f"Read this document page ({page_num} of {total_pages}) and extract ALL text content. "
         "Extract every word, number, and piece of information, including any non-English text. "
     print(f"[INFO] HuggingFace: Processing page {page_num} with model {HF_MODEL_NAME}")
     try:
+        # Convert image bytes to base64 data URL
         image_base64 = base64.b64encode(image_bytes).decode('utf-8')
+        image_data_url = f"data:image/jpeg;base64,{image_base64}"
+        # Use chat.completions.create() as shown in HuggingFace documentation
+        # This uses the router endpoint which is now required
+        # Run in executor since it's a blocking synchronous call
+        import asyncio
+        loop = asyncio.get_event_loop()
+        completion = await loop.run_in_executor(
+            None,
+            lambda: client.chat.completions.create(
+                model=HF_MODEL_NAME,
+                messages=[
                     {
                         "role": "user",
                         "content": [
+                            {
+                                "type": "text",
+                                "text": prompt
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": image_data_url
+                                }
+                            }
                         ]
                     }
+                ],
+                max_tokens=2048,
+                temperature=0.1
+            )
+        )
+        # Extract response text from completion
+        if hasattr(completion, 'choices') and len(completion.choices) > 0:
+            message = completion.choices[0].message
+            if hasattr(message, 'content'):
+                response_text = message.content
             else:
+                response_text = str(message)
         else:
+            response_text = str(completion)
         if not response_text:
             raise ValueError("Empty response from HuggingFace API")
         print(f"[DEBUG] HuggingFace response preview: {response_text[:500]}")
         return _parse_model_response(response_text, page_num)
     except Exception as e:
+        error_msg = str(e)
+        print(f"[ERROR] HuggingFace API error details: {type(e).__name__}: {error_msg}")
+        # Check if it's a permissions error
+        if "403" in error_msg or "permissions" in error_msg.lower() or "Forbidden" in error_msg:
+            raise RuntimeError(
+                f"HuggingFace API error for page {page_num}: Insufficient permissions. "
+                "Your HF_TOKEN may need to be a token with 'read' access to Inference API. "
+                "Check your HuggingFace account settings and token permissions."
+            )
+        raise RuntimeError(f"HuggingFace API error for page {page_num}: {error_msg}")
 def _parse_model_response(text: str, page_num: int = None) -> Dict[str, Any]: