Spaces:

mike23415
/

Orc1

Sleeping

App Files Files Community

mike23415 committed on May 30

Commit

9b18f9e

verified ·

1 Parent(s): bb720a4

Update app.py

Browse files

Files changed (1) hide show

app.py +190 -110

app.py CHANGED Viewed

@@ -6,9 +6,9 @@ import tempfile
 temp_dir = tempfile.mkdtemp()
 print(f"Created temporary directory: {temp_dir}")
-# Set environment variables to use the temp directory for caching
-os.environ['HF_HOME'] = temp_dir
-os.environ['TRANSFORMERS_CACHE'] = temp_dir
 os.environ['HOME'] = temp_dir
 # Now import everything else
@@ -18,15 +18,10 @@ import base64
 import io
 from PIL import Image
 import logging
-import torch
 import numpy as np
 import threading
-# Surya OCR imports
-from surya.ocr import run_ocr
-from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
-from surya.model.recognition.model import load_model as load_rec_model
-from surya.model.recognition.processor import load_processor as load_rec_processor
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -36,16 +31,14 @@ app = Flask(__name__)
 CORS(app)
 # Global variables for models
-det_model = None
-det_processor = None
-rec_model = None
-rec_processor = None
 models_loaded = False
 loading_lock = threading.Lock()
 def initialize_models():
-    """Initialize Surya OCR models"""
-    global det_model, det_processor, rec_model, rec_processor, models_loaded
     if models_loaded:
         return
@@ -55,25 +48,73 @@ def initialize_models():
             return
         try:
-            logger.info("Starting Surya OCR model initialization...")
-            # Load detection model and processor
-            logger.info("Loading Surya detection model...")
-            det_processor = load_det_processor()
-            det_model = load_det_model()
-            logger.info("Surya detection model loaded successfully!")
-            # Load recognition model and processor
-            logger.info("Loading Surya recognition model...")
-            rec_model = load_rec_model()
-            rec_processor = load_rec_processor()
-            logger.info("Surya recognition model loaded successfully!")
             models_loaded = True
-            logger.info("Surya OCR model initialization completed!")
         except Exception as e:
-            logger.error(f"Error loading Surya models: {str(e)}")
             models_loaded = False
             raise e
@@ -82,75 +123,121 @@ def ensure_models_loaded():
     if not models_loaded:
         initialize_models()
def preprocess_image(image, max_size=1536):
    """Normalize a PIL image before it is handed to the OCR engine.

    Args:
        image: PIL ``Image`` to prepare.
        max_size: Upper bound, in pixels, for the longer side. Defaults to
            1536, the value this function previously hard-coded.

    Returns:
        An RGB image whose longer side is at most ``max_size`` pixels.
    """
    # Convert to RGB if needed
    if image.mode != 'RGB':
        image = image.convert('RGB')

    longest_side = max(image.size)
    if longest_side > max_size:
        ratio = max_size / longest_side
        # Clamp each side to at least 1 px: on a very elongated image
        # (e.g. 5000x1) the short side would otherwise truncate to 0,
        # which is not a valid target size for resize().
        new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
        image = image.resize(new_size, Image.Resampling.LANCZOS)
    return image
def extract_text_surya(image, ocr_type="auto"):
    """Run Surya OCR on one image and summarize the recognized lines.

    Args:
        image: PIL image to read.
        ocr_type: Accepted for interface compatibility; not used here.

    Returns:
        A dict with ``text`` (recognized lines joined by spaces),
        ``confidence`` (mean per-line confidence, 0 when there are no lines)
        and ``bbox_count`` (number of lines). Any failure is logged and
        reported as an empty result rather than raised.
    """
    no_result = {"text": "", "confidence": 0, "bbox_count": 0}
    try:
        ensure_models_loaded()

        loaded_parts = [det_model, det_processor, rec_model, rec_processor]
        if not all(loaded_parts):
            logger.warning("Surya models not available")
            return no_result

        prepared = preprocess_image(image)
        languages = ["en"]
        predictions = run_ocr(
            [prepared], [languages],
            det_model, det_processor, rec_model, rec_processor,
        )
        if not predictions or len(predictions) == 0:
            return no_result

        texts = []
        scores = []
        for recognized in predictions[0].text_lines:
            texts.append(recognized.text)
            # Lines without a confidence attribute count as fully confident.
            scores.append(recognized.confidence if hasattr(recognized, 'confidence') else 1.0)

        line_count = len(texts)
        mean_score = sum(scores) / line_count if line_count > 0 else 0
        return {
            "text": ' '.join(texts).strip(),
            "confidence": mean_score,
            "bbox_count": line_count,
        }
    except Exception as e:
        logger.error(f"Surya OCR error: {str(e)}")
        return no_result
 def process_image_ocr(image, ocr_type="auto"):
-    """Process image with Surya OCR"""
     results = {}
-    # Surya handles both printed and handwritten text well
-    surya_result = extract_text_surya(image, ocr_type)
-    results["surya"] = surya_result["text"]
-    results["confidence"] = surya_result["confidence"]
-    results["bbox_count"] = surya_result["bbox_count"]
-    results["final"] = surya_result["text"]
     return results
@@ -158,7 +245,7 @@ def process_image_ocr(image, ocr_type="auto"):
 def home():
     """Root endpoint"""
     return jsonify({
-        "service": "Surya OCR Backend",
         "status": "running",
         "version": "2.0.0",
         "models_loaded": models_loaded,
@@ -170,7 +257,7 @@ def home():
         },
         "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
         "ocr_types": ["auto", "handwritten", "printed"],
-        "ocr_engine": "Surya OCR"
     })
 @app.route('/health', methods=['GET'])
@@ -179,8 +266,7 @@ def health_check():
     return jsonify({
         "status": "healthy",
         "models_loaded": models_loaded,
-        "service": "Surya OCR Backend",
-        "cache_dir": temp_dir
     })
 @app.route('/ocr', methods=['POST'])
@@ -197,14 +283,14 @@ def ocr_endpoint():
         if request.is_json and 'image_base64' not in request.json:
             return jsonify({"error": "No 'image_base64' field found in JSON"}), 400
-        # Get OCR type preference (Surya handles all types well)
         if request.is_json:
             ocr_type = request.json.get('type', 'auto')
         else:
             ocr_type = request.form.get('type', 'auto')
         # Validate ocr_type
-        if ocr_type not in ['auto', 'handwritten', 'printed']:
             return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
         # Load image
@@ -236,9 +322,11 @@ def ocr_endpoint():
             "text": results["final"],
             "type_used": ocr_type,
             "character_count": len(results["final"]),
-            "confidence": results.get("confidence", 0),
-            "bbox_count": results.get("bbox_count", 0),
-            "engine": "Surya OCR"
         }
         return jsonify(response)
@@ -264,7 +352,7 @@ def batch_ocr_endpoint():
         ocr_type = request.form.get('type', 'auto')
         # Validate ocr_type
-        if ocr_type not in ['auto', 'handwritten', 'printed']:
             return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
         results = []
@@ -287,8 +375,6 @@ def batch_ocr_endpoint():
                     "filename": image_file.filename,
                     "text": ocr_results["final"],
                     "character_count": len(ocr_results["final"]),
-                    "confidence": ocr_results.get("confidence", 0),
-                    "bbox_count": ocr_results.get("bbox_count", 0),
                     "success": True
                 })
             except Exception as e:
@@ -308,7 +394,7 @@ def batch_ocr_endpoint():
             "successful": successful_count,
             "failed": len(results) - successful_count,
             "type_used": ocr_type,
-            "engine": "Surya OCR"
         })
     except Exception as e:
@@ -319,34 +405,28 @@ def batch_ocr_endpoint():
 def models_info():
     """Get information about loaded models"""
     return jsonify({
         "models": {
-            "surya_detection": {
-                "name": "Surya Detection Model",
-                "description": "Surya's SegFormer-based text detection model",
-                "loaded": det_model is not None and det_processor is not None,
-                "best_for": "Text detection in images"
             },
-            "surya_recognition": {
-                "name": "Surya Recognition Model",
-                "description": "Surya's transformer-based text recognition model",
-                "loaded": rec_model is not None and rec_processor is not None,
-                "best_for": "Both printed and handwritten text recognition"
             }
         },
         "supported_types": ["auto", "handwritten", "printed"],
         "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
         "cache_directory": temp_dir,
-        "hf_cache": temp_dir,
-        "gpu_available": torch.cuda.is_available(),
         "models_loaded": models_loaded,
-        "engine": "Surya OCR",
-        "engine_features": [
-            "High accuracy on both printed and handwritten text",
-            "Multilingual support",
-            "Modern transformer architecture",
-            "Efficient processing",
-            "Works well on Hugging Face free tier"
-        ]
     })
 @app.route('/models/load', methods=['POST'])
@@ -354,10 +434,10 @@ def load_models():
     """Manually trigger model loading"""
     try:
         if models_loaded:
-            return jsonify({"message": "Surya models already loaded", "success": True})
         initialize_models()
-        return jsonify({"message": "Surya models loaded successfully", "success": True})
     except Exception as e:
         return jsonify({"error": str(e), "success": False}), 500
@@ -383,10 +463,10 @@ def internal_error(error):
     }), 500
 if __name__ == '__main__':
-    logger.info("Starting Surya OCR service in development mode...")
     # Run the app - models will load on first request
     port = int(os.environ.get('PORT', 5000))
     app.run(host='0.0.0.0', port=port, debug=False)
 else:
     # Running with gunicorn - just log startup, don't do anything else
-    logger.info("Surya OCR service ready - models will load on first request")

 temp_dir = tempfile.mkdtemp()
 print(f"Created temporary directory: {temp_dir}")
+# Set PaddleOCR environment variables to use the temp directory
+os.environ['PPOCR_MODEL_PATH'] = temp_dir
+os.environ['PADDLEOCR_HOME'] = temp_dir
 os.environ['HOME'] = temp_dir
 # Now import everything else
 import io
 from PIL import Image
 import logging
 import numpy as np
 import threading
+import shutil
+import cv2
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 CORS(app)
 # Global variables for models
+paddle_ocr = None
+paddle_ocr_handwritten = None
 models_loaded = False
 loading_lock = threading.Lock()
 def initialize_models():
+    """Initialize PaddleOCR models"""
+    global paddle_ocr, paddle_ocr_handwritten, models_loaded
     if models_loaded:
         return
             return
         try:
+            logger.info("Starting PaddleOCR model initialization...")
+            # Import PaddleOCR after setting environment variables
+            from paddleocr import PaddleOCR
+            # Create model cache directory
+            paddle_cache = os.path.join(temp_dir, 'paddleocr_models')
+            os.makedirs(paddle_cache, exist_ok=True)
+            # Initialize PaddleOCR for printed text
+            logger.info("Loading PaddleOCR model for printed text...")
+            try:
+                paddle_ocr = PaddleOCR(
+                    use_angle_cls=True,
+                    lang='en',
+                    use_gpu=False,  # Set to False for CPU-only (Hugging Face free tier)
+                    show_log=False,
+                    det_model_dir=os.path.join(paddle_cache, 'det'),
+                    rec_model_dir=os.path.join(paddle_cache, 'rec'),
+                    cls_model_dir=os.path.join(paddle_cache, 'cls'),
+                    det_limit_side_len=960,  # Reduce for memory efficiency
+                    det_limit_type='min',
+                    rec_batch_num=6,  # Reduce batch size for memory efficiency
+                )
+                logger.info("PaddleOCR for printed text loaded successfully!")
+            except Exception as e:
+                logger.warning(f"Failed to load PaddleOCR for printed text: {e}")
+                # Try with minimal configuration
+                try:
+                    paddle_ocr = PaddleOCR(
+                        use_angle_cls=False,
+                        lang='en',
+                        use_gpu=False,
+                        show_log=False
+                    )
+                    logger.info("PaddleOCR loaded with minimal configuration!")
+                except Exception as e2:
+                    logger.error(f"Failed to load PaddleOCR: {e2}")
+            # Initialize PaddleOCR for handwritten text (using different configuration)
+            logger.info("Loading PaddleOCR model for handwritten text...")
+            try:
+                paddle_ocr_handwritten = PaddleOCR(
+                    use_angle_cls=True,
+                    lang='en',
+                    use_gpu=False,
+                    show_log=False,
+                    det_model_dir=os.path.join(paddle_cache, 'det_hand'),
+                    rec_model_dir=os.path.join(paddle_cache, 'rec_hand'),
+                    cls_model_dir=os.path.join(paddle_cache, 'cls_hand'),
+                    det_limit_side_len=736,  # Smaller for handwritten text
+                    det_limit_type='min',
+                    rec_batch_num=4,
+                    # Use Chinese model which often works better for handwritten text
+                    rec_algorithm='CRNN'
+                )
+                logger.info("PaddleOCR for handwritten text loaded successfully!")
+            except Exception as e:
+                logger.warning(f"Failed to load separate handwritten model: {e}")
+                # Use the same model for both
+                paddle_ocr_handwritten = paddle_ocr
             models_loaded = True
+            logger.info("PaddleOCR model initialization completed!")
         except Exception as e:
+            logger.error(f"Error loading PaddleOCR models: {str(e)}")
             models_loaded = False
             raise e
     if not models_loaded:
         initialize_models()
def preprocess_image_for_paddle(image, max_size=1024):
    """Convert a PIL image into the BGR numpy array passed to PaddleOCR.

    Args:
        image: PIL ``Image`` to prepare.
        max_size: Upper bound, in pixels, for the longer side. Defaults to
            1024, the value this function previously hard-coded for memory
            efficiency on the free tier.

    Returns:
        A numpy array of the (possibly downscaled) image, with channels
        reordered from RGB to BGR.
    """
    # Convert to RGB if needed
    if image.mode != 'RGB':
        image = image.convert('RGB')

    longest_side = max(image.size)
    if longest_side > max_size:
        ratio = max_size / longest_side
        # Clamp each side to at least 1 px: on a very elongated image
        # (e.g. 5000x1) the short side would otherwise truncate to 0,
        # which is not a valid target size for resize().
        new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    # Convert PIL to numpy array for PaddleOCR
    img_array = np.array(image)
    # Convert RGB to BGR for OpenCV/PaddleOCR
    if len(img_array.shape) == 3:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    return img_array
def extract_text_paddle_printed(image):
    """Read printed text from an image with the ``paddle_ocr`` engine.

    Detections with a confidence of 0.5 or lower are discarded. Returns the
    remaining texts joined by single spaces, or ``""`` when the engine is
    unavailable, nothing is detected, or any error occurs (errors are logged).
    """
    try:
        ensure_models_loaded()
        if not paddle_ocr:
            logger.warning("PaddleOCR model not available")
            return ""

        ocr_output = paddle_ocr.ocr(preprocess_image_for_paddle(image), cls=True)

        kept = []
        # Only the first element of the output is consulted; it may be empty.
        detections = ocr_output[0] if ocr_output and ocr_output[0] else []
        for detection in detections:
            if not detection or len(detection) < 2:
                continue
            # detection[1] holds the (text, confidence) pair.
            text, confidence = detection[1][0], detection[1][1]
            if confidence > 0.5:  # drop low-confidence results
                kept.append(text)
        return ' '.join(kept).strip()
    except Exception as e:
        logger.error(f"PaddleOCR printed text error: {str(e)}")
        return ""
def extract_text_paddle_handwritten(image):
    """Read handwritten text from an image with ``paddle_ocr_handwritten``.

    Uses a lower confidence cut-off than the printed-text path: detections
    scoring 0.3 or lower are discarded. Returns the remaining texts joined by
    single spaces, or ``""`` when the engine is unavailable, nothing is
    detected, or any error occurs (errors are logged).
    """
    try:
        ensure_models_loaded()
        if not paddle_ocr_handwritten:
            logger.warning("PaddleOCR handwritten model not available")
            return ""

        ocr_output = paddle_ocr_handwritten.ocr(preprocess_image_for_paddle(image), cls=True)

        kept = []
        # Only the first element of the output is consulted; it may be empty.
        detections = ocr_output[0] if ocr_output and ocr_output[0] else []
        for detection in detections:
            if not detection or len(detection) < 2:
                continue
            # detection[1] holds the (text, confidence) pair.
            text, confidence = detection[1][0], detection[1][1]
            if confidence > 0.3:  # lower threshold for handwritten text
                kept.append(text)
        return ' '.join(kept).strip()
    except Exception as e:
        logger.error(f"PaddleOCR handwritten text error: {str(e)}")
        return ""
 def process_image_ocr(image, ocr_type="auto"):
+    """Process image with specified OCR method"""
     results = {}
+    if ocr_type in ["auto", "handwritten", "paddle_handwritten"]:
+        handwritten_text = extract_text_paddle_handwritten(image)
+        results["paddle_handwritten"] = handwritten_text
+    if ocr_type in ["auto", "printed", "paddle_printed"]:
+        printed_text = extract_text_paddle_printed(image)
+        results["paddle_printed"] = printed_text
+    # For auto mode, return the longer result or combine both
+    if ocr_type == "auto":
+        handwritten_len = len(results.get("paddle_handwritten", ""))
+        printed_len = len(results.get("paddle_printed", ""))
+        if handwritten_len > 0 and printed_len > 0:
+            # If both have results, combine them intelligently
+            if abs(handwritten_len - printed_len) / max(handwritten_len, printed_len) < 0.3:
+                # If lengths are similar, prefer printed text model
+                results["final"] = results["paddle_printed"]
+            else:
+                # Use the longer result
+                results["final"] = results["paddle_handwritten"] if handwritten_len > printed_len else results["paddle_printed"]
+        elif handwritten_len > 0:
+            results["final"] = results["paddle_handwritten"]
+        elif printed_len > 0:
+            results["final"] = results["paddle_printed"]
+        else:
+            results["final"] = ""
+    else:
+        # Return the specific model result
+        model_key = ocr_type.replace("handwritten", "paddle_handwritten").replace("printed", "paddle_printed")
+        results["final"] = results.get(model_key, "")
     return results
 def home():
     """Root endpoint"""
     return jsonify({
+        "service": "PaddleOCR Backend",
         "status": "running",
         "version": "2.0.0",
         "models_loaded": models_loaded,
         },
         "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
         "ocr_types": ["auto", "handwritten", "printed"],
+        "engine": "PaddleOCR"
     })
 @app.route('/health', methods=['GET'])
     return jsonify({
         "status": "healthy",
         "models_loaded": models_loaded,
+        "service": "PaddleOCR Backend"
     })
 @app.route('/ocr', methods=['POST'])
         if request.is_json and 'image_base64' not in request.json:
             return jsonify({"error": "No 'image_base64' field found in JSON"}), 400
+        # Get OCR type preference
         if request.is_json:
             ocr_type = request.json.get('type', 'auto')
         else:
             ocr_type = request.form.get('type', 'auto')
         # Validate ocr_type
+        if ocr_type not in ['auto', 'handwritten', 'printed', 'paddle_handwritten', 'paddle_printed']:
             return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
         # Load image
             "text": results["final"],
             "type_used": ocr_type,
             "character_count": len(results["final"]),
+            "engine": "PaddleOCR",
+            "details": {
+                "printed_result": results.get("paddle_printed", ""),
+                "handwritten_result": results.get("paddle_handwritten", "")
+            } if ocr_type == "auto" else {}
         }
         return jsonify(response)
         ocr_type = request.form.get('type', 'auto')
         # Validate ocr_type
+        if ocr_type not in ['auto', 'handwritten', 'printed', 'paddle_handwritten', 'paddle_printed']:
             return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
         results = []
                     "filename": image_file.filename,
                     "text": ocr_results["final"],
                     "character_count": len(ocr_results["final"]),
                     "success": True
                 })
             except Exception as e:
             "successful": successful_count,
             "failed": len(results) - successful_count,
             "type_used": ocr_type,
+            "engine": "PaddleOCR"
         })
     except Exception as e:
 def models_info():
     """Get information about loaded models"""
     return jsonify({
+        "engine": "PaddleOCR",
         "models": {
+            "paddle_printed": {
+                "name": "PaddleOCR English (Printed)",
+                "description": "PaddleOCR model optimized for printed text recognition",
+                "loaded": paddle_ocr is not None,
+                "best_for": "Printed text, documents, signs, books"
             },
+            "paddle_handwritten": {
+                "name": "PaddleOCR English (Handwritten)",
+                "description": "PaddleOCR model optimized for handwritten text recognition",
+                "loaded": paddle_ocr_handwritten is not None,
+                "best_for": "Handwritten text, notes, forms"
             }
         },
         "supported_types": ["auto", "handwritten", "printed"],
         "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
         "cache_directory": temp_dir,
+        "paddle_cache": os.path.join(temp_dir, 'paddleocr_models'),
+        "gpu_available": False,  # Using CPU for Hugging Face free tier
         "models_loaded": models_loaded,
+        "memory_optimized": True
     })
 @app.route('/models/load', methods=['POST'])
     """Manually trigger model loading"""
     try:
         if models_loaded:
+            return jsonify({"message": "Models already loaded", "success": True})
         initialize_models()
+        return jsonify({"message": "PaddleOCR models loaded successfully", "success": True})
     except Exception as e:
         return jsonify({"error": str(e), "success": False}), 500
     }), 500
 if __name__ == '__main__':
+    logger.info("Starting PaddleOCR service in development mode...")
     # Run the app - models will load on first request
     port = int(os.environ.get('PORT', 5000))
     app.run(host='0.0.0.0', port=port, debug=False)
 else:
     # Running with gunicorn - just log startup, don't do anything else
+    logger.info("PaddleOCR service ready - models will load on first request")