Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
|
|
|
| 2 |
from flask import Flask, request, jsonify
|
| 3 |
from flask_cors import CORS
|
| 4 |
import base64
|
| 5 |
import io
|
| 6 |
-
import os
|
| 7 |
from PIL import Image
|
| 8 |
import logging
|
| 9 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
|
@@ -42,11 +48,14 @@ def initialize_models():
|
|
| 42 |
|
| 43 |
# Set cache directories
|
| 44 |
cache_dir = os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface')
|
| 45 |
-
easyocr_cache =
|
| 46 |
|
| 47 |
os.makedirs(cache_dir, exist_ok=True)
|
| 48 |
os.makedirs(easyocr_cache, exist_ok=True)
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
# Initialize TrOCR for handwritten text (Microsoft's model)
|
| 51 |
logger.info("Loading TrOCR model for handwritten text...")
|
| 52 |
trocr_processor = TrOCRProcessor.from_pretrained(
|
|
@@ -60,12 +69,18 @@ def initialize_models():
|
|
| 60 |
|
| 61 |
# Initialize EasyOCR for printed text with custom model directory
|
| 62 |
logger.info("Loading EasyOCR for printed text...")
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
models_loaded = True
|
| 71 |
logger.info("All models loaded successfully!")
|
|
@@ -352,6 +367,7 @@ def models_info():
|
|
| 352 |
"supported_types": ["auto", "handwritten", "printed"],
|
| 353 |
"supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
|
| 354 |
"cache_directory": os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface'),
|
|
|
|
| 355 |
"gpu_available": torch.cuda.is_available(),
|
| 356 |
"models_loaded": models_loaded
|
| 357 |
})
|
|
@@ -396,4 +412,4 @@ if __name__ == '__main__':
|
|
| 396 |
app.run(host='0.0.0.0', port=port, debug=False)
|
| 397 |
else:
|
| 398 |
# Running with gunicorn - just log startup, don't do anything else
|
| 399 |
-
logger.info("OCR service ready - models will load on first request")
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
# Set EasyOCR environment variables BEFORE importing anything else
|
| 5 |
+
os.environ['EASYOCR_MODULE_PATH'] = '/app/.cache/easyocr'
|
| 6 |
+
os.environ['HOME'] = '/app'
|
| 7 |
|
| 8 |
+
# Now import everything else
|
| 9 |
from flask import Flask, request, jsonify
|
| 10 |
from flask_cors import CORS
|
| 11 |
import base64
|
| 12 |
import io
|
|
|
|
| 13 |
from PIL import Image
|
| 14 |
import logging
|
| 15 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
|
|
|
| 48 |
|
| 49 |
# Set cache directories
|
| 50 |
cache_dir = os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface')
|
| 51 |
+
easyocr_cache = '/app/.cache/easyocr'
|
| 52 |
|
| 53 |
os.makedirs(cache_dir, exist_ok=True)
|
| 54 |
os.makedirs(easyocr_cache, exist_ok=True)
|
| 55 |
|
| 56 |
+
# Make sure the EasyOCR cache directory is writable
|
| 57 |
+
os.chmod(easyocr_cache, 0o777)
|
| 58 |
+
|
| 59 |
# Initialize TrOCR for handwritten text (Microsoft's model)
|
| 60 |
logger.info("Loading TrOCR model for handwritten text...")
|
| 61 |
trocr_processor = TrOCRProcessor.from_pretrained(
|
|
|
|
| 69 |
|
| 70 |
# Initialize EasyOCR for printed text with custom model directory
|
| 71 |
logger.info("Loading EasyOCR for printed text...")
|
| 72 |
+
try:
|
| 73 |
+
easyocr_reader = easyocr.Reader(
|
| 74 |
+
['en'],
|
| 75 |
+
gpu=torch.cuda.is_available(),
|
| 76 |
+
model_storage_directory=easyocr_cache,
|
| 77 |
+
download_enabled=True
|
| 78 |
+
)
|
| 79 |
+
except Exception as e:
|
| 80 |
+
logger.warning(f"Failed to initialize EasyOCR with custom directory: {e}")
|
| 81 |
+
# Fallback: try without specifying directory
|
| 82 |
+
logger.info("Attempting EasyOCR initialization without custom directory...")
|
| 83 |
+
easyocr_reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
|
| 84 |
|
| 85 |
models_loaded = True
|
| 86 |
logger.info("All models loaded successfully!")
|
|
|
|
| 367 |
"supported_types": ["auto", "handwritten", "printed"],
|
| 368 |
"supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
|
| 369 |
"cache_directory": os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface'),
|
| 370 |
+
"easyocr_cache": os.environ.get('EASYOCR_MODULE_PATH', '/app/.cache/easyocr'),
|
| 371 |
"gpu_available": torch.cuda.is_available(),
|
| 372 |
"models_loaded": models_loaded
|
| 373 |
})
|
|
|
|
| 412 |
app.run(host='0.0.0.0', port=port, debug=False)
|
| 413 |
else:
|
| 414 |
# Running with gunicorn - just log startup, don't do anything else
|
| 415 |
+
logger.info("OCR service ready - models will load on first request")
|