mike23415 committed on
Commit
0c1061f
·
verified ·
1 Parent(s): ac78bc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -9
app.py CHANGED
@@ -1,9 +1,15 @@
 
 
 
 
 
 
1
 
 
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import base64
5
  import io
6
- import os
7
  from PIL import Image
8
  import logging
9
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
@@ -42,11 +48,14 @@ def initialize_models():
42
 
43
  # Set cache directories
44
  cache_dir = os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface')
45
- easyocr_cache = os.environ.get('EASYOCR_MODEL_PATH', '/app/.cache/easyocr')
46
 
47
  os.makedirs(cache_dir, exist_ok=True)
48
  os.makedirs(easyocr_cache, exist_ok=True)
49
 
 
 
 
50
  # Initialize TrOCR for handwritten text (Microsoft's model)
51
  logger.info("Loading TrOCR model for handwritten text...")
52
  trocr_processor = TrOCRProcessor.from_pretrained(
@@ -60,12 +69,18 @@ def initialize_models():
60
 
61
  # Initialize EasyOCR for printed text with custom model directory
62
  logger.info("Loading EasyOCR for printed text...")
63
- easyocr_reader = easyocr.Reader(
64
- ['en'],
65
- gpu=torch.cuda.is_available(),
66
- model_storage_directory=easyocr_cache,
67
- download_enabled=True
68
- )
 
 
 
 
 
 
69
 
70
  models_loaded = True
71
  logger.info("All models loaded successfully!")
@@ -352,6 +367,7 @@ def models_info():
352
  "supported_types": ["auto", "handwritten", "printed"],
353
  "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
354
  "cache_directory": os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface'),
 
355
  "gpu_available": torch.cuda.is_available(),
356
  "models_loaded": models_loaded
357
  })
@@ -396,4 +412,4 @@ if __name__ == '__main__':
396
  app.run(host='0.0.0.0', port=port, debug=False)
397
  else:
398
  # Running with gunicorn - just log startup, don't do anything else
399
- logger.info("OCR service ready - models will load on first request")
 
1
+ import os
2
+ import sys
3
+
4
+ # Set EasyOCR environment variables BEFORE importing anything else
5
+ os.environ['EASYOCR_MODULE_PATH'] = '/app/.cache/easyocr'
6
+ os.environ['HOME'] = '/app'
7
 
8
+ # Now import everything else
9
  from flask import Flask, request, jsonify
10
  from flask_cors import CORS
11
  import base64
12
  import io
 
13
  from PIL import Image
14
  import logging
15
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
48
 
49
  # Set cache directories
50
  cache_dir = os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface')
51
+ easyocr_cache = '/app/.cache/easyocr'
52
 
53
  os.makedirs(cache_dir, exist_ok=True)
54
  os.makedirs(easyocr_cache, exist_ok=True)
55
 
56
+ # Make sure the EasyOCR cache directory is writable
57
+ os.chmod(easyocr_cache, 0o777)
58
+
59
  # Initialize TrOCR for handwritten text (Microsoft's model)
60
  logger.info("Loading TrOCR model for handwritten text...")
61
  trocr_processor = TrOCRProcessor.from_pretrained(
 
69
 
70
  # Initialize EasyOCR for printed text with custom model directory
71
  logger.info("Loading EasyOCR for printed text...")
72
+ try:
73
+ easyocr_reader = easyocr.Reader(
74
+ ['en'],
75
+ gpu=torch.cuda.is_available(),
76
+ model_storage_directory=easyocr_cache,
77
+ download_enabled=True
78
+ )
79
+ except Exception as e:
80
+ logger.warning(f"Failed to initialize EasyOCR with custom directory: {e}")
81
+ # Fallback: try without specifying directory
82
+ logger.info("Attempting EasyOCR initialization without custom directory...")
83
+ easyocr_reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
84
 
85
  models_loaded = True
86
  logger.info("All models loaded successfully!")
 
367
  "supported_types": ["auto", "handwritten", "printed"],
368
  "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
369
  "cache_directory": os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/huggingface'),
370
+ "easyocr_cache": os.environ.get('EASYOCR_MODULE_PATH', '/app/.cache/easyocr'),
371
  "gpu_available": torch.cuda.is_available(),
372
  "models_loaded": models_loaded
373
  })
 
412
  app.run(host='0.0.0.0', port=port, debug=False)
413
  else:
414
  # Running with gunicorn - just log startup, don't do anything else
415
+ logger.info("OCR service ready - models will load on first request")