mike23415 committed on
Commit
9b18f9e
·
verified ·
1 Parent(s): bb720a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -110
app.py CHANGED
@@ -6,9 +6,9 @@ import tempfile
6
  temp_dir = tempfile.mkdtemp()
7
  print(f"Created temporary directory: {temp_dir}")
8
 
9
- # Set environment variables to use the temp directory for caching
10
- os.environ['HF_HOME'] = temp_dir
11
- os.environ['TRANSFORMERS_CACHE'] = temp_dir
12
  os.environ['HOME'] = temp_dir
13
 
14
  # Now import everything else
@@ -18,15 +18,10 @@ import base64
18
  import io
19
  from PIL import Image
20
  import logging
21
- import torch
22
  import numpy as np
23
  import threading
24
-
25
- # Surya OCR imports
26
- from surya.ocr import run_ocr
27
- from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
28
- from surya.model.recognition.model import load_model as load_rec_model
29
- from surya.model.recognition.processor import load_processor as load_rec_processor
30
 
31
  # Set up logging
32
  logging.basicConfig(level=logging.INFO)
@@ -36,16 +31,14 @@ app = Flask(__name__)
36
  CORS(app)
37
 
38
  # Global variables for models
39
- det_model = None
40
- det_processor = None
41
- rec_model = None
42
- rec_processor = None
43
  models_loaded = False
44
  loading_lock = threading.Lock()
45
 
46
  def initialize_models():
47
- """Initialize Surya OCR models"""
48
- global det_model, det_processor, rec_model, rec_processor, models_loaded
49
 
50
  if models_loaded:
51
  return
@@ -55,25 +48,73 @@ def initialize_models():
55
  return
56
 
57
  try:
58
- logger.info("Starting Surya OCR model initialization...")
 
 
 
59
 
60
- # Load detection model and processor
61
- logger.info("Loading Surya detection model...")
62
- det_processor = load_det_processor()
63
- det_model = load_det_model()
64
- logger.info("Surya detection model loaded successfully!")
65
 
66
- # Load recognition model and processor
67
- logger.info("Loading Surya recognition model...")
68
- rec_model = load_rec_model()
69
- rec_processor = load_rec_processor()
70
- logger.info("Surya recognition model loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  models_loaded = True
73
- logger.info("Surya OCR model initialization completed!")
74
 
75
  except Exception as e:
76
- logger.error(f"Error loading Surya models: {str(e)}")
77
  models_loaded = False
78
  raise e
79
 
@@ -82,75 +123,121 @@ def ensure_models_loaded():
82
  if not models_loaded:
83
  initialize_models()
84
 
85
- def preprocess_image(image):
86
- """Preprocess image for better OCR results"""
87
  # Convert to RGB if needed
88
  if image.mode != 'RGB':
89
  image = image.convert('RGB')
90
 
91
- # Resize if image is too large (Surya can handle larger images but let's be safe for free tier)
92
- max_size = 1536 # Surya can handle larger images than previous models
93
  if max(image.size) > max_size:
94
  ratio = max_size / max(image.size)
95
  new_size = tuple(int(dim * ratio) for dim in image.size)
96
  image = image.resize(new_size, Image.Resampling.LANCZOS)
97
 
98
- return image
 
 
 
 
 
 
 
99
 
100
- def extract_text_surya(image, ocr_type="auto"):
101
- """Extract text using Surya OCR"""
102
  try:
103
  ensure_models_loaded()
104
- if not all([det_model, det_processor, rec_model, rec_processor]):
105
- logger.warning("Surya models not available")
106
- return {"text": "", "confidence": 0, "bbox_count": 0}
107
 
108
  # Preprocess image
109
- image = preprocess_image(image)
110
-
111
- # Run Surya OCR
112
- langs = ["en"] # English language
113
-
114
- # Run OCR with Surya
115
- predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
116
 
117
- if not predictions or len(predictions) == 0:
118
- return {"text": "", "confidence": 0, "bbox_count": 0}
119
 
120
- # Extract text from predictions
121
- prediction = predictions[0]
122
- text_results = []
123
- total_confidence = 0
124
- bbox_count = 0
 
 
 
 
125
 
126
- for text_line in prediction.text_lines:
127
- text_results.append(text_line.text)
128
- total_confidence += text_line.confidence if hasattr(text_line, 'confidence') else 1.0
129
- bbox_count += 1
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- extracted_text = ' '.join(text_results).strip()
132
- avg_confidence = total_confidence / bbox_count if bbox_count > 0 else 0
133
 
134
- return {
135
- "text": extracted_text,
136
- "confidence": avg_confidence,
137
- "bbox_count": bbox_count
138
- }
 
 
 
 
139
 
 
140
  except Exception as e:
141
- logger.error(f"Surya OCR error: {str(e)}")
142
- return {"text": "", "confidence": 0, "bbox_count": 0}
143
 
144
  def process_image_ocr(image, ocr_type="auto"):
145
- """Process image with Surya OCR"""
146
  results = {}
147
 
148
- # Surya handles both printed and handwritten text well
149
- surya_result = extract_text_surya(image, ocr_type)
150
- results["surya"] = surya_result["text"]
151
- results["confidence"] = surya_result["confidence"]
152
- results["bbox_count"] = surya_result["bbox_count"]
153
- results["final"] = surya_result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  return results
156
 
@@ -158,7 +245,7 @@ def process_image_ocr(image, ocr_type="auto"):
158
  def home():
159
  """Root endpoint"""
160
  return jsonify({
161
- "service": "Surya OCR Backend",
162
  "status": "running",
163
  "version": "2.0.0",
164
  "models_loaded": models_loaded,
@@ -170,7 +257,7 @@ def home():
170
  },
171
  "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
172
  "ocr_types": ["auto", "handwritten", "printed"],
173
- "ocr_engine": "Surya OCR"
174
  })
175
 
176
  @app.route('/health', methods=['GET'])
@@ -179,8 +266,7 @@ def health_check():
179
  return jsonify({
180
  "status": "healthy",
181
  "models_loaded": models_loaded,
182
- "service": "Surya OCR Backend",
183
- "cache_dir": temp_dir
184
  })
185
 
186
  @app.route('/ocr', methods=['POST'])
@@ -197,14 +283,14 @@ def ocr_endpoint():
197
  if request.is_json and 'image_base64' not in request.json:
198
  return jsonify({"error": "No 'image_base64' field found in JSON"}), 400
199
 
200
- # Get OCR type preference (Surya handles all types well)
201
  if request.is_json:
202
  ocr_type = request.json.get('type', 'auto')
203
  else:
204
  ocr_type = request.form.get('type', 'auto')
205
 
206
  # Validate ocr_type
207
- if ocr_type not in ['auto', 'handwritten', 'printed']:
208
  return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
209
 
210
  # Load image
@@ -236,9 +322,11 @@ def ocr_endpoint():
236
  "text": results["final"],
237
  "type_used": ocr_type,
238
  "character_count": len(results["final"]),
239
- "confidence": results.get("confidence", 0),
240
- "bbox_count": results.get("bbox_count", 0),
241
- "engine": "Surya OCR"
 
 
242
  }
243
 
244
  return jsonify(response)
@@ -264,7 +352,7 @@ def batch_ocr_endpoint():
264
  ocr_type = request.form.get('type', 'auto')
265
 
266
  # Validate ocr_type
267
- if ocr_type not in ['auto', 'handwritten', 'printed']:
268
  return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
269
 
270
  results = []
@@ -287,8 +375,6 @@ def batch_ocr_endpoint():
287
  "filename": image_file.filename,
288
  "text": ocr_results["final"],
289
  "character_count": len(ocr_results["final"]),
290
- "confidence": ocr_results.get("confidence", 0),
291
- "bbox_count": ocr_results.get("bbox_count", 0),
292
  "success": True
293
  })
294
  except Exception as e:
@@ -308,7 +394,7 @@ def batch_ocr_endpoint():
308
  "successful": successful_count,
309
  "failed": len(results) - successful_count,
310
  "type_used": ocr_type,
311
- "engine": "Surya OCR"
312
  })
313
 
314
  except Exception as e:
@@ -319,34 +405,28 @@ def batch_ocr_endpoint():
319
  def models_info():
320
  """Get information about loaded models"""
321
  return jsonify({
 
322
  "models": {
323
- "surya_detection": {
324
- "name": "Surya Detection Model",
325
- "description": "Surya's SegFormer-based text detection model",
326
- "loaded": det_model is not None and det_processor is not None,
327
- "best_for": "Text detection in images"
328
  },
329
- "surya_recognition": {
330
- "name": "Surya Recognition Model",
331
- "description": "Surya's transformer-based text recognition model",
332
- "loaded": rec_model is not None and rec_processor is not None,
333
- "best_for": "Both printed and handwritten text recognition"
334
  }
335
  },
336
  "supported_types": ["auto", "handwritten", "printed"],
337
  "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
338
  "cache_directory": temp_dir,
339
- "hf_cache": temp_dir,
340
- "gpu_available": torch.cuda.is_available(),
341
  "models_loaded": models_loaded,
342
- "engine": "Surya OCR",
343
- "engine_features": [
344
- "High accuracy on both printed and handwritten text",
345
- "Multilingual support",
346
- "Modern transformer architecture",
347
- "Efficient processing",
348
- "Works well on Hugging Face free tier"
349
- ]
350
  })
351
 
352
  @app.route('/models/load', methods=['POST'])
@@ -354,10 +434,10 @@ def load_models():
354
  """Manually trigger model loading"""
355
  try:
356
  if models_loaded:
357
- return jsonify({"message": "Surya models already loaded", "success": True})
358
 
359
  initialize_models()
360
- return jsonify({"message": "Surya models loaded successfully", "success": True})
361
  except Exception as e:
362
  return jsonify({"error": str(e), "success": False}), 500
363
 
@@ -383,10 +463,10 @@ def internal_error(error):
383
  }), 500
384
 
385
  if __name__ == '__main__':
386
- logger.info("Starting Surya OCR service in development mode...")
387
  # Run the app - models will load on first request
388
  port = int(os.environ.get('PORT', 5000))
389
  app.run(host='0.0.0.0', port=port, debug=False)
390
  else:
391
  # Running with gunicorn - just log startup, don't do anything else
392
- logger.info("Surya OCR service ready - models will load on first request")
 
6
  temp_dir = tempfile.mkdtemp()
7
  print(f"Created temporary directory: {temp_dir}")
8
 
9
+ # Set PaddleOCR environment variables to use the temp directory
10
+ os.environ['PPOCR_MODEL_PATH'] = temp_dir
11
+ os.environ['PADDLEOCR_HOME'] = temp_dir
12
  os.environ['HOME'] = temp_dir
13
 
14
  # Now import everything else
 
18
  import io
19
  from PIL import Image
20
  import logging
 
21
  import numpy as np
22
  import threading
23
+ import shutil
24
+ import cv2
 
 
 
 
25
 
26
  # Set up logging
27
  logging.basicConfig(level=logging.INFO)
 
31
  CORS(app)
32
 
33
  # Global variables for models
34
+ paddle_ocr = None
35
+ paddle_ocr_handwritten = None
 
 
36
  models_loaded = False
37
  loading_lock = threading.Lock()
38
 
39
  def initialize_models():
40
+ """Initialize PaddleOCR models"""
41
+ global paddle_ocr, paddle_ocr_handwritten, models_loaded
42
 
43
  if models_loaded:
44
  return
 
48
  return
49
 
50
  try:
51
+ logger.info("Starting PaddleOCR model initialization...")
52
+
53
+ # Import PaddleOCR after setting environment variables
54
+ from paddleocr import PaddleOCR
55
 
56
+ # Create model cache directory
57
+ paddle_cache = os.path.join(temp_dir, 'paddleocr_models')
58
+ os.makedirs(paddle_cache, exist_ok=True)
 
 
59
 
60
+ # Initialize PaddleOCR for printed text
61
+ logger.info("Loading PaddleOCR model for printed text...")
62
+ try:
63
+ paddle_ocr = PaddleOCR(
64
+ use_angle_cls=True,
65
+ lang='en',
66
+ use_gpu=False, # Set to False for CPU-only (Hugging Face free tier)
67
+ show_log=False,
68
+ det_model_dir=os.path.join(paddle_cache, 'det'),
69
+ rec_model_dir=os.path.join(paddle_cache, 'rec'),
70
+ cls_model_dir=os.path.join(paddle_cache, 'cls'),
71
+ det_limit_side_len=960, # Reduce for memory efficiency
72
+ det_limit_type='min',
73
+ rec_batch_num=6, # Reduce batch size for memory efficiency
74
+ )
75
+ logger.info("PaddleOCR for printed text loaded successfully!")
76
+ except Exception as e:
77
+ logger.warning(f"Failed to load PaddleOCR for printed text: {e}")
78
+ # Try with minimal configuration
79
+ try:
80
+ paddle_ocr = PaddleOCR(
81
+ use_angle_cls=False,
82
+ lang='en',
83
+ use_gpu=False,
84
+ show_log=False
85
+ )
86
+ logger.info("PaddleOCR loaded with minimal configuration!")
87
+ except Exception as e2:
88
+ logger.error(f"Failed to load PaddleOCR: {e2}")
89
+
90
+ # Initialize PaddleOCR for handwritten text (using different configuration)
91
+ logger.info("Loading PaddleOCR model for handwritten text...")
92
+ try:
93
+ paddle_ocr_handwritten = PaddleOCR(
94
+ use_angle_cls=True,
95
+ lang='en',
96
+ use_gpu=False,
97
+ show_log=False,
98
+ det_model_dir=os.path.join(paddle_cache, 'det_hand'),
99
+ rec_model_dir=os.path.join(paddle_cache, 'rec_hand'),
100
+ cls_model_dir=os.path.join(paddle_cache, 'cls_hand'),
101
+ det_limit_side_len=736, # Smaller for handwritten text
102
+ det_limit_type='min',
103
+ rec_batch_num=4,
104
+ # Use Chinese model which often works better for handwritten text
105
+ rec_algorithm='CRNN'
106
+ )
107
+ logger.info("PaddleOCR for handwritten text loaded successfully!")
108
+ except Exception as e:
109
+ logger.warning(f"Failed to load separate handwritten model: {e}")
110
+ # Use the same model for both
111
+ paddle_ocr_handwritten = paddle_ocr
112
 
113
  models_loaded = True
114
+ logger.info("PaddleOCR model initialization completed!")
115
 
116
  except Exception as e:
117
+ logger.error(f"Error loading PaddleOCR models: {str(e)}")
118
  models_loaded = False
119
  raise e
120
 
 
123
  if not models_loaded:
124
  initialize_models()
125
 
def preprocess_image_for_paddle(image, max_size=1024):
    """Convert a PIL image into a BGR numpy array sized for PaddleOCR.

    Args:
        image: PIL image in any mode; non-RGB images are converted to RGB.
        max_size: Upper bound, in pixels, for the longer side. Larger images
            are scaled down proportionally to limit memory use.

    Returns:
        numpy array of the (possibly resized) image with its channels
        reordered from RGB to BGR.
    """
    # Convert to RGB if needed
    if image.mode != 'RGB':
        image = image.convert('RGB')

    longest_side = max(image.size)
    if longest_side > max_size:
        ratio = max_size / longest_side
        # Clamp each side to at least 1 px: plain truncation yields a
        # zero-length side for extreme aspect ratios (e.g. 5000x1), and the
        # resize then fails instead of producing an image.
        new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    # Convert PIL to numpy array for PaddleOCR
    img_array = np.array(image)

    # Convert RGB to BGR for OpenCV/PaddleOCR
    if len(img_array.shape) == 3:
        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

    return img_array
147
 
def _collect_paddle_text(ocr_result, min_confidence):
    """Join recognized strings whose confidence is strictly above the threshold.

    Args:
        ocr_result: Value returned by the engine's ``ocr`` call; only its first
            element is read, and each entry is indexed as
            ``entry[1][0]`` (text) and ``entry[1][1]`` (confidence).
        min_confidence: Entries at or below this score are dropped.

    Returns:
        The kept strings joined by single spaces, or "" when nothing is kept.
    """
    if not ocr_result or not ocr_result[0]:
        return ""

    kept = []
    for line in ocr_result[0]:
        if line and len(line) >= 2:
            text, confidence = line[1][0], line[1][1]
            if confidence > min_confidence:
                kept.append(text)
    return ' '.join(kept).strip()


def extract_text_paddle_printed(image):
    """Extract text using the printed-text PaddleOCR engine.

    Loads the models on demand, preprocesses the image and keeps results with
    confidence above 0.5.

    Returns:
        The recognized text, or "" when the engine is unavailable or any step
        raises (the error is logged with its traceback, not propagated).
    """
    try:
        ensure_models_loaded()
        if not paddle_ocr:
            logger.warning("PaddleOCR model not available")
            return ""

        img_array = preprocess_image_for_paddle(image)
        raw_result = paddle_ocr.ocr(img_array, cls=True)
        return _collect_paddle_text(raw_result, 0.5)
    except Exception as e:
        # Best-effort by design: callers expect a string, so log and fall back.
        logger.exception("PaddleOCR printed text error: %s", e)
        return ""


def extract_text_paddle_handwritten(image):
    """Extract text using the handwritten-text PaddleOCR engine.

    Same flow as the printed variant, but keeps results with confidence above
    0.3 (lower threshold for handwritten text).

    Returns:
        The recognized text, or "" when the engine is unavailable or any step
        raises (the error is logged with its traceback, not propagated).
    """
    try:
        ensure_models_loaded()
        if not paddle_ocr_handwritten:
            logger.warning("PaddleOCR handwritten model not available")
            return ""

        img_array = preprocess_image_for_paddle(image)
        raw_result = paddle_ocr_handwritten.ocr(img_array, cls=True)
        return _collect_paddle_text(raw_result, 0.3)
    except Exception as e:
        # Best-effort by design: callers expect a string, so log and fall back.
        logger.exception("PaddleOCR handwritten text error: %s", e)
        return ""
205
 
def process_image_ocr(image, ocr_type="auto"):
    """Run the PaddleOCR extractor(s) selected by ocr_type and pick a final text.

    Args:
        image: Image passed unchanged to the extractor functions.
        ocr_type: "auto" runs both extractors; "handwritten" or
            "paddle_handwritten" runs the handwritten one; "printed" or
            "paddle_printed" runs the printed one. Any other value runs none.

    Returns:
        dict with "paddle_handwritten" and/or "paddle_printed" for each
        extractor that ran, plus "final" holding the chosen text ("" when no
        extractor ran or nothing was recognized).
    """
    results = {}

    if ocr_type in ("auto", "handwritten", "paddle_handwritten"):
        results["paddle_handwritten"] = extract_text_paddle_handwritten(image)

    if ocr_type in ("auto", "printed", "paddle_printed"):
        results["paddle_printed"] = extract_text_paddle_printed(image)

    if ocr_type == "auto":
        handwritten_text = results.get("paddle_handwritten", "")
        printed_text = results.get("paddle_printed", "")
        handwritten_len = len(handwritten_text)
        printed_len = len(printed_text)
        longest = max(handwritten_len, printed_len)

        # Lengths within 30% of each other count as "similar"; the printed
        # model is preferred then, and also on ties or when both are empty.
        similar = longest > 0 and abs(handwritten_len - printed_len) / longest < 0.3
        if handwritten_len > printed_len and not similar:
            results["final"] = handwritten_text
        else:
            results["final"] = printed_text
    else:
        # Map the short aliases to result keys explicitly. Chained
        # str.replace calls would rewrite "paddle_handwritten" into
        # "paddle_paddle_handwritten" and always produce an empty result.
        aliases = {
            "handwritten": "paddle_handwritten",
            "printed": "paddle_printed",
        }
        model_key = aliases.get(ocr_type, ocr_type)
        results["final"] = results.get(model_key, "")

    return results
243
 
 
245
  def home():
246
  """Root endpoint"""
247
  return jsonify({
248
+ "service": "PaddleOCR Backend",
249
  "status": "running",
250
  "version": "2.0.0",
251
  "models_loaded": models_loaded,
 
257
  },
258
  "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
259
  "ocr_types": ["auto", "handwritten", "printed"],
260
+ "engine": "PaddleOCR"
261
  })
262
 
263
  @app.route('/health', methods=['GET'])
 
266
  return jsonify({
267
  "status": "healthy",
268
  "models_loaded": models_loaded,
269
+ "service": "PaddleOCR Backend"
 
270
  })
271
 
272
  @app.route('/ocr', methods=['POST'])
 
283
  if request.is_json and 'image_base64' not in request.json:
284
  return jsonify({"error": "No 'image_base64' field found in JSON"}), 400
285
 
286
+ # Get OCR type preference
287
  if request.is_json:
288
  ocr_type = request.json.get('type', 'auto')
289
  else:
290
  ocr_type = request.form.get('type', 'auto')
291
 
292
  # Validate ocr_type
293
+ if ocr_type not in ['auto', 'handwritten', 'printed', 'paddle_handwritten', 'paddle_printed']:
294
  return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
295
 
296
  # Load image
 
322
  "text": results["final"],
323
  "type_used": ocr_type,
324
  "character_count": len(results["final"]),
325
+ "engine": "PaddleOCR",
326
+ "details": {
327
+ "printed_result": results.get("paddle_printed", ""),
328
+ "handwritten_result": results.get("paddle_handwritten", "")
329
+ } if ocr_type == "auto" else {}
330
  }
331
 
332
  return jsonify(response)
 
352
  ocr_type = request.form.get('type', 'auto')
353
 
354
  # Validate ocr_type
355
+ if ocr_type not in ['auto', 'handwritten', 'printed', 'paddle_handwritten', 'paddle_printed']:
356
  return jsonify({"error": "Invalid OCR type. Use: auto, handwritten, printed"}), 400
357
 
358
  results = []
 
375
  "filename": image_file.filename,
376
  "text": ocr_results["final"],
377
  "character_count": len(ocr_results["final"]),
 
 
378
  "success": True
379
  })
380
  except Exception as e:
 
394
  "successful": successful_count,
395
  "failed": len(results) - successful_count,
396
  "type_used": ocr_type,
397
+ "engine": "PaddleOCR"
398
  })
399
 
400
  except Exception as e:
 
405
  def models_info():
406
  """Get information about loaded models"""
407
  return jsonify({
408
+ "engine": "PaddleOCR",
409
  "models": {
410
+ "paddle_printed": {
411
+ "name": "PaddleOCR English (Printed)",
412
+ "description": "PaddleOCR model optimized for printed text recognition",
413
+ "loaded": paddle_ocr is not None,
414
+ "best_for": "Printed text, documents, signs, books"
415
  },
416
+ "paddle_handwritten": {
417
+ "name": "PaddleOCR English (Handwritten)",
418
+ "description": "PaddleOCR model optimized for handwritten text recognition",
419
+ "loaded": paddle_ocr_handwritten is not None,
420
+ "best_for": "Handwritten text, notes, forms"
421
  }
422
  },
423
  "supported_types": ["auto", "handwritten", "printed"],
424
  "supported_formats": ["PNG", "JPEG", "JPG", "BMP", "TIFF"],
425
  "cache_directory": temp_dir,
426
+ "paddle_cache": os.path.join(temp_dir, 'paddleocr_models'),
427
+ "gpu_available": False, # Using CPU for Hugging Face free tier
428
  "models_loaded": models_loaded,
429
+ "memory_optimized": True
 
 
 
 
 
 
 
430
  })
431
 
432
  @app.route('/models/load', methods=['POST'])
 
434
  """Manually trigger model loading"""
435
  try:
436
  if models_loaded:
437
+ return jsonify({"message": "Models already loaded", "success": True})
438
 
439
  initialize_models()
440
+ return jsonify({"message": "PaddleOCR models loaded successfully", "success": True})
441
  except Exception as e:
442
  return jsonify({"error": str(e), "success": False}), 500
443
 
 
463
  }), 500
464
 
465
  if __name__ == '__main__':
466
+ logger.info("Starting PaddleOCR service in development mode...")
467
  # Run the app - models will load on first request
468
  port = int(os.environ.get('PORT', 5000))
469
  app.run(host='0.0.0.0', port=port, debug=False)
470
  else:
471
  # Running with gunicorn - just log startup, don't do anything else
472
+ logger.info("PaddleOCR service ready - models will load on first request")