aniket47 committed on
Commit
86e7db6
·
0 Parent(s):

Initial FastAPI backend for HF Spaces

Browse files
README.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text to 3D Backend
3
+ emoji: 🎨
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ app_port: 7860
10
+ ---
11
+
12
+ # Text to 3D Model Converter - Backend
13
+
14
+ This is the backend API for the Text-to-3D Model Converter application. It provides FastAPI endpoints for:
15
+
16
+ - **Text-to-3D**: Generate 3D models from text descriptions
17
+ - **Image-to-3D**: Convert uploaded images to 3D models
18
+ - **Progress Tracking**: Real-time job progress monitoring
19
+
20
+ ## Features
21
+
22
+ - 🎨 **Direct Model Loading**: Stable Diffusion and DPT models loaded locally for fast inference
23
+ - ⚡ **Async Processing**: Background job processing with progress tracking
24
+ - 🔄 **Job Management**: Cancel jobs, track progress, and get results
25
+ - ☁️ **Cloud Storage**: Automatic upload to Cloudinary for results
26
+ - 🚀 **FastAPI**: High-performance API with automatic docs
27
+
28
+ ## API Endpoints
29
+
30
+ - `GET /` - Health check and model status
31
+ - `POST /generate` - Generate 3D model from text prompt
32
+ - `POST /upload` - Convert uploaded image to 3D model
33
+ - `GET /progress/{job_id}` - Get job progress
34
+ - `POST /cancel` - Cancel a running job
35
+
36
+ ## Models Used
37
+
38
+ - **Image Generation**: Stable Diffusion v1.5 (runwayml/stable-diffusion-v1-5)
39
+ - **Depth Estimation**: DPT (Intel/dpt-beit-large-512)
40
+ - **3D Reconstruction**: Open3D Poisson surface reconstruction
41
+
42
+ ## Environment Variables
43
+
44
+ Set these in the Space settings:
45
+
46
+ ```
47
+ CLOUDINARY_CLOUD_NAME=your_cloud_name
48
+ CLOUDINARY_API_KEY=your_api_key
49
+ CLOUDINARY_API_SECRET=your_api_secret
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ The API is designed to work with the frontend application deployed on Render. CORS is configured to allow requests from the frontend domain.
55
+
56
+ ### Example Request
57
+
58
+ ```python
59
+ import requests
60
+
61
+ # Generate 3D model from text
62
+ response = requests.post(
63
+ "https://your-space-url/generate",
64
+ json={"prompt": "a red sports car"}
65
+ )
66
+
67
+ job_id = response.json()["job_id"]
68
+
69
+ # Check progress
70
+ progress = requests.get(f"https://your-space-url/progress/{job_id}")
71
+ print(progress.json())
72
+ ```
73
+
74
+ ## Development
75
+
76
+ To run locally:
77
+
78
+ ```bash
79
+ pip install -r requirements.txt
80
+ python app.py
81
+ ```
82
+
83
+ The API will be available at `http://localhost:7860`
app.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI Backend for Text-to-3D Model Converter
3
+ Deployed on Hugging Face Spaces with direct model loading
4
+ """
5
+
6
+ import os
7
+ import logging
8
+ import time
9
+ import uuid
10
+ import asyncio
11
+ from typing import Optional
12
+ from contextlib import asynccontextmanager
13
+
14
+ import uvicorn
15
+ from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
16
+ from fastapi.middleware.cors import CORSMiddleware
17
+ from fastapi.responses import JSONResponse
18
+ from pydantic import BaseModel
19
+
20
+ from models.depth_processor import DepthProcessor
21
+ from models.image_generator import ImageGenerator
22
+ from utils.job_manager import JobManager
23
+ from utils.cloudinary_client import CloudinaryClient
24
+
25
+ # Configure logging
26
+ logging.basicConfig(level=logging.INFO)
27
+ logger = logging.getLogger(__name__)
28
+
29
+ # Global variables for models
30
+ depth_processor = None
31
+ image_generator = None
32
+ job_manager = None
33
+ cloudinary_client = None
34
+
35
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create shared services and load the AI models before serving requests.

    Populates the module-level ``job_manager``, ``cloudinary_client``,
    ``depth_processor`` and ``image_generator`` globals. Model loading is
    blocking, so it is pushed to a worker thread to keep the event loop free.

    Raises:
        Exception: whatever the model loaders raise; startup is aborted so the
            app never serves requests with half-initialised models.
    """
    global depth_processor, image_generator, job_manager, cloudinary_client

    logger.info("πŸš€ Starting Text-to-3D Backend...")

    # Initialize utilities
    job_manager = JobManager()
    cloudinary_client = CloudinaryClient()

    # Initialize models
    logger.info("πŸ“¦ Loading AI models...")
    try:
        depth_processor = DepthProcessor()
        await asyncio.to_thread(depth_processor.load_model)
        logger.info("βœ… Depth estimation model loaded")

        image_generator = ImageGenerator()
        await asyncio.to_thread(image_generator.load_model)
        logger.info("βœ… Image generation model loaded")

        logger.info("πŸŽ‰ All models loaded successfully!")
    except Exception as e:
        # logger.exception keeps the traceback; the bare raise re-raises the
        # original exception without adding a redundant frame.
        logger.exception(f"❌ Failed to load models: {str(e)}")
        raise

    try:
        yield
    finally:
        # Runs even when the server is torn down by an exception.
        logger.info("πŸ”„ Shutting down...")
69
+
70
+ # Initialize FastAPI app
71
app = FastAPI(
    title="Text-to-3D Backend",
    description="Convert text prompts and images to 3D models",
    version="1.0.0",
    lifespan=lifespan
)

# Configure CORS.
#
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS environment
# variable, e.g. "http://localhost:3000,https://my-frontend.onrender.com".
# When it is unset, every origin is allowed (the previous effective behaviour).
#
# CORSMiddleware matches ``allow_origins`` entries as exact strings, so a
# pattern such as "https://*.render.com" never matched anything. Credentials
# are only enabled for an explicit origin list: combining a wildcard origin
# with allow_credentials=True lets any website issue credentialed requests.
_configured_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "").split(",")
    if origin.strip()
]
_cors_origins = _configured_origins or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
90
+
91
+ # Request/Response models
92
class GenerateRequest(BaseModel):
    """JSON body accepted by ``POST /generate``."""

    # Text description to turn into an image and then a 3D model.
    prompt: str
    # Optional caller-supplied identifier, forwarded to the background task.
    user_id: Optional[str] = None
95
+
96
class GenerateResponse(BaseModel):
    """Response body of ``POST /generate``.

    The URL fields default to ``None``; the endpoint returns as soon as the
    job is queued, before any artefact exists.
    """

    success: bool
    # Identifier to poll with GET /progress/{job_id}.
    job_id: str
    # Human-readable status text. The endpoint already passes ``message=``;
    # without this field pydantic silently discarded it.
    message: Optional[str] = None
    image_url: Optional[str] = None
    model_url: Optional[str] = None
    depth_map_url: Optional[str] = None
    error: Optional[str] = None
103
+
104
class ProgressResponse(BaseModel):
    """Response body of ``GET /progress/{job_id}``."""

    # Name of the pipeline stage the job is currently in.
    stage: str
    # Completion percentage reported by the job.
    progress: int
    # Human-readable description of the current stage.
    message: str
    # time.time() value recorded when the progress entry was written.
    timestamp: Optional[float] = None
109
+
110
@app.get("/")
async def root():
    """Report that the service is up, which models exist and whether a GPU is used."""
    # The GPU flag is taken from the depth processor's device; it is False
    # until that processor has been created.
    if depth_processor:
        on_gpu = depth_processor.device.type == "cuda"
    else:
        on_gpu = False

    loaded = {
        "depth_processor": depth_processor is not None,
        "image_generator": image_generator is not None
    }

    return {
        "status": "Text-to-3D Backend is running! πŸš€",
        "version": "1.0.0",
        "models_loaded": loaded,
        "gpu_available": on_gpu
    }
122
+
123
@app.get("/health")
async def health_check():
    """Return model load state, compute device and the number of active jobs."""

    def _state(component) -> str:
        # Truthiness of the global is the only signal available here.
        return "loaded" if component else "not_loaded"

    device_name = str(depth_processor.device) if depth_processor else "unknown"
    running_jobs = job_manager.get_active_job_count() if job_manager else 0

    return {
        "status": "healthy",
        "models": {
            "depth_estimation": _state(depth_processor),
            "image_generation": _state(image_generator)
        },
        "device": device_name,
        "active_jobs": running_jobs
    }
135
+
136
@app.post("/generate", response_model=GenerateResponse)
async def generate_from_text(
    request: GenerateRequest,
    background_tasks: BackgroundTasks
):
    """Queue a text-to-3D job and return its id immediately.

    The heavy work runs in ``process_text_to_3d`` after the response is sent;
    clients poll ``GET /progress/{job_id}`` for status.

    Raises:
        HTTPException: 400 when the prompt is empty or whitespace only,
            500 when the job cannot be queued.
    """
    # Validate outside the try block so the 400 is not rewritten into a 500.
    if not request.prompt.strip():
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    try:
        # Create job ID
        job_id = str(uuid.uuid4())
        job_manager.register_job(job_id)

        logger.info(f"🎨 Starting text-to-3D generation: '{request.prompt}' (Job: {job_id})")

        # Start background processing
        background_tasks.add_task(
            process_text_to_3d,
            job_id,
            request.prompt,
            request.user_id
        )

        return GenerateResponse(
            success=True,
            job_id=job_id,
            message="Generation started"
        )

    except HTTPException:
        # Deliberate HTTP errors keep their own status code.
        raise
    except Exception as e:
        logger.error(f"❌ Error in generate endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
169
+
170
@app.post("/upload")
async def upload_image(
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None,
    user_id: Optional[str] = None
):
    """Queue an image-to-3D job for an uploaded image and return its id.

    The conversion runs in ``process_upload_to_3d`` after the response is
    sent; clients poll ``GET /progress/{job_id}`` for status.

    Raises:
        HTTPException: 400 when the upload is not an image or is empty,
            500 when the job cannot be queued.
    """
    # content_type is None when the client sends no Content-Type for the part.
    content_type = file.content_type or ""
    if not content_type.startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        # Read first: if reading fails, no job has been registered yet and
        # nothing is left behind in the "active" state.
        file_content = await file.read()
        if not file_content:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Create job ID
        job_id = str(uuid.uuid4())
        job_manager.register_job(job_id)

        logger.info(f"πŸ“€ Processing uploaded image: {file.filename} (Job: {job_id})")

        # Start background processing
        background_tasks.add_task(
            process_upload_to_3d,
            job_id,
            file_content,
            file.filename,
            user_id
        )

        return {
            "success": True,
            "job_id": job_id,
            "message": "Upload processing started"
        }

    except HTTPException:
        # Deliberate HTTP errors keep their own status code.
        raise
    except Exception as e:
        logger.error(f"❌ Error in upload endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
209
+
210
@app.get("/progress/{job_id}", response_model=ProgressResponse)
async def get_progress(job_id: str):
    """Return the latest progress record for ``job_id``.

    Raises:
        HTTPException: 404 when the job manager has no progress entry for the
            id, 500 on an unexpected failure.
    """
    try:
        progress = job_manager.get_job_progress(job_id)
        if not progress:
            raise HTTPException(status_code=404, detail="Job not found")

        return ProgressResponse(**progress)

    except HTTPException:
        # Without this the 404 above was caught below and returned as a 500.
        raise
    except Exception as e:
        logger.error(f"❌ Error getting progress: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
223
+
224
@app.post("/cancel")
async def cancel_job(job_id: str):
    """Request cancellation of a job.

    Cancellation is cooperative: the background task checks the flag between
    pipeline stages, so a stage that is already running finishes first.

    Raises:
        HTTPException: 404 when the job id is unknown, 500 on an unexpected
            failure.
    """
    try:
        if not job_manager.cancel_job(job_id):
            raise HTTPException(status_code=404, detail="Job not found")

        return {"success": True, "message": f"Job {job_id} cancelled"}

    except HTTPException:
        # Without this the 404 above was caught below and returned as a 500.
        raise
    except Exception as e:
        logger.error(f"❌ Error cancelling job: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
237
+
238
async def process_text_to_3d(job_id: str, prompt: str, user_id: Optional[str]):
    """Background task: prompt -> image -> depth map -> OBJ mesh -> uploads.

    Progress is reported through ``job_manager``; the task stops quietly
    between stages once the job has been cancelled. Any failure is recorded
    with ``job_manager.fail_job`` rather than raised, because nothing awaits
    this task.

    Args:
        job_id: Identifier registered with the job manager.
        prompt: Text description to render.
        user_id: Accepted for API symmetry; not used by the pipeline.
    """
    # Local files produced by the depth pipeline, removed when the task ends.
    temp_paths = []
    try:
        job_manager.update_job_progress(job_id, "generating_image", 10, "Generating image from text...")

        # Generate image from text
        image_result = await asyncio.to_thread(
            image_generator.generate_image,
            prompt
        )
        # The generator reports failure via {'success': False, 'error': ...}
        # instead of raising; surface the real message, not a KeyError.
        if not image_result.get('success', True):
            raise RuntimeError(image_result.get('error') or "Image generation failed")

        if job_manager.is_job_cancelled(job_id):
            return

        job_manager.update_job_progress(job_id, "uploading_image", 40, "Uploading generated image...")

        # Upload image to Cloudinary
        image_url = await asyncio.to_thread(
            cloudinary_client.upload_image_from_bytes,
            image_result['image_bytes'],
            f"generated_{job_id}"
        )

        if job_manager.is_job_cancelled(job_id):
            return

        job_manager.update_job_progress(job_id, "creating_depth", 60, "Creating depth map...")

        # Generate depth map and 3D model
        depth_result = await asyncio.to_thread(
            depth_processor.process_image_to_3d,
            image_result['image_pil'],
            job_id
        )
        if not depth_result.get('success', True):
            raise RuntimeError(depth_result.get('error') or "3D reconstruction failed")

        temp_paths = [depth_result['obj_path'], depth_result['depth_map_path']]

        if job_manager.is_job_cancelled(job_id):
            return

        job_manager.update_job_progress(job_id, "uploading_results", 90, "Uploading 3D model...")

        # Upload results
        model_url = await asyncio.to_thread(
            cloudinary_client.upload_file,
            depth_result['obj_path'],
            f"model_{job_id}.obj"
        )

        depth_map_url = await asyncio.to_thread(
            cloudinary_client.upload_image_from_path,
            depth_result['depth_map_path'],
            f"depth_{job_id}"
        )

        # Complete job
        job_manager.complete_job(job_id, {
            "image_url": image_url,
            "model_url": model_url,
            "depth_map_url": depth_map_url
        })

        logger.info(f"βœ… Text-to-3D generation completed: {job_id}")

    except Exception as e:
        logger.error(f"❌ Error in text-to-3D processing: {str(e)}")
        job_manager.fail_job(job_id, str(e))

    finally:
        # Best-effort cleanup so generated files do not accumulate on disk.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass
304
+
305
async def process_upload_to_3d(job_id: str, file_content: bytes, filename: str, user_id: Optional[str]):
    """Background task: uploaded image -> depth map -> OBJ mesh -> uploads.

    Progress is reported through ``job_manager``; the task stops quietly
    between stages once the job has been cancelled. Any failure is recorded
    with ``job_manager.fail_job`` rather than raised, because nothing awaits
    this task.

    Args:
        job_id: Identifier registered with the job manager.
        file_content: Raw bytes of the uploaded image.
        filename: Client-supplied file name (untrusted, may be None).
        user_id: Accepted for API symmetry; not used by the pipeline.
    """
    # ``io`` and ``re`` are not imported at module level; without the ``io``
    # import every upload job failed with a NameError.
    import io
    import re

    from PIL import Image

    # Local files produced by the depth pipeline, removed when the task ends.
    temp_paths = []
    try:
        job_manager.update_job_progress(job_id, "uploading", 20, "Uploading image to cloud...")

        # The file name comes from the client: keep only a safe stem so path
        # separators or reserved characters cannot reach the public id.
        stem = os.path.splitext(os.path.basename(filename or "image"))[0]
        safe_stem = re.sub(r"[^A-Za-z0-9_-]+", "_", stem) or "image"

        # Upload original image
        image_url = await asyncio.to_thread(
            cloudinary_client.upload_image_from_bytes,
            file_content,
            f"upload_{job_id}_{safe_stem}"
        )

        if job_manager.is_job_cancelled(job_id):
            return

        job_manager.update_job_progress(job_id, "processing", 50, "Processing image to 3D...")

        # Convert to PIL Image
        image_pil = Image.open(io.BytesIO(file_content))

        # Generate depth map and 3D model
        depth_result = await asyncio.to_thread(
            depth_processor.process_image_to_3d,
            image_pil,
            job_id
        )
        # The pipeline reports failure via {'success': False, 'error': ...}
        # instead of raising; surface the real message, not a KeyError.
        if not depth_result.get('success', True):
            raise RuntimeError(depth_result.get('error') or "3D reconstruction failed")

        temp_paths = [depth_result['obj_path'], depth_result['depth_map_path']]

        if job_manager.is_job_cancelled(job_id):
            return

        job_manager.update_job_progress(job_id, "uploading_results", 90, "Uploading 3D model...")

        # Upload results
        model_url = await asyncio.to_thread(
            cloudinary_client.upload_file,
            depth_result['obj_path'],
            f"model_{job_id}.obj"
        )

        depth_map_url = await asyncio.to_thread(
            cloudinary_client.upload_image_from_path,
            depth_result['depth_map_path'],
            f"depth_{job_id}"
        )

        # Complete job
        job_manager.complete_job(job_id, {
            "image_url": image_url,
            "model_url": model_url,
            "depth_map_url": depth_map_url
        })

        logger.info(f"βœ… Upload-to-3D processing completed: {job_id}")

    except Exception as e:
        logger.error(f"❌ Error in upload-to-3D processing: {str(e)}")
        job_manager.fail_job(job_id, str(e))

    finally:
        # Best-effort cleanup so generated files do not accumulate on disk.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass
363
+
364
if __name__ == "__main__":
    # Serve on every interface at port 7860 when run as a script.
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )
models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Models package
models/depth_processor.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Depth processing module for converting 2D images to depth maps and 3D models
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ import tempfile
8
+ import numpy as np
9
+ import torch
10
+ from PIL import Image
11
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
12
+ import open3d as o3d
13
+ import matplotlib.pyplot as plt
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class DepthProcessor:
    """Estimates depth with a DPT model and reconstructs an OBJ mesh from it."""

    def __init__(self):
        # Prefer the GPU when one is visible to torch.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.processor = None
        self.model = None
        # Directory that receives the depth-map PNG and OBJ outputs.
        self.temp_dir = tempfile.mkdtemp()

    def load_model(self):
        """Load the DPT image processor and model onto ``self.device``.

        Raises:
            Exception: whatever the Hugging Face loaders raise; re-raised
                after being logged.
        """
        try:
            logger.info(f"πŸ”„ Loading DPT model on {self.device}...")

            self.processor = DPTImageProcessor.from_pretrained("Intel/dpt-beit-large-512")
            self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-beit-large-512")
            self.model.to(self.device)
            self.model.eval()

            if self.device.type == "cuda":
                logger.info(f"βœ… DPT model loaded on GPU: {torch.cuda.get_device_name(0)}")
            else:
                logger.info("βœ… DPT model loaded on CPU")

        except Exception as e:
            logger.error(f"❌ Failed to load DPT model: {str(e)}")
            raise

    def generate_depth_map(self, image: Image.Image) -> np.ndarray:
        """Return a depth map normalised to [0, 1] with the image's (height, width).

        Args:
            image: Input picture.

        Returns:
            2-D float array aligned pixel-for-pixel with ``image``. A
            constant prediction yields an all-zero map.
        """
        try:
            inputs = self.processor(images=image, return_tensors="pt")
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                predicted_depth = self.model(**inputs).predicted_depth
                # The prediction is produced at the processor's working
                # resolution, not the image's. Resample it so depth_map[y, x]
                # and image pixel (x, y) refer to the same location.
                resized = torch.nn.functional.interpolate(
                    predicted_depth.unsqueeze(1),
                    size=image.size[::-1],  # PIL size is (width, height)
                    mode="bicubic",
                    align_corners=False,
                )

            depth = resized[0, 0].cpu().numpy()

            low = float(depth.min())
            span = float(depth.max()) - low
            if not span > 0:
                # Flat prediction: avoid dividing by zero.
                return np.zeros_like(depth)

            return (depth - low) / span

        except Exception as e:
            logger.error(f"❌ Error generating depth map: {str(e)}")
            raise

    def save_depth_map_image(self, depth_map: np.ndarray, job_id: str) -> str:
        """Render ``depth_map`` with the 'plasma' colormap and save it as a PNG.

        Returns:
            Path of the written file inside ``self.temp_dir``.
        """
        fig = None
        try:
            # Work on an explicit figure handle rather than pyplot's implicit
            # "current figure", which is shared between worker threads.
            fig = plt.figure(figsize=(10, 10))
            ax = fig.add_subplot(111)
            ax.imshow(depth_map, cmap='plasma')
            ax.axis('off')
            fig.tight_layout()

            depth_path = os.path.join(self.temp_dir, f"depth_{job_id}.png")
            fig.savefig(depth_path, bbox_inches='tight', pad_inches=0, dpi=150)

            return depth_path

        except Exception as e:
            logger.error(f"❌ Error saving depth map image: {str(e)}")
            raise

        finally:
            # Always release the figure, including when saving fails.
            if fig is not None:
                plt.close(fig)

    def create_3d_model(self, image: Image.Image, depth_map: np.ndarray, job_id: str) -> str:
        """Build a coloured point cloud from the depth map and mesh it with Poisson reconstruction.

        Args:
            image: Source picture used for vertex colours.
            depth_map: 2-D array in [0, 1] as returned by ``generate_depth_map``.
            job_id: Used to name the output file.

        Returns:
            Path of the written OBJ file inside ``self.temp_dir``.

        Raises:
            ValueError: when every sampled point is filtered out.
        """
        try:
            h, w = depth_map.shape

            # Colours are looked up with depth-map coordinates, so both grids
            # must have the same size.
            if image.size != (w, h):
                image = image.resize((w, h), Image.Resampling.LANCZOS)
            img_array = np.asarray(image)

            # Sample points (reduce resolution for performance)
            step = max(1, min(h, w) // 200)  # Target ~200x200 points max
            grid_x, grid_y = np.meshgrid(np.arange(0, w, step), np.arange(0, h, step))

            # Invert depth for proper 3D orientation and scale it
            z = (1.0 - depth_map[grid_y, grid_x]) * 50.0

            # Skip points that are too far
            keep = ~(z > 45.0)
            if not keep.any():
                raise ValueError("No valid points generated for 3D model")

            px = grid_x[keep]
            py = grid_y[keep]
            points = np.column_stack((px / w - 0.5, (h - py) / h - 0.5, z[keep]))

            if img_array.ndim == 3:
                colors = img_array[py, px, :3] / 255.0
            else:
                # Gray for grayscale
                colors = np.full((len(points), 3), 0.7)

            # Create Open3D point cloud
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(points.astype(np.float64))
            pcd.colors = o3d.utility.Vector3dVector(colors.astype(np.float64))

            # Poisson reconstruction needs normals
            pcd.estimate_normals()

            mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
                pcd, depth=8, width=0, scale=1.1, linear_fit=False
            )

            # Remove degenerate geometry left behind by the reconstruction
            mesh.remove_degenerate_triangles()
            mesh.remove_duplicated_triangles()
            mesh.remove_duplicated_vertices()
            mesh.remove_non_manifold_edges()

            # Smooth the mesh
            mesh = mesh.filter_smooth_simple(number_of_iterations=2)

            # Save as OBJ file
            obj_path = os.path.join(self.temp_dir, f"model_{job_id}.obj")
            o3d.io.write_triangle_mesh(obj_path, mesh)

            logger.info(f"βœ… 3D model created: {len(mesh.vertices)} vertices, {len(mesh.triangles)} triangles")

            return obj_path

        except Exception as e:
            logger.error(f"❌ Error creating 3D model: {str(e)}")
            raise

    def process_image_to_3d(self, image: Image.Image, job_id: str) -> dict:
        """Run the full pipeline: image -> depth map -> depth PNG -> OBJ mesh.

        Returns:
            On success ``{'depth_map', 'depth_map_path', 'obj_path',
            'success': True}``; on failure ``{'success': False, 'error': str}``.
            Errors are reported in the dict rather than raised, so callers
            must check ``'success'``.
        """
        try:
            logger.info(f"πŸ”„ Processing image to 3D model (Job: {job_id})")

            # Resize image if too large (for performance)
            max_size = 512
            if max(image.size) > max_size:
                ratio = max_size / max(image.size)
                new_size = (int(image.size[0] * ratio), int(image.size[1] * ratio))
                image = image.resize(new_size, Image.Resampling.LANCZOS)
                logger.info(f"πŸ“ Resized image to {new_size}")

            # Convert to RGB if needed
            if image.mode != 'RGB':
                image = image.convert('RGB')

            depth_map = self.generate_depth_map(image)
            depth_map_path = self.save_depth_map_image(depth_map, job_id)
            obj_path = self.create_3d_model(image, depth_map, job_id)

            return {
                'depth_map': depth_map,
                'depth_map_path': depth_map_path,
                'obj_path': obj_path,
                'success': True
            }

        except Exception as e:
            logger.error(f"❌ Error in image-to-3D pipeline: {str(e)}")
            return {
                'success': False,
                'error': str(e)
            }
models/image_generator.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Image generation module using Stable Diffusion
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ import tempfile
8
+ import io
9
+ from PIL import Image
10
+ import torch
11
+ from diffusers import StableDiffusionPipeline
12
+ import gc
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
class ImageGenerator:
    """Handles text-to-image generation using Stable Diffusion"""

    def __init__(self):
        # Prefer the GPU when one is visible to torch.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.pipeline = None
        self.temp_dir = tempfile.mkdtemp()

    def load_model(self):
        """Load the Stable Diffusion v1.5 pipeline onto ``self.device``.

        Raises:
            Exception: whatever the diffusers loader raises; re-raised after
                being logged.
        """
        try:
            logger.info(f"πŸ”„ Loading Stable Diffusion model on {self.device}...")

            model_id = "runwayml/stable-diffusion-v1-5"

            self.pipeline = StableDiffusionPipeline.from_pretrained(
                model_id,
                # Half precision is only used on CUDA.
                torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
                safety_checker=None,  # Disable safety checker for faster inference
                requires_safety_checker=False
            )

            self.pipeline.to(self.device)

            # Enable memory efficient attention if available
            if hasattr(self.pipeline, "enable_attention_slicing"):
                self.pipeline.enable_attention_slicing()

            # enable_sequential_cpu_offload() is deliberately not called on
            # CPU: it streams weights from CPU RAM to an accelerator, and on a
            # host without one it installs hooks targeting a device that does
            # not exist.

            if self.device.type == "cuda":
                logger.info(f"βœ… Stable Diffusion loaded on GPU: {torch.cuda.get_device_name(0)}")
            else:
                logger.info("βœ… Stable Diffusion loaded on CPU")

        except Exception as e:
            logger.error(f"❌ Failed to load Stable Diffusion model: {str(e)}")
            raise

    def _release_memory(self):
        """Free cached GPU memory (when on CUDA) and run the garbage collector."""
        if self.device.type == "cuda":
            torch.cuda.empty_cache()
        gc.collect()

    def generate_image(self, prompt: str, negative_prompt: str = None) -> dict:
        """Generate one 512x512 image for ``prompt``.

        Args:
            prompt: Text description; quality keywords are appended to it.
            negative_prompt: Things to steer away from; a generic quality
                prompt is used when omitted.

        Returns:
            On success ``{'image_pil', 'image_bytes' (PNG), 'success': True}``;
            on failure ``{'success': False, 'error': str}``. Errors are
            reported in the dict rather than raised, so callers must check
            ``'success'``.
        """
        try:
            logger.info(f"🎨 Generating image for prompt: '{prompt}'")

            # Default negative prompt for better quality
            if negative_prompt is None:
                negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy"

            # Enhanced prompt for 3D-suitable images
            enhanced_prompt = f"{prompt}, high quality, detailed, clear lighting, suitable for 3D modeling"

            # Fixed seed: the same prompt always yields the same image
            generator = torch.Generator(device=self.device).manual_seed(42)

            with torch.no_grad():
                result = self.pipeline(
                    prompt=enhanced_prompt,
                    negative_prompt=negative_prompt,
                    num_inference_steps=20,  # Reduced for faster inference
                    guidance_scale=7.5,
                    width=512,
                    height=512,
                    generator=generator
                )

            image = result.images[0]

            # PNG is lossless, so no JPEG-style quality setting applies.
            img_bytes = io.BytesIO()
            image.save(img_bytes, format='PNG')

            logger.info("βœ… Image generated successfully")

            return {
                'image_pil': image,
                'image_bytes': img_bytes.getvalue(),
                'success': True
            }

        except Exception as e:
            logger.error(f"❌ Error generating image: {str(e)}")

            return {
                'success': False,
                'error': str(e)
            }

        finally:
            # Release memory on success and failure alike.
            self._release_memory()

    def enhance_prompt_for_3d(self, prompt: str) -> str:
        """Append a 3D-rendering hint unless the prompt already has one.

        A prompt counts as already 3D-flavoured when it contains "3d",
        "render", "model" or "dimensional" (case-insensitive).
        """
        lowered = prompt.lower()
        if any(term in lowered for term in ("3d", "render", "model", "dimensional")):
            return prompt

        return f"{prompt}, 3D rendering style"
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ python-multipart==0.0.6
4
+ pydantic==2.5.0
5
+ torch==2.1.1
6
+ torchvision==0.16.1
7
+ torchaudio==2.1.1
8
+ transformers==4.39.3
9
+ diffusers==0.27.0
10
+ accelerate==0.27.0
11
+ Pillow==10.3.0
12
+ numpy==1.24.3
13
+ open3d==0.18.0
14
+ matplotlib==3.7.2
15
+ cloudinary==1.37.0
16
+ python-dotenv==1.0.0
17
+ safetensors==0.4.2
18
+ huggingface_hub==0.20.2
19
+ requests==2.31.0
utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Utils package
utils/cloudinary_client.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cloudinary client for file uploads
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ import cloudinary
8
+ import cloudinary.uploader
9
+ from typing import Union
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class CloudinaryClient:
    """Thin wrapper around the Cloudinary uploader, configured from environment variables."""

    def __init__(self):
        # Credentials come from the environment (see README).
        cloud_name = os.environ.get("CLOUDINARY_CLOUD_NAME")
        api_key = os.environ.get("CLOUDINARY_API_KEY")
        api_secret = os.environ.get("CLOUDINARY_API_SECRET")

        cloudinary.config(
            cloud_name=cloud_name,
            api_key=api_key,
            api_secret=api_secret
        )

        # Missing credentials only produce a warning here.
        if cloud_name and api_key and api_secret:
            logger.info("βœ… Cloudinary client initialized")
        else:
            logger.warning("⚠️ Cloudinary credentials not fully configured")

    def _upload(self, source, public_id: str, label: str, **options) -> str:
        """Upload ``source`` under the ``text-to-3d/`` folder and return its secure URL.

        ``label`` ("image", "image file" or "file") only selects the wording
        of the log messages.
        """
        try:
            logger.info(f"☁️ Uploading {label} to Cloudinary: {public_id}")

            response = cloudinary.uploader.upload(
                source,
                public_id=f"text-to-3d/{public_id}",
                unique_filename=True,
                overwrite=True,
                **options
            )

            secure_url = response["secure_url"]
            logger.info(f"βœ… {label.capitalize()} uploaded: {secure_url}")
            return secure_url

        except Exception as e:
            logger.error(f"❌ Error uploading {label} to Cloudinary: {str(e)}")
            raise e

    def upload_image_from_bytes(self, image_bytes: bytes, public_id: str) -> str:
        """Upload in-memory image bytes and return the secure URL."""
        return self._upload(
            image_bytes, public_id, "image",
            resource_type="image", quality="auto"
        )

    def upload_image_from_path(self, file_path: str, public_id: str) -> str:
        """Upload an image file from disk and return the secure URL."""
        return self._upload(
            file_path, public_id, "image file",
            resource_type="image", quality="auto"
        )

    def upload_file(self, file_path: str, public_id: str) -> str:
        """Upload an arbitrary file as a "raw" resource and return the secure URL."""
        # "raw" is the resource type for non-image files such as OBJ meshes.
        return self._upload(file_path, public_id, "file", resource_type="raw")

    def delete_file(self, public_id: str, resource_type: str = "image") -> bool:
        """Delete a previously uploaded resource.

        Returns:
            True when Cloudinary answers ``"ok"``; False otherwise, including
            when the request itself raises.
        """
        try:
            logger.info(f"πŸ—‘οΈ Deleting file from Cloudinary: {public_id}")

            response = cloudinary.uploader.destroy(
                f"text-to-3d/{public_id}",
                resource_type=resource_type
            )
        except Exception as e:
            logger.error(f"❌ Error deleting file from Cloudinary: {str(e)}")
            return False

        deleted = response.get("result") == "ok"
        if not deleted:
            logger.warning(f"⚠️ File deletion may have failed: {public_id}")
            return deleted

        logger.info(f"βœ… File deleted: {public_id}")
        return deleted
utils/job_manager.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Job management for tracking async tasks
3
+ """
4
+
5
+ import time
6
+ import threading
7
+ import logging
8
+ from typing import Dict, Optional, Any
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ class JobManager:
13
+ """Manages background job tracking and progress"""
14
+
15
+ def __init__(self):
16
+ self.active_jobs: Dict[str, Dict] = {}
17
+ self.job_progress: Dict[str, Dict] = {}
18
+ self.job_results: Dict[str, Dict] = {}
19
+ self.jobs_lock = threading.Lock()
20
+ self.progress_lock = threading.Lock()
21
+ self.results_lock = threading.Lock()
22
+
23
+ # Start cleanup task
24
+ self._start_cleanup_task()
25
+
26
+ def register_job(self, job_id: str):
27
+ """Register a new job"""
28
+ with self.jobs_lock:
29
+ self.active_jobs[job_id] = {
30
+ 'cancelled': False,
31
+ 'created_at': time.time(),
32
+ 'status': 'active'
33
+ }
34
+
35
+ with self.progress_lock:
36
+ self.job_progress[job_id] = {
37
+ 'stage': 'starting',
38
+ 'progress': 0,
39
+ 'message': 'Job started...',
40
+ 'timestamp': time.time()
41
+ }
42
+
43
+ logger.info(f"πŸ“ Job registered: {job_id}")
44
+
45
+ def is_job_cancelled(self, job_id: str) -> bool:
46
+ """Check if a job has been cancelled"""
47
+ with self.jobs_lock:
48
+ return self.active_jobs.get(job_id, {}).get('cancelled', False)
49
+
50
+ def cancel_job(self, job_id: str) -> bool:
51
+ """Cancel a job"""
52
+ with self.jobs_lock:
53
+ if job_id in self.active_jobs:
54
+ self.active_jobs[job_id]['cancelled'] = True
55
+ self.active_jobs[job_id]['status'] = 'cancelled'
56
+ logger.info(f"❌ Job cancelled: {job_id}")
57
+ return True
58
+ return False
59
+
60
+ def update_job_progress(self, job_id: str, stage: str, progress: int, message: str):
61
+ """Update job progress"""
62
+ with self.progress_lock:
63
+ if job_id in self.job_progress:
64
+ self.job_progress[job_id] = {
65
+ 'stage': stage,
66
+ 'progress': progress,
67
+ 'message': message,
68
+ 'timestamp': time.time()
69
+ }
70
+ logger.info(f"πŸ“Š Job {job_id}: {stage} - {progress}% - {message}")
71
+
72
+ def get_job_progress(self, job_id: str) -> Optional[Dict]:
73
+ """Get current job progress"""
74
+ with self.progress_lock:
75
+ return self.job_progress.get(job_id)
76
+
77
+ def complete_job(self, job_id: str, results: Dict[str, Any]):
78
+ """Mark job as completed with results"""
79
+ with self.jobs_lock:
80
+ if job_id in self.active_jobs:
81
+ self.active_jobs[job_id]['status'] = 'completed'
82
+
83
+ with self.progress_lock:
84
+ self.job_progress[job_id] = {
85
+ 'stage': 'completed',
86
+ 'progress': 100,
87
+ 'message': 'Job completed successfully!',
88
+ 'timestamp': time.time()
89
+ }
90
+
91
+ with self.results_lock:
92
+ self.job_results[job_id] = {
93
+ **results,
94
+ 'completed_at': time.time()
95
+ }
96
+
97
+ logger.info(f"βœ… Job completed: {job_id}")
98
+
99
+ def fail_job(self, job_id: str, error_message: str):
100
+ """Mark job as failed"""
101
+ with self.jobs_lock:
102
+ if job_id in self.active_jobs:
103
+ self.active_jobs[job_id]['status'] = 'failed'
104
+
105
+ with self.progress_lock:
106
+ self.job_progress[job_id] = {
107
+ 'stage': 'error',
108
+ 'progress': 0,
109
+ 'message': f'Error: {error_message}',
110
+ 'timestamp': time.time()
111
+ }
112
+
113
+ logger.error(f"❌ Job failed: {job_id} - {error_message}")
114
+
115
+ def get_job_results(self, job_id: str) -> Optional[Dict]:
116
+ """Get job results if completed"""
117
+ with self.results_lock:
118
+ return self.job_results.get(job_id)
119
+
120
+ def get_active_job_count(self) -> int:
121
+ """Get number of active jobs"""
122
+ with self.jobs_lock:
123
+ return len([j for j in self.active_jobs.values() if j['status'] == 'active'])
124
+
125
+ def cleanup_old_jobs(self):
126
+ """Clean up jobs older than 30 minutes"""
127
+ current_time = time.time()
128
+ cleanup_age = 1800 # 30 minutes
129
+
130
+ jobs_to_remove = []
131
+
132
+ with self.jobs_lock:
133
+ for job_id, job_data in self.active_jobs.items():
134
+ if current_time - job_data['created_at'] > cleanup_age:
135
+ jobs_to_remove.append(job_id)
136
+
137
+ for job_id in jobs_to_remove:
138
+ self._remove_job(job_id)
139
+ logger.info(f"🧹 Cleaned up old job: {job_id}")
140
+
141
+ def _remove_job(self, job_id: str):
142
+ """Remove job from all tracking dictionaries"""
143
+ with self.jobs_lock:
144
+ self.active_jobs.pop(job_id, None)
145
+
146
+ with self.progress_lock:
147
+ self.job_progress.pop(job_id, None)
148
+
149
+ with self.results_lock:
150
+ self.job_results.pop(job_id, None)
151
+
152
+ def _start_cleanup_task(self):
153
+ """Start background cleanup task"""
154
+ def cleanup_worker():
155
+ while True:
156
+ time.sleep(300) # Run every 5 minutes
157
+ try:
158
+ self.cleanup_old_jobs()
159
+ except Exception as e:
160
+ logger.error(f"❌ Error in cleanup task: {str(e)}")
161
+
162
+ cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
163
+ cleanup_thread.start()
164
+ logger.info("🧹 Cleanup task started")