aniket47 committed
Commit 6e0e6cc · 1 Parent(s): fa13587

Speed optimizations: pre-download models, optimized caching, model CPU offloading

Files changed (3)
  1. Dockerfile +18 -0
  2. models/image_generator.py +35 -10
  3. preload_models.py +55 -0
Dockerfile CHANGED
@@ -2,11 +2,29 @@ FROM python:3.9-slim
 
 WORKDIR /app
 
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements and install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
+# Copy application code
 COPY . .
 
+# Set environment variables for HuggingFace caching
+ENV HF_HOME=/app/model_cache
+ENV TRANSFORMERS_CACHE=/app/model_cache
+ENV HF_DATASETS_CACHE=/app/model_cache
+
+# Create cache directory with proper permissions
+RUN mkdir -p /app/model_cache && chmod 755 /app/model_cache
+
+# Pre-download models during build time for faster startup
+RUN python preload_models.py
+
 EXPOSE 7860
 
 CMD ["python", "app.py"]
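Because HF_HOME is baked into the image, the weights that preload_models.py fetches during `docker build` are found in /app/model_cache at runtime instead of being re-downloaded. A minimal sanity-check sketch (hypothetical helper, not part of this commit; the only assumption is huggingface_hub's scan_cache_dir):

# check_cache.py — hypothetical sanity check, not part of this commit
import os
from huggingface_hub import scan_cache_dir

# preload_models.py passes cache_dir=$HF_HOME, so scan that same directory
info = scan_cache_dir(os.environ.get("HF_HOME", "/app/model_cache"))
for repo in info.repos:
    print(f"{repo.repo_id}: {repo.size_on_disk / 1e9:.2f} GB on disk")
print(f"total: {info.size_on_disk / 1e9:.2f} GB")

Running this inside the container should list the SDXL (or fallback) and depth-estimation repos; an empty listing would mean startup will re-download everything.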
models/image_generator.py CHANGED
@@ -22,20 +22,27 @@ class ImageGenerator:
         self.temp_dir = tempfile.mkdtemp()
 
     def load_model(self):
-        """Load the Stable Diffusion model"""
+        """Load the Stable Diffusion model with optimized caching"""
         try:
             logger.info(f"🔄 Loading Stability AI model on {self.device}...")
 
             # Use Stability AI's SDXL model for highest quality
             model_id = "stabilityai/stable-diffusion-xl-base-1.0"
 
-            # Load pipeline
+            # Optimize caching for faster subsequent loads
+            cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface_cache")
+
+            # Load pipeline with optimized settings
             self.pipeline = StableDiffusionPipeline.from_pretrained(
                 model_id,
                 torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
                 safety_checker=None,  # Disable safety checker for faster inference
                 requires_safety_checker=False,
-                use_safetensors=True
+                use_safetensors=True,
+                cache_dir=cache_dir,
+                resume_download=True,  # Resume interrupted downloads
+                local_files_only=False,  # Allow downloads but prefer cache
+                variant="fp16" if self.device.type == "cuda" else None  # Use fp16 variant for GPU
             )
 
             self.pipeline.to(self.device)
@@ -52,14 +59,21 @@
             except:
                 logger.info("ℹ️ XFormers not available, using default attention")
 
-            # Only enable CPU offloading if CUDA is available but we want to save memory
-            # For pure CPU mode, keep everything on CPU
-            if self.device.type == "cuda":
-                # Enable model offloading to save GPU memory
+            # Enable model CPU offloading for memory optimization
+            if hasattr(self.pipeline, "enable_model_cpu_offload"):
+                try:
+                    self.pipeline.enable_model_cpu_offload()
+                    logger.info("✅ Model CPU offloading enabled for memory optimization")
+                except:
+                    logger.info("ℹ️ CPU offloading not available")
+
+            # Only enable sequential CPU offloading if model CPU offload fails
+            if self.device.type == "cuda" and not hasattr(self.pipeline, "enable_model_cpu_offload"):
                 self.pipeline.enable_sequential_cpu_offload()
+
+            if self.device.type == "cuda":
                 logger.info(f"✅ Stability AI SDXL loaded on GPU: {torch.cuda.get_device_name(0)}")
             else:
-                # For CPU-only mode, don't use offloading
                 logger.info("✅ Stability AI SDXL loaded on CPU")
 
         except Exception as e:
@@ -73,7 +87,9 @@
                 model_id,
                 torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
                 safety_checker=None,
-                requires_safety_checker=False
+                requires_safety_checker=False,
+                cache_dir=cache_dir,
+                resume_download=True
             )
 
             self.pipeline.to(self.device)
@@ -81,8 +97,17 @@
             if hasattr(self.pipeline, "enable_attention_slicing"):
                 self.pipeline.enable_attention_slicing()
 
+            if hasattr(self.pipeline, "enable_xformers_memory_efficient_attention"):
+                try:
+                    self.pipeline.enable_xformers_memory_efficient_attention()
+                except:
+                    pass
+
             if self.device.type == "cuda":
-                self.pipeline.enable_sequential_cpu_offload()
+                if hasattr(self.pipeline, "enable_model_cpu_offload"):
+                    self.pipeline.enable_model_cpu_offload()
+                else:
+                    self.pipeline.enable_sequential_cpu_offload()
                 logger.info(f"✅ Fallback SD v1.5 loaded on GPU: {torch.cuda.get_device_name(0)}")
             else:
                 logger.info("✅ Fallback SD v1.5 loaded on CPU")
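For context on the two offload calls this diff switches between: diffusers' enable_model_cpu_offload moves one whole sub-model (text encoder, UNet, VAE) to the GPU at a time, giving moderate memory savings with a small speed cost, while enable_sequential_cpu_offload streams weights at the submodule level for maximal savings but much slower inference. Both require the accelerate package. A minimal standalone sketch (illustrative only, not this repository's code; model ID and prompt are placeholders):

# offload_demo.py — illustrative sketch, not part of this commit
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# Option 1: per-component offload (requires accelerate).
# Note: do NOT also call pipe.to("cuda"); the hooks manage device placement.
pipe.enable_model_cpu_offload()

# Option 2, mutually exclusive with option 1: per-layer offload,
# lowest VRAM use but a large speed penalty.
# pipe.enable_sequential_cpu_offload()

image = pipe("a lighthouse at dusk", num_inference_steps=20).images[0]
image.save("out.png")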
preload_models.py ADDED
@@ -0,0 +1,55 @@
+"""
+Pre-download models for faster startup
+"""
+import os
+import sys
+
+def preload_models():
+    """Pre-download all required models"""
+    try:
+        print("🔄 Pre-downloading Stability AI SDXL model...")
+
+        from diffusers import StableDiffusionPipeline
+        import torch
+
+        # Set cache directory
+        cache_dir = os.environ.get('HF_HOME', '/app/model_cache')
+
+        # Download SDXL model
+        try:
+            pipeline = StableDiffusionPipeline.from_pretrained(
+                'stabilityai/stable-diffusion-xl-base-1.0',
+                torch_dtype=torch.float32,
+                safety_checker=None,
+                requires_safety_checker=False,
+                cache_dir=cache_dir
+            )
+            print("✅ SDXL model downloaded successfully")
+        except Exception as e:
+            print(f"⚠️ SDXL download failed, downloading fallback: {e}")
+            # Download fallback model
+            pipeline = StableDiffusionPipeline.from_pretrained(
+                'runwayml/stable-diffusion-v1-5',
+                torch_dtype=torch.float32,
+                safety_checker=None,
+                requires_safety_checker=False,
+                cache_dir=cache_dir
+            )
+            print("✅ SD v1.5 fallback model downloaded successfully")
+
+        # Also pre-download depth estimation model
+        print("🔄 Pre-downloading depth estimation model...")
+        from transformers import DPTImageProcessor, DPTForDepthEstimation
+
+        DPTImageProcessor.from_pretrained('Intel/dpt-beit-large-512', cache_dir=cache_dir)
+        DPTForDepthEstimation.from_pretrained('Intel/dpt-beit-large-512', cache_dir=cache_dir)
+        print("✅ Depth estimation model downloaded successfully")
+
+        print("🎉 All models pre-loaded successfully!")
+
+    except Exception as e:
+        print(f"❌ Error pre-loading models: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    preload_models()
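Instantiating the full pipeline just to warm the cache works, but it also constructs every sub-model in RAM during `docker build`. A lighter-weight alternative sketch using huggingface_hub's snapshot_download (hypothetical, not what this commit does) fetches the same files into the cache without loading any weights:

# preload_via_snapshot.py — hypothetical alternative, not part of this commit
import os
from huggingface_hub import snapshot_download

cache_dir = os.environ.get("HF_HOME", "/app/model_cache")

# Download repo files into the cache without building the pipeline,
# keeping build-time memory low; load_model() later hits the warm cache.
for repo_id in (
    "stabilityai/stable-diffusion-xl-base-1.0",
    "Intel/dpt-beit-large-512",
):
    snapshot_download(repo_id, cache_dir=cache_dir)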