aniket47 committed
Commit fa13587
Parent: a09688b

Upgrade to Stability AI SDXL model for superior image quality

Files changed (2):
  1. models/image_generator.py +64 -19
  2. requirements.txt +2 -1
models/image_generator.py CHANGED
@@ -24,17 +24,18 @@ class ImageGenerator:
     def load_model(self):
         """Load the Stable Diffusion model"""
         try:
-            logger.info(f"πŸ”„ Loading Stable Diffusion model on {self.device}...")
+            logger.info(f"πŸ”„ Loading Stability AI model on {self.device}...")
 
-            # Use a smaller, faster model for better performance on free tier
-            model_id = "runwayml/stable-diffusion-v1-5"
+            # Use Stability AI's SDXL model for highest quality
+            model_id = "stabilityai/stable-diffusion-xl-base-1.0"
 
             # Load pipeline
             self.pipeline = StableDiffusionPipeline.from_pretrained(
                 model_id,
                 torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
                 safety_checker=None,  # Disable safety checker for faster inference
-                requires_safety_checker=False
+                requires_safety_checker=False,
+                use_safetensors=True
             )
 
             self.pipeline.to(self.device)
@@ -43,39 +44,78 @@ class ImageGenerator:
             if hasattr(self.pipeline, "enable_attention_slicing"):
                 self.pipeline.enable_attention_slicing()
 
+            # Enable xformers for better performance if available
+            if hasattr(self.pipeline, "enable_xformers_memory_efficient_attention"):
+                try:
+                    self.pipeline.enable_xformers_memory_efficient_attention()
+                    logger.info("βœ… XFormers memory efficient attention enabled")
+                except:
+                    logger.info("ℹ️ XFormers not available, using default attention")
+
             # Only enable CPU offloading if CUDA is available but we want to save memory
             # For pure CPU mode, keep everything on CPU
             if self.device.type == "cuda":
                 # Enable model offloading to save GPU memory
                 self.pipeline.enable_sequential_cpu_offload()
-                logger.info(f"βœ… Stable Diffusion loaded on GPU: {torch.cuda.get_device_name(0)}")
+                logger.info(f"βœ… Stability AI SDXL loaded on GPU: {torch.cuda.get_device_name(0)}")
             else:
                 # For CPU-only mode, don't use offloading
-                logger.info("βœ… Stable Diffusion loaded on CPU")
+                logger.info("βœ… Stability AI SDXL loaded on CPU")
 
         except Exception as e:
-            logger.error(f"❌ Failed to load Stable Diffusion model: {str(e)}")
-            raise e
+            logger.error(f"❌ Failed to load Stability AI model: {str(e)}")
+            # Fallback to standard SD 1.5 if SDXL fails
+            logger.info("πŸ”„ Falling back to Stable Diffusion v1.5...")
+            try:
+                model_id = "runwayml/stable-diffusion-v1-5"
+
+                self.pipeline = StableDiffusionPipeline.from_pretrained(
+                    model_id,
+                    torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
+                    safety_checker=None,
+                    requires_safety_checker=False
+                )
+
+                self.pipeline.to(self.device)
+
+                if hasattr(self.pipeline, "enable_attention_slicing"):
+                    self.pipeline.enable_attention_slicing()
+
+                if self.device.type == "cuda":
+                    self.pipeline.enable_sequential_cpu_offload()
+                    logger.info(f"βœ… Fallback SD v1.5 loaded on GPU: {torch.cuda.get_device_name(0)}")
+                else:
+                    logger.info("βœ… Fallback SD v1.5 loaded on CPU")
+
+            except Exception as fallback_error:
+                logger.error(f"❌ Fallback model also failed: {str(fallback_error)}")
+                raise fallback_error
 
     def generate_image(self, prompt: str, negative_prompt: str = None) -> dict:
         """Generate image from text prompt"""
         try:
             logger.info(f"🎨 Generating image for prompt: '{prompt}'")
 
-            # Default negative prompt for better quality
+            # Enhanced negative prompt for Stability AI models
             if negative_prompt is None:
-                negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy, worst quality, low res"
+                negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy, worst quality, low res, jpeg artifacts, watermark, signature"
 
-            # Enhanced prompt for 3D-suitable images
-            enhanced_prompt = f"{prompt}, high quality, detailed, clear lighting, suitable for 3D modeling, photorealistic"
+            # Enhanced prompt for 3D-suitable images with Stability AI style
+            enhanced_prompt = f"{prompt}, masterpiece, best quality, highly detailed, sharp focus, professional photography, suitable for 3D modeling, photorealistic, 8k uhd"
 
-            # Generation parameters - optimized for quality
+            # Generation parameters - optimized for Stability AI models
             generator = torch.Generator(device=self.device).manual_seed(42)  # Fixed seed for consistency
 
-            # Higher quality parameters - even for CPU
-            num_steps = 25 if self.device.type == "cpu" else 50
-            width = 512  # Full resolution for better quality
-            height = 512
+            # SDXL optimized parameters
+            num_steps = 30 if self.device.type == "cpu" else 50  # SDXL works best with more steps
+            width = 1024 if self.device.type == "cuda" else 512  # SDXL native resolution is 1024x1024
+            height = 1024 if self.device.type == "cuda" else 512
+            guidance_scale = 7.0  # SDXL works best with lower guidance scale
+
+            # For CPU, use smaller resolution to manage memory
+            if self.device.type == "cpu":
+                width, height = 512, 512
+                num_steps = 25  # Fewer steps for CPU but still good quality
 
             logger.info(f"πŸ–ΌοΈ Generating {width}x{height} image with {num_steps} steps on {self.device}")
 
@@ -85,7 +125,7 @@ class ImageGenerator:
                 prompt=enhanced_prompt,
                 negative_prompt=negative_prompt,
                 num_inference_steps=num_steps,
-                guidance_scale=8.5,  # Higher guidance for better quality
+                guidance_scale=guidance_scale,
                 width=width,
                 height=height,
                 generator=generator
@@ -93,6 +133,11 @@ class ImageGenerator:
 
             image = result.images[0]
 
+            # Resize to 512x512 for consistency if generated at higher resolution
+            if width > 512 or height > 512:
+                image = image.resize((512, 512), Image.Resampling.LANCZOS)
+                logger.info("πŸ”„ Resized image from 1024x1024 to 512x512 for processing")
+
             # Convert to bytes for storage
             img_bytes = io.BytesIO()
             image.save(img_bytes, format='PNG', quality=95)
@@ -103,7 +148,7 @@ class ImageGenerator:
             torch.cuda.empty_cache()
             gc.collect()
 
-            logger.info("βœ… Image generated successfully")
+            logger.info("βœ… Image generated successfully with Stability AI model")
 
             return {
                 'image_pil': image,
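
One thing worth flagging for anyone reproducing this change: stabilityai/stable-diffusion-xl-base-1.0 is an SDXL checkpoint, and the diff above keeps loading it through StableDiffusionPipeline. In the diffusers releases I am aware of, SDXL checkpoints are normally loaded through StableDiffusionXLPipeline or the auto-detecting DiffusionPipeline, and the resize step's Image.Resampling.LANCZOS call assumes the module already imports Image from PIL. Below is a minimal loading and generation sketch under those assumptions; it mirrors the parameters chosen in the commit (30 steps, guidance 7.0, 1024x1024 downsized to 512x512) but is illustrative rather than the repository's exact code.

# Sketch only: the usual way to load SDXL base 1.0 with diffusers.
# Assumes a recent diffusers release and torch install; not this repo's code.
import torch
from diffusers import DiffusionPipeline  # resolves to the SDXL pipeline class for this checkpoint
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    use_safetensors=True,
)
pipeline.to(device)

# SDXL's native resolution is 1024x1024; the commit downsizes to 512x512 afterwards.
image = pipeline(
    prompt="a ceramic vase on a wooden table, photorealistic",  # hypothetical example prompt
    negative_prompt="blurry, low quality",
    num_inference_steps=30,
    guidance_scale=7.0,
    width=1024,
    height=1024,
).images[0]
image = image.resize((512, 512), Image.Resampling.LANCZOS)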
requirements.txt CHANGED
@@ -17,4 +17,5 @@ safetensors==0.4.2
 huggingface_hub==0.20.2
 requests==2.31.0
 trimesh==4.0.5
-scipy==1.11.4
+scipy==1.11.4
+xformers==0.0.22
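
A note on the new xformers pin: xformers wheels are built against specific torch versions, so xformers==0.0.22 only provides the memory-efficient attention path if it matches the torch build the project already pins; on a mismatch the import fails and the try/except added in load_model quietly falls back to default attention. A quick sanity check, written as a standalone sketch rather than project code:

# Sketch: confirm the pinned xformers build imports cleanly against the installed torch.
import torch

try:
    import xformers
    print(f"torch {torch.__version__} / xformers {xformers.__version__}: OK")
except ImportError as err:
    print(f"xformers unavailable; the pipeline will use default attention ({err})")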