Spaces:

yusenthebot
/

sign-image-autogluon-predictor

Sleeping

File size: 12,220 Bytes

import os
import shutil
import zipfile
import pathlib
import tempfile
import gradio as gr
import pandas as pd
import PIL.Image
import huggingface_hub
from autogluon.multimodal import MultiModalPredictor

# Hugging Face Hub coordinates of the zipped AutoGluon predictor
MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
# Optional access token; None when the HF_TOKEN environment variable is unset
HF_TOKEN = os.environ.get("HF_TOKEN")

# Local download cache and the sub-directory the archive is unpacked into
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

def prepare_predictor_dir():
    """Fetch the zipped predictor from the Hugging Face Hub and unpack it.

    The archive named by ``ZIP_FILENAME`` is downloaded from ``MODEL_REPO_ID``
    into ``CACHE_DIR``. ``EXTRACT_DIR`` is wiped and recreated on every call,
    then the archive is extracted into it.

    Returns:
        str: Path of the directory that now holds the extracted files.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    print("Downloading model from Hugging Face...")
    download_args = {
        "repo_id": MODEL_REPO_ID,
        "filename": ZIP_FILENAME,
        "repo_type": "model",
        "token": HF_TOKEN,
        "local_dir": str(CACHE_DIR),
    }
    archive_path = huggingface_hub.hf_hub_download(**download_args)

    # Start from an empty directory so files from a previous extraction
    # cannot linger next to the new ones
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    destination = str(EXTRACT_DIR)
    print("Extracting model files...")
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(destination)

    # The extraction directory itself is handed to the predictor loader
    return destination

# Module-level model state, filled in once when this module is imported
predictor = None
class_labels = []
model_loaded = False


def _probe_class_labels(loaded_predictor):
    """Return the probability column names produced for a dummy image.

    A blank white 224x224 RGB image is written to a temporary directory and
    passed through ``predict_proba``; the columns of the resulting frame are
    returned as a list.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        probe_path = pathlib.Path(scratch_dir) / "dummy.png"
        PIL.Image.new('RGB', (224, 224), color='white').save(probe_path)
        probe_frame = pd.DataFrame({"image": [str(probe_path)]})

        labels = list(loaded_predictor.predict_proba(probe_frame).columns)
        print(f"Detected classes: {labels}")
        return labels


try:
    PREDICTOR_DIR = prepare_predictor_dir()
    print(f"Loading predictor from: {PREDICTOR_DIR}")
    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
    model_loaded = True
    print("✓ Model loaded successfully")

    # Class detection is best-effort: a failure here leaves the model usable
    try:
        class_labels = _probe_class_labels(predictor)
    except Exception as e:
        print(f"Could not determine classes: {e}")
        class_labels = []

except Exception as e:
    # Any download/extract/load failure leaves the app in "not loaded" state
    print(f"Error loading model: {e}")
    import traceback
    traceback.print_exc()
    model_loaded = False
    predictor = None

def _render_prediction_report(pred_label, proba_dict, image):
    """Build the Markdown report shown next to the probability chart.

    Args:
        pred_label: Predicted class as a string.
        proba_dict: Mapping of class label to probability, already sorted
            from most to least likely.
        image: The image that was classified; its size and mode are reported.

    Returns:
        str: Markdown text.
    """
    # When the predicted label is not a key, fall back to the best probability
    fallback = max(proba_dict.values()) if proba_dict else 0
    confidence = proba_dict.get(pred_label, fallback) * 100

    if confidence > 80:
        verdict = "✅ **High Confidence** - Clear sign detection\n\n"
    elif confidence > 50:
        verdict = "⚠️ **Medium Confidence** - Sign detected but may need verification\n\n"
    else:
        verdict = "❌ **Low Confidence** - Unclear or ambiguous sign\n\n"

    pieces = [
        "## 🚦 Sign Classification Result\n\n",
        f"### **Predicted Sign: {pred_label}**\n\n",
        f"**Confidence:** {confidence:.1f}%\n\n",
        verdict,
    ]

    # The ranking is only listed when there is more than one class to show
    leaders = list(proba_dict.items())[:5]
    if len(leaders) > 1:
        pieces.append("### Top 5 Predictions:\n")
        for rank, (label, prob) in enumerate(leaders, 1):
            filled = int(prob * 20)
            gauge = '█' * filled + '░' * (20 - filled)
            pieces.append(f"{rank}. **{label}**: {gauge} {prob*100:.1f}%\n")

    pieces.append("\n### 📊 Image Properties:\n")
    pieces.append(f"- **Image Size:** {image.size[0]}x{image.size[1]} pixels\n")
    pieces.append(f"- **Mode:** {image.mode}\n")

    return "".join(pieces)


def predict_sign(image):
    """
    Classify a sign image and describe the outcome.

    Args:
        image: PIL Image object, or None when nothing was uploaded

    Returns:
        dict: Class label -> probability, sorted highest first
              (None when no prediction could be made)
        str: Markdown-formatted result or warning message
    """
    if image is None:
        return None, "⚠️ Please upload an image of a sign to classify"

    if not (model_loaded and predictor is not None):
        return None, "⚠️ Model failed to load. Please refresh the page."

    try:
        # The predictor is fed a file path, so the image goes to disk first
        with tempfile.TemporaryDirectory() as scratch_dir:
            img_path = pathlib.Path(scratch_dir) / "input.png"

            # Anything that is not already RGB (e.g. RGBA) is converted
            rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
            rgb_image.save(img_path)

            # Single-row frame whose 'image' column holds the file path
            frame = pd.DataFrame({"image": [str(img_path)]})

            pred_label = str(predictor.predict(frame).iloc[0])

            proba_table = predictor.predict_proba(frame)
            first_row = proba_table.iloc[0]
            scores = {
                str(col): float(first_row[col]) for col in proba_table.columns
            }

            # Highest probability first
            ranked = dict(
                sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
            )

            report = _render_prediction_report(pred_label, ranked, rgb_image)
            return ranked, report

    except Exception as e:
        import traceback
        error_detail = traceback.format_exc()
        print(f"Error in prediction: {error_detail}")
        return None, f"⚠️ Error making prediction:\n```\n{e!s}\n```"

# Sample images shipped alongside the app; one single-input row per example
EXAMPLES = [[filename] for filename in ("asl1.jpg", "asl2.jpg", "asl3.jpg")]

# Markdown shown in the results panel before any prediction and after "Clear".
# Kept in one place so the initial value and the reset value cannot drift.
PLACEHOLDER_MARKDOWN = "### 📤 Upload an image to see predictions\n\nThe model will automatically identify the sign type and provide confidence scores."

# Create Gradio interface
with gr.Blocks(
    title="Sign Identification System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .title {
        text-align: center;
    }
    .confidence-high { color: green; }
    .confidence-medium { color: orange; }
    .confidence-low { color: red; }
    """
) as demo:
    
    # Header
    gr.Markdown("""
    # 🚦 Sign Identification System
    
    ## About This Application
    This application uses an **AutoGluon MultiModal Predictor** trained on the ecopus/sign_identification dataset 
    to identify and classify signs from images. The model leverages deep learning with a ResNet18 backbone
    for accurate sign recognition.
    
    ### Model Performance
    - **Accuracy:** 0.9444
    - **Weighted F1 Score:** 0.9508

    ### Limitations
    - **Currently only have 2 classes** (Stop and Speed limit)
    
    ### How to Use
    1. **Upload an Image**: Click to upload or drag-and-drop an image of a sign
    2. **Take a Photo**: Use your webcam to capture a sign directly
    3. **Paste from Clipboard**: Paste an image directly
    4. **View Results**: The model will identify the sign type with confidence scores
    5. **Try Examples**: Click on the example images below to test the model
    """)
    
    gr.Markdown("---")
    
    # Model status indicator (decided once, when the UI is built)
    if model_loaded:
        status_text = "✅ **Model Status:** Ready for predictions"
        if class_labels:
            status_text += f"\n\n**Number of classes:** {len(class_labels)}"
        gr.Markdown(status_text)
    else:
        gr.Markdown("❌ **Model Status:** Failed to load - please refresh the page")
    
    # Main interface: input controls on the left, results on the right
    with gr.Row():
        with gr.Column(scale=1):
            # Image input
            image_input = gr.Image(
                type="pil",
                label="Upload Sign Image",
                sources=["upload", "webcam", "clipboard"],
                height=400
            )
            
            with gr.Row():
                # Prediction button
                predict_btn = gr.Button(
                    "🔍 Identify Sign",
                    variant="primary",
                    size="lg",
                    scale=2
                )
                
                # Clear button
                clear_btn = gr.Button(
                    "🔄 Clear",
                    variant="secondary",
                    scale=1
                )
        
        with gr.Column(scale=1):
            # Results display
            result_text = gr.Markdown(
                label="Recognition Result",
                value=PLACEHOLDER_MARKDOWN
            )
            
            # Probability distribution
            proba_output = gr.Label(
                label="Class Probabilities",
                num_top_classes=10,
                show_label=True
            )
    
    # Examples section
    gr.Markdown("---")
    gr.Markdown("### 📸 Example Images")
    gr.Markdown("Click on any example below to test the model with sample sign images:")
    
    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_input],
        outputs=[proba_output, result_text],
        fn=predict_sign,
        label="Sample Sign Images",
        examples_per_page=3,
        cache_examples=False
    )
    
    # Additional information
    gr.Markdown("---")
    gr.Markdown("""
    ### 📊 Model Information
    
    **Technical Details:**
    - **Model Type**: AutoGluon MultiModal Predictor
    - **Architecture**: ResNet18 (timm_image backbone)
    - **Training**: Medium quality preset with AutoGluon
    - **Dataset**: ecopus/sign_identification (augmented split for training)
    - **Source**: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)
    
    **Training Configuration:**
    - **Problem Type**: Multi-class Classification
    - **Evaluation Metric**: Accuracy
    - **Preprocessing**: AutoGluon automatic preprocessing
    - **Data Augmentation**: Applied during training
    
    **Files in Repository:**
    - `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
    - `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)
    
    ### 🎯 Best Practices
    - Use clear, well-focused images
    - Ensure the sign is fully visible in frame
    - Good lighting improves accuracy
    - Avoid excessive blur or distortion
    - Center the sign in the image
    
    ### ⚠️ Limitations
    - Trained on specific sign dataset
    - Performance may vary with image quality
    - Best suited for single sign per image
    - May struggle with heavily obscured signs
    
    ---
    *Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
    """)
    
    # Connect functions to UI elements
    predict_btn.click(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text]
    )
    
    # Auto-predict on image change
    image_input.change(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text]
    )
    
    # Clear function
    def clear_interface():
        """Return reset values for the image, the label chart and the result text."""
        return None, None, PLACEHOLDER_MARKDOWN
    
    clear_btn.click(
        fn=clear_interface,
        outputs=[image_input, proba_output, result_text]
    )

# Start the web server only when run as a script, not when imported
if __name__ == "__main__":
    demo.launch()