import os
import shutil
import zipfile
import pathlib
import tempfile

import gradio as gr
import pandas as pd
import PIL.Image
import huggingface_hub
from autogluon.multimodal import MultiModalPredictor

# Model configuration
MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Local cache/extract directories
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR / "predictor_native"


def prepare_predictor_dir():
    """Download and extract the AutoGluon predictor from the Hugging Face Hub."""
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # Download the model zip file
    print("Downloading model from Hugging Face...")
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        token=HF_TOKEN,
        local_dir=str(CACHE_DIR),
    )

    # Clean and prepare extraction directory
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    # Extract the model
    print("Extracting model files...")
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # The predictor files are directly in EXTRACT_DIR after extraction
    return str(EXTRACT_DIR)


# Load the predictor
model_loaded = False
predictor = None
class_labels = []

try:
    PREDICTOR_DIR = prepare_predictor_dir()
    print(f"Loading predictor from: {PREDICTOR_DIR}")
    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
    model_loaded = True
    print("✓ Model loaded successfully")

    # Try to determine the classes from the model
    try:
        # Create a dummy image to get class information
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            dummy_img_path = tmpdir / "dummy.png"

            # Create a small dummy image
            dummy_img = PIL.Image.new('RGB', (224, 224), color='white')
            dummy_img.save(dummy_img_path)

            dummy_df = pd.DataFrame({"image": [str(dummy_img_path)]})

            # Get probability predictions to see the classes
            proba_df = predictor.predict_proba(dummy_df)
            class_labels = list(proba_df.columns)
            print(f"Detected classes: {class_labels}")
    except Exception as e:
        print(f"Could not determine classes: {e}")
        # Default class labels if detection fails
        class_labels = []
except Exception as e:
    print(f"Error loading model: {e}")
    import traceback
    traceback.print_exc()
    model_loaded = False
    predictor = None


def predict_sign(image):
    """
    Predict the sign type from an image.

    Args:
        image: PIL Image object

    Returns:
        dict: Probability distribution over classes
        str: Formatted prediction result
    """
    # Check if image is provided
    if image is None:
        return None, "⚠️ Please upload an image of a sign to classify"

    # Check if model is loaded
    if not model_loaded or predictor is None:
        return None, "⚠️ Model failed to load. Please refresh the page."
    try:
        # Save the image temporarily for AutoGluon
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            img_path = tmpdir / "input.png"

            # Convert to RGB if necessary (handle RGBA images)
            if image.mode != 'RGB':
                image = image.convert('RGB')

            # Save the image
            image.save(img_path)

            # Create DataFrame for AutoGluon (must have 'image' column with file paths)
            df = pd.DataFrame({"image": [str(img_path)]})

            # Get predictions
            prediction = predictor.predict(df)
            pred_class = prediction.iloc[0]

            # Get probabilities
            proba_df = predictor.predict_proba(df)

            # Create probability dictionary
            proba_dict = {}
            for col in proba_df.columns:
                # Use the column name as the label
                label = str(col)
                proba_dict[label] = float(proba_df.iloc[0][col])

            # Sort by probability (highest first)
            proba_dict = dict(sorted(proba_dict.items(), key=lambda x: x[1], reverse=True))

            # Get top predictions for display
            top_5 = list(proba_dict.items())[:5]

            # Format the prediction result
            pred_label = str(pred_class)
            confidence = proba_dict.get(pred_label, max(proba_dict.values()) if proba_dict else 0) * 100

            # Create formatted output
            result_text = f"## 🚦 Sign Classification Result\n\n"
            result_text += f"### **Predicted Sign: {pred_label}**\n\n"
            result_text += f"**Confidence:** {confidence:.1f}%\n\n"

            # Add confidence level interpretation
            if confidence > 80:
                result_text += "✅ **High Confidence** - Clear sign detection\n\n"
            elif confidence > 50:
                result_text += "⚠️ **Medium Confidence** - Sign detected but may need verification\n\n"
            else:
                result_text += "❌ **Low Confidence** - Unclear or ambiguous sign\n\n"

            # Show top predictions
            if len(top_5) > 1:
                result_text += "### Top 5 Predictions:\n"
                for i, (label, prob) in enumerate(top_5, 1):
                    bar_length = int(prob * 20)
                    bar = '█' * bar_length + '░' * (20 - bar_length)
                    result_text += f"{i}. **{label}**: {bar} {prob*100:.1f}%\n"

            # Add interpretation guide
            result_text += "\n### 📊 Image Properties:\n"
            result_text += f"- **Image Size:** {image.size[0]}x{image.size[1]} pixels\n"
            result_text += f"- **Mode:** {image.mode}\n"

            return proba_dict, result_text

    except Exception as e:
        import traceback
        error_detail = traceback.format_exc()
        print(f"Error in prediction: {error_detail}")
        return None, f"⚠️ Error making prediction:\n```\n{str(e)}\n```"


# Example images - using local files
EXAMPLES = [
    ["asl1.jpg"],
    ["asl2.jpg"],
    ["asl3.jpg"],
]

# Create Gradio interface
with gr.Blocks(
    title="Sign Identification System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { font-family: 'Arial', sans-serif; }
    .title { text-align: center; }
    .confidence-high { color: green; }
    .confidence-medium { color: orange; }
    .confidence-low { color: red; }
    """
) as demo:
    # Header
    gr.Markdown("""
# 🚦 Sign Identification System

## About This Application
This application uses an **AutoGluon MultiModal Predictor** trained on the
ecopus/sign_identification dataset to identify and classify signs from images.
The model leverages deep learning with a ResNet18 backbone for accurate sign recognition.

### Model Performance
- **Accuracy:** 0.9444
- **Weighted F1 Score:** 0.9508

### Limitations
- Currently supports only two classes: **Stop** and **Speed Limit**

### How to Use
1. **Upload an Image**: Click to upload or drag-and-drop an image of a sign
2. **Take a Photo**: Use your webcam to capture a sign directly
3. **Paste from Clipboard**: Paste an image directly
4. **View Results**: The model will identify the sign type with confidence scores
5. **Try Examples**: Click on the example images below to test the model
""")

    gr.Markdown("---")

    # Model status indicator
    if model_loaded:
        status_text = "✅ **Model Status:** Ready for predictions"
        if class_labels:
            status_text += f"\n\n**Number of classes:** {len(class_labels)}"
        gr.Markdown(status_text)
    else:
        gr.Markdown("❌ **Model Status:** Failed to load - please refresh the page")

    # Main interface
    with gr.Row():
        with gr.Column(scale=1):
            # Image input
            image_input = gr.Image(
                type="pil",
                label="Upload Sign Image",
                sources=["upload", "webcam", "clipboard"],
                height=400
            )

            with gr.Row():
                # Prediction button
                predict_btn = gr.Button(
                    "🔍 Identify Sign",
                    variant="primary",
                    size="lg",
                    scale=2
                )
                # Clear button
                clear_btn = gr.Button(
                    "🔄 Clear",
                    variant="secondary",
                    scale=1
                )

        with gr.Column(scale=1):
            # Results display
            result_text = gr.Markdown(
                label="Recognition Result",
                value="### 📤 Upload an image to see predictions\n\nThe model will automatically identify the sign type and provide confidence scores."
            )

            # Probability distribution
            proba_output = gr.Label(
                label="Class Probabilities",
                num_top_classes=10,
                show_label=True
            )

    # Examples section
    gr.Markdown("---")
    gr.Markdown("### 📸 Example Images")
    gr.Markdown("Click on any example below to test the model with sample sign images:")

    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_input],
        outputs=[proba_output, result_text],
        fn=predict_sign,
        label="Sample Sign Images",
        examples_per_page=3,
        cache_examples=False
    )

    # Additional information
    gr.Markdown("---")
    gr.Markdown("""
### 📊 Model Information

**Technical Details:**
- **Model Type**: AutoGluon MultiModal Predictor
- **Architecture**: ResNet18 (timm_image backbone)
- **Training**: Medium quality preset with AutoGluon
- **Dataset**: ecopus/sign_identification (augmented split for training)
- **Source**: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)

**Training Configuration:**
- **Problem Type**: Multi-class Classification
- **Evaluation Metric**: Accuracy
- **Preprocessing**: AutoGluon automatic preprocessing
- **Data Augmentation**: Applied during training

**Files in Repository:**
- `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
- `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)

### 🎯 Best Practices
- Use clear, well-focused images
- Ensure the sign is fully visible in frame
- Good lighting improves accuracy
- Avoid excessive blur or distortion
- Center the sign in the image

### ⚠️ Limitations
- Trained on a specific sign dataset
- Performance may vary with image quality
- Best suited for a single sign per image
- May struggle with heavily obscured signs

---
*Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
""")

    # Connect functions to UI elements
    predict_btn.click(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text]
    )

    # Auto-predict on image change
    image_input.change(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text]
    )

    # Clear function
    def clear_interface():
        return None, None, "### 📤 Upload an image to see predictions\n\nThe model will automatically identify the sign type and provide confidence scores."

    clear_btn.click(
        fn=clear_interface,
        outputs=[image_input, proba_output, result_text]
    )


if __name__ == "__main__":
    demo.launch()
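
# --- Optional local smoke test (illustrative sketch, not part of the original app) ---
# The commented snippet below shows one way to exercise predict_sign() directly,
# without launching the Gradio UI. It assumes "asl1.jpg" (one of the files referenced
# in EXAMPLES above) sits next to this script and that the predictor loaded; both are
# assumptions, so uncomment and adapt only if that matches your setup.
#
# if model_loaded:
#     sample_image = PIL.Image.open("asl1.jpg")
#     probabilities, summary_markdown = predict_sign(sample_image)
#     print(probabilities)      # dict of class -> probability, sorted high to low
#     print(summary_markdown)   # the same Markdown summary shown in the UI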