# Page header captured with this file (not program code):
#   author avatar: yusenthebot's picture
#   commit: "Update app.py" (92679f8, verified)
import os
import shutil
import zipfile
import pathlib
import tempfile
import gradio as gr
import pandas as pd
import PIL.Image
import huggingface_hub
from autogluon.multimodal import MultiModalPredictor
# --- Where the trained predictor lives on the Hugging Face Hub ---
MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
# Optional access token; stays None when the HF_TOKEN variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# --- Local directories for the downloaded archive and its contents ---
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")
def prepare_predictor_dir():
    """Fetch the zipped AutoGluon predictor from the Hub and unpack it locally.

    The archive named by ZIP_FILENAME is downloaded from MODEL_REPO_ID into
    CACHE_DIR, then extracted into a freshly emptied EXTRACT_DIR.

    Returns:
        str: Path of the directory that now holds the extracted predictor.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    print("Downloading model from Hugging Face...")
    download_kwargs = {
        "repo_id": MODEL_REPO_ID,
        "filename": ZIP_FILENAME,
        "repo_type": "model",
        "token": HF_TOKEN,
        "local_dir": str(CACHE_DIR),
    }
    archive_path = huggingface_hub.hf_hub_download(**download_kwargs)

    # Start from an empty target so files from an earlier extraction
    # cannot linger next to the new ones.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    print("Extracting model files...")
    with zipfile.ZipFile(archive_path, mode="r") as archive:
        archive.extractall(path=str(EXTRACT_DIR))

    # The predictor files sit directly inside EXTRACT_DIR after extraction.
    return str(EXTRACT_DIR)
# Load the predictor once, at import time.
#
# Outcome is published through three module-level names that the rest of the
# file reads:
#   model_loaded - True only after MultiModalPredictor.load() succeeded
#   predictor    - the loaded predictor, or None on failure
#   class_labels - column names of a probe prediction, or [] if unknown
model_loaded = False
predictor = None
class_labels = []

try:
    PREDICTOR_DIR = prepare_predictor_dir()
    print(f"Loading predictor from: {PREDICTOR_DIR}")
    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
    model_loaded = True
    # The check mark was stored as mis-decoded bytes; restored to the
    # intended character.
    print("✓ Model loaded successfully")

    # Best-effort class discovery: run the model on a blank image and read
    # the class names off the probability columns. A failure here must not
    # mark the model as unusable, hence the separate try/except.
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            dummy_img_path = tmpdir / "dummy.png"

            # Small plain-white RGB image written to disk, because the
            # predictor is fed file paths through an "image" column.
            dummy_img = PIL.Image.new('RGB', (224, 224), color='white')
            dummy_img.save(dummy_img_path)
            dummy_df = pd.DataFrame({"image": [str(dummy_img_path)]})

            proba_df = predictor.predict_proba(dummy_df)
            class_labels = list(proba_df.columns)
            print(f"Detected classes: {class_labels}")
    except Exception as e:
        print(f"Could not determine classes: {e}")
        # Fall back to "unknown"; the UI then omits the class count.
        class_labels = []
except Exception as e:
    # Top-level boundary: log the full traceback and leave the app running
    # in a "model unavailable" state instead of crashing at import.
    print(f"Error loading model: {e}")
    import traceback
    traceback.print_exc()
    model_loaded = False
    predictor = None
def predict_sign(image):
    """Classify a sign image and describe the result.

    Args:
        image: PIL image to classify, or None when nothing was provided.

    Returns:
        tuple: ``(probabilities, message)`` where ``probabilities`` is a dict
        mapping class label (str) to probability (float), sorted from most
        to least likely, and ``message`` is a Markdown report. When there is
        no image, the model is unavailable, or prediction fails,
        ``probabilities`` is None and ``message`` explains why.
    """
    if image is None:
        return None, "⚠️ Please upload an image of a sign to classify"

    if not model_loaded or predictor is None:
        return None, "⚠️ Model failed to load. Please refresh the page."

    try:
        # Non-RGB inputs (e.g. RGBA) are converted before saving.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # The predictor is fed file paths, so the image is written to a
        # temporary file that only needs to exist while predicting.
        with tempfile.TemporaryDirectory() as tmpdir:
            img_path = pathlib.Path(tmpdir) / "input.png"
            image.save(img_path)
            df = pd.DataFrame({"image": [str(img_path)]})

            pred_label = str(predictor.predict(df).iloc[0])
            proba_row = predictor.predict_proba(df).iloc[0]

        # Column names become the labels; highest probability first.
        proba_dict = {str(col): float(prob) for col, prob in proba_row.items()}
        proba_dict = dict(
            sorted(proba_dict.items(), key=lambda item: item[1], reverse=True)
        )
        top_5 = list(proba_dict.items())[:5]

        # If the predicted label is not among the probability columns, fall
        # back to the largest probability (or 0 when there are none).
        fallback = max(proba_dict.values()) if proba_dict else 0
        confidence = proba_dict.get(pred_label, fallback) * 100

        result_text = _format_prediction_markdown(
            pred_label, confidence, top_5, image
        )
        return proba_dict, result_text
    except Exception as e:
        # UI boundary: log the traceback and show the error text in the
        # result panel rather than propagating the exception.
        import traceback
        error_detail = traceback.format_exc()
        print(f"Error in prediction: {error_detail}")
        return None, f"⚠️ Error making prediction:\n```\n{str(e)}\n```"


def _format_prediction_markdown(pred_label, confidence, top_predictions, image):
    """Build the Markdown report for one prediction.

    Args:
        pred_label: Predicted class label as a string.
        confidence: Confidence for ``pred_label`` as a percentage.
        top_predictions: ``(label, probability)`` pairs, most likely first.
        image: The image that was classified; its size and mode are reported.

    Returns:
        str: The assembled Markdown text.
    """
    if confidence > 80:
        verdict = "✅ **High Confidence** - Clear sign detection\n\n"
    elif confidence > 50:
        verdict = "⚠️ **Medium Confidence** - Sign detected but may need verification\n\n"
    else:
        verdict = "❌ **Low Confidence** - Unclear or ambiguous sign\n\n"

    parts = [
        "## 🚦 Sign Classification Result\n\n",
        f"### **Predicted Sign: {pred_label}**\n\n",
        f"**Confidence:** {confidence:.1f}%\n\n",
        verdict,
    ]

    # A ranking is only shown when there is more than one candidate.
    if len(top_predictions) > 1:
        parts.append("### Top 5 Predictions:\n")
        for rank, (label, prob) in enumerate(top_predictions, 1):
            # 20-character text bar, filled in proportion to the probability.
            filled = int(prob * 20)
            bar = '█' * filled + '░' * (20 - filled)
            parts.append(f"{rank}. **{label}**: {bar} {prob*100:.1f}%\n")

    # The caller passes the image after RGB conversion, so the mode shown
    # here is the converted one.
    parts.append("\n### 📊 Image Properties:\n")
    parts.append(f"- **Image Size:** {image.size[0]}x{image.size[1]} pixels\n")
    parts.append(f"- **Mode:** {image.mode}\n")
    return "".join(parts)
# Sample inputs for the UI: one single-element row per local image file.
EXAMPLES = [[filename] for filename in ("asl1.jpg", "asl2.jpg", "asl3.jpg")]
# Create Gradio interface
#
# The long text blocks are kept as private module-level constants so the
# layout code below stays readable. Emoji that had been stored as mis-decoded
# bytes are restored to the intended characters.

# Custom CSS passed to gr.Blocks.
_APP_CSS = """
.gradio-container {
font-family: 'Arial', sans-serif;
}
.title {
text-align: center;
}
.confidence-high { color: green; }
.confidence-medium { color: orange; }
.confidence-low { color: red; }
"""

# Page header: what the app is, reported metrics, and usage steps.
# The Limitations bullet previously opened a bold span without closing it.
_INTRO_MARKDOWN = """
# 🚦 Sign Identification System
## About This Application
This application uses an **AutoGluon MultiModal Predictor** trained on the ecopus/sign_identification dataset
to identify and classify signs from images. The model leverages deep learning with a ResNet18 backbone
for accurate sign recognition.
### Model Performance
- **Accuracy:** 0.9444
- **Weighted F1 Score:** 0.9508
### Limitations
- **Currently only have 2 classes** (Stop and Speed limit)
### How to Use
1. **Upload an Image**: Click to upload or drag-and-drop an image of a sign
2. **Take a Photo**: Use your webcam to capture a sign directly
3. **Paste from Clipboard**: Paste an image directly
4. **View Results**: The model will identify the sign type with confidence scores
5. **Try Examples**: Click on the example images below to test the model
"""

# Footer: technical details, tips, and limitations.
_MODEL_INFO_MARKDOWN = """
### 📊 Model Information
**Technical Details:**
- **Model Type**: AutoGluon MultiModal Predictor
- **Architecture**: ResNet18 (timm_image backbone)
- **Training**: Medium quality preset with AutoGluon
- **Dataset**: ecopus/sign_identification (augmented split for training)
- **Source**: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)
**Training Configuration:**
- **Problem Type**: Multi-class Classification
- **Evaluation Metric**: Accuracy
- **Preprocessing**: AutoGluon automatic preprocessing
- **Data Augmentation**: Applied during training
**Files in Repository:**
- `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
- `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)
### 🎯 Best Practices
- Use clear, well-focused images
- Ensure the sign is fully visible in frame
- Good lighting improves accuracy
- Avoid excessive blur or distortion
- Center the sign in the image
### ⚠️ Limitations
- Trained on specific sign dataset
- Performance may vary with image quality
- Best suited for single sign per image
- May struggle with heavily obscured signs
---
*Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
"""

# Text shown in the result panel before any prediction and after "Clear".
# Defined once so the initial value and the reset value cannot drift apart.
_PLACEHOLDER_MARKDOWN = (
    "### 📤 Upload an image to see predictions\n\n"
    "The model will automatically identify the sign type and provide confidence scores."
)

with gr.Blocks(
    title="Sign Identification System",
    theme=gr.themes.Soft(),
    css=_APP_CSS,
) as demo:
    # Header
    gr.Markdown(_INTRO_MARKDOWN)
    gr.Markdown("---")

    # Model status indicator, decided once when the UI is built.
    if model_loaded:
        status_text = "✅ **Model Status:** Ready for predictions"
        if class_labels:
            status_text += f"\n\n**Number of classes:** {len(class_labels)}"
        gr.Markdown(status_text)
    else:
        gr.Markdown("❌ **Model Status:** Failed to load - please refresh the page")

    # Main interface: input controls on the left, results on the right.
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="Upload Sign Image",
                sources=["upload", "webcam", "clipboard"],
                height=400,
            )
            with gr.Row():
                predict_btn = gr.Button(
                    "🔍 Identify Sign",
                    variant="primary",
                    size="lg",
                    scale=2,
                )
                clear_btn = gr.Button(
                    "🔄 Clear",
                    variant="secondary",
                    scale=1,
                )
        with gr.Column(scale=1):
            # Markdown report produced by predict_sign.
            result_text = gr.Markdown(
                label="Recognition Result",
                value=_PLACEHOLDER_MARKDOWN,
            )
            # Probability distribution produced by predict_sign.
            proba_output = gr.Label(
                label="Class Probabilities",
                num_top_classes=10,
                show_label=True,
            )

    # Examples section
    gr.Markdown("---")
    gr.Markdown("### 📸 Example Images")
    gr.Markdown("Click on any example below to test the model with sample sign images:")
    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_input],
        outputs=[proba_output, result_text],
        fn=predict_sign,
        label="Sample Sign Images",
        examples_per_page=3,
        cache_examples=False,
    )

    # Additional information
    gr.Markdown("---")
    gr.Markdown(_MODEL_INFO_MARKDOWN)

    # Run a prediction when the button is pressed...
    predict_btn.click(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text],
    )
    # ...and also whenever the image value changes.
    image_input.change(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text],
    )

    def clear_interface():
        """Return reset values for the image, the label chart, and the report."""
        return None, None, _PLACEHOLDER_MARKDOWN

    # NOTE(review): clearing sets the image to None, which presumably also
    # fires the change handler above and replaces the placeholder with
    # predict_sign's "please upload" message - confirm in the running app.
    clear_btn.click(
        fn=clear_interface,
        outputs=[image_input, proba_output, result_text],
    )
# Launch the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()