Spaces:

yusenthebot
/

sign-image-autogluon-predictor

Sleeping

App Files Files Community

sign-image-autogluon-predictor / app.py

yusenthebot

Update app.py

92679f8 verified 3 months ago

raw

history blame contribute delete

12.2 kB

	import os
	import shutil
	import zipfile
	import pathlib
	import tempfile
	import gradio as gr
	import pandas as pd
	import PIL.Image
	import huggingface_hub
	from autogluon.multimodal import MultiModalPredictor

	# Hugging Face Hub coordinates of the packaged AutoGluon predictor.
	MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
	ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
	# Optional access token; stays None when the variable is not set.
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# Where the downloaded archive is cached and where it gets unpacked.
	CACHE_DIR = pathlib.Path("hf_assets")
	EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

	def prepare_predictor_dir():
	    """Fetch the zipped predictor from the Hub and unpack it locally.

	    The archive is downloaded into ``CACHE_DIR`` and extracted into a freshly
	    recreated ``EXTRACT_DIR``, so files from an earlier extraction never mix
	    with the new ones.

	    Returns:
	        str: Path of the directory holding the extracted predictor files.
	    """
	    CACHE_DIR.mkdir(parents=True, exist_ok=True)

	    print("Downloading model from Hugging Face...")
	    archive_path = huggingface_hub.hf_hub_download(
	        repo_id=MODEL_REPO_ID,
	        filename=ZIP_FILENAME,
	        repo_type="model",
	        token=HF_TOKEN,
	        local_dir=str(CACHE_DIR),
	    )

	    # Always start from an empty extraction directory.
	    if EXTRACT_DIR.exists():
	        shutil.rmtree(EXTRACT_DIR)
	    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

	    print("Extracting model files...")
	    target_dir = str(EXTRACT_DIR)
	    with zipfile.ZipFile(archive_path, "r") as archive:
	        archive.extractall(target_dir)

	    # The predictor files sit directly inside the extraction directory.
	    return target_dir

	# Startup state; overwritten below once the predictor has been loaded.
	model_loaded = False
	predictor = None
	class_labels = []

	try:
	    PREDICTOR_DIR = prepare_predictor_dir()
	    print(f"Loading predictor from: {PREDICTOR_DIR}")
	    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
	    model_loaded = True
	    print("✓ Model loaded successfully")

	    # Class discovery is best effort: run the model once on a blank image
	    # and read the labels off the probability columns. A failure here
	    # leaves the predictor usable and the label list empty.
	    try:
	        with tempfile.TemporaryDirectory() as scratch:
	            probe_path = pathlib.Path(scratch) / "dummy.png"
	            PIL.Image.new('RGB', (224, 224), color='white').save(probe_path)
	            probe_df = pd.DataFrame({"image": [str(probe_path)]})

	            class_labels = list(predictor.predict_proba(probe_df).columns)
	            print(f"Detected classes: {class_labels}")
	    except Exception as exc:
	        print(f"Could not determine classes: {exc}")
	        class_labels = []

	except Exception as exc:
	    # Download, extraction or loading failed: report it and leave the app
	    # in the "model unavailable" state.
	    print(f"Error loading model: {exc}")
	    import traceback
	    traceback.print_exc()
	    model_loaded = False
	    predictor = None

	def _render_prediction_report(pred_label, ranked_probs, image_size, image_mode, top_k=5):
	    """Format a prediction as the Markdown report shown in the result panel.

	    Args:
	        pred_label: Predicted class as a string.
	        ranked_probs: Mapping of class label to probability (0-1), ordered
	            from most to least likely.
	        image_size: ``(width, height)`` of the classified image in pixels.
	        image_mode: Mode string of the classified image.
	        top_k: Maximum number of classes listed in the ranking section.

	    Returns:
	        str: The Markdown report.
	    """
	    # Fall back to the best probability when the predicted label is not one
	    # of the probability keys.
	    best = max(ranked_probs.values()) if ranked_probs else 0
	    confidence = ranked_probs.get(pred_label, best) * 100

	    if confidence > 80:
	        verdict = "✅ High Confidence - Clear sign detection"
	    elif confidence > 50:
	        verdict = "⚠️ Medium Confidence - Sign detected but may need verification"
	    else:
	        verdict = "❌ Low Confidence - Unclear or ambiguous sign"

	    parts = [
	        "## 🚦 Sign Classification Result\n\n",
	        f"### Predicted Sign: {pred_label}\n\n",
	        f"Confidence: {confidence:.1f}%\n\n",
	        f"{verdict}\n\n",
	    ]

	    top = list(ranked_probs.items())[:top_k]
	    if len(top) > 1:
	        # Name the number of rows actually listed; a model with fewer than
	        # top_k classes must not be announced as a "Top 5".
	        parts.append(f"### Top {len(top)} Predictions:\n")
	        for rank, (label, prob) in enumerate(top, 1):
	            # Clamp so a value slightly outside 0-1 cannot distort the bar.
	            filled = max(0, min(20, int(prob * 20)))
	            bar = '█' * filled + '░' * (20 - filled)
	            parts.append(f"{rank}. {label}: {bar} {prob*100:.1f}%\n")

	    # Properties of the image as it was handed to the model.
	    parts.append("\n### 📊 Image Properties:\n")
	    parts.append(f"- Image Size: {image_size[0]}x{image_size[1]} pixels\n")
	    parts.append(f"- Mode: {image_mode}\n")
	    return "".join(parts)


	def predict_sign(image):
	    """Classify a sign image and describe the result.

	    Args:
	        image: PIL image to classify, or None when nothing was uploaded.

	    Returns:
	        tuple: ``(probabilities, report)``. ``probabilities`` maps each class
	        label to its probability, ordered from most to least likely, and is
	        ``None`` when no prediction could be made. ``report`` is a Markdown
	        string holding either the formatted result or a warning message.
	    """
	    if image is None:
	        return None, "⚠️ Please upload an image of a sign to classify"

	    if not model_loaded or predictor is None:
	        return None, "⚠️ Model failed to load. Please refresh the page."

	    try:
	        # The predictor is given a DataFrame of file paths, so the upload is
	        # written to a temporary directory that is removed after inference.
	        with tempfile.TemporaryDirectory() as tmpdir:
	            img_path = pathlib.Path(tmpdir) / "input.png"

	            # Convert to RGB if necessary (e.g. RGBA images) before saving.
	            if image.mode != 'RGB':
	                image = image.convert('RGB')
	            image.save(img_path)

	            df = pd.DataFrame({"image": [str(img_path)]})
	            pred_label = str(predictor.predict(df).iloc[0])
	            proba_row = predictor.predict_proba(df).iloc[0]

	        # Column names become the labels; highest probability first.
	        probabilities = {str(col): float(p) for col, p in proba_row.items()}
	        ranked = dict(sorted(probabilities.items(), key=lambda kv: kv[1], reverse=True))

	        report = _render_prediction_report(pred_label, ranked, image.size, image.mode)
	        return ranked, report

	    except Exception as e:
	        # Log the full traceback server-side; show only the message in the UI.
	        import traceback
	        print(f"Error in prediction: {traceback.format_exc()}")
	        return None, f"⚠️ Error making prediction:\n```\n{str(e)}\n```"

	# Example images - local files, one single-element row per example input
	EXAMPLES = [[filename] for filename in ("asl1.jpg", "asl2.jpg", "asl3.jpg")]

	# Placeholder for the result panel. Shared by the initial render, the
	# "Clear" button and the auto-predict handler so the wording cannot drift.
	_RESULT_PLACEHOLDER = (
	    "### 📤 Upload an image to see predictions\n\n"
	    "The model will automatically identify the sign type and provide confidence scores."
	)

	# Create Gradio interface
	with gr.Blocks(
	    title="Sign Identification System",
	    theme=gr.themes.Soft(),
	    css="""
	.gradio-container {
	font-family: 'Arial', sans-serif;
	}
	.title {
	text-align: center;
	}
	.confidence-high { color: green; }
	.confidence-medium { color: orange; }
	.confidence-low { color: red; }
	"""
	) as demo:

	    # Header
	    gr.Markdown("""
	# 🚦 Sign Identification System

	## About This Application
	This application uses an AutoGluon MultiModal Predictor trained on the ecopus/sign_identification dataset
	to identify and classify signs from images. The model leverages deep learning with a ResNet18 backbone
	for accurate sign recognition.

	### Model Performance
	- Accuracy: 0.9444
	- Weighted F1 Score: 0.9508

	### Limitations
	- **Currently supports only 2 classes (Stop and Speed limit)**

	### How to Use
	1. Upload an Image: Click to upload or drag-and-drop an image of a sign
	2. Take a Photo: Use your webcam to capture a sign directly
	3. Paste from Clipboard: Paste an image directly
	4. View Results: The model will identify the sign type with confidence scores
	5. Try Examples: Click on the example images below to test the model
	""")

	    gr.Markdown("---")

	    # Model status indicator (reflects the outcome of the startup load)
	    if model_loaded:
	        status_text = "✅ Model Status: Ready for predictions"
	        if class_labels:
	            status_text += f"\n\nNumber of classes: {len(class_labels)}"
	        gr.Markdown(status_text)
	    else:
	        gr.Markdown("❌ Model Status: Failed to load - please refresh the page")

	    # Main interface: input column on the left, results on the right
	    with gr.Row():
	        with gr.Column(scale=1):
	            # Image input
	            image_input = gr.Image(
	                type="pil",
	                label="Upload Sign Image",
	                sources=["upload", "webcam", "clipboard"],
	                height=400
	            )

	            with gr.Row():
	                # Prediction button
	                predict_btn = gr.Button(
	                    "🔍 Identify Sign",
	                    variant="primary",
	                    size="lg",
	                    scale=2
	                )

	                # Clear button
	                clear_btn = gr.Button(
	                    "🔄 Clear",
	                    variant="secondary",
	                    scale=1
	                )

	        with gr.Column(scale=1):
	            # Results display
	            result_text = gr.Markdown(
	                label="Recognition Result",
	                value=_RESULT_PLACEHOLDER
	            )

	            # Probability distribution
	            proba_output = gr.Label(
	                label="Class Probabilities",
	                num_top_classes=10,
	                show_label=True
	            )

	    # Examples section
	    gr.Markdown("---")
	    gr.Markdown("### 📸 Example Images")
	    gr.Markdown("Click on any example below to test the model with sample sign images:")

	    gr.Examples(
	        examples=EXAMPLES,
	        inputs=[image_input],
	        outputs=[proba_output, result_text],
	        fn=predict_sign,
	        label="Sample Sign Images",
	        examples_per_page=3,
	        cache_examples=False
	    )

	    # Additional information
	    gr.Markdown("---")
	    # A plain "|" is used in the footer: "\|" is not a valid escape sequence
	    # in a normal string literal and renders the same in Markdown.
	    gr.Markdown("""
	### 📊 Model Information

	Technical Details:
	- Model Type: AutoGluon MultiModal Predictor
	- Architecture: ResNet18 (timm_image backbone)
	- Training: Medium quality preset with AutoGluon
	- Dataset: ecopus/sign_identification (augmented split for training)
	- Source: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)

	Training Configuration:
	- Problem Type: Multi-class Classification
	- Evaluation Metric: Accuracy
	- Preprocessing: AutoGluon automatic preprocessing
	- Data Augmentation: Applied during training

	Files in Repository:
	- `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
	- `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)

	### 🎯 Best Practices
	- Use clear, well-focused images
	- Ensure the sign is fully visible in frame
	- Good lighting improves accuracy
	- Avoid excessive blur or distortion
	- Center the sign in the image

	### ⚠️ Limitations
	- Trained on specific sign dataset
	- Performance may vary with image quality
	- Best suited for single sign per image
	- May struggle with heavily obscured signs

	---
	Created for CMU 24-679 Course | Powered by AutoGluon & Gradio
	""")

	    # Connect functions to UI elements
	    predict_btn.click(
	        fn=predict_sign,
	        inputs=[image_input],
	        outputs=[proba_output, result_text]
	    )

	    def _predict_on_change(image):
	        """Auto-predict handler that keeps the placeholder for an empty image.

	        Gradio documents the change event as also firing when the value is
	        set by another handler, so resetting the image (e.g. via "Clear")
	        would otherwise replace the placeholder with the upload warning.
	        """
	        if image is None:
	            return None, _RESULT_PLACEHOLDER
	        return predict_sign(image)

	    # Auto-predict on image change
	    image_input.change(
	        fn=_predict_on_change,
	        inputs=[image_input],
	        outputs=[proba_output, result_text]
	    )

	    # Clear function
	    def clear_interface():
	        """Reset the image, the probability chart and the result text."""
	        return None, None, _RESULT_PLACEHOLDER

	    clear_btn.click(
	        fn=clear_interface,
	        outputs=[image_input, proba_output, result_text]
	    )

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()