# Listing recovered from a web file viewer (reported file size: 12,220 bytes); the viewer's commit-hash and line-number gutters carried no program content and were dropped.
import os
import shutil
import zipfile
import pathlib
import tempfile
import gradio as gr
import pandas as pd
import PIL.Image
import huggingface_hub
from autogluon.multimodal import MultiModalPredictor
# Where the packaged predictor lives on the Hugging Face Hub.
MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
# Optional access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Working directories (relative paths): download cache and extraction target.
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")
def prepare_predictor_dir():
    """Fetch the zipped predictor from the Hugging Face Hub and unpack it.

    The archive is downloaded into ``CACHE_DIR`` and extracted into a freshly
    recreated ``EXTRACT_DIR``.

    Returns:
        str: Path of the directory the archive was extracted into.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    print("Downloading model from Hugging Face...")
    download_kwargs = {
        "repo_id": MODEL_REPO_ID,
        "filename": ZIP_FILENAME,
        "repo_type": "model",
        "token": HF_TOKEN,
        "local_dir": str(CACHE_DIR),
    }
    archive_path = huggingface_hub.hf_hub_download(**download_kwargs)

    # Recreate the target directory so files from an earlier extraction
    # cannot mix with the new ones.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    print("Extracting model files...")
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # NOTE(review): assumes the archive stores the predictor files at its
    # root (no wrapping folder) - confirm against the uploaded zip.
    return str(EXTRACT_DIR)
# Load the predictor once at import time. The rest of the module reads these
# three names, so they are always defined, even when loading fails.
model_loaded = False
predictor = None
class_labels = []


def _detect_class_labels(loaded_predictor):
    """Return the class labels of *loaded_predictor* as a list.

    The predictor's ``class_labels`` attribute is used when it is present and
    non-empty. Otherwise one prediction is run on a blank image and the
    column names of the probability table are returned.

    NOTE(review): assumes ``class_labels`` lists the same classes as the
    ``predict_proba`` columns - confirm for the installed AutoGluon version.
    """
    labels = getattr(loaded_predictor, "class_labels", None)
    if labels is not None and len(labels) > 0:
        return list(labels)

    # Fallback: a throwaway inference on a white 224x224 image.
    with tempfile.TemporaryDirectory() as tmpdir:
        dummy_img_path = pathlib.Path(tmpdir) / "dummy.png"
        PIL.Image.new("RGB", (224, 224), color="white").save(dummy_img_path)
        dummy_df = pd.DataFrame({"image": [str(dummy_img_path)]})
        return list(loaded_predictor.predict_proba(dummy_df).columns)


try:
    PREDICTOR_DIR = prepare_predictor_dir()
    print(f"Loading predictor from: {PREDICTOR_DIR}")
    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
    model_loaded = True
    print("✅ Model loaded successfully")
except Exception as e:
    # Top-level boundary: report the failure and leave the app in a clearly
    # "not loaded" state instead of crashing at import.
    print(f"Error loading model: {e}")
    import traceback
    traceback.print_exc()
    model_loaded = False
    predictor = None
else:
    # Label detection is best-effort: a failure here must not mark the model
    # as unloaded.
    try:
        class_labels = _detect_class_labels(predictor)
        print(f"Detected classes: {class_labels}")
    except Exception as e:
        print(f"Could not determine classes: {e}")
        class_labels = []
def _confidence_note(confidence):
    """Return the Markdown line interpreting a confidence percentage (0-100)."""
    if confidence > 80:
        return "✅ **High Confidence** - Clear sign detection\n\n"
    if confidence > 50:
        return "⚠️ **Medium Confidence** - Sign detected but may need verification\n\n"
    return "❌ **Low Confidence** - Unclear or ambiguous sign\n\n"


def _format_result(pred_label, proba_dict, image_size, image_mode):
    """Render a prediction as the Markdown text shown in the result panel."""
    # Fall back to the highest probability when the predicted label does not
    # appear among the probability keys.
    confidence = proba_dict.get(pred_label, max(proba_dict.values(), default=0.0)) * 100

    parts = [
        "## 🚦 Sign Classification Result\n\n",
        f"### **Predicted Sign: {pred_label}**\n\n",
        f"**Confidence:** {confidence:.1f}%\n\n",
        _confidence_note(confidence),
    ]

    # proba_dict is already sorted highest-first, so the head is the top-k.
    top_predictions = list(proba_dict.items())[:5]
    if len(top_predictions) > 1:
        parts.append(f"### Top {len(top_predictions)} Predictions:\n")
        for rank, (label, prob) in enumerate(top_predictions, 1):
            # 20-cell bar; distinct glyphs for the filled and empty cells.
            filled = min(20, max(0, int(prob * 20)))
            bar = "█" * filled + "░" * (20 - filled)
            parts.append(f"{rank}. **{label}**: {bar} {prob * 100:.1f}%\n")

    parts.append("\n### 📊 Image Properties:\n")
    parts.append(f"- **Image Size:** {image_size[0]}x{image_size[1]} pixels\n")
    parts.append(f"- **Mode:** {image_mode}\n")
    return "".join(parts)


def predict_sign(image):
    """Classify a sign image and build the two outputs used by the UI.

    Args:
        image: PIL image from the image input, or None when nothing has been
            provided.

    Returns:
        tuple: ``(probabilities, markdown)``. ``probabilities`` maps each
        class label (as a string) to its probability, sorted highest first,
        or is None when no prediction was made. ``markdown`` is the formatted
        result, or a warning/error message.
    """
    if image is None:
        return None, "⚠️ Please upload an image of a sign to classify"

    if not model_loaded or predictor is None:
        return None, "⚠️ Model failed to load. Please refresh the page."

    try:
        # Record the uploaded mode before normalising to RGB, so the report
        # describes the user's image rather than the converted copy.
        original_mode = image.mode
        rgb_image = image if original_mode == "RGB" else image.convert("RGB")

        # The predictor is fed a DataFrame whose 'image' column holds file
        # paths, so the image is written to a temporary file first.
        with tempfile.TemporaryDirectory() as tmpdir:
            img_path = pathlib.Path(tmpdir) / "input.png"
            rgb_image.save(img_path)
            df = pd.DataFrame({"image": [str(img_path)]})

            pred_label = str(predictor.predict(df).iloc[0])
            proba_row = predictor.predict_proba(df).iloc[0]

        proba_dict = dict(
            sorted(
                ((str(label), float(prob)) for label, prob in proba_row.items()),
                key=lambda item: item[1],
                reverse=True,
            )
        )
        result_text = _format_result(pred_label, proba_dict, rgb_image.size, original_mode)
        return proba_dict, result_text
    except Exception as e:
        # UI boundary: log the full traceback server-side and show only the
        # short message to the user.
        import traceback
        error_detail = traceback.format_exc()
        print(f"Error in prediction: {error_detail}")
        return None, f"⚠️ Error making prediction:\n```\n{str(e)}\n```"
# Sample inputs for the examples gallery: one row per local image file,
# each row holding the single value for the image input.
EXAMPLES = [[filename] for filename in ("asl1.jpg", "asl2.jpg", "asl3.jpg")]
# Create Gradio interface

# Stylesheet handed to gr.Blocks.
_CUSTOM_CSS = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.title {
    text-align: center;
}
.confidence-high { color: green; }
.confidence-medium { color: orange; }
.confidence-low { color: red; }
"""

# Text shown in the result panel before any prediction and after "Clear".
_RESULT_PLACEHOLDER = (
    "### 📤 Upload an image to see predictions\n\n"
    "The model will automatically identify the sign type and provide confidence scores."
)

# Introductory Markdown rendered at the top of the page.
_HEADER_MARKDOWN = """
# 🚦 Sign Identification System

## About This Application

This application uses an **AutoGluon MultiModal Predictor** trained on the ecopus/sign_identification dataset
to identify and classify signs from images. The model leverages deep learning with a ResNet18 backbone
for accurate sign recognition.

### Model Performance

- **Accuracy:** 0.9444
- **Weighted F1 Score:** 0.9508

### Limitations

- **Currently only has 2 classes (Stop and Speed limit)**

### How to Use

1. **Upload an Image**: Click to upload or drag-and-drop an image of a sign
2. **Take a Photo**: Use your webcam to capture a sign directly
3. **Paste from Clipboard**: Paste an image directly
4. **View Results**: The model will identify the sign type with confidence scores
5. **Try Examples**: Click on the example images below to test the model
"""

# Reference Markdown rendered below the examples gallery.
_MODEL_INFO_MARKDOWN = """
### 📋 Model Information

**Technical Details:**

- **Model Type**: AutoGluon MultiModal Predictor
- **Architecture**: ResNet18 (timm_image backbone)
- **Training**: Medium quality preset with AutoGluon
- **Dataset**: ecopus/sign_identification (augmented split for training)
- **Source**: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)

**Training Configuration:**

- **Problem Type**: Multi-class Classification
- **Evaluation Metric**: Accuracy
- **Preprocessing**: AutoGluon automatic preprocessing
- **Data Augmentation**: Applied during training

**Files in Repository:**

- `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
- `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)

### 🎯 Best Practices

- Use clear, well-focused images
- Ensure the sign is fully visible in frame
- Good lighting improves accuracy
- Avoid excessive blur or distortion
- Center the sign in the image

### ⚠️ Limitations

- Trained on specific sign dataset
- Performance may vary with image quality
- Best suited for single sign per image
- May struggle with heavily obscured signs

---

*Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
"""

with gr.Blocks(
    title="Sign Identification System",
    theme=gr.themes.Soft(),
    css=_CUSTOM_CSS,
) as demo:
    # Header
    gr.Markdown(_HEADER_MARKDOWN)
    gr.Markdown("---")

    # Model status indicator, decided once when the page layout is built.
    if model_loaded:
        status_text = "✅ **Model Status:** Ready for predictions"
        if class_labels:
            status_text += f"\n\n**Number of classes:** {len(class_labels)}"
        gr.Markdown(status_text)
    else:
        gr.Markdown("❌ **Model Status:** Failed to load - please refresh the page")

    # Main interface: input on the left, results on the right.
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="Upload Sign Image",
                sources=["upload", "webcam", "clipboard"],
                height=400,
            )
            with gr.Row():
                predict_btn = gr.Button(
                    "🔍 Identify Sign",
                    variant="primary",
                    size="lg",
                    scale=2,
                )
                clear_btn = gr.Button(
                    "🔄 Clear",
                    variant="secondary",
                    scale=1,
                )
        with gr.Column(scale=1):
            result_text = gr.Markdown(
                label="Recognition Result",
                value=_RESULT_PLACEHOLDER,
            )
            proba_output = gr.Label(
                label="Class Probabilities",
                num_top_classes=10,
                show_label=True,
            )

    # Examples section
    gr.Markdown("---")
    gr.Markdown("### 📸 Example Images")
    gr.Markdown("Click on any example below to test the model with sample sign images:")
    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_input],
        outputs=[proba_output, result_text],
        fn=predict_sign,
        label="Sample Sign Images",
        examples_per_page=3,
        cache_examples=False,
    )

    # Additional information
    gr.Markdown("---")
    gr.Markdown(_MODEL_INFO_MARKDOWN)

    # Run the classifier on an explicit button press ...
    predict_btn.click(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text],
    )
    # ... and also whenever the image value changes.
    image_input.change(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text],
    )

    def clear_interface():
        """Return reset values for the image, the label and the result text."""
        return None, None, _RESULT_PLACEHOLDER

    clear_btn.click(
        fn=clear_interface,
        outputs=[image_input, proba_output, result_text],
    )

if __name__ == "__main__":
    demo.launch()