import os
import pathlib
import shutil
import tempfile
import traceback
import zipfile

import gradio as gr
import huggingface_hub
import pandas as pd
import PIL.Image
from autogluon.multimodal import MultiModalPredictor


# Hugging Face Hub model repository that hosts the zipped predictor.
MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
# Name of the predictor archive inside that repository.
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
# Optional Hub access token taken from the environment; None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Local directory the archive is downloaded into.
CACHE_DIR = pathlib.Path("hf_assets")
# Directory the archive is unpacked into (recreated on every start-up).
EXTRACT_DIR = CACHE_DIR / "predictor_native"


def prepare_predictor_dir() -> str:
    """Download the zipped AutoGluon predictor and unpack it locally.

    The archive ``ZIP_FILENAME`` is fetched from the ``MODEL_REPO_ID`` model
    repository into ``CACHE_DIR`` and extracted into ``EXTRACT_DIR``. An
    existing ``EXTRACT_DIR`` is deleted first, so files from an earlier
    extraction never mix with the new archive's contents.

    Returns:
        The extraction directory (``EXTRACT_DIR``) as a string.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    print("Downloading model from Hugging Face...")
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        token=HF_TOKEN,
        local_dir=str(CACHE_DIR),
    )

    # Always start from an empty extraction directory.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    print("Extracting model files...")
    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    return str(EXTRACT_DIR)


# Module-level model state, read by predict_sign() and by the UI status banner.
model_loaded = False
predictor = None
class_labels = []

# Load the predictor once at import time. Any failure is logged and leaves
# the app running in a "model unavailable" state instead of crashing.
try:
    PREDICTOR_DIR = prepare_predictor_dir()
    print(f"Loading predictor from: {PREDICTOR_DIR}")
    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
    model_loaded = True
    print("✓ Model loaded successfully")

    # Discover the class names by running one inference on a blank image and
    # reading the column names of the probability table. Failing here is not
    # fatal: the model stays loaded and only the class count is unknown.
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            dummy_img_path = pathlib.Path(tmpdir) / "dummy.png"
            PIL.Image.new("RGB", (224, 224), color="white").save(dummy_img_path)

            dummy_df = pd.DataFrame({"image": [str(dummy_img_path)]})
            proba_df = predictor.predict_proba(dummy_df)
            class_labels = list(proba_df.columns)
            print(f"Detected classes: {class_labels}")
    except Exception as e:
        print(f"Could not determine classes: {e}")
        class_labels = []

except Exception as e:
    print(f"Error loading model: {e}")
    traceback.print_exc()
    model_loaded = False
    predictor = None


def predict_sign(image):
    """Classify a sign image and build a Markdown report.

    Args:
        image: PIL image supplied by the Gradio input, or None when nothing
            has been uploaded.

    Returns:
        A ``(probabilities, report)`` tuple. ``probabilities`` is a dict
        mapping each class label (as ``str``) to its probability, ordered
        from most to least likely; it is None when no prediction was made
        (no image, model not loaded, or an error). ``report`` is the
        Markdown text to display, or a warning/error message.
    """
    if image is None:
        return None, "⚠️ Please upload an image of a sign to classify"

    if not model_loaded or predictor is None:
        return None, "⚠️ Model failed to load. Please refresh the page."

    try:
        # The predictor is fed a DataFrame of image paths, so the upload is
        # written to a temporary file for the duration of the inference.
        with tempfile.TemporaryDirectory() as tmpdir:
            img_path = pathlib.Path(tmpdir) / "input.png"

            # Convert a copy so the report can describe the image as uploaded.
            rgb_image = image if image.mode == "RGB" else image.convert("RGB")
            rgb_image.save(img_path)

            df = pd.DataFrame({"image": [str(img_path)]})
            pred_label = str(predictor.predict(df).iloc[0])
            proba_row = predictor.predict_proba(df).iloc[0]

        probabilities = {str(label): float(p) for label, p in proba_row.items()}
        probabilities = dict(
            sorted(probabilities.items(), key=lambda item: item[1], reverse=True)
        )

        return probabilities, _build_report(pred_label, probabilities, image)

    except Exception as e:
        # UI boundary: log the full traceback, show only the message.
        error_detail = traceback.format_exc()
        print(f"Error in prediction: {error_detail}")
        return None, f"⚠️ Error making prediction:\n```\n{str(e)}\n```"


def _confidence_note(confidence):
    """Return the Markdown remark for a confidence given in percent."""
    if confidence > 80:
        return "✅ **High Confidence** - Clear sign detection\n\n"
    if confidence > 50:
        return "⚠️ **Medium Confidence** - Sign detected but may need verification\n\n"
    return "❌ **Low Confidence** - Unclear or ambiguous sign\n\n"


def _build_report(pred_label, probabilities, image):
    """Format the Markdown report for one prediction.

    ``probabilities`` must already be sorted from most to least likely.
    """
    # Fall back to the highest probability when the predicted label is not
    # among the probability columns (e.g. a label/column type mismatch).
    fallback = max(probabilities.values(), default=0)
    confidence = probabilities.get(pred_label, fallback) * 100

    parts = [
        "## 🚦 Sign Classification Result\n\n",
        f"### **Predicted Sign: {pred_label}**\n\n",
        f"**Confidence:** {confidence:.1f}%\n\n",
        _confidence_note(confidence),
    ]

    top = list(probabilities.items())[:5]
    if len(top) > 1:
        # The header states the real count: the model may have fewer than 5 classes.
        parts.append(f"### Top {len(top)} Predictions:\n")
        for rank, (label, prob) in enumerate(top, 1):
            filled = int(prob * 20)  # 20-character text bar
            bar = "█" * filled + "░" * (20 - filled)
            parts.append(f"{rank}. **{label}**: {bar} {prob * 100:.1f}%\n")

    parts.append("\n### 📊 Image Properties:\n")
    parts.append(f"- **Image Size:** {image.size[0]}x{image.size[1]} pixels\n")
    parts.append(f"- **Mode:** {image.mode}\n")

    return "".join(parts)


# Rows for the gr.Examples gallery: one single-element row per sample image.
# NOTE(review): presumably these files ship next to this script - confirm.
EXAMPLES = [
    ["asl1.jpg"],
    ["asl2.jpg"],
    ["asl3.jpg"],
]


# Gradio UI: intro text, model status banner, input/output columns, example
# gallery, model information, and the event wiring.
with gr.Blocks(
    title="Sign Identification System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .title {
        text-align: center;
    }
    .confidence-high { color: green; }
    .confidence-medium { color: orange; }
    .confidence-low { color: red; }
    """,
) as demo:

    # Placeholder shown in the result panel before a prediction and after
    # "Clear"; defined once so both places stay in sync.
    welcome_message = (
        "### 📤 Upload an image to see predictions\n\n"
        "The model will automatically identify the sign type and provide confidence scores."
    )

    gr.Markdown("""
    # 🚦 Sign Identification System

    ## About This Application
    This application uses an **AutoGluon MultiModal Predictor** trained on the ecopus/sign_identification dataset
    to identify and classify signs from images. The model leverages deep learning with a ResNet18 backbone
    for accurate sign recognition.

    ### Model Performance
    - **Accuracy:** 0.9444
    - **Weighted F1 Score:** 0.9508

    ### Limitations
    - Currently only 2 classes are supported (Stop and Speed limit)

    ### How to Use
    1. **Upload an Image**: Click to upload or drag-and-drop an image of a sign
    2. **Take a Photo**: Use your webcam to capture a sign directly
    3. **Paste from Clipboard**: Paste an image directly
    4. **View Results**: The model will identify the sign type with confidence scores
    5. **Try Examples**: Click on the example images below to test the model
    """)

    gr.Markdown("---")

    # Status banner reflecting the outcome of the import-time model load.
    if model_loaded:
        status_text = "✅ **Model Status:** Ready for predictions"
        if class_labels:
            status_text += f"\n\n**Number of classes:** {len(class_labels)}"
        gr.Markdown(status_text)
    else:
        gr.Markdown("❌ **Model Status:** Failed to load - please refresh the page")

    with gr.Row():
        # Left column: image input and action buttons.
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="Upload Sign Image",
                sources=["upload", "webcam", "clipboard"],
                height=400,
            )

            with gr.Row():
                predict_btn = gr.Button(
                    "🔍 Identify Sign",
                    variant="primary",
                    size="lg",
                    scale=2,
                )
                clear_btn = gr.Button(
                    "🔄 Clear",
                    variant="secondary",
                    scale=1,
                )

        # Right column: textual report and probability chart.
        with gr.Column(scale=1):
            result_text = gr.Markdown(
                label="Recognition Result",
                value=welcome_message,
            )

            proba_output = gr.Label(
                label="Class Probabilities",
                num_top_classes=10,
                show_label=True,
            )

    gr.Markdown("---")
    gr.Markdown("### 📸 Example Images")
    gr.Markdown("Click on any example below to test the model with sample sign images:")

    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_input],
        outputs=[proba_output, result_text],
        fn=predict_sign,
        label="Sample Sign Images",
        examples_per_page=3,
        cache_examples=False,
    )

    gr.Markdown("---")
    gr.Markdown("""
    ### 📋 Model Information

    **Technical Details:**
    - **Model Type**: AutoGluon MultiModal Predictor
    - **Architecture**: ResNet18 (timm_image backbone)
    - **Training**: Medium quality preset with AutoGluon
    - **Dataset**: ecopus/sign_identification (augmented split for training)
    - **Source**: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)

    **Training Configuration:**
    - **Problem Type**: Multi-class Classification
    - **Evaluation Metric**: Accuracy
    - **Preprocessing**: AutoGluon automatic preprocessing
    - **Data Augmentation**: Applied during training

    **Files in Repository:**
    - `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
    - `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)

    ### 🎯 Best Practices
    - Use clear, well-focused images
    - Ensure the sign is fully visible in frame
    - Good lighting improves accuracy
    - Avoid excessive blur or distortion
    - Center the sign in the image

    ### ⚠️ Limitations
    - Trained on specific sign dataset
    - Performance may vary with image quality
    - Best suited for single sign per image
    - May struggle with heavily obscured signs

    ---
    *Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
    """)

    # Run a prediction on an explicit button press ...
    predict_btn.click(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text],
    )

    # ... and whenever the input image changes (upload, webcam, paste, example).
    image_input.change(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text],
    )

    def clear_interface():
        """Reset the image, the probability chart and the result text."""
        return None, None, welcome_message

    # NOTE(review): clearing the image may also fire image_input.change and
    # replace the placeholder with the "please upload" warning - confirm.
    clear_btn.click(
        fn=clear_interface,
        outputs=[image_input, proba_output, result_text],
    )


# Start the Gradio server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()