File size: 12,220 Bytes
a578f34
 
 
 
 
 
 
 
 
 
 
61733d4
 
 
 
a578f34
61733d4
 
 
45df97d
61733d4
 
 
 
 
1a33a3d
45df97d
61733d4
 
45df97d
61733d4
 
45df97d
61733d4
 
 
 
 
 
 
1a33a3d
45df97d
61733d4
 
1a33a3d
 
61733d4
 
1a33a3d
 
 
 
61733d4
 
1a33a3d
61733d4
 
 
 
1a33a3d
491944e
1a33a3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61733d4
 
 
1a33a3d
 
61733d4
 
 
1a33a3d
45df97d
1a33a3d
61733d4
45df97d
61733d4
 
45df97d
61733d4
 
2c5d905
61733d4
 
45df97d
1a33a3d
61733d4
 
 
45df97d
61733d4
45df97d
61733d4
45df97d
61733d4
 
 
 
 
 
 
1a33a3d
45df97d
61733d4
1a33a3d
61733d4
 
 
1a33a3d
 
61733d4
 
45df97d
61733d4
 
 
1a33a3d
 
 
 
61733d4
1a33a3d
61733d4
 
 
 
 
 
1a33a3d
61733d4
 
1a33a3d
 
 
61733d4
 
1a33a3d
 
 
 
 
 
 
 
 
61733d4
 
1a33a3d
61733d4
 
1a33a3d
61733d4
1a33a3d
 
 
 
61733d4
 
 
45df97d
 
61733d4
 
45df97d
 
61733d4
 
 
 
 
 
45df97d
61733d4
a578f34
1a33a3d
a578f34
 
 
 
 
61733d4
 
 
1a33a3d
 
 
a578f34
 
61733d4
a578f34
61733d4
1a33a3d
61733d4
 
1a33a3d
 
 
61733d4
1a33a3d
 
 
92679f8
 
 
61733d4
1a33a3d
 
 
 
 
 
61733d4
 
a578f34
61733d4
 
 
1a33a3d
 
 
 
a578f34
1a33a3d
61733d4
 
a578f34
 
61733d4
a578f34
 
1a33a3d
a578f34
61733d4
a578f34
61733d4
1a33a3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61733d4
a578f34
61733d4
a578f34
61733d4
1a33a3d
a578f34
61733d4
 
a578f34
1a33a3d
a578f34
61733d4
a578f34
61733d4
 
a578f34
 
1a33a3d
61733d4
a578f34
 
 
 
1a33a3d
 
61733d4
 
a578f34
61733d4
 
a578f34
61733d4
 
 
 
 
1a33a3d
 
 
61733d4
 
1a33a3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61733d4
 
1a33a3d
 
61733d4
1a33a3d
61733d4
 
 
 
 
 
a578f34
1a33a3d
a578f34
61733d4
a578f34
61733d4
 
a578f34
1a33a3d
a578f34
61733d4
a578f34
61733d4
 
 
1a33a3d
61733d4
a578f34
 
61733d4
a578f34
 
 
61733d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
import os
import shutil
import zipfile
import pathlib
import tempfile
import gradio as gr
import pandas as pd
import PIL.Image
import huggingface_hub
from autogluon.multimodal import MultiModalPredictor

# Hugging Face Hub coordinates of the packaged predictor
MODEL_REPO_ID = "its-zion-18/sign-image-autogluon-predictor"
ZIP_FILENAME = "autogluon_image_predictor_dir.zip"
# Optional access token; stays None when the variable is not set
HF_TOKEN = os.environ.get("HF_TOKEN")

# Download location for the archive and the folder it is unpacked into
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

def prepare_predictor_dir():
    """Fetch the zipped AutoGluon predictor from the Hub and unpack it locally.

    The archive is downloaded into CACHE_DIR, then extracted into a freshly
    emptied EXTRACT_DIR.

    Returns:
        str: Path of the directory the archive was extracted into.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # Step 1: pull the archive from the model repository
    print("Downloading model from Hugging Face...")
    download_options = {
        "repo_id": MODEL_REPO_ID,
        "filename": ZIP_FILENAME,
        "repo_type": "model",
        "token": HF_TOKEN,
        "local_dir": str(CACHE_DIR),
    }
    archive_path = huggingface_hub.hf_hub_download(**download_options)

    # Step 2: wipe any previous extraction so old files cannot mix with new ones
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    # Step 3: unpack the archive
    print("Extracting model files...")
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(str(EXTRACT_DIR))

    # The extraction directory itself is handed back as the predictor directory
    extracted_to = str(EXTRACT_DIR)
    return extracted_to

# Load the predictor once at import time. predict_sign and the UI read the
# module-level names model_loaded, predictor and class_labels set here.
model_loaded = False
predictor = None
class_labels = []

try:
    PREDICTOR_DIR = prepare_predictor_dir()
    print(f"Loading predictor from: {PREDICTOR_DIR}")
    predictor = MultiModalPredictor.load(PREDICTOR_DIR)
    model_loaded = True
    print("✓ Model loaded successfully")

    # Class detection is best-effort: a failure here must not mark the
    # model as unloaded, so it has its own try/except.
    try:
        # Run the model on a blank image and read the class names off the
        # columns of the probability table.
        with tempfile.TemporaryDirectory() as tmpdir:
            dummy_img_path = pathlib.Path(tmpdir) / "dummy.png"
            dummy_img = PIL.Image.new('RGB', (224, 224), color='white')
            dummy_img.save(dummy_img_path)
            dummy_df = pd.DataFrame({"image": [str(dummy_img_path)]})

            proba_df = predictor.predict_proba(dummy_df)
            class_labels = list(proba_df.columns)
            print(f"Detected classes: {class_labels}")
    except Exception as e:
        print(f"Could not determine classes: {e}")
        # Leave the list empty when detection fails
        class_labels = []

except Exception as e:
    # Any download/extract/load failure leaves the app running with the
    # model flagged as unavailable instead of crashing at import.
    print(f"Error loading model: {e}")
    import traceback
    traceback.print_exc()
    model_loaded = False
    predictor = None

def _confidence_note(confidence):
    """Return the Markdown line interpreting a confidence given in percent."""
    if confidence > 80:
        return "✅ **High Confidence** - Clear sign detection\n\n"
    if confidence > 50:
        return "⚠️ **Medium Confidence** - Sign detected but may need verification\n\n"
    return "❌ **Low Confidence** - Unclear or ambiguous sign\n\n"


def _format_result(pred_label, proba_dict, size, mode):
    """Build the Markdown report for a single prediction.

    Args:
        pred_label: Predicted class, already converted to a string.
        proba_dict: Mapping of class label to probability, highest first.
        size: (width, height) of the image in pixels.
        mode: PIL mode string of the image as it was uploaded.

    Returns:
        str: Markdown text for the results panel.
    """
    # If the predicted label is not one of the probability columns, fall
    # back to the largest probability (or 0 when there are none).
    fallback = max(proba_dict.values()) if proba_dict else 0
    confidence = proba_dict.get(pred_label, fallback) * 100

    parts = [
        "## 🚦 Sign Classification Result\n\n",
        f"### **Predicted Sign: {pred_label}**\n\n",
        f"**Confidence:** {confidence:.1f}%\n\n",
        _confidence_note(confidence),
    ]

    # Ranked list, only worth showing when there is more than one class.
    top = list(proba_dict.items())[:5]
    if len(top) > 1:
        # Report the number actually listed; the model may have < 5 classes.
        parts.append(f"### Top {len(top)} Predictions:\n")
        for rank, (label, prob) in enumerate(top, 1):
            filled = int(prob * 20)
            bar = '█' * filled + '░' * (20 - filled)
            parts.append(f"{rank}. **{label}**: {bar} {prob*100:.1f}%\n")

    parts.append("\n### 📊 Image Properties:\n")
    parts.append(f"- **Image Size:** {size[0]}x{size[1]} pixels\n")
    parts.append(f"- **Mode:** {mode}\n")
    return "".join(parts)


def predict_sign(image):
    """
    Predict the sign type from an image

    Args:
        image: PIL Image object, or None when nothing has been uploaded

    Returns:
        dict | None: Class label -> probability, sorted highest first;
            None when no prediction could be made
        str: Markdown-formatted prediction result, or a warning/error message
    """
    # Guard: nothing to classify
    if image is None:
        return None, "⚠️ Please upload an image of a sign to classify"

    # Guard: module-level model loading failed
    if not model_loaded or predictor is None:
        return None, "⚠️ Model failed to load. Please refresh the page."

    try:
        # Remember the uploaded mode before normalising to RGB, so the
        # report describes the upload rather than always saying "RGB".
        original_mode = image.mode
        if original_mode != 'RGB':
            image = image.convert('RGB')

        # The predictor is fed a DataFrame with an 'image' column of file
        # paths, so the image is written to a temporary file first.
        with tempfile.TemporaryDirectory() as tmpdir:
            img_path = pathlib.Path(tmpdir) / "input.png"
            image.save(img_path)
            df = pd.DataFrame({"image": [str(img_path)]})

            pred_label = str(predictor.predict(df).iloc[0])
            proba_row = predictor.predict_proba(df).iloc[0]

        # Column names become the labels; order by probability, best first.
        proba_dict = {str(label): float(prob) for label, prob in proba_row.items()}
        proba_dict = dict(
            sorted(proba_dict.items(), key=lambda item: item[1], reverse=True)
        )

        result_text = _format_result(pred_label, proba_dict, image.size, original_mode)
        return proba_dict, result_text

    except Exception as e:
        # UI boundary: log the full traceback, show only the message to the user.
        import traceback
        error_detail = traceback.format_exc()
        print(f"Error in prediction: {error_detail}")
        return None, f"⚠️ Error making prediction:\n```\n{e}\n```"

# Sample inputs for the examples gallery: one single-element row per local file.
# NOTE(review): the file names suggest ASL images - confirm they match this model.
EXAMPLES = [[file_name] for file_name in ("asl1.jpg", "asl2.jpg", "asl3.jpg")]

# Create Gradio interface

# Text shown in the results panel whenever no image is loaded; shared by the
# initial state, the Clear button and the image-removed case.
RESULT_PLACEHOLDER = (
    "### 📤 Upload an image to see predictions\n\n"
    "The model will automatically identify the sign type and provide confidence scores."
)

with gr.Blocks(
    title="Sign Identification System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        font-family: 'Arial', sans-serif;
    }
    .title {
        text-align: center;
    }
    .confidence-high { color: green; }
    .confidence-medium { color: orange; }
    .confidence-low { color: red; }
    """
) as demo:

    # Header
    gr.Markdown("""
    # 🚦 Sign Identification System

    ## About This Application
    This application uses an **AutoGluon MultiModal Predictor** trained on the ecopus/sign_identification dataset
    to identify and classify signs from images. The model leverages deep learning with a ResNet18 backbone
    for accurate sign recognition.

    ### Model Performance
    - **Accuracy:** 0.9444
    - **Weighted F1 Score:** 0.9508

    ### Limitations
    - **Currently supports only 2 classes** (Stop and Speed limit)

    ### How to Use
    1. **Upload an Image**: Click to upload or drag-and-drop an image of a sign
    2. **Take a Photo**: Use your webcam to capture a sign directly
    3. **Paste from Clipboard**: Paste an image directly
    4. **View Results**: The model will identify the sign type with confidence scores
    5. **Try Examples**: Click on the example images below to test the model
    """)

    gr.Markdown("---")

    # Model status indicator, decided once from the import-time load result
    if model_loaded:
        status_text = "✅ **Model Status:** Ready for predictions"
        if class_labels:
            status_text += f"\n\n**Number of classes:** {len(class_labels)}"
        gr.Markdown(status_text)
    else:
        gr.Markdown("❌ **Model Status:** Failed to load - please refresh the page")

    # Main interface: input on the left, results on the right
    with gr.Row():
        with gr.Column(scale=1):
            # Image input
            image_input = gr.Image(
                type="pil",
                label="Upload Sign Image",
                sources=["upload", "webcam", "clipboard"],
                height=400
            )

            with gr.Row():
                # Prediction button
                predict_btn = gr.Button(
                    "🔍 Identify Sign",
                    variant="primary",
                    size="lg",
                    scale=2
                )

                # Clear button
                clear_btn = gr.Button(
                    "🔄 Clear",
                    variant="secondary",
                    scale=1
                )

        with gr.Column(scale=1):
            # Results display
            result_text = gr.Markdown(
                label="Recognition Result",
                value=RESULT_PLACEHOLDER
            )

            # Probability distribution
            proba_output = gr.Label(
                label="Class Probabilities",
                num_top_classes=10,
                show_label=True
            )

    # Examples section
    gr.Markdown("---")
    gr.Markdown("### 📸 Example Images")
    gr.Markdown("Click on any example below to test the model with sample sign images:")

    gr.Examples(
        examples=EXAMPLES,
        inputs=[image_input],
        outputs=[proba_output, result_text],
        fn=predict_sign,
        label="Sample Sign Images",
        examples_per_page=3,
        cache_examples=False
    )

    # Additional information
    gr.Markdown("---")
    gr.Markdown("""
    ### 📊 Model Information

    **Technical Details:**
    - **Model Type**: AutoGluon MultiModal Predictor
    - **Architecture**: ResNet18 (timm_image backbone)
    - **Training**: Medium quality preset with AutoGluon
    - **Dataset**: ecopus/sign_identification (augmented split for training)
    - **Source**: [its-zion-18/sign-image-autogluon-predictor](https://huggingface.co/its-zion-18/sign-image-autogluon-predictor)

    **Training Configuration:**
    - **Problem Type**: Multi-class Classification
    - **Evaluation Metric**: Accuracy
    - **Preprocessing**: AutoGluon automatic preprocessing
    - **Data Augmentation**: Applied during training

    **Files in Repository:**
    - `autogluon_image_predictor_dir.zip`: Native AutoGluon predictor directory
    - `autogluon_image_predictor.pkl`: Pickled predictor (cloudpickle format)

    ### 🎯 Best Practices
    - Use clear, well-focused images
    - Ensure the sign is fully visible in frame
    - Good lighting improves accuracy
    - Avoid excessive blur or distortion
    - Center the sign in the image

    ### ⚠️ Limitations
    - Trained on specific sign dataset
    - Performance may vary with image quality
    - Best suited for single sign per image
    - May struggle with heavily obscured signs

    ---
    *Created for CMU 24-679 Course | Powered by AutoGluon & Gradio*
    """)

    # Connect functions to UI elements
    predict_btn.click(
        fn=predict_sign,
        inputs=[image_input],
        outputs=[proba_output, result_text]
    )

    def predict_on_change(image):
        """Predict for a new image, or restore the placeholder when it is removed.

        The image `change` event also fires when the Clear button sets the
        image to None; routing that case to the placeholder keeps the reset
        text from being replaced by predict_sign's "please upload" warning.
        """
        if image is None:
            return None, RESULT_PLACEHOLDER
        return predict_sign(image)

    # Auto-predict on image change
    image_input.change(
        fn=predict_on_change,
        inputs=[image_input],
        outputs=[proba_output, result_text]
    )

    # Clear function
    def clear_interface():
        """Reset the image, the probability label and the result text."""
        return None, None, RESULT_PLACEHOLDER

    clear_btn.click(
        fn=clear_interface,
        outputs=[image_input, proba_output, result_text]
    )

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()