somya-27-04-03 committed
Commit 6c52838 · verified · 1 Parent(s): 68ac0e5

Upload 5 files

Files changed (5)
  1. Dockerfile.txt +21 -0
  2. app.py +390 -0
  3. emotion_cnn.pth +3 -0
  4. requirements.txt +13 -0
  5. runtime.txt +1 -0
Dockerfile.txt ADDED
@@ -0,0 +1,21 @@
+ FROM python:3.10-slim
+
+ ENV PYTHONUNBUFFERED=1
+
+ RUN apt-get update && apt-get install -y \
+     ffmpeg \
+     libsndfile1 \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /app
+
+ COPY requirements.txt /app/
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . /app
+
+ EXPOSE 7860
+
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,390 @@
+ import os
+ import json
+ from flask import Flask, request, render_template_string
+ from werkzeug.utils import secure_filename  # ships with Flask; sanitizes upload filenames
+ from textblob import TextBlob
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torchvision import transforms
+ from PIL import Image
+ import numpy as np
+ import librosa
+
+ # ----------------------------------------------------------
+ # PATHS
+ # ----------------------------------------------------------
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ UPLOAD_FOLDER = os.path.join(BASE_DIR, "uploads")
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+ app = Flask(__name__)
+
+ # ----------------------------------------------------------
+ # TEXT SENTIMENT
+ # ----------------------------------------------------------
+ def predict_text_sentiment(text: str):
+     if not text or not text.strip():
+         return None, None
+
+     polarity = TextBlob(text).sentiment.polarity
+
+     if polarity > 0.1:
+         arr = [0.1, 0.1, 0.8]
+     elif polarity < -0.1:
+         arr = [0.8, 0.1, 0.1]
+     else:
+         arr = [0.2, 0.7, 0.1]
+
+     return arr, max(arr)
+
+
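A quick sanity check of the thresholding above (a sketch; exact polarity values depend on TextBlob's lexicon, and "app" assumes this file is importable under that name):

    from app import predict_text_sentiment

    arr, conf = predict_text_sentiment("This is great!")   # "great" scores well above 0.1
    print(arr, conf)                                       # expect [0.1, 0.1, 0.8] 0.8
    print(predict_text_sentiment(""))                      # (None, None): empty input is skipped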
+ # ----------------------------------------------------------
+ # FINAL AUDIO SENTIMENT (Librosa-based - NO TF, NO TRANSFORMERS)
+ # ----------------------------------------------------------
+ def predict_audio_sentiment(file_path):
+     if not file_path:
+         return None, None
+
+     try:
+         # Load audio
+         y, sr = librosa.load(file_path, sr=16000)
+
+         # Extract intensity & pitch
+         energy = float(np.mean(np.abs(y)))
+
+         pitch, _ = librosa.piptrack(y=y, sr=sr)
+         pitch_vals = pitch[pitch > 0]
+         pitch_mean = float(np.mean(pitch_vals)) if pitch_vals.size > 0 else 0
+
+         tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
+
+         # Simple rule-based emotions
+         if energy < 0.02 and pitch_mean < 120:
+             arr = [0.8, 0.15, 0.05]  # negative
+         elif pitch_mean > 180 and energy > 0.05:
+             arr = [0.7, 0.2, 0.1]  # angry -> negative
+         elif tempo > 120 or pitch_mean > 160:
+             arr = [0.1, 0.1, 0.8]  # happy -> positive
+         else:
+             arr = [0.1, 0.8, 0.1]  # neutral
+
+         return arr, max(arr)
+
+     except Exception as e:
+         print("❌ AUDIO ERROR:", e)
+         return None, None
+
+
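A minimal smoke test for these heuristics (a sketch: the synthetic tone is illustrative, numpy and soundfile are already in requirements.txt, and the printed vector depends on how piptrack and beat_track happen to score the clip):

    import numpy as np
    import soundfile as sf
    from app import predict_audio_sentiment   # assumes this file is importable as app

    sr = 16000                                # matches the load rate above
    t = np.linspace(0, 2.0, 2 * sr, endpoint=False)
    sf.write("tone.wav", 0.1 * np.sin(2 * np.pi * 220 * t), sr)  # 2 s, 220 Hz tone

    arr, conf = predict_audio_sentiment("tone.wav")
    print(arr, conf)                          # a [negative, neutral, positive] vector and its max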
+ # ----------------------------------------------------------
+ # IMAGE SENTIMENT (your trained CNN)
+ # ----------------------------------------------------------
+ NUM_CLASSES = 7
+ IMG_LABELS = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]
+
+ class MediumEmotionCNN(nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.layer1 = nn.Sequential(
+             nn.Conv2d(1, 32, 3, padding=1),
+             nn.ReLU(),
+             nn.BatchNorm2d(32),
+             nn.MaxPool2d(2)
+         )
+         self.layer2 = nn.Sequential(
+             nn.Conv2d(32, 64, 3, padding=1),
+             nn.ReLU(),
+             nn.BatchNorm2d(64),
+             nn.MaxPool2d(2)
+         )
+         self.layer3 = nn.Sequential(
+             nn.Conv2d(64, 128, 3, padding=1),
+             nn.ReLU(),
+             nn.BatchNorm2d(128),
+             nn.MaxPool2d(2)
+         )
+         self.fc1 = nn.Linear(128 * 6 * 6, 256)
+         self.dropout = nn.Dropout(0.4)
+         self.fc2 = nn.Linear(256, NUM_CLASSES)
+
+     def forward(self, x):
+         x = self.layer1(x)
+         x = self.layer2(x)
+         x = self.layer3(x)
+         x = x.reshape(x.size(0), -1)
+         x = F.relu(self.fc1(x))
+         x = self.dropout(x)
+         return self.fc2(x)
+
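The 128 * 6 * 6 input size of fc1 follows from the three 2x poolings: each halves the spatial size, so a 48x48 input goes 48 → 24 → 12 → 6 per axis while layer3 leaves 128 channels. A quick shape check (a sketch; assumes this file is importable as app):

    import torch
    from app import MediumEmotionCNN

    model = MediumEmotionCNN().eval()
    with torch.no_grad():
        logits = model(torch.randn(1, 1, 48, 48))   # one grayscale 48x48 face
    print(logits.shape)                             # torch.Size([1, 7]), one logit per emotion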
+ # Load model
+ IMG_MODEL_PATH = os.path.join(BASE_DIR, "emotion_cnn.pth")
+ image_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ image_model = MediumEmotionCNN().to(image_device)
+ IMAGE_MODEL_OK = False
+
+ try:
+     image_model.load_state_dict(torch.load(IMG_MODEL_PATH, map_location=image_device))
+     image_model.eval()
+     IMAGE_MODEL_OK = True
+     print("🟢 Image CNN loaded successfully!")
+ except Exception as e:
+     print("❌ Image model failed:", e)
+
+ img_transform = transforms.Compose([
+     transforms.Grayscale(1),
+     transforms.Resize((48, 48)),
+     transforms.ToTensor(),
+     transforms.Normalize((0.5,), (0.5,))
+ ])
+
+ def predict_image_sentiment(path):
+     if not (IMAGE_MODEL_OK and path and os.path.exists(path)):
+         return None, None
+
+     try:
+         img = Image.open(path).convert("RGB")
+         x = img_transform(img).unsqueeze(0).to(image_device)
+
+         with torch.no_grad():
+             logits = image_model(x)
+             probs7 = torch.softmax(logits, dim=1)[0].cpu().numpy()
+
+         idx = {l: i for i, l in enumerate(IMG_LABELS)}
+         pos = float(probs7[idx["happy"]] + probs7[idx["surprise"]])
+         neu = float(probs7[idx["neutral"]])
+         neg = float(probs7[idx["angry"]] + probs7[idx["disgust"]] +
+                     probs7[idx["fear"]] + probs7[idx["sad"]])
+
+         return [neg, neu, pos], max([neg, neu, pos])
+
+     except Exception as e:
+         print("❌ Image error:", e)
+         return None, None
+
+
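To make the 7-class to 3-class folding concrete, here is the same arithmetic on made-up probabilities (hypothetical numbers, not model output):

    IMG_LABELS = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]
    probs7 = [0.05, 0.02, 0.03, 0.60, 0.20, 0.05, 0.05]   # sums to 1.0

    idx = {l: i for i, l in enumerate(IMG_LABELS)}
    pos = probs7[idx["happy"]] + probs7[idx["surprise"]]                    # 0.65
    neg = sum(probs7[idx[k]] for k in ("angry", "disgust", "fear", "sad"))  # 0.15
    print([round(neg, 2), probs7[idx["neutral"]], round(pos, 2)])           # [0.15, 0.2, 0.65]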
+ # ----------------------------------------------------------
+ # FUSION
+ # ----------------------------------------------------------
+ def fuse_sentiments(*items):
+     probs = [arr for arr, conf in items if arr]
+     if not probs:
+         return None
+
+     avg = torch.tensor(probs).mean(dim=0).tolist()
+     sent = ["negative", "neutral", "positive"][int(np.argmax(avg))]
+     emoji = {"negative": "😡", "neutral": "😐", "positive": "😊"}[sent]
+
+     return {"sentiment": sent, "emoji": emoji, "probs": avg}
+
+
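For instance, fusing a confidently positive text vector with a confidently neutral audio vector while the image predictor returned nothing (hypothetical inputs; assumes this file is importable as app):

    from app import fuse_sentiments

    fused = fuse_sentiments(
        ([0.1, 0.1, 0.8], 0.8),   # text: positive
        ([0.1, 0.8, 0.1], 0.8),   # audio: neutral
        (None, None),             # image: filtered out by the `if arr` test
    )
    print(fused)  # probs average to [0.1, 0.45, 0.45]; argmax takes the first max, so "neutral"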
+ # ----------------------------------------------------------
+ # HTML (unchanged)
+ # ----------------------------------------------------------
+ HTML = """
+ <!doctype html>
+ <html><head>
+ <meta charset="utf-8" />
+
+ <title>🎭 Multimodal Sentiment Analyzer</title>
+
+ <style>
+ body{
+   margin:0;
+   font-family:Poppins, sans-serif;
+   background: linear-gradient(135deg, #161616, #1f0033, #33001a);
+   background-size: 200% 200%;
+   animation: gradientShift 8s ease infinite;
+   color:#f5f5f5;
+ }
+
+ @keyframes gradientShift {
+   0% { background-position: 0% 50%; }
+   50% { background-position: 100% 50%; }
+   100% { background-position: 0% 50%; }
+ }
+
+ .wrap{
+   max-width:900px;
+   margin:40px auto;
+   padding:20px;
+ }
+
+ .card{
+   background:rgba(255,255,255,0.07);
+   backdrop-filter: blur(12px);
+   border-radius:16px;
+   padding:28px;
+   box-shadow:0 0 18px rgba(0,0,0,0.5);
+   margin-top:22px;
+   border:1px solid rgba(255,255,255,0.15);
+ }
+
+ h1{
+   text-align:center;
+   font-size:36px;
+   font-weight:700;
+   color:#ffca5a;
+   margin-bottom:10px;
+   text-shadow:0 0 12px rgba(255, 204, 102,0.4);
+ }
+
+ input,textarea{
+   width:100%;
+   padding:14px;
+   border-radius:12px;
+   background:rgba(255,255,255,0.15);
+   border:1px solid rgba(255,255,255,0.25);
+   color:#fff;
+   margin-top:8px;
+   margin-bottom:18px;
+   outline:none;
+   resize:none;
+   box-sizing: border-box;
+ }
+
+ .btn{
+   width:100%;
+   padding:16px;
+   border-radius:12px;
+   background:linear-gradient(90deg,#ff9933,#ff5500);
+   border:0;
+   font-weight:bold;
+   color:white;
+   margin-top:6px;
+   cursor:pointer;
+   box-shadow:0 0 12px rgba(255,153,51,0.5);
+   transition: transform .2s ease;
+ }
+
+ .btn:hover{
+   transform:scale(1.04);
+ }
+
+ .preview img,.preview audio{
+   margin-top:12px;
+   max-width:100%;
+   border-radius:12px;
+   box-shadow:0 0 14px rgba(255,153,51,0.4);
+ }
+
+ .result-emoji{
+   font-size:60px;
+   margin-bottom:10px;
+   animation: pop 0.7s ease;
+ }
+
+ @keyframes pop {
+   0%{transform:scale(0.2);}
+   100%{transform:scale(1);}
+ }
+
+ pre{
+   background:rgba(0,0,0,0.4);
+   padding:16px;
+   border-radius:12px;
+   color:#7fffd4;
+   overflow:auto;
+ }
+
+ label{
+   font-size:15px;
+   opacity:0.9;
+   margin-top:12px;
+   display:block;
+ }
+ </style>
+
+ <script>
+ function preview(input,id,type){
+   let file = input.files[0];
+   if(!file) return;
+   let url = URL.createObjectURL(file);
+
+   if(type==="img")
+     document.getElementById(id).innerHTML = `<img src="${url}">`;
+   else
+     document.getElementById(id).innerHTML = `<audio controls src="${url}"></audio>`;
+ }
+ </script>
+
+ </head>
+ <body>
+
+ <div class="wrap">
+   <h1>🎯 Multimodal Sentiment Analyzer</h1>
+
+   <form method="POST" enctype="multipart/form-data" class="card">
+     <label>Enter Text:</label>
+     <textarea name="text" rows="4" placeholder="Write something..."></textarea>
+
+     <label>Upload Face Image:</label>
+     <input type="file" name="image" accept="image/*" onchange="preview(this,'imgprev','img')">
+     <div class="preview" id="imgprev"></div>
+
+     <label>Upload Audio:</label>
+     <input type="file" name="audio" accept="audio/*" onchange="preview(this,'audprev','aud')">
+     <div class="preview" id="audprev"></div>
+
+     <button class="btn">🚀 Analyze</button>
+   </form>
+
+   {% if result and result['fused'] %}
+   <div class="card" style="text-align:center;">
+     <div class="result-emoji">{{ result['fused']['emoji'] }}</div>
+     <h2>{{ result['fused']['sentiment'] | capitalize }}</h2>
+   </div>
+
+   <div class="card">
+     <pre>{{ result_json }}</pre>
+   </div>
+   {% endif %}
+ </div>
+
+ </body></html>
+ """
+
+
+ # ----------------------------------------------------------
+ # ROUTE
+ # ----------------------------------------------------------
+ @app.route("/", methods=["GET", "POST"])
+ def home():
+     result = None
+
+     if request.method == "POST":
+         text = request.form.get("text", "")
+
+         audio_file = request.files.get("audio")
+         image_file = request.files.get("image")
+
+         audio_path = None
+         img_path = None
+
+         if audio_file and audio_file.filename:
+             # secure_filename guards against path traversal in user-supplied names
+             audio_path = os.path.join(UPLOAD_FOLDER, secure_filename(audio_file.filename))
+             audio_file.save(audio_path)
+
+         if image_file and image_file.filename:
+             img_path = os.path.join(UPLOAD_FOLDER, secure_filename(image_file.filename))
+             image_file.save(img_path)
+
+         t = predict_text_sentiment(text)
+         a = predict_audio_sentiment(audio_path)
+         i = predict_image_sentiment(img_path)
+
+         fused = fuse_sentiments(t, a, i)
+
+         result = {"text": t, "audio": a, "image": i, "fused": fused}
+         result_json = json.dumps(result, indent=2)
+
+         return render_template_string(HTML, result=result, result_json=result_json)
+
+     return render_template_string(HTML)
+
+
+ # ----------------------------------------------------------
+ if __name__ == "__main__":
+     # Default to 7860 so local runs match the Dockerfile's EXPOSE and gunicorn bind
+     port = int(os.environ.get("PORT", 7860))
+     app.run(host="0.0.0.0", port=port)
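With the app running (python app.py locally, or gunicorn on port 7860 in the container), a minimal end-to-end check could look like this (a sketch; the requests package is assumed and is not in requirements.txt):

    import requests

    resp = requests.post("http://localhost:7860/", data={"text": "what a wonderful day"})
    print(resp.status_code)           # 200 when the app is up
    print("positive" in resp.text)    # the fused label is rendered into the page's JSON dump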
emotion_cnn.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d02ae0ed00d07354860b6967be52b23a2e6a6de764ffa7e035778504dab3cdbc
+ size 5109183
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ Flask==3.0.0
+ gunicorn==21.2.0
+ textblob==0.17.1
+ transformers==4.45.1
+ torch==2.9.0
+ torchvision==0.24.0
+ torchaudio==2.9.0
+ Pillow==10.4.0
+ librosa==0.10.1
+
+ huggingface-hub
+ accelerate
+ soundfile
runtime.txt ADDED
@@ -0,0 +1 @@
+ python-3.10