import os
import json
from flask import Flask, request, render_template_string
from werkzeug.utils import secure_filename
from textblob import TextBlob
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import numpy as np
import librosa
# ----------------------------------------------------------
# PATHS
# ----------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(BASE_DIR, "uploads")
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app = Flask(__name__)
# ----------------------------------------------------------
# TEXT SENTIMENT
# ----------------------------------------------------------
def predict_text_sentiment(text: str):
    """Map TextBlob polarity to [negative, neutral, positive] probabilities."""
    if not text or not text.strip():
        return None, None
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.1:
        arr = [0.1, 0.1, 0.8]  # positive
    elif polarity < -0.1:
        arr = [0.8, 0.1, 0.1]  # negative
    else:
        arr = [0.2, 0.7, 0.1]  # neutral
    return arr, max(arr)
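# Illustrative calls (assuming TextBlob's default pattern lexicon):
#   predict_text_sentiment("I love this!")  -> ([0.1, 0.1, 0.8], 0.8)
#   predict_text_sentiment("This is awful") -> ([0.8, 0.1, 0.1], 0.8)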
# ----------------------------------------------------------
# FINAL AUDIO SENTIMENT (Librosa-based - NO TF, NO TRANSFORMERS)
# ----------------------------------------------------------
def predict_audio_sentiment(file_path):
    if not file_path:
        return None, None
    try:
        # Load audio as 16 kHz mono
        y, sr = librosa.load(file_path, sr=16000)
        # Extract intensity, pitch, and tempo features
        energy = float(np.mean(np.abs(y)))
        pitch, _ = librosa.piptrack(y=y, sr=sr)
        pitch_vals = pitch[pitch > 0]
        pitch_mean = float(np.mean(pitch_vals)) if pitch_vals.size > 0 else 0.0
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = float(np.atleast_1d(tempo)[0])  # newer librosa returns an ndarray
        # Simple rule-based mapping from acoustic features to sentiment
        if energy < 0.02 and pitch_mean < 120:
            arr = [0.8, 0.15, 0.05]  # low energy, low pitch -> negative
        elif pitch_mean > 180 and energy > 0.05:
            arr = [0.7, 0.2, 0.1]    # loud and high-pitched (angry) -> negative
        elif tempo > 120 or pitch_mean > 160:
            arr = [0.1, 0.1, 0.8]    # fast or bright (happy) -> positive
        else:
            arr = [0.1, 0.8, 0.1]    # neutral
        return arr, max(arr)
    except Exception as e:
        print("❌ AUDIO ERROR:", e)
        return None, None
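# Illustrative call (hypothetical file; actual output depends on the recording):
#   predict_audio_sentiment("uploads/clip.wav")
#   -> ([0.1, 0.8, 0.1], 0.8) for a quiet, mid-pitch, mid-tempo voice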
# ----------------------------------------------------------
# IMAGE SENTIMENT (your trained CNN)
# ----------------------------------------------------------
NUM_CLASSES = 7
IMG_LABELS = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]
class MediumEmotionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        self.fc1 = nn.Linear(128 * 6 * 6, 256)
        self.dropout = nn.Dropout(0.4)
        self.fc2 = nn.Linear(256, NUM_CLASSES)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
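# Minimal shape sanity check (illustrative): a 48x48 grayscale input shrinks to
# 6x6 after three 2x2 pools (48 -> 24 -> 12 -> 6, hence fc1's 128 * 6 * 6), and
# the model emits one logit per emotion class:
#   MediumEmotionCNN()(torch.zeros(1, 1, 48, 48)).shape  # torch.Size([1, 7])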
# Load model
IMG_MODEL_PATH = os.path.join(BASE_DIR, "emotion_cnn.pth")
image_device = "cuda" if torch.cuda.is_available() else "cpu"
image_model = MediumEmotionCNN().to(image_device)
IMAGE_MODEL_OK = False
try:
    image_model.load_state_dict(torch.load(IMG_MODEL_PATH, map_location=image_device))
    image_model.eval()
    IMAGE_MODEL_OK = True
    print("🟒 Image CNN loaded successfully!")
except Exception as e:
    print("❌ Image model failed:", e)

img_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
def predict_image_sentiment(path):
    if not (IMAGE_MODEL_OK and path and os.path.exists(path)):
        return None, None
    try:
        img = Image.open(path).convert("RGB")
        x = img_transform(img).unsqueeze(0).to(image_device)
        with torch.no_grad():
            logits = image_model(x)
        probs7 = torch.softmax(logits, dim=1)[0].cpu().numpy()
        # Collapse the 7 emotion classes into [negative, neutral, positive]
        idx = {l: i for i, l in enumerate(IMG_LABELS)}
        pos = float(probs7[idx["happy"]] + probs7[idx["surprise"]])
        neu = float(probs7[idx["neutral"]])
        neg = float(probs7[idx["angry"]] + probs7[idx["disgust"]] +
                    probs7[idx["fear"]] + probs7[idx["sad"]])
        return [neg, neu, pos], max([neg, neu, pos])
    except Exception as e:
        print("❌ Image error:", e)
        return None, None
# ----------------------------------------------------------
# FUSION
# ----------------------------------------------------------
def fuse_sentiments(*items):
    probs = [arr for arr, conf in items if arr]
    if not probs:
        return None
    avg = torch.tensor(probs).mean(dim=0).tolist()
    sent = ["negative", "neutral", "positive"][int(np.argmax(avg))]
    emoji = {"negative": "😑", "neutral": "😐", "positive": "😊"}[sent]
    return {"sentiment": sent, "emoji": emoji, "probs": avg}
# ----------------------------------------------------------
# HTML (unchanged)
# ----------------------------------------------------------
HTML = """
<!doctype html>
<html><head>
<meta charset="utf-8" />
<title>🎭 Multimodal Sentiment Analyzer</title>
<style>
  body{
    margin:0;
    font-family:Poppins, sans-serif;
    background: linear-gradient(135deg, #161616, #1f0033, #33001a);
    background-size: 200% 200%;
    animation: gradientShift 8s ease infinite;
    color:#f5f5f5;
  }
  @keyframes gradientShift {
    0% { background-position: 0% 50%; }
    50% { background-position: 100% 50%; }
    100% { background-position: 0% 50%; }
  }
  .wrap{
    max-width:900px;
    margin:40px auto;
    padding:20px;
  }
  .card{
    background:rgba(255,255,255,0.07);
    backdrop-filter: blur(12px);
    border-radius:16px;
    padding:28px;
    box-shadow:0 0 18px rgba(0,0,0,0.5);
    margin-top:22px;
    border:1px solid rgba(255,255,255,0.15);
  }
  h1{
    text-align:center;
    font-size:36px;
    font-weight:700;
    color:#ffca5a;
    margin-bottom:10px;
    text-shadow:0 0 12px rgba(255, 204, 102, 0.4);
  }
  input,textarea{
    width:100%;
    padding:14px;
    border-radius:12px;
    background:rgba(255,255,255,0.15);
    border:1px solid rgba(255,255,255,0.25);
    color:#fff;
    margin-top:8px;
    margin-bottom:18px;
    outline:none;
    resize:none;
    box-sizing: border-box;
  }
  .btn{
    width:100%;
    padding:16px;
    border-radius:12px;
    background:linear-gradient(90deg,#ff9933,#ff5500);
    border:0;
    font-weight:bold;
    color:white;
    margin-top:6px;
    cursor:pointer;
    box-shadow:0 0 12px rgba(255,153,51,0.5);
    transition: transform .2s ease;
  }
  .btn:hover{
    transform:scale(1.04);
  }
  .preview img,.preview audio{
    margin-top:12px;
    max-width:100%;
    border-radius:12px;
    box-shadow:0 0 14px rgba(255,153,51,0.4);
  }
  .result-emoji{
    font-size:60px;
    margin-bottom:10px;
    animation: pop 0.7s ease;
  }
  @keyframes pop {
    0%{transform:scale(0.2);}
    100%{transform:scale(1);}
  }
  pre{
    background:rgba(0,0,0,0.4);
    padding:16px;
    border-radius:12px;
    color:#7fffd4;
    overflow:auto;
  }
  label{
    font-size:15px;
    opacity:0.9;
    margin-top:12px;
    display:block;
  }
</style>
<script>
  function preview(input, id, type){
    let file = input.files[0];
    if(!file) return;
    let url = URL.createObjectURL(file);
    if(type === "img")
      document.getElementById(id).innerHTML = `<img src="${url}">`;
    else
      document.getElementById(id).innerHTML = `<audio controls src="${url}"></audio>`;
  }
</script>
</head>
<body>
<div class="wrap">
<h1>🎯 Multimodal Sentiment Analyzer</h1>
<form method="POST" enctype="multipart/form-data" class="card">
<label>Enter Text:</label>
<textarea name="text" rows="4" placeholder="Write something..."></textarea>
<label>Upload Face Image:</label>
<input type="file" name="image" accept="image/*" onchange="preview(this,'imgprev','img')">
<div class="preview" id="imgprev"></div>
<label>Upload Audio:</label>
<input type="file" name="audio" accept="audio/*" onchange="preview(this,'audprev','aud')">
<div class="preview" id="audprev"></div>
<button class="btn">πŸš€ Analyze</button>
</form>
{% if result %}
<div class="card" style="text-align:center;">
<div class="result-emoji">{{ result['fused']['emoji'] }}</div>
<h2>{{ result['fused']['sentiment'] | capitalize }}</h2>
</div>
<div class="card">
<pre>{{ result_json }}</pre>
</div>
{% endif %}
</div>
</body></html>
"""
# ----------------------------------------------------------
# ROUTE
# ----------------------------------------------------------
@app.route("/", methods=["GET", "POST"])
def home():
    if request.method == "POST":
        text = request.form.get("text", "")
        audio_file = request.files.get("audio")
        image_file = request.files.get("image")
        audio_path = None
        img_path = None
        # Sanitize user-supplied filenames before writing to disk
        if audio_file and audio_file.filename:
            audio_path = os.path.join(UPLOAD_FOLDER, secure_filename(audio_file.filename))
            audio_file.save(audio_path)
        if image_file and image_file.filename:
            img_path = os.path.join(UPLOAD_FOLDER, secure_filename(image_file.filename))
            image_file.save(img_path)
        t = predict_text_sentiment(text)
        a = predict_audio_sentiment(audio_path)
        i = predict_image_sentiment(img_path)
        fused = fuse_sentiments(t, a, i)
        if fused:  # only render a result card when at least one modality succeeded
            result = {"text": t, "audio": a, "image": i, "fused": fused}
            result_json = json.dumps(result, indent=2)
            return render_template_string(HTML, result=result, result_json=result_json)
    return render_template_string(HTML)
# ----------------------------------------------------------
if __name__ == "__main__":
    # Hugging Face Spaces sets PORT; default to 7860 locally
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port)
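# Example request once the server is running (hypothetical file names):
#   curl -F "text=I love this" -F "image=@face.jpg" -F "audio=@clip.wav" \
#        http://localhost:7860/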