Spaces:

somya-27-04-03
/

Multimodal-Sentimental-Analyzer

Sleeping

File size: 10,831 Bytes

6c52838

import os
import json
from flask import Flask, request, render_template_string
from textblob import TextBlob
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import numpy as np
import librosa

# ----------------------------------------------------------
# PATHS
# ----------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(BASE_DIR, "uploads")
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = Flask(__name__)

# ----------------------------------------------------------
# TEXT SENTIMENT
# ----------------------------------------------------------
def predict_text_sentiment(text: str):
    if not text or not text.strip():
        return None, None

    polarity = TextBlob(text).sentiment.polarity

    if polarity > 0.1:
        arr = [0.1, 0.1, 0.8]
    elif polarity < -0.1:
        arr = [0.8, 0.1, 0.1]
    else:
        arr = [0.2, 0.7, 0.1]

    return arr, max(arr)


# ----------------------------------------------------------
# FINAL AUDIO SENTIMENT (Librosa-based - NO TF, NO TRANSFORMERS)
# ----------------------------------------------------------
def predict_audio_sentiment(file_path):
    if not file_path:
        return None, None

    try:
        # Load audio
        y, sr = librosa.load(file_path, sr=16000)

        # Extract intensity & pitch
        energy = float(np.mean(np.abs(y)))

        pitch, _ = librosa.piptrack(y=y, sr=sr)
        pitch_vals = pitch[pitch > 0]
        pitch_mean = float(np.mean(pitch_vals)) if pitch_vals.size > 0 else 0

        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

        # Simple rule-based emotions
        if energy < 0.02 and pitch_mean < 120:
            arr = [0.8, 0.15, 0.05]  # negative
        elif pitch_mean > 180 and energy > 0.05:
            arr = [0.7, 0.2, 0.1]   # angry -> negative
        elif tempo > 120 or pitch_mean > 160:
            arr = [0.1, 0.1, 0.8]   # happy -> positive
        else:
            arr = [0.1, 0.8, 0.1]   # neutral

        return arr, max(arr)

    except Exception as e:
        print("❌ AUDIO ERROR:", e)
        return None, None


# ----------------------------------------------------------
# IMAGE SENTIMENT (your trained CNN)
# ----------------------------------------------------------
NUM_CLASSES = 7
IMG_LABELS = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

class MediumEmotionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )
        self.fc1 = nn.Linear(128 * 6 * 6, 256)
        self.dropout = nn.Dropout(0.4)
        self.fc2 = nn.Linear(256, NUM_CLASSES)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# Load model
IMG_MODEL_PATH = os.path.join(BASE_DIR, "emotion_cnn.pth")
image_device = "cuda" if torch.cuda.is_available() else "cpu"

image_model = MediumEmotionCNN().to(image_device)
IMAGE_MODEL_OK = False

try:
    image_model.load_state_dict(torch.load(IMG_MODEL_PATH, map_location=image_device))
    image_model.eval()
    IMAGE_MODEL_OK = True
    print("🟢 Image CNN loaded successfully!")
except Exception as e:
    print("❌ Image model failed:", e)

img_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

def predict_image_sentiment(path):
    if not (IMAGE_MODEL_OK and path and os.path.exists(path)):
        return None, None

    try:
        img = Image.open(path).convert("RGB")
        x = img_transform(img).unsqueeze(0).to(image_device)

        with torch.no_grad():
            logits = image_model(x)
            probs7 = torch.softmax(logits, dim=1)[0].cpu().numpy()

        idx = {l: i for i, l in enumerate(IMG_LABELS)}
        pos = float(probs7[idx["happy"]] + probs7[idx["surprise"]])
        neu = float(probs7[idx["neutral"]])
        neg = float(probs7[idx["angry"]] + probs7[idx["disgust"]] +
                    probs7[idx["fear"]] + probs7[idx["sad"]])

        return [neg, neu, pos], max([neg, neu, pos])

    except Exception as e:
        print("❌ Image error:", e)
        return None, None


# ----------------------------------------------------------
# FUSION
# ----------------------------------------------------------
def fuse_sentiments(*items):
    probs = [arr for arr, conf in items if arr]
    if not probs:
        return None

    avg = torch.tensor(probs).mean(dim=0).tolist()
    sent = ["negative", "neutral", "positive"][int(np.argmax(avg))]
    emoji = {"negative": "😡", "neutral": "😐", "positive": "😊"}[sent]

    return {"sentiment": sent, "emoji": emoji, "probs": avg}


# ----------------------------------------------------------
# HTML (unchanged)
# ----------------------------------------------------------
HTML = """

<!doctype html>

<html><head>

<meta charset="utf-8" />



<title>🎭 Multimodal Sentiment Analyzer</title>



<style>

body{

    margin:0;

    font-family:Poppins, sans-serif;

    background: linear-gradient(135deg, #161616, #1f0033, #33001a);

    background-size: 200% 200%;

    animation: gradientShift 8s ease infinite;

    color:#f5f5f5;

}



@keyframes gradientShift {

    0% { background-position: 0% 50%; }

    50% { background-position: 100% 50%; }

    100% { background-position: 0% 50%; }

}



.wrap{

    max-width:900px;

    margin:40px auto;

    padding:20px;

}



.card{

    background:rgba(255,255,255,0.07);

    backdrop-filter: blur(12px);

    border-radius:16px;

    padding:28px;

    box-shadow:0 0 18px rgba(0,0,0,0.5);

    margin-top:22px;

    border:1px solid rgba(255,255,255,0.15);

}



h1{

    text-align:center;

    font-size:36px;

    font-weight:700;

    color:#ffca5a;

    margin-bottom:10px;

    text-shadow:0 0 12px rgba(255, 204, 102,0.4);

}



input,textarea{

    width:100%;

    padding:14px;

    border-radius:12px;

    background:rgba(255,255,255,0.15);

    border:1px solid rgba(255,255,255,0.25);

    color:#fff;

    margin-top:8px;

    margin-bottom:18px;

    outline:none;

    resize:none;

    box-sizing: border-box;

}



.btn{

    width:100%;

    padding:16px;

    border-radius:12px;

    background:linear-gradient(90deg,#ff9933,#ff5500);

    border:0;

    font-weight:bold;

    color:white;

    margin-top:6px;

    cursor:pointer;

    box-shadow:0 0 12px rgba(255,153,51,0.5);

    transition: transform .2s ease;

}



.btn:hover{

    transform:scale(1.04);

}



.preview img,.preview audio{

    margin-top:12px;

    max-width:100%;

    border-radius:12px;

    box-shadow:0 0 14px rgba(255,153,51,0.4);

}



.result-emoji{

    font-size:60px;

    margin-bottom:10px;

    animation: pop 0.7s ease;

}



@keyframes pop {

    0%{transform:scale(0.2);}

    100%{transform:scale(1);}

}



pre{

    background:rgba(0,0,0,0.4);

    padding:16px;

    border-radius:12px;

    color:#7fffd4;

    overflow:auto;

}



label{

    font-size:15px;

    opacity:0.9;

    margin-top:12px;

    display:block;

}

</style>





<script>

function preview(input,id,type){

    let file = input.files[0];

    if(!file) return;

    let url = URL.createObjectURL(file);



    if(type==="img")

        document.getElementById(id).innerHTML = `<img src="${url}">`;

    else

        document.getElementById(id).innerHTML = `<audio controls src="${url}"></audio>`;

}

</script>



</head>

<body>



<div class="wrap">

    <h1>🎯 Multimodal Sentiment Analyzer</h1>



    <form method="POST" enctype="multipart/form-data" class="card">

        <label>Enter Text:</label>

        <textarea name="text" rows="4" placeholder="Write something..."></textarea>



        <label>Upload Face Image:</label>

        <input type="file" name="image" accept="image/*" onchange="preview(this,'imgprev','img')">

        <div class="preview" id="imgprev"></div>



        <label>Upload Audio:</label>

        <input type="file" name="audio" accept="audio/*" onchange="preview(this,'audprev','aud')">

        <div class="preview" id="audprev"></div>



        <button class="btn">🚀 Analyze</button>

    </form>



    {% if result %}

    <div class="card" style="text-align:center;">

        <div class="result-emoji">{{ result['fused']['emoji'] }}</div>

        <h2>{{ result['fused']['sentiment'] | capitalize }}</h2>

    </div>



    <div class="card">

        <pre>{{ result_json }}</pre>

    </div>

    {% endif %}

</div>



</body></html>

"""



# ----------------------------------------------------------
# ROUTE
# ----------------------------------------------------------
@app.route("/", methods=["GET", "POST"])
def home():
    result = None

    if request.method == "POST":
        text = request.form.get("text", "")

        audio_file = request.files.get("audio")
        image_file = request.files.get("image")

        audio_path = None
        img_path = None

        if audio_file and audio_file.filename:
            audio_path = os.path.join(UPLOAD_FOLDER, audio_file.filename)
            audio_file.save(audio_path)

        if image_file and image_file.filename:
            img_path = os.path.join(UPLOAD_FOLDER, image_file.filename)
            image_file.save(img_path)

        t = predict_text_sentiment(text)
        a = predict_audio_sentiment(audio_path)
        i = predict_image_sentiment(img_path)

        fused = fuse_sentiments(t, a, i)

        result = {"text": t, "audio": a, "image": i, "fused": fused}
        result_json = json.dumps(result, indent=2)

        return render_template_string(HTML, result=result, result_json=result_json)

    return render_template_string(HTML)


# ----------------------------------------------------------
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 5000))
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))