import os
import json

from flask import Flask, request, render_template_string
from textblob import TextBlob
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import numpy as np
import librosa

# ----------------------------------------------------------
# PATHS
# ----------------------------------------------------------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(BASE_DIR, "uploads")
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = Flask(__name__)

# ----------------------------------------------------------
# TEXT SENTIMENT
# ----------------------------------------------------------
def predict_text_sentiment(text: str):
    if not text or not text.strip():
        return None, None
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.1:
        arr = [0.1, 0.1, 0.8]
    elif polarity < -0.1:
        arr = [0.8, 0.1, 0.1]
    else:
        arr = [0.2, 0.7, 0.1]
    return arr, max(arr)

# ----------------------------------------------------------
# FINAL AUDIO SENTIMENT (Librosa-based - NO TF, NO TRANSFORMERS)
# ----------------------------------------------------------
def predict_audio_sentiment(file_path):
    if not file_path:
        return None, None
    try:
        # Load audio
        y, sr = librosa.load(file_path, sr=16000)

        # Extract intensity & pitch
        energy = float(np.mean(np.abs(y)))
        pitch, _ = librosa.piptrack(y=y, sr=sr)
        pitch_vals = pitch[pitch > 0]
        pitch_mean = float(np.mean(pitch_vals)) if pitch_vals.size > 0 else 0
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

        # Simple rule-based emotions
        if energy < 0.02 and pitch_mean < 120:
            arr = [0.8, 0.15, 0.05]  # negative
        elif pitch_mean > 180 and energy > 0.05:
            arr = [0.7, 0.2, 0.1]    # angry -> negative
        elif tempo > 120 or pitch_mean > 160:
            arr = [0.1, 0.1, 0.8]    # happy -> positive
        else:
            arr = [0.1, 0.8, 0.1]    # neutral

        return arr, max(arr)
    except Exception as e:
        print("❌ AUDIO ERROR:", e)
        return None, None

# ----------------------------------------------------------
# IMAGE SENTIMENT (your trained CNN)
# ----------------------------------------------------------
NUM_CLASSES = 7
IMG_LABELS = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

class MediumEmotionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(32), nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(64), nn.MaxPool2d(2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(128), nn.MaxPool2d(2)
        )
        self.fc1 = nn.Linear(128 * 6 * 6, 256)
        self.dropout = nn.Dropout(0.4)
        self.fc2 = nn.Linear(256, NUM_CLASSES)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# Load model
IMG_MODEL_PATH = os.path.join(BASE_DIR, "emotion_cnn.pth")
image_device = "cuda" if torch.cuda.is_available() else "cpu"
image_model = MediumEmotionCNN().to(image_device)
IMAGE_MODEL_OK = False
try:
    image_model.load_state_dict(torch.load(IMG_MODEL_PATH, map_location=image_device))
    image_model.eval()
    IMAGE_MODEL_OK = True
    print("🟢 Image CNN loaded successfully!")
except Exception as e:
    print("❌ Image model failed:", e)

img_transform = transforms.Compose([
    transforms.Grayscale(1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

def predict_image_sentiment(path):
    if not (IMAGE_MODEL_OK and path and os.path.exists(path)):
        return None, None
    try:
        img = Image.open(path).convert("RGB")
        x = img_transform(img).unsqueeze(0).to(image_device)
        with torch.no_grad():
            logits = image_model(x)
            probs7 = torch.softmax(logits, dim=1)[0].cpu().numpy()
        idx = {l: i for i, l in enumerate(IMG_LABELS)}
        pos = float(probs7[idx["happy"]] + probs7[idx["surprise"]])
        neu = float(probs7[idx["neutral"]])
        neg = float(probs7[idx["angry"]] + probs7[idx["disgust"]] +
                    probs7[idx["fear"]] + probs7[idx["sad"]])
        return [neg, neu, pos], max([neg, neu, pos])
    except Exception as e:
        print("❌ Image error:", e)
        return None, None

# ----------------------------------------------------------
# FUSION
# ----------------------------------------------------------
def fuse_sentiments(*items):
    probs = [arr for arr, conf in items if arr]
    if not probs:
        return None
    avg = torch.tensor(probs).mean(dim=0).tolist()
    sent = ["negative", "neutral", "positive"][int(np.argmax(avg))]
    emoji = {"negative": "😡", "neutral": "😐", "positive": "😊"}[sent]
    return {"sentiment": sent, "emoji": emoji, "probs": avg}
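
# ----------------------------------------------------------
# Usage sketch (illustrative only): how the three per-modality
# predictors are intended to feed fuse_sentiments(). Each predictor
# returns (probs, confidence) or (None, None), and the fusion helper
# averages whichever modalities actually produced a prediction.
# _demo_fuse and its arguments are hypothetical helpers, not part of
# the app's routes below.
# ----------------------------------------------------------
def _demo_fuse(text=None, audio_path=None, image_path=None):
    """Fuse whichever of the text / audio / image inputs were supplied."""
    result = fuse_sentiments(
        predict_text_sentiment(text),
        predict_audio_sentiment(audio_path),
        predict_image_sentiment(image_path),
    )
    # result is None if no modality produced a prediction, otherwise a dict
    # like {"sentiment": "...", "emoji": "...", "probs": [neg, neu, pos]}.
    return result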

# ----------------------------------------------------------
# HTML (unchanged)
# ----------------------------------------------------------
HTML = """
{{ result_json }}