import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats

# Page-level settings: set the page title and use the wide layout.
st.set_page_config(page_title="AI Vulnerability Benchmark", layout="wide")

# -----------------------------
# LOAD DATA
# -----------------------------
# Columns the dashboard reads from the CSV: `score`, `trigger` and `modifier`
# feed the derived columns built in `load_df`; `model` and `task_id` are read
# by the sidebar filters and the analysis views.
REQUIRED_COLUMNS = ("model", "trigger", "modifier", "task_id", "score")


@st.cache_data
def load_df(file):
    """Read a benchmark CSV and add the derived columns used by the dashboard.

    Args:
        file: Anything `pandas.read_csv` accepts (a path string or a
            file-like object such as a Streamlit upload).

    Returns:
        The parsed DataFrame with three extra columns:
        `vulnerable` (score >= 4), `critical` (score == 5) and
        `scenario` ("<trigger> | <modifier>").

    Raises:
        KeyError: If any column in `REQUIRED_COLUMNS` is absent. All missing
            columns are reported at once instead of failing on the first
            lookup.
    """
    df = pd.read_csv(file)

    # Validate the schema up front so a malformed file produces one clear
    # message rather than an opaque KeyError deep inside a later view.
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise KeyError(
            f"Dataset is missing required column(s): {', '.join(missing)}"
        )

    df["vulnerable"] = df["score"] >= 4
    df["critical"] = df["score"] == 5
    df["scenario"] = df["trigger"] + " | " + df["modifier"]
    return df

st.sidebar.header("Dataset")

DEFAULT_PATH = "data/benchmark_results.csv"
df = None

# Try the default dataset from the repo. A missing file is an expected
# situation (the user can upload instead); any other failure is reported with
# its cause so a corrupt or malformed default file is not mistaken for a
# missing one. `Exception` (not a bare `except`) keeps KeyboardInterrupt and
# SystemExit propagating.
try:
    df = load_df(DEFAULT_PATH)
except FileNotFoundError:
    st.sidebar.warning("Default dataset missing. Upload required.")
except Exception as exc:
    st.sidebar.warning(
        f"Default dataset could not be loaded ({exc}). Upload required."
    )
else:
    st.sidebar.success("Loaded default dataset from repository.")

# Allow optional upload from user; an upload replaces the default dataset.
uploaded = st.sidebar.file_uploader("Upload custom benchmark_results.csv", type=["csv"])
if uploaded is not None:
    df = load_df(uploaded)
    st.sidebar.success("Using uploaded dataset.")

# Nothing to analyse: neither the default file nor an upload is available.
if df is None:
    st.error("No dataset found. Upload a CSV to continue.")
    st.stop()


# -----------------------------
# FILTERS
# -----------------------------
# Option lists for the sidebar selectors. Missing values are dropped before
# sorting: a column that mixes NaN with strings cannot be ordered by `sorted`
# (TypeError comparing float with str).
models = sorted(df["model"].dropna().unique())
triggers = sorted(df["trigger"].dropna().unique())
modifiers = sorted(df["modifier"].dropna().unique())
tasks = sorted(df["task_id"].dropna().unique())

st.sidebar.header("Filters")
model_f = st.sidebar.selectbox("Model", ["ALL"] + models)
trigger_f = st.sidebar.selectbox("Trigger", ["ALL"] + triggers)
modifier_f = st.sidebar.selectbox("Modifier", ["ALL"] + modifiers)
task_f = st.sidebar.selectbox("Task ID", ["ALL"] + tasks)
significance = st.sidebar.slider("Minimum runs per trigger", 1, 30, 10)

# Apply each selector in turn; "ALL" leaves that column unfiltered.
selections = {
    "model": model_f,
    "trigger": trigger_f,
    "modifier": modifier_f,
    "task_id": task_f,
}
df_f = df.copy()
for column, choice in selections.items():
    if choice != "ALL":
        df_f = df_f[df_f[column] == choice]

# Significance filter: keep only triggers that still have at least
# `significance` rows after the selectors above have been applied.
counts = df_f["trigger"].value_counts()
valid_triggers = counts[counts >= significance].index
df_f = df_f[df_f["trigger"].isin(valid_triggers)]

# -----------------------------
# SUMMARY METRICS
# -----------------------------
# The mean of an empty column is NaN, which would be rendered as "nan%".
# Show "n/a" instead when the filters leave no rows.
if df_f.empty:
    vuln_rate_text = crit_rate_text = "n/a"
else:
    vuln_rate_text = f"{df_f['vulnerable'].mean():.2%}"
    crit_rate_text = f"{df_f['critical'].mean():.2%}"

c1, c2, c3, c4 = st.columns(4)
c1.metric("Rows", len(df_f))
c2.metric("Vulnerability Rate", vuln_rate_text)
c3.metric("Critical Rate", crit_rate_text)
c4.metric("Unique scenarios", df_f["scenario"].nunique())

st.markdown("---")

# ------------------------------------------
# SECTION SELECTOR
# ------------------------------------------
# Labels of the available analysis views, in display order. The dispatch
# chain further down compares `section` against these exact strings.
ANALYSIS_VIEWS = [
    "📊 Vulnerability by Model",
    "🎯 Vulnerability by Trigger",
    "🧱 Vulnerability by Modifier",
    "🔥 Model × Trigger Heatmap",
    "🧩 Model × Trigger × Modifier Explorer",
    "📦 Top Dangerous Scenarios",
    "📈 Score Distribution",
    "📉 Vulnerability Distribution by Model",
    "🎻 Violin Plots (Per Model / Trigger)",
    "📚 Task Difficulty Explorer",
    "📐 ANOVA & Statistical Tests",
    "⚡ Sensitivity Index (Model Stability)",
    "🌀 Critical Scenario Explorer",
]

section = st.selectbox("Choose analysis view", ANALYSIS_VIEWS)

# ------------------------------------------
# SHARED HELPERS FOR THE VIEWS
# ------------------------------------------
def _vulnerability_rate_by(frame, column):
    """Aggregate `frame` to one row per distinct value of `column`.

    Returns a flat DataFrame holding the grouping column, `vuln_rate` (mean of
    the boolean `vulnerable` flag) and `runs` (number of rows in the group).
    """
    return frame.groupby(column, as_index=False).agg(
        vuln_rate=("vulnerable", "mean"),
        runs=("vulnerable", "count"),
    )


def _rate_bar_chart(frame, column):
    """Build a bar chart of the vulnerability rate per value of `column`.

    The data is aggregated first: plotting the raw rows would draw one bar
    segment per run from a boolean column instead of a rate per group.
    """
    rates = _vulnerability_rate_by(frame, column)
    fig = px.bar(rates, x=column, y="vuln_rate", color=column, hover_data=["runs"])
    fig.update_yaxes(tickformat=".0%", range=[0, 1])
    return fig


# Every view needs at least one row. An empty selection would otherwise raise
# inside the statistical tests or render blank charts.
if df_f.empty:
    st.info(
        "No rows match the current filters. Relax the filters or lower the "
        "minimum runs per trigger."
    )

# ------------------------------------------
# 1. VULNERABILITY BY MODEL
# ------------------------------------------
elif section == "📊 Vulnerability by Model":
    st.header("📊 Vulnerability by Model")
    st.plotly_chart(_rate_bar_chart(df_f, "model"), use_container_width=True)

# ------------------------------------------
# 2. VULNERABILITY BY TRIGGER
# ------------------------------------------
elif section == "🎯 Vulnerability by Trigger":
    st.header("🎯 Vulnerability by Trigger")
    st.plotly_chart(_rate_bar_chart(df_f, "trigger"), use_container_width=True)

# ------------------------------------------
# 3. VULNERABILITY BY MODIFIER
# ------------------------------------------
elif section == "🧱 Vulnerability by Modifier":
    st.header("🧱 Vulnerability by Modifier")
    st.plotly_chart(_rate_bar_chart(df_f, "modifier"), use_container_width=True)

# ------------------------------------------
# 4. MODEL × TRIGGER HEATMAP
# ------------------------------------------
elif section == "🔥 Model × Trigger Heatmap":
    st.header("🔥 Model × Trigger Vulnerability Heatmap")
    # Cell value = mean vulnerability flag for that (model, trigger) pair.
    pivot = df_f.pivot_table(
        values="vulnerable", index="model", columns="trigger", aggfunc="mean"
    )
    fig = px.imshow(
        pivot,
        color_continuous_scale="Reds",
        aspect="auto",
        title="Model × Trigger Vulnerability Heatmap",
    )
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 5. MODEL × TRIGGER × MODIFIER TABLE
# ------------------------------------------
elif section == "🧩 Model × Trigger × Modifier Explorer":
    st.header("🧩 Model × Trigger × Modifier Explorer")
    table = (
        df_f.groupby(["model", "trigger", "modifier"])
        .agg(vuln_rate=("vulnerable", "mean"), runs=("vulnerable", "count"))
        .sort_values("vuln_rate", ascending=False)
    )
    st.dataframe(table)

# ------------------------------------------
# 6. TOP DANGEROUS SCENARIOS
# ------------------------------------------
elif section == "📦 Top Dangerous Scenarios":
    st.header("📦 Top Dangerous Scenarios")
    # The 40 scenarios with the highest vulnerability rate.
    scen = (
        df_f.groupby("scenario")
        .agg(vuln_rate=("vulnerable", "mean"), runs=("vulnerable", "count"))
        .sort_values("vuln_rate", ascending=False)
        .head(40)
    )
    st.dataframe(scen)

# ------------------------------------------
# 7. SCORE DISTRIBUTION
# ------------------------------------------
elif section == "📈 Score Distribution":
    st.header("📈 Score Distribution")
    fig = px.histogram(df_f, x="score", nbins=5)
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 8. VULN DISTRIBUTION BY MODEL
# ------------------------------------------
elif section == "📉 Vulnerability Distribution by Model":
    st.header("📉 Vulnerability Distribution by Model")
    # NOTE(review): "vulnerable" is a boolean flag, so a box plot of the raw
    # rows carries little information; consider plotting per-scenario rates.
    fig = px.box(df_f, x="model", y="vulnerable", color="model")
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 9. VIOLIN PLOTS
# ------------------------------------------
elif section == "🎻 Violin Plots (Per Model / Trigger)":
    st.header("🎻 Distribution of Scores (Violin Plots)")
    fig = px.violin(df_f, x="model", y="score", color="model", box=True)
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 10. TASK DIFFICULTY
# ------------------------------------------
elif section == "📚 Task Difficulty Explorer":
    st.header("📚 Task Difficulty Explorer")
    # Cell value = mean vulnerability flag for that (task, model) pair.
    pivot = df_f.pivot_table(
        values="vulnerable", index="task_id", columns="model", aggfunc="mean"
    )
    fig = px.imshow(
        pivot, color_continuous_scale="Reds", aspect="auto",
        title="Task Difficulty per Model"
    )
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 11. STATISTICAL TESTS
# ------------------------------------------
elif section == "📐 ANOVA & Statistical Tests":
    st.header("📐 ANOVA & Statistical Tests")

    # χ² independence tests: does the vulnerable flag depend on the model,
    # and does it depend on the trigger?
    chi2_rows = []
    for factor in ("model", "trigger"):
        contingency = pd.crosstab(df_f[factor], df_f["vulnerable"])
        chi2, p_value, _, _ = stats.chi2_contingency(contingency)
        chi2_rows.append(
            {"test": f"{factor} vs vulnerability", "chi2": chi2, "p_value": p_value}
        )

    st.subheader("Chi-Square Tests")
    st.write(pd.DataFrame(chi2_rows))

# ------------------------------------------
# 12. SENSITIVITY INDEX
# ------------------------------------------
elif section == "⚡ Sensitivity Index (Model Stability)":
    st.header("⚡ Sensitivity Index (per Model)")

    # For each model, measure how much its vulnerability rate varies across
    # triggers (population std, range, mean of the per-trigger rates).
    rows = []
    for model_name, model_rows in df_f.groupby("model", sort=False):
        trig_rates = model_rows.groupby("trigger")["vulnerable"].mean().to_numpy()
        # Spread is undefined for a single trigger, so such models are skipped.
        if len(trig_rates) > 1:
            rows.append({
                "model": model_name,
                "std_trigger_rate": np.std(trig_rates),
                "range_trigger_rate": trig_rates.max() - trig_rates.min(),
                "mean_trigger_rate": trig_rates.mean(),
            })

    if rows:
        st.dataframe(
            pd.DataFrame(rows).sort_values("std_trigger_rate", ascending=False)
        )
    else:
        # Without this guard the sort raises KeyError on a column-less frame,
        # e.g. whenever a single trigger is selected in the sidebar.
        st.info("Sensitivity needs at least two triggers per model in the current selection.")

# ------------------------------------------
# 13. CRITICAL SCENARIO EXPLORER
# ------------------------------------------
elif section == "🌀 Critical Scenario Explorer":
    st.header("🌀 Critical (score=5) Scenario Explorer")
    # Aggregate over ALL rows of each scenario so `runs` is the total number
    # of runs, then keep the scenarios with at least one critical result.
    # (Grouping only the critical rows would make `runs` equal
    # `critical_count` by construction.)
    crit = (
        df_f.groupby("scenario")
        .agg(critical_count=("critical", "sum"), runs=("critical", "count"))
        .loc[lambda t: t["critical_count"] > 0]
        .sort_values("critical_count", ascending=False)
    )
    st.dataframe(crit)