# AI Vulnerability Benchmark dashboard (Streamlit).
#
# Loads benchmark results from a CSV (repository default or user upload),
# applies sidebar filters, and renders one of several analysis views.
# Expected CSV columns: model, trigger, modifier, task_id, score.

import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats

st.set_page_config(page_title="AI Vulnerability Benchmark", layout="wide")

# Columns the dashboard reads directly from the CSV.
REQUIRED_COLUMNS = ("model", "trigger", "modifier", "task_id", "score")

# Errors that mean "this dataset cannot be used": missing/unreadable file
# (OSError), empty or malformed CSV (ValueError subclasses), missing columns
# (KeyError), or columns of an unexpected dtype (TypeError).
LOAD_ERRORS = (OSError, ValueError, KeyError, TypeError)


# -----------------------------
# LOAD DATA
# -----------------------------
@st.cache_data
def load_df(file):
    """Read a benchmark CSV and add the derived analysis columns.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed DataFrame with three extra columns:
        ``vulnerable`` (score >= 4), ``critical`` (score == 5) and
        ``scenario`` ("<trigger> | <modifier>").

    Raises:
        KeyError: If any of ``REQUIRED_COLUMNS`` is absent from the CSV.
    """
    df = pd.read_csv(file)
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        # Fail early with one readable message instead of a KeyError raised
        # deep inside whichever view touches the missing column first.
        raise KeyError(f"missing required column(s): {', '.join(missing)}")
    df["vulnerable"] = df["score"] >= 4
    df["critical"] = df["score"] == 5
    df["scenario"] = df["trigger"] + " | " + df["modifier"]
    return df


def _rate_bar(frame, column):
    """Build a bar chart of the mean vulnerability rate per ``column`` value."""
    # Aggregate first: plotting raw rows stacks one segment per run, which
    # shows counts (dependent on run volume) rather than a comparable rate.
    rates = frame.groupby(column, as_index=False)["vulnerable"].mean()
    return px.bar(rates, x=column, y="vulnerable", color=column)


def _chi_square_row(frame, column):
    """Run a chi-square independence test of ``column`` vs. ``vulnerable``.

    Returns a dict with keys ``test``, ``chi2`` and ``p_value``; the two
    statistics are NaN when the contingency table is empty.
    """
    label = f"{column} vs vulnerability"
    table = pd.crosstab(frame[column], frame["vulnerable"])
    if table.size == 0:
        # chi2_contingency raises ValueError on an empty table.
        return {"test": label, "chi2": np.nan, "p_value": np.nan}
    chi2, p_value, _, _ = stats.chi2_contingency(table)
    return {"test": label, "chi2": chi2, "p_value": p_value}


st.sidebar.header("Dataset")

DEFAULT_PATH = "data/benchmark_results.csv"
df = None

# Try default dataset from repo
try:
    df = load_df(DEFAULT_PATH)
except LOAD_ERRORS:
    # Only dataset problems are handled here; anything else (including
    # Streamlit's own control-flow exceptions) must propagate.
    st.sidebar.warning("Default dataset missing. Upload required.")
else:
    st.sidebar.success("Loaded default dataset from repository.")

# Allow optional upload from user
uploaded = st.sidebar.file_uploader("Upload custom benchmark_results.csv", type=["csv"])
if uploaded is not None:
    try:
        df = load_df(uploaded)
    except LOAD_ERRORS as exc:
        # Keep whatever dataset was already loaded rather than crashing.
        st.sidebar.error(f"Could not use uploaded file: {exc}")
    else:
        st.sidebar.success("Using uploaded dataset.")

if df is None:
    st.error("No dataset found. Upload a CSV to continue.")
    st.stop()

# -----------------------------
# FILTERS
# -----------------------------
# dropna() keeps sorted() from failing on a mix of strings and NaN.
models = sorted(df["model"].dropna().unique())
triggers = sorted(df["trigger"].dropna().unique())
modifiers = sorted(df["modifier"].dropna().unique())
tasks = sorted(df["task_id"].dropna().unique())

st.sidebar.header("Filters")

model_f = st.sidebar.selectbox("Model", ["ALL"] + models)
trigger_f = st.sidebar.selectbox("Trigger", ["ALL"] + triggers)
modifier_f = st.sidebar.selectbox("Modifier", ["ALL"] + modifiers)
task_f = st.sidebar.selectbox("Task ID", ["ALL"] + tasks)

significance = st.sidebar.slider("Minimum runs per trigger", 1, 30, 10)

df_f = df.copy()
selections = (
    ("model", model_f),
    ("trigger", trigger_f),
    ("modifier", modifier_f),
    ("task_id", task_f),
)
for column, choice in selections:
    if choice != "ALL":
        df_f = df_f[df_f[column] == choice]

# apply significance filter: drop triggers with too few runs after filtering
counts = df_f["trigger"].value_counts()
valid_triggers = counts[counts >= significance].index
df_f = df_f[df_f["trigger"].isin(valid_triggers)]

if df_f.empty:
    # Every view below needs at least one row; rates would render as "nan%"
    # and the statistical tests would raise on empty tables.
    st.warning(
        "No rows match the current filters. Relax the filters or lower "
        "the minimum runs per trigger."
    )
    st.stop()

# -----------------------------
# SUMMARY METRICS
# -----------------------------
c1, c2, c3, c4 = st.columns(4)
c1.metric("Rows", len(df_f))
c2.metric("Vulnerability Rate", f"{df_f['vulnerable'].mean():.2%}")
c3.metric("Critical Rate", f"{df_f['critical'].mean():.2%}")
c4.metric("Unique scenarios", df_f["scenario"].nunique())

st.markdown("---")

# ------------------------------------------
# SECTION SELECTOR
# ------------------------------------------
section = st.selectbox(
    "Choose analysis view",
    [
        "📊 Vulnerability by Model",
        "🎯 Vulnerability by Trigger",
        "🧱 Vulnerability by Modifier",
        "🔥 Model × Trigger Heatmap",
        "🧩 Model × Trigger × Modifier Explorer",
        "📦 Top Dangerous Scenarios",
        "📈 Score Distribution",
        "📉 Vulnerability Distribution by Model",
        "🎻 Violin Plots (Per Model / Trigger)",
        "📚 Task Difficulty Explorer",
        "📐 ANOVA & Statistical Tests",
        "⚡ Sensitivity Index (Model Stability)",
        "🌀 Critical Scenario Explorer",
    ],
)

# ------------------------------------------
# 1. VULNERABILITY BY MODEL
# ------------------------------------------
if section == "📊 Vulnerability by Model":
    st.header("📊 Vulnerability by Model")
    fig = _rate_bar(df_f, "model")
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 2. VULNERABILITY BY TRIGGER
# ------------------------------------------
elif section == "🎯 Vulnerability by Trigger":
    st.header("🎯 Vulnerability by Trigger")
    fig = _rate_bar(df_f, "trigger")
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 3. VULNERABILITY BY MODIFIER
# ------------------------------------------
elif section == "🧱 Vulnerability by Modifier":
    st.header("🧱 Vulnerability by Modifier")
    fig = _rate_bar(df_f, "modifier")
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 4. MODEL × TRIGGER HEATMAP
# ------------------------------------------
elif section == "🔥 Model × Trigger Heatmap":
    st.header("🔥 Model × Trigger Vulnerability Heatmap")
    pivot = df_f.pivot_table(
        values="vulnerable", index="model", columns="trigger", aggfunc="mean"
    )
    fig = px.imshow(
        pivot,
        color_continuous_scale="Reds",
        aspect="auto",
        title="Model × Trigger Vulnerability Heatmap",
    )
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 5. MODEL × TRIGGER × MODIFIER TABLE
# ------------------------------------------
elif section == "🧩 Model × Trigger × Modifier Explorer":
    st.header("🧩 Model × Trigger × Modifier Explorer")
    table = (
        df_f.groupby(["model", "trigger", "modifier"])
        .agg(vuln_rate=("vulnerable", "mean"), runs=("vulnerable", "count"))
        .sort_values("vuln_rate", ascending=False)
    )
    st.dataframe(table)

# ------------------------------------------
# 6. TOP DANGEROUS SCENARIOS
# ------------------------------------------
elif section == "📦 Top Dangerous Scenarios":
    st.header("📦 Top Dangerous Scenarios")
    scen = (
        df_f.groupby("scenario")
        .agg(vuln_rate=("vulnerable", "mean"), runs=("vulnerable", "count"))
        .sort_values("vuln_rate", ascending=False)
        .head(40)
    )
    st.dataframe(scen)

# ------------------------------------------
# 7. SCORE DISTRIBUTION
# ------------------------------------------
elif section == "📈 Score Distribution":
    st.header("📈 Score Distribution")
    fig = px.histogram(df_f, x="score", nbins=5)
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 8. VULN DISTRIBUTION BY MODEL
# ------------------------------------------
elif section == "📉 Vulnerability Distribution by Model":
    st.header("📉 Vulnerability Distribution by Model")
    fig = px.box(df_f, x="model", y="vulnerable", color="model")
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 9. VIOLIN PLOTS
# ------------------------------------------
elif section == "🎻 Violin Plots (Per Model / Trigger)":
    st.header("🎻 Distribution of Scores (Violin Plots)")
    fig = px.violin(df_f, x="model", y="score", color="model", box=True)
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 10. TASK DIFFICULTY
# ------------------------------------------
elif section == "📚 Task Difficulty Explorer":
    st.header("📚 Task Difficulty Explorer")
    pivot = df_f.pivot_table(
        values="vulnerable", index="task_id", columns="model", aggfunc="mean"
    )
    fig = px.imshow(
        pivot,
        color_continuous_scale="Reds",
        aspect="auto",
        title="Task Difficulty per Model",
    )
    st.plotly_chart(fig, use_container_width=True)

# ------------------------------------------
# 11. STATISTICAL TESTS
# ------------------------------------------
elif section == "📐 ANOVA & Statistical Tests":
    st.header("📐 ANOVA & Statistical Tests")

    # χ²: does vulnerability depend on model / on trigger?
    test_rows = [
        _chi_square_row(df_f, "model"),
        _chi_square_row(df_f, "trigger"),
    ]

    st.subheader("Chi-Square Tests")
    st.write(pd.DataFrame(test_rows))

# ------------------------------------------
# 12. SENSITIVITY INDEX
# ------------------------------------------
elif section == "⚡ Sensitivity Index (Model Stability)":
    st.header("⚡ Sensitivity Index (per Model)")

    rows = []
    for m, sub in df_f.groupby("model"):
        trig_rates = sub.groupby("trigger")["vulnerable"].mean().values
        # Spread across triggers is only meaningful with >= 2 triggers.
        if len(trig_rates) > 1:
            rows.append({
                "model": m,
                "std_trigger_rate": np.std(trig_rates),
                "range_trigger_rate": trig_rates.max() - trig_rates.min(),
                "mean_trigger_rate": trig_rates.mean(),
            })

    if rows:
        st.dataframe(
            pd.DataFrame(rows).sort_values("std_trigger_rate", ascending=False)
        )
    else:
        # An empty frame has no "std_trigger_rate" column to sort by.
        st.info(
            "Sensitivity index needs at least two triggers per model; "
            "widen the trigger filter to see results."
        )

# ------------------------------------------
# 13. CRITICAL SCENARIO EXPLORER
# ------------------------------------------
elif section == "🌀 Critical Scenario Explorer":
    st.header("🌀 Critical (score=5) Scenario Explorer")
    # Aggregate over ALL runs of each scenario so "runs" is the true total,
    # then keep only scenarios that produced at least one critical result.
    per_scenario = df_f.groupby("scenario").agg(
        critical_count=("critical", "sum"),
        runs=("critical", "count"),
    )
    crit = per_scenario[per_scenario["critical_count"] > 0].sort_values(
        "critical_count", ascending=False
    )
    st.dataframe(crit)