Spaces:
Sleeping
Sleeping
sg
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# app.py
|
| 2 |
# Flask RAG app (HF Spaces / Static) — dataset sudah ada di Space.
|
| 3 |
import os, json, re, time, logging
|
| 4 |
-
from functools import lru_cache
|
| 5 |
from typing import Dict, List, Tuple
|
| 6 |
from dataclasses import dataclass
|
| 7 |
from datetime import datetime
|
|
@@ -252,13 +252,10 @@ def build_prompt(user_query: str, sentences: List[str]) -> str:
|
|
| 252 |
"- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', atau 'bersumber'."
|
| 253 |
)
|
| 254 |
return f"""{system}
|
| 255 |
-
|
| 256 |
KALIMAT SUMBER:
|
| 257 |
{block}
|
| 258 |
-
|
| 259 |
PERTANYAAN:
|
| 260 |
{user_query}
|
| 261 |
-
|
| 262 |
JAWAB (1 kalimat saja):
|
| 263 |
"""
|
| 264 |
|
|
@@ -316,14 +313,15 @@ def db():
|
|
| 316 |
return SessionLocal()
|
| 317 |
|
| 318 |
def login_required(view_func):
    """Decorator that restricts a view to sessions marked as logged in.

    When the session has no truthy ``"logged_in"`` entry, the caller is
    redirected to the URL built for ``auth_login``; otherwise the wrapped
    view is invoked with the original arguments and its result is returned.

    Args:
        view_func: The view callable to protect.

    Returns:
        A wrapper callable that carries ``view_func``'s metadata.
    """
    # Local import keeps this block self-contained regardless of which
    # names the module-level ``functools`` import line provides.
    from functools import wraps

    # wraps() copies __name__, __qualname__, __doc__ and __module__ and sets
    # __wrapped__; copying only __name__ by hand loses the rest.
    @wraps(view_func)
    def wrapper(*args, **kwargs):
        if not session.get("logged_in"):
            return redirect(url_for("auth_login"))
        return view_func(*args, **kwargs)

    return wrapper
|
| 325 |
|
| 326 |
def admin_required(view_func):
|
|
|
|
| 327 |
def wrapper(*args, **kwargs):
|
| 328 |
if not session.get("logged_in"):
|
| 329 |
return redirect(url_for("auth_login"))
|
|
@@ -331,7 +329,6 @@ def admin_required(view_func):
|
|
| 331 |
flash("Hanya admin yang boleh mengakses halaman itu.", "error")
|
| 332 |
return redirect(url_for("subjects"))
|
| 333 |
return view_func(*args, **kwargs)
|
| 334 |
-
wrapper.__name__ = view_func.__name__
|
| 335 |
return wrapper
|
| 336 |
|
| 337 |
|
|
@@ -508,17 +505,39 @@ def ask(subject_key: str):
|
|
| 508 |
prompt = build_prompt(query, sentences)
|
| 509 |
|
| 510 |
try:
|
| 511 |
-
|
| 512 |
LLM, prompt,
|
| 513 |
max_tokens=64, temperature=0.2, top_p=1.0,
|
| 514 |
stop=["\n\n", "\n###", "###", "\nUser:",
|
| 515 |
"Berdasarkan", "berdasarkan", "Menurut", "menurut",
|
| 516 |
"Merujuk", "merujuk", "Mengacu", "mengacu", "Bersumber", "bersumber"]
|
| 517 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
except Exception as e:
|
| 519 |
log.exception(f"[LLM] generate error: {e}")
|
| 520 |
return jsonify({"ok": True, "answer": FALLBACK_TEXT})
|
| 521 |
|
|
|
|
| 522 |
m = re.search(r"(.+?[.!?])(\s|$)", answer)
|
| 523 |
answer = (m.group(1) if m else answer).strip()
|
| 524 |
answer = strip_meta_sentence(answer)
|
|
|
|
| 1 |
# app.py
|
| 2 |
# Flask RAG app (HF Spaces / Static) — dataset sudah ada di Space.
|
| 3 |
import os, json, re, time, logging
|
| 4 |
+
from functools import lru_cache, wraps
|
| 5 |
from typing import Dict, List, Tuple
|
| 6 |
from dataclasses import dataclass
|
| 7 |
from datetime import datetime
|
|
|
|
| 252 |
"- DILARANG menulis frasa meta seperti 'berdasarkan', 'menurut', 'merujuk', atau 'bersumber'."
|
| 253 |
)
|
| 254 |
return f"""{system}
|
|
|
|
| 255 |
KALIMAT SUMBER:
|
| 256 |
{block}
|
|
|
|
| 257 |
PERTANYAAN:
|
| 258 |
{user_query}
|
|
|
|
| 259 |
JAWAB (1 kalimat saja):
|
| 260 |
"""
|
| 261 |
|
|
|
|
| 313 |
return SessionLocal()
|
| 314 |
|
| 315 |
def login_required(view_func):
    """Decorator that restricts a view to sessions marked as logged in.

    When the session has no truthy ``"logged_in"`` entry, the caller is
    redirected to the URL built for ``auth_login``; otherwise the wrapped
    view is invoked with the original arguments and its result is returned.

    Args:
        view_func: The view callable to protect.

    Returns:
        A wrapper callable that carries ``view_func``'s metadata.
    """
    # wraps() preserves the view's __name__/__doc__ and sets __wrapped__ on
    # the wrapper, so each decorated view keeps its own identity.
    @wraps(view_func)
    def wrapper(*args, **kwargs):
        if not session.get("logged_in"):
            return redirect(url_for("auth_login"))
        return view_func(*args, **kwargs)

    return wrapper
|
| 322 |
|
| 323 |
def admin_required(view_func):
|
| 324 |
+
@wraps(view_func)
|
| 325 |
def wrapper(*args, **kwargs):
|
| 326 |
if not session.get("logged_in"):
|
| 327 |
return redirect(url_for("auth_login"))
|
|
|
|
| 329 |
flash("Hanya admin yang boleh mengakses halaman itu.", "error")
|
| 330 |
return redirect(url_for("subjects"))
|
| 331 |
return view_func(*args, **kwargs)
|
|
|
|
| 332 |
return wrapper
|
| 333 |
|
| 334 |
|
|
|
|
| 505 |
prompt = build_prompt(query, sentences)
|
| 506 |
|
| 507 |
try:
|
| 508 |
+
raw_answer = generate(
|
| 509 |
LLM, prompt,
|
| 510 |
max_tokens=64, temperature=0.2, top_p=1.0,
|
| 511 |
stop=["\n\n", "\n###", "###", "\nUser:",
|
| 512 |
"Berdasarkan", "berdasarkan", "Menurut", "menurut",
|
| 513 |
"Merujuk", "merujuk", "Mengacu", "mengacu", "Bersumber", "bersumber"]
|
| 514 |
+
)
|
| 515 |
+
if raw_answer is None:
|
| 516 |
+
raw_answer = ""
|
| 517 |
+
raw_answer = raw_answer.strip()
|
| 518 |
+
|
| 519 |
+
# Log raw model output for debugging
|
| 520 |
+
log.info(f"[LLM] Raw answer repr: {repr(raw_answer)}")
|
| 521 |
+
|
| 522 |
+
# Hati-hati: banyak model menyisipkan <think>...</think>
|
| 523 |
+
# Hapus block <think>...</think> dengan aman (case-insensitive)
|
| 524 |
+
cleaned = re.sub(r"<think\b[^>]*>.*?</think>", "", raw_answer, flags=re.DOTALL | re.IGNORECASE).strip()
|
| 525 |
+
# Jika masih ada sisa tag tunggal seperti </think> atau <think>, hapus saja tag-tag tersebut
|
| 526 |
+
cleaned = re.sub(r"</?think\b[^>]*>", "", cleaned, flags=re.IGNORECASE).strip()
|
| 527 |
+
|
| 528 |
+
# Jika pembersihan menghapus semuanya, coba fallback: keluarkan teks yang bukan tag dari raw_answer
|
| 529 |
+
if not cleaned:
|
| 530 |
+
non_tag = re.sub(r"<[^>]+>", "", raw_answer).strip()
|
| 531 |
+
if non_tag:
|
| 532 |
+
cleaned = non_tag
|
| 533 |
+
|
| 534 |
+
answer = cleaned
|
| 535 |
+
|
| 536 |
except Exception as e:
|
| 537 |
log.exception(f"[LLM] generate error: {e}")
|
| 538 |
return jsonify({"ok": True, "answer": FALLBACK_TEXT})
|
| 539 |
|
| 540 |
+
# Ambil 1 kalimat pertama (jika model mengeluarkan beberapa kalimat)
|
| 541 |
m = re.search(r"(.+?[.!?])(\s|$)", answer)
|
| 542 |
answer = (m.group(1) if m else answer).strip()
|
| 543 |
answer = strip_meta_sentence(answer)
|