Update app.py
Browse files
app.py
CHANGED
|
@@ -235,15 +235,41 @@ def keyphrase_generator(
|
|
| 235 |
article_heading = corpus[0] if corpus else ""
|
| 236 |
except Exception:
|
| 237 |
article_heading = corpus[0] if corpus else ""
|
|
|
|
| 238 |
model_4 = SentenceTransformer(model_4)
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
title_embeddings = model_4.encode(titles_list)
|
| 241 |
heading_embedding = model_4.encode([article_heading])
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
# ---------- Evidence Extraction Integration ----------
|
| 249 |
if extract_evidence:
|
|
|
|
| 235 |
article_heading = corpus[0] if corpus else ""
|
| 236 |
except Exception:
|
| 237 |
article_heading = corpus[0] if corpus else ""
|
| 238 |
+
# ---------- Most relevant abstracts (ranked by title similarity to the article heading) ----------
|
| 239 |
model_4 = SentenceTransformer(model_4)
|
| 240 |
+
|
| 241 |
+
# Clean PubMed outputs (avoid None / malformed entries)
|
| 242 |
+
filtered = [
|
| 243 |
+
(t.text.strip(), a.text.strip())
|
| 244 |
+
for t, a in zip(article_title, article_abstract)
|
| 245 |
+
if t is not None and t.text and a is not None and a.text
|
| 246 |
+
]
|
| 247 |
+
|
| 248 |
+
if not filtered:
|
| 249 |
+
return {"error": "No valid PubMed titles/abstracts found."}
|
| 250 |
+
|
| 251 |
+
titles_list, abstracts_list = zip(*filtered)
|
| 252 |
+
titles_list = list(titles_list)
|
| 253 |
+
abstracts_list = list(abstracts_list)
|
| 254 |
+
|
| 255 |
+
# Encode the candidate titles and the article heading with the same model
|
| 256 |
title_embeddings = model_4.encode(titles_list)
|
| 257 |
heading_embedding = model_4.encode([article_heading])
|
| 258 |
+
|
| 259 |
+
# Compute similarity
|
| 260 |
+
similarities = cosine_similarity(heading_embedding, title_embeddings)[0]
|
| 261 |
+
|
| 262 |
+
# Select the top max_retrieved entries by similarity (argsort is ascending, so the best match comes last)
|
| 263 |
+
top_indices = similarities.argsort()[-max_retrieved:]
|
| 264 |
+
|
| 265 |
+
sorted_titles = [titles_list[i] for i in top_indices]
|
| 266 |
+
sorted_abstract_list = [abstracts_list[i] for i in top_indices]
|
| 267 |
+
|
| 268 |
+
sorted_dict = {
|
| 269 |
+
"Title": sorted_titles,
|
| 270 |
+
"Abstract": sorted_abstract_list
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
|
| 274 |
# ---------- Evidence Extraction Integration ----------
|
| 275 |
if extract_evidence:
|