pritamdeka committed on
Commit
6917fc0
·
verified ·
1 Parent(s): cde7707

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -6
app.py CHANGED
@@ -235,15 +235,41 @@ def keyphrase_generator(
235
  article_heading = corpus[0] if corpus else ""
236
  except Exception:
237
  article_heading = corpus[0] if corpus else ""
 
238
  model_4 = SentenceTransformer(model_4)
239
- my_dict = dict(zip(titles_list, abstracts_list))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  title_embeddings = model_4.encode(titles_list)
241
  heading_embedding = model_4.encode([article_heading])
242
- similarities = cosine_similarity(heading_embedding, title_embeddings)
243
- max_n = max_retrieved
244
- sorted_titles = [titles_list[index] for index in similarities.argsort()[0][-max_n:]]
245
- sorted_abstract_list = [my_dict[list_elem] for list_elem in sorted_titles]
246
- sorted_dict = {'Title': sorted_titles, 'Abstract': sorted_abstract_list}
 
 
 
 
 
 
 
 
 
 
247
 
248
  # ---------- Evidence Extraction Integration ----------
249
  if extract_evidence:
 
235
  article_heading = corpus[0] if corpus else ""
236
  except Exception:
237
  article_heading = corpus[0] if corpus else ""
238
+ # ---------- Most relevant abstracts by heading (FIXED) ----------
239
  model_4 = SentenceTransformer(model_4)
240
+
241
+ # Clean PubMed outputs (avoid None / malformed entries)
242
+ filtered = [
243
+ (t.text.strip(), a.text.strip())
244
+ for t, a in zip(article_title, article_abstract)
245
+ if t is not None and t.text and a is not None and a.text
246
+ ]
247
+
248
+ if not filtered:
249
+ return {"error": "No valid PubMed titles/abstracts found."}
250
+
251
+ titles_list, abstracts_list = zip(*filtered)
252
+ titles_list = list(titles_list)
253
+ abstracts_list = list(abstracts_list)
254
+
255
+ # Encode titles
256
  title_embeddings = model_4.encode(titles_list)
257
  heading_embedding = model_4.encode([article_heading])
258
+
259
+ # Compute similarity
260
+ similarities = cosine_similarity(heading_embedding, title_embeddings)[0]
261
+
262
+ # Select top-N by similarity
263
+ top_indices = similarities.argsort()[-max_retrieved:]
264
+
265
+ sorted_titles = [titles_list[i] for i in top_indices]
266
+ sorted_abstract_list = [abstracts_list[i] for i in top_indices]
267
+
268
+ sorted_dict = {
269
+ "Title": sorted_titles,
270
+ "Abstract": sorted_abstract_list
271
+ }
272
+
273
 
274
  # ---------- Evidence Extraction Integration ----------
275
  if extract_evidence: