tsuching committed on
Commit
69b711b
·
verified ·
1 Parent(s): 9c45e61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -19
app.py CHANGED
@@ -199,25 +199,32 @@ def run_task(text, language, task):
199
  except Exception as e2:
200
  return None, None, f"Translation error: {e2}"
201
  elif language == "Tibetan":
202
- # 1) Segment Tibetan text with Botok
203
- tokens = [t.text for t in wt.tokenize(text)]
204
- segmented_text = " ".join(tokens)
205
-
206
- # 2) Set source and target languages
207
- tokenizer.src_lang = "bo_CN"
208
- forced_bos = tokenizer.lang_code_to_id["en_XX"]
209
-
210
- # 3) Translate using MBART-50
211
- inputs = tokenizer(segmented_text, return_tensors="pt")
212
- outputs = model.generate(
213
- **inputs,
214
- max_new_tokens=256,
215
- forced_bos_token_id=forced_bos
216
- )
217
- english_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
218
-
219
- return None, None, english_text
220
-
 
 
 
 
 
 
 
221
  #translated_text = translate_with_quota(text, src_lang="bo", tgt_lang="en")
222
  #return None, None, translated_text
223
  else:
 
199
  except Exception as e2:
200
  return None, None, f"Translation error: {e2}"
201
  elif language == "Tibetan":
202
+
203
+ try:
204
+ # 1) Segment Tibetan text with Botok
205
+ tokens = [t.text for t in wt.tokenize(text)]
206
+ segmented_text = " ".join(tokens)
207
+ print("Segmented Tibetan:", segmented_text)
208
+
209
+
210
+ # 2) Set source and target languages
211
+ tokenizer.src_lang = "bo_CN"
212
+ forced_bos = tokenizer.lang_code_to_id["en_XX"]
213
+
214
+ # 3) Translate using MBART-50
215
+ inputs = tokenizer(segmented_text, return_tensors="pt")
216
+ outputs = model.generate(
217
+ **inputs,
218
+ max_new_tokens=256,
219
+ forced_bos_token_id=forced_bos
220
+ )
221
+ # 4) Decode output
222
+ english_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
223
+ print("Translation output:", english_text)
224
+
225
+ return None, None, english_text
226
+ except Exception as e:
227
+ return None, None, f"Tibetan translation error: {e}"
228
  #translated_text = translate_with_quota(text, src_lang="bo", tgt_lang="en")
229
  #return None, None, translated_text
230
  else: