tsuching committed on
Commit
07702a8
·
verified ·
1 Parent(s): 57f5830

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -5,6 +5,7 @@ import bophono
5
 
6
  from pydub import AudioSegment
7
  from botok import WordTokenizer
 
8
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
  from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
10
 
@@ -360,15 +361,17 @@ def run_task(text, language, task):
360
  # Load Monlam AI Tibetan→English model
361
  #tib_tokenizer = AutoTokenizer.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
362
  #tib_model = AutoModelForSeq2SeqLM.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
363
- tib_tokenizer = AutoTokenizer.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
364
- tib_model = AutoModelForSeq2SeqLM.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
365
 
366
  # Encode Tibetan input
367
- inputs = tib_tokenizer(text, return_tensors="pt")
368
 
369
  # Generate translation
370
- outputs = tib_model.generate(**inputs, max_new_tokens=256)
371
- translated = tib_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
 
 
372
 
373
  # Handle nonsense or empty outputs
374
  if not translated or translated.isspace():
 
5
 
6
  from pydub import AudioSegment
7
  from botok import WordTokenizer
8
+ from mlotsawa.translator import Translator
9
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
  from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
11
 
 
361
  # Load Monlam AI Tibetan→English model
362
  #tib_tokenizer = AutoTokenizer.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
363
  #tib_model = AutoModelForSeq2SeqLM.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
364
+ #tib_tokenizer = AutoTokenizer.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
365
+ #tib_model = AutoModelForSeq2SeqLM.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
366
 
367
  # Encode Tibetan input
368
+ #inputs = tib_tokenizer(text, return_tensors="pt")
369
 
370
  # Generate translation
371
+ #outputs = tib_model.generate(**inputs, max_new_tokens=256)
372
+ #translated = tib_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
373
+ translator = Translator()
374
+ translated = translator.translate(text)
375
 
376
  # Handle nonsense or empty outputs
377
  if not translated or translated.isspace():