Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ import bophono
 
 from pydub import AudioSegment
 from botok import WordTokenizer
+from mlotsawa.translator import Translator
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
 
@@ -360,15 +361,17 @@ def run_task(text, language, task):
     # Load Monlam AI Tibetan→English model
     #tib_tokenizer = AutoTokenizer.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
     #tib_model = AutoModelForSeq2SeqLM.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
-    tib_tokenizer = AutoTokenizer.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
-    tib_model = AutoModelForSeq2SeqLM.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
+    #tib_tokenizer = AutoTokenizer.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
+    #tib_model = AutoModelForSeq2SeqLM.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
 
     # Encode Tibetan input
-    inputs = tib_tokenizer(text, return_tensors="pt")
+    #inputs = tib_tokenizer(text, return_tensors="pt")
 
     # Generate translation
-    outputs = tib_model.generate(**inputs, max_new_tokens=256)
-    translated = tib_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+    #outputs = tib_model.generate(**inputs, max_new_tokens=256)
+    #translated = tib_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+    translator = Translator()
+    translated = translator.translate(text)
 
     # Handle nonsense or empty outputs
     if not translated or translated.isspace():
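
For context, the change swaps the hand-rolled transformers pipeline (tokenize, generate, decode) for the mlotsawa package's Translator, which hides model loading and decoding behind a single translate() call. Below is a minimal standalone sketch of the new path, using only the calls that appear in this diff and assuming mlotsawa is installed; the fallback message and the helper name translate_tibetan are placeholders for illustration, not app.py's actual handling.

from mlotsawa.translator import Translator

def translate_tibetan(text: str) -> str:
    # Instantiate the MLotsawa translator; in app.py this happens inside run_task()
    translator = Translator()
    # Tibetan -> English; the diff treats the return value as a plain string
    translated = translator.translate(text)
    # Guard against empty or whitespace-only output, mirroring the check in run_task()
    if not translated or translated.isspace():
        return "[no translation produced]"  # placeholder fallback (assumption)
    return translated.strip()

if __name__ == "__main__":
    print(translate_tibetan("བཀྲ་ཤིས་བདེ་ལེགས།"))  # sample Tibetan input ("tashi delek")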