import gradio as gr
import numpy as np
from pyewts import pyewts
import bophono
from fastapi import FastAPI, Request
from fastapi.responses import FileResponse
from pydub import AudioSegment
from botok import WordTokenizer
from mlotsawa.translator import Translator
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import MBart50Tokenizer, MBartForConditionalGeneration
import datetime
import tempfile
import soundfile as sf
import os
import re
# --- Initialization ---
# Instantiate the converters once at module load so every request reuses them.
# Wylie <-> Unicode converter (pyewts)
WYLIE_CONVERTER_HANDLE = pyewts()
# Botok word tokenizer for Tibetan segmentation
wt = WordTokenizer()
HF_TOKEN = os.getenv("HF_TOKEN")
# --- Bophono Initialization ---
# 1. MST converter (Manual of Standard Tibetan, IPA output), created globally.
# The options can be adjusted to match the desired pronunciation rules.
BOPHONO_MST_OPTIONS = {
    'aspirateLowTones': True
}
bophono_mst_converter = bophono.UnicodeToApi(
    schema="MST",
    options=BOPHONO_MST_OPTIONS
)
# 2. KVP converter (English-readable phonetic spelling).
# Note: KVP requires different options for its specific ruleset.
BOPHONO_KVP_OPTIONS = {
    'aspirateLowTones': False,
    'vowelLengthInFinals': True,  # adjust as per the KVP scheme rules
}
bophono_kvp_converter = bophono.UnicodeToApi(
    schema="KVP",  # KVP schema identifier
    options=BOPHONO_KVP_OPTIONS
)
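# Quick usage check (illustrative only; the exact transcriptions depend on the
# installed bophono version):
#   bophono_mst_converter.get_api("བཀྲ་ཤིས་")  # -> an IPA string
#   bophono_kvp_converter.get_api("བཀྲ་ཤིས་")  # -> an English-readable spelling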
# --- Translation Quotas ---
GOOGLE_QUOTA = 500_000   # free tier, characters/month
MS_QUOTA = 2_000_000     # free tier, characters/month
usage = {"google": 0, "microsoft": 0}
last_reset = datetime.date.today().replace(day=1)
def translate_with_quota(text, src_lang="bo", tgt_lang="en"):
    global usage, last_reset
    # Reset counters on the 1st of each month
    today = datetime.date.today()
    if today.month != last_reset.month or today.year != last_reset.year:
        usage = {"google": 0, "microsoft": 0}
        last_reset = today.replace(day=1)
    char_count = len(text)
    # Try Google first
    if usage["google"] + char_count <= GOOGLE_QUOTA:
        usage["google"] += char_count
        return call_google_translate(text, src_lang, tgt_lang)
    # Fall back to Microsoft
    elif usage["microsoft"] + char_count <= MS_QUOTA:
        usage["microsoft"] += char_count
        return call_microsoft_translate(text, src_lang, tgt_lang)
    # Both quotas exhausted
    else:
        return "Translation quota exceeded for this month. Please try again next month."
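# Illustrative arithmetic for the router above: a 1,200-character request goes
# to Google while usage["google"] + 1200 <= 500_000; once that budget is spent,
# the same request is routed to Microsoft until its 2,000,000-character budget
# is exhausted, after which callers get the quota-exceeded message.
#   translate_with_quota("some Tibetan text", src_lang="bo", tgt_lang="en")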
# --- Load TTS pipelines ---
tts_tibetan = pipeline("text-to-speech", model="facebook/mms-tts-bod")
#tts_sanskrit = pipeline("text-to-speech", model="facebook/mms-tts-san")
# Load MBART-50 once. The slow (sentencepiece) tokenizer is used to avoid the
# fast-tokenizer bug; note that MBart50TokenizerFast has no `use_fast` argument,
# so the earlier `use_fast=False` call silently kept the fast implementation.
tokenizer = MBart50Tokenizer.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
# Reuse the same objects rather than loading the identical model a second time.
translation_tokenizer = tokenizer
translation_model = model
# Alternatives kept for reference:
#translation_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
#translation_model = AutoModel.from_pretrained("xlm-roberta-base")
#translation_model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-1B")
#translation_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-1B")
#AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-1B", use_auth_token=os.environ["HF_TOKEN"])
def call_google_translate(text, src_lang, tgt_lang):
    # TODO: implement Google API call
    return "Google translated text"

def call_microsoft_translate(text, src_lang, tgt_lang):
    # TODO: implement Microsoft API call
    return "Microsoft translated text"
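# --- Hedged sketch (not wired in): one way the Microsoft stub above could be
# filled. All names here are assumptions: it presumes the `requests` package
# and MS_TRANSLATOR_KEY / MS_TRANSLATOR_REGION environment variables; endpoint
# and parameters follow the public Microsoft Translator v3 REST API.
def _sketch_microsoft_translate(text, src_lang="bo", tgt_lang="en"):
    import requests  # assumed extra dependency, imported locally for the sketch
    url = "https://api.cognitive.microsofttranslator.com/translate"
    params = {"api-version": "3.0", "from": src_lang, "to": tgt_lang}
    headers = {
        "Ocp-Apim-Subscription-Key": os.getenv("MS_TRANSLATOR_KEY", ""),
        "Ocp-Apim-Subscription-Region": os.getenv("MS_TRANSLATOR_REGION", ""),
        "Content-Type": "application/json",
    }
    resp = requests.post(url, params=params, headers=headers, json=[{"text": text}])
    resp.raise_for_status()
    # v3 returns a list with one entry per input text
    return resp.json()[0]["translations"][0]["text"]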
def safe_tokenize_sanskrit(text):
    """
    Return both machine tokens (subwords) and human-readable word tokens for Sanskrit.
    """
    machine_tokens = None
    # 1) Try IndicTrans2 tokenizer (not defined at module scope, so this raises
    #    NameError, which the except swallows)
    try:
        machine_tokens = indictrans_tokenizer.tokenize(text)
    except Exception:
        pass
    # 2) Try MBART-50 tokenizer (loaded globally, so this is the usual path)
    if machine_tokens is None:
        try:
            machine_tokens = tokenizer.tokenize(text)
        except Exception:
            pass
    # 3) Try XLM-R tokenizer (also undefined above; kept for when it is re-enabled)
    if machine_tokens is None:
        try:
            machine_tokens = xlm_tokenizer.tokenize(text)
        except Exception:
            pass
    # 4) Regex fallback for human-readable tokens: split on whitespace,
    #    danda/double danda, and Latin punctuation
    human_tokens = [tok for tok in re.split(r"(\s+|[।॥,.;:!?])", text) if tok.strip()]
    return machine_tokens, human_tokens
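# Illustrative call (the danda is kept as its own human-readable token by the
# capturing group in the regex fallback):
#   machine, human = safe_tokenize_sanskrit("धर्मः।")
#   # human -> ["धर्मः", "।"]; machine normally comes from the MBART-50 tokenizer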
# --- Define this helper function outside run_task ---
def format_word_by_word_output(schemes_data):
    """
    Formats the structured scheme data back into the multi-line,
    word-by-word analysis format for learning.
    """
    output_lines = []
    # Headers for the output
    HEADER_UNICODE = "Unicode:"
    HEADER_WYLIE = " Wylie (Morphological):"
    HEADER_MST = " MST (IPA):"
    HEADER_KVP = " KVP (Phonetic):"
    # Iterate through the token lists (they all have the same length)
    for i in range(len(schemes_data['unicode'])):
        unicode_str = schemes_data['unicode'][i]
        wylie_str = schemes_data['wylie'][i]
        mst_ipa = schemes_data['mst_ipa'][i]
        kvp_phonetic = schemes_data['kvp_phonetic'][i]
        # A separator token is stored as an empty-string placeholder
        if not unicode_str.strip():
            output_lines.append("\n")  # vertical break for spacing
            continue
        # Format the output block for one word
        output = (
            f"{HEADER_UNICODE} {unicode_str}\n"
            f"{HEADER_WYLIE} {wylie_str}\n"
            f"{HEADER_MST} {mst_ipa}\n"
            f"{HEADER_KVP} {kvp_phonetic}\n"
        )
        output_lines.append(output)
    return "\n".join(output_lines)
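# Shape of one formatted block (illustrative; the transcriptions come from
# pyewts/bophono at runtime):
#   Unicode: བཀྲ་ཤིས་
#    Wylie (Morphological): bkra shis
#    MST (IPA): ...
#    KVP (Phonetic): ...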
def get_all_phonetics_schemes(text):
    """
    Converts Tibetan text into parallel Unicode, Wylie, MST (IPA), and
    KVP (romanization) output, stored token by token.
    """
    global bophono_mst_converter, bophono_kvp_converter
    # 1. Segment the text first, since bophono works word by word.
    # Botok tokens include words, punctuation, and whitespace elements.
    tokens = [t.text for t in wt.tokenize(text)]
    # Lists to store the results by token
    results = {
        "unicode": [],
        "wylie": [],
        "mst_ipa": [],
        "kvp_phonetic": []
    }
    # 2. Process each token (word, punctuation, or space)
    for tok in tokens:
        # Skip empty strings
        if not tok:
            continue
        # Punctuation/whitespace handling: store a consistent placeholder so
        # the formatter can turn it into a visual break later.
        is_separator = not tok.strip() or (len(tok) == 1 and tok in '་།.')
        if is_separator:
            results["unicode"].append("")
            results["wylie"].append("")
            results["mst_ipa"].append("")
            results["kvp_phonetic"].append("")
            continue
        # Initialize to avoid UnboundLocalError on failure
        wylie_str, mst_ipa, kvp_phonetic = "(Failed)", "(Failed)", "(Failed)"
        try:
            # Calculate Wylie first (always needed)
            wylie_str = WYLIE_CONVERTER_HANDLE.toWylie(tok)
            # Only meaningful Tibetan words reach this point
            mst_ipa = bophono_mst_converter.get_api(tok)
            kvp_phonetic = bophono_kvp_converter.get_api(tok)
        except Exception as e:
            # Conversion can fail on non-Tibetan or complex characters; flag it.
            print(f"Conversion failed for token '{tok}': {e}")
            wylie_str = "(Conversion Failed)"
            mst_ipa = "(Conversion Failed)"
            kvp_phonetic = "(Conversion Failed)"
        # 3. Store results
        results["unicode"].append(tok)
        results["wylie"].append(wylie_str)
        results["mst_ipa"].append(mst_ipa)
        results["kvp_phonetic"].append(kvp_phonetic)
    return results
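# The two helpers compose like this (illustrative):
#   schemes = get_all_phonetics_schemes("བཀྲ་ཤིས་བདེ་ལེགས།")
#   print(format_word_by_word_output(schemes))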
# --- Tibetan TTS function ---
# Returns: (sampling_rate, audio_numpy) for browser playback, a WAV filepath
# for download/Flutter, and a status message.
def run_task_tts(text: str):
    # Ensure input is a string and strip whitespace
    if not isinstance(text, str):
        text = str(text)
    text = text.strip()
    # 1. Segment the text and filter empty chunks.
    # Split on the shad (།) phrase marker and on newlines in one pass.
    segments = [s.strip() for s in re.split(r'[།\n]', text) if s.strip()]
    if not segments:
        return None, None, "⚠️ Error: No valid Tibetan text found after cleaning/segmentation."
    # All generated audio segments (numpy arrays)
    audio_segments = []
    # The sampling rate is captured once; it is the same for all segments
    sr = 0
    try:
        # 2. Process each segment
        for segment in segments:
            # Re-append a closing shad (།) for better phrasing, plus a space to
            # prevent clipped endings. Harmless if the segment already ended in one.
            segment_with_shad = segment + " །"
            # Generate speech for the short segment
            speech = tts_tibetan(segment_with_shad)
            # Clip and flatten the audio for the segment
            audio_data = speech["audio"]
            sr = int(speech["sampling_rate"])  # capture the sampling rate
            # Browser playback expects float32 in [-1, 1]
            segment_audio = np.clip(audio_data.astype(np.float32), -1.0, 1.0).flatten()
            audio_segments.append(segment_audio)
            # Add a small silence gap between segments for clarity (0.25 s)
            silence_duration = 0.25  # seconds
            silence_samples = int(sr * silence_duration)
            silence = np.zeros(silence_samples, dtype=np.float32)
            audio_segments.append(silence)
        # 3. Concatenate all audio segments into the final array
        final_audio = np.concatenate(audio_segments)
        # 4. Write a WAV file for download/Flutter using PCM_16 to avoid
        # pydub header errors
        tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        # A valid sampling rate is required here
        if sr == 0:
            raise ValueError("Sampling rate was not determined during TTS generation.")
        sf.write(tmpfile.name, final_audio, sr, subtype="PCM_16")
        # 5. Return both audio forms plus a status message
        return (sr, final_audio), tmpfile.name, "Tibetan audio generated successfully via segmentation!"
    except Exception as e:
        # Catch any failure during TTS or concatenation
        error_message = (
            f"TTS processing failed for a long text segment: {e}. "
            "Segmentation may have failed or the model hit an unpronounceable character. "
            "Try shorter text."
        )
        print(f"TTS Error during segmentation: {e}")
        return None, None, error_message  # empty outputs on failure
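# --- Hedged sketch: AudioSegment is imported above but currently unused. If an
# MP3 ever becomes preferable for the Flutter client, the conversion could look
# like this (the helper name is ours; assumes ffmpeg is available to pydub):
def _sketch_wav_to_mp3(wav_path: str) -> str:
    mp3_path = wav_path.replace(".wav", ".mp3")
    AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")
    return mp3_path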
# --- Translate/Tokenize/Phonetics dispatcher ---
def run_task(text, language, task):
    if task == "Translate":
        if language == "Sanskrit":
            # Prefer IndicTrans2 for Sanskrit -> English. The indic-en
            # checkpoint is the correct direction (en-indic translates the
            # other way); the repo is gated, hence the HF token.
            try:
                # Lazy-load IndicTrans2 only when Sanskrit translation is requested
                indictrans_tokenizer = AutoTokenizer.from_pretrained(
                    "ai4bharat/indictrans2-indic-en-1B",
                    token=HF_TOKEN,
                    trust_remote_code=True
                )
                indictrans_model = AutoModelForSeq2SeqLM.from_pretrained(
                    "ai4bharat/indictrans2-indic-en-1B",
                    token=HF_TOKEN,
                    trust_remote_code=True
                )
                # IndicTrans2 expects source/target language tags ahead of the
                # sentence (normally added via ai4bharat's IndicProcessor); the
                # bare-tag prefix below is a best-effort approximation.
                prefix = "san_Deva eng_Latn "
                inputs = indictrans_tokenizer(prefix + text, return_tensors="pt")
                outputs = indictrans_model.generate(**inputs, max_new_tokens=256)
                translated = indictrans_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
                # Detect nonsense outputs (a single repeated word)
                if translated and len(set(translated.split())) == 1:
                    translated = f"⚠️ Translation returned nonsense (repeated '{translated.split()[0]}')."
                print("✅ Sanskrit translation using IndicTrans2:", translated)
                return translated
            except Exception as e:
                print("⚠️ IndicTrans2 failed, falling back to MBART:", e)
            # Fallback: MBART-50 with explicit language codes. MBART-50 has no
            # Sanskrit code, so hi_IN (Devanagari Hindi) serves as the nearest
            # proxy; expect rough quality at best.
            try:
                # MBART-50 requires src_lang and forced_bos_token_id
                translation_tokenizer.src_lang = "hi_IN"  # closest available to Sanskrit
                forced_bos = translation_tokenizer.lang_code_to_id.get("en_XX", None)
                inputs = translation_tokenizer(text, return_tensors="pt")
                outputs = translation_model.generate(
                    **inputs,
                    max_new_tokens=256,
                    forced_bos_token_id=forced_bos
                )
                translated = translation_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
                if translated and len(set(translated.split())) == 1:
                    translated = f"⚠️ Translation returned nonsense (repeated '{translated.split()[0]}')."
                print("✅ Sanskrit translation using MBART fallback:", translated)
                return translated
            except Exception as e2:
                return f"Translation error: {e2}"
        elif language == "Tibetan":
            try:
                # Primary path: the mlotsawa Translator. An earlier draft also
                # ran billingsmoore/prototype-tibetan-to-english-translation-v1
                # here, but its output was immediately overwritten by mlotsawa,
                # so that path is left commented out to avoid wasted compute.
                #tib_tokenizer = AutoTokenizer.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
                #tib_model = AutoModelForSeq2SeqLM.from_pretrained("billingsmoore/prototype-tibetan-to-english-translation-v1")
                #inputs = tib_tokenizer(text, return_tensors="pt")
                #outputs = tib_model.generate(**inputs, max_new_tokens=256)
                #translated = tib_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
                # Monlam AI Tibetan-English model, kept for reference (gated):
                #tib_tokenizer = AutoTokenizer.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
                #tib_model = AutoModelForSeq2SeqLM.from_pretrained("monlam-ai/mt-bod-eng", token=HF_TOKEN)
                translator = Translator()
                translated = translator.translate(text)
                print("Translated Text with mlotsawa:", translated)
                # Handle nonsense or empty outputs
                if not translated or translated.isspace():
                    translated = "⚠️ Translation failed or returned empty output."
                elif len(set(translated.split())) == 1:
                    translated = f"⚠️ Translation returned nonsense (repeated '{translated.split()[0]}')."
                print("✅ Tibetan translation using mlotsawa:", translated)
                return translated
            except Exception as e:
                print("⚠️ mlotsawa failed, falling back to MBART:", e)
            # Fallback: MBART-50. Note that MBART-50's fifty languages do not
            # include Tibetan, so "bo_CN" is not a real code and this path is
            # best-effort only.
            try:
                # Optionally segment Tibetan text with Botok first:
                #tokens = [t.text for t in wt.tokenize(text)]
                #segmented_text = " ".join(tokens)
                #print("Segmented Tibetan:", segmented_text)
                # MBART-50 requires src_lang and forced_bos_token_id
                tokenizer.src_lang = "bo_CN"  # unsupported code; see note above
                forced_bos = tokenizer.lang_code_to_id["en_XX"]
                inputs = tokenizer(text, return_tensors="pt")  # raw, unsegmented input
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=256,
                    forced_bos_token_id=forced_bos
                )
                translated = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
                # Handle nonsense or empty outputs
                if not translated or translated.isspace():
                    translated = "⚠️ Translation failed or returned empty output."
                elif len(set(translated.split())) == 1:
                    translated = f"⚠️ Translation returned nonsense (repeated '{translated.split()[0]}')."
                print("✅ Tibetan translation using MBART:", translated)
                return translated
            except Exception as e:
                return f"Tibetan translation error: {e}"
            # A metered external API could slot in here instead:
            #translated_text = translate_with_quota(text, src_lang="bo", tgt_lang="en")
            #return translated_text
        else:
            return "Unsupported language"
    elif task == "Tokenize":
        if language == "Tibetan":
            # Segment Tibetan text with Botok
            tokens = [t.text for t in wt.tokenize(text)]
            segmented_text = " ".join(tokens)
            return segmented_text
        elif language == "Sanskrit":
            machine_tokens, human_tokens = safe_tokenize_sanskrit(text)
            # Format machine tokens (strip SentencePiece "▁" markers for the cleaned view)
            raw_machine = " ".join(machine_tokens) if machine_tokens else "None"
            clean_machine = " ".join([t.replace("▁", "") for t in machine_tokens]) if machine_tokens else "None"
            # Format human tokens
            human_str = " ".join(human_tokens) if human_tokens else "None"
            return (
                f"Raw machine tokens:\n{raw_machine}\n\n"
                f"Cleaned machine tokens:\n{clean_machine}\n\n"
                f"Human-readable tokens:\n{human_str}"
            )
        else:
            return "Unsupported language"
    elif task == "Phonetics":
        if language == "Tibetan":
            # 1. Get all schemes data
            schemes_data = get_all_phonetics_schemes(text)
            # 2. Use the formatter to create the detailed, word-by-word output
            formatted_output = format_word_by_word_output(schemes_data)
            # 3. Format the parallel one-line outputs (separators filtered out)
            unicode_output = " ".join([t for t in schemes_data['unicode'] if t.strip()])
            wylie_output = " ".join([t for t in schemes_data['wylie'] if t.strip()])
            mst_output = " ".join([t for t in schemes_data['mst_ipa'] if t.strip()])
            kvp_output = " ".join([t for t in schemes_data['kvp_phonetic'] if t.strip()])
            # 4. Present all outputs in a single formatted string for the
            # Textbox, so everything can be copied from one box.
            output = (
                f"--- Tibetan Phonetic Analysis ---\n\n"
                #f"Unicode Text (Input):\n{unicode_output}\n\n"
                f"KVP (Phonetic):\n{kvp_output}\n\n"
                f"Wylie (Morphological):\n{wylie_output}\n\n"
                f"MST (IPA):\n{mst_output}\n\n\n"
                f"--- Detailed Word-by-Word ---\n\n{formatted_output}"
            )
            return output
        elif language == "Sanskrit":
            return "Phonetics conversion for Sanskrit is not supported by the current Bophono schemes."
        else:
            return "Unsupported language for Phonetics task."
def normalize_sp_tokens(tokens):
    # Strip SentencePiece "▁" word-boundary markers
    return [t.replace("▁", "") for t in tokens]
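# Illustrative: normalize_sp_tokens(["▁bkra", "▁shis"]) -> ["bkra", "shis"]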
# --- Build interface ---
iface_text = gr.Interface(
    fn=run_task,
    inputs=[
        gr.Textbox(label="Input Text", lines=10),
        gr.Dropdown(choices=["Tibetan", "Sanskrit"], label="Language"),
        gr.Radio(choices=["Translate", "Tokenize", "Phonetics"], label="Task")
    ],
    outputs=gr.Textbox(label="Text Output", lines=20),
    title="Translation & Tokenization & Phonetics"
)
iface_tts = gr.Interface(
    fn=run_task_tts,
    inputs=gr.Textbox(label="Tibetan Input Text", lines=20),
    outputs=[
        gr.Audio(label="Play in Browser", type="numpy"),
        gr.Audio(label="Download/URL for Flutter", type="filepath"),
        gr.Textbox(label="Status")
    ],
    title="Tibetan TTS"
)
demo = gr.TabbedInterface([iface_tts, iface_text], tab_names=["TTS", "Translate/Tokenize"])
# demo.launch() blocks, so the FastAPI mounting below only takes effect when
# the module is served with uvicorn (e.g. `uvicorn app:app`) instead of run directly.
if __name__ == "__main__":
    demo.launch()
#############################################
# 🔥 Real API endpoint for Flutter
#############################################
# FastAPI, FileResponse, and gradio are already imported at the top.
api = FastAPI()

def generate_tts_file(text: str) -> str:
    """
    Wrapper around run_task_tts.
    Returns the path to the generated WAV file.
    """
    _, file_path, _ = run_task_tts(text)  # unpack the (audio, path, status) tuple
    return file_path
@api.post("/api/tts")
async def api_tts(request: Request):
    body = await request.json()
    text = body.get("text", "")
    # ✅ Ensure text is always a string
    if not isinstance(text, str):
        text = str(text)
    if not text.strip():
        return {"error": "No text provided"}
    # Generate the audio and return the WAV file
    _, output_path, status = run_task_tts(text)
    if not output_path:
        return {"error": status}
    return FileResponse(
        output_path,
        media_type="audio/wav",
        filename="tts.wav"
    )
#############################################
# 🔥 Attach the Gradio UI
#############################################
# `demo` is the TabbedInterface built above; mounting it at "/" keeps the UI
# and the /api/tts endpoint on the same server.
app = gr.mount_gradio_app(api, demo, path="/")
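#############################################
# Hedged client-side sketch (not part of this app): assuming the module is
# served with `uvicorn app:app --port 8000`, a client could fetch audio via:
#
#   import requests
#   r = requests.post("http://localhost:8000/api/tts",
#                     json={"text": "བཀྲ་ཤིས་བདེ་ལེགས།"})
#   open("tts.wav", "wb").write(r.content)
#############################################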