Spaces:

tsuching
/

Tibetan-tts

Running

App Files Files Community

tsuching committed on Dec 2, 2025

Commit

bbcda01

verified ·

1 Parent(s): 58a6962

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -24

app.py CHANGED Viewed

@@ -153,7 +153,7 @@ def safe_tokenize_sanskrit(text):
-def get_ipa_phonetics(text):
     """
     Converts Tibetan text into parallel Unicode, MST (IPA), and KVP (Romanization) output,
     formatted clearly by segmented word.
@@ -164,13 +164,21 @@ def get_ipa_phonetics(text):
     # Botok tokens include words, punctuation, and whitespace elements.
     tokens = [t.text for t in wt.tokenize(text)]
-    output_lines = []
     # Define headers for the output
-    HEADER_UNICODE = "Unicode:"
-    HEADER_WYLIE = "  Wylie (Morphological):"
-    HEADER_MST = "  MST (IPA):"
-    HEADER_KVP = "  KVP (Phonetic):"
     # 2. Process each token (word, punctuation, or space)
     for tok in tokens:
@@ -179,15 +187,27 @@ def get_ipa_phonetics(text):
             continue
         # Punctuation/Whitespace Handling: Pass through for spacing
-        if not tok.strip() or len(tok) == 1 and tok in '།།.':
             # Add a vertical space to clearly separate output by word/phrase
-            output_lines.append("\n")
-            continue
         unicode_str = tok
-        wylie_str = ""  # Initialized to prevent UnboundLocalError
-        mst_ipa = ""
-        kvp_phonetic = ""
         try:
             # Calculate Wylie first (always needed)
@@ -206,17 +226,25 @@ def get_ipa_phonetics(text):
             kvp_phonetic = "(Conversion Failed)"
         # 3. Format the output for one word
-        output = (
-            f"{HEADER_UNICODE} {unicode_str}\n"
-            f"{HEADER_WYLIE} {wylie_str}\n"
-            f"{HEADER_MST} {mst_ipa}\n"
-            f"{HEADER_KVP} {kvp_phonetic}\n"
-        )
-        output_lines.append(output)
     # 4. Join all formatted outputs into a single string
-    return "\n".join(output_lines)
 # Tibetan TTS function
@@ -395,9 +423,29 @@ def run_task(text, language, task):
     elif task == "Phonetics":
         if language == "Tibetan":
-            # The get_ipa_phonetics function now returns the formatted multi-line string
-            formatted_output = get_ipa_phonetics(text)
-            return formatted_output
         elif language == "Sanskrit":
             return "Phonetics conversion for Sanskrit is not supported by the current Bophono scheme."
         else:
@@ -419,7 +467,7 @@ iface_text = gr.Interface(
         gr.Radio(choices=["Translate", "Tokenize", "Phonetics"], label="Task")
     ],
     outputs=gr.Textbox(label="Text Output", lines=20),
-    title="Translation & Tokenization"
 )
 iface_tts = gr.Interface(

+def get_all_phonetics(text):
     """
     Converts Tibetan text into parallel Unicode, MST (IPA), and KVP (Romanization) output,
     formatted clearly by segmented word.
     # Botok tokens include words, punctuation, and whitespace elements.
     tokens = [t.text for t in wt.tokenize(text)]
+    #output_lines = []
+    # Dictionaries to store the results by token
+    results = {
+        "unicode": [],
+        "wylie": [],
+        "mst_ipa": [],
+        "kvp_phonetic": []
+    }
     # Define headers for the output
+    #HEADER_UNICODE = "Unicode:"
+    #HEADER_WYLIE = "  Wylie (Morphological):"
+    #HEADER_MST = "  MST (IPA):"
+    #HEADER_KVP = "  KVP (Phonetic):"
     # 2. Process each token (word, punctuation, or space)
     for tok in tokens:
             continue
         # Punctuation/Whitespace Handling: Pass through for spacing
+        #if not tok.strip() or len(tok) == 1 and tok in '།།.':
             # Add a vertical space to clearly separate output by word/phrase
+        #    output_lines.append("\n")
+        #    continue
+        # Punctuation/Whitespace Handling: Use a consistent placeholder for spacing
+        is_separator = not tok.strip() or len(tok) == 1 and tok in '།།.'
+        if is_separator:
+            # Append an empty-string placeholder to every list so they stay index-aligned
+            results["unicode"].append("")
+            results["wylie"].append("")
+            results["mst_ipa"].append("")
+            results["kvp_phonetic"].append("")
+            continue
         unicode_str = tok
+        #wylie_str = ""  # Initialized to prevent UnboundLocalError
+        #mst_ipa = ""
+        #kvp_phonetic = ""
+        # Initialize to avoid UnboundLocalError during failure
+        wylie_str, mst_ipa, kvp_phonetic = "(Failed)", "(Failed)", "(Failed)"
         try:
             # Calculate Wylie first (always needed)
             kvp_phonetic = "(Conversion Failed)"
         # 3. Format the output for one word
+        #output = (
+        #    f"{HEADER_UNICODE} {unicode_str}\n"
+        #    f"{HEADER_WYLIE} {wylie_str}\n"
+        #    f"{HEADER_MST} {mst_ipa}\n"
+        #    f"{HEADER_KVP} {kvp_phonetic}\n"
+        #)
+        #output_lines.append(output)
+        # Store results
+        results["unicode"].append(tok)
+        results["wylie"].append(wylie_str)
+        results["mst_ipa"].append(mst_ipa)
+        results["kvp_phonetic"].append(kvp_phonetic)
     # 4. Return the per-token results; joining for display is left to the caller
+    #return "\n".join(output_lines)
+    return results
 # Tibetan TTS function
     elif task == "Phonetics":
         if language == "Tibetan":
+            # get_all_phonetics now returns a dict of per-token lists, not a formatted string
+            #formatted_output = get_all_phonetics(text)
+            #return formatted_output
+            # 1. Get all schemes data
+            schemes_data = get_all_phonetics_schemes(text)
+            # 2. Format the three outputs in parallel (Unicode + Wylie + Phonetic)
+            unicode_output = " ".join([t for t in schemes_data['unicode'] if t.strip()]) # Cleaned up display
+            wylie_output = " ".join([t for t in schemes_data['wylie'] if t.strip()])
+            mst_output = " ".join([t for t in schemes_data['mst_ipa'] if t.strip()])
+            kvp_output = " ".join([t for t in schemes_data['kvp_phonetic'] if t.strip()])
+            # 3. Present all outputs in a single, formatted string for the Textbox
+            # You can copy and paste from this single box now.
+            output = (
+                f"--- Tibetan Phonetic Analysis ---\n\n"
+                f"1. Unicode Text (Input):\n{unicode_output}\n\n"
+                f"2. Wylie (Morphological):\n{wylie_output}\n\n"
+                f"3. MST (IPA):\n{mst_output}\n\n"
+                f"4. KVP (Phonetic):\n{kvp_output}"
+            )
+            return output
         elif language == "Sanskrit":
             return "Phonetics conversion for Sanskrit is not supported by the current Bophono scheme."
         else:
         gr.Radio(choices=["Translate", "Tokenize", "Phonetics"], label="Task")
     ],
     outputs=gr.Textbox(label="Text Output", lines=20),
+    title="Translation & Tokenization & Phonetics"
 )
 iface_tts = gr.Interface(