Spaces:
Sleeping
Sleeping
Bug fix for very long text
Browse files- app.py +34 -34
- text_cleaning.py +11 -6
app.py
CHANGED
|
@@ -134,60 +134,60 @@ def split_text_by_paragraphs(text, max_duration_minutes=5, max_chars=500):
|
|
| 134 |
import io
|
| 135 |
|
| 136 |
async def generate_audio_segment(text_segment, voice_short_name, rate_str, volume_str, pitch_str, segment_index):
|
| 137 |
-
"""Generate audio for a single text segment and
|
| 138 |
logger.info(f"Generating segment {segment_index}...")
|
| 139 |
communicate = edge_tts.Communicate(text_segment, voice_short_name, rate=rate_str, volume=volume_str, pitch=pitch_str)
|
| 140 |
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
try:
|
| 143 |
-
|
| 144 |
-
if chunk["type"] == "audio":
|
| 145 |
-
audio_data.write(chunk["data"])
|
| 146 |
except Exception as e:
|
| 147 |
logger.error(f"Error generating segment {segment_index} (Length: {len(text_segment)} chars): {e}")
|
|
|
|
|
|
|
| 148 |
raise gr.Error(f"Error generating segment {segment_index}: {e}")
|
| 149 |
|
| 150 |
-
audio_data.seek(0)
|
| 151 |
-
|
| 152 |
# Verify segment duration
|
| 153 |
try:
|
| 154 |
-
|
| 155 |
-
verify_buffer = io.BytesIO(audio_data.getvalue())
|
| 156 |
-
seg_audio = AudioSegment.from_mp3(verify_buffer)
|
| 157 |
duration_min = len(seg_audio) / 1000 / 60
|
| 158 |
-
logger.info(f"Segment {segment_index}
|
| 159 |
except Exception as e:
|
| 160 |
logger.error(f"Error checking segment {segment_index} duration: {e}")
|
| 161 |
|
| 162 |
-
|
| 163 |
-
return audio_data
|
| 164 |
|
| 165 |
-
async def merge_audio_files(
|
| 166 |
-
"""Merge multiple audio
|
| 167 |
-
if not
|
| 168 |
return None
|
| 169 |
|
| 170 |
-
logger.info(f"Merging {len(
|
| 171 |
-
|
| 172 |
-
#
|
| 173 |
-
combined = AudioSegment.empty()
|
| 174 |
-
for i, audio_obj in enumerate(audio_objects):
|
| 175 |
-
try:
|
| 176 |
-
audio_obj.seek(0)
|
| 177 |
-
segment = AudioSegment.from_mp3(audio_obj)
|
| 178 |
-
combined += segment
|
| 179 |
-
# Explicitly close/clear the BytesIO object to free memory
|
| 180 |
-
audio_obj.close()
|
| 181 |
-
except Exception as e:
|
| 182 |
-
logger.error(f"Error merging segment {i+1}: {e}")
|
| 183 |
-
|
| 184 |
-
# Save merged audio to a single temporary file
|
| 185 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
| 186 |
merged_path = tmp_file.name
|
| 187 |
-
combined.export(merged_path, format="mp3")
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
return merged_path
|
| 192 |
|
| 193 |
async def text_to_speech_generator(text, voice, rate, volume, pitch, cleaning_options=None):
|
|
|
|
| 134 |
import io
|
| 135 |
|
| 136 |
async def generate_audio_segment(text_segment, voice_short_name, rate_str, volume_str, pitch_str, segment_index):
    """Synthesize one text segment with edge-tts into a temporary MP3 file.

    Args:
        text_segment: Text to synthesize.
        voice_short_name: Voice identifier handed to ``edge_tts.Communicate``.
        rate_str: Forwarded as the ``rate`` keyword of ``edge_tts.Communicate``.
        volume_str: Forwarded as the ``volume`` keyword.
        pitch_str: Forwarded as the ``pitch`` keyword.
        segment_index: Segment number; used in log messages and in the
            temporary file's suffix.

    Returns:
        Path of the temporary MP3 file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        gr.Error: If synthesis fails. The temporary file is removed first and
            the original exception is attached as ``__cause__``.
    """
    logger.info(f"Generating segment {segment_index}...")
    communicate = edge_tts.Communicate(text_segment, voice_short_name, rate=rate_str, volume=volume_str, pitch=pitch_str)

    # Reserve a unique path on disk, then release our handle before
    # communicate.save() writes to that path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f"_seg{segment_index}.mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except Exception as e:
        logger.error(f"Error generating segment {segment_index} (Length: {len(text_segment)} chars): {e}")
        _remove_file_quietly(tmp_path)
        raise gr.Error(f"Error generating segment {segment_index}: {e}") from e
    except BaseException:
        # Cancellation (asyncio.CancelledError) is not an Exception subclass;
        # without this branch the half-written temp file would be leaked.
        _remove_file_quietly(tmp_path)
        raise

    # Best-effort duration report: a failure here is only logged, the segment
    # path is still returned.
    try:
        seg_audio = AudioSegment.from_mp3(tmp_path)
        # len() of the decoded audio is treated as milliseconds here.
        duration_min = len(seg_audio) / 1000 / 60
        logger.info(f"Segment {segment_index} saved to temp file (Duration: {duration_min:.2f} min)")
    except Exception as e:
        logger.error(f"Error checking segment {segment_index} duration: {e}")

    return tmp_path


def _remove_file_quietly(path):
    """Delete *path*, ignoring OS-level failures (e.g. it is already gone)."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the error that triggered it.
        pass
|
|
|
|
| 163 |
|
| 164 |
+
async def merge_audio_files(audio_paths):
    """Concatenate segment MP3 files byte-for-byte into one temporary MP3.

    Segments are appended in the order given. A segment that cannot be
    merged is logged and skipped, so the remaining segments still end up
    in the output. Each segment file is deleted once it has been appended.

    Args:
        audio_paths: Paths of the segment files to append, in playback order.

    Returns:
        Path of the merged temporary file (created with ``delete=False``, so
        the caller must remove it), or ``None`` when *audio_paths* is empty.
    """
    if not audio_paths:
        return None

    # Local import keeps this block self-contained.
    import shutil

    logger.info(f"Merging {len(audio_paths)} audio segments...")

    # Write through the handle the temp file was created with instead of
    # closing it and reopening the same path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as outfile:
        merged_path = outfile.name

        for i, audio_path in enumerate(audio_paths):
            try:
                # Stream in chunks so a segment is never held in memory whole.
                with open(audio_path, 'rb') as infile:
                    shutil.copyfileobj(infile, outfile)
                # Delete temporary segment file after merging
                os.remove(audio_path)
                logger.info(f"Merged and deleted segment {i+1}")
            except Exception as e:
                logger.error(f"Error merging segment {i+1}: {e}")

        # The write position equals the number of bytes actually written.
        total_size = outfile.tell()

    logger.info(f"Merged audio saved to {merged_path} (Total size: {total_size / 1024 / 1024:.2f} MB)")
    return merged_path
|
| 192 |
|
| 193 |
async def text_to_speech_generator(text, voice, rate, volume, pitch, cleaning_options=None):
|
text_cleaning.py
CHANGED
|
@@ -125,12 +125,17 @@ class TextCleaner:
|
|
| 125 |
|
| 126 |
@staticmethod
|
| 127 |
def remove_special_chars(text):
|
| 128 |
-
"""Remove
|
| 129 |
-
#
|
| 130 |
-
#
|
| 131 |
-
|
| 132 |
-
#
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
@staticmethod
|
| 136 |
def wetext_normalize(text):
|
|
|
|
| 125 |
|
| 126 |
@staticmethod
|
| 127 |
def remove_special_chars(text):
    """Strip symbols a TTS engine may read out literally; keep normal punctuation.

    Two passes are applied in order:

    1. Every occurrence of ``@ # $ % ^ & * + = | < > { } [ ] ~``, the
       backslash and the backtick is deleted.
    2. Any run of three or more identical characters taken from
       ``! ? . , ; : -`` is collapsed to a single character
       (``...`` becomes ``.``, ``---`` becomes ``-``). Runs of two are kept.

    Letters, digits, whitespace and all other punctuation pass through
    unchanged.
    """
    # Pass 1: a deletion-only translation table covering the same symbol set.
    symbols_to_drop = "@#$%^&*+=|\\<>{}[]~`"
    stripped = text.translate(str.maketrans("", "", symbols_to_drop))

    # Pass 2: collapse repeated punctuation (three or more of the same mark).
    collapsed = re.sub(r'([!?.,;:\-])\1{2,}', r'\1', stripped)
    return collapsed
|
| 139 |
|
| 140 |
@staticmethod
|
| 141 |
def wetext_normalize(text):
|