asbgig committed on
Commit
50f1b46
·
verified ·
1 Parent(s): 9aaaf3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -43
app.py CHANGED
@@ -1,16 +1,14 @@
1
- # app.py — TalkClone (HF Space, 1-column, persistent output, CPU-friendly)
2
 
3
- import os, re, tempfile, shutil
4
  import numpy as np
5
  import soundfile as sf
6
  import gradio as gr
7
 
8
- # Agree to Coqui CPML non-interactively on Spaces
9
  os.environ.setdefault("COQUI_TOS_AGREED", "1")
10
 
11
  MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
 
13
- # Show labels, send codes (XTTS v2 supported only)
14
  LANGS = [
15
  ("English", "en"),
16
  ("Spanish", "es"),
@@ -35,7 +33,6 @@ LANG_MAP = {name: code for name, code in LANGS}
35
 
36
  _tts = None
37
  def get_tts():
38
- """Lazy-load TTS; try GPU if available, else CPU."""
39
  global _tts
40
  if _tts is not None:
41
  return _tts
@@ -48,7 +45,6 @@ def get_tts():
48
  use_gpu = torch.cuda.is_available()
49
  except Exception:
50
  use_gpu = False
51
-
52
  from TTS.api import TTS
53
  try:
54
  _tts = TTS(MODEL_NAME, gpu=use_gpu)
@@ -67,24 +63,27 @@ def synth_to_file_safe(tts, txt, out_path, wav_path, lang, speed):
67
  tts.tts_to_file(text=txt, file_path=out_path,
68
  speaker_wav=wav_path, language=lang)
69
 
 
 
 
 
 
 
70
  def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
71
  if ref_audio is None:
72
  raise gr.Error("Upload a reference voice (10–60s, clean speech).")
73
  text = clean_text(text)
74
  if not text:
75
  raise gr.Error("Please enter some text.")
76
-
77
- # Limit extremely long jobs on free CPU
78
  if len(text) > 1400 and not split_sentences:
79
  raise gr.Error("Text is very long. Enable 'Auto split' or paste a shorter chunk on CPU.")
80
 
81
  lang = LANG_MAP.get(lang_label, "en")
82
  wav_path = ref_audio
83
 
84
- # Sentence split + also break very long sentences into ~200 chars
85
  chunks = [text]
86
  if split_sentences:
87
- rough = [s.strip() for s in re.split(r'(?<=[.!?؟。.。،،]|[\u0964\u0965])\s+', text) if s.strip()]
88
  chunks = []
89
  for s in rough:
90
  if len(s) <= 220:
@@ -95,8 +94,6 @@ def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.P
95
 
96
  tts = get_tts()
97
  out_wavs = []
98
-
99
- # Use a temp dir for parts, but write the FINAL file to a persistent temp path
100
  with tempfile.TemporaryDirectory() as td:
101
  total = max(len(chunks), 1)
102
  for i, chunk in enumerate(chunks, 1):
@@ -106,68 +103,67 @@ def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.P
106
  data, sr = sf.read(part_path)
107
  out_wavs.append((data, sr))
108
 
109
- # Concatenate and save to a persistent temp file that survives function return
110
  if len(out_wavs) == 1:
111
  final_data, sr = out_wavs[0]
112
  else:
113
  sr = out_wavs[0][1]
114
  final_data = np.concatenate([d for d, _ in out_wavs], axis=0)
115
 
116
- persistent_tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
117
- persistent_tmp_path = persistent_tmp.name
118
- persistent_tmp.close() # path remains; we write to it next
119
- sf.write(persistent_tmp_path, final_data, sr)
 
120
 
121
- return persistent_tmp_path
 
 
 
 
 
 
 
 
122
 
123
- # ==== Styles (1 column + colors + hide HF/Gradio UI chrome) ====
124
  CUSTOM_CSS = """
125
  .gradio-container { max-width: 860px !important; margin: 0 auto; }
126
-
127
  #wrap, #ref, #lang, #txt, #spd, #split, #out_audio, #dl {
128
- background: #f8fafc !important; /* slate-50 */
129
- border: 1px solid #e5e7eb !important; /* gray-200 */
130
  border-radius: 14px !important;
131
  padding: 14px !important;
132
  }
133
-
134
- /* Make the component surfaces non-white */
135
- #ref, #out_audio, #dl { background: #eef2ff !important; } /* indigo-50-ish */
136
-
137
- /* Primary button color */
138
  #gen button, #gen { background: #10b981 !important; color: #fff !important; }
139
  #gen button:hover { filter: brightness(0.95); }
140
-
141
- /* Hide footer/API/Settings & obvious Space links */
142
  footer, .footer, #footer,
143
  a[href*="gradio.live"], a[href*="gradio.app"], a[href*="/api"], a[href*="hf.space"],
144
  button[aria-label="Settings"],
145
  [data-testid="block-analytics"], [data-testid="embed-info"] { display: none !important; }
146
  """
147
 
148
- with gr.Blocks(
149
- title="TalkClone - Voice Cloning & TTS",
150
- css=CUSTOM_CSS,
151
- analytics_enabled=False
152
- ) as demo:
153
  with gr.Column(elem_id="wrap"):
154
  gr.Markdown("## TalkClone — Text-to-Speech with Voice Cloning")
155
  gr.Markdown("Upload a short **reference voice** (10–60s), choose **language**, enter **text**, then **Generate**. "
156
  "On free CPU, keep text short or enable **Auto split** for speed.")
157
 
158
  ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath", elem_id="ref")
159
- language = gr.Dropdown(choices=[name for name, _ in LANGS], value="English", label="Language", elem_id="lang")
160
- text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…", elem_id="txt")
161
- speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed", elem_id="spd")
162
- split = gr.Checkbox(value=True, label="Auto split long text by sentence", elem_id="split")
163
- submit = gr.Button("Generate", variant="primary", elem_id="gen")
164
 
165
- output = gr.Audio(label="Cloned Speech", type="filepath", interactive=False, elem_id="out_audio")
166
- download = gr.File(label="Download audio", elem_id="dl")
167
 
168
  def run_and_return(text, ref_audio, language, speed, split):
169
- p = tts_clone(text, ref_audio, language, speed, split)
170
- return p, p
 
171
 
172
  submit.click(run_and_return,
173
  inputs=[text, ref_audio, language, speed, split],
 
1
+ # app.py — TalkClone (HF Space, 1-column, persistent output, DownloadButton)
2
 
3
+ import os, re, tempfile, shutil, time
4
  import numpy as np
5
  import soundfile as sf
6
  import gradio as gr
7
 
 
8
  os.environ.setdefault("COQUI_TOS_AGREED", "1")
9
 
10
  MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
11
 
 
12
  LANGS = [
13
  ("English", "en"),
14
  ("Spanish", "es"),
 
33
 
34
  _tts = None
35
  def get_tts():
 
36
  global _tts
37
  if _tts is not None:
38
  return _tts
 
45
  use_gpu = torch.cuda.is_available()
46
  except Exception:
47
  use_gpu = False
 
48
  from TTS.api import TTS
49
  try:
50
  _tts = TTS(MODEL_NAME, gpu=use_gpu)
 
63
  tts.tts_to_file(text=txt, file_path=out_path,
64
  speaker_wav=wav_path, language=lang)
65
 
66
def safe_filename(seed_text: str, lang_code: str) -> str:
    """Build a filesystem-safe, timestamped ``.wav`` file name.

    The first 40 characters of the cleaned text are reduced to ASCII
    letters, digits, ``_`` and ``-``; every other run of characters is
    collapsed into a single underscore, and leading/trailing underscores
    are stripped.

    Args:
        seed_text: Text the name is derived from (passed through ``clean_text``).
        lang_code: Language code placed between the base name and the timestamp.

    Returns:
        A name of the form ``<base>_<lang_code>_<YYYYmmdd-HHMMSS>.wav``.
    """
    base = re.sub(r"[^A-Za-z0-9_-]+", "_", clean_text(seed_text)[:40]).strip("_")
    # Apply the fallback only AFTER sanitizing: text without any ASCII
    # letters or digits sanitizes to an empty string, which would otherwise
    # produce a name that starts with a bare underscore.
    if not base:
        base = "talkclone"
    ts = time.strftime("%Y%m%d-%H%M%S")
    return f"{base}_{lang_code}_{ts}.wav"
71
+
72
  def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
73
  if ref_audio is None:
74
  raise gr.Error("Upload a reference voice (10–60s, clean speech).")
75
  text = clean_text(text)
76
  if not text:
77
  raise gr.Error("Please enter some text.")
 
 
78
  if len(text) > 1400 and not split_sentences:
79
  raise gr.Error("Text is very long. Enable 'Auto split' or paste a shorter chunk on CPU.")
80
 
81
  lang = LANG_MAP.get(lang_label, "en")
82
  wav_path = ref_audio
83
 
 
84
  chunks = [text]
85
  if split_sentences:
86
+ rough = [s.strip() for s in re.split(r'(?<=[.!?؟۔]|[\u0964\u0965])\s+', text) if s.strip()]
87
  chunks = []
88
  for s in rough:
89
  if len(s) <= 220:
 
94
 
95
  tts = get_tts()
96
  out_wavs = []
 
 
97
  with tempfile.TemporaryDirectory() as td:
98
  total = max(len(chunks), 1)
99
  for i, chunk in enumerate(chunks, 1):
 
103
  data, sr = sf.read(part_path)
104
  out_wavs.append((data, sr))
105
 
106
+ # concat
107
  if len(out_wavs) == 1:
108
  final_data, sr = out_wavs[0]
109
  else:
110
  sr = out_wavs[0][1]
111
  final_data = np.concatenate([d for d, _ in out_wavs], axis=0)
112
 
113
+ # write to persistent temp + copy to a nice-named path for downloading
114
+ ntf = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
115
+ ntf_path = ntf.name
116
+ ntf.close()
117
+ sf.write(ntf_path, final_data, sr)
118
 
119
+ pretty_name = os.path.join("/tmp", safe_filename(text, lang))
120
+ try:
121
+ shutil.copyfile(ntf_path, pretty_name)
122
+ dl_path = pretty_name
123
+ except Exception:
124
+ dl_path = ntf_path # fallback
125
+
126
+ # return both: audio preview path, and a file path for DownloadButton
127
+ return ntf_path, dl_path
128
 
 
129
  CUSTOM_CSS = """
130
  .gradio-container { max-width: 860px !important; margin: 0 auto; }
 
131
  #wrap, #ref, #lang, #txt, #spd, #split, #out_audio, #dl {
132
+ background: #f8fafc !important;
133
+ border: 1px solid #e5e7eb !important;
134
  border-radius: 14px !important;
135
  padding: 14px !important;
136
  }
137
+ #ref, #out_audio, #dl { background: #eef2ff !important; }
 
 
 
 
138
  #gen button, #gen { background: #10b981 !important; color: #fff !important; }
139
  #gen button:hover { filter: brightness(0.95); }
140
+ /* hide HF/Gradio chrome */
 
141
  footer, .footer, #footer,
142
  a[href*="gradio.live"], a[href*="gradio.app"], a[href*="/api"], a[href*="hf.space"],
143
  button[aria-label="Settings"],
144
  [data-testid="block-analytics"], [data-testid="embed-info"] { display: none !important; }
145
  """
146
 
147
+ with gr.Blocks(title="TalkClone - Voice Cloning & TTS", css=CUSTOM_CSS, analytics_enabled=False) as demo:
 
 
 
 
148
  with gr.Column(elem_id="wrap"):
149
  gr.Markdown("## TalkClone — Text-to-Speech with Voice Cloning")
150
  gr.Markdown("Upload a short **reference voice** (10–60s), choose **language**, enter **text**, then **Generate**. "
151
  "On free CPU, keep text short or enable **Auto split** for speed.")
152
 
153
  ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath", elem_id="ref")
154
+ language = gr.Dropdown(choices=LANG_LABELS, value="English", label="Language", elem_id="lang")
155
+ text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…", elem_id="txt")
156
+ speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed", elem_id="spd")
157
+ split = gr.Checkbox(value=True, label="Auto split long text by sentence", elem_id="split")
158
+ submit = gr.Button("Generate", variant="primary", elem_id="gen")
159
 
160
+ output = gr.Audio(label="Cloned Speech", type="filepath", interactive=False, elem_id="out_audio")
161
+ download = gr.DownloadButton(label="Download audio", elem_id="dl")
162
 
163
  def run_and_return(text, ref_audio, language, speed, split):
164
+ audio_path, dl_path = tts_clone(text, ref_audio, language, speed, split)
165
+ # set button to download the file we just wrote
166
+ return audio_path, gr.update(value=dl_path, label=f"Download ({os.path.basename(dl_path)})")
167
 
168
  submit.click(run_and_return,
169
  inputs=[text, ref_audio, language, speed, split],