# NOTE: the next three lines are Hugging Face file-page residue, not part of the script:
# subhankarg's picture
# Upload folder using huggingface_hub
# 0558aa4 verified
import gradio as gr
import numpy as np
from nemo.collections.tts.modules.magpietts_inference.utils import ModelLoadConfig, load_magpie_model
'''
If gradio is not already installed, run: pip install --no-cache-dir gradio
export PYTHONPATH=$PYTHONPATH:/workspace/NeMo
pip install kaldialign
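pip install git+https://github.com/sarulab-speech/UTMOSv2.git@<tag>
(NOTE: the "<repo>.git@<tag>" part of this URL was replaced by "[email protected]" by e-mail obfuscation; UTMOSv2 is the presumed repository - confirm it and the pinned tag against the NeMo MagpieTTS setup instructions)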
place this file in root directory of NeMo
'''
# Path of the .nemo checkpoint; passed to ModelLoadConfig as ``nemo_file``.
CHECKPOINT_PATH = "/checkpoints/results/ML_MagpieTTS/CE-Removed_GRPO_Magpie_TTS_ML_V1.nemo"
# Codec model identifier; passed to ModelLoadConfig as ``codecmodel_path``.
CODEC_MODEL_PATH = "nvidia/nemo-nano-codec-22khz-1.89kbps-21.5fps"
def setup_model():
    """Load the MagpieTTS model described by the module-level constants.

    A ``ModelLoadConfig`` is built from ``CHECKPOINT_PATH`` and
    ``CODEC_MODEL_PATH`` with the legacy options disabled and no wandb
    hyper-parameters. ``load_magpie_model`` returns a pair; only its first
    element (the model) is kept.

    Returns:
        The loaded model, after ``.eval().cuda()`` has been called on it.
    """
    load_options = {
        "nemo_file": CHECKPOINT_PATH,
        "codecmodel_path": CODEC_MODEL_PATH,
        "legacy_codebooks": False,
        "legacy_text_conditioning": False,
        "hparams_from_wandb": None,
    }
    tts_model, _ = load_magpie_model(ModelLoadConfig(**load_options))
    # The demo only runs inference: switch to eval mode and move to CUDA.
    tts_model.eval().cuda()
    return tts_model
def main():
    """Load the TTS model once and serve it through a Gradio web interface.

    The interface has two text inputs (the text to synthesize and a language
    string defaulting to ``"en"``) and one audio output. It is launched on
    ``0.0.0.0:6007`` with ``share=True``.
    """
    tts_model = setup_model()

    def demo_tts(input_text, language):
        """Synthesize ``input_text`` and return ``(sample_rate, samples)``.

        The name and parameter names are kept as-is because Gradio receives
        this callable directly.
        """
        waveforms, lengths = tts_model.do_tts(
            input_text,
            language=language,
            apply_TN=True,
        )
        # Keep only the first item, trimmed to its reported length
        # (assumes the leading axis indexes items in a batch - TODO confirm).
        valid_length = lengths[0]
        first_clip = waveforms[0, :valid_length]
        clip_np = first_clip.cpu().numpy()
        return tts_model.sample_rate, clip_np

    text_box = gr.Textbox(label="Text to synthesize")
    language_box = gr.Textbox(label="Language", value="en")
    interface = gr.Interface(
        fn=demo_tts,
        inputs=[text_box, language_box],
        outputs="audio",
        title="Text to Speech MagpieTTS Demo",
    )
    interface.launch(server_name="0.0.0.0", server_port=6007, share=True)
# Start the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()