Spaces:

gagndeep
/

anycoder-fd7c5b01

Runtime error

App Files Files Community

gagndeep committed 29 days ago

Commit

fa63eb7

verified ·

1 Parent(s): 32bb546

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +206 -0
requirements.txt +14 -0

app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import gradio as gr
+import requests
+import io
+import numpy as np
+from pydub import AudioSegment
+import tempfile
+import os
+# Application-wide look and feel: the stock "Soft" theme with a blue/indigo
+# palette, the Inter web font and a couple of button/title overrides.
+_theme_base_options = {
+    "primary_hue": "blue",
+    "secondary_hue": "indigo",
+    "neutral_hue": "slate",
+    "font": gr.themes.GoogleFont("Inter"),
+    "text_size": "lg",
+    "spacing_size": "lg",
+    "radius_size": "md",
+}
+# Fine-grained overrides applied on top of the base theme; values starting
+# with "*" refer to other theme variables.
+_theme_overrides = {
+    "button_primary_background_fill": "*primary_600",
+    "button_primary_background_fill_hover": "*primary_700",
+    "block_title_text_weight": "600",
+}
+custom_theme = gr.themes.Soft(**_theme_base_options).set(**_theme_overrides)
+def _export_input_as_wav(audio_file, wav_path):
+    """Write the user's audio to *wav_path* in WAV format.
+
+    ``audio_file`` is either a ``(sample_rate, samples)`` tuple or a value
+    that ``AudioSegment.from_file`` accepts (typically a file path).
+    """
+    if isinstance(audio_file, tuple):
+        sample_rate, audio_data = audio_file
+        # Gradio documents multi-channel numpy audio as (samples, channels),
+        # so the channel count is the second axis, not the first.
+        channels = 1 if len(audio_data.shape) == 1 else audio_data.shape[1]
+        audio_segment = AudioSegment(
+            audio_data.tobytes(),
+            frame_rate=sample_rate,
+            sample_width=audio_data.dtype.itemsize,
+            channels=channels,
+        )
+    else:
+        audio_segment = AudioSegment.from_file(audio_file)
+    audio_segment.export(wav_path, format="wav")
+
+
+def _decode_audio_entry(entry):
+    """Return raw audio bytes from one entry of the API's ``data`` list.
+
+    Accepts either a base64 data-URI string or a dict carrying such a string
+    under ``"data"``. Raises ``gr.Error`` for anything else.
+    """
+    import base64
+
+    if isinstance(entry, dict):
+        entry = entry.get("data")
+    if isinstance(entry, str) and "base64," in entry:
+        return base64.b64decode(entry.split("base64,", 1)[1])
+    raise gr.Error("No audio data received from VibeVoice API")
+
+
+def vibevoice_conversion(audio_file, speaker_id="default", timeout=120):
+    """
+    Convert audio using the VibeVoice Realtime 0.5B model
+
+    Args:
+        audio_file: File path of the input audio, or a
+            ``(sample_rate, samples)`` tuple.
+        speaker_id: Name of the target speaker style sent to the API.
+        timeout: Seconds to wait for the remote API before giving up.
+
+    Returns:
+        Path of a temporary WAV file holding the converted audio.
+
+    Raises:
+        gr.Error: If no audio was supplied, the API call fails, or the
+            response carries no decodable audio.
+    """
+    import base64
+
+    if audio_file is None:
+        raise gr.Error("Please upload an audio file")
+    # Host name derived from the Space "anycoderapps/VibeVoice-Realtime-0.5B".
+    api_url = "https://anycoderapps-vibevoice-realtime-0-5b.hf.space/run/predict"
+    try:
+        # Reserve a temp path, normalise the input to WAV there, read it back.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
+            temp_audio_path = temp_audio.name
+        try:
+            _export_input_as_wav(audio_file, temp_audio_path)
+            with open(temp_audio_path, "rb") as f:
+                audio_bytes = f.read()
+        finally:
+            # Always remove the scratch file, even when the export fails.
+            os.unlink(temp_audio_path)
+        # Raw bytes are not JSON-serialisable, so ship the audio as a base64
+        # data URI.
+        # NOTE(review): assumes the remote Space accepts the legacy Gradio
+        # /run/predict audio format ({"name", "data"}) - confirm.
+        encoded_audio = base64.b64encode(audio_bytes).decode("ascii")
+        payload = {
+            "data": [
+                {"name": "input.wav", "data": f"data:audio/wav;base64,{encoded_audio}"},
+                speaker_id,
+            ]
+        }
+        # A timeout keeps a stalled remote from hanging the request forever.
+        response = requests.post(api_url, json=payload, timeout=timeout)
+        if response.status_code != 200:
+            raise gr.Error(f"VibeVoice API request failed with status code: {response.status_code}")
+        result = response.json()
+        if "data" not in result or len(result["data"]) == 0:
+            raise gr.Error("No audio data received from VibeVoice API")
+        converted_audio_bytes = _decode_audio_entry(result["data"][0])
+        # delete=False: the caller needs the file to outlive this function.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_converted:
+            temp_converted.write(converted_audio_bytes)
+            return temp_converted.name
+    except gr.Error:
+        # Already a user-facing message; do not wrap it a second time.
+        raise
+    except Exception as e:
+        raise gr.Error(f"An error occurred during voice conversion: {str(e)}") from e
+def process_audio(audio_file, speaker_id):
+    """
+    Process the audio file and return the converted audio
+
+    This is the click handler for the convert button, which is wired to two
+    output components (the audio player and the status textbox), so two
+    values are returned.
+
+    Args:
+        audio_file: Input audio as provided by the audio component.
+        speaker_id: Speaker style chosen in the dropdown.
+
+    Returns:
+        A ``(converted_audio_path, status_message)`` tuple.
+
+    Raises:
+        gr.Error: If the conversion fails.
+    """
+    try:
+        converted_audio_path = vibevoice_conversion(audio_file, speaker_id)
+    except gr.Error:
+        # Already user-facing; re-raise unchanged instead of nesting messages.
+        raise
+    except Exception as e:
+        raise gr.Error(f"Error processing audio: {str(e)}") from e
+    return converted_audio_path, "Voice conversion complete!"
+# Create the Gradio interface.
+# The page title is set here because launch() has no `title` parameter.
+with gr.Blocks(title="VibeVoice Realtime 0.5B - Voice Conversion") as demo:
+    gr.Markdown("# 🎤 VibeVoice Realtime 0.5B - Voice Conversion")
+    gr.Markdown("""
+    ### Convert your voice to different styles using the VibeVoice Realtime 0.5B model
+    **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)**
+    Upload an audio file and select a speaker style to convert your voice. The VibeVoice model can transform your voice while preserving the emotional content and prosody.
+    """)
+    with gr.Row():
+        # Left column: input audio, speaker style and the trigger button.
+        with gr.Column():
+            gr.Markdown("### Input Audio")
+            input_audio = gr.Audio(
+                label="Upload your audio file",
+                type="filepath",
+                sources=["upload", "microphone"],
+                format="wav"
+            )
+            speaker_style = gr.Dropdown(
+                choices=[
+                    "default",
+                    "female_1",
+                    "male_1",
+                    "child",
+                    "elderly",
+                    "emotional"
+                ],
+                value="default",
+                label="Select Speaker Style"
+            )
+            convert_btn = gr.Button("🔄 Convert Voice", variant="primary", size="lg")
+        # Right column: converted audio and a read-only status line.
+        with gr.Column():
+            gr.Markdown("### Converted Audio")
+            output_audio = gr.Audio(
+                label="Converted Audio",
+                type="filepath",
+                format="wav"
+            )
+            status_text = gr.Textbox(
+                label="Status",
+                value="Ready to convert your voice!",
+                interactive=False
+            )
+    # Add examples
+    # NOTE(review): these example.com URLs look like placeholders - replace
+    # them with real sample files before relying on the examples panel.
+    examples = gr.Examples(
+        examples=[
+            ["https://example.com/sample1.wav", "female_1"],
+            ["https://example.com/sample2.wav", "male_1"],
+            ["https://example.com/sample3.wav", "emotional"]
+        ],
+        inputs=[input_audio, speaker_style],
+        label="Try these examples:"
+    )
+    # Set up the conversion event; the handler feeds both the audio player
+    # and the status textbox.
+    convert_btn.click(
+        fn=process_audio,
+        inputs=[input_audio, speaker_style],
+        outputs=[output_audio, status_text],
+        api_visibility="public",
+        api_name="convert_voice"
+    )
+    gr.Markdown("""
+    ### About VibeVoice Realtime 0.5B
+    - **Model**: VibeVoice Realtime 0.5B
+    - **Size**: 0.5 Billion parameters
+    - **Features**: Real-time voice conversion with emotional preservation
+    - **Capabilities**: Speaker style transfer, emotional content preservation, high-quality voice conversion
+    ### Tips for Best Results
+    - Use clear, high-quality audio recordings
+    - Speak naturally and expressively
+    - For best results, use audio samples of 5-15 seconds
+    - The model preserves emotional content and prosody from the original voice
+    """)
+# Launch the application with custom theme and settings.
+# `title` and `description` are not launch() parameters: the title is passed
+# to gr.Blocks above and the description is already shown in the Markdown
+# header, so neither is passed here.
+demo.launch(
+    theme=custom_theme,
+    footer_links=[
+        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
+        {"label": "VibeVoice Model", "url": "https://huggingface.co/spaces/anycoderapps/VibeVoice-Realtime-0.5B"},
+        {"label": "Gradio", "url": "https://gradio.app"},
+        {"label": "Hugging Face", "url": "https://huggingface.co"}
+    ],
+    show_error=True
+)

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+requests
+pydub
+gradio>=6.0
+numpy
+Pillow
+scipy
+librosa
+soundfile
+pandas
+uvicorn
+fastapi
+pydantic
+python-multipart
+aiofiles