Spaces:

MedSwin
/

MedicalDiagnosisSystem

Runtime error

LiamKhoaLe committed on Sep 22

Commit

dd35bcb

1 Parent(s): 6257342

Update Whisper JSON sender

Files changed (3) hide show

src/api/routes/audio.py CHANGED Viewed

@@ -33,14 +33,14 @@ async def transcribe_audio(
         JSON response with transcribed text
     """
     try:
-        # Validate file type
-        if not file.content_type or not any(
-            file.content_type.startswith(f"audio/{fmt}")
-            for fmt in ["wav", "opus", "flac", "webm"]
-        ):
-            # Also check file extension as fallback
-            file_extension = file.filename.split('.')[-1].lower() if file.filename else ""
-            if file_extension not in get_supported_formats():
                 raise HTTPException(
                     status_code=400,
                     detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
@@ -56,7 +56,7 @@ async def transcribe_audio(
         if not validate_audio_format(audio_bytes):
             raise HTTPException(
                 status_code=400,
-                detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, or FLAC file."
             )
         # Transcribe audio

         JSON response with transcribed text
     """
     try:
+        # Validate file type by content-type or extension
+        valid_ctypes = {
+            "audio/wav", "audio/x-wav", "audio/webm", "audio/ogg", "audio/opus", "audio/flac"
+        }
+        content_type_ok = (file.content_type in valid_ctypes) if file.content_type else False
+        if not content_type_ok:
+            file_name = (file.filename or "").lower()
+            if not any(file_name.endswith(ext) for ext in get_supported_formats()):
                 raise HTTPException(
                     status_code=400,
                     detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
         if not validate_audio_format(audio_bytes):
             raise HTTPException(
                 status_code=400,
+                detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, FLAC, or WebM file."
             )
         # Transcribe audio

src/services/audio_transcription.py CHANGED Viewed

@@ -15,8 +15,8 @@ RIVA_FUNCTION_ID = "b702f636-f60c-4a3d-a6f4-f3568c13bd7d"
 async def transcribe_audio_file(
     audio_file_path: str,
-    rotator: APIKeyRotator,
-    language_code: str = "en"
 ) -> Optional[str]:
     """
     Transcribe audio file using NVIDIA Riva API.
@@ -45,7 +45,8 @@ async def transcribe_audio_file(
         url = f"https://{RIVA_SERVER}/v1/speech/transcribe"
         headers = {
-            "Authorization": f"Bearer {api_key}",
             "Content-Type": "application/octet-stream"
         }
@@ -53,15 +54,9 @@ async def transcribe_audio_file(
         with open(audio_file_path, 'rb') as audio_file:
             audio_data = audio_file.read()
-        # Prepare metadata for NVIDIA API
-        metadata = {
-            "function-id": RIVA_FUNCTION_ID,
-            "language-code": language_code
-        }
-        # Add metadata to headers
-        for key, value in metadata.items():
-            headers[f"x-{key}"] = value
         # Make the request
         logger.info(f"Transcribing audio file: {audio_file_path} (language: {language_code})")
@@ -83,7 +78,7 @@ async def transcribe_audio_file(
                     # Retry with new key
                     api_key = rotator.get_key()
                     if api_key:
-                        headers["Authorization"] = f"Bearer {api_key}"
                         response = await client.post(url, headers=headers, content=audio_data)
                 response.raise_for_status()

 async def transcribe_audio_file(
     audio_file_path: str,
+    language_code: str,
+    rotator: APIKeyRotator
 ) -> Optional[str]:
     """
     Transcribe audio file using NVIDIA Riva API.
         url = f"https://{RIVA_SERVER}/v1/speech/transcribe"
         headers = {
+            # Provider docs show metadata key named "authorization" (lowercase) with Bearer token
+            "authorization": f"Bearer {api_key}",
             "Content-Type": "application/octet-stream"
         }
         with open(audio_file_path, 'rb') as audio_file:
             audio_data = audio_file.read()
+        # Prepare metadata for NVIDIA API (as headers per provider doc)
+        headers["function-id"] = RIVA_FUNCTION_ID
+        headers["language-code"] = language_code
         # Make the request
         logger.info(f"Transcribing audio file: {audio_file_path} (language: {language_code})")
                     # Retry with new key
                     api_key = rotator.get_key()
                     if api_key:
+                        headers["authorization"] = f"Bearer {api_key}"
                         response = await client.post(url, headers=headers, content=audio_data)
                 response.raise_for_status()

static/js/app.js CHANGED Viewed

@@ -59,8 +59,16 @@ class MedicalChatbotApp {
         this.setTheme(prefs.theme || 'auto');
         this.setupTheme();
-        // Initialize audio recording
-        this.initializeAudioRecording();
     }
     setupEventListeners() {

         this.setTheme(prefs.theme || 'auto');
         this.setupTheme();
+        // Initialize audio recording (guarded if module not present)
+        try {
+            if (typeof AudioRecordingUI !== 'undefined') {
+                this.initializeAudioRecording();
+            } else {
+                console.warn('[Audio] Recorder module not loaded; skipping initialization');
+            }
+        } catch (e) {
+            console.warn('[Audio] Failed to initialize recorder', e);
+        }
     }
     setupEventListeners() {