Spaces:
Runtime error
Runtime error
Commit
·
dd35bcb
1
Parent(s):
6257342
Upd whisper json sender
Browse files
- src/api/routes/audio.py +9 -9
- src/services/audio_transcription.py +8 -13
- static/js/app.js +10 -2
src/api/routes/audio.py
CHANGED
|
@@ -33,14 +33,14 @@ async def transcribe_audio(
|
|
| 33 |
JSON response with transcribed text
|
| 34 |
"""
|
| 35 |
try:
|
| 36 |
-
# Validate file type
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
if
|
| 44 |
raise HTTPException(
|
| 45 |
status_code=400,
|
| 46 |
detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
|
|
@@ -56,7 +56,7 @@ async def transcribe_audio(
|
|
| 56 |
if not validate_audio_format(audio_bytes):
|
| 57 |
raise HTTPException(
|
| 58 |
status_code=400,
|
| 59 |
-
detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, or
|
| 60 |
)
|
| 61 |
|
| 62 |
# Transcribe audio
|
|
|
|
| 33 |
JSON response with transcribed text
|
| 34 |
"""
|
| 35 |
try:
|
| 36 |
+
# Validate file type by content-type or extension
|
| 37 |
+
valid_ctypes = {
|
| 38 |
+
"audio/wav", "audio/x-wav", "audio/webm", "audio/ogg", "audio/opus", "audio/flac"
|
| 39 |
+
}
|
| 40 |
+
content_type_ok = (file.content_type in valid_ctypes) if file.content_type else False
|
| 41 |
+
if not content_type_ok:
|
| 42 |
+
file_name = (file.filename or "").lower()
|
| 43 |
+
if not any(file_name.endswith(ext) for ext in get_supported_formats()):
|
| 44 |
raise HTTPException(
|
| 45 |
status_code=400,
|
| 46 |
detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
|
|
|
|
| 56 |
if not validate_audio_format(audio_bytes):
|
| 57 |
raise HTTPException(
|
| 58 |
status_code=400,
|
| 59 |
+
detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, FLAC, or WebM file."
|
| 60 |
)
|
| 61 |
|
| 62 |
# Transcribe audio
|
src/services/audio_transcription.py
CHANGED
|
@@ -15,8 +15,8 @@ RIVA_FUNCTION_ID = "b702f636-f60c-4a3d-a6f4-f3568c13bd7d"
|
|
| 15 |
|
| 16 |
async def transcribe_audio_file(
|
| 17 |
audio_file_path: str,
|
| 18 |
-
|
| 19 |
-
|
| 20 |
) -> Optional[str]:
|
| 21 |
"""
|
| 22 |
Transcribe audio file using NVIDIA Riva API.
|
|
@@ -45,7 +45,8 @@ async def transcribe_audio_file(
|
|
| 45 |
url = f"https://{RIVA_SERVER}/v1/speech/transcribe"
|
| 46 |
|
| 47 |
headers = {
|
| 48 |
-
"
|
|
|
|
| 49 |
"Content-Type": "application/octet-stream"
|
| 50 |
}
|
| 51 |
|
|
@@ -53,15 +54,9 @@ async def transcribe_audio_file(
|
|
| 53 |
with open(audio_file_path, 'rb') as audio_file:
|
| 54 |
audio_data = audio_file.read()
|
| 55 |
|
| 56 |
-
# Prepare metadata for NVIDIA API
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
"language-code": language_code
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
# Add metadata to headers
|
| 63 |
-
for key, value in metadata.items():
|
| 64 |
-
headers[f"x-{key}"] = value
|
| 65 |
|
| 66 |
# Make the request
|
| 67 |
logger.info(f"Transcribing audio file: {audio_file_path} (language: {language_code})")
|
|
@@ -83,7 +78,7 @@ async def transcribe_audio_file(
|
|
| 83 |
# Retry with new key
|
| 84 |
api_key = rotator.get_key()
|
| 85 |
if api_key:
|
| 86 |
-
headers["
|
| 87 |
response = await client.post(url, headers=headers, content=audio_data)
|
| 88 |
|
| 89 |
response.raise_for_status()
|
|
|
|
| 15 |
|
| 16 |
async def transcribe_audio_file(
|
| 17 |
audio_file_path: str,
|
| 18 |
+
language_code: str,
|
| 19 |
+
rotator: APIKeyRotator
|
| 20 |
) -> Optional[str]:
|
| 21 |
"""
|
| 22 |
Transcribe audio file using NVIDIA Riva API.
|
|
|
|
| 45 |
url = f"https://{RIVA_SERVER}/v1/speech/transcribe"
|
| 46 |
|
| 47 |
headers = {
|
| 48 |
+
# Provider docs show metadata key named "authorization" (lowercase) with Bearer token
|
| 49 |
+
"authorization": f"Bearer {api_key}",
|
| 50 |
"Content-Type": "application/octet-stream"
|
| 51 |
}
|
| 52 |
|
|
|
|
| 54 |
with open(audio_file_path, 'rb') as audio_file:
|
| 55 |
audio_data = audio_file.read()
|
| 56 |
|
| 57 |
+
# Prepare metadata for NVIDIA API (as headers per provider doc)
|
| 58 |
+
headers["function-id"] = RIVA_FUNCTION_ID
|
| 59 |
+
headers["language-code"] = language_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# Make the request
|
| 62 |
logger.info(f"Transcribing audio file: {audio_file_path} (language: {language_code})")
|
|
|
|
| 78 |
# Retry with new key
|
| 79 |
api_key = rotator.get_key()
|
| 80 |
if api_key:
|
| 81 |
+
headers["authorization"] = f"Bearer {api_key}"
|
| 82 |
response = await client.post(url, headers=headers, content=audio_data)
|
| 83 |
|
| 84 |
response.raise_for_status()
|
static/js/app.js
CHANGED
|
@@ -59,8 +59,16 @@ class MedicalChatbotApp {
|
|
| 59 |
this.setTheme(prefs.theme || 'auto');
|
| 60 |
this.setupTheme();
|
| 61 |
|
| 62 |
-
// Initialize audio recording
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
}
|
| 65 |
|
| 66 |
setupEventListeners() {
|
|
|
|
| 59 |
this.setTheme(prefs.theme || 'auto');
|
| 60 |
this.setupTheme();
|
| 61 |
|
| 62 |
+
// Initialize audio recording (guarded if module not present)
|
| 63 |
+
try {
|
| 64 |
+
if (typeof AudioRecordingUI !== 'undefined') {
|
| 65 |
+
this.initializeAudioRecording();
|
| 66 |
+
} else {
|
| 67 |
+
console.warn('[Audio] Recorder module not loaded; skipping initialization');
|
| 68 |
+
}
|
| 69 |
+
} catch (e) {
|
| 70 |
+
console.warn('[Audio] Failed to initialize recorder', e);
|
| 71 |
+
}
|
| 72 |
}
|
| 73 |
|
| 74 |
setupEventListeners() {
|