LiamKhoaLe committed on
Commit
dd35bcb
·
1 Parent(s): 6257342

Update Whisper JSON sender

Browse files
src/api/routes/audio.py CHANGED
@@ -33,14 +33,14 @@ async def transcribe_audio(
33
  JSON response with transcribed text
34
  """
35
  try:
36
- # Validate file type
37
- if not file.content_type or not any(
38
- file.content_type.startswith(f"audio/{fmt}")
39
- for fmt in ["wav", "opus", "flac", "webm"]
40
- ):
41
- # Also check file extension as fallback
42
- file_extension = file.filename.split('.')[-1].lower() if file.filename else ""
43
- if file_extension not in get_supported_formats():
44
  raise HTTPException(
45
  status_code=400,
46
  detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
@@ -56,7 +56,7 @@ async def transcribe_audio(
56
  if not validate_audio_format(audio_bytes):
57
  raise HTTPException(
58
  status_code=400,
59
- detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, or FLAC file."
60
  )
61
 
62
  # Transcribe audio
 
33
  JSON response with transcribed text
34
  """
35
  try:
36
+ # Validate file type by content-type or extension
37
+ valid_ctypes = {
38
+ "audio/wav", "audio/x-wav", "audio/webm", "audio/ogg", "audio/opus", "audio/flac"
39
+ }
40
+ content_type_ok = (file.content_type in valid_ctypes) if file.content_type else False
41
+ if not content_type_ok:
42
+ file_name = (file.filename or "").lower()
43
+ if not any(file_name.endswith(ext) for ext in get_supported_formats()):
44
  raise HTTPException(
45
  status_code=400,
46
  detail=f"Unsupported audio format. Supported formats: {', '.join(get_supported_formats())}"
 
56
  if not validate_audio_format(audio_bytes):
57
  raise HTTPException(
58
  status_code=400,
59
+ detail="Invalid audio format. Please ensure the file is a valid WAV, OPUS, FLAC, or WebM file."
60
  )
61
 
62
  # Transcribe audio
src/services/audio_transcription.py CHANGED
@@ -15,8 +15,8 @@ RIVA_FUNCTION_ID = "b702f636-f60c-4a3d-a6f4-f3568c13bd7d"
15
 
16
  async def transcribe_audio_file(
17
  audio_file_path: str,
18
- rotator: APIKeyRotator,
19
- language_code: str = "en"
20
  ) -> Optional[str]:
21
  """
22
  Transcribe audio file using NVIDIA Riva API.
@@ -45,7 +45,8 @@ async def transcribe_audio_file(
45
  url = f"https://{RIVA_SERVER}/v1/speech/transcribe"
46
 
47
  headers = {
48
- "Authorization": f"Bearer {api_key}",
 
49
  "Content-Type": "application/octet-stream"
50
  }
51
 
@@ -53,15 +54,9 @@ async def transcribe_audio_file(
53
  with open(audio_file_path, 'rb') as audio_file:
54
  audio_data = audio_file.read()
55
 
56
- # Prepare metadata for NVIDIA API
57
- metadata = {
58
- "function-id": RIVA_FUNCTION_ID,
59
- "language-code": language_code
60
- }
61
-
62
- # Add metadata to headers
63
- for key, value in metadata.items():
64
- headers[f"x-{key}"] = value
65
 
66
  # Make the request
67
  logger.info(f"Transcribing audio file: {audio_file_path} (language: {language_code})")
@@ -83,7 +78,7 @@ async def transcribe_audio_file(
83
  # Retry with new key
84
  api_key = rotator.get_key()
85
  if api_key:
86
- headers["Authorization"] = f"Bearer {api_key}"
87
  response = await client.post(url, headers=headers, content=audio_data)
88
 
89
  response.raise_for_status()
 
15
 
16
  async def transcribe_audio_file(
17
  audio_file_path: str,
18
+ language_code: str,
19
+ rotator: APIKeyRotator
20
  ) -> Optional[str]:
21
  """
22
  Transcribe audio file using NVIDIA Riva API.
 
45
  url = f"https://{RIVA_SERVER}/v1/speech/transcribe"
46
 
47
  headers = {
48
+ # Provider docs show metadata key named "authorization" (lowercase) with Bearer token
49
+ "authorization": f"Bearer {api_key}",
50
  "Content-Type": "application/octet-stream"
51
  }
52
 
 
54
  with open(audio_file_path, 'rb') as audio_file:
55
  audio_data = audio_file.read()
56
 
57
+ # Prepare metadata for NVIDIA API (as headers per provider doc)
58
+ headers["function-id"] = RIVA_FUNCTION_ID
59
+ headers["language-code"] = language_code
 
 
 
 
 
 
60
 
61
  # Make the request
62
  logger.info(f"Transcribing audio file: {audio_file_path} (language: {language_code})")
 
78
  # Retry with new key
79
  api_key = rotator.get_key()
80
  if api_key:
81
+ headers["authorization"] = f"Bearer {api_key}"
82
  response = await client.post(url, headers=headers, content=audio_data)
83
 
84
  response.raise_for_status()
static/js/app.js CHANGED
@@ -59,8 +59,16 @@ class MedicalChatbotApp {
59
  this.setTheme(prefs.theme || 'auto');
60
  this.setupTheme();
61
 
62
- // Initialize audio recording
63
- this.initializeAudioRecording();
 
 
 
 
 
 
 
 
64
  }
65
 
66
  setupEventListeners() {
 
59
  this.setTheme(prefs.theme || 'auto');
60
  this.setupTheme();
61
 
62
+ // Initialize audio recording (guarded if module not present)
63
+ try {
64
+ if (typeof AudioRecordingUI !== 'undefined') {
65
+ this.initializeAudioRecording();
66
+ } else {
67
+ console.warn('[Audio] Recorder module not loaded; skipping initialization');
68
+ }
69
+ } catch (e) {
70
+ console.warn('[Audio] Failed to initialize recorder', e);
71
+ }
72
  }
73
 
74
  setupEventListeners() {