Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nemo_skills/inference/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ class GenerationTaskConfig:
enable_litellm_cache: bool = False

# List of content types to drop from messages (e.g., base64 audio) to keep output files smaller
- drop_content_types: list[str] = field(default_factory=lambda: ["audio_url"])
+ drop_content_types: list[str] = field(default_factory=lambda: ["audio_url", "input_audio"])

# Audio configuration - set by benchmarks that need audio processing (mmau-pro, audiobench, etc.)
enable_audio: bool = False # Enable audio preprocessing (set by benchmark configs)
Expand Down
4 changes: 3 additions & 1 deletion nemo_skills/inference/model/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ def make_audio_content_block(base64_audio: str, audio_format: str = "audio_url")
if audio_format == "input_audio":
# OpenAI native format (works with NVIDIA API / Gemini / Azure)
return {"type": "input_audio", "input_audio": {"data": base64_audio, "format": "wav"}}
- else:
+ elif audio_format == "audio_url":
# Data URI format (works with vLLM / Qwen)
return {"type": "audio_url", "audio_url": {"url": f"data:audio/wav;base64,{base64_audio}"}}
+ else:
+ raise ValueError(f"Unsupported audio_format '{audio_format}'. Use 'audio_url' or 'input_audio'.")
Loading
Loading