Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/goose-cli/src/commands/configure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,8 @@ pub fn configure_extensions_dialog() -> Result<(), Box<dyn Error>> {
let extension = cliclack::select("Which built-in extension would you like to enable?")
.item(
"autovisualiser",
"Auto Visualizer",
"Data visualization and UI generation tools",
"Auto Visualiser",
"Data visualisation and UI generation tools",
)
.item(
"computercontroller",
Expand Down
209 changes: 82 additions & 127 deletions crates/goose-server/src/routes/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,35 @@ struct WhisperResponse {
text: String,
}

/// Validate audio input and return decoded bytes and file extension
fn validate_audio_input(
audio: &str,
mime_type: &str,
) -> Result<(Vec<u8>, &'static str), StatusCode> {
/// Transcribe audio using OpenAI's Whisper API
///
/// # Request
/// - `audio`: Base64 encoded audio data
/// - `mime_type`: MIME type of the audio (e.g., "audio/webm", "audio/wav")
///
/// # Response
/// - `text`: Transcribed text from the audio
///
/// # Errors
/// - 401: Unauthorized (missing or invalid X-Secret-Key header)
/// - 412: Precondition Failed (OpenAI API key not configured)
/// - 400: Bad Request (invalid base64 audio data)
/// - 413: Payload Too Large (audio file exceeds 25MB limit)
/// - 415: Unsupported Media Type (unsupported audio format)
/// - 502: Bad Gateway (OpenAI API error)
/// - 503: Service Unavailable (network error)
async fn transcribe_handler(
State(state): State<Arc<AppState>>,
headers: HeaderMap,
Json(request): Json<TranscribeRequest>,
) -> Result<Json<TranscribeResponse>, StatusCode> {
verify_secret_key(&headers, &state)?;

// Validate input first before checking API key configuration
// Decode the base64 audio data
let audio_bytes = BASE64.decode(audio).map_err(|_| StatusCode::BAD_REQUEST)?;
let audio_bytes = BASE64
.decode(&request.audio)
.map_err(|_| StatusCode::BAD_REQUEST)?;

// Check file size
if audio_bytes.len() > MAX_AUDIO_SIZE_BYTES {
Expand All @@ -61,9 +83,8 @@ fn validate_audio_input(
}

// Determine file extension based on MIME type
let file_extension = match mime_type {
let file_extension = match request.mime_type.as_str() {
"audio/webm" => "webm",
"audio/webm;codecs=opus" => "webm",
"audio/mp4" => "mp4",
"audio/mpeg" => "mp3",
"audio/mpga" => "mpga",
Expand All @@ -73,18 +94,13 @@ fn validate_audio_input(
_ => return Err(StatusCode::UNSUPPORTED_MEDIA_TYPE),
};

Ok((audio_bytes, file_extension))
}

/// Get OpenAI configuration (API key and host)
fn get_openai_config() -> Result<(String, String), StatusCode> {
// Get the OpenAI API key from config (after input validation)
let config = goose::config::Config::global();
let api_key: String = config
.get_secret("OPENAI_API_KEY")
.map_err(|_| StatusCode::PRECONDITION_FAILED)?;

let api_key: String = config.get_secret("OPENAI_API_KEY").map_err(|e| {
tracing::error!("Failed to get OpenAI API key: {:?}", e);
StatusCode::PRECONDITION_FAILED
})?;

// Get the OpenAI host from config (with default)
let openai_host = match config.get("OPENAI_HOST", false) {
Ok(value) => value
.as_str()
Expand All @@ -93,41 +109,19 @@ fn get_openai_config() -> Result<(String, String), StatusCode> {
Err(_) => "https://api.openai.com".to_string(),
};

Ok((api_key, openai_host))
}

/// Send transcription request to OpenAI Whisper API
async fn send_openai_request(
audio_bytes: Vec<u8>,
file_extension: &str,
mime_type: &str,
api_key: &str,
openai_host: &str,
) -> Result<WhisperResponse, StatusCode> {
tracing::info!("Using OpenAI host: {}", openai_host);
tracing::info!(
"Audio file size: {} bytes, extension: {}, mime_type: {}",
audio_bytes.len(),
file_extension,
mime_type
);
tracing::debug!("Using OpenAI host: {}", openai_host);

// Create a multipart form with the audio file
let part = reqwest::multipart::Part::bytes(audio_bytes)
.file_name(format!("audio.{}", file_extension))
.mime_str(mime_type)
.map_err(|e| {
tracing::error!("Failed to create multipart part: {:?}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
.mime_str(&request.mime_type)
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

let form = reqwest::multipart::Form::new()
.part("file", part)
.text("model", "whisper-1")
.text("response_format", "json");

tracing::info!("Created multipart form for OpenAI Whisper API");

// Make request to OpenAI Whisper API
let client = Client::builder()
.timeout(Duration::from_secs(OPENAI_TIMEOUT_SECONDS))
Expand All @@ -137,11 +131,6 @@ async fn send_openai_request(
StatusCode::INTERNAL_SERVER_ERROR
})?;

tracing::info!(
"Sending request to OpenAI: {}/v1/audio/transcriptions",
openai_host
);

let response = client
.post(format!("{}/v1/audio/transcriptions", openai_host))
.header("Authorization", format!("Bearer {}", api_key))
Expand All @@ -161,25 +150,9 @@ async fn send_openai_request(
}
})?;

tracing::info!(
"Received response from OpenAI with status: {}",
response.status()
);

if !response.status().is_success() {
let status = response.status();
let error_text = response.text().await.unwrap_or_default();
tracing::error!("OpenAI API error (status: {}): {}", status, error_text);

// Check for specific error codes
if status == 401 {
tracing::error!("OpenAI API key appears to be invalid or unauthorized");
return Err(StatusCode::UNAUTHORIZED);
} else if status == 429 {
tracing::error!("OpenAI API quota or rate limit exceeded");
return Err(StatusCode::TOO_MANY_REQUESTS);
}

tracing::error!("OpenAI API error: {}", error_text);
return Err(StatusCode::BAD_GATEWAY);
}

Expand All @@ -188,45 +161,6 @@ async fn send_openai_request(
StatusCode::INTERNAL_SERVER_ERROR
})?;

Ok(whisper_response)
}

/// Transcribe audio using OpenAI's Whisper API
///
/// # Request
/// - `audio`: Base64 encoded audio data
/// - `mime_type`: MIME type of the audio (e.g., "audio/webm", "audio/wav")
///
/// # Response
/// - `text`: Transcribed text from the audio
///
/// # Errors
/// - 401: Unauthorized (missing or invalid X-Secret-Key header)
/// - 412: Precondition Failed (OpenAI API key not configured)
/// - 400: Bad Request (invalid base64 audio data)
/// - 413: Payload Too Large (audio file exceeds 25MB limit)
/// - 415: Unsupported Media Type (unsupported audio format)
/// - 502: Bad Gateway (OpenAI API error)
/// - 503: Service Unavailable (network error)
async fn transcribe_handler(
State(state): State<Arc<AppState>>,
headers: HeaderMap,
Json(request): Json<TranscribeRequest>,
) -> Result<Json<TranscribeResponse>, StatusCode> {
verify_secret_key(&headers, &state)?;

let (audio_bytes, file_extension) = validate_audio_input(&request.audio, &request.mime_type)?;
let (api_key, openai_host) = get_openai_config()?;

let whisper_response = send_openai_request(
audio_bytes,
file_extension,
&request.mime_type,
&api_key,
&openai_host,
)
.await?;

Ok(Json(TranscribeResponse {
text: whisper_response.text,
}))
Expand All @@ -243,20 +177,47 @@ async fn transcribe_elevenlabs_handler(
) -> Result<Json<TranscribeResponse>, StatusCode> {
verify_secret_key(&headers, &state)?;

let (audio_bytes, file_extension) = validate_audio_input(&request.audio, &request.mime_type)?;
// Validate input first before checking API key configuration
// Decode the base64 audio data
let audio_bytes = BASE64
.decode(&request.audio)
.map_err(|_| StatusCode::BAD_REQUEST)?;

// Check file size
if audio_bytes.len() > MAX_AUDIO_SIZE_BYTES {
tracing::warn!(
"Audio file too large: {} bytes (max: {} bytes)",
audio_bytes.len(),
MAX_AUDIO_SIZE_BYTES
);
return Err(StatusCode::PAYLOAD_TOO_LARGE);
}

// Determine file extension and content type based on MIME type
let (file_extension, content_type) = match request.mime_type.as_str() {
"audio/webm" => ("webm", "audio/webm"),
"audio/mp4" => ("mp4", "audio/mp4"),
"audio/mpeg" => ("mp3", "audio/mpeg"),
"audio/mpga" => ("mp3", "audio/mpeg"),
"audio/m4a" => ("m4a", "audio/m4a"),
"audio/wav" => ("wav", "audio/wav"),
"audio/x-wav" => ("wav", "audio/wav"),
_ => return Err(StatusCode::UNSUPPORTED_MEDIA_TYPE),
};

// Get the ElevenLabs API key from config (after input validation)
let config = goose::config::Config::global();

// First try to get it as a secret
let api_key: String = match config.get_secret::<String>("ELEVENLABS_API_KEY") {
let api_key: String = match config.get_secret("ELEVENLABS_API_KEY") {
Ok(key) => key,
Err(_) => {
// Try to get it as non-secret (for backward compatibility)
match config.get("ELEVENLABS_API_KEY", false) {
Ok(value) => {
match value.as_str() {
Some(key_str) => {
tracing::info!("Migrating ElevenLabs API key to secret storage");
let key = key_str.to_string();
// Migrate to secret storage
if let Err(e) = config.set(
Expand All @@ -267,25 +228,17 @@ async fn transcribe_elevenlabs_handler(
tracing::error!("Failed to migrate ElevenLabs API key: {:?}", e);
}
// Delete the non-secret version
if let Err(e) = config.delete("ELEVENLABS_API_KEY") {
tracing::warn!(
"Failed to delete non-secret ElevenLabs API key: {:?}",
e
);
}
let _ = config.delete("ELEVENLABS_API_KEY");
key
}
None => {
tracing::error!(
"ElevenLabs API key is not a string, found: {:?}",
value
);
tracing::error!("ElevenLabs API key is not a string");
return Err(StatusCode::PRECONDITION_FAILED);
}
}
}
Err(_) => {
tracing::error!("No ElevenLabs API key found in configuration");
Err(e) => {
tracing::error!("Failed to get ElevenLabs API key from config: {:?}", e);
return Err(StatusCode::PRECONDITION_FAILED);
}
}
Expand All @@ -295,7 +248,7 @@ async fn transcribe_elevenlabs_handler(
// Create multipart form for ElevenLabs API
let part = reqwest::multipart::Part::bytes(audio_bytes)
.file_name(format!("audio.{}", file_extension))
.mime_str(&request.mime_type)
.mime_str(content_type)
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

let form = reqwest::multipart::Form::new()
Expand Down Expand Up @@ -333,9 +286,8 @@ async fn transcribe_elevenlabs_handler(
})?;

if !response.status().is_success() {
let status = response.status();
let error_text = response.text().await.unwrap_or_default();
tracing::error!("ElevenLabs API error (status: {}): {}", status, error_text);
tracing::error!("ElevenLabs API error: {}", error_text);

// Check for specific error codes
if error_text.contains("Unauthorized") || error_text.contains("Invalid API key") {
Expand Down Expand Up @@ -378,13 +330,16 @@ async fn check_dictation_config(
let config = goose::config::Config::global();

// Check if ElevenLabs API key is configured
let has_elevenlabs = match config.get_secret::<String>("ELEVENLABS_API_KEY") {
Ok(_) => true,
Err(_) => {
let has_elevenlabs = config
.get_secret::<String>("ELEVENLABS_API_KEY")
.map(|_| true)
.unwrap_or_else(|_| {
// Check non-secret for backward compatibility
config.get("ELEVENLABS_API_KEY", false).is_ok()
}
};
config
.get("ELEVENLABS_API_KEY", false)
.map(|_| true)
.unwrap_or(false)
});

Ok(Json(serde_json::json!({
"elevenlabs": has_elevenlabs
Expand Down
5 changes: 1 addition & 4 deletions crates/goose-server/src/routes/config_management.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ pub async fn read_config(
}

let config = Config::global();

let response_value = match config.get(&query.key, query.is_secret) {
Ok(value) => {
if query.is_secret {
Expand All @@ -183,9 +182,7 @@ pub async fn read_config(
Value::Null
}
}
Err(_) => {
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
Err(_) => return Err(StatusCode::INTERNAL_SERVER_ERROR),
};
Ok(Json(response_value))
}
Expand Down
Loading
Loading