feat(voice): remove the feature

efugier · Oct 30, 2024 · 6584411 · 6584411
1 parent 2a01eec
commit 6584411
Show file tree

Hide file tree

Showing 12 changed files with 4 additions and 364 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -23,19 +23,8 @@ src/
 │   │   # config structs for API config definition (url, key...)
 │   ├── api.rs
 │   │   # config structs for prompt definition (messages, model, temperature...)
-│   ├── prompt.rs
-│   │   # config structs for voice config (model, url, voice recording command...)
-│   └── voice.rs
-│   # voice api related code (request, adapters)
-├── voice
-│   │   # orchestrate the voice recording and request
-│   ├── mod.rs
-│   │   # start and stop the recording program
-│   ├── recording.rs
-│   │   # make the request to the api and read the result
-│   ├── api_call.rs
-│   │   # structs to parse and extract the message from third party answers
-│   └── response_schemas.rs
+│   └── prompt.rs
+│   # text api related code (request, adapters)
 └── text
     │   # make third party requests and read the result
     ├── mod.rs
@@ -74,21 +63,6 @@ main
 # exit
 ```
 
-**Voice**
-
-```python
-main 
--> prompt_customization::customize_prompt
--> voice::record_voice_and_get_transcript
-   -> voice::recording::start_recording
-   -> voice::recording::strop_recording
-   -> voice::api_call::post_audio_and_get_transcript
-<-
--> text::process_input_with_request
-  -> text::api_call::post_prompt_and_get_answer
-<-
-```
-
 ### Testing
 
 Some tests rely on environment variables and don't behave well with multi-threading. They are marked with `#[serial]` from the [serial_test](https://docs.rs/serial_test/latest/serial_test/index.html) crate.

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -25,9 +25,7 @@ serde_json = "1"
 toml = "0"
 ureq = { version="2", features = ["json"] }
 env_logger = "0"
-# device_query = { version = "2", optional = true }
 reqwest = { version = "0", features = ["json", "blocking", "multipart"] }
-device_query = "2"
 
 [dev-dependencies]
 tempfile = "3"

diff --git a/README.md b/README.md
@@ -50,7 +50,6 @@ Answers might be slow depending on your setup, you may want to try the third par
     - [Example workflows](#example-workflows)
 - [Configuration](#configuration) ← please read this carefully
     - [Ollama setup](#ollama-setup) ← easiest way to get running for free
-- [Voice](#voice)
 - [How to help?](./CONTRIBUTING.md)
 
 ## Installation
@@ -87,7 +86,6 @@ Arguments:
 Options:
   -e, --extend-conversation        whether to extend the previous conversation or start a new one
   -r, --repeat-input               whether to repeat the input before the output, useful to extend instead of replacing
-  -v, --voice                      whether to use voice for input
       --api <API>                  overrides which api to hit [possible values: another-api-for-tests, ollama, anthropic, groq, mistral, openai]
   -m, --model <MODEL>              overrides which model (of the api) to use
   -t, --temperature <TEMPERATURE>  higher temperature  means answer further from the average
@@ -107,8 +105,6 @@ The key to make this work seamlessly is a good default prompt that tells the mod
 ```
 sc "say hi"  # just ask (uses default prompt template)
 
-sc -v  # use your voice to ask (then press <space> to stop the recording)
-
 sc test                         # use templated prompts
 sc test "and parametrize them"  # extend them on the fly
 
@@ -307,14 +303,6 @@ content ='''Write tests using pytest for the following code. Parametrize it if a
 '''
 ```
 
-```toml
-url = "https://api.openai.com/v1/audio/transcriptions"
-# make sure this command fit you OS and works on its own
-recording_command = "arecord -f S16_LE --quiet <audio_file_path_placeholder>"
-model = "whisper-1"
-api = "openai"
-```
-
 see [the config setup file](./src/config/mod.rs) for more details.
 
 ## Ollama setup
@@ -327,44 +315,6 @@ see [the config setup file](./src/config/mod.rs) for more details.
 
 ⚠️ Answers might be slow depending on your setup, you may want to try the third party APIs for an optimal workflow. Timeout is configurable and set to 30s by default.
 
-# Voice
-
-⚠️ **Deprecation in progress**
-
-I only have a linux system and wasn't able to test the recording commands for other OS. The good news is you can make up your own that works and then plug it in the config.
-
-Use the `-v` flag to ask for voice input then press space to end it. It will replace the prompt customization arg.
-
-- uses openai whisper
-- make sure your `recording_command` field works in your termimal command, it should create a wav file
-- requires you to have an openai key in your `.api_keys.toml`
-- you can still use any prompt template or text model to get your output
-
-```
-sc -v
-
-sc test -v
-
-sc test -v -c src/**/*
-```
-
-This could be a good accessiblity feature but I personnaly never use it and given its current state I am considering removing it.
-
-## How does it work?
-
-`smartcat` call an external program that handles the voice recording and instructs it to save the result in a wav file. It then listens to keyboard inputs and stops the recording when space is pressed.
-
-The recording is then sent to a speech to text model, the resulting transcript is finally added to the prompt and sent to the text model to get an answer.
-
-On linux: TODO
-On Mac: TODO
-On windows: TODO
-
-To debug, you can check the `conversation.toml` file or listen to the `audio.wav` in the smart config home and see what the model heard and transcripted.
-
-This feature shoud be offered as an extra down the road, totally optional on install. PRs are welcomed!
-
-
 ## How to help?
 
 See [CONTRIBUTING.md](./CONTRIBUTING.md).
diff --git a/src/config/mod.rs b/src/config/mod.rs
@@ -1,13 +1,11 @@
 pub mod api;
 pub mod prompt;
-pub mod voice;
 
 use std::{path::PathBuf, process::Command};
 
 use self::{
     api::{api_keys_path, generate_api_keys_file, get_api_config},
     prompt::{generate_prompts_file, get_prompts, prompts_path},
-    voice::{generate_voice_file, voice_config_path},
 };
 use crate::utils::is_interactive;
 
@@ -49,14 +47,6 @@ pub fn ensure_config_files() -> std::io::Result<()> {
         generate_prompts_file()?
     }
 
-    if !voice_config_path().exists() {
-        println!(
-            "Voice config file not found at {}, generating one.\n...",
-            voice_config_path().display()
-        );
-        generate_voice_file().expect("Unable to generate config files");
-    };
-
     if !api_keys_path().exists() {
         println!(
             "API config file not found at {}, generating one.\n...",
@@ -118,9 +108,7 @@ mod tests {
             api::{api_keys_path, default_timeout_seconds, Api, ApiConfig},
             ensure_config_files,
             prompt::{prompts_path, Prompt},
-            resolve_config_path,
-            voice::{voice_config_path, VoiceConfig},
-            CUSTOM_CONFIG_ENV_VAR, DEFAULT_CONFIG_PATH,
+            resolve_config_path, CUSTOM_CONFIG_ENV_VAR, DEFAULT_CONFIG_PATH,
         },
         utils::IS_NONINTERACTIVE_ENV_VAR,
     };
@@ -184,11 +172,9 @@ mod tests {
 
         let api_keys_path = api_keys_path();
         let prompts_path = prompts_path();
-        let voice_path = voice_config_path();
 
         assert!(!api_keys_path.exists());
         assert!(!prompts_path.exists());
-        assert!(!voice_path.exists());
 
         let result = ensure_config_files();
 
@@ -201,7 +187,6 @@ mod tests {
 
         assert!(api_keys_path.exists());
         assert!(prompts_path.exists());
-        assert!(voice_path.exists());
 
         Ok(())
     }
@@ -217,7 +202,6 @@ mod tests {
 
         let api_keys_path = api_keys_path();
         let prompts_path = prompts_path();
-        let voice_path = voice_config_path();
 
         // Precreate files with some content
         let mut api_keys_file = fs::File::create(&api_keys_path)?;
@@ -226,9 +210,6 @@ mod tests {
         let mut prompts_file = fs::File::create(&prompts_path)?;
         prompts_file.write_all(b"Some prompts data")?;
 
-        let mut voice_file = fs::File::create(&voice_path)?;
-        voice_file.write_all(b"Some voice data")?;
-
         let result = ensure_config_files();
 
         // Restoring the original environment variable
@@ -242,7 +223,6 @@ mod tests {
         // Check if files still exist
         assert!(api_keys_path.exists());
         assert!(prompts_path.exists());
-        assert!(voice_path.exists());
 
         // Check if the contents remain unchanged
         let mut api_keys_content = String::new();
@@ -253,10 +233,6 @@ mod tests {
         fs::File::open(&prompts_path)?.read_to_string(&mut prompts_content)?;
         assert_eq!(prompts_content, "Some prompts data".to_string());
 
-        let mut voice_content = String::new();
-        fs::File::open(&voice_path)?.read_to_string(&mut voice_content)?;
-        assert_eq!(voice_content, "Some voice data".to_string());
-
         Ok(())
     }
 
@@ -271,11 +247,9 @@ mod tests {
 
         let api_keys_path = api_keys_path();
         let prompts_path = prompts_path();
-        let voice_path = voice_config_path();
 
         assert!(!api_keys_path.exists());
         assert!(!prompts_path.exists());
-        assert!(!voice_path.exists());
 
         let result = ensure_config_files();
 
@@ -289,7 +263,6 @@ mod tests {
         // Read back the files and deserialize
         let api_config_contents = fs::read_to_string(&api_keys_path)?;
         let prompts_config_contents = fs::read_to_string(&prompts_path)?;
-        let voice_file_content = fs::read_to_string(&voice_path)?;
 
         // Deserialize contents to expected data structures
         // TODO: would be better to use `get_config` and `get_prompts` but
@@ -301,9 +274,6 @@ mod tests {
         let prompt_config: HashMap<String, Prompt> =
             toml::from_str(&prompts_config_contents).expect("Failed to deserialize prompts config");
 
-        let voice_config: VoiceConfig =
-            toml::from_str(&voice_file_content).expect("Failed to deserialize voice config");
-
         // Check if the content matches the default values
 
         // API
@@ -330,9 +300,6 @@ mod tests {
         let empty_prompt = Prompt::empty();
         assert_eq!(prompt_config.get("empty"), Some(&empty_prompt));
 
-        // Voice
-        assert_eq!(voice_config, VoiceConfig::default());
-
         Ok(())
     }
 }
diff --git a/src/config/prompt.rs b/src/config/prompt.rs
@@ -10,7 +10,6 @@ use crate::config::{api::Api, resolve_config_path};
 
 const PROMPT_FILE: &str = "prompts.toml";
 const CONVERSATION_FILE: &str = "conversation.toml";
-const AUDIO_FILE: &str = "audio.wav";
 
 #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
 pub struct Prompt {
@@ -101,10 +100,6 @@ pub fn conversation_file_path() -> PathBuf {
     resolve_config_path().join(CONVERSATION_FILE)
 }
 
-pub fn audio_file_path() -> PathBuf {
-    resolve_config_path().join(AUDIO_FILE)
-}
-
 pub fn get_last_conversation_as_prompt() -> Prompt {
     let content = fs::read_to_string(conversation_file_path()).unwrap_or_else(|error| {
         panic!(