From a65e84244c46c2d8ffa56e59807c4fb4e8942070 Mon Sep 17 00:00:00 2001
From: Zhenbo Li
Date: Sat, 7 Sep 2024 10:21:32 -0400
Subject: [PATCH 1/3] use cow string (slightly)

---
 fire_seq_search_server/src/local_llm/mod.rs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/fire_seq_search_server/src/local_llm/mod.rs b/fire_seq_search_server/src/local_llm/mod.rs
index 4a7c769..5bd2337 100644
--- a/fire_seq_search_server/src/local_llm/mod.rs
+++ b/fire_seq_search_server/src/local_llm/mod.rs
@@ -90,6 +90,8 @@ pub struct LlmEngine {
 
 
+use std::borrow::Cow;
+use std::borrow::Cow::Borrowed;
 use tokio::task::yield_now;
 use tokio::task;
 
 
@@ -154,15 +156,18 @@ impl LlmEngine {
         }
     }
 
-    fn build_data(full_text: &str) -> OpenAiData {
-        fn build_message(full_text:&str) -> Message {
+    fn build_data(full_text: Cow<'_, str>) -> OpenAiData {
+
+        fn build_message(chat:String) -> Message {
             Message{
                 role: "user".to_owned(),
-                content: full_text.to_owned(),
+                content: chat,
             }
         }
         let mut msgs = Vec::new();
-        msgs.push( build_message(full_text) );
+        let mut chat_text = String::default(); // TODO
+        chat_text += &full_text;
+        msgs.push( build_message(chat_text) );
         OpenAiData {
             model: "model".to_owned(),
             messages: msgs,
@@ -174,7 +179,7 @@ impl LlmEngine{
     pub async fn summarize(&self, full_text: &str) -> String {
         //http://localhost:8080/completion
         let ep = self.endpoint.to_owned() + "/v1/chat/completions";
-        let data = Self::build_data(full_text);
+        let data = Self::build_data( Borrowed(full_text) );
         let res = self.client.post(&ep)
             .header("Content-Type", "application/json")
             .json(&data)

From 422c38d7a06ca8e205e04aa232dbe95219482a88 Mon Sep 17 00:00:00 2001
From: Zhenbo Li
Date: Sat, 7 Sep 2024 15:13:41 -0400
Subject: [PATCH 2/3] set prompt

---
 .../src/http_client/endpoints.rs            |  2 --
 fire_seq_search_server/src/local_llm/mod.rs | 17 +++++++++++++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/fire_seq_search_server/src/http_client/endpoints.rs b/fire_seq_search_server/src/http_client/endpoints.rs
index 58f0899..89bbc6f 100644
--- a/fire_seq_search_server/src/http_client/endpoints.rs
+++ b/fire_seq_search_server/src/http_client/endpoints.rs
@@ -33,8 +33,6 @@ pub async fn summarize(
 pub async fn get_llm_done_list(
     State(engine_arc): State<Arc<LlmEngine>>
 ) -> Html<String> {
-
-    info!("get list endpoint called");
     let r = engine_arc.get_llm_done_list();
     Html(r.await)
 }

diff --git a/fire_seq_search_server/src/local_llm/mod.rs b/fire_seq_search_server/src/local_llm/mod.rs
index 5bd2337..9ba88e1 100644
--- a/fire_seq_search_server/src/local_llm/mod.rs
+++ b/fire_seq_search_server/src/local_llm/mod.rs
@@ -10,6 +10,18 @@ use serde_derive::Deserialize;
 use serde_derive::Serialize;
 use serde;
 
+// TODO Allow user to set prompt, instead of hard-coded in code
+const prompt_string: &'static str = r##"
+You are a seasoned summary expert, capable of condensing and summarizing given articles, papers, or posts, accurately conveying the main idea to make the content easier to understand.
+
+You place great emphasis on user experience, never adding irrelevant content like "Summary," "The summary is as follows," "Original text," "You can check the original text if interested," or "Original link." Your summaries always convey the core information directly.
+
+You are adept at handling various large, small, and even chaotic text content, always accurately extracting key information and summarizing the core content globally to make it easier to understand.
+
+=== Below is the article ===
+
+"##;
+
 #[derive(Debug, Serialize, Deserialize)]
 pub struct OpenAiData {
     pub model: String,
@@ -165,9 +177,11 @@ impl LlmEngine {
             }
         }
         let mut msgs = Vec::new();
-        let mut chat_text = String::default(); // TODO
+
+        let mut chat_text = prompt_string.to_owned();
         chat_text += &full_text;
         msgs.push( build_message(chat_text) );
+
         OpenAiData {
             model: "model".to_owned(),
             messages: msgs,
@@ -247,7 +261,6 @@ impl LlmEngine{
         let mut r = Vec::new();
         let jcache = self.job_cache.lock().await;
         for (title, _text) in &jcache.done_job {
-            info!("already done : {}", &title);
             r.push(title.to_owned());
         }
         return r;

From bb75fc1ee37b87b04fd10ce27d8bea95b13b4317 Mon Sep 17 00:00:00 2001
From: Zhenbo Li
Date: Sat, 7 Sep 2024 16:23:26 -0400
Subject: [PATCH 3/3] expand shell

---
 fire_seq_search_server/Cargo.toml           | 3 ++-
 fire_seq_search_server/src/local_llm/mod.rs | 8 +++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fire_seq_search_server/Cargo.toml b/fire_seq_search_server/Cargo.toml
index 3fd1101..e052d4f 100644
--- a/fire_seq_search_server/Cargo.toml
+++ b/fire_seq_search_server/Cargo.toml
@@ -47,7 +47,7 @@ stop-words = "0.7.2"
 regex = "1"
 lingua = { version = "1.4.0", default-features = false, features = ["chinese", "english"] }
 
-
+shellexpand = "3.1"
 
 #Highlight (Output)
 html-escape = "0.2.13"
@@ -63,6 +63,7 @@ pdf-extract-temporary-mitigation-panic = "0.7.1"
 
 
 # llm related
+# TODO I should make them optional
 sha256 = "1.5.0"
 reqwest = { version = "0.12", features = ["json"] }
 futures = "0.3"

diff --git a/fire_seq_search_server/src/local_llm/mod.rs b/fire_seq_search_server/src/local_llm/mod.rs
index 9ba88e1..0b15c9e 100644
--- a/fire_seq_search_server/src/local_llm/mod.rs
+++ b/fire_seq_search_server/src/local_llm/mod.rs
@@ -286,16 +286,14 @@ struct LlamaFileDef {
 }
 
+use shellexpand::tilde;
 async fn locate_llamafile() -> Option<String> {
-    // TODO
     let mut lf = LlamaFileDef {
         filename: "mistral-7b-instruct-v0.2.Q4_0.llamafile".to_owned(),
         filepath: None,
         download_link: "mistral-7b-instruct-v0.2.Q4_0.llamafile".to_owned(),
     };
 
-    // TODO hack in dev
-    //let lf_path = "/var/home/lizhenbo/Downloads/mistral-7b-instruct-v0.2.Q4_0.llamafile";
-    let lf_base = "/Users/zhenboli/.llamafile/";
-    let lf_path = lf_base.to_owned() + &lf.filename;
+    let lf_base = tilde("~/.llamafile/");
+    let lf_path = lf_base.to_string() + &lf.filename;
     lf.filepath = Some( lf_path.to_owned() );
     info!("lf {:?}", &lf);
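Note on PATCH 1/3: a minimal standalone sketch of the Cow<'_, str> pattern the patch introduces, outside the patches themselves. Message and OpenAiData are simplified stand-ins for the structs in local_llm/mod.rs, and the prompt literal is a placeholder; only the Cow plumbing mirrors the patch.

use std::borrow::Cow;
use std::borrow::Cow::Borrowed;

// Simplified stand-ins for the real structs in local_llm/mod.rs.
struct Message { role: String, content: String }
struct OpenAiData { model: String, messages: Vec<Message> }

// Taking Cow<'_, str> lets a caller hand over either a borrowed &str
// (no copy at the call site) or an owned String; the one allocation
// happens here, where the prompt and the article are concatenated.
fn build_data(full_text: Cow<'_, str>) -> OpenAiData {
    let mut chat_text = String::from("(prompt placeholder)\n");
    chat_text += &full_text; // &Cow<str> derefs to &str
    OpenAiData {
        model: "model".to_owned(),
        messages: vec![Message { role: "user".to_owned(), content: chat_text }],
    }
}

fn main() {
    // Borrowed(..) wraps the &str without cloning it, which is what
    // summarize() does in the patch.
    let data = build_data(Borrowed("article body"));
    assert_eq!(data.messages[0].role, "user");
    println!("{}", data.messages[0].content);
}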
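Note on PATCH 2/3: a sketch of the request body that summarize() ends up posting to /v1/chat/completions once the hard-coded prompt is prepended. It assumes serde_json is available alongside the serde_derive the module already uses (reqwest's "json" feature depends on it); the structs are again simplified stand-ins.

use serde_derive::Serialize;

#[derive(Serialize)]
struct Message { role: String, content: String }

#[derive(Serialize)]
struct OpenAiData { model: String, messages: Vec<Message> }

fn main() {
    // After PATCH 2/3, content is prompt_string followed by the article,
    // so the serialized body has the shape:
    // {"model":"model","messages":[{"role":"user","content":"<prompt><article>"}]}
    let data = OpenAiData {
        model: "model".to_owned(),
        messages: vec![Message {
            role: "user".to_owned(),
            content: "(prompt)(article)".to_owned(),
        }],
    };
    println!("{}", serde_json::to_string(&data).unwrap());
}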
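Note on PATCH 3/3: a small sketch of the shellexpand::tilde behavior the patch relies on. tilde() returns a Cow<str>, expanding a leading ~ to the current user's home directory when one can be determined and passing the input through unchanged otherwise, which is what replaces the hard-coded /Users/zhenboli path. The existence check at the end is an illustrative addition, not part of the commit.

use shellexpand::tilde;
use std::path::Path;

fn main() {
    // "~/.llamafile/" becomes e.g. "/home/user/.llamafile/"; a path
    // without a leading ~ would be returned unchanged.
    let lf_base = tilde("~/.llamafile/");
    let lf_path = lf_base.to_string() + "mistral-7b-instruct-v0.2.Q4_0.llamafile";

    // Illustrative only: report whether the expanded path actually
    // points at a downloaded llamafile.
    if Path::new(&lf_path).exists() {
        println!("found llamafile at {lf_path}");
    } else {
        println!("no llamafile at {lf_path}");
    }
}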