From 51e5a6da13eb6bc72f08fe314d8e78372e9cb559 Mon Sep 17 00:00:00 2001
From: Tyler Nickerson
Date: Sun, 14 Apr 2024 11:41:17 -0700
Subject: [PATCH 1/4] cli: scaffold a `serve` command on top of actix-web

---
 Cargo.lock              |  3 ++-
 cli/Cargo.toml          |  3 ++-
 cli/src/cli.rs          |  5 ++++
 cli/src/lib.rs          |  2 ++
 cli/src/main.rs         | 12 ++++++---
 cli/src/serve/lookup.rs | 31 +++++++++++++++++++++++
 cli/src/serve/mod.rs    | 56 +++++++++++++++++++++++++++++++++++++++++
 lib/Cargo.toml          |  2 --
 lib/src/core/read.rs    |  4 +--
 9 files changed, 108 insertions(+), 10 deletions(-)
 create mode 100644 cli/src/serve/lookup.rs
 create mode 100644 cli/src/serve/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index daf0b2ab..ec49e257 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -619,12 +619,14 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
 name = "cli"
 version = "2.0.0"
 dependencies = [
+ "actix-web",
  "clap",
  "console",
  "indicatif",
  "odict",
  "once_cell",
  "pulldown-cmark",
+ "serde",
 ]
 
 [[package]]
@@ -1822,7 +1824,6 @@
 name = "odict"
 version = "0.1.0"
 dependencies = [
- "actix-web",
  "byteorder",
  "charabia",
  "dirs",
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index 815348f1..bda8feda 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -10,7 +10,6 @@ odict = { path = "../lib", features = [
     "charabia",
     "sql",
     "search",
-    "serve",
     "json",
 ] }
 clap = { version = "4.5.4", features = ["derive", "cargo"] }
@@ -18,3 +17,5 @@ console = "0.15.8"
 once_cell = "1.19.0"
 indicatif = "0.17.8"
 pulldown-cmark = "0.10.2"
+actix-web = "4.5.1"
+serde = { version = "1.0.197", features = ["derive"] }
diff --git a/cli/src/cli.rs b/cli/src/cli.rs
index a1a72207..c96ccdff 100644
--- a/cli/src/cli.rs
+++ b/cli/src/cli.rs
@@ -3,6 +3,7 @@ use clap::{command, crate_version, Parser, Subcommand};
 use crate::alias::AliasCommands;
 use crate::{
     CompileArgs, DumpArgs, IndexArgs, LexiconArgs, LookupArgs, MergeArgs, NewArgs, SearchArgs,
+    ServeArgs,
 };
 
 #[derive(Debug, Parser)]
@@ -57,4 +58,8 @@ pub enum Commands {
     /// Run a full-text query on a compiled dictionary
     #[command(arg_required_else_help = true)]
     Search(SearchArgs),
+
+    /// Start a local web server to serve one or several dictionaries
+    #[command(arg_required_else_help = true)]
+    Serve(ServeArgs),
 }
diff --git a/cli/src/lib.rs b/cli/src/lib.rs
index d5a9cfb2..9c3c017e 100644
--- a/cli/src/lib.rs
+++ b/cli/src/lib.rs
@@ -11,6 +11,7 @@ mod merge;
 mod new;
 mod print;
 mod search;
+mod serve;
 mod utils;
 
 pub use alias::*;
@@ -25,4 +26,5 @@ pub use merge::*;
 pub use new::*;
 pub use print::*;
 pub use search::*;
+pub use serve::*;
 pub use utils::*;
diff --git a/cli/src/main.rs b/cli/src/main.rs
index 76e70b6d..5297db22 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -1,7 +1,10 @@
 use std::io::Write;
 
 use clap::Parser;
-use cli::{alias, compile, lexicon, lookup, merge, search, t, CLIContext, Commands, CLI};
+use cli::{
+    alias, compile, dump, index, lexicon, lookup, merge, new, search, serve, t, CLIContext,
+    Commands, CLI,
+};
 
 fn main() {
     let cli = CLI::parse();
@@ -11,13 +14,14 @@ fn main() {
         |c| match cli.command {
             Commands::Alias(ref args) => alias(c, args),
             Commands::Compile(ref args) => compile(c, args),
-            Commands::Dump(ref args) => cli::dump(c, args),
-            Commands::Index(ref args) => cli::index(c, args),
+            Commands::Dump(ref args) => dump(c, args),
+            Commands::Index(ref args) => index(c, args),
             Commands::Lexicon(ref args) => lexicon(c, args),
             Commands::Lookup(ref args) => lookup(c, args),
             Commands::Merge(ref args) => merge(c, args),
-            Commands::New(ref args) => cli::new(c, args),
+            Commands::New(ref args) => new(c, args),
             Commands::Search(ref args) => search(c, args),
+            Commands::Serve(ref args) => serve(c, args),
         },
         &mut ctx,
     );
diff --git a/cli/src/serve/lookup.rs b/cli/src/serve/lookup.rs
new file mode 100644
index 00000000..9a555527
--- /dev/null
+++ b/cli/src/serve/lookup.rs
@@ -0,0 +1,31 @@
+use std::collections::HashMap;
+
+use actix_web::{
+    get,
+    web::{Path, Query},
+    HttpRequest, HttpResponse, Responder,
+};
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+pub struct LookupRequest {
+    query: String,
+    follow: Option<bool>,
+    split: Option<String>,
+}
+
+#[get("/lookup/{name}")]
+async fn handle_lookup(
+    req: HttpRequest,
+    params: Query<LookupRequest>,
+    dict: Path<String>,
+) -> impl Responder {
+    let dictionary_map = req.app_data::<HashMap<String, String>>().unwrap();
+
+    if let Some(dictionary_path) = dictionary_map.get(&dict.into_inner()) {
+        // let dictionary =
+        return HttpResponse::Ok().body(format!("{:?}", params));
+    } else {
+        return HttpResponse::NotFound().body("Dictionary not found");
+    }
+}
diff --git a/cli/src/serve/mod.rs b/cli/src/serve/mod.rs
new file mode 100644
index 00000000..3d901e9d
--- /dev/null
+++ b/cli/src/serve/mod.rs
@@ -0,0 +1,56 @@
+use std::{collections::HashMap, error::Error, fs::canonicalize, ops::Deref, path::PathBuf};
+
+use actix_web::{App, HttpServer};
+use clap::{command, Args};
+
+use crate::CLIContext;
+
+mod lookup;
+
+#[derive(Debug, Args)]
+#[command(args_conflicts_with_subcommands = true)]
+#[command(flatten_help = true)]
+pub struct ServeArgs {
+    #[arg(short, default_value_t = 5005, help = "Port to listen on")]
+    port: u16,
+
+    #[arg(help = "List of dictionary paths or aliases to serve")]
+    dictionaries: Vec<String>,
+}
+
+#[actix_web::main]
+pub async fn serve(ctx: &mut CLIContext, args: &ServeArgs) -> Result<(), Box<dyn Error>> {
+    let port = args.port;
+    let dictionaries = &args.dictionaries;
+    let mut dictionary_map = HashMap::<String, String>::new();
+    let alias_manager = &ctx.alias_manager;
+
+    for dictionary in dictionaries {
+        let path = alias_manager.get(&dictionary);
+
+        if let Some(p) = path {
+            dictionary_map.insert(dictionary.to_owned(), p.to_owned());
+        } else {
+            let pb = PathBuf::from(dictionary);
+
+            if let Some(name) = pb.file_stem().map(|s| s.to_string_lossy().to_string()) {
+                let p = canonicalize(&pb)?.to_string_lossy().to_string();
+                dictionary_map.insert(name, p);
+            }
+        }
+    }
+
+    println!("{:?}", dictionary_map);
+    ctx.println(format!("\n🟢 Listening on port {}\n", port));
+
+    HttpServer::new(move || {
+        App::new()
+            .app_data(dictionary_map.to_owned())
+            .service(lookup::handle_lookup)
+    })
+    .bind(("127.0.0.1", port))?
+    .run()
+    .await?;
+
+    Ok(())
+}
diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index 543ae91e..7a02809c 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -9,7 +9,6 @@ sql = ["dep:sea-query"]
 config = ["dep:dirs"]
 charabia = ["dep:charabia"]
 search = ["dep:tantivy", "dep:tantivy-tokenizer-api"]
-serve = ["dep:actix-web"]
 
 default = ["json", "sql", "config"]
 
@@ -34,7 +33,6 @@ dirs = { version = "5.0.1", optional = true }
 once_cell = "1.19.0"
 serde_json = { version = "1.0.115", optional = true }
 validation = "0.0.1"
-actix-web = { version = "4.5.1", optional = true }
 tantivy = { version = "0.21.1", optional = true }
 charabia = { version = "0.8.8", optional = true }
 tantivy-tokenizer-api = { version = "0.2.0", optional = true }
diff --git a/lib/src/core/read.rs b/lib/src/core/read.rs
index f2274faf..c4df70bc 100644
--- a/lib/src/core/read.rs
+++ b/lib/src/core/read.rs
@@ -1,6 +1,6 @@
 use std::{
     error::Error,
-    fs::File,
+    fs::{canonicalize, File},
     io::{Cursor, Read, Seek},
     path::PathBuf,
 };
@@ -96,7 +96,7 @@ impl DictionaryReader {
 
         let mut result = self.read_from_bytes(&buffer)?;
 
-        result.path = Some(PathBuf::from(path));
+        result.path = Some(canonicalize(PathBuf::from(path))?);
 
         Ok(result)
     }
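Note on patch 1: it shares the dictionary map by cloning a bare `HashMap` into `app_data()` and pulling it back out with `req.app_data::<HashMap<String, String>>()`, which only resolves if the stored type matches exactly and gives every worker its own copy. actix-web's idiomatic tool for read-only shared state is `web::Data<T>`, an Arc-backed extractor, and the later patches in this series move to it. A minimal standalone sketch of that pattern (the `/dicts` route and the map contents here are illustrative, not part of the patch):

use std::collections::HashMap;

use actix_web::{get, web::Data, App, HttpServer, Responder};

// Data<T> wraps the map in an Arc; workers clone the Arc, not the map.
#[get("/dicts")]
async fn list_dicts(map: Data<HashMap<String, String>>) -> impl Responder {
    map.keys().cloned().collect::<Vec<_>>().join("\n")
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    let mut map = HashMap::new();
    map.insert("example".to_string(), "/tmp/example.odict".to_string());

    // Build once, outside the worker factory closure.
    let data = Data::new(map);

    HttpServer::new(move || App::new().app_data(data.clone()).service(list_dicts))
        .bind(("127.0.0.1", 5005))?
        .run()
        .await
}

With `Data`, the state is constructed a single time and each handler borrows the same underlying map instead of a per-worker clone.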
From e6fda2f2c4f552823ccedfda7ffd4bd5060c0efd Mon Sep 17 00:00:00 2001
From: Tyler Nickerson
Date: Sun, 14 Apr 2024 18:56:17 -0700
Subject: [PATCH 2/4] serve: load dictionaries through DictionaryReader and
 canonicalize paths

---
 cli/src/serve/mod.rs | 41 +++++++++++++++++++++--------------------
 lib/src/core/read.rs |  5 +++--
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/cli/src/serve/mod.rs b/cli/src/serve/mod.rs
index 3d901e9d..f646975b 100644
--- a/cli/src/serve/mod.rs
+++ b/cli/src/serve/mod.rs
@@ -1,7 +1,11 @@
-use std::{collections::HashMap, error::Error, fs::canonicalize, ops::Deref, path::PathBuf};
+use std::{
+    collections::HashMap, error::Error, fs::canonicalize, ops::Deref, path::PathBuf, rc::Rc,
+    sync::Arc,
+};
 
 use actix_web::{App, HttpServer};
 use clap::{command, Args};
+use odict::ArchivedDictionary;
 
 use crate::CLIContext;
 
 mod lookup;
@@ -20,35 +24,32 @@ pub struct ServeArgs {
 
 #[actix_web::main]
 pub async fn serve(ctx: &mut CLIContext, args: &ServeArgs) -> Result<(), Box<dyn Error>> {
-    let port = args.port;
-    let dictionaries = &args.dictionaries;
-    let mut dictionary_map = HashMap::<String, String>::new();
-    let alias_manager = &ctx.alias_manager;
+    let ServeArgs { port, dictionaries } = args;
+
+    let CLIContext {
+        alias_manager,
+        reader,
+        ..
+    } = ctx;
+
+    let mut dictionary_map = HashMap::<String, ArchivedDictionary>::new();
+
+    println!("{:?}", dictionary_map);
 
     for dictionary in dictionaries {
-        let path = alias_manager.get(&dictionary);
-
-        if let Some(p) = path {
-            dictionary_map.insert(dictionary.to_owned(), p.to_owned());
-        } else {
-            let pb = PathBuf::from(dictionary);
-
-            if let Some(name) = pb.file_stem().map(|s| s.to_string_lossy().to_string()) {
-                let p = canonicalize(&pb)?.to_string_lossy().to_string();
-                dictionary_map.insert(name, p);
-            }
-        }
+        let dict = reader.read_from_path_or_alias_with_manager(&dictionary, &alias_manager)?;
+        dictionary_map.insert(dictionary.to_owned(), dict.to_archive()?);
     }
 
-    println!("{:?}", dictionary_map);
     ctx.println(format!("\n🟢 Listening on port {}\n", port));
 
+    let r = Arc::new(dictionary_map);
+
     HttpServer::new(move || {
         App::new()
-            .app_data(dictionary_map.to_owned())
+            .app_data(r.clone())
             .service(lookup::handle_lookup)
     })
-    .bind(("127.0.0.1", port))?
+    .bind(("127.0.0.1", *port))?
     .run()
     .await?;
diff --git a/lib/src/core/read.rs b/lib/src/core/read.rs
index c4df70bc..596a62f9 100644
--- a/lib/src/core/read.rs
+++ b/lib/src/core/read.rs
@@ -89,14 +89,15 @@ impl DictionaryReader {
     }
 
     pub fn read_from_path(&self, path: &str) -> Result<DictionaryFile, Box<dyn Error>> {
-        let mut file = File::open(path)?;
+        let pb = canonicalize(PathBuf::from(path))?;
+        let mut file = File::open(&pb)?;
         let mut buffer = Vec::new();
 
         file.read_to_end(&mut buffer)?;
 
         let mut result = self.read_from_bytes(&buffer)?;
 
-        result.path = Some(canonicalize(PathBuf::from(path))?);
+        result.path = Some(pb);
 
         Ok(result)
    }
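Note on patch 2: it stops resolving aliases and paths by hand and funnels everything through `DictionaryReader::read_from_path_or_alias_with_manager`, while `read_from_path` now canonicalizes before opening, so `DictionaryFile::path` always carries an absolute path. Roughly how that loader API is used from the CLI side (a sketch only; the `default()` constructors are assumptions, since the patch only ever takes these values off an existing `CLIContext`):

use std::error::Error;

use odict::{config::AliasManager, DictionaryReader};

fn load(name_or_path: &str) -> Result<(), Box<dyn Error>> {
    // Assumed constructors; the patch obtains both from CLIContext instead.
    let reader = DictionaryReader::default();
    let aliases = AliasManager::default();

    // Tries the alias table first, then falls back to treating the argument
    // as a filesystem path (canonicalized inside read_from_path).
    let file = reader.read_from_path_or_alias_with_manager(name_or_path, &aliases)?;

    // Zero-copy archived view, as used later by the serve handlers.
    let _dict = file.to_archive()?;

    println!("loaded from {:?}", file.path);
    Ok(())
}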
From b4dfd69825c8e97b281cd128e6bfa2a96127054d Mon Sep 17 00:00:00 2001
From: Tyler Nickerson
Date: Sun, 14 Apr 2024 21:55:02 -0700
Subject: [PATCH 3/4] serve: implement the lookup endpoint against archived
 dictionaries

---
 Cargo.lock                   | 10 +++--
 cli/Cargo.toml               |  2 +
 cli/src/serve/lookup.rs      | 75 +++++++++++++++++++++++++++++-----
 cli/src/serve/mod.rs         | 60 +++++++++++++++++++++++------
 lib/src/core/lookup.rs       | 19 ++++++---
 lib/src/core/read.rs         |  2 +-
 lib/src/dump/json/entry.rs   |  8 +++-
 lib/src/dump/json/json.rs    | 31 ++++++++++++++-
 lib/src/models/dictionary.rs |  9 ++++-
 lib/src/models/entry.rs      |  2 +-
 lib/src/search/search.rs     |  3 +-
 11 files changed, 182 insertions(+), 39 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ec49e257..5b7cff8f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -622,6 +622,8 @@ dependencies = [
  "actix-web",
  "clap",
  "console",
+ "derive_more",
+ "env_logger",
  "indicatif",
  "odict",
  "once_cell",
@@ -939,9 +941,9 @@ dependencies = [
 
 [[package]]
 name = "env_logger"
-version = "0.11.2"
+version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
+checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1654,9 +1656,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.20"
+version = "0.4.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
 
 [[package]]
 name = "loom"
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index bda8feda..c90dfccb 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -19,3 +19,5 @@ indicatif = "0.17.8"
 pulldown-cmark = "0.10.2"
 actix-web = "4.5.1"
 serde = { version = "1.0.197", features = ["derive"] }
+env_logger = "0.11.3"
+derive_more = "0.99.17"
diff --git a/cli/src/serve/lookup.rs b/cli/src/serve/lookup.rs
index 9a555527..878858f0 100644
--- a/cli/src/serve/lookup.rs
+++ b/cli/src/serve/lookup.rs
@@ -2,16 +2,46 @@ use std::collections::HashMap;
 
 use actix_web::{
     get,
-    web::{Path, Query},
-    HttpRequest, HttpResponse, Responder,
+    http::{header::ContentType, StatusCode},
+    web::{Data, Path, Query},
+    HttpRequest, HttpResponse, Responder, ResponseError,
 };
+use derive_more::{Display, Error};
+use odict::{DictionaryFile, LookupOptions, ToJSON};
 use serde::Deserialize;
 
 #[derive(Debug, Deserialize)]
 pub struct LookupRequest {
-    query: String,
+    queries: Vec<String>,
     follow: Option<bool>,
-    split: Option<String>,
+    split: Option<usize>,
 }
+
+#[derive(Debug, Display, Error)]
+enum LookupError {
+    #[display(fmt = "Dictionary not found: {}", name)]
+    DictionaryNotFound { name: String },
+
+    #[display(fmt = "Failed to read dictionary: {}", name)]
+    DictionaryReadError { name: String },
+
+    #[display(fmt = "Lookup error: {}", message)]
+    LookupError { message: String },
+}
+
+impl ResponseError for LookupError {
+    fn error_response(&self) -> HttpResponse {
+        HttpResponse::build(self.status_code())
+            .insert_header(ContentType::html())
+            .body(self.to_string())
+    }
+
+    fn status_code(&self) -> StatusCode {
+        match *self {
+            LookupError::DictionaryNotFound { .. } => StatusCode::NOT_FOUND,
+            LookupError::DictionaryReadError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+        }
+    }
+}
 
 #[get("/lookup/{name}")]
 async fn handle_lookup(
     req: HttpRequest,
     params: Query<LookupRequest>,
     dict: Path<String>,
-) -> impl Responder {
-    let dictionary_map = req.app_data::<HashMap<String, String>>().unwrap();
+    dictionary_map: Data<HashMap<String, DictionaryFile>>,
+) -> Result<impl Responder, LookupError> {
+    let LookupRequest {
+        queries,
+        follow,
+        split,
+    } = params.0;
+
+    let dictionary_name = &dict.into_inner();
+
+    if let Some(file) = dictionary_map.get(dictionary_name) {
+        let dictionary = file
+            .to_archive()
+            .map_err(|_e| LookupError::DictionaryReadError {
+                name: dictionary_name.to_string(),
+            })?;
+
+        let entries = dictionary
+            .lookup(
+                &queries,
+                LookupOptions::default()
+                    .follow(follow.unwrap_or(false))
+                    .split(split.unwrap_or(0)),
+            )
+            .map_err(|e| LookupError::LookupError {
+                message: e.to_string(),
+            })?;
+
+        entries.to_json(true);
 
-    if let Some(dictionary_path) = dictionary_map.get(&dict.into_inner()) {
-        // let dictionary =
-        return HttpResponse::Ok().body(format!("{:?}", params));
+        return Ok(HttpResponse::Ok().body(format!("{:?}", params)));
     } else {
-        return HttpResponse::NotFound().body("Dictionary not found");
+        return Ok(HttpResponse::NotFound().body("Dictionary not found"));
     }
 }
diff --git a/cli/src/serve/mod.rs b/cli/src/serve/mod.rs
index f646975b..21c91657 100644
--- a/cli/src/serve/mod.rs
+++ b/cli/src/serve/mod.rs
@@ -1,16 +1,39 @@
 use std::{
-    collections::HashMap, error::Error, fs::canonicalize, ops::Deref, path::PathBuf, rc::Rc,
-    sync::Arc,
+    collections::HashMap,
+    error::Error,
+    fmt::{self, Display, Formatter},
 };
 
-use actix_web::{App, HttpServer};
-use clap::{command, Args};
-use odict::ArchivedDictionary;
+use actix_web::{middleware::Logger, web::Data, App, HttpServer};
+use clap::{command, Args, ValueEnum};
+use env_logger::Env;
+use odict::DictionaryFile;
 
 use crate::CLIContext;
 
 mod lookup;
 
+#[derive(Debug, Clone, ValueEnum)]
+enum LogLevel {
+    Trace,
+    Debug,
+    Info,
+    Warn,
+    Error,
+}
+
+impl Display for LogLevel {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        match self {
+            LogLevel::Trace => write!(f, "trace"),
+            LogLevel::Debug => write!(f, "debug"),
+            LogLevel::Info => write!(f, "info"),
+            LogLevel::Warn => write!(f, "warn"),
+            LogLevel::Error => write!(f, "error"),
+        }
+    }
+}
+
 #[derive(Debug, Args)]
 #[command(args_conflicts_with_subcommands = true)]
 #[command(flatten_help = true)]
@@ -18,13 +41,22 @@ pub struct ServeArgs {
     #[arg(short, default_value_t = 5005, help = "Port to listen on")]
     port: u16,
 
-    #[arg(help = "List of dictionary paths or aliases to serve")]
+    // Log level
+    #[arg(short, long)]
+    level: Option<LogLevel>,
+
+    // List of dictionary paths or aliases to serve
+    #[arg()]
     dictionaries: Vec<String>,
 }
 
 #[actix_web::main]
 pub async fn serve(ctx: &mut CLIContext, args: &ServeArgs) -> Result<(), Box<dyn Error>> {
-    let ServeArgs { port, dictionaries } = args;
+    let ServeArgs {
+        port,
+        dictionaries,
+        level,
+    } = args;
 
     let CLIContext {
         alias_manager,
         reader,
         ..
     } = ctx;
 
-    let mut dictionary_map = HashMap::<String, ArchivedDictionary>::new();
+    let mut dictionary_map = HashMap::<String, DictionaryFile>::new();
 
-    println!("{:?}", dictionary_map);
 
     for dictionary in dictionaries {
         let dict = reader.read_from_path_or_alias_with_manager(&dictionary, &alias_manager)?;
-        dictionary_map.insert(dictionary.to_owned(), dict.to_archive()?);
+        dictionary_map.insert(dictionary.to_owned(), dict);
     }
 
     ctx.println(format!("\n🟢 Listening on port {}\n", port));
 
-    let r = Arc::new(dictionary_map);
+    env_logger::init_from_env(
+        Env::new().filter(format!("{}", level.as_ref().unwrap_or(&LogLevel::Info))),
+    );
+
+    let data = Data::new(dictionary_map);
 
     HttpServer::new(move || {
         App::new()
-            .app_data(r.clone())
+            .app_data(Data::clone(&data))
+            .wrap(Logger::default())
             .service(lookup::handle_lookup)
     })
     .bind(("127.0.0.1", *port))?
diff --git a/lib/src/core/lookup.rs b/lib/src/core/lookup.rs
index 23d13261..a0abe8d8 100644
--- a/lib/src/core/lookup.rs
+++ b/lib/src/core/lookup.rs
@@ -4,6 +4,7 @@ use crate::{ArchivedDictionary, ArchivedEntry, Dictionary, Entry, SplitOptions};
 
 use once_cell::sync::Lazy;
 use rayon::prelude::*;
+use std::marker::{Send, Sync};
 
 use regex::Regex;
 
@@ -18,6 +19,12 @@ pub struct LookupOptions {
     split: usize,
 }
 
+impl AsRef<LookupOptions> for LookupOptions {
+    fn as_ref(&self) -> &Self {
+        self
+    }
+}
+
 impl LookupOptions {
     pub fn default() -> Self {
         Self {
@@ -71,15 +78,15 @@ fn parse_query(query: &str) -> LookupQuery {
 macro_rules! lookup {
     ($tys:ident, $ret:ident) => {
         impl $tys {
-            fn lookup_(
+            fn lookup_<Options: AsRef<LookupOptions> + Send + Sync>(
                 &self,
                 query: &LookupQuery,
-                options: &LookupOptions,
+                options: Options,
             ) -> Result<Vec<&$ret>, Box<dyn Error>> {
                 let mut entries: Vec<&$ret> = Vec::new();
 
                 let LookupQuery { term, fallback } = query;
-                let LookupOptions { follow, split } = options;
+                let LookupOptions { follow, split } = options.as_ref();
 
                 let mut found = self.entries.get(term.as_str());
 
@@ -111,14 +118,14 @@ macro_rules! lookup {
                 Ok(entries)
             }
 
-            pub fn lookup(
+            pub fn lookup<Options: AsRef<LookupOptions> + Send + Sync>(
                 &self,
                 queries: &Vec<String>,
-                options: &LookupOptions,
+                options: Options,
             ) -> Result<Vec<Vec<&$ret>>, Box<dyn Error>> {
                 queries
                     .par_iter()
                    .map(|query| self.lookup_(&parse_query(query), &options))
                     .collect()
             }
         }
diff --git a/lib/src/core/read.rs b/lib/src/core/read.rs
index 596a62f9..a2427b4d 100644
--- a/lib/src/core/read.rs
+++ b/lib/src/core/read.rs
@@ -29,7 +29,7 @@ impl DictionaryFile {
     }
 
     pub fn to_dictionary(&self) -> Result<Dictionary, Box<dyn Error>> {
-        let dict: Dictionary = self.to_archive()?.deserialize(&mut Infallible)?;
+        let dict: Dictionary = self.to_archive()?.to_dictionary()?;
         Ok(dict)
     }
 }
diff --git a/lib/src/dump/json/entry.rs b/lib/src/dump/json/entry.rs
index 19396a6d..72579e60 100644
--- a/lib/src/dump/json/entry.rs
+++ b/lib/src/dump/json/entry.rs
@@ -1,4 +1,4 @@
-use crate::Entry;
+use crate::{ArchivedEntry, Entry};
 
 use serde::Serialize;
 
@@ -33,3 +33,9 @@ impl From<Entry> for EntryJSON {
         }
     }
 }
+
+impl From<&ArchivedEntry> for EntryJSON {
+    fn from(entry: &ArchivedEntry) -> Self {
+        EntryJSON::from(entry.to_entry().unwrap())
+    }
+}
diff --git a/lib/src/dump/json/json.rs b/lib/src/dump/json/json.rs
index dae5215e..21af1311 100644
--- a/lib/src/dump/json/json.rs
+++ b/lib/src/dump/json/json.rs
@@ -6,7 +6,7 @@ use serde_json::{to_string, to_string_pretty};
 
 use super::dictionary::DictionaryJSON;
 use super::entry::EntryJSON;
-use crate::{Dictionary, Entry};
+use crate::{ArchivedEntry, Dictionary, Entry};
 
 pub struct JSONSerializer {}
 
@@ -59,3 +59,32 @@ impl ToJSON for Vec<Vec<Entry>> {
         stringify(&json, pretty)
     }
 }
+
+impl ToJSON for &ArchivedEntry {
+    fn to_json(self, pretty: bool) -> Result<String, Box<dyn Error>> {
+        let json = EntryJSON::from(self.to_entry().unwrap());
+        stringify(&json, pretty)
+    }
+}
+
+impl ToJSON for Vec<&ArchivedEntry> {
+    fn to_json(self, pretty: bool) -> Result<String, Box<dyn Error>> {
+        let json = self
+            .into_iter()
+            .map(|v| EntryJSON::from(v))
+            .collect::<Vec<EntryJSON>>();
+
+        stringify(&json, pretty)
+    }
+}
+
+impl ToJSON for Vec<Vec<&ArchivedEntry>> {
+    fn to_json(self, pretty: bool) -> Result<String, Box<dyn Error>> {
+        let json = self
+            .into_iter()
+            .map(|v| v.into_iter().map(|v| EntryJSON::from(v)).collect())
+            .collect::<Vec<Vec<EntryJSON>>>();
+
+        stringify(&json, pretty)
+    }
+}
diff --git a/lib/src/models/dictionary.rs b/lib/src/models/dictionary.rs
index dd756308..0b8101cb 100644
--- a/lib/src/models/dictionary.rs
+++ b/lib/src/models/dictionary.rs
@@ -1,5 +1,5 @@
 use quick_xml::de::from_str;
-use rkyv::to_bytes;
+use rkyv::{to_bytes, Deserialize, Infallible};
 
 use std::{collections::HashMap, error::Error};
 
 use crate::serializable;
@@ -69,3 +69,10 @@ impl From<&str> for Dictionary {
         from_str(xml).unwrap()
     }
 }
+
+impl ArchivedDictionary {
+    pub fn to_dictionary(&self) -> Result<Dictionary, Box<dyn Error>> {
+        let dict: Dictionary = self.deserialize(&mut Infallible)?;
+        Ok(dict)
+    }
+}
diff --git a/lib/src/models/entry.rs b/lib/src/models/entry.rs
index 0ac9689b..063856aa 100644
--- a/lib/src/models/entry.rs
+++ b/lib/src/models/entry.rs
@@ -1,6 +1,6 @@
 use std::error::Error;
 
-use rkyv::{to_bytes, Deserialize, Infallible};
+use rkyv::{archived_root, to_bytes, Deserialize, Infallible};
 
 use crate::{serializable, Etymology};
diff --git a/lib/src/search/search.rs b/lib/src/search/search.rs
index 2d4eb250..238f2af4 100644
--- a/lib/src/search/search.rs
+++ b/lib/src/search/search.rs
@@ -98,9 +98,8 @@ impl Dictionary {
                 .unwrap();
 
                 let archive = unsafe { archived_root::<Entry>(&bytes[..]) };
-                let entry: Entry = archive.deserialize(&mut Infallible).unwrap();
 
-                entry
+                archive.to_entry().unwrap()
             })
             .collect();
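Note on patch 3: the lookup endpoint now does real work. The handler holds `DictionaryFile`s, converts each to a zero-copy `ArchivedDictionary` per request, and calls the generic `lookup<Options: AsRef<LookupOptions> + Send + Sync>` added to the lookup! macro, so options can be passed by value or by reference. Worth noting: `queries: Vec<String>` deserialized straight from a query string tends to fail with actix's default serde_urlencoded-based `Query` extractor for repeated keys, which is likely why patch 4 switches to a single comma-separated `queries` parameter. A sketch of the library-side call the handler now performs (the query terms are illustrative):

use std::error::Error;

use odict::{ArchivedDictionary, LookupOptions, ToJSON};

fn lookup_json(dictionary: &ArchivedDictionary) -> Result<String, Box<dyn Error>> {
    // Example terms; the HTTP handler takes these from the query string.
    let queries = vec!["dog".to_string(), "cat".to_string()];

    // LookupOptions implements AsRef<LookupOptions> (added in this patch),
    // so the builder can be handed over by value.
    let entries = dictionary.lookup(
        &queries,
        LookupOptions::default().follow(true).split(0),
    )?;

    // Vec<Vec<&ArchivedEntry>> -> pretty JSON, via the new ToJSON impls.
    entries.to_json(true)
}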
From 15483e85119a63a087deb82f758fc6108909327a Mon Sep 17 00:00:00 2001
From: Tyler Nickerson
Date: Sun, 14 Apr 2024 23:18:27 -0700
Subject: [PATCH 4/4] serve: add a search endpoint and return lookup results
 as JSON

---
 cli/src/context.rs       |   8 ++-
 cli/src/serve/lookup.rs  |  77 +++++++++++++++----------
 cli/src/serve/mod.rs     |  62 ++++++++++++++++-----
 cli/src/serve/search.rs  | 106 ++++++++++++++++++++++++++++++++++++
 lib/src/core/lookup.rs   |   2 +
 lib/src/core/read.rs     |   2 +-
 lib/src/models/entry.rs  |   2 +-
 lib/src/search/index.rs  |  92 ++++++++++++++++++-------------
 lib/src/search/search.rs | 113 +++++++++++++++++++++++----------------
 9 files changed, 335 insertions(+), 129 deletions(-)
 create mode 100644 cli/src/serve/search.rs

diff --git a/cli/src/context.rs b/cli/src/context.rs
index 2580e747..5ec59357 100644
--- a/cli/src/context.rs
+++ b/cli/src/context.rs
@@ -26,10 +26,14 @@ impl<'a> CLIContext<'a> {
         }
     }
 
-    pub fn println(&mut self, msg: String) {
+    pub fn println<S>(&mut self, msg: S)
+    where
+        S: AsRef<str>,
+    {
         self.stdout
-            .write_all(format!("{}\n", msg).as_bytes())
+            .write_all(format!("{}\n", msg.as_ref()).as_bytes())
             .unwrap();
+
         self.stdout.flush().unwrap();
     }
 }
diff --git a/cli/src/serve/lookup.rs b/cli/src/serve/lookup.rs
index 878858f0..89765ab9 100644
--- a/cli/src/serve/lookup.rs
+++ b/cli/src/serve/lookup.rs
@@ -4,7 +4,7 @@ use actix_web::{
     get,
     http::{header::ContentType, StatusCode},
     web::{Data, Path, Query},
-    HttpRequest, HttpResponse, Responder, ResponseError,
+    HttpResponse, Responder, ResponseError,
 };
 use derive_more::{Display, Error};
 use odict::{DictionaryFile, LookupOptions, ToJSON};
 use serde::Deserialize;
 
 #[derive(Debug, Deserialize)]
 pub struct LookupRequest {
-    queries: Vec<String>,
+    queries: String,
     follow: Option<bool>,
     split: Option<usize>,
 }
@@ -27,6 +27,9 @@ enum LookupError {
 
     #[display(fmt = "Lookup error: {}", message)]
     LookupError { message: String },
+
+    #[display(fmt = "Failed to serialize response")]
+    SerializeError,
 }
 
 impl ResponseError for LookupError {
@@ -40,47 +43,59 @@ impl ResponseError for LookupError {
         match *self {
             LookupError::DictionaryNotFound { .. } => StatusCode::NOT_FOUND,
             LookupError::DictionaryReadError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            LookupError::LookupError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            LookupError::SerializeError => StatusCode::INTERNAL_SERVER_ERROR,
         }
     }
 }
 
-#[get("/lookup/{name}")]
+#[get("/{name}/lookup")]
 async fn handle_lookup(
-    req: HttpRequest,
     params: Query<LookupRequest>,
     dict: Path<String>,
     dictionary_map: Data<HashMap<String, DictionaryFile>>,
 ) -> Result<impl Responder, LookupError> {
     let LookupRequest {
-        queries,
+        queries: raw_queries,
         follow,
         split,
     } = params.0;
 
-    let dictionary_name = &dict.into_inner();
-
-    if let Some(file) = dictionary_map.get(dictionary_name) {
-        let dictionary = file
-            .to_archive()
-            .map_err(|_e| LookupError::DictionaryReadError {
-                name: dictionary_name.to_string(),
-            })?;
-
-        let entries = dictionary
-            .lookup(
-                &queries,
-                LookupOptions::default()
-                    .follow(follow.unwrap_or(false))
-                    .split(split.unwrap_or(0)),
-            )
-            .map_err(|e| LookupError::LookupError {
-                message: e.to_string(),
-            })?;
-
-        entries.to_json(true);
-
-        return Ok(HttpResponse::Ok().body(format!("{:?}", params)));
-    } else {
-        return Ok(HttpResponse::NotFound().body("Dictionary not found"));
-    }
+    let queries = raw_queries
+        .split(',')
+        .map(|s| s.to_string())
+        .collect::<Vec<String>>();
+
+    let dictionary_name = dict.into_inner();
+
+    let file = dictionary_map
+        .get(&dictionary_name)
+        .ok_or(LookupError::DictionaryNotFound {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let dictionary = file
+        .to_archive()
+        .map_err(|_e| LookupError::DictionaryReadError {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let entries = dictionary
+        .lookup(
+            &queries,
+            LookupOptions::default()
+                .follow(follow.unwrap_or(false))
+                .split(split.unwrap_or(0)),
+        )
+        .map_err(|e| LookupError::LookupError {
+            message: e.to_string(),
+        })?;
+
+    let json = entries
+        .to_json(true)
+        .map_err(|_e| LookupError::SerializeError)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("application/json")
+        .body(json))
 }
diff --git a/cli/src/serve/mod.rs b/cli/src/serve/mod.rs
index 21c91657..6f5ddcc0 100644
--- a/cli/src/serve/mod.rs
+++ b/cli/src/serve/mod.rs
@@ -2,16 +2,19 @@ use std::{
     collections::HashMap,
     error::Error,
     fmt::{self, Display, Formatter},
+    path::PathBuf,
 };
 
 use actix_web::{middleware::Logger, web::Data, App, HttpServer};
 use clap::{command, Args, ValueEnum};
+use console::style;
 use env_logger::Env;
-use odict::DictionaryFile;
+use odict::{config::AliasManager, DictionaryFile, DictionaryReader};
 
 use crate::CLIContext;
 
 mod lookup;
+mod search;
 
 #[derive(Debug, Clone, ValueEnum)]
 enum LogLevel {
@@ -41,7 +44,7 @@ pub struct ServeArgs {
     #[arg(short, default_value_t = 5005, help = "Port to listen on")]
     port: u16,
 
-    // Log level
+    // Sets the default log level
     #[arg(short, long)]
     level: Option<LogLevel>,
 
@@ -50,6 +53,29 @@ pub struct ServeArgs {
     dictionaries: Vec<String>,
 }
 
+pub(self) fn get_dictionary_map(
+    reader: &DictionaryReader,
+    alias_manager: &AliasManager,
+    dictionaries: &Vec<String>,
+) -> Result<HashMap<String, DictionaryFile>, Box<dyn Error>> {
+    let mut dictionary_map = HashMap::<String, DictionaryFile>::new();
+
+    for dictionary in dictionaries {
+        let dict = reader.read_from_path_or_alias_with_manager(&dictionary, &alias_manager)?;
+
+        dictionary_map.insert(
+            PathBuf::from(dictionary)
+                .file_stem()
+                .unwrap()
+                .to_string_lossy()
+                .to_string(),
+            dict,
+        );
+    }
+
+    Ok(dictionary_map)
+}
+
 #[actix_web::main]
 pub async fn serve(ctx: &mut CLIContext, args: &ServeArgs) -> Result<(), Box<dyn Error>> {
     let ServeArgs {
         port,
         dictionaries,
         level,
     } = args;
 
     let CLIContext {
         alias_manager,
         reader,
         ..
     } = ctx;
 
-    let mut dictionary_map = HashMap::<String, DictionaryFile>::new();
-
-    for dictionary in dictionaries {
-        let dict = reader.read_from_path_or_alias_with_manager(&dictionary, &alias_manager)?;
-        dictionary_map.insert(dictionary.to_owned(), dict);
-    }
+    let dictionary_map = get_dictionary_map(reader, alias_manager, &dictionaries)?;
+    let log_level = format!("{}", level.as_ref().unwrap_or(&LogLevel::Info));
+
+    ctx.println(format!(
+        "\n🟢 Serving the following dictionaries on port {} with log level \"{}\":\n",
+        port, log_level
+    ));
+
+    for (name, dict) in &dictionary_map {
+        ctx.println(format!(
+            "  • {} {}",
+            style(name).bold(),
+            style(format!(
+                "({})",
+                dict.path.as_ref().unwrap().to_string_lossy()
+            ))
+            .dim()
+        ));
+    }
 
-    ctx.println(format!("\n🟢 Listening on port {}\n", port));
+    ctx.println("");
 
-    env_logger::init_from_env(
-        Env::new().filter(format!("{}", level.as_ref().unwrap_or(&LogLevel::Info))),
-    );
+    env_logger::init_from_env(Env::new().default_filter_or(log_level));
 
     let data = Data::new(dictionary_map);
 
     HttpServer::new(move || {
         App::new()
-            .app_data(Data::clone(&data))
             .wrap(Logger::default())
+            .app_data(Data::clone(&data))
             .service(lookup::handle_lookup)
+            .service(search::handle_search)
     })
     .bind(("127.0.0.1", *port))?
     .run()
     .await?;
diff --git a/cli/src/serve/search.rs b/cli/src/serve/search.rs
new file mode 100644
index 00000000..37fcfa11
--- /dev/null
+++ b/cli/src/serve/search.rs
@@ -0,0 +1,106 @@
+use std::collections::HashMap;
+
+use actix_web::{
+    get,
+    http::{header::ContentType, StatusCode},
+    web::{Data, Path, Query},
+    HttpResponse, Responder, ResponseError,
+};
+use derive_more::{Display, Error};
+use odict::{search::SearchOptions, DictionaryFile, ToJSON};
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+pub struct SearchRequest {
+    query: String,
+    limit: Option<usize>,
+}
+
+#[derive(Debug, Display, Error)]
+enum SearchError {
+    #[display(fmt = "Dictionary not found: {}", name)]
+    DictionaryNotFound { name: String },
+
+    #[display(fmt = "Failed to read dictionary: {}", name)]
+    DictionaryReadError { name: String },
+
+    #[display(fmt = "Search error: {}", message)]
+    SearchError { message: String },
+
+    #[display(fmt = "Failed to serialize response")]
+    SerializeError,
+}
+
+impl ResponseError for SearchError {
+    fn error_response(&self) -> HttpResponse {
+        HttpResponse::build(self.status_code())
+            .insert_header(ContentType::html())
+            .body(self.to_string())
+    }
+
+    fn status_code(&self) -> StatusCode {
+        match *self {
+            SearchError::DictionaryNotFound { .. } => StatusCode::NOT_FOUND,
+            SearchError::DictionaryReadError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            SearchError::SearchError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            SearchError::SerializeError => StatusCode::INTERNAL_SERVER_ERROR,
+        }
+    }
+}
+
+#[get("/{name}/search")]
+async fn handle_search(
+    params: Query<SearchRequest>,
+    dict: Path<String>,
+    dictionary_map: Data<HashMap<String, DictionaryFile>>,
+) -> Result<impl Responder, SearchError> {
+    let SearchRequest { query, limit } = params.0;
+
+    let dictionary_name = dict.into_inner();
+
+    let file = dictionary_map
+        .get(&dictionary_name)
+        .ok_or(SearchError::DictionaryNotFound {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let dictionary = file
+        .to_archive()
+        .map_err(|_e| SearchError::DictionaryReadError {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let entries = dictionary
+        .search(&query, SearchOptions::default().limit(limit.unwrap_or(10)))
+        .map_err(|e| SearchError::SearchError {
+            message: e.to_string(),
+        })?;
+
+    let json = entries
+        .to_json(true)
+        .map_err(|_e| SearchError::SerializeError)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("application/json")
+        .body(json))
+}
+
+// #[cfg(test)]
+// mod tests {
+//     use actix_web::{http::header::ContentType, test, App};
+
+//     use crate::serve::get_dictionary_map;
+
+//     use super::handle_search;
+
+//     #[actix_web::test]
+//     async fn test_index_get() {
+//         let app = test::init_service(App::new().service(handle_search)).await;
+//         let req = test::TestRequest::default()
+//             // get_dictionary_map(reader, alias_manager, dictionaries)
+//             .insert_header(ContentType::plaintext())
+//             .to_request();
+//         let resp = test::call_service(&app, req).await;
+//         assert!(resp.status().is_success());
+//     }
+// }
diff --git a/lib/src/core/lookup.rs b/lib/src/core/lookup.rs
index a0abe8d8..3689fddb 100644
--- a/lib/src/core/lookup.rs
+++ b/lib/src/core/lookup.rs
@@ -106,6 +106,8 @@ macro_rules! lookup {
                         },
                         options,
                     );
+                } else {
+                    entries.push(entry);
                 }
             } else if *split > 0 {
                 let split = self.split(term, &SplitOptions::default().threshold(*split))?;
diff --git a/lib/src/core/read.rs b/lib/src/core/read.rs
index a2427b4d..defc73a9 100644
--- a/lib/src/core/read.rs
+++ b/lib/src/core/read.rs
@@ -6,7 +6,7 @@ use std::{
 };
 
 use byteorder::{LittleEndian, ReadBytesExt};
-use rkyv::{archived_root, Deserialize, Infallible};
+use rkyv::archived_root;
 
 use super::constants::FILE_VERSION;
 use crate::{lz4::decompress, ArchivedDictionary, Dictionary};
diff --git a/lib/src/models/entry.rs b/lib/src/models/entry.rs
index 063856aa..0ac9689b 100644
--- a/lib/src/models/entry.rs
+++ b/lib/src/models/entry.rs
@@ -1,6 +1,6 @@
 use std::error::Error;
 
-use rkyv::{archived_root, to_bytes, Deserialize, Infallible};
+use rkyv::{to_bytes, Deserialize, Infallible};
 
 use crate::{serializable, Etymology};
diff --git a/lib/src/search/index.rs b/lib/src/search/index.rs
index e0cb7e3b..0b10ebe1 100644
--- a/lib/src/search/index.rs
+++ b/lib/src/search/index.rs
@@ -1,10 +1,9 @@
 use std::{error::Error, ffi::OsStr, fs::create_dir_all, fs::remove_dir_all, path::PathBuf};
 
+use tantivy::{doc, tokenizer::TextAnalyzer, Index};
-use tantivy::tokenizer::TextAnalyzer;
-use tantivy::{doc, Index};
-
-use crate::config::get_config_dir;
-use crate::{Dictionary, PreviewOptions};
+use crate::{
+    config::get_config_dir, ArchivedDictionary, ArchivedEntry, Dictionary, PreviewOptions,
+};
 
 use super::constants::{CUSTOM_TOKENIZER, DEFAULT_TOKENIZER};
 use super::schema::{FIELD_BUFFER, FIELD_DEFINITIONS, FIELD_TERM, SCHEMA};
@@ -70,46 +69,65 @@ impl AsRef<IndexOptions> for IndexOptions {
     }
 }
 
-impl Dictionary {
-    pub fn index<Options: AsRef<IndexOptions>>(
-        &self,
-        options: Options,
-    ) -> Result<(), Box<dyn Error>> {
-        let opts = options.as_ref();
-        let index_path = opts.dir.join(self.id.as_str());
+macro_rules! index {
+    ($name:ident) => {
+        impl $name {
+            pub fn index<Options: AsRef<IndexOptions>>(
+                &self,
+                options: Options,
+            ) -> Result<(), Box<dyn Error>> {
+                let opts = options.as_ref();
+                let index_path = opts.dir.join(self.id.as_str());
+
+                if opts.overwrite && index_path.exists() {
+                    remove_dir_all(&index_path)?;
+                }
 
-        if opts.overwrite && index_path.exists() {
-            remove_dir_all(&index_path)?;
-        }
+                if !index_path.exists() {
+                    create_dir_all(&index_path)?;
+                }
 
-        if !index_path.exists() {
-            create_dir_all(&index_path)?;
-        }
+                let index = Index::create_in_dir(&index_path, SCHEMA.to_owned())?;
 
-        let index = Index::create_in_dir(&index_path, SCHEMA.to_owned())?;
+                index
+                    .tokenizers()
+                    .register(CUSTOM_TOKENIZER, opts.tokenizer.clone());
 
-        index
-            .tokenizers()
-            .register(CUSTOM_TOKENIZER, opts.tokenizer.clone());
+                let mut index_writer = index.writer(opts.memory)?;
 
-        let mut index_writer = index.writer(opts.memory)?;
+                self.entries.values().enumerate().for_each(|(i, entry)| {
+                    let document = doc!(
+                        *FIELD_TERM => entry.term.as_str(),
+                        *FIELD_DEFINITIONS => entry.preview(PreviewOptions::default()),
+                        *FIELD_BUFFER => entry.serialize().unwrap()
+                    );
 
-        self.entries.values().enumerate().for_each(|(i, entry)| {
-            let document = doc!(
-                *FIELD_TERM => entry.term.as_str(),
-                *FIELD_DEFINITIONS => entry.preview(PreviewOptions::default()),
-                *FIELD_BUFFER => entry.serialize().unwrap()
-            );
+                    if index_writer.add_document(document).is_ok() {
+                        let cb = options.as_ref().cb_on_item.as_ref();
+                        cb(i, entry.term.as_str());
+                    }
+                });
 
-            if index_writer.add_document(document).is_ok() {
-                let cb = opts.cb_on_item.as_ref();
-                cb(i, entry.term.as_str());
-            }
-        });
+                index_writer.commit()?;
+                index_writer.wait_merging_threads()?;
 
-        index_writer.commit()?;
-        index_writer.wait_merging_threads()?;
+                Ok(())
+            }
+        }
+    };
+}
 
-        Ok(())
+// Workaround
+trait SerializeEntry {
+    fn serialize(&self) -> Result<Vec<u8>, Box<dyn Error>>;
+}
+
+impl SerializeEntry for &ArchivedEntry {
+    fn serialize(&self) -> Result<Vec<u8>, Box<dyn Error>> {
+        self.to_entry().unwrap().serialize()
     }
 }
+
+index!(Dictionary);
+index!(ArchivedDictionary);
diff --git a/lib/src/search/search.rs b/lib/src/search/search.rs
index 238f2af4..f16ef77e 100644
--- a/lib/src/search/search.rs
+++ b/lib/src/search/search.rs
@@ -1,14 +1,15 @@
 use std::{error::Error, ffi::OsStr, path::PathBuf};
 
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
-use rkyv::{archived_root, Deserialize, Infallible};
+use rkyv::archived_root;
 use tantivy::{
     collector::TopDocs, query::QueryParser, tokenizer::TextAnalyzer, Index, ReloadPolicy,
 };
 
-use crate::{Dictionary, Entry};
+use crate::{ArchivedDictionary, Dictionary, Entry};
 
 use super::constants::{CUSTOM_TOKENIZER, DEFAULT_TOKENIZER};
+use super::index::IndexOptions;
 
 use super::{
     get_default_index_dir,
@@ -18,6 +19,7 @@ use super::{
 pub struct SearchOptions {
     pub dir: PathBuf,
     pub threshold: u32,
+    pub autoindex: bool,
     pub limit: usize,
     pub tokenizer: TextAnalyzer,
 }
@@ -27,6 +29,7 @@ impl SearchOptions {
         Self {
             dir: get_default_index_dir(),
             threshold: 1,
+            autoindex: false,
             limit: 10,
             tokenizer: DEFAULT_TOKENIZER.to_owned(),
         }
@@ -37,6 +40,11 @@ impl SearchOptions {
         self
     }
 
+    pub fn autoindex(mut self, autoindex: bool) -> Self {
+        self.autoindex = autoindex;
+        self
+    }
+
     pub fn tokenizer<T>(mut self, tokenizer: T) -> Self
     where
         TextAnalyzer: From<T>,
@@ -62,47 +70,62 @@ impl AsRef<SearchOptions> for SearchOptions {
     }
 }
 
-impl Dictionary {
-    pub fn search<Options: AsRef<SearchOptions>>(
-        &self,
-        query: &str,
-        options: Options,
-    ) -> Result<Vec<Entry>, Box<dyn Error>> {
-        let opts = options.as_ref();
-        let index_path = opts.dir.join(self.id.as_str());
-        let index = Index::open_in_dir(&index_path)?;
-
-        index
-            .tokenizers()
-            .register(CUSTOM_TOKENIZER, opts.tokenizer.to_owned());
-
-        let reader = index
-            .reader_builder()
-            .reload_policy(ReloadPolicy::OnCommit)
-            .try_into()?;
-
-        let searcher = reader.searcher();
-        let query_parser = QueryParser::for_index(&index, vec![*FIELD_TERM, *FIELD_DEFINITIONS]);
-        let query_obj = query_parser.parse_query(query)?;
-        let top_docs = searcher.search(&query_obj, &TopDocs::with_limit(opts.limit))?;
-        let entries = top_docs
-            .par_iter()
-            .filter(|(score, _)| score >= &(opts.threshold as f32))
-            .map(|(_, doc_address)| -> Entry {
-                let retrieved_doc = searcher.doc(*doc_address).unwrap();
-
-                let bytes = retrieved_doc
-                    .get_first(*FIELD_BUFFER)
-                    .unwrap()
-                    .as_bytes()
-                    .unwrap();
-
-                let archive = unsafe { archived_root::<Entry>(&bytes[..]) };
-
-                archive.to_entry().unwrap()
-            })
-            .collect();
-
-        Ok(entries)
-    }
+macro_rules! search {
+    ($name:ident) => {
+        impl $name {
+            pub fn search<Options: AsRef<SearchOptions>>(
+                &self,
+                query: &str,
+                options: Options,
+            ) -> Result<Vec<Entry>, Box<dyn Error>> {
+                let opts = options.as_ref();
+                let index_path = opts.dir.join(self.id.as_str());
+
+                if opts.autoindex {
+                    if !index_path.exists() {
+                        self.index(IndexOptions::default().tokenizer(opts.tokenizer.clone()))?;
+                    }
+                }
+
+                let index = Index::open_in_dir(&index_path)?;
+
+                index
+                    .tokenizers()
+                    .register(CUSTOM_TOKENIZER, opts.tokenizer.to_owned());
+
+                let reader = index
+                    .reader_builder()
+                    .reload_policy(ReloadPolicy::OnCommit)
+                    .try_into()?;
+
+                let searcher = reader.searcher();
+                let query_parser =
+                    QueryParser::for_index(&index, vec![*FIELD_TERM, *FIELD_DEFINITIONS]);
+                let query_obj = query_parser.parse_query(query)?;
+                let top_docs = searcher.search(&query_obj, &TopDocs::with_limit(opts.limit))?;
+                let entries = top_docs
+                    .par_iter()
+                    .filter(|(score, _)| score >= &(opts.threshold as f32))
+                    .map(|(_, doc_address)| -> Entry {
+                        let retrieved_doc = searcher.doc(*doc_address).unwrap();
+
+                        let bytes = retrieved_doc
+                            .get_first(*FIELD_BUFFER)
+                            .unwrap()
+                            .as_bytes()
+                            .unwrap();
+
+                        let archive = unsafe { archived_root::<Entry>(&bytes[..]) };
+
+                        archive.to_entry().unwrap()
+                    })
+                    .collect();
+
+                Ok(entries)
+            }
+        }
+    };
+}
+
+search!(Dictionary);
+search!(ArchivedDictionary);
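Note on patch 4: the series ends with the integration test in cli/src/serve/search.rs still commented out. A possible completion using the `get_dictionary_map` helper this patch introduces (a sketch only: the `default()` constructors and the `examples/example1.odict` fixture path are assumptions, and since `autoindex` defaults to false the dictionary must already have a tantivy index, e.g. one built with `odict index`):

#[cfg(test)]
mod tests {
    use actix_web::{test, web::Data, App};
    use odict::{config::AliasManager, DictionaryReader};

    use super::handle_search;
    use crate::serve::get_dictionary_map;

    #[actix_web::test]
    async fn test_search_get() {
        // Assumed constructors and fixture path.
        let reader = DictionaryReader::default();
        let aliases = AliasManager::default();
        let map = get_dictionary_map(
            &reader,
            &aliases,
            &vec!["examples/example1.odict".to_string()],
        )
        .unwrap();

        // Same wiring as serve(): the handler extracts Data<HashMap<..>>.
        let app =
            test::init_service(App::new().app_data(Data::new(map)).service(handle_search)).await;

        let req = test::TestRequest::get()
            .uri("/example1/search?query=dog")
            .to_request();

        let resp = test::call_service(&app, req).await;
        assert!(resp.status().is_success());
    }
}

The request shapes mirror the route macros: GET /{name}/lookup?queries=a,b&follow=true and GET /{name}/search?query=...&limit=10.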