diff --git a/Cargo.lock b/Cargo.lock
index daf0b2ab..5b7cff8f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -619,12 +619,16 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
 name = "cli"
 version = "2.0.0"
 dependencies = [
+ "actix-web",
  "clap",
  "console",
+ "derive_more",
+ "env_logger",
  "indicatif",
  "odict",
  "once_cell",
  "pulldown-cmark",
+ "serde",
 ]
 
 [[package]]
@@ -937,9 +941,9 @@ dependencies = [
 
 [[package]]
 name = "env_logger"
-version = "0.11.2"
+version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
+checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1652,9 +1656,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.20"
+version = "0.4.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
 
 [[package]]
 name = "loom"
@@ -1822,7 +1826,6 @@ dependencies = [
 name = "odict"
 version = "0.1.0"
 dependencies = [
- "actix-web",
  "byteorder",
  "charabia",
  "dirs",
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index 815348f1..c90dfccb 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -10,7 +10,6 @@ odict = { path = "../lib", features = [
     "charabia",
     "sql",
     "search",
-    "serve",
     "json",
 ] }
 clap = { version = "4.5.4", features = ["derive", "cargo"] }
@@ -18,3 +17,7 @@ console = "0.15.8"
 once_cell = "1.19.0"
 indicatif = "0.17.8"
 pulldown-cmark = "0.10.2"
+actix-web = "4.5.1"
+serde = { version = "1.0.197", features = ["derive"] }
+env_logger = "0.11.3"
+derive_more = "0.99.17"
diff --git a/cli/src/cli.rs b/cli/src/cli.rs
index a1a72207..c96ccdff 100644
--- a/cli/src/cli.rs
+++ b/cli/src/cli.rs
@@ -3,6 +3,7 @@ use clap::{command, crate_version, Parser, Subcommand};
 use crate::alias::AliasCommands;
 use crate::{
     CompileArgs, DumpArgs, IndexArgs, LexiconArgs, LookupArgs, MergeArgs, NewArgs, SearchArgs,
+    ServeArgs,
 };
 
 #[derive(Debug, Parser)]
@@ -57,4 +58,8 @@ pub enum Commands {
     /// Run a full-text query on a compiled dictionary
     #[command(arg_required_else_help = true)]
     Search(SearchArgs),
+
+    /// Start a local web server to serve one or several dictionaries
+    #[command(arg_required_else_help = true)]
+    Serve(ServeArgs),
 }
diff --git a/cli/src/context.rs b/cli/src/context.rs
index 2580e747..5ec59357 100644
--- a/cli/src/context.rs
+++ b/cli/src/context.rs
@@ -26,10 +26,14 @@ impl<'a> CLIContext<'a> {
         }
     }
 
-    pub fn println(&mut self, msg: String) {
+    pub fn println<S>(&mut self, msg: S)
+    where
+        S: AsRef<str>,
+    {
         self.stdout
-            .write_all(format!("{}\n", msg).as_bytes())
+            .write_all(format!("{}\n", msg.as_ref()).as_bytes())
             .unwrap();
+        self.stdout.flush().unwrap();
     }
 }
diff --git a/cli/src/lib.rs b/cli/src/lib.rs
index d5a9cfb2..9c3c017e 100644
--- a/cli/src/lib.rs
+++ b/cli/src/lib.rs
@@ -11,6 +11,7 @@ mod merge;
 mod new;
 mod print;
 mod search;
+mod serve;
 mod utils;
 
 pub use alias::*;
@@ -25,4 +26,5 @@ pub use merge::*;
 pub use new::*;
 pub use print::*;
 pub use search::*;
+pub use serve::*;
 pub use utils::*;
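The `println` change above generalizes the argument to any `AsRef<str>`, so call sites can pass `&str` and `String` alike, and the added `flush()` keeps status output from trailing the server's own log lines. A minimal, self-contained sketch of the pattern (not code from this repo):

    // Any S: AsRef<str> works: &str, String, Cow<str>, ...
    fn println_like<S: AsRef<str>>(msg: S) {
        println!("{}", msg.as_ref());
    }

    fn main() {
        println_like("borrowed");            // &str: no allocation
        println_like(String::from("owned")); // String still compiles
    }
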
diff --git a/cli/src/main.rs b/cli/src/main.rs
index 76e70b6d..5297db22 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -1,7 +1,10 @@
 use std::io::Write;
 
 use clap::Parser;
-use cli::{alias, compile, lexicon, lookup, merge, search, t, CLIContext, Commands, CLI};
+use cli::{
+    alias, compile, dump, index, lexicon, lookup, merge, new, search, serve, t, CLIContext,
+    Commands, CLI,
+};
 
 fn main() {
     let cli = CLI::parse();
@@ -11,13 +14,14 @@
         |c| match cli.command {
             Commands::Alias(ref args) => alias(c, args),
             Commands::Compile(ref args) => compile(c, args),
-            Commands::Dump(ref args) => cli::dump(c, args),
-            Commands::Index(ref args) => cli::index(c, args),
+            Commands::Dump(ref args) => dump(c, args),
+            Commands::Index(ref args) => index(c, args),
             Commands::Lexicon(ref args) => lexicon(c, args),
             Commands::Lookup(ref args) => lookup(c, args),
             Commands::Merge(ref args) => merge(c, args),
-            Commands::New(ref args) => cli::new(c, args),
+            Commands::New(ref args) => new(c, args),
             Commands::Search(ref args) => search(c, args),
+            Commands::Serve(ref args) => serve(c, args),
         },
         &mut ctx,
     );
diff --git a/cli/src/serve/lookup.rs b/cli/src/serve/lookup.rs
new file mode 100644
index 00000000..89765ab9
--- /dev/null
+++ b/cli/src/serve/lookup.rs
@@ -0,0 +1,101 @@
+use std::collections::HashMap;
+
+use actix_web::{
+    get,
+    http::{header::ContentType, StatusCode},
+    web::{Data, Path, Query},
+    HttpResponse, Responder, ResponseError,
+};
+use derive_more::{Display, Error};
+use odict::{DictionaryFile, LookupOptions, ToJSON};
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+pub struct LookupRequest {
+    queries: String,
+    follow: Option<bool>,
+    split: Option<usize>,
+}
+
+#[derive(Debug, Display, Error)]
+enum LookupError {
+    #[display(fmt = "Dictionary not found: {}", name)]
+    DictionaryNotFound { name: String },
+
+    #[display(fmt = "Failed to read dictionary: {}", name)]
+    DictionaryReadError { name: String },
+
+    #[display(fmt = "Lookup error: {}", message)]
+    LookupError { message: String },
+
+    #[display(fmt = "Failed to serialize response")]
+    SerializeError,
+}
+
+impl ResponseError for LookupError {
+    fn error_response(&self) -> HttpResponse {
+        HttpResponse::build(self.status_code())
+            .insert_header(ContentType::html())
+            .body(self.to_string())
+    }
+
+    fn status_code(&self) -> StatusCode {
+        match *self {
+            LookupError::DictionaryNotFound { .. } => StatusCode::NOT_FOUND,
+            LookupError::DictionaryReadError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            LookupError::LookupError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            LookupError::SerializeError => StatusCode::INTERNAL_SERVER_ERROR,
+        }
+    }
+}
+
+#[get("/{name}/lookup")]
+async fn handle_lookup(
+    params: Query<LookupRequest>,
+    dict: Path<String>,
+    dictionary_map: Data<HashMap<String, DictionaryFile>>,
+) -> Result<impl Responder, LookupError> {
+    let LookupRequest {
+        queries: raw_queries,
+        follow,
+        split,
+    } = params.0;
+
+    let queries = raw_queries
+        .split(',')
+        .map(|s| s.to_string())
+        .collect::<Vec<String>>();
+
+    let dictionary_name = dict.into_inner();
+
+    let file = dictionary_map
+        .get(&dictionary_name)
+        .ok_or(LookupError::DictionaryNotFound {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let dictionary = file
+        .to_archive()
+        .map_err(|_e| LookupError::DictionaryReadError {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let entries = dictionary
+        .lookup(
+            &queries,
+            LookupOptions::default()
+                .follow(follow.unwrap_or(false))
+                .split(split.unwrap_or(0)),
+        )
+        .map_err(|e| LookupError::LookupError {
+            message: e.to_string(),
+        })?;
+
+    let json = entries
+        .to_json(true)
+        .map_err(|_e| LookupError::SerializeError)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("application/json")
+        .body(json))
+}
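Assuming a served dictionary whose map key (the file stem) is `english` — the name is hypothetical — the new lookup endpoint would be exercised like:

    curl 'http://127.0.0.1:5005/english/lookup?queries=run,ran&follow=true'

`queries` is comma-separated; `follow` and `split` are optional and default to `false` and `0` per the handler above.
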
diff --git a/cli/src/serve/mod.rs b/cli/src/serve/mod.rs
new file mode 100644
index 00000000..6f5ddcc0
--- /dev/null
+++ b/cli/src/serve/mod.rs
@@ -0,0 +1,131 @@
+use std::{
+    collections::HashMap,
+    error::Error,
+    fmt::{self, Display, Formatter},
+    path::PathBuf,
+};
+
+use actix_web::{middleware::Logger, web::Data, App, HttpServer};
+use clap::{command, Args, ValueEnum};
+use console::style;
+use env_logger::Env;
+use odict::{config::AliasManager, DictionaryFile, DictionaryReader};
+
+use crate::CLIContext;
+
+mod lookup;
+mod search;
+
+#[derive(Debug, Clone, ValueEnum)]
+enum LogLevel {
+    Trace,
+    Debug,
+    Info,
+    Warn,
+    Error,
+}
+
+impl Display for LogLevel {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        match self {
+            LogLevel::Trace => write!(f, "trace"),
+            LogLevel::Debug => write!(f, "debug"),
+            LogLevel::Info => write!(f, "info"),
+            LogLevel::Warn => write!(f, "warn"),
+            LogLevel::Error => write!(f, "error"),
+        }
+    }
+}
+
+#[derive(Debug, Args)]
+#[command(args_conflicts_with_subcommands = true)]
+#[command(flatten_help = true)]
+pub struct ServeArgs {
+    #[arg(short, default_value_t = 5005, help = "Port to listen on")]
+    port: u16,
+
+    // Sets the default log level
+    #[arg(short, long)]
+    level: Option<LogLevel>,
+
+    // List of dictionary paths or aliases to serve
+    #[arg()]
+    dictionaries: Vec<String>,
+}
+
+pub(self) fn get_dictionary_map(
+    reader: &DictionaryReader,
+    alias_manager: &AliasManager,
+    dictionaries: &Vec<String>,
+) -> Result<HashMap<String, DictionaryFile>, Box<dyn Error>> {
+    let mut dictionary_map = HashMap::<String, DictionaryFile>::new();
+
+    for dictionary in dictionaries {
+        let dict = reader.read_from_path_or_alias_with_manager(&dictionary, &alias_manager)?;
+
+        dictionary_map.insert(
+            PathBuf::from(dictionary)
+                .file_stem()
+                .unwrap()
+                .to_string_lossy()
+                .to_string(),
+            dict,
+        );
+    }
+
+    Ok(dictionary_map)
+}
+
+#[actix_web::main]
+pub async fn serve(ctx: &mut CLIContext, args: &ServeArgs) -> Result<(), Box<dyn Error>> {
+    let ServeArgs {
+        port,
+        dictionaries,
+        level,
+    } = args;
+
+    let CLIContext {
+        alias_manager,
+        reader,
+        ..
+    } = ctx;
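With that wiring in place, serving a couple of dictionaries locally (file names hypothetical) would look like:

    odict serve -p 5005 -l info english.odict spanish.odict

Each dictionary is addressed in URLs by its file stem, so `english.odict` is served under `/english/...`.
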
+
+    let dictionary_map = get_dictionary_map(reader, alias_manager, &dictionaries)?;
+    let log_level = format!("{}", level.as_ref().unwrap_or(&LogLevel::Info));
+
+    ctx.println(format!(
+        "\n🟢 Serving the following dictionaries on port {} with log level \"{}\":\n",
+        port, log_level
+    ));
+
+    for (name, dict) in &dictionary_map {
+        ctx.println(format!(
+            " • {} {}",
+            style(name).bold(),
+            style(format!(
+                "({})",
+                dict.path.as_ref().unwrap().to_string_lossy()
+            ))
+            .dim()
+        ));
+    }
+
+    ctx.println("");
+
+    env_logger::init_from_env(Env::new().default_filter_or(log_level));
+
+    let data = Data::new(dictionary_map);
+
+    HttpServer::new(move || {
+        App::new()
+            .wrap(Logger::default())
+            .app_data(Data::clone(&data))
+            .service(lookup::handle_lookup)
+            .service(search::handle_search)
+    })
+    .bind(("127.0.0.1", *port))?
+    .run()
+    .await?;
+
+    Ok(())
+}
diff --git a/cli/src/serve/search.rs b/cli/src/serve/search.rs
new file mode 100644
index 00000000..37fcfa11
--- /dev/null
+++ b/cli/src/serve/search.rs
@@ -0,0 +1,106 @@
+use std::collections::HashMap;
+
+use actix_web::{
+    get,
+    http::{header::ContentType, StatusCode},
+    web::{Data, Path, Query},
+    HttpResponse, Responder, ResponseError,
+};
+use derive_more::{Display, Error};
+use odict::{search::SearchOptions, DictionaryFile, ToJSON};
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+pub struct SearchRequest {
+    query: String,
+    limit: Option<usize>,
+}
+
+#[derive(Debug, Display, Error)]
+enum SearchError {
+    #[display(fmt = "Dictionary not found: {}", name)]
+    DictionaryNotFound { name: String },
+
+    #[display(fmt = "Failed to read dictionary: {}", name)]
+    DictionaryReadError { name: String },
+
+    #[display(fmt = "Search error: {}", message)]
+    SearchError { message: String },
+
+    #[display(fmt = "Failed to serialize response")]
+    SerializeError,
+}
+
+impl ResponseError for SearchError {
+    fn error_response(&self) -> HttpResponse {
+        HttpResponse::build(self.status_code())
+            .insert_header(ContentType::html())
+            .body(self.to_string())
+    }
+
+    fn status_code(&self) -> StatusCode {
+        match *self {
+            SearchError::DictionaryNotFound { .. } => StatusCode::NOT_FOUND,
+            SearchError::DictionaryReadError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
+            SearchError::SearchError { .. } => StatusCode::INTERNAL_SERVER_ERROR,
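The search endpoint mirrors lookup; a request against the hypothetical `english` dictionary:

    curl 'http://127.0.0.1:5005/english/search?query=cat&limit=5'

`limit` is optional and defaults to 10, matching the handler's `limit.unwrap_or(10)` below.
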
+            SearchError::SerializeError => StatusCode::INTERNAL_SERVER_ERROR,
+        }
+    }
+}
+
+#[get("/{name}/search")]
+async fn handle_search(
+    params: Query<SearchRequest>,
+    dict: Path<String>,
+    dictionary_map: Data<HashMap<String, DictionaryFile>>,
+) -> Result<impl Responder, SearchError> {
+    let SearchRequest { query, limit } = params.0;
+
+    let dictionary_name = dict.into_inner();
+
+    let file = dictionary_map
+        .get(&dictionary_name)
+        .ok_or(SearchError::DictionaryNotFound {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let dictionary = file
+        .to_archive()
+        .map_err(|_e| SearchError::DictionaryReadError {
+            name: dictionary_name.to_string(),
+        })?;
+
+    let entries = dictionary
+        .search(&query, SearchOptions::default().limit(limit.unwrap_or(10)))
+        .map_err(|e| SearchError::SearchError {
+            message: e.to_string(),
+        })?;
+
+    let json = entries
+        .to_json(true)
+        .map_err(|_e| SearchError::SerializeError)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("application/json")
+        .body(json))
+}
+
+// #[cfg(test)]
+// mod tests {
+//     use actix_web::{http::header::ContentType, test, App};
+
+//     use crate::serve::get_dictionary_map;
+
+//     use super::handle_search;
+
+//     #[actix_web::test]
+//     async fn test_index_get() {
+//         let app = test::init_service(App::new().service(handle_search)).await;
+//         let req = test::TestRequest::default()
+//             // get_dictionary_map(reader, alias_manager, dictionaries)
+//             .insert_header(ContentType::plaintext())
+//             .to_request();
+//         let resp = test::call_service(&app, req).await;
+//         assert!(resp.status().is_success());
+//     }
+// }
diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index 543ae91e..7a02809c 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -9,7 +9,6 @@ sql = ["dep:sea-query"]
 config = ["dep:dirs"]
 charabia = ["dep:charabia"]
 search = ["dep:tantivy", "dep:tantivy-tokenizer-api"]
-serve = ["dep:actix-web"]
 
 default = ["json", "sql", "config"]
 
@@ -34,7 +33,6 @@ dirs = { version = "5.0.1", optional = true }
 once_cell = "1.19.0"
 serde_json = { version = "1.0.115", optional = true }
 validation = "0.0.1"
-actix-web = { version = "4.5.1", optional = true }
 tantivy = { version = "0.21.1", optional = true }
 charabia = { version = "0.8.8", optional = true }
 tantivy-tokenizer-api = { version = "0.2.0", optional = true }
diff --git a/lib/src/core/lookup.rs b/lib/src/core/lookup.rs
index 23d13261..3689fddb 100644
--- a/lib/src/core/lookup.rs
+++ b/lib/src/core/lookup.rs
@@ -4,6 +4,7 @@ use crate::{ArchivedDictionary, ArchivedEntry, Dictionary, Entry, SplitOptions};
 
 use once_cell::sync::Lazy;
 use rayon::prelude::*;
+use std::marker::{Send, Sync};
 
 use regex::Regex;
 
@@ -18,6 +19,12 @@ pub struct LookupOptions {
     split: usize,
 }
 
+impl AsRef<LookupOptions> for LookupOptions {
+    fn as_ref(&self) -> &Self {
+        self
+    }
+}
+
 impl LookupOptions {
     pub fn default() -> Self {
         Self {
@@ -71,15 +78,15 @@ fn parse_query(query: &str) -> LookupQuery {
 macro_rules! lookup {
     ($tys:ident, $ret:ident) => {
         impl $tys {
-            fn lookup_(
+            fn lookup_<Options: AsRef<LookupOptions> + Send + Sync>(
                 &self,
                 query: &LookupQuery,
-                options: &LookupOptions,
+                options: Options,
             ) -> Result<Vec<&$ret>, Box<dyn Error>> {
                 let mut entries: Vec<&$ret> = Vec::new();
 
                 let LookupQuery { term, fallback } = query;
-                let LookupOptions { follow, split } = options;
+                let LookupOptions { follow, split } = options.as_ref();
 
                 let mut found = self.entries.get(term.as_str());
 
@@ -99,6 +106,8 @@ macro_rules! lookup {
                         },
                         options,
                     );
+                } else {
+                    entries.push(entry);
                 }
             } else if *split > 0 {
                 let split = self.split(term, &SplitOptions::default().threshold(*split))?;
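The `Options: AsRef<LookupOptions> + Send + Sync` bound, together with the identity `AsRef` impl above, is what lets callers hand options to `lookup` either by value or by reference across rayon's parallel iterator. A standalone sketch of the mechanism (not odict code):

    #[derive(Default)]
    struct LookupOptions {
        follow: bool,
        split: usize,
    }

    // Mirrors the identity impl added above.
    impl AsRef<LookupOptions> for LookupOptions {
        fn as_ref(&self) -> &Self {
            self
        }
    }

    // &LookupOptions also satisfies the bound via std's blanket
    // `impl AsRef<U> for &T where T: AsRef<U>`.
    fn lookup<O: AsRef<LookupOptions> + Send + Sync>(options: O) {
        let LookupOptions { follow, split } = options.as_ref();
        println!("follow={follow}, split={split}");
    }

    fn main() {
        let opts = LookupOptions::default();
        lookup(&opts); // by reference
        lookup(opts); // by value
    }
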
@@ -111,14 +120,14 @@ macro_rules! lookup {
                 Ok(entries)
             }
 
-            pub fn lookup(
+            pub fn lookup<Options: AsRef<LookupOptions> + Send + Sync>(
                 &self,
                 queries: &Vec<String>,
-                options: &LookupOptions,
+                options: Options,
             ) -> Result<Vec<Vec<&$ret>>, Box<dyn Error>> {
                 queries
                     .par_iter()
-                    .map(|query| self.lookup_(&parse_query(query), options))
+                    .map(|query| self.lookup_(&parse_query(query), &options))
                     .collect()
             }
         }
     }
 }
diff --git a/lib/src/core/read.rs b/lib/src/core/read.rs
index f2274faf..defc73a9 100644
--- a/lib/src/core/read.rs
+++ b/lib/src/core/read.rs
@@ -1,12 +1,12 @@
 use std::{
     error::Error,
-    fs::File,
+    fs::{canonicalize, File},
     io::{Cursor, Read, Seek},
     path::PathBuf,
 };
 
 use byteorder::{LittleEndian, ReadBytesExt};
-use rkyv::{archived_root, Deserialize, Infallible};
+use rkyv::archived_root;
 
 use super::constants::FILE_VERSION;
 use crate::{lz4::decompress, ArchivedDictionary, Dictionary};
@@ -29,7 +29,7 @@ impl DictionaryFile {
     }
 
     pub fn to_dictionary(&self) -> Result<Dictionary, Box<dyn Error>> {
-        let dict: Dictionary = self.to_archive()?.deserialize(&mut Infallible)?;
+        let dict: Dictionary = self.to_archive()?.to_dictionary()?;
         Ok(dict)
     }
 }
@@ -89,14 +89,15 @@ impl DictionaryReader {
     }
 
     pub fn read_from_path(&self, path: &str) -> Result<DictionaryFile, Box<dyn Error>> {
-        let mut file = File::open(path)?;
+        let pb = canonicalize(PathBuf::from(path))?;
+        let mut file = File::open(&pb)?;
 
         let mut buffer = Vec::new();
 
         file.read_to_end(&mut buffer)?;
 
         let mut result = self.read_from_bytes(&buffer)?;
 
-        result.path = Some(PathBuf::from(path));
+        result.path = Some(pb);
 
         Ok(result)
     }
diff --git a/lib/src/dump/json/entry.rs b/lib/src/dump/json/entry.rs
index 19396a6d..72579e60 100644
--- a/lib/src/dump/json/entry.rs
+++ b/lib/src/dump/json/entry.rs
@@ -1,4 +1,4 @@
-use crate::Entry;
+use crate::{ArchivedEntry, Entry};
 
 use serde::Serialize;
 
@@ -33,3 +33,9 @@ impl From<Entry> for EntryJSON {
         }
     }
 }
+
+impl From<&ArchivedEntry> for EntryJSON {
+    fn from(entry: &ArchivedEntry) -> Self {
+        EntryJSON::from(entry.to_entry().unwrap())
+    }
+}
diff --git a/lib/src/dump/json/json.rs b/lib/src/dump/json/json.rs
index dae5215e..21af1311 100644
--- a/lib/src/dump/json/json.rs
+++ b/lib/src/dump/json/json.rs
@@ -6,7 +6,7 @@ use serde_json::{to_string, to_string_pretty};
 
 use super::dictionary::DictionaryJSON;
 use super::entry::EntryJSON;
-use crate::{Dictionary, Entry};
+use crate::{ArchivedEntry, Dictionary, Entry};
 
 pub struct JSONSerializer {}
 
@@ -59,3 +59,32 @@ impl ToJSON for Vec<Vec<Entry>> {
         stringify(&json, pretty)
     }
 }
+
+impl ToJSON for &ArchivedEntry {
+    fn to_json(self, pretty: bool) -> Result<String, Box<dyn Error>> {
+        let json = EntryJSON::from(self.to_entry().unwrap());
+        stringify(&json, pretty)
+    }
+}
+
+impl ToJSON for Vec<&ArchivedEntry> {
+    fn to_json(self, pretty: bool) -> Result<String, Box<dyn Error>> {
+        let json = self
+            .into_iter()
+            .map(|v| EntryJSON::from(v))
+            .collect::<Vec<EntryJSON>>();
+
+        stringify(&json, pretty)
+    }
+}
+
+impl ToJSON for Vec<Vec<&ArchivedEntry>> {
+    fn to_json(self, pretty: bool) -> Result<String, Box<dyn Error>> {
+        let json = self
+            .into_iter()
+            .map(|v| v.into_iter().map(|v| EntryJSON::from(v)).collect())
+            .collect::<Vec<Vec<EntryJSON>>>();
+
+        stringify(&json, pretty)
+    }
+}
diff --git a/lib/src/models/dictionary.rs b/lib/src/models/dictionary.rs
index dd756308..0b8101cb 100644
--- a/lib/src/models/dictionary.rs
+++ b/lib/src/models/dictionary.rs
@@ -1,5 +1,5 @@
 use quick_xml::de::from_str;
-use rkyv::to_bytes;
+use rkyv::{to_bytes, Deserialize, Infallible};
 
 use std::{collections::HashMap, error::Error};
 
 use crate::serializable;
@@ -69,3 +69,10 @@ impl From<&str> for Dictionary {
         from_str(xml).unwrap()
     }
 }
+
+impl ArchivedDictionary {
+    pub fn to_dictionary(&self) -> Result<Dictionary, Box<dyn Error>> {
+        let dict: Dictionary = self.deserialize(&mut Infallible)?;
+        Ok(dict)
+    }
+}
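Taken together, a `DictionaryFile` read from disk now carries a canonicalized path and can be deserialized through its archive. A sketch of the resulting call chain, assuming the `odict` crate as a dependency (`DictionaryReader::default()` is an assumed constructor; the path is hypothetical):

    use odict::DictionaryReader;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let reader = DictionaryReader::default();

        // Relative paths and symlinks now resolve before being stored on
        // `result.path`, which the serve command later prints.
        let file = reader.read_from_path("./english.odict")?;

        // DictionaryFile::to_dictionary now delegates to the new
        // ArchivedDictionary::to_dictionary helper.
        let dict = file.to_archive()?.to_dictionary()?;
        println!("loaded {} entries", dict.entries.len());
        Ok(())
    }
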
diff --git a/lib/src/search/index.rs b/lib/src/search/index.rs
index e0cb7e3b..0b10ebe1 100644
--- a/lib/src/search/index.rs
+++ b/lib/src/search/index.rs
@@ -1,10 +1,9 @@
 use std::{error::Error, ffi::OsStr, fs::create_dir_all, fs::remove_dir_all, path::PathBuf};
+use tantivy::{doc, tokenizer::TextAnalyzer, Index};
 
-use tantivy::tokenizer::TextAnalyzer;
-use tantivy::{doc, Index};
-
-use crate::config::get_config_dir;
-use crate::{Dictionary, PreviewOptions};
+use crate::{
+    config::get_config_dir, ArchivedDictionary, ArchivedEntry, Dictionary, PreviewOptions,
+};
 
 use super::constants::{CUSTOM_TOKENIZER, DEFAULT_TOKENIZER};
 use super::schema::{FIELD_BUFFER, FIELD_DEFINITIONS, FIELD_TERM, SCHEMA};
@@ -70,46 +69,65 @@ impl AsRef<IndexOptions> for IndexOptions {
     }
 }
 
-impl Dictionary {
-    pub fn index<Options: AsRef<IndexOptions>>(
-        &self,
-        options: Options,
-    ) -> Result<(), Box<dyn Error>> {
-        let opts = options.as_ref();
-        let index_path = opts.dir.join(self.id.as_str());
+macro_rules! index {
+    ($name:ident) => {
+        impl $name {
+            pub fn index<Options: AsRef<IndexOptions>>(
+                &self,
+                options: Options,
+            ) -> Result<(), Box<dyn Error>> {
+                let opts = options.as_ref();
+                let index_path = opts.dir.join(self.id.as_str());
+
+                if opts.overwrite && index_path.exists() {
+                    remove_dir_all(&index_path)?;
+                }
 
-        if opts.overwrite && index_path.exists() {
-            remove_dir_all(&index_path)?;
-        }
+                if !index_path.exists() {
+                    create_dir_all(&index_path)?;
+                }
 
-        if !index_path.exists() {
-            create_dir_all(&index_path)?;
-        }
+                let index = Index::create_in_dir(&index_path, SCHEMA.to_owned())?;
 
-        let index = Index::create_in_dir(&index_path, SCHEMA.to_owned())?;
+                index
+                    .tokenizers()
+                    .register(CUSTOM_TOKENIZER, opts.tokenizer.clone());
 
-        index
-            .tokenizers()
-            .register(CUSTOM_TOKENIZER, opts.tokenizer.clone());
+                let mut index_writer = index.writer(opts.memory)?;
 
-        let mut index_writer = index.writer(opts.memory)?;
+                self.entries.values().enumerate().for_each(|(i, entry)| {
+                    let document = doc!(
+                        *FIELD_TERM => entry.term.as_str(),
+                        *FIELD_DEFINITIONS => entry.preview(PreviewOptions::default()),
+                        *FIELD_BUFFER => entry.serialize().unwrap()
+                    );
 
-        self.entries.values().enumerate().for_each(|(i, entry)| {
-            let document = doc!(
-                *FIELD_TERM => entry.term.as_str(),
-                *FIELD_DEFINITIONS => entry.preview(PreviewOptions::default()),
-                *FIELD_BUFFER => entry.serialize().unwrap()
-            );
+                    if index_writer.add_document(document).is_ok() {
+                        let cb = options.as_ref().cb_on_item.as_ref();
+                        cb(i, entry.term.as_str());
+                    }
+                });
 
-            if index_writer.add_document(document).is_ok() {
-                let cb = opts.cb_on_item.as_ref();
-                cb(i, entry.term.as_str());
-            }
-        });
+                index_writer.commit()?;
+                index_writer.wait_merging_threads()?;
 
-        index_writer.commit()?;
-        index_writer.wait_merging_threads()?;
+                Ok(())
+            }
+        }
+    };
+}
 
-        Ok(())
+// Workaround
+trait SerializeEntry {
+    fn serialize(&self) -> Result<Vec<u8>, Box<dyn Error>>;
+}
+
+impl SerializeEntry for &ArchivedEntry {
+    fn serialize(&self) -> Result<Vec<u8>, Box<dyn Error>> {
+        self.to_entry().unwrap().serialize()
     }
 }
+
+index!(Dictionary);
+index!(ArchivedDictionary);
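Since `index!` now expands for both `Dictionary` and `ArchivedDictionary`, an index can be built straight from the archive without a full deserialize. A sketch, assuming the `odict` crate (`DictionaryReader::default()` and the `odict::search::IndexOptions` re-export path are assumptions):

    use odict::{search::IndexOptions, DictionaryReader};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let file = DictionaryReader::default().read_from_path("english.odict")?;

        // The archived dictionary indexes itself, converting each
        // ArchivedEntry via the SerializeEntry workaround above.
        file.to_archive()?.index(IndexOptions::default())?;
        Ok(())
    }
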
diff --git a/lib/src/search/search.rs b/lib/src/search/search.rs
index 2d4eb250..f16ef77e 100644
--- a/lib/src/search/search.rs
+++ b/lib/src/search/search.rs
@@ -1,14 +1,15 @@
 use std::{error::Error, ffi::OsStr, path::PathBuf};
 
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
-use rkyv::{archived_root, Deserialize, Infallible};
+use rkyv::archived_root;
 use tantivy::{
     collector::TopDocs, query::QueryParser, tokenizer::TextAnalyzer, Index, ReloadPolicy,
 };
 
-use crate::{Dictionary, Entry};
+use crate::{ArchivedDictionary, Dictionary, Entry};
 
 use super::constants::{CUSTOM_TOKENIZER, DEFAULT_TOKENIZER};
+use super::index::IndexOptions;
 use super::{
     get_default_index_dir,
@@ -18,6 +19,7 @@
 pub struct SearchOptions {
     pub dir: PathBuf,
     pub threshold: u32,
+    pub autoindex: bool,
     pub limit: usize,
     pub tokenizer: TextAnalyzer,
 }
@@ -27,6 +29,7 @@ impl SearchOptions {
         Self {
             dir: get_default_index_dir(),
             threshold: 1,
+            autoindex: false,
             limit: 10,
             tokenizer: DEFAULT_TOKENIZER.to_owned(),
         }
     }
@@ -37,6 +40,11 @@
         self
     }
 
+    pub fn autoindex(mut self, autoindex: bool) -> Self {
+        self.autoindex = autoindex;
+        self
+    }
+
     pub fn tokenizer<T>(mut self, tokenizer: T) -> Self
     where
         TextAnalyzer: From<T>,
@@ -62,48 +70,62 @@ impl AsRef<SearchOptions> for SearchOptions {
     }
 }
 
-impl Dictionary {
-    pub fn search<Options: AsRef<SearchOptions>>(
-        &self,
-        query: &str,
-        options: Options,
-    ) -> Result<Vec<Entry>, Box<dyn Error>> {
-        let opts = options.as_ref();
-        let index_path = opts.dir.join(self.id.as_str());
-        let index = Index::open_in_dir(&index_path)?;
-
-        index
-            .tokenizers()
-            .register(CUSTOM_TOKENIZER, opts.tokenizer.to_owned());
-
-        let reader = index
-            .reader_builder()
-            .reload_policy(ReloadPolicy::OnCommit)
-            .try_into()?;
-
-        let searcher = reader.searcher();
-        let query_parser = QueryParser::for_index(&index, vec![*FIELD_TERM, *FIELD_DEFINITIONS]);
-        let query_obj = query_parser.parse_query(query)?;
-        let top_docs = searcher.search(&query_obj, &TopDocs::with_limit(opts.limit))?;
-        let entries = top_docs
-            .par_iter()
-            .filter(|(score, _)| score >= &(opts.threshold as f32))
-            .map(|(_, doc_address)| -> Entry {
-                let retrieved_doc = searcher.doc(*doc_address).unwrap();
-
-                let bytes = retrieved_doc
-                    .get_first(*FIELD_BUFFER)
-                    .unwrap()
-                    .as_bytes()
-                    .unwrap();
-
-                let archive = unsafe { archived_root::<Entry>(&bytes[..]) };
-                let entry: Entry = archive.deserialize(&mut Infallible).unwrap();
-
-                entry
-            })
-            .collect();
-
-        Ok(entries)
-    }
+macro_rules! search {
+    ($name:ident) => {
+        impl $name {
+            pub fn search<Options: AsRef<SearchOptions>>(
+                &self,
+                query: &str,
+                options: Options,
+            ) -> Result<Vec<Entry>, Box<dyn Error>> {
+                let opts = options.as_ref();
+                let index_path = opts.dir.join(self.id.as_str());
+
+                if opts.autoindex {
+                    if !index_path.exists() {
+                        self.index(IndexOptions::default().tokenizer(opts.tokenizer.clone()))?;
+                    }
+                }
+
+                let index = Index::open_in_dir(&index_path)?;
+
+                index
+                    .tokenizers()
+                    .register(CUSTOM_TOKENIZER, opts.tokenizer.to_owned());
+
+                let reader = index
+                    .reader_builder()
+                    .reload_policy(ReloadPolicy::OnCommit)
+                    .try_into()?;
+
+                let searcher = reader.searcher();
+                let query_parser =
+                    QueryParser::for_index(&index, vec![*FIELD_TERM, *FIELD_DEFINITIONS]);
+                let query_obj = query_parser.parse_query(query)?;
+                let top_docs = searcher.search(&query_obj, &TopDocs::with_limit(opts.limit))?;
+                let entries = top_docs
+                    .par_iter()
+                    .filter(|(score, _)| score >= &(opts.threshold as f32))
+                    .map(|(_, doc_address)| -> Entry {
+                        let retrieved_doc = searcher.doc(*doc_address).unwrap();
+
+                        let bytes = retrieved_doc
+                            .get_first(*FIELD_BUFFER)
+                            .unwrap()
+                            .as_bytes()
+                            .unwrap();
+
+                        let archive = unsafe { archived_root::<Entry>(&bytes[..]) };
+
+                        archive.to_entry().unwrap()
+                    })
+                    .collect();
+
+                Ok(entries)
+            }
+        }
+    };
 }
+
+search!(Dictionary);
+search!(ArchivedDictionary);
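The new `autoindex` flag builds the tantivy index on first use when the index directory doesn't exist yet. A sketch under the same assumptions as above (`DictionaryReader::default()` is an assumed constructor; the path is hypothetical):

    use odict::{search::SearchOptions, DictionaryReader};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let file = DictionaryReader::default().read_from_path("english.odict")?;
        let dict = file.to_archive()?;

        // Builds the index if missing, then searches it.
        let entries = dict.search("cat", SearchOptions::default().autoindex(true).limit(5))?;
        println!("{} matches", entries.len());
        Ok(())
    }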