diff --git a/Cargo.lock b/Cargo.lock index 1df3868003..8d6fb9ef64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,15 +47,6 @@ dependencies = [ "url", ] -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi 0.3.9", -] - [[package]] name = "any_ascii" version = "0.1.7" @@ -179,6 +170,12 @@ dependencies = [ "serde", ] +[[package]] +name = "build_const" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" + [[package]] name = "bumpalo" version = "3.9.1" @@ -296,19 +293,10 @@ dependencies = [ ] [[package]] -name = "clap" -version = "2.34.0" +name = "chunked_transfer" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags", - "strsim 0.8.0", - "textwrap 0.11.0", - "unicode-width", - "vec_map", -] +checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" [[package]] name = "clap" @@ -322,9 +310,9 @@ dependencies = [ "clap_lex", "indexmap", "lazy_static", - "strsim 0.10.0", + "strsim", "termcolor", - "textwrap 0.15.0", + "textwrap", ] [[package]] @@ -333,7 +321,7 @@ version = "3.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da92e6facd8d73c22745a5d3cbb59bdf8e46e3235c923e516527d8e81eec14a4" dependencies = [ - "clap 3.1.17", + "clap", ] [[package]] @@ -342,7 +330,7 @@ version = "3.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3aab4734e083b809aaf5794e14e756d1c798d2c69c7f7de7a09a2f5214993c1" dependencies = [ - "heck 0.4.0", + "heck", "proc-macro-error", "proc-macro2", "quote", @@ -446,6 +434,15 @@ dependencies = [ "libc", ] +[[package]] +name = 
"crc" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +dependencies = [ + "build_const", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -590,12 +587,11 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "elasticlunr-rs" -version = "2.3.14" +version = "3.0.0-beta.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60eee99ae400fb1c4521ea3bd678994cb66572754d240449368e8ecd40281569" +checksum = "58ac9fc52e5290f2c865293cc87fc6d2f2b744b2b93743dde54f499cf7e08a25" dependencies = [ "jieba-rs", - "lazy_static", "lindera", "lindera-core", "regex", @@ -603,8 +599,6 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "strum", - "strum_macros", ] [[package]] @@ -686,6 +680,19 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "env_logger" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + [[package]] name = "errors" version = "0.1.0" @@ -1018,15 +1025,6 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "heck" version = "0.4.0" @@ -1096,6 +1094,12 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" +[[package]] +name = "humantime" +version = "2.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.18" @@ -1450,42 +1454,78 @@ dependencies = [ [[package]] name = "lindera" -version = "0.8.1" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e067b79992ab4ee575f5113ca7ccc1b011f67378f7627169e9bf95d48a8d481" +checksum = "7d1c5db4b1d12637aa316dc1adb215f78fe79025080af750942516c5ff17d1a0" dependencies = [ "anyhow", "bincode", "byteorder", "encoding", + "lindera-cc-cedict-builder", "lindera-core", "lindera-dictionary", "lindera-ipadic", "lindera-ipadic-builder", + "lindera-ko-dic-builder", + "lindera-unidic-builder", "serde", "serde_json", + "thiserror", +] + +[[package]] +name = "lindera-cc-cedict-builder" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73a3509fb497340571d49feddb57e1db2ce5248c4d449f2548d0ee8cb745eb1e" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", ] [[package]] name = "lindera-core" -version = "0.8.1" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d34134111feb8c9424de5743a9ead4c22cb1c5a48cb90322ebbe21a2bc27c1" +checksum = "5d20d1b2c085393aed58625d741beca69410e1143fc35bc67ebc35c9885f9f74" dependencies = [ "anyhow", "bincode", "byteorder", "encoding", + "log", "serde", "thiserror", "yada", ] +[[package]] +name = "lindera-decompress" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b96b8050cded13927a99bcb8cbb0987f89fc8f35429fc153b4bc05ddc7a53a44" +dependencies = [ + "anyhow", + "lzma-rs", + "serde", +] + [[package]] name = "lindera-dictionary" -version = "0.8.1" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "68ac4ac60f3ca650e4ab1280a5b6d57f73267902477ab9c9fd3b6609a7fb5888" +checksum = "5abe3dddc22303402957edb4472ab0c996e0d93b3b00643de3bee8b28c2f9297" dependencies = [ "anyhow", "bincode", @@ -1495,9 +1535,9 @@ dependencies = [ [[package]] name = "lindera-ipadic" -version = "0.8.1" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266fda136179e607d6ebcf2ef326fbdb2a133f9bdea9a68e6ac4fa8627e47ced" +checksum = "b8f4c111f6ad9eb9e015d02061af2ed36fc0255f29359294415c7c2f1ea5b5b6" dependencies = [ "bincode", "byteorder", @@ -1505,28 +1545,71 @@ dependencies = [ "flate2", "lindera-core", "lindera-ipadic-builder", - "reqwest", + "once_cell", "tar", - "tokio", + "ureq", ] [[package]] name = "lindera-ipadic-builder" -version = "0.8.1" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ede56e474b8fda9d4df2b9dc7683018111d3298260e1f594655e34287f26c64" +checksum = "a2b9893f22a4a7511ac70ff7d96cda9b8d7259b7d7121784183c73bc593ce6e7" dependencies = [ "anyhow", "bincode", "byteorder", - "clap 2.34.0", + "clap", "encoding", + "env_logger", "glob", "lindera-core", + "lindera-decompress", + "log", "serde", "yada", ] +[[package]] +name = "lindera-ko-dic-builder" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14282600ebfe7ab6fd4f3042143024ff9d74c09d58fd983d0c587839cf940d4a" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] + +[[package]] +name = "lindera-unidic-builder" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b20825d46c95854e47c532c3e548dfec07c8f187c1ed89383cb6c35790338088" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] 
+ [[package]] name = "line-wrap" version = "0.1.1" @@ -1572,6 +1655,16 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "lzma-rs" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1" +dependencies = [ + "byteorder", + "crc", +] + [[package]] name = "mac" version = "0.1.1" @@ -2796,15 +2889,6 @@ dependencies = [ "digest 0.10.3", ] -[[package]] -name = "signal-hook-registry" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" -dependencies = [ - "libc", -] - [[package]] name = "similar" version = "2.1.0" @@ -2914,36 +2998,12 @@ dependencies = [ "quote", ] -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "strum" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2" - -[[package]] -name = "strum_macros" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec" -dependencies = [ - "heck 0.3.3", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "svg_metadata" version = "0.4.2" @@ -3096,15 +3156,6 @@ dependencies = [ "syn", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - 
[[package]] name = "textwrap" version = "0.15.0" @@ -3211,25 +3262,11 @@ dependencies = [ "mio 0.8.2", "num_cpus", "once_cell", - "parking_lot", "pin-project-lite", - "signal-hook-registry", "socket2", - "tokio-macros", "winapi 0.3.9", ] -[[package]] -name = "tokio-macros" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tokio-native-tls" version = "0.3.0" @@ -3437,12 +3474,6 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" -[[package]] -name = "unicode-width" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" - [[package]] name = "unicode-xid" version = "0.2.3" @@ -3455,6 +3486,22 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "ureq" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9399fa2f927a3d327187cbd201480cee55bee6ac5d3c77dd27f0c6814cff16d5" +dependencies = [ + "base64", + "chunked_transfer", + "log", + "once_cell", + "rustls", + "url", + "webpki", + "webpki-roots", +] + [[package]] name = "url" version = "2.2.2" @@ -3489,12 +3536,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.4" @@ -3823,7 +3864,7 @@ 
dependencies = [ name = "zola" version = "0.16.0" dependencies = [ - "clap 3.1.17", + "clap", "clap_complete", "console 0.1.0", "ctrlc", diff --git a/components/libs/Cargo.toml b/components/libs/Cargo.toml index 87d02a1c0e..b17547ca99 100644 --- a/components/libs/Cargo.toml +++ b/components/libs/Cargo.toml @@ -9,7 +9,7 @@ ammonia = "3" atty = "0.2.11" base64 = "0.13" csv = "1" -elasticlunr-rs = {version = "2", default-features = false, features = ["da", "no", "de", "du", "es", "fi", "fr", "it", "pt", "ro", "ru", "sv", "tr"] } +elasticlunr-rs = { version = "3.0.0-beta.2", features = ["da", "no", "de", "du", "es", "fi", "fr", "it", "pt", "ro", "ru", "sv", "tr"] } filetime = "0.2" gh-emoji = "1" glob = "0.3" diff --git a/components/search/src/lib.rs b/components/search/src/lib.rs index 02f0a177d7..d03b622a81 100644 --- a/components/search/src/lib.rs +++ b/components/search/src/lib.rs @@ -1,9 +1,7 @@ use std::collections::{HashMap, HashSet}; use libs::ammonia; -use libs::elasticlunr::pipeline; -use libs::elasticlunr::pipeline::TokenizerFn; -use libs::elasticlunr::{Index, Language}; +use libs::elasticlunr::{lang, Index, IndexBuilder}; use libs::once_cell::sync::Lazy; use config::{Config, Search}; @@ -27,25 +25,24 @@ static AMMONIA: Lazy> = Lazy::new(|| { builder }); -fn build_fields(search_config: &Search) -> Vec { - let mut fields = vec![]; +fn build_fields(search_config: &Search, mut index: IndexBuilder) -> IndexBuilder { if search_config.include_title { - fields.push("title".to_owned()); + index = index.add_field("title"); } if search_config.include_description { - fields.push("description".to_owned()); + index = index.add_field("description"); } if search_config.include_path { - fields.push("path".to_owned()); + index = index.add_field_with_tokenizer("path", Box::new(path_tokenizer)); } if search_config.include_content { - fields.push("body".to_owned()); + index = index.add_field("body") } - fields + index } fn path_tokenizer(text: &str) -> Vec { @@ -55,34 +52,6 @@ 
fn path_tokenizer(text: &str) -> Vec { .collect() } -fn build_tokenizers(search_config: &Search, language: Language) -> Vec { - let text_tokenizer = match language { - #[cfg(feature = "indexing-zh")] - Language::Chinese => pipeline::tokenize_chinese, - #[cfg(feature = "indexing-ja")] - Language::Japanese => pipeline::tokenize_japanese, - _ => pipeline::tokenize, - }; - let mut tokenizers: Vec = vec![]; - if search_config.include_title { - tokenizers.push(text_tokenizer); - } - - if search_config.include_description { - tokenizers.push(text_tokenizer); - } - - if search_config.include_path { - tokenizers.push(path_tokenizer); - } - - if search_config.include_content { - tokenizers.push(text_tokenizer); - } - - tokenizers -} - fn fill_index( search_config: &Search, title: &Option, @@ -126,26 +95,20 @@ fn fill_index( /// Errors if the language given is not available in Elasticlunr /// TODO: is making `in_search_index` apply to subsections of a `false` section useful? pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result { - let language = match Language::from_code(lang) { + let language = match lang::from_code(lang) { Some(l) => l, None => { bail!("Tried to build search index for language {} which is not supported", lang); } }; let language_options = &config.languages[lang]; - let mut index = Index::with_language(language, &build_fields(&language_options.search)); - - let tokenizers = build_tokenizers(&language_options.search, language); + let mut index = IndexBuilder::with_language(language); + index = build_fields(&language_options.search, index); + let mut index = index.build(); for (_, section) in &library.sections { if section.lang == lang { - add_section_to_index( - &mut index, - section, - library, - &language_options.search, - tokenizers.clone(), - ); + add_section_to_index(&mut index, section, library, &language_options.search); } } @@ -157,7 +120,6 @@ fn add_section_to_index( section: &Section, library: &Library, search_config: 
&Search, - tokenizers: Vec, ) { if !section.meta.in_search_index { return; @@ -165,7 +127,7 @@ fn add_section_to_index( // Don't index redirecting sections if section.meta.redirect_to.is_none() { - index.add_doc_with_tokenizers( + index.add_doc( &section.permalink, &fill_index( search_config, @@ -174,7 +136,6 @@ fn add_section_to_index( &section.path, &section.content, ), - tokenizers.clone(), ); } @@ -184,7 +145,7 @@ fn add_section_to_index( continue; } - index.add_doc_with_tokenizers( + index.add_doc( &page.permalink, &fill_index( search_config, @@ -193,7 +154,6 @@ fn add_section_to_index( &page.path, &page.content, ), - tokenizers.clone(), ); } } @@ -207,21 +167,21 @@ mod tests { #[test] fn can_build_fields() { let mut config = Config::default(); - let fields = build_fields(&config.search); - assert_eq!(fields, vec!["title", "body"]); + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["title", "body"]); config.search.include_content = false; config.search.include_description = true; - let fields = build_fields(&config.search); - assert_eq!(fields, vec!["title", "description"]); + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["title", "description"]); config.search.include_content = true; - let fields = build_fields(&config.search); - assert_eq!(fields, vec!["title", "description", "body"]); + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["title", "description", "body"]); config.search.include_title = false; - let fields = build_fields(&config.search); - assert_eq!(fields, vec!["description", "body"]); + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["description", "body"]); } #[test] diff --git a/themes/abridge b/themes/abridge new file mode 160000 index 0000000000..6bdc489863 --- /dev/null +++ b/themes/abridge @@ -0,0 +1 @@
+Subproject commit 6bdc489863018b2cb97541ac019a49aa1364537b