diff --git a/server/Cargo.lock b/server/Cargo.lock index 9e60b5022..4ec1974e0 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -106,7 +106,7 @@ dependencies = [ "http 0.2.12", "httparse", "httpdate", - "itoa", + "itoa 1.0.11", "language-tags", "local-channel", "mime", @@ -253,7 +253,7 @@ dependencies = [ "futures-core", "futures-util", "impl-more", - "itoa", + "itoa 1.0.11", "language-tags", "log", "mime", @@ -494,6 +494,19 @@ dependencies = [ "futures-core", ] +[[package]] +name = "async-compression" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" +dependencies = [ + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-stream" version = "0.3.5" @@ -617,7 +630,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "itoa", + "itoa 1.0.11", "matchit", "memchr", "mime", @@ -734,6 +747,21 @@ dependencies = [ "serde", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -809,6 +837,26 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "brownstone" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5839ee4f953e811bfdcf223f509cb2c6a3e1447959b0bff459405575bc17f22" +dependencies = [ + "arrayvec", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = 
[ + "lazy_static", + "memchr", + "regex-automata 0.1.10", +] + [[package]] name = "bstr" version = "1.9.1" @@ -969,7 +1017,7 @@ version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0875e527e299fc5f4faba42870bf199a39ab0bb2dbba1b8aef0a2151451130f" dependencies = [ - "bstr", + "bstr 1.9.1", "bytes", "clickhouse-derive 0.1.1", "clickhouse-rs-cityhash-sys", @@ -992,7 +1040,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eab6d70c534d5e54680aae99712800fca8d048cdfad3fbd154daf823d444f08b" dependencies = [ - "bstr", + "bstr 1.9.1", "bytes", "clickhouse-derive 0.2.0", "clickhouse-rs-cityhash-sys", @@ -1074,6 +1122,31 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "config-derive" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7329955b015b82dbcf7bf217f85cbcc016a1a825bf3b074093cd39a5c071a60c" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.52.0", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -1100,6 +1173,26 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_format" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "126f97965c8ad46d6d9163268ff28432e8f6a1196a55578867832e3049df63dd" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] 
name = "constant_time_eq" version = "0.3.0" @@ -1112,6 +1205,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie" version = "0.16.2" @@ -1249,6 +1351,23 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa 0.4.8", + "matches", + "phf 0.8.0", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + [[package]] name = "cssparser" version = "0.31.2" @@ -1257,7 +1376,7 @@ checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" dependencies = [ "cssparser-macros", "dtoa-short", - "itoa", + "itoa 1.0.11", "phf 0.11.2", "smallvec", ] @@ -1457,7 +1576,7 @@ version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", @@ -1495,7 +1614,7 @@ dependencies = [ "byteorder", "chrono", "diesel_derives", - "itoa", + "itoa 1.0.11", "pq-sys", "serde_json", "uuid 1.10.0", @@ -1700,6 +1819,21 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1019fa28f600f5b581b7a603d515c3f1635da041ca211b5055804788673abfe" +[[package]] +name = "emojis" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"99e1f1df1f181f2539bac8bf027d31ca5ffbf9e559e3f2d09413b9107b5c02f4" +dependencies = [ + "phf 0.11.2", +] + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -1732,6 +1866,15 @@ dependencies = [ "log", ] +[[package]] +name = "envy" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f47e0157f2cb54f5ae1bd371b30a2ae4311e1c028f575cd4e81de7353215965" +dependencies = [ + "serde", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -2108,6 +2251,16 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", + "serde", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -2189,6 +2342,15 @@ dependencies = [ "windows", ] +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + [[package]] name = "html5ever" version = "0.26.0" @@ -2231,7 +2393,7 @@ checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", - "itoa", + "itoa 1.0.11", ] [[package]] @@ -2242,7 +2404,7 @@ checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", - "itoa", + "itoa 1.0.11", ] [[package]] @@ -2339,7 +2501,7 @@ dependencies = [ "http-body 0.4.6", "httparse", "httpdate", - "itoa", + "itoa 1.0.11", "pin-project-lite", "socket2", 
"tokio", @@ -2362,7 +2524,7 @@ dependencies = [ "http-body 1.0.0", "httparse", "httpdate", - "itoa", + "itoa 1.0.11", "pin-project-lite", "smallvec", "tokio", @@ -2508,6 +2670,31 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae21c3177a27788957044151cc2800043d127acaa460a47ebb9b84dfa2c6aa0" +[[package]] +name = "include_dir" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "indent_write" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" + [[package]] name = "indexmap" version = "1.9.3" @@ -2596,6 +2783,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + [[package]] name = "itoa" version = "1.0.11" @@ -2611,6 +2804,12 @@ dependencies = [ "libc", ] +[[package]] +name = "joinery" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72167d68f5fce3b8655487b8038691a3c9984ee769590f93f2a631f4ad64e4f5" + [[package]] name = "js-sys" version = "0.3.69" @@ -2635,6 +2834,12 @@ dependencies = [ "spin", ] +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "lettre" version = "0.11.7" @@ -2666,6 +2871,70 @@ 
version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.155" @@ -2688,6 +2957,12 @@ dependencies = [ "version_check", ] 
+[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -2727,6 +3002,26 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lol_html" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1610d7994d67a05bb35861cd733b069b1171de8693bc8452849c59361a1bb87b" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cssparser 0.27.2", + "encoding_rs", + "hashbrown 0.13.2", + "lazy_static", + "lazycell", + "memchr", + "mime", + "safemem", + "selectors 0.22.0", + "thiserror", +] + [[package]] name = "lru" version = "0.12.3" @@ -2808,6 +3103,12 @@ dependencies = [ "xml5ever", ] +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + [[package]] name = "matchit" version = "0.7.3" @@ -2919,6 +3220,26 @@ dependencies = [ "unicase", ] +[[package]] +name = "minicbor" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7005aaf257a59ff4de471a9d5538ec868a21586534fff7f85dd97d4043a6139" +dependencies = [ + "minicbor-derive", +] + +[[package]] +name = "minicbor-derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1154809406efdb7982841adb6311b3d095b46f78342dd646736122fe6b19e267" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "minidom" version = "0.15.2" @@ -2928,6 +3249,12 @@ dependencies = [ "rxml", ] +[[package]] +name = "minifier" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5394aa376422b4b2b6c02fd9cfcb657e4ec544ae98e43d7d5d785fd0d042fd6d" + [[package]] name = "minijinja" version = "2.3.1" @@ -3021,6 +3348,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "nom" version = "7.1.3" @@ -3031,6 +3364,19 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom-supreme" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd3ae6c901f1959588759ff51c95d24b491ecb9ff91aa9c2ef4acc5b1dcab27" +dependencies = [ + "brownstone", + "indent_write", + "joinery", + "memchr", + "nom", +] + [[package]] name = "num-bigint-dig" version = "0.8.4" @@ -3336,6 +3682,51 @@ dependencies = [ "sha2", ] +[[package]] +name = "pagefind" +version = "0.0.0" +dependencies = [ + "actix-files", + "actix-web", + "anyhow", + "async-compression", + "base64 0.21.7", + "bit-set", + "clap", + "console", + "convert_case 0.6.0", + "either", + "emojis", + "flate2", + "futures", + "hashbrown 0.13.2", + "html-escape", + "include_dir", + "lazy_static", + "lexical-core", + "lol_html", + "minicbor", + "minifier", + "pagefind_stem", + "path-slash", + "portpicker", + "regex", + "rust-patch", + "serde", + "serde_json", + "sha-1", + "tokio", + "twelf", + "unicode-segmentation", + "wax", +] + +[[package]] +name = "pagefind_stem" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70b9cf5d3cd867dd32e54385d85ecfda45c6f2f896a9d464426ab564e7391467" + [[package]] name = "parking" version = "2.2.0" @@ -3371,6 +3762,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path-slash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -3386,6 +3783,17 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", +] + [[package]] name = "phf" version = "0.10.1" @@ -3401,10 +3809,20 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ - "phf_macros", + "phf_macros 0.11.2", "phf_shared 0.11.2", ] +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", +] + [[package]] name = "phf_codegen" version = "0.10.0" @@ -3425,6 +3843,16 @@ dependencies = [ "phf_shared 0.11.2", ] +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared 0.8.0", + "rand 0.7.3", +] + [[package]] name = "phf_generator" version = "0.10.0" @@ -3445,6 +3873,20 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator 0.8.0", + "phf_shared 0.8.0", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "phf_macros" version = "0.11.2" @@ -3458,6 +3900,15 @@ dependencies = [ "syn 2.0.85", ] +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + [[package]] name = "phf_shared" version = "0.10.0" @@ -3547,6 +3998,24 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "pori" +version = "0.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a63d338dec139f56dacc692ca63ad35a6be6a797442479b55acd611d79e906" +dependencies = [ + "nom", +] + +[[package]] +name = "portpicker" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be97d76faf1bfab666e1375477b23fde79eccf0276e9b63b92a39d676a889ba9" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "postgres-openssl" version = "0.5.0" @@ -3658,6 +4127,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -3830,6 +4305,7 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc", + "rand_pcg", ] [[package]] @@ -3900,6 +4376,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rawpointer" version = "0.2.1" @@ -3938,7 +4423,7 @@ 
dependencies = [ "combine", "futures", "futures-util", - "itoa", + "itoa 1.0.11", "native-tls", "percent-encoding", "pin-project-lite", @@ -3960,7 +4445,7 @@ dependencies = [ "bytes", "combine", "futures-util", - "itoa", + "itoa 1.0.11", "percent-encoding", "pin-project-lite", "rustls 0.22.4", @@ -4002,10 +4487,16 @@ checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", - "regex-automata", + "regex-automata 0.4.7", "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-automata" version = "0.4.7" @@ -4254,6 +4745,27 @@ dependencies = [ "trim-in-place", ] +[[package]] +name = "rust-patch" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4076837f5df7460d37d1e245c966e64f6aaeeb59a76f186f352ca91d6087fb43" +dependencies = [ + "rust-patch-derive", +] + +[[package]] +name = "rust-patch-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9927610a0a7c3e3dece1e89a114c31e435f27db01b1d630e81eb02ecd820f0b" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rust-s3" version = "0.35.1" @@ -4477,6 +4989,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + [[package]] name = "same-file" version = "1.0.6" @@ -4530,12 +5048,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5b80b33679ff7a0ea53d37f3b39de77ea0c75b12c5805ac43ec0c33b3051af1b" dependencies = [ "ahash", - "cssparser", + "cssparser 0.31.2", "ego-tree", "getopts", "html5ever 0.26.0", "once_cell", - "selectors", + "selectors 0.25.0", "tendril", ] @@ -4610,6 +5128,26 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser 0.27.2", + "derive_more 0.99.18", + "fxhash", + "log", + "matches", + "phf 0.8.0", + "phf_codegen 0.8.0", + "precomputed-hash", + "servo_arc 0.1.1", + "smallvec", + "thin-slice", +] + [[package]] name = "selectors" version = "0.25.0" @@ -4617,7 +5155,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" dependencies = [ "bitflags 2.6.0", - "cssparser", + "cssparser 0.31.2", "derive_more 0.99.18", "fxhash", "log", @@ -4625,7 +5163,7 @@ dependencies = [ "phf 0.10.1", "phf_codegen 0.10.0", "precomputed-hash", - "servo_arc", + "servo_arc 0.3.0", "smallvec", ] @@ -4699,7 +5237,7 @@ version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ - "itoa", + "itoa 1.0.11", "memchr", "ryu", "serde", @@ -4711,7 +5249,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" dependencies = [ - "itoa", + "itoa 1.0.11", "serde", ] @@ -4762,7 +5300,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa", + "itoa 1.0.11", "ryu", "serde", ] @@ -4797,6 +5335,18 @@ dependencies = [ "syn 2.0.85", ] +[[package]] 
+name = "serde_yaml" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" +dependencies = [ + "indexmap 1.9.3", + "ryu", + "serde", + "yaml-rust", +] + [[package]] name = "serde_yml" version = "0.0.12" @@ -4804,7 +5354,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" dependencies = [ "indexmap 2.2.6", - "itoa", + "itoa 1.0.11", "libyml", "memchr", "ryu", @@ -4812,6 +5362,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + [[package]] name = "servo_arc" version = "0.3.0" @@ -4821,6 +5381,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "sha-1" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha1" version = "0.10.6" @@ -5304,6 +5875,12 @@ dependencies = [ "utf-8", ] +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + [[package]] name = "thiserror" version = "1.0.65" @@ -5331,7 +5908,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", - "itoa", + "itoa 1.0.11", "num-conv", "powerfmt", "serde", @@ -5524,6 +6101,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.5.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + [[package]] name = "toml" version = "0.7.8" @@ -5727,6 +6313,7 @@ dependencies = [ "futures", "futures-util", "glob", + "hashbrown 0.13.2", "itertools 0.13.0", "lazy_static", "lettre", @@ -5742,6 +6329,7 @@ dependencies = [ "openai_dive", "openidconnect", "openssl", + "pagefind", "postgres-openssl", "prometheus", "qdrant-client", @@ -5791,6 +6379,23 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "twelf" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f6b76f0d5feab6eeb6a36900c5e1f6867f5061ce87917acc3d1c2d985db5212" +dependencies = [ + "clap", + "config-derive", + "envy", + "log", + "serde", + "serde_json", + "serde_yaml", + "thiserror", + "toml 0.5.11", +] + [[package]] name = "typenum" version = "1.17.0" @@ -5909,6 +6514,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" +[[package]] +name = "utf8-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + [[package]] name = "utf8parse" version = "0.2.2" @@ -6131,6 +6742,24 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wax" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06c7a3bac6110ac062b7b422a442b7ee23e07209e2784a036654cab1e71bbafc" +dependencies = [ + "bstr 0.2.17", + "const_format", + "itertools 0.10.5", + "nom", + "nom-supreme", + "pori", + "regex", + "smallvec", + "thiserror", + "walkdir", +] + [[package]] name = "web-sys" version = "0.3.69" @@ -6425,6 +7054,15 @@ 
dependencies = [ "markup5ever 0.12.1", ] +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "zerocopy" version = "0.7.35" diff --git a/server/Cargo.toml b/server/Cargo.toml index de36d7f8c..fb32b68b7 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -177,6 +177,8 @@ oas3 = "0.10.0" sanitize_html = "0.8.1" minijinja-embed = "2.2.0" minijinja = { version = "2.2.0", features = ["loader", "json"] } +hashbrown = { version = "0.13.2", features = ["serde"] } +pagefind = { path = "../../pagefind/pagefind/" } [build-dependencies] diff --git a/server/src/handlers/dataset_handler.rs b/server/src/handlers/dataset_handler.rs index 653c29fcc..8bfec8c93 100644 --- a/server/src/handlers/dataset_handler.rs +++ b/server/src/handlers/dataset_handler.rs @@ -22,6 +22,7 @@ use crate::{ send_ditto_event, DittoDatasetCreated, DittoTrackProperties, DittoTrackRequest, }, organization_operator::{get_org_dataset_count, get_org_from_id_query}, + pagefind_operator::build_index_for_dataset_id, }, }; use actix_web::{web, FromRequest, HttpMessage, HttpResponse}; @@ -471,6 +472,36 @@ pub async fn clear_dataset( Ok(HttpResponse::NoContent().finish()) } +/// Create Pagefind Index for Dataset +/// +/// Builds a Pagefind search index from the chunks in the dataset. The auth'ed user must be an admin of the organization to index a dataset. +#[utoipa::path( + put, + path = "/dataset/pagefind", + context_path = "/api", + tag = "Dataset", + responses( + (status = 204, description = "Dataset indexed successfully"), + (status = 400, description = "Service error relating to creating the index", body = ErrorResponseBody), + ), + params( + ("TR-Dataset" = uuid::Uuid, Header, description = "The dataset id or tracking_id to use for the request. 
We assume you intend to use an id if the value is a valid uuid."), + + ), + security( + ("ApiKey" = ["admin"]), + ) +)] +pub async fn create_pagefind_index_for_dataset( + dataset_org_plan_sub: DatasetAndOrgWithSubAndPlan, + _user: AdminOnly, + pool: web::Data, +) -> Result { + build_index_for_dataset_id(dataset_org_plan_sub.dataset, pool.clone()).await?; + + Ok(HttpResponse::NoContent().finish()) +} + /// Delete Dataset by Tracking ID /// /// Auth'ed user must be an owner of the organization to delete a dataset. diff --git a/server/src/lib.rs b/server/src/lib.rs index c0c3e199c..f489cf04e 100644 --- a/server/src/lib.rs +++ b/server/src/lib.rs @@ -243,6 +243,7 @@ impl Modify for SecurityAddon { handlers::dataset_handler::get_dataset_crawl_options, handlers::dataset_handler::get_usage_by_dataset_id, handlers::dataset_handler::get_datasets_from_organization, + handlers::dataset_handler::create_pagefind_index_for_dataset, handlers::dataset_handler::clear_dataset, handlers::stripe_handler::direct_to_payment_link, handlers::stripe_handler::cancel_subscription, @@ -807,6 +808,10 @@ pub fn main() -> std::io::Result<()> { ) .route(web::put().to(handlers::dataset_handler::update_dataset)) ) + .service( + web::resource("/pagefind") + .route(web::post().to(handlers::dataset_handler::create_pagefind_index_for_dataset)) + ) .service( web::resource("/batch_create_datasets").route( web::post().to(handlers::dataset_handler::batch_create_datasets), diff --git a/server/src/operators/file_operator.rs b/server/src/operators/file_operator.rs index 601448f3c..74dee26a4 100644 --- a/server/src/operators/file_operator.rs +++ b/server/src/operators/file_operator.rs @@ -57,6 +57,45 @@ pub fn get_aws_bucket() -> Result { Ok(*aws_bucket) } +pub fn get_pagefind_aws_bucket() -> Result { + let aws_region_name = std::env::var("AWS_REGION_PAGEFIND").unwrap_or("".to_string()); + let s3_endpoint = std::env::var("S3_ENDPOINT_PAGEFIND") + .unwrap_or(get_env!("S3_ENDPOINT", "S3_ENDPOINT should be 
set").to_string()); + let s3_bucket_name = std::env::var("S3_BUCKET_PAGEFIND") + .unwrap_or(get_env!("S3_BUCKET", "S3_BUCKET should be set").to_string()); + + let aws_region = Region::Custom { + region: aws_region_name, + endpoint: s3_endpoint, + }; + + let aws_credentials = if let Ok(creds) = Credentials::from_instance_metadata() { + creds + } else { + let s3_access_key = std::env::var("S3_ACCESS_KEY_PAGEFIND") + .unwrap_or(get_env!("S3_ACCESS_KEY", "S3_ACCESS_KEY should be set").to_string()); + let s3_secret_key = std::env::var("S3_SECRET_KEY_PAGEFIND") + .unwrap_or(get_env!("S3_SECRET_KEY", "S3_SECRET_KEY should be set").to_string()); + + Credentials { + access_key: Some(s3_access_key), + secret_key: Some(s3_secret_key), + security_token: None, + session_token: None, + expiration: None, + } + }; + + let aws_bucket = Bucket::new(&s3_bucket_name, aws_region, aws_credentials) + .map_err(|e| { + log::error!("Could not create or get bucket {:?}", e); + ServiceError::BadRequest("Could not create or get bucket".to_string()) + })? 
+ .with_path_style(); + + Ok(*aws_bucket) +} + pub async fn create_file_query( file_id: uuid::Uuid, file_size: i64, diff --git a/server/src/operators/mod.rs b/server/src/operators/mod.rs index 0dde38f20..d87f18163 100644 --- a/server/src/operators/mod.rs +++ b/server/src/operators/mod.rs @@ -12,6 +12,7 @@ pub mod invitation_operator; pub mod message_operator; pub mod model_operator; pub mod organization_operator; +pub mod pagefind_operator; pub mod parse_operator; pub mod qdrant_operator; pub mod search_operator; diff --git a/server/src/operators/pagefind_operator.rs b/server/src/operators/pagefind_operator.rs new file mode 100644 index 000000000..a7881668a --- /dev/null +++ b/server/src/operators/pagefind_operator.rs @@ -0,0 +1,135 @@ +use std::path::PathBuf; + +use actix_web::web; +use hashbrown::HashMap; +use pagefind::{Fossicker, SearchState}; + +use crate::{ + data::models::{Dataset, DatasetConfiguration, Pool, QdrantChunkMetadata}, + errors::ServiceError, + operators::file_operator::get_pagefind_aws_bucket, +}; + +use super::{qdrant_operator::scroll_dataset_points, search_operator::assemble_qdrant_filter}; + +pub fn create_pagefind_index() -> SearchState { + let config = pagefind::PagefindInboundConfig { + source: "source".into(), + site: "site".into(), + bundle_dir: None, + output_subdir: None, + output_path: None, + root_selector: "root_selector".into(), + exclude_selectors: vec![], + glob: "**/*.{html}".into(), + force_language: None, + serve: false, + verbose: false, + logfile: None, + keep_index_url: false, + service: false, + }; + let opts = pagefind::SearchOptions::load(config).expect("Config is always valid"); + + SearchState::new(opts) +} + +pub async fn add_record( + index: &mut SearchState, + url: String, + content: String, + language: String, + meta: Option>, + filters: Option>>, + sort: Option>, +) -> Result { + let data = pagefind::fossick::parser::DomParserResult { + digest: content, + filters: filters.unwrap_or_default(), + sort: 
sort.unwrap_or_default(), + meta: meta.unwrap_or_default(), + anchor_content: HashMap::new(), + has_custom_body: false, + force_inclusion: true, + has_html_element: true, + has_old_bundle_reference: false, + language: index.options.force_language.clone().unwrap_or(language), + }; + let file = Fossicker::new_with_data(url, data); + index.fossick_one(file).await +} + +pub async fn get_files(index: &mut SearchState) -> Vec { + index.build_indexes().await; + index.get_files().await +} + +pub async fn build_index_for_dataset_id( + dataset: Dataset, + pool: web::Data, +) -> Result<(), ServiceError> { + let mut search_index = create_pagefind_index(); + + let filter = assemble_qdrant_filter(None, None, None, dataset.id, pool.clone()).await?; + + let mut offset: Option = None; + let mut first_iteration = true; + + let mut dataset_config = DatasetConfiguration::from_json(dataset.server_configuration); + + // HACK set QDRANT_ONLY to true to get the payload from QDRANT + dataset_config.QDRANT_ONLY = true; + + while offset.is_some() || first_iteration { + let (search_results, offset_id) = + scroll_dataset_points(100, offset, None, dataset_config.clone(), filter.clone()) + .await?; + + for result in search_results.iter() { + let payload: QdrantChunkMetadata = result.clone().into(); + + let _ = add_record( + &mut search_index, + payload.link.unwrap_or_default().to_string(), + payload.chunk_html.unwrap_or_default().to_string(), + "en".to_string(), + payload.metadata.unwrap_or_default().as_object().map(|m| { + m.iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() + }), + None, + None, + ) + .await; + } + + offset = offset_id; + first_iteration = false; + } + + search_index.build_indexes().await; + + for file in search_index.get_files().await { + let bucket = get_pagefind_aws_bucket()?; + + // WARNING This s3 bucket cannot be default public. put ACL's on this somehow in case + // the user does not want their data to be public. 
+ let mut filename = PathBuf::from("/pagefind"); + filename.push(dataset.id.to_string()); + filename.push(file.filename.clone()); + + bucket + .put_object( + filename.to_string_lossy().to_string(), + file.contents.as_ref(), + ) + .await + .map_err(|e| { + log::error!("Could not upload file to S3 {:?}", e); + ServiceError::BadRequest("Could not upload file to S3".to_string()) + })?; + } + + Ok(()) +}