diff --git a/Cargo.lock b/Cargo.lock index fc1cfce7..44eb1db4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,9 +150,9 @@ checksum = "70033777eb8b5124a81a1889416543dddef2de240019b674c81285a2635a7e1e" [[package]] name = "anyhow" -version = "1.0.83" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] name = "arbitrary" @@ -168,7 +168,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -402,9 +402,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" +checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" [[package]] name = "byteorder" @@ -436,9 +436,9 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "camino" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" dependencies = [ "serde", ] @@ -467,9 +467,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.97" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" dependencies = [ "jobserver", "libc", @@ -618,7 +618,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -765,9 +765,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if 1.0.0", ] @@ -802,9 +802,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.19" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crunchy" @@ -851,7 +851,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -924,9 +924,9 @@ dependencies = [ [[package]] name = "deunicode" -version = "1.4.4" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322ef0094744e63628e6f0eb2295517f79276a5b342a4c2ff3042566ca181d4e" +checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" [[package]] name = "digest" @@ -955,7 +955,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -981,9 +981,9 @@ dependencies = [ [[package]] name = "either" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "elasticlunr-rs" @@ -1367,8 +1367,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if 1.0.0", + "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -1429,18 +1431,19 @@ dependencies = [ [[package]] name = "grass" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b89786a806d5b192cf4e573f9831c847a455a142d000c922bdfc1e5edad14303" +checksum = "a46def7216d331efa51a6aa796ef777bfdfe9605378382827a553344b7e5eefc" dependencies = [ + "getrandom 0.2.15", "grass_compiler", ] [[package]] name = "grass_compiler" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cf7d155dd7cef20195016d01005033a5521aad307033f0f8e8bf0a02f5f7554" +checksum = "f39216c1843182f78541276fec96f88406861f16aa19cc9f8add70f8e67b7577" dependencies = [ "codemap", "indexmap 2.2.6", @@ -1536,7 +1539,7 @@ dependencies = [ "markup5ever", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -1782,9 +1785,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.38.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3eab73f58e59ca6526037208f0e98851159ec1633cf17b6cd2e1f2c3fd5d53cc" +checksum = "810ae6042d48e2c9e9215043563a58a80b877bc863228a74cf10c49d4620a6f5" dependencies = [ "console 0.15.8", "lazy_static", @@ -1800,7 +1803,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -1983,9 +1986,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.154" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libfuzzer-sys" @@ -2042,7 +2045,7 @@ dependencies = [ "tera", "termcolor", "time", - "toml 0.8.12", + "toml 0.8.13", "unic-langid", "unicode-segmentation", "url", @@ -2062,9 +2065,9 @@ dependencies = [ [[package]] name = "lightningcss" -version = "1.0.0-alpha.55" +version = "1.0.0-alpha.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bd5bed3814fb631bfc1e24c2be6f7e86a9837c660909acab79a38374dcb8798" +checksum = "668e9f1774a4dda9e2233ad0f78c6987878bcf4201d2085bc3517a7f84d0ee92" dependencies = [ "ahash 0.8.11", "bitflags 2.5.0", @@ -2074,6 +2077,7 @@ dependencies = [ "dashmap", "data-encoding", "getrandom 0.2.15", + "indexmap 2.2.6", "itertools 0.10.5", "lazy_static", "parcel_selectors", @@ -2267,9 +2271,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "lock_api" @@ -2443,9 +2447,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" dependencies = [ "adler", "simd-adler32", @@ -2694,7 +2698,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -2790,9 +2794,9 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" [[package]] name = "open" -version = "5.1.2" +version = "5.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "449f0ff855d85ddbf1edd5b646d65249ead3f5e422aaa86b7d2d0b049b103e32" +checksum = "2eb49fbd5616580e9974662cb96a3463da4476e649a7e4b258df0de065db0657" dependencies = [ "is-wsl", "libc", @@ -2822,7 +2826,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -2857,9 +2861,9 @@ checksum = "7f222829ae9293e33a9f5e9f440c6760a3d450a64affe1846486b140db81c1f4" [[package]] name = "parcel_selectors" -version = "0.26.4" +version = "0.26.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d74befe2d076330d9a58bf9ca2da424568724ab278adf15fb5718253133887" +checksum = "ce9c47a67c66fee4a5a42756f9784d92941bd0ab2b653539a9e90521a44b66f0" dependencies = [ "bitflags 2.5.0", "cssparser", @@ -2985,7 +2989,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -3068,7 +3072,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -3178,9 +3182,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" dependencies = [ "unicode-ident", ] @@ -3201,7 +3205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" dependencies = [ "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -3749,6 +3753,7 @@ dependencies = [ "content", "errors", "libs", + "serde", ] [[package]] @@ -3785,22 +3790,22 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.201" +version = "1.0.202" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" +checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.201" +version = "1.0.202" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" +checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -3817,9 +3822,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" dependencies = [ "serde", ] @@ -4056,9 +4061,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.61" +version = "2.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c993ed8ccba56ae856363b1845da7266a7cb78e1d146c8a32d54b45a8b831fc9" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" dependencies = [ "proc-macro2", "quote", @@ -4123,7 +4128,7 @@ dependencies = [ "cfg-expr", "heck 0.5.0", "pkg-config", - "toml 0.8.12", + "toml 0.8.13", "version-compare", ] @@ -4237,7 +4242,7 @@ dependencies = [ "cfg-if 1.0.0", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -4248,7 +4253,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", "test-case-core", ] @@ -4260,22 +4265,22 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.60" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.60" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] @@ -4405,9 +4410,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.12" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" dependencies = [ "serde", "serde_spanned", @@ -4417,18 +4422,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.12" +version = "0.22.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" +checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" dependencies = [ "indexmap 2.2.6", "serde", @@ -4503,18 +4508,18 @@ checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" [[package]] name = "unic-langid" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "238722e6d794ed130f91f4ea33e01fcff4f188d92337a21297892521c72df516" +checksum = "23dd9d1e72a73b25e07123a80776aae3e7b0ec461ef94f9151eed6ec88005a44" dependencies = [ "unic-langid-impl", ] [[package]] name = "unic-langid-impl" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd55a2063fdea4ef1f8633243a7b0524cbeef1905ae04c31a1c9b9775c55bc6" +checksum = "0a5422c1f65949306c99240b81de9f3f15929f5a8bfe05bb44b034cc8bf593e5" dependencies = [ "tinystr", ] @@ -4722,7 +4727,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", "wasm-bindgen-shared", ] @@ -4756,7 +4761,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5104,7 +5109,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.61", + "syn 2.0.65", ] [[package]] diff --git a/components/config/src/config/search.rs b/components/config/src/config/search.rs index 3ce6878c..e259b57b 100644 --- a/components/config/src/config/search.rs +++ b/components/config/src/config/search.rs @@ -7,6 +7,23 @@ pub enum IndexFormat { ElasticlunrJson, #[default] ElasticlunrJavascript, + FuseJson, + FuseJavascript, +} + +impl IndexFormat { + /// file extension which ought to be used for this index format. + fn extension(&self) -> &'static str { + match *self { + IndexFormat::ElasticlunrJavascript | IndexFormat::FuseJavascript => "js", + IndexFormat::ElasticlunrJson | IndexFormat::FuseJson => "json", + } + } + + /// the filename which ought to be used for this format and language `lang` + pub fn filename(&self, lang: &str) -> String { + format!("search_index.{}.{}", lang, self.extension()) + } } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] @@ -17,7 +34,7 @@ pub struct Search { /// Includes the whole content in the search index. Ok for small sites but becomes /// too big on large sites. `true` by default. pub include_content: bool, - /// Optionally truncate the content down to `n` chars. This might cut content in a word + /// Optionally truncate the content down to `n` code points. This might cut content in a word pub truncate_content_length: Option, /// Includes the description in the search index. When the site becomes too large, you can switch /// to that instead. `false` by default @@ -26,7 +43,7 @@ pub struct Search { pub include_date: bool, /// Include the path of the page in the search index. `false` by default. pub include_path: bool, - /// Foramt of the search index to be produced. Javascript by default + /// Foramt of the search index to be produced. 'elasticlunr_javascript' by default. pub index_format: IndexFormat, } diff --git a/components/search/Cargo.toml b/components/search/Cargo.toml index d2b26b52..eb12f7e5 100644 --- a/components/search/Cargo.toml +++ b/components/search/Cargo.toml @@ -8,3 +8,4 @@ errors = { path = "../errors" } content = { path = "../content" } config = { path = "../config" } libs = { path = "../libs" } +serde = { version = "1.0", features = ["derive"] } diff --git a/components/search/src/elasticlunr.rs b/components/search/src/elasticlunr.rs new file mode 100644 index 00000000..af7583ef --- /dev/null +++ b/components/search/src/elasticlunr.rs @@ -0,0 +1,236 @@ +use config::{Config, Search}; +use content::{Library, Section}; +use errors::{bail, Result}; +use libs::elasticlunr::{lang, Index, IndexBuilder}; +use libs::time::format_description::well_known::Rfc3339; +use libs::time::OffsetDateTime; + +use crate::clean_and_truncate_body; + +pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js"); + +fn build_fields(search_config: &Search, mut index: IndexBuilder) -> IndexBuilder { + if search_config.include_title { + index = index.add_field("title"); + } + + if search_config.include_description { + index = index.add_field("description"); + } + + if search_config.include_date { + index = index.add_field("date") + } + + if search_config.include_path { + index = index.add_field_with_tokenizer("path", Box::new(path_tokenizer)); + } + + if search_config.include_content { + index = index.add_field("body") + } + + index +} + +fn path_tokenizer(text: &str) -> Vec { + text.split(|c: char| c.is_whitespace() || c == '-' || c == '/') + .filter(|s| !s.is_empty()) + .map(|s| s.trim().to_lowercase()) + .collect() +} + +fn fill_index( + search_config: &Search, + title: &Option, + description: &Option, + datetime: &Option, + path: &str, + content: &str, +) -> Vec { + let mut row = vec![]; + + if search_config.include_title { + row.push(title.clone().unwrap_or_default()); + } + + if search_config.include_description { + row.push(description.clone().unwrap_or_default()); + } + + if search_config.include_date { + if let Some(date) = datetime { + if let Ok(d) = date.format(&Rfc3339) { + row.push(d); + } + } + } + + if search_config.include_path { + row.push(path.to_string()); + } + + if search_config.include_content { + row.push(clean_and_truncate_body(search_config.truncate_content_length, content)); + } + row +} + +/// Returns the generated JSON index with all the documents of the site added using +/// the language given +/// Errors if the language given is not available in Elasticlunr +/// TODO: is making `in_search_index` apply to subsections of a `false` section useful? +pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result { + let language = match lang::from_code(lang) { + Some(l) => l, + None => { + bail!("Tried to build search index for language {} which is not supported", lang); + } + }; + let language_options = &config.languages[lang]; + let mut index = IndexBuilder::with_language(language); + index = build_fields(&language_options.search, index); + let mut index = index.build(); + + for (_, section) in &library.sections { + if section.lang == lang { + add_section_to_index(&mut index, section, library, &language_options.search); + } + } + + Ok(index.to_json()) +} + +fn add_section_to_index( + index: &mut Index, + section: &Section, + library: &Library, + search_config: &Search, +) { + if !section.meta.in_search_index { + return; + } + + // Don't index redirecting sections + if section.meta.redirect_to.is_none() { + index.add_doc( + §ion.permalink, + &fill_index( + search_config, + §ion.meta.title, + §ion.meta.description, + &None, + §ion.path, + §ion.content, + ), + ); + } + + for key in §ion.pages { + let page = &library.pages[key]; + if !page.meta.in_search_index { + continue; + } + + index.add_doc( + &page.permalink, + &fill_index( + search_config, + &page.meta.title, + &page.meta.description, + &page.meta.datetime, + &page.path, + &page.content, + ), + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use config::Config; + use libs::elasticlunr::IndexBuilder; + + #[test] + fn can_build_fields() { + let mut config = Config::default(); + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["title", "body"]); + + config.search.include_content = false; + config.search.include_description = true; + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["title", "description"]); + + config.search.include_content = true; + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["title", "description", "body"]); + + config.search.include_title = false; + let index = build_fields(&config.search, IndexBuilder::new()).build(); + assert_eq!(index.get_fields(), vec!["description", "body"]); + } + + #[test] + fn can_fill_index_default() { + let config = Config::default(); + let title = Some("A title".to_string()); + let description = Some("A description".to_string()); + let path = "/a/page/".to_string(); + let content = "Some content".to_string(); + + let res = fill_index(&config.search, &title, &description, &None, &path, &content); + assert_eq!(res.len(), 2); + assert_eq!(res[0], title.unwrap()); + assert_eq!(res[1], content); + } + + #[test] + fn can_fill_index_description() { + let mut config = Config::default(); + config.search.include_description = true; + let title = Some("A title".to_string()); + let description = Some("A description".to_string()); + let path = "/a/page/".to_string(); + let content = "Some content".to_string(); + + let res = fill_index(&config.search, &title, &description, &None, &path, &content); + assert_eq!(res.len(), 3); + assert_eq!(res[0], title.unwrap()); + assert_eq!(res[1], description.unwrap()); + assert_eq!(res[2], content); + } + + #[test] + fn can_fill_index_truncated_content() { + let mut config = Config::default(); + config.search.truncate_content_length = Some(5); + let title = Some("A title".to_string()); + let description = Some("A description".to_string()); + let path = "/a/page/".to_string(); + let content = "Some content".to_string(); + + let res = fill_index(&config.search, &title, &description, &None, &path, &content); + assert_eq!(res.len(), 2); + assert_eq!(res[0], title.unwrap()); + assert_eq!(res[1], content[..5]); + } + + #[test] + fn can_fill_index_date() { + let mut config = Config::default(); + config.search.include_date = true; + let title = Some("A title".to_string()); + let description = Some("A description".to_string()); + let path = "/a/page/".to_string(); + let content = "Some content".to_string(); + let datetime = Some(OffsetDateTime::parse("2023-01-31T00:00:00Z", &Rfc3339).unwrap()); + + let res = fill_index(&config.search, &title, &description, &datetime, &path, &content); + assert_eq!(res.len(), 3); + assert_eq!(res[0], title.unwrap()); + assert_eq!(res[1], "2023-01-31T00:00:00Z"); + assert_eq!(res[2], content); + } +} diff --git a/components/search/src/fuse.rs b/components/search/src/fuse.rs new file mode 100644 index 00000000..604a0d9f --- /dev/null +++ b/components/search/src/fuse.rs @@ -0,0 +1,76 @@ +use config::Search; +use content::Library; +use errors::Result; +use libs::serde_json; + +use crate::clean_and_truncate_body; + +/// build index in Fuse.js format. +pub fn build_index(lang: &str, library: &Library, config: &Search) -> Result { + #[derive(serde::Serialize)] + struct Item<'a> { + url: &'a str, + title: Option<&'a str>, + description: Option<&'a str>, + body: Option, // AMMONIA.clean has to allocate anyway + path: Option<&'a str>, + } + let mut items: Vec = Vec::new(); + for (_, section) in &library.sections { + if section.lang == lang + && section.meta.redirect_to.is_none() + && section.meta.in_search_index + { + items.push(Item { + url: §ion.permalink, + title: match config.include_title { + true => Some(§ion.meta.title.as_deref().unwrap_or_default()), + false => None, + }, + description: match config.include_description { + true => Some(§ion.meta.description.as_deref().unwrap_or_default()), + false => None, + }, + body: match config.include_content { + true => Some(clean_and_truncate_body( + config.truncate_content_length, + §ion.content, + )), + false => None, + }, + path: match config.include_path { + true => Some(§ion.path), + false => None, + }, + }); + for page in §ion.pages { + let page = &library.pages[page]; + if page.meta.in_search_index { + items.push(Item { + url: &page.permalink, + title: match config.include_title { + true => Some(&page.meta.title.as_deref().unwrap_or_default()), + false => None, + }, + description: match config.include_description { + true => Some(&page.meta.description.as_deref().unwrap_or_default()), + false => None, + }, + body: match config.include_content { + true => Some(super::clean_and_truncate_body( + config.truncate_content_length, + &page.content, + )), + false => None, + }, + path: match config.include_path { + true => Some(&page.path), + false => None, + }, + }) + } + } + } + } + Ok(serde_json::to_string(&items)?) +} diff --git a/components/search/src/lib.rs b/components/search/src/lib.rs index 51d25579..cf2908bb 100644 --- a/components/search/src/lib.rs +++ b/components/search/src/lib.rs @@ -1,16 +1,12 @@ -use std::collections::{HashMap, HashSet}; +mod elasticlunr; +mod fuse; use libs::ammonia; -use libs::elasticlunr::{lang, Index, IndexBuilder}; use libs::once_cell::sync::Lazy; -use libs::time::format_description::well_known::Rfc3339; -use libs::time::OffsetDateTime; +use std::collections::{HashMap, HashSet}; -use config::{Config, Search}; -use content::{Library, Section}; -use errors::{bail, Result}; - -pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js"); +pub use elasticlunr::{build_index as build_elasticlunr, ELASTICLUNR_JS}; +pub use fuse::build_index as build_fuse; static AMMONIA: Lazy> = Lazy::new(|| { let mut clean_content = HashSet::new(); @@ -28,238 +24,25 @@ static AMMONIA: Lazy> = Lazy::new(|| { builder }); -fn build_fields(search_config: &Search, mut index: IndexBuilder) -> IndexBuilder { - if search_config.include_title { - index = index.add_field("title"); - } - - if search_config.include_description { - index = index.add_field("description"); - } - - if search_config.include_date { - index = index.add_field("date") - } - - if search_config.include_path { - index = index.add_field_with_tokenizer("path", Box::new(path_tokenizer)); - } - - if search_config.include_content { - index = index.add_field("body") - } - - index -} - -fn path_tokenizer(text: &str) -> Vec { - text.split(|c: char| c.is_whitespace() || c == '-' || c == '/') - .filter(|s| !s.is_empty()) - .map(|s| s.trim().to_lowercase()) - .collect() -} - -fn fill_index( - search_config: &Search, - title: &Option, - description: &Option, - datetime: &Option, - path: &str, - content: &str, -) -> Vec { - let mut row = vec![]; - - if search_config.include_title { - row.push(title.clone().unwrap_or_default()); - } - - if search_config.include_description { - row.push(description.clone().unwrap_or_default()); - } - - if search_config.include_date { - if let Some(date) = datetime { - if let Ok(d) = date.format(&Rfc3339) { - row.push(d); - } - } - } - - if search_config.include_path { - row.push(path.to_string()); - } - - if search_config.include_content { - let body = AMMONIA.clean(content).to_string(); - if let Some(truncate_len) = search_config.truncate_content_length { - // Not great for unicode - // TODO: fix it like the truncate in Tera - match body.char_indices().nth(truncate_len) { - None => row.push(body), - Some((idx, _)) => row.push((body[..idx]).to_string()), - }; - } else { - row.push(body); - }; - } - row -} - -/// Returns the generated JSON index with all the documents of the site added using -/// the language given -/// Errors if the language given is not available in Elasticlunr -/// TODO: is making `in_search_index` apply to subsections of a `false` section useful? -pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result { - let language = match lang::from_code(lang) { - Some(l) => l, - None => { - bail!("Tried to build search index for language {} which is not supported", lang); - } - }; - let language_options = &config.languages[lang]; - let mut index = IndexBuilder::with_language(language); - index = build_fields(&language_options.search, index); - let mut index = index.build(); - - for (_, section) in &library.sections { - if section.lang == lang { - add_section_to_index(&mut index, section, library, &language_options.search); - } - } - - Ok(index.to_json()) -} - -fn add_section_to_index( - index: &mut Index, - section: &Section, - library: &Library, - search_config: &Search, -) { - if !section.meta.in_search_index { - return; - } - - // Don't index redirecting sections - if section.meta.redirect_to.is_none() { - index.add_doc( - §ion.permalink, - &fill_index( - search_config, - §ion.meta.title, - §ion.meta.description, - &None, - §ion.path, - §ion.content, - ), - ); - } - - for key in §ion.pages { - let page = &library.pages[key]; - if !page.meta.in_search_index { - continue; - } - - index.add_doc( - &page.permalink, - &fill_index( - search_config, - &page.meta.title, - &page.meta.description, - &page.meta.datetime, - &page.path, - &page.content, - ), - ); +/// uses ammonia to clean the body, and truncates it to `truncate_content_length` +pub fn clean_and_truncate_body(truncate_content_length: Option, body: &str) -> String { + let mut clean = AMMONIA.clean(body).to_string(); + if let Some(new_len) = truncate_content_length { + clean.truncate(clean.char_indices().nth(new_len).map(|(i, _)| i).unwrap_or(clean.len())) } + clean } #[cfg(test)] -mod tests { - use super::*; - - use config::Config; - - #[test] - fn can_build_fields() { - let mut config = Config::default(); - let index = build_fields(&config.search, IndexBuilder::new()).build(); - assert_eq!(index.get_fields(), vec!["title", "body"]); - - config.search.include_content = false; - config.search.include_description = true; - let index = build_fields(&config.search, IndexBuilder::new()).build(); - assert_eq!(index.get_fields(), vec!["title", "description"]); - - config.search.include_content = true; - let index = build_fields(&config.search, IndexBuilder::new()).build(); - assert_eq!(index.get_fields(), vec!["title", "description", "body"]); - - config.search.include_title = false; - let index = build_fields(&config.search, IndexBuilder::new()).build(); - assert_eq!(index.get_fields(), vec!["description", "body"]); - } - - #[test] - fn can_fill_index_default() { - let config = Config::default(); - let title = Some("A title".to_string()); - let description = Some("A description".to_string()); - let path = "/a/page/".to_string(); - let content = "Some content".to_string(); - - let res = fill_index(&config.search, &title, &description, &None, &path, &content); - assert_eq!(res.len(), 2); - assert_eq!(res[0], title.unwrap()); - assert_eq!(res[1], content); - } - - #[test] - fn can_fill_index_description() { - let mut config = Config::default(); - config.search.include_description = true; - let title = Some("A title".to_string()); - let description = Some("A description".to_string()); - let path = "/a/page/".to_string(); - let content = "Some content".to_string(); - - let res = fill_index(&config.search, &title, &description, &None, &path, &content); - assert_eq!(res.len(), 3); - assert_eq!(res[0], title.unwrap()); - assert_eq!(res[1], description.unwrap()); - assert_eq!(res[2], content); - } - - #[test] - fn can_fill_index_truncated_content() { - let mut config = Config::default(); - config.search.truncate_content_length = Some(5); - let title = Some("A title".to_string()); - let description = Some("A description".to_string()); - let path = "/a/page/".to_string(); - let content = "Some content".to_string(); - - let res = fill_index(&config.search, &title, &description, &None, &path, &content); - assert_eq!(res.len(), 2); - assert_eq!(res[0], title.unwrap()); - assert_eq!(res[1], content[..5]); - } - - #[test] - fn can_fill_index_date() { - let mut config = Config::default(); - config.search.include_date = true; - let title = Some("A title".to_string()); - let description = Some("A description".to_string()); - let path = "/a/page/".to_string(); - let content = "Some content".to_string(); - let datetime = Some(OffsetDateTime::parse("2023-01-31T00:00:00Z", &Rfc3339).unwrap()); - - let res = fill_index(&config.search, &title, &description, &datetime, &path, &content); - assert_eq!(res.len(), 3); - assert_eq!(res[0], title.unwrap()); - assert_eq!(res[1], "2023-01-31T00:00:00Z"); - assert_eq!(res[2], content); - } +#[test] +fn clean_and_truncate_body_test() { + assert_eq!(clean_and_truncate_body(None, "hello world"), "hello world"); + assert_eq!( + clean_and_truncate_body(None, "hello world"), + "hello world" + ); + assert_eq!(clean_and_truncate_body(Some(100), "hello"), "hello"); + assert_eq!(clean_and_truncate_body(Some(2), "hello"), "he"); + assert_eq!(clean_and_truncate_body(Some(6), "hello \u{202E} world"), "hello "); + assert_eq!(clean_and_truncate_body(Some(7), "hello \u{202E} world"), "hello \u{202e}"); } diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs index a84ca526..2d3d578c 100644 --- a/components/site/src/lib.rs +++ b/components/site/src/lib.rs @@ -799,19 +799,26 @@ impl Site { } fn index_for_lang(&self, lang: &str) -> Result<()> { - let index_json = search::build_index(lang, &self.library.read().unwrap(), &self.config)?; - let (path, content) = match &self.config.search.index_format { - IndexFormat::ElasticlunrJson => { - let path = self.output_path.join(format!("search_index.{}.json", lang)); - (path, index_json) + let path = &self.output_path.join(self.config.search.index_format.filename(lang)); + let library = self.library.read().unwrap(); + let content = match &self.config.search.index_format { + IndexFormat::ElasticlunrJavascript | IndexFormat::ElasticlunrJson => { + search::build_elasticlunr(lang, &library, &self.config)? } - IndexFormat::ElasticlunrJavascript => { - let path = self.output_path.join(format!("search_index.{}.js", lang)); - let content = format!("window.searchIndex = {};", index_json); - (path, content) + IndexFormat::FuseJson | IndexFormat::FuseJavascript => { + search::build_fuse(lang, &library, &self.config.search)? } }; - create_file(&path, &content) + drop(library); // no need to hold on to this guard while writing + create_file( + path, + match self.config.search.index_format { + IndexFormat::ElasticlunrJson | IndexFormat::FuseJson => content, + IndexFormat::ElasticlunrJavascript | IndexFormat::FuseJavascript => { + format!("window.searchIndex = {}", content) + } + }, + ) } pub fn build_search_index(&self) -> Result<()> { @@ -827,8 +834,13 @@ impl Site { } } - // then elasticlunr.min.js - create_file(&self.output_path.join("elasticlunr.min.js"), search::ELASTICLUNR_JS)?; + match self.config.search.index_format { + IndexFormat::ElasticlunrJavascript | IndexFormat::ElasticlunrJson => { + // then elasticlunr.min.js + create_file(&self.output_path.join("elasticlunr.min.js"), search::ELASTICLUNR_JS)?; + } + _ => {} + } Ok(()) } diff --git a/components/utils/src/fs.rs b/components/utils/src/fs.rs index 2b155706..4aa13994 100644 --- a/components/utils/src/fs.rs +++ b/components/utils/src/fs.rs @@ -28,11 +28,12 @@ fn create_parent(path: &Path) -> Result<()> { } /// Create a file with the content given -pub fn create_file(path: &Path, content: &str) -> Result<()> { +/// `content`` can be `&str`, `String`, or `&String` (and probably others) +pub fn create_file(path: &Path, content: impl AsRef) -> Result<()> { create_parent(path)?; let mut file = File::create(path).with_context(|| format!("Failed to create file {}", path.display()))?; - file.write_all(content.as_bytes())?; + file.write_all(content.as_ref().as_bytes())?; Ok(()) } diff --git a/docs/content/documentation/content/search.md b/docs/content/documentation/content/search.md index c57c1fc1..b66aa06f 100644 --- a/docs/content/documentation/content/search.md +++ b/docs/content/documentation/content/search.md @@ -4,7 +4,7 @@ weight = 100 +++ Zola can build a search index from the sections and pages content to -be used by a JavaScript library such as [elasticlunr](http://elasticlunr.com/). +be used by a JavaScript library such as [elasticlunr](http://elasticlunr.com/) or [fuse](https://www.fusejs.io). To enable it, you only need to set `build_search_index = true` in your `config.toml` and Zola will generate an index for the `default_language` set for all pages not excluded from the search index. @@ -12,21 +12,36 @@ generate an index for the `default_language` set for all pages not excluded from It is very important to set the `default_language` in your `config.toml` if you are writing a site not in English; the index building pipelines are very different depending on the language. -After `zola build` or `zola serve`, you should see two files in your public directory: - -- `search_index.${default_language}.js`: so `search_index.en.js` for a default setup -- `elasticlunr.min.js` - -If you set `index_format = "elasticlunr_json"` in your `config.toml`, a `search_index.${default_language}.json` is generated -instead of the default `search_index.${default_language}.js`. - As each site will be different, Zola makes no assumptions about your search function and doesn't provide the JavaScript/CSS code to do an actual search and display results. You can look at how this site -implements it to get an idea: [search.js](https://github.com/getzola/zola/tree/master/docs/static/search.js). +implements it (using elasticlunr) to get an idea: [search.js](https://github.com/getzola/zola/tree/master/docs/static/search.js). -If you are using a language other than English, you will also need to include the corresponding JavaScript stemmer file. -See for details. ## Configuring the search index In some cases, the default indexing strategy is not suitable. You can customize which fields to include and whether to truncate the content in the [search configuration](@/documentation/getting-started/configuration.md). + +## Index Formats + +### Elasticlunr + +Compatible with [elasticlunr](http://elasticlunr.com/). Also produces `elasticlunr.min.js`. + +```toml +# config.toml +[search] +index_format = "elasticlunr_javascript" # or "elasticlunr_json" +``` + +If you are using a language other than English, you will also need to include the corresponding JavaScript stemmer file. +See for details. + +### Fuse + +Compatible with [fuse.js](https://www.fusejs.io/) and [tinysearch](https://github.com/tinysearch/tinysearch). + +```toml +# config.toml +[search] +index_format = "fuse_javascript" # or "fuse_json" +``` diff --git a/docs/content/documentation/getting-started/configuration.md b/docs/content/documentation/getting-started/configuration.md index dce920ce..1130cecb 100644 --- a/docs/content/documentation/getting-started/configuration.md +++ b/docs/content/documentation/getting-started/configuration.md @@ -174,16 +174,18 @@ include_title = true include_description = false # Whether to include the RFC3339 datetime of the page in the search index include_date = false -# Whether to include the path of the page/section in the index +# Whether to include the path of the page/section in the index (the permalink is always included) include_path = false # Whether to include the rendered content of the page/section in the index include_content = true -# At which character to truncate the content to. Useful if you have a lot of pages and the index would +# At which code point to truncate the content to. Useful if you have a lot of pages and the index would # become too big to load on the site. Defaults to not being set. # truncate_content_length = 100 # Wether to produce the search index as a javascript file or as a JSON file -# Accepted value "elasticlunr_javascript" or "elasticlunr_json" +# Accepted values: +# - "elasticlunr_javascript", "elasticlunr_json" +# - "fuse_javascript", "fuse_json" index_format = "elasticlunr_javascript" # Optional translation object for the default language