Add support for Fuse.js search format (#2507)

* initial "just barely works" Fuse.js support

* implement FuseJavascript; refactor index_for_lang

* support search config

* move fuse index building to its own file

* update doc of Search.index_format

* update config docs

* update search documentation

* use &str where possible

* use libs::serde_json

remember to commit Cargo.lock

* move extension logic to IndexFormat

* move the entire filename logic inside IndexFormat

* move elasticlunr to its own module

* only create elasticlunr.min.js if we're actually using elasticlunr

* move ELASTICLUNR_JS to elasticlunr.js

* hide the details of search's submodules

* optionally include path

* explain include_path better

* remove references to stork

* replace if with match

* support include_description

* specify "permalink"

* move body cleaning and truncation to a function

* update truncate_content_length docs to specify *code points*
This commit is contained in:
Connor K 2024-05-31 06:29:26 -04:00 committed by Vincent Prouillet
parent 0d0036e14a
commit 26f6677bfb
10 changed files with 496 additions and 348 deletions

161
Cargo.lock generated
View File

@ -150,9 +150,9 @@ checksum = "70033777eb8b5124a81a1889416543dddef2de240019b674c81285a2635a7e1e"
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.83" version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]] [[package]]
name = "arbitrary" name = "arbitrary"
@ -168,7 +168,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -402,9 +402,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.15.0" version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
@ -436,9 +436,9 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]] [[package]]
name = "camino" name = "camino"
version = "1.1.6" version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -467,9 +467,9 @@ dependencies = [
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.97" version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
dependencies = [ dependencies = [
"jobserver", "jobserver",
"libc", "libc",
@ -618,7 +618,7 @@ dependencies = [
"heck 0.5.0", "heck 0.5.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -765,9 +765,9 @@ dependencies = [
[[package]] [[package]]
name = "crc32fast" name = "crc32fast"
version = "1.4.0" version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
] ]
@ -802,9 +802,9 @@ dependencies = [
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.19" version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]] [[package]]
name = "crunchy" name = "crunchy"
@ -851,7 +851,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [ dependencies = [
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -924,9 +924,9 @@ dependencies = [
[[package]] [[package]]
name = "deunicode" name = "deunicode"
version = "1.4.4" version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "322ef0094744e63628e6f0eb2295517f79276a5b342a4c2ff3042566ca181d4e" checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00"
[[package]] [[package]]
name = "digest" name = "digest"
@ -955,7 +955,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -981,9 +981,9 @@ dependencies = [
[[package]] [[package]]
name = "either" name = "either"
version = "1.11.0" version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
[[package]] [[package]]
name = "elasticlunr-rs" name = "elasticlunr-rs"
@ -1367,8 +1367,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"js-sys",
"libc", "libc",
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
] ]
[[package]] [[package]]
@ -1429,18 +1431,19 @@ dependencies = [
[[package]] [[package]]
name = "grass" name = "grass"
version = "0.13.2" version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b89786a806d5b192cf4e573f9831c847a455a142d000c922bdfc1e5edad14303" checksum = "a46def7216d331efa51a6aa796ef777bfdfe9605378382827a553344b7e5eefc"
dependencies = [ dependencies = [
"getrandom 0.2.15",
"grass_compiler", "grass_compiler",
] ]
[[package]] [[package]]
name = "grass_compiler" name = "grass_compiler"
version = "0.13.2" version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cf7d155dd7cef20195016d01005033a5521aad307033f0f8e8bf0a02f5f7554" checksum = "f39216c1843182f78541276fec96f88406861f16aa19cc9f8add70f8e67b7577"
dependencies = [ dependencies = [
"codemap", "codemap",
"indexmap 2.2.6", "indexmap 2.2.6",
@ -1536,7 +1539,7 @@ dependencies = [
"markup5ever", "markup5ever",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -1782,9 +1785,9 @@ dependencies = [
[[package]] [[package]]
name = "insta" name = "insta"
version = "1.38.0" version = "1.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3eab73f58e59ca6526037208f0e98851159ec1633cf17b6cd2e1f2c3fd5d53cc" checksum = "810ae6042d48e2c9e9215043563a58a80b877bc863228a74cf10c49d4620a6f5"
dependencies = [ dependencies = [
"console 0.15.8", "console 0.15.8",
"lazy_static", "lazy_static",
@ -1800,7 +1803,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -1983,9 +1986,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.154" version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]] [[package]]
name = "libfuzzer-sys" name = "libfuzzer-sys"
@ -2042,7 +2045,7 @@ dependencies = [
"tera", "tera",
"termcolor", "termcolor",
"time", "time",
"toml 0.8.12", "toml 0.8.13",
"unic-langid", "unic-langid",
"unicode-segmentation", "unicode-segmentation",
"url", "url",
@ -2062,9 +2065,9 @@ dependencies = [
[[package]] [[package]]
name = "lightningcss" name = "lightningcss"
version = "1.0.0-alpha.55" version = "1.0.0-alpha.56"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bd5bed3814fb631bfc1e24c2be6f7e86a9837c660909acab79a38374dcb8798" checksum = "668e9f1774a4dda9e2233ad0f78c6987878bcf4201d2085bc3517a7f84d0ee92"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bitflags 2.5.0", "bitflags 2.5.0",
@ -2074,6 +2077,7 @@ dependencies = [
"dashmap", "dashmap",
"data-encoding", "data-encoding",
"getrandom 0.2.15", "getrandom 0.2.15",
"indexmap 2.2.6",
"itertools 0.10.5", "itertools 0.10.5",
"lazy_static", "lazy_static",
"parcel_selectors", "parcel_selectors",
@ -2267,9 +2271,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.4.13" version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
@ -2443,9 +2447,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.7.2" version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae"
dependencies = [ dependencies = [
"adler", "adler",
"simd-adler32", "simd-adler32",
@ -2694,7 +2698,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -2790,9 +2794,9 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
[[package]] [[package]]
name = "open" name = "open"
version = "5.1.2" version = "5.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "449f0ff855d85ddbf1edd5b646d65249ead3f5e422aaa86b7d2d0b049b103e32" checksum = "2eb49fbd5616580e9974662cb96a3463da4476e649a7e4b258df0de065db0657"
dependencies = [ dependencies = [
"is-wsl", "is-wsl",
"libc", "libc",
@ -2822,7 +2826,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -2857,9 +2861,9 @@ checksum = "7f222829ae9293e33a9f5e9f440c6760a3d450a64affe1846486b140db81c1f4"
[[package]] [[package]]
name = "parcel_selectors" name = "parcel_selectors"
version = "0.26.4" version = "0.26.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05d74befe2d076330d9a58bf9ca2da424568724ab278adf15fb5718253133887" checksum = "ce9c47a67c66fee4a5a42756f9784d92941bd0ab2b653539a9e90521a44b66f0"
dependencies = [ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"cssparser", "cssparser",
@ -2985,7 +2989,7 @@ dependencies = [
"pest_meta", "pest_meta",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -3068,7 +3072,7 @@ dependencies = [
"phf_shared 0.11.2", "phf_shared 0.11.2",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -3178,9 +3182,9 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.82" version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
@ -3201,7 +3205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
dependencies = [ dependencies = [
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -3749,6 +3753,7 @@ dependencies = [
"content", "content",
"errors", "errors",
"libs", "libs",
"serde",
] ]
[[package]] [[package]]
@ -3785,22 +3790,22 @@ dependencies = [
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.201" version = "1.0.202"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.201" version = "1.0.202"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -3817,9 +3822,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_spanned" name = "serde_spanned"
version = "0.6.5" version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -4056,9 +4061,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.61" version = "2.0.65"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c993ed8ccba56ae856363b1845da7266a7cb78e1d146c8a32d54b45a8b831fc9" checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -4123,7 +4128,7 @@ dependencies = [
"cfg-expr", "cfg-expr",
"heck 0.5.0", "heck 0.5.0",
"pkg-config", "pkg-config",
"toml 0.8.12", "toml 0.8.13",
"version-compare", "version-compare",
] ]
@ -4237,7 +4242,7 @@ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -4248,7 +4253,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
"test-case-core", "test-case-core",
] ]
@ -4260,22 +4265,22 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.60" version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.60" version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]
@ -4405,9 +4410,9 @@ dependencies = [
[[package]] [[package]]
name = "toml" name = "toml"
version = "0.8.12" version = "0.8.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba"
dependencies = [ dependencies = [
"serde", "serde",
"serde_spanned", "serde_spanned",
@ -4417,18 +4422,18 @@ dependencies = [
[[package]] [[package]]
name = "toml_datetime" name = "toml_datetime"
version = "0.6.5" version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
dependencies = [ dependencies = [
"serde", "serde",
] ]
[[package]] [[package]]
name = "toml_edit" name = "toml_edit"
version = "0.22.12" version = "0.22.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c"
dependencies = [ dependencies = [
"indexmap 2.2.6", "indexmap 2.2.6",
"serde", "serde",
@ -4503,18 +4508,18 @@ checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
[[package]] [[package]]
name = "unic-langid" name = "unic-langid"
version = "0.9.4" version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "238722e6d794ed130f91f4ea33e01fcff4f188d92337a21297892521c72df516" checksum = "23dd9d1e72a73b25e07123a80776aae3e7b0ec461ef94f9151eed6ec88005a44"
dependencies = [ dependencies = [
"unic-langid-impl", "unic-langid-impl",
] ]
[[package]] [[package]]
name = "unic-langid-impl" name = "unic-langid-impl"
version = "0.9.4" version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bd55a2063fdea4ef1f8633243a7b0524cbeef1905ae04c31a1c9b9775c55bc6" checksum = "0a5422c1f65949306c99240b81de9f3f15929f5a8bfe05bb44b034cc8bf593e5"
dependencies = [ dependencies = [
"tinystr", "tinystr",
] ]
@ -4722,7 +4727,7 @@ dependencies = [
"once_cell", "once_cell",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
@ -4756,7 +4761,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
"wasm-bindgen-backend", "wasm-bindgen-backend",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
@ -5104,7 +5109,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.61", "syn 2.0.65",
] ]
[[package]] [[package]]

View File

@ -7,6 +7,23 @@ pub enum IndexFormat {
ElasticlunrJson, ElasticlunrJson,
#[default] #[default]
ElasticlunrJavascript, ElasticlunrJavascript,
FuseJson,
FuseJavascript,
}
impl IndexFormat {
/// file extension which ought to be used for this index format.
fn extension(&self) -> &'static str {
match *self {
IndexFormat::ElasticlunrJavascript | IndexFormat::FuseJavascript => "js",
IndexFormat::ElasticlunrJson | IndexFormat::FuseJson => "json",
}
}
/// The name of the search index file for language `lang` in this format, built as
/// `search_index.<lang>.<extension>`.
pub fn filename(&self, lang: &str) -> String {
    let extension = self.extension();
    format!("search_index.{}.{}", lang, extension)
}
} }
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
@ -17,7 +34,7 @@ pub struct Search {
/// Includes the whole content in the search index. Ok for small sites but becomes /// Includes the whole content in the search index. Ok for small sites but becomes
/// too big on large sites. `true` by default. /// too big on large sites. `true` by default.
pub include_content: bool, pub include_content: bool,
/// Optionally truncate the content down to `n` chars. This might cut content in a word /// Optionally truncate the content down to `n` code points. This might cut content in a word
pub truncate_content_length: Option<usize>, pub truncate_content_length: Option<usize>,
/// Includes the description in the search index. When the site becomes too large, you can switch /// Includes the description in the search index. When the site becomes too large, you can switch
/// to that instead. `false` by default /// to that instead. `false` by default
@ -26,7 +43,7 @@ pub struct Search {
pub include_date: bool, pub include_date: bool,
/// Include the path of the page in the search index. `false` by default. /// Include the path of the page in the search index. `false` by default.
pub include_path: bool, pub include_path: bool,
/// Format of the search index to be produced. Javascript by default /// Format of the search index to be produced. 'elasticlunr_javascript' by default.
pub index_format: IndexFormat, pub index_format: IndexFormat,
} }

View File

@ -8,3 +8,4 @@ errors = { path = "../errors" }
content = { path = "../content" } content = { path = "../content" }
config = { path = "../config" } config = { path = "../config" }
libs = { path = "../libs" } libs = { path = "../libs" }
serde = { version = "1.0", features = ["derive"] }

View File

@ -0,0 +1,236 @@
use config::{Config, Search};
use content::{Library, Section};
use errors::{bail, Result};
use libs::elasticlunr::{lang, Index, IndexBuilder};
use libs::time::format_description::well_known::Rfc3339;
use libs::time::OffsetDateTime;
use crate::clean_and_truncate_body;
pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js");
/// Registers one index field for every `include_*` option enabled in `search_config`
/// and returns the resulting builder.
///
/// Fields are always added in the order title, description, date, path, body.
/// The path field is the only one registered with a custom tokenizer (`path_tokenizer`).
fn build_fields(search_config: &Search, index: IndexBuilder) -> IndexBuilder {
    let index = if search_config.include_title { index.add_field("title") } else { index };
    let index =
        if search_config.include_description { index.add_field("description") } else { index };
    let index = if search_config.include_date { index.add_field("date") } else { index };
    let index = if search_config.include_path {
        index.add_field_with_tokenizer("path", Box::new(path_tokenizer))
    } else {
        index
    };

    if search_config.include_content {
        index.add_field("body")
    } else {
        index
    }
}
/// Splits a page path into lowercase tokens.
///
/// Whitespace, `-` and `/` all act as separators; empty pieces (for example the ones
/// produced by leading, trailing or repeated separators) are dropped.
fn path_tokenizer(text: &str) -> Vec<String> {
    let is_separator = |ch: char| ch == '/' || ch == '-' || ch.is_whitespace();

    let mut tokens = Vec::new();
    for piece in text.split(is_separator) {
        if piece.is_empty() {
            continue;
        }
        tokens.push(piece.trim().to_lowercase());
    }
    tokens
}
/// Builds the row of field values for one document of the index.
///
/// Values are pushed in the same order in which `build_fields` registers the fields
/// (title, description, date, path, body), and only for the fields enabled in
/// `search_config`. Every enabled field contributes exactly one value, so the length
/// of the row always equals the number of registered fields.
///
/// - `title` / `description`: an empty string is used when they are `None`.
/// - `datetime`: formatted as RFC 3339; an empty string is used when it is `None` or
///   cannot be formatted.
/// - `path`: copied as is.
/// - `content`: passed through `clean_and_truncate_body` with the configured
///   `truncate_content_length`.
fn fill_index(
    search_config: &Search,
    title: &Option<String>,
    description: &Option<String>,
    datetime: &Option<OffsetDateTime>,
    path: &str,
    content: &str,
) -> Vec<String> {
    let mut row = vec![];

    if search_config.include_title {
        row.push(title.clone().unwrap_or_default());
    }

    if search_config.include_description {
        row.push(description.clone().unwrap_or_default());
    }

    if search_config.include_date {
        // Always push a value here, even without a usable date (sections are indexed
        // with `&None`): the row is handed to `Index::add_doc` as a plain list, so
        // skipping the date would shift the path/body values into the wrong field.
        let date = datetime.as_ref().and_then(|d| d.format(&Rfc3339).ok()).unwrap_or_default();
        row.push(date);
    }

    if search_config.include_path {
        row.push(path.to_string());
    }

    if search_config.include_content {
        row.push(clean_and_truncate_body(search_config.truncate_content_length, content));
    }

    row
}
/// Builds the Elasticlunr index for the language `lang` and returns it serialized
/// with `Index::to_json`.
///
/// The indexed fields come from the search settings of `lang` in `config.languages`;
/// every section of `library` written in `lang` is then handed to
/// `add_section_to_index`.
///
/// Errors if the language given is not available in Elasticlunr.
/// TODO: is making `in_search_index` apply to subsections of a `false` section useful?
pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<String> {
    let language = if let Some(known) = lang::from_code(lang) {
        known
    } else {
        bail!("Tried to build search index for language {} which is not supported", lang);
    };

    let search_config = &config.languages[lang].search;
    let builder = build_fields(search_config, IndexBuilder::with_language(language));
    let mut index = builder.build();

    for (_, section) in &library.sections {
        if section.lang != lang {
            continue;
        }
        add_section_to_index(&mut index, section, library, search_config);
    }

    Ok(index.to_json())
}
/// Adds `section` and its pages to `index`, each keyed by its permalink.
///
/// Nothing is added when the section has `in_search_index` disabled. A section that
/// redirects elsewhere is not indexed itself, but its pages still are. Pages with
/// `in_search_index` disabled are skipped individually.
fn add_section_to_index(
    index: &mut Index,
    section: &Section,
    library: &Library,
    search_config: &Search,
) {
    if section.meta.in_search_index {
        // Don't index redirecting sections
        let redirects = section.meta.redirect_to.is_some();
        if !redirects {
            // Sections carry no date, hence the `&None`.
            let row = fill_index(
                search_config,
                &section.meta.title,
                &section.meta.description,
                &None,
                &section.path,
                &section.content,
            );
            index.add_doc(&section.permalink, &row);
        }

        for key in &section.pages {
            let page = &library.pages[key];
            if page.meta.in_search_index {
                let row = fill_index(
                    search_config,
                    &page.meta.title,
                    &page.meta.description,
                    &page.meta.datetime,
                    &page.path,
                    &page.content,
                );
                index.add_doc(&page.permalink, &row);
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use config::Config;
    use libs::elasticlunr::IndexBuilder;

    /// Title, description, path and content shared by the `fill_index` tests.
    fn fixture() -> (Option<String>, Option<String>, String, String) {
        (
            Some("A title".to_string()),
            Some("A description".to_string()),
            "/a/page/".to_string(),
            "Some content".to_string(),
        )
    }

    #[test]
    fn can_build_fields() {
        // Builds a fresh index from the current search settings of `cfg`.
        let index_for = |cfg: &Config| build_fields(&cfg.search, IndexBuilder::new()).build();

        let mut config = Config::default();
        assert_eq!(index_for(&config).get_fields(), vec!["title", "body"]);

        config.search.include_content = false;
        config.search.include_description = true;
        assert_eq!(index_for(&config).get_fields(), vec!["title", "description"]);

        config.search.include_content = true;
        assert_eq!(index_for(&config).get_fields(), vec!["title", "description", "body"]);

        config.search.include_title = false;
        assert_eq!(index_for(&config).get_fields(), vec!["description", "body"]);
    }

    #[test]
    fn can_fill_index_default() {
        let config = Config::default();
        let (title, description, path, content) = fixture();

        let row = fill_index(&config.search, &title, &description, &None, &path, &content);

        assert_eq!(row.len(), 2);
        assert_eq!(row[0], title.unwrap());
        assert_eq!(row[1], content);
    }

    #[test]
    fn can_fill_index_description() {
        let mut config = Config::default();
        config.search.include_description = true;
        let (title, description, path, content) = fixture();

        let row = fill_index(&config.search, &title, &description, &None, &path, &content);

        assert_eq!(row.len(), 3);
        assert_eq!(row[0], title.unwrap());
        assert_eq!(row[1], description.unwrap());
        assert_eq!(row[2], content);
    }

    #[test]
    fn can_fill_index_truncated_content() {
        let mut config = Config::default();
        config.search.truncate_content_length = Some(5);
        let (title, description, path, content) = fixture();

        let row = fill_index(&config.search, &title, &description, &None, &path, &content);

        assert_eq!(row.len(), 2);
        assert_eq!(row[0], title.unwrap());
        // Only the first five characters of the body are kept.
        assert_eq!(row[1], content[..5]);
    }

    #[test]
    fn can_fill_index_date() {
        let mut config = Config::default();
        config.search.include_date = true;
        let (title, description, path, content) = fixture();
        let datetime = Some(OffsetDateTime::parse("2023-01-31T00:00:00Z", &Rfc3339).unwrap());

        let row = fill_index(&config.search, &title, &description, &datetime, &path, &content);

        assert_eq!(row.len(), 3);
        assert_eq!(row[0], title.unwrap());
        assert_eq!(row[1], "2023-01-31T00:00:00Z");
        assert_eq!(row[2], content);
    }
}

View File

@ -0,0 +1,76 @@
use config::Search;
use content::Library;
use errors::Result;
use libs::serde_json;
use crate::clean_and_truncate_body;
/// Builds the search index for the language `lang` in Fuse.js format: a JSON array
/// with one object per indexed section or page.
///
/// Each object always carries the permalink as `url`. `title`, `description`, `body`
/// and `path` are only filled in when the matching `include_*` option of `config` is
/// enabled, and are `None` otherwise. A missing title or description is emitted as
/// an empty string, and the body is passed through `clean_and_truncate_body`.
///
/// Sections in another language or with `in_search_index` disabled are skipped
/// together with their pages. A redirecting section is not indexed itself, but its
/// pages still are, matching the Elasticlunr index builder. Pages with
/// `in_search_index` disabled are skipped individually.
///
/// Errors if the items cannot be serialized to JSON.
pub fn build_index(lang: &str, library: &Library, config: &Search) -> Result<String> {
    #[derive(serde::Serialize)]
    struct Item<'a> {
        url: &'a str,
        title: Option<&'a str>,
        description: Option<&'a str>,
        body: Option<String>, // AMMONIA.clean has to allocate anyway
        path: Option<&'a str>,
    }

    /// Builds the entry for one section or page, honouring the `include_*` options.
    fn make_item<'a>(
        config: &Search,
        url: &'a str,
        title: &'a Option<String>,
        description: &'a Option<String>,
        path: &'a str,
        content: &str,
    ) -> Item<'a> {
        Item {
            url,
            title: match config.include_title {
                true => Some(title.as_deref().unwrap_or_default()),
                false => None,
            },
            description: match config.include_description {
                true => Some(description.as_deref().unwrap_or_default()),
                false => None,
            },
            body: match config.include_content {
                true => Some(clean_and_truncate_body(config.truncate_content_length, content)),
                false => None,
            },
            path: match config.include_path {
                true => Some(path),
                false => None,
            },
        }
    }

    let mut items: Vec<Item> = Vec::new();
    for (_, section) in &library.sections {
        if section.lang != lang || !section.meta.in_search_index {
            continue;
        }

        // Don't index redirecting sections themselves, but keep going so that
        // their pages are still searchable.
        if section.meta.redirect_to.is_none() {
            items.push(make_item(
                config,
                &section.permalink,
                &section.meta.title,
                &section.meta.description,
                &section.path,
                &section.content,
            ));
        }

        for key in &section.pages {
            let page = &library.pages[key];
            if page.meta.in_search_index {
                items.push(make_item(
                    config,
                    &page.permalink,
                    &page.meta.title,
                    &page.meta.description,
                    &page.path,
                    &page.content,
                ));
            }
        }
    }

    Ok(serde_json::to_string(&items)?)
}

View File

@ -1,16 +1,12 @@
use std::collections::{HashMap, HashSet}; mod elasticlunr;
mod fuse;
use libs::ammonia; use libs::ammonia;
use libs::elasticlunr::{lang, Index, IndexBuilder};
use libs::once_cell::sync::Lazy; use libs::once_cell::sync::Lazy;
use libs::time::format_description::well_known::Rfc3339; use std::collections::{HashMap, HashSet};
use libs::time::OffsetDateTime;
use config::{Config, Search}; pub use elasticlunr::{build_index as build_elasticlunr, ELASTICLUNR_JS};
use content::{Library, Section}; pub use fuse::build_index as build_fuse;
use errors::{bail, Result};
pub const ELASTICLUNR_JS: &str = include_str!("elasticlunr.min.js");
static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| { static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
let mut clean_content = HashSet::new(); let mut clean_content = HashSet::new();
@ -28,238 +24,25 @@ static AMMONIA: Lazy<ammonia::Builder<'static>> = Lazy::new(|| {
builder builder
}); });
fn build_fields(search_config: &Search, mut index: IndexBuilder) -> IndexBuilder { /// uses ammonia to clean the body, and truncates it to `truncate_content_length`
if search_config.include_title { pub fn clean_and_truncate_body(truncate_content_length: Option<usize>, body: &str) -> String {
index = index.add_field("title"); let mut clean = AMMONIA.clean(body).to_string();
} if let Some(new_len) = truncate_content_length {
clean.truncate(clean.char_indices().nth(new_len).map(|(i, _)| i).unwrap_or(clean.len()))
if search_config.include_description {
index = index.add_field("description");
}
if search_config.include_date {
index = index.add_field("date")
}
if search_config.include_path {
index = index.add_field_with_tokenizer("path", Box::new(path_tokenizer));
}
if search_config.include_content {
index = index.add_field("body")
}
index
}
/// Tokenizer used for the `path` field of the index.
///
/// Cuts `text` at every whitespace character, `-` and `/`, discards the empty
/// pieces produced by adjacent separators, and returns the remaining pieces
/// trimmed and lowercased, in their original order.
fn path_tokenizer(text: &str) -> Vec<String> {
    let is_separator = |c: char| c == '/' || c == '-' || c.is_whitespace();

    let mut tokens = Vec::new();
    for piece in text.split(is_separator) {
        // Consecutive separators (e.g. the leading `/`) yield empty pieces.
        if piece.is_empty() {
            continue;
        }
        tokens.push(piece.trim().to_lowercase());
    }
    tokens
}
/// Builds the row of field values for a single document.
///
/// Values are pushed in the same order `build_fields` registers the fields
/// (title, description, date, path, body), and only for the fields enabled in
/// `search_config`. Every enabled field contributes exactly one entry, so the
/// returned row always lines up positionally with the registered fields.
///
/// - `title` / `description`: an empty string is used when missing.
/// - `datetime`: formatted as RFC 3339; an empty string is used when it is
///   missing or cannot be formatted.
/// - `path`: copied as-is.
/// - `content`: cleaned with `AMMONIA` and, if `truncate_content_length` is
///   set, cut to at most that many code points.
fn fill_index(
    search_config: &Search,
    title: &Option<String>,
    description: &Option<String>,
    datetime: &Option<OffsetDateTime>,
    path: &str,
    content: &str,
) -> Vec<String> {
    let mut row = vec![];

    if search_config.include_title {
        row.push(title.clone().unwrap_or_default());
    }

    if search_config.include_description {
        row.push(description.clone().unwrap_or_default());
    }

    if search_config.include_date {
        // Always push a value, even without a usable date: skipping the entry
        // would shift the path and body into the wrong fields for this document.
        let formatted =
            datetime.as_ref().and_then(|date| date.format(&Rfc3339).ok()).unwrap_or_default();
        row.push(formatted);
    }

    if search_config.include_path {
        row.push(path.to_string());
    }

    if search_config.include_content {
        let mut body = AMMONIA.clean(content).to_string();
        if let Some(truncate_len) = search_config.truncate_content_length {
            // `char_indices` yields byte offsets on code point boundaries, so
            // truncating at that offset never splits a multi-byte character.
            if let Some((idx, _)) = body.char_indices().nth(truncate_len) {
                body.truncate(idx);
            }
        }
        row.push(body);
    }

    row
}
/// Builds the search index for `lang` and returns it serialized as JSON.
///
/// Only sections whose language equals `lang` are handed to
/// `add_section_to_index`; the indexed fields come from the search settings
/// of that language in `config`.
///
/// Errors if `lang` is not a language Elasticlunr knows about.
/// TODO: is making `in_search_index` apply to subsections of a `false` section useful?
pub fn build_index(lang: &str, library: &Library, config: &Config) -> Result<String> {
    let language = if let Some(found) = lang::from_code(lang) {
        found
    } else {
        bail!("Tried to build search index for language {} which is not supported", lang);
    };

    let search_config = &config.languages[lang].search;
    let builder = build_fields(search_config, IndexBuilder::with_language(language));
    let mut index = builder.build();

    for (_, section) in &library.sections {
        if section.lang != lang {
            continue;
        }
        add_section_to_index(&mut index, section, library, search_config);
    }

    Ok(index.to_json())
}
fn add_section_to_index(
index: &mut Index,
section: &Section,
library: &Library,
search_config: &Search,
) {
if !section.meta.in_search_index {
return;
}
// Don't index redirecting sections
if section.meta.redirect_to.is_none() {
index.add_doc(
&section.permalink,
&fill_index(
search_config,
&section.meta.title,
&section.meta.description,
&None,
&section.path,
&section.content,
),
);
}
for key in &section.pages {
let page = &library.pages[key];
if !page.meta.in_search_index {
continue;
}
index.add_doc(
&page.permalink,
&fill_index(
search_config,
&page.meta.title,
&page.meta.description,
&page.meta.datetime,
&page.path,
&page.content,
),
);
} }
clean
} }
#[cfg(test)] #[cfg(test)]
mod tests { #[test]
use super::*; fn clean_and_truncate_body_test() {
assert_eq!(clean_and_truncate_body(None, "hello world"), "hello world");
use config::Config; assert_eq!(
clean_and_truncate_body(None, "hello <script>alert('xss')</script> world"),
#[test] "hello world"
fn can_build_fields() { );
let mut config = Config::default(); assert_eq!(clean_and_truncate_body(Some(100), "hello"), "hello");
let index = build_fields(&config.search, IndexBuilder::new()).build(); assert_eq!(clean_and_truncate_body(Some(2), "hello"), "he");
assert_eq!(index.get_fields(), vec!["title", "body"]); assert_eq!(clean_and_truncate_body(Some(6), "hello \u{202E} world"), "hello ");
assert_eq!(clean_and_truncate_body(Some(7), "hello \u{202E} world"), "hello \u{202e}");
config.search.include_content = false;
config.search.include_description = true;
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["title", "description"]);
config.search.include_content = true;
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["title", "description", "body"]);
config.search.include_title = false;
let index = build_fields(&config.search, IndexBuilder::new()).build();
assert_eq!(index.get_fields(), vec!["description", "body"]);
}
#[test]
fn can_fill_index_default() {
let config = Config::default();
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let path = "/a/page/".to_string();
let content = "Some content".to_string();
let res = fill_index(&config.search, &title, &description, &None, &path, &content);
assert_eq!(res.len(), 2);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], content);
}
#[test]
fn can_fill_index_description() {
let mut config = Config::default();
config.search.include_description = true;
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let path = "/a/page/".to_string();
let content = "Some content".to_string();
let res = fill_index(&config.search, &title, &description, &None, &path, &content);
assert_eq!(res.len(), 3);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], description.unwrap());
assert_eq!(res[2], content);
}
#[test]
fn can_fill_index_truncated_content() {
let mut config = Config::default();
config.search.truncate_content_length = Some(5);
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let path = "/a/page/".to_string();
let content = "Some content".to_string();
let res = fill_index(&config.search, &title, &description, &None, &path, &content);
assert_eq!(res.len(), 2);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], content[..5]);
}
#[test]
fn can_fill_index_date() {
let mut config = Config::default();
config.search.include_date = true;
let title = Some("A title".to_string());
let description = Some("A description".to_string());
let path = "/a/page/".to_string();
let content = "Some content".to_string();
let datetime = Some(OffsetDateTime::parse("2023-01-31T00:00:00Z", &Rfc3339).unwrap());
let res = fill_index(&config.search, &title, &description, &datetime, &path, &content);
assert_eq!(res.len(), 3);
assert_eq!(res[0], title.unwrap());
assert_eq!(res[1], "2023-01-31T00:00:00Z");
assert_eq!(res[2], content);
}
} }

View File

@ -799,19 +799,26 @@ impl Site {
} }
fn index_for_lang(&self, lang: &str) -> Result<()> { fn index_for_lang(&self, lang: &str) -> Result<()> {
let index_json = search::build_index(lang, &self.library.read().unwrap(), &self.config)?; let path = &self.output_path.join(self.config.search.index_format.filename(lang));
let (path, content) = match &self.config.search.index_format { let library = self.library.read().unwrap();
IndexFormat::ElasticlunrJson => { let content = match &self.config.search.index_format {
let path = self.output_path.join(format!("search_index.{}.json", lang)); IndexFormat::ElasticlunrJavascript | IndexFormat::ElasticlunrJson => {
(path, index_json) search::build_elasticlunr(lang, &library, &self.config)?
} }
IndexFormat::ElasticlunrJavascript => { IndexFormat::FuseJson | IndexFormat::FuseJavascript => {
let path = self.output_path.join(format!("search_index.{}.js", lang)); search::build_fuse(lang, &library, &self.config.search)?
let content = format!("window.searchIndex = {};", index_json);
(path, content)
} }
}; };
create_file(&path, &content) drop(library); // no need to hold on to this guard while writing
create_file(
path,
match self.config.search.index_format {
IndexFormat::ElasticlunrJson | IndexFormat::FuseJson => content,
IndexFormat::ElasticlunrJavascript | IndexFormat::FuseJavascript => {
format!("window.searchIndex = {}", content)
}
},
)
} }
pub fn build_search_index(&self) -> Result<()> { pub fn build_search_index(&self) -> Result<()> {
@ -827,8 +834,13 @@ impl Site {
} }
} }
// then elasticlunr.min.js match self.config.search.index_format {
create_file(&self.output_path.join("elasticlunr.min.js"), search::ELASTICLUNR_JS)?; IndexFormat::ElasticlunrJavascript | IndexFormat::ElasticlunrJson => {
// then elasticlunr.min.js
create_file(&self.output_path.join("elasticlunr.min.js"), search::ELASTICLUNR_JS)?;
}
_ => {}
}
Ok(()) Ok(())
} }

View File

@ -28,11 +28,12 @@ fn create_parent(path: &Path) -> Result<()> {
} }
/// Create a file with the content given /// Create a file with the content given
pub fn create_file(path: &Path, content: &str) -> Result<()> { /// `content` can be `&str`, `String`, or `&String` (and probably others)
pub fn create_file(path: &Path, content: impl AsRef<str>) -> Result<()> {
create_parent(path)?; create_parent(path)?;
let mut file = let mut file =
File::create(path).with_context(|| format!("Failed to create file {}", path.display()))?; File::create(path).with_context(|| format!("Failed to create file {}", path.display()))?;
file.write_all(content.as_bytes())?; file.write_all(content.as_ref().as_bytes())?;
Ok(()) Ok(())
} }

View File

@ -4,7 +4,7 @@ weight = 100
+++ +++
Zola can build a search index from the sections and pages content to Zola can build a search index from the sections and pages content to
be used by a JavaScript library such as [elasticlunr](http://elasticlunr.com/). be used by a JavaScript library such as [elasticlunr](http://elasticlunr.com/) or [fuse](https://www.fusejs.io).
To enable it, you only need to set `build_search_index = true` in your `config.toml` and Zola will To enable it, you only need to set `build_search_index = true` in your `config.toml` and Zola will
generate an index for the `default_language` set for all pages not excluded from the search index. generate an index for the `default_language` set for all pages not excluded from the search index.
@ -12,21 +12,36 @@ generate an index for the `default_language` set for all pages not excluded from
It is very important to set the `default_language` in your `config.toml` if you are writing a site not in It is very important to set the `default_language` in your `config.toml` if you are writing a site not in
English; the index building pipelines are very different depending on the language. English; the index building pipelines are very different depending on the language.
After `zola build` or `zola serve`, you should see two files in your public directory:
- `search_index.${default_language}.js`: so `search_index.en.js` for a default setup
- `elasticlunr.min.js`
If you set `index_format = "elasticlunr_json"` in your `config.toml`, a `search_index.${default_language}.json` is generated
instead of the default `search_index.${default_language}.js`.
As each site will be different, Zola makes no assumptions about your search function and doesn't provide As each site will be different, Zola makes no assumptions about your search function and doesn't provide
the JavaScript/CSS code to do an actual search and display results. You can look at how this site the JavaScript/CSS code to do an actual search and display results. You can look at how this site
implements it to get an idea: [search.js](https://github.com/getzola/zola/tree/master/docs/static/search.js). implements it (using elasticlunr) to get an idea: [search.js](https://github.com/getzola/zola/tree/master/docs/static/search.js).
If you are using a language other than English, you will also need to include the corresponding JavaScript stemmer file.
See <https://github.com/weixsong/lunr-languages#in-a-web-browser> for details.
## Configuring the search index ## Configuring the search index
In some cases, the default indexing strategy is not suitable. You can customize which fields to include and whether In some cases, the default indexing strategy is not suitable. You can customize which fields to include and whether
to truncate the content in the [search configuration](@/documentation/getting-started/configuration.md). to truncate the content in the [search configuration](@/documentation/getting-started/configuration.md).
## Index Formats
### Elasticlunr
Compatible with [elasticlunr](http://elasticlunr.com/). Also produces `elasticlunr.min.js`.
```toml
# config.toml
[search]
index_format = "elasticlunr_javascript" # or "elasticlunr_json"
```
If you are using a language other than English, you will also need to include the corresponding JavaScript stemmer file.
See <https://github.com/weixsong/lunr-languages#in-a-web-browser> for details.
### Fuse
Compatible with [fuse.js](https://www.fusejs.io/) and [tinysearch](https://github.com/tinysearch/tinysearch).
```toml
# config.toml
[search]
index_format = "fuse_javascript" # or "fuse_json"
```

View File

@ -174,16 +174,18 @@ include_title = true
include_description = false include_description = false
# Whether to include the RFC3339 datetime of the page in the search index # Whether to include the RFC3339 datetime of the page in the search index
include_date = false include_date = false
# Whether to include the path of the page/section in the index # Whether to include the path of the page/section in the index (the permalink is always included)
include_path = false include_path = false
# Whether to include the rendered content of the page/section in the index # Whether to include the rendered content of the page/section in the index
include_content = true include_content = true
# At which character to truncate the content to. Useful if you have a lot of pages and the index would # At which code point to truncate the content to. Useful if you have a lot of pages and the index would
# become too big to load on the site. Defaults to not being set. # become too big to load on the site. Defaults to not being set.
# truncate_content_length = 100 # truncate_content_length = 100
# Whether to produce the search index as a javascript file or as a JSON file # Whether to produce the search index as a javascript file or as a JSON file
# Accepted value "elasticlunr_javascript" or "elasticlunr_json" # Accepted values:
# - "elasticlunr_javascript", "elasticlunr_json"
# - "fuse_javascript", "fuse_json"
index_format = "elasticlunr_javascript" index_format = "elasticlunr_javascript"
# Optional translation object for the default language # Optional translation object for the default language