Start fleshing out the indexer

This commit is contained in:
Olivier 'reivilibre' 2022-03-23 20:11:12 +00:00
parent 0060ec0764
commit 1773ba4f44
11 changed files with 731 additions and 14 deletions

640
Cargo.lock generated
View File

@ -83,6 +83,15 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd"
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "anyhow"
version = "1.0.56"
@ -101,6 +110,12 @@ dependencies = [
"serde",
]
[[package]]
name = "arrayvec"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "askama"
version = "0.11.1"
@ -139,7 +154,7 @@ dependencies = [
"humansize",
"mime",
"mime_guess",
"nom",
"nom 7.1.1",
"num-traits",
"percent-encoding",
"proc-macro2",
@ -149,6 +164,17 @@ dependencies = [
"toml",
]
[[package]]
name = "async-channel"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2114d64672151c0c5eaa5e131ec84a74f06e1e559830dabba01ca30605d66319"
dependencies = [
"concurrent-queue",
"event-listener",
"futures-core",
]
[[package]]
name = "async-trait"
version = "0.1.52"
@ -309,6 +335,15 @@ dependencies = [
"serde",
]
[[package]]
name = "bitpacking"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7"
dependencies = [
"crunchy",
]
[[package]]
name = "block-buffer"
version = "0.7.3"
@ -384,6 +419,18 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "cache-padded"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c"
[[package]]
name = "castaway"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6"
[[package]]
name = "cc"
version = "1.0.73"
@ -393,13 +440,19 @@ dependencies = [
"jobserver",
]
[[package]]
name = "census"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5927edd8345aef08578bcbb4aea7314f340d80c7f4931f99fbeb40b99d8f5060"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
"nom 7.1.1",
]
[[package]]
@ -417,7 +470,7 @@ dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"time 0.1.44",
"winapi",
]
@ -489,6 +542,24 @@ dependencies = [
"crossterm",
]
[[package]]
name = "combine"
version = "4.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50b727aacc797f9fc28e355d21f34709ac4fc9adecfe470ad07b8f4464f53062"
dependencies = [
"memchr",
]
[[package]]
name = "concurrent-queue"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30ed07550be01594c6026cff2a1d7fe9c8f683caa798e12b68694ac9e88286a3"
dependencies = [
"cache-padded",
]
[[package]]
name = "const-random"
version = "0.1.13"
@ -566,6 +637,20 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845"
dependencies = [
"cfg-if",
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.3"
@ -679,6 +764,47 @@ dependencies = [
"syn",
]
[[package]]
name = "ctor"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "curl"
version = "0.4.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d855aeef205b43f65a5001e0997d81f8efca7badad4fad7d897aa7f0d0651f"
dependencies = [
"curl-sys",
"libc",
"openssl-probe",
"openssl-sys",
"schannel",
"socket2",
"winapi",
]
[[package]]
name = "curl-sys"
version = "0.4.53+curl-7.82.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8092905a5a9502c312f223b2775f57ec5c5b715f9a15ee9d2a8591d1364a0352"
dependencies = [
"cc",
"libc",
"libnghttp2-sys",
"libz-sys",
"openssl-sys",
"pkg-config",
"vcpkg",
"winapi",
]
[[package]]
name = "cylon"
version = "0.2.0"
@ -712,6 +838,12 @@ dependencies = [
"syn",
]
[[package]]
name = "diff"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499"
[[package]]
name = "digest"
version = "0.8.1"
@ -746,6 +878,12 @@ version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f"
[[package]]
name = "downcast-rs"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
[[package]]
name = "dtoa"
version = "0.4.8"
@ -789,12 +927,45 @@ dependencies = [
"termcolor",
]
[[package]]
name = "event-listener"
version = "2.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71"
[[package]]
name = "fail"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011"
dependencies = [
"lazy_static",
"log",
"rand 0.8.5",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
[[package]]
name = "fastdivide"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04"
[[package]]
name = "fastfield_codecs"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0e8bfa31546b4ace05092c9db8d251d7bbc298a384875a08c945a473de4f1f"
dependencies = [
"tantivy-bitpacker",
"tantivy-common 0.1.0",
]
[[package]]
name = "fastrand"
version = "1.7.0"
@ -887,6 +1058,16 @@ dependencies = [
"num",
]
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "futf"
version = "0.1.5"
@ -897,6 +1078,21 @@ dependencies = [
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.21"
@ -922,6 +1118,7 @@ dependencies = [
"futures-core",
"futures-task",
"futures-util",
"num_cpus",
]
[[package]]
@ -941,6 +1138,21 @@ version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b"
[[package]]
name = "futures-lite"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48"
dependencies = [
"fastrand",
"futures-core",
"futures-io",
"memchr",
"parking",
"pin-project-lite",
"waker-fn",
]
[[package]]
name = "futures-macro"
version = "0.3.21"
@ -970,6 +1182,7 @@ version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
@ -1102,7 +1315,7 @@ dependencies = [
"byteorder",
"crossbeam-channel",
"flate2",
"nom",
"nom 7.1.1",
"num-traits",
]
@ -1150,6 +1363,12 @@ dependencies = [
"syn",
]
[[package]]
name = "htmlescape"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
[[package]]
name = "http"
version = "0.2.6"
@ -1303,6 +1522,42 @@ dependencies = [
"serde",
]
[[package]]
name = "isahc"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "437f8808009c031df3c1d532c8fd7e3d73239dfe522ebf0b94b5e34d5d01044b"
dependencies = [
"async-channel",
"castaway",
"crossbeam-utils",
"curl",
"curl-sys",
"encoding_rs",
"event-listener",
"futures-lite",
"http",
"log",
"mime",
"once_cell",
"polling",
"slab",
"sluice",
"tracing",
"tracing-futures",
"url",
"waker-fn",
]
[[package]]
name = "iso8601-duration"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b51dd97fa24074214b9eb14da518957573f4dec3189112610ae1ccec9ac464"
dependencies = [
"nom 5.1.2",
]
[[package]]
name = "itertools"
version = "0.10.3"
@ -1366,6 +1621,25 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "levenshtein_automata"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
[[package]]
name = "lexical-core"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe"
dependencies = [
"arrayvec",
"bitflags",
"cfg-if",
"ryu",
"static_assertions",
]
[[package]]
name = "libc"
version = "0.2.120"
@ -1398,6 +1672,16 @@ dependencies = [
"thiserror",
]
[[package]]
name = "libnghttp2-sys"
version = "0.1.7+1.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ed28aba195b38d5ff02b9170cbff627e336a20925e43b4945390401c5dc93f"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "libsqlite3-sys"
version = "0.23.2"
@ -1409,6 +1693,18 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "libz-sys"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f35facd4a5673cb5a48822be2be1d4236c1c99cb4113cab7061ac720d5bf859"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "lifeguard"
version = "0.6.1"
@ -2196,9 +2492,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.14"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
dependencies = [
"cfg-if",
]
@ -2212,6 +2508,15 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "lz4_flex"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42c51df9d8d4842336c835df1d85ed447c4813baa237d033d95128bf5552ad8a"
dependencies = [
"twox-hash",
]
[[package]]
name = "mac"
version = "0.1.1"
@ -2271,12 +2576,50 @@ dependencies = [
"libc",
]
[[package]]
name = "measure_time"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f07966480d8562b3622f51df0b4e3fe6ea7ddb3b48b19b0f44ef863c455bdf9"
dependencies = [
"log",
]
[[package]]
name = "meilisearch-sdk"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eae404a5052ee03460ad87998e00cc78e5c68ec3eb23f673f1c13007d150697"
dependencies = [
"async-trait",
"futures",
"isahc",
"iso8601-duration",
"js-sys",
"log",
"serde",
"serde_json",
"time 0.3.9",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memmap2"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.6.5"
@ -2440,6 +2783,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "murmurhash32"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a"
dependencies = [
"byteorder",
]
[[package]]
name = "native-tls"
version = "0.2.8"
@ -2470,6 +2822,17 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "nom"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
dependencies = [
"lexical-core",
"memchr",
"version_check",
]
[[package]]
name = "nom"
version = "7.1.1"
@ -2576,6 +2939,15 @@ dependencies = [
"libc",
]
[[package]]
name = "num_threads"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aba1801fb138d8e85e11d0fc70baf4fe1cdfffda7c6cd34a854905df588e5ed0"
dependencies = [
"libc",
]
[[package]]
name = "once_cell"
version = "1.10.0"
@ -2660,6 +3032,30 @@ dependencies = [
"syn",
]
[[package]]
name = "output_vt100"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66"
dependencies = [
"winapi",
]
[[package]]
name = "ownedbytes"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bfa208b217a39411d78b85427792e4c1bc40508acbcefd2836e765f44a5c99e"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "parking"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
[[package]]
name = "parking_lot"
version = "0.11.2"
@ -2870,6 +3266,19 @@ version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
[[package]]
name = "polling"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "685404d509889fade3e86fe3a5803bca2ec09b0c0778d5ada6ec8bf7a8de5259"
dependencies = [
"cfg-if",
"libc",
"log",
"wepoll-ffi",
"winapi",
]
[[package]]
name = "ppv-lite86"
version = "0.2.16"
@ -2882,6 +3291,18 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "pretty_assertions"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c038cb5319b9c704bf9c227c261d275bfec0ad438118a2787ce47944fb228b"
dependencies = [
"ansi_term",
"ctor",
"diff",
"output_vt100",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -3022,6 +3443,18 @@ dependencies = [
[[package]]
name = "quickpeep_indexer"
version = "0.1.0"
dependencies = [
"anyhow",
"env_logger",
"log",
"meilisearch-sdk",
"quickpeep_structs",
"serde",
"serde_bare",
"tantivy",
"tokio",
"toml",
]
[[package]]
name = "quickpeep_moz_readability"
@ -3258,9 +3691,15 @@ checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.6.25",
]
[[package]]
name = "regex-syntax"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e931c58b93d86f080c734bfd2bce7dd0079ae2331235818133c8be7f422e20e"
[[package]]
name = "regex-syntax"
version = "0.6.25"
@ -3371,6 +3810,16 @@ dependencies = [
"serde",
]
[[package]]
name = "rust-stemmers"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
@ -3645,6 +4094,17 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5"
[[package]]
name = "sluice"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d7400c0eff44aa2fcb5e31a5f24ba9716ed90138769e4977a2ba6014ae63eb5"
dependencies = [
"async-channel",
"futures-core",
"futures-io",
]
[[package]]
name = "smallvec"
version = "1.8.0"
@ -3692,7 +4152,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4b7922be017ee70900be125523f38bdd644f4f06a1b16e8fa5a8ee8c34bffd4"
dependencies = [
"itertools",
"nom",
"nom 7.1.1",
"unicode_categories",
]
@ -3870,6 +4330,102 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"
[[package]]
name = "tantivy"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "264c2549892aa83975386a924ef8d0b8e909674c837d37ea58b4bd8739495c6e"
dependencies = [
"async-trait",
"base64",
"bitpacking",
"byteorder",
"census",
"chrono",
"crc32fast",
"crossbeam",
"downcast-rs",
"fail",
"fastdivide",
"fastfield_codecs",
"fnv",
"fs2",
"futures",
"htmlescape",
"itertools",
"levenshtein_automata",
"log",
"lru",
"lz4_flex",
"measure_time",
"memmap2",
"murmurhash32",
"num_cpus",
"once_cell",
"ownedbytes",
"pretty_assertions",
"rayon",
"regex",
"rust-stemmers",
"serde",
"serde_json",
"smallvec",
"stable_deref_trait",
"tantivy-bitpacker",
"tantivy-common 0.2.0",
"tantivy-fst",
"tantivy-query-grammar",
"tempfile",
"thiserror",
"uuid",
"winapi",
]
[[package]]
name = "tantivy-bitpacker"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66d10a5ed75437a4f6bbbba67601cd5adab8d71f5188b677055381f0f36064f2"
[[package]]
name = "tantivy-common"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "760e44073e328f4ea3f38660da9ba2598a19ad5ad4149cfb89ad89b4d5ee88d9"
dependencies = [
"byteorder",
]
[[package]]
name = "tantivy-common"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2078cd12c7e46eb2cd66ec813eac8472e0f9dfe816f26159effceffd2dbe4793"
dependencies = [
"byteorder",
"ownedbytes",
]
[[package]]
name = "tantivy-fst"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb20cdc0d83e9184560bdde9cd60142dbb4af2e0f770e88fce45770495224205"
dependencies = [
"byteorder",
"regex-syntax 0.4.2",
"utf8-ranges",
]
[[package]]
name = "tantivy-query-grammar"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "466e0218472a9b276a73e38b2571ac02f9a1b270b4481c9cd8cc23a63d1307e9"
dependencies = [
"combine",
]
[[package]]
name = "tempfile"
version = "3.3.0"
@ -3947,6 +4503,18 @@ dependencies = [
"winapi",
]
[[package]]
name = "time"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd"
dependencies = [
"itoa 1.0.1",
"libc",
"num_threads",
"serde",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
@ -4134,9 +4702,21 @@ dependencies = [
"cfg-if",
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.23"
@ -4146,6 +4726,16 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "tracing-futures"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
"pin-project",
"tracing",
]
[[package]]
name = "try-lock"
version = "0.2.3"
@ -4162,6 +4752,16 @@ dependencies = [
"unchecked-index",
]
[[package]]
name = "twox-hash"
version = "1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee73e6e4924fe940354b8d4d98cad5231175d615cd855b758adc658c0aac6a0"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]]
name = "typenum"
version = "1.15.0"
@ -4246,6 +4846,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-ranges"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"
[[package]]
name = "uuid"
version = "0.8.2"
@ -4253,6 +4859,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"getrandom 0.2.5",
"serde",
]
[[package]]
@ -4267,6 +4874,12 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "waker-fn"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"
[[package]]
name = "want"
version = "0.3.0"
@ -4390,6 +5003,15 @@ dependencies = [
"webpki",
]
[[package]]
name = "wepoll-ffi"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d743fdedc5c64377b5fc2bc036b01c7fd642205a0d96356034ae3404d49eb7fb"
dependencies = [
"cc",
]
[[package]]
name = "winapi"
version = "0.3.9"
@ -4500,7 +5122,7 @@ dependencies = [
"crc32fast",
"flate2",
"thiserror",
"time",
"time 0.1.44",
]
[[package]]

10
qp_indexer.sample.toml Normal file
View File

@ -0,0 +1,10 @@
seed_dir = "../quickpeep_seeds"
# Exactly one of the backend tables below must be uncommented: `backend` is a required setting.
# Tantivy Backend
# [backend.tantivy]
# index_dir = "./index"
# Meilisearch Backend
# [backend.meili]
# url = "http://..."
# token = ""

View File

@ -11,8 +11,8 @@ use tokio::io::{AsyncBufReadExt, BufReader};
pub const SORTING_OUTCOMES: [&str; 5] = [
// Included
"y", // Not included (generic)
"n", // Duplicates an existing entry
"y", // Not included (generic)
"n", // Duplicates an existing entry
"dupe", // Spam entry
"spam", // Invalid format for some reason (e.g. pattern or URL invalid)
"inv",

View File

@ -6,3 +6,15 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tantivy = "0.17.0"
meilisearch-sdk = "0.15.0"
anyhow = "1.0.56"
tokio = { version = "1.17.0", features = ["full"] }
log = "0.4.16"
env_logger = "0.9.0"
serde = { version = "1.0.136", features = ["derive"] }
serde_bare = "0.5.0"
toml = "0.5.8"
quickpeep_structs = { path = "../quickpeep_structs" }

View File

@ -0,0 +1,6 @@
pub mod meili;
pub mod tantivy;
/// Trait representing a search index backend:
/// either Tantivy (embedded) or Meilisearch (via HTTP API).
///
/// The trait currently declares no methods.
pub trait Backend {}

View File

@ -0,0 +1 @@
/// Placeholder for experimenting with the Meilisearch backend; the body is currently empty.
fn experiment_meili() {}

View File

@ -0,0 +1,9 @@
use std::path::Path;
use tantivy::schema::Schema;
/// Scratch experiment: builds an (as yet field-less) Tantivy schema and tries to
/// create an index for it in the hard-coded directory `/tmp/tindex`.
///
/// The function returns nothing; if index creation fails, the error is reported
/// on stderr instead of being silently discarded.
fn experiment_tantivy() {
    let schema = Schema::builder()
        // TODO fields
        .build();

    // `create_in_dir` returns a `Result`; dropping it unexamined would hide failures
    // (and trips the `unused_must_use` lint), so surface the error explicitly.
    if let Err(err) = tantivy::Index::create_in_dir(Path::new("/tmp/tindex"), schema) {
        eprintln!("Failed to create Tantivy index in /tmp/tindex: {:?}", err);
    }
}

View File

@ -0,0 +1,6 @@
/// Placeholder configuration type; it currently has no fields.
pub struct IndexerConfig {}
/// Async entry point, run on the Tokio runtime set up by `#[tokio::main]`.
/// Currently does no work and returns `Ok(())` immediately.
#[tokio::main]
pub async fn main() -> anyhow::Result<()> {
Ok(())
}

View File

@ -0,0 +1,51 @@
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
/// Configuration for an indexer.
///
/// Paths may be written relative to the config file where needed; they are
/// resolved when the config is loaded.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexerConfig {
    /// Path to the seeds.
    pub seed_dir: PathBuf,

    /// Which backend to use, together with its settings.
    pub backend: BackendConfig,
}
impl IndexerConfig {
/// Loads a config at the specified path.
/// Will resolve all the paths in the IndexerConfig for you.
pub fn load(path: &Path) -> anyhow::Result<IndexerConfig> {
let config_dir = path.parent().context("Can't get parent of config file.")?;
let bytes = std::fs::read(path)?;
let mut indexer_config: IndexerConfig = toml::from_slice(&bytes)?;
indexer_config.seed_dir = config_dir.join(indexer_config.seed_dir);
match &mut indexer_config.backend {
BackendConfig::Tantivy(tantivy) => {
tantivy.index_dir = config_dir.join(&tantivy.index_dir);
}
BackendConfig::Meili(_) => {}
}
Ok(indexer_config)
}
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BackendConfig {
Tantivy(TantivyBackendConfig),
Meili(MeiliBackendConfig),
}
/// Settings for the Tantivy backend.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TantivyBackendConfig {
    /// Directory of the Tantivy index. `IndexerConfig::load` joins this onto the
    /// config file's directory.
    ///
    /// Public so that code outside this module can read the loaded value.
    pub index_dir: PathBuf,
}
/// Settings for the Meilisearch backend.
///
/// Fields are public so that code outside this module can read the loaded values.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct MeiliBackendConfig {
    /// URL of the Meilisearch instance.
    pub url: String,
    /// Token for the Meilisearch instance.
    // NOTE(review): presumably the API key sent with requests; confirm against the backend code.
    pub token: String,
}

View File

@ -0,0 +1,3 @@
pub mod config;
pub mod backend;

View File

@ -1,3 +0,0 @@
/// Prints `Hello, world!` to standard output.
fn main() {
println!("Hello, world!");
}