diff --git a/Cargo.lock b/Cargo.lock index 826c119..a0a61a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,15 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anyhow" version = "1.0.56" @@ -101,6 +110,12 @@ dependencies = [ "serde", ] +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "askama" version = "0.11.1" @@ -139,7 +154,7 @@ dependencies = [ "humansize", "mime", "mime_guess", - "nom", + "nom 7.1.1", "num-traits", "percent-encoding", "proc-macro2", @@ -149,6 +164,17 @@ dependencies = [ "toml", ] +[[package]] +name = "async-channel" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2114d64672151c0c5eaa5e131ec84a74f06e1e559830dabba01ca30605d66319" +dependencies = [ + "concurrent-queue", + "event-listener", + "futures-core", +] + [[package]] name = "async-trait" version = "0.1.52" @@ -309,6 +335,15 @@ dependencies = [ "serde", ] +[[package]] +name = "bitpacking" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +dependencies = [ + "crunchy", +] + [[package]] name = "block-buffer" version = "0.7.3" @@ -384,6 +419,18 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cache-padded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" + +[[package]] +name = 
"castaway" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6" + [[package]] name = "cc" version = "1.0.73" @@ -393,13 +440,19 @@ dependencies = [ "jobserver", ] +[[package]] +name = "census" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5927edd8345aef08578bcbb4aea7314f340d80c7f4931f99fbeb40b99d8f5060" + [[package]] name = "cexpr" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ - "nom", + "nom 7.1.1", ] [[package]] @@ -417,7 +470,7 @@ dependencies = [ "libc", "num-integer", "num-traits", - "time", + "time 0.1.44", "winapi", ] @@ -489,6 +542,24 @@ dependencies = [ "crossterm", ] +[[package]] +name = "combine" +version = "4.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b727aacc797f9fc28e355d21f34709ac4fc9adecfe470ad07b8f4464f53062" +dependencies = [ + "memchr", +] + +[[package]] +name = "concurrent-queue" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30ed07550be01594c6026cff2a1d7fe9c8f683caa798e12b68694ac9e88286a3" +dependencies = [ + "cache-padded", +] + [[package]] name = "const-random" version = "0.1.13" @@ -566,6 +637,20 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + [[package]] name = "crossbeam-channel" version = "0.5.3" @@ -679,6 +764,47 @@ dependencies = [ "syn", ] +[[package]] +name = "ctor" +version = "0.1.22" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "curl" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d855aeef205b43f65a5001e0997d81f8efca7badad4fad7d897aa7f0d0651f" +dependencies = [ + "curl-sys", + "libc", + "openssl-probe", + "openssl-sys", + "schannel", + "socket2", + "winapi", +] + +[[package]] +name = "curl-sys" +version = "0.4.53+curl-7.82.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8092905a5a9502c312f223b2775f57ec5c5b715f9a15ee9d2a8591d1364a0352" +dependencies = [ + "cc", + "libc", + "libnghttp2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", + "winapi", +] + [[package]] name = "cylon" version = "0.2.0" @@ -712,6 +838,12 @@ dependencies = [ "syn", ] +[[package]] +name = "diff" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" + [[package]] name = "digest" version = "0.8.1" @@ -746,6 +878,12 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "dtoa" version = "0.4.8" @@ -789,12 +927,45 @@ dependencies = [ "termcolor", ] +[[package]] +name = "event-listener" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77f3309417938f28bf8228fcff79a4a37103981e3e186d2ccd19c74b38f4eb71" + +[[package]] +name = "fail" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ec3245a0ca564e7f3c797d20d833a6870f57a728ac967d5225b3ffdef4465011" +dependencies = [ + "lazy_static", + "log", + "rand 0.8.5", +] + [[package]] name = "fake-simd" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +[[package]] +name = "fastdivide" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" + +[[package]] +name = "fastfield_codecs" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e8bfa31546b4ace05092c9db8d251d7bbc298a384875a08c945a473de4f1f" +dependencies = [ + "tantivy-bitpacker", + "tantivy-common 0.1.0", +] + [[package]] name = "fastrand" version = "1.7.0" @@ -887,6 +1058,16 @@ dependencies = [ "num", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futf" version = "0.1.5" @@ -897,6 +1078,21 @@ dependencies = [ "new_debug_unreachable", ] +[[package]] +name = "futures" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.21" @@ -922,6 +1118,7 @@ dependencies = [ "futures-core", "futures-task", "futures-util", + "num_cpus", ] [[package]] @@ -941,6 +1138,21 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" +[[package]] +name = "futures-lite" +version = 
"1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-macro" version = "0.3.21" @@ -970,6 +1182,7 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" dependencies = [ + "futures-channel", "futures-core", "futures-io", "futures-macro", @@ -1102,7 +1315,7 @@ dependencies = [ "byteorder", "crossbeam-channel", "flate2", - "nom", + "nom 7.1.1", "num-traits", ] @@ -1150,6 +1363,12 @@ dependencies = [ "syn", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "0.2.6" @@ -1303,6 +1522,42 @@ dependencies = [ "serde", ] +[[package]] +name = "isahc" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "437f8808009c031df3c1d532c8fd7e3d73239dfe522ebf0b94b5e34d5d01044b" +dependencies = [ + "async-channel", + "castaway", + "crossbeam-utils", + "curl", + "curl-sys", + "encoding_rs", + "event-listener", + "futures-lite", + "http", + "log", + "mime", + "once_cell", + "polling", + "slab", + "sluice", + "tracing", + "tracing-futures", + "url", + "waker-fn", +] + +[[package]] +name = "iso8601-duration" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b51dd97fa24074214b9eb14da518957573f4dec3189112610ae1ccec9ac464" +dependencies = [ + "nom 5.1.2", +] + [[package]] name = "itertools" version = "0.10.3" @@ -1366,6 +1621,25 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.120" @@ -1398,6 +1672,16 @@ dependencies = [ "thiserror", ] +[[package]] +name = "libnghttp2-sys" +version = "0.1.7+1.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ed28aba195b38d5ff02b9170cbff627e336a20925e43b4945390401c5dc93f" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "libsqlite3-sys" version = "0.23.2" @@ -1409,6 +1693,18 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libz-sys" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f35facd4a5673cb5a48822be2be1d4236c1c99cb4113cab7061ac720d5bf859" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "lifeguard" version = "0.6.1" @@ -2196,9 +2492,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.14" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" dependencies = [ "cfg-if", ] @@ -2212,6 +2508,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lz4_flex" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42c51df9d8d4842336c835df1d85ed447c4813baa237d033d95128bf5552ad8a" +dependencies = [ + "twox-hash", +] + [[package]] 
name = "mac" version = "0.1.1" @@ -2271,12 +2576,50 @@ dependencies = [ "libc", ] +[[package]] +name = "measure_time" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f07966480d8562b3622f51df0b4e3fe6ea7ddb3b48b19b0f44ef863c455bdf9" +dependencies = [ + "log", +] + +[[package]] +name = "meilisearch-sdk" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eae404a5052ee03460ad87998e00cc78e5c68ec3eb23f673f1c13007d150697" +dependencies = [ + "async-trait", + "futures", + "isahc", + "iso8601-duration", + "js-sys", + "log", + "serde", + "serde_json", + "time 0.3.9", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "memchr" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memmap2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -2440,6 +2783,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "murmurhash32" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" +dependencies = [ + "byteorder", +] + [[package]] name = "native-tls" version = "0.2.8" @@ -2470,6 +2822,17 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "lexical-core", + "memchr", + "version_check", +] + [[package]] name = 
"nom" version = "7.1.1" @@ -2576,6 +2939,15 @@ dependencies = [ "libc", ] +[[package]] +name = "num_threads" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba1801fb138d8e85e11d0fc70baf4fe1cdfffda7c6cd34a854905df588e5ed0" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.10.0" @@ -2660,6 +3032,30 @@ dependencies = [ "syn", ] +[[package]] +name = "output_vt100" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +dependencies = [ + "winapi", +] + +[[package]] +name = "ownedbytes" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfa208b217a39411d78b85427792e4c1bc40508acbcefd2836e765f44a5c99e" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "parking" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" + [[package]] name = "parking_lot" version = "0.11.2" @@ -2870,6 +3266,19 @@ version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" +[[package]] +name = "polling" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "685404d509889fade3e86fe3a5803bca2ec09b0c0778d5ada6ec8bf7a8de5259" +dependencies = [ + "cfg-if", + "libc", + "log", + "wepoll-ffi", + "winapi", +] + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -2882,6 +3291,18 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "pretty_assertions" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"57c038cb5319b9c704bf9c227c261d275bfec0ad438118a2787ce47944fb228b" +dependencies = [ + "ansi_term", + "ctor", + "diff", + "output_vt100", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -3022,6 +3443,18 @@ dependencies = [ [[package]] name = "quickpeep_indexer" version = "0.1.0" +dependencies = [ + "anyhow", + "env_logger", + "log", + "meilisearch-sdk", + "quickpeep_structs", + "serde", + "serde_bare", + "tantivy", + "tokio", + "toml", +] [[package]] name = "quickpeep_moz_readability" @@ -3258,9 +3691,15 @@ checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.6.25", ] +[[package]] +name = "regex-syntax" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e931c58b93d86f080c734bfd2bce7dd0079ae2331235818133c8be7f422e20e" + [[package]] name = "regex-syntax" version = "0.6.25" @@ -3371,6 +3810,16 @@ dependencies = [ "serde", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-hash" version = "1.1.0" @@ -3645,6 +4094,17 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" +[[package]] +name = "sluice" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7400c0eff44aa2fcb5e31a5f24ba9716ed90138769e4977a2ba6014ae63eb5" +dependencies = [ + "async-channel", + "futures-core", + "futures-io", +] + [[package]] name = "smallvec" version = "1.8.0" @@ -3692,7 +4152,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4b7922be017ee70900be125523f38bdd644f4f06a1b16e8fa5a8ee8c34bffd4" dependencies = [ 
"itertools", - "nom", + "nom 7.1.1", "unicode_categories", ] @@ -3870,6 +4330,102 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" +[[package]] +name = "tantivy" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "264c2549892aa83975386a924ef8d0b8e909674c837d37ea58b4bd8739495c6e" +dependencies = [ + "async-trait", + "base64", + "bitpacking", + "byteorder", + "census", + "chrono", + "crc32fast", + "crossbeam", + "downcast-rs", + "fail", + "fastdivide", + "fastfield_codecs", + "fnv", + "fs2", + "futures", + "htmlescape", + "itertools", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "murmurhash32", + "num_cpus", + "once_cell", + "ownedbytes", + "pretty_assertions", + "rayon", + "regex", + "rust-stemmers", + "serde", + "serde_json", + "smallvec", + "stable_deref_trait", + "tantivy-bitpacker", + "tantivy-common 0.2.0", + "tantivy-fst", + "tantivy-query-grammar", + "tempfile", + "thiserror", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66d10a5ed75437a4f6bbbba67601cd5adab8d71f5188b677055381f0f36064f2" + +[[package]] +name = "tantivy-common" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760e44073e328f4ea3f38660da9ba2598a19ad5ad4149cfb89ad89b4d5ee88d9" +dependencies = [ + "byteorder", +] + +[[package]] +name = "tantivy-common" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2078cd12c7e46eb2cd66ec813eac8472e0f9dfe816f26159effceffd2dbe4793" +dependencies = [ + "byteorder", + "ownedbytes", +] + +[[package]] +name = "tantivy-fst" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cb20cdc0d83e9184560bdde9cd60142dbb4af2e0f770e88fce45770495224205" +dependencies = [ + "byteorder", + "regex-syntax 0.4.2", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "466e0218472a9b276a73e38b2571ac02f9a1b270b4481c9cd8cc23a63d1307e9" +dependencies = [ + "combine", +] + [[package]] name = "tempfile" version = "3.3.0" @@ -3947,6 +4503,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "time" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" +dependencies = [ + "itoa 1.0.1", + "libc", + "num_threads", + "serde", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -4134,9 +4702,21 @@ dependencies = [ "cfg-if", "log", "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.23" @@ -4146,6 +4726,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "pin-project", + "tracing", +] + [[package]] name = "try-lock" version = "0.2.3" @@ -4162,6 +4752,16 @@ dependencies = [ "unchecked-index", ] +[[package]] +name = "twox-hash" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee73e6e4924fe940354b8d4d98cad5231175d615cd855b758adc658c0aac6a0" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.15.0" @@ -4246,6 +4846,12 @@ 
version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-ranges" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" + [[package]] name = "uuid" version = "0.8.2" @@ -4253,6 +4859,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ "getrandom 0.2.5", + "serde", ] [[package]] @@ -4267,6 +4874,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "waker-fn" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" + [[package]] name = "want" version = "0.3.0" @@ -4390,6 +5003,15 @@ dependencies = [ "webpki", ] +[[package]] +name = "wepoll-ffi" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d743fdedc5c64377b5fc2bc036b01c7fd642205a0d96356034ae3404d49eb7fb" +dependencies = [ + "cc", +] + [[package]] name = "winapi" version = "0.3.9" @@ -4500,7 +5122,7 @@ dependencies = [ "crc32fast", "flate2", "thiserror", - "time", + "time 0.1.44", ] [[package]] diff --git a/qp_indexer.sample.toml b/qp_indexer.sample.toml new file mode 100644 index 0000000..015b230 --- /dev/null +++ b/qp_indexer.sample.toml @@ -0,0 +1,10 @@ +seed_dir = "../quickpeep_seeds" + +# Tantivy Backend +# [backend.tantivy] +# index_dir = "./index" + +# Meilisearch Backend +# [backend.meili] +# url = "http://..." 
+# token = "" diff --git a/quickpeep/src/bin/qp-seedcoll-sort.rs b/quickpeep/src/bin/qp-seedcoll-sort.rs index eda90ed..5780c45 100644 --- a/quickpeep/src/bin/qp-seedcoll-sort.rs +++ b/quickpeep/src/bin/qp-seedcoll-sort.rs @@ -11,8 +11,8 @@ use tokio::io::{AsyncBufReadExt, BufReader}; pub const SORTING_OUTCOMES: [&str; 5] = [ // Included - "y", // Not included (generic) - "n", // Duplicates an existing entry + "y", // Not included (generic) + "n", // Duplicates an existing entry "dupe", // Spam entry "spam", // Invalid format for some reason (e.g. pattern or URL invalid) "inv", diff --git a/quickpeep_indexer/Cargo.toml b/quickpeep_indexer/Cargo.toml index 5f573bd..6e535e2 100644 --- a/quickpeep_indexer/Cargo.toml +++ b/quickpeep_indexer/Cargo.toml @@ -6,3 +6,15 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +tantivy = "0.17.0" +meilisearch-sdk = "0.15.0" +anyhow = "1.0.56" +tokio = { version = "1.17.0", features = ["full"] } +log = "0.4.16" +env_logger = "0.9.0" +serde = { version = "1.0.136", features = ["derive"] } +serde_bare = "0.5.0" +toml = "0.5.8" + + +quickpeep_structs = { path = "../quickpeep_structs" } diff --git a/quickpeep_indexer/src/backend.rs b/quickpeep_indexer/src/backend.rs new file mode 100644 index 0000000..c10a969 --- /dev/null +++ b/quickpeep_indexer/src/backend.rs @@ -0,0 +1,6 @@ +pub mod meili; +pub mod tantivy; + +/// Trait representing a search index backend; +/// either Tantivy (embedded) or Meilisearch (via HTTP API). 
+pub trait Backend {}
diff --git a/quickpeep_indexer/src/backend/meili.rs b/quickpeep_indexer/src/backend/meili.rs
new file mode 100644
index 0000000..411a2f5
--- /dev/null
+++ b/quickpeep_indexer/src/backend/meili.rs
@@ -0,0 +1 @@
+fn experiment_meili() {}
diff --git a/quickpeep_indexer/src/backend/tantivy.rs b/quickpeep_indexer/src/backend/tantivy.rs
new file mode 100644
index 0000000..3bb7527
--- /dev/null
+++ b/quickpeep_indexer/src/backend/tantivy.rs
@@ -0,0 +1,9 @@
+use std::path::Path;
+use tantivy::schema::Schema;
+
+fn experiment_tantivy() {
+    let schema = Schema::builder()
+        // TODO fields
+        .build();
+    tantivy::Index::create_in_dir(Path::new("/tmp/tindex"), schema);
+}
diff --git a/quickpeep_indexer/src/bin/qp-indexer.rs b/quickpeep_indexer/src/bin/qp-indexer.rs
new file mode 100644
index 0000000..7b8f22d
--- /dev/null
+++ b/quickpeep_indexer/src/bin/qp-indexer.rs
@@ -0,0 +1,6 @@
+pub struct IndexerConfig {}
+
+#[tokio::main]
+pub async fn main() -> anyhow::Result<()> {
+    Ok(())
+}
diff --git a/quickpeep_indexer/src/config.rs b/quickpeep_indexer/src/config.rs
new file mode 100644
index 0000000..985ee9e
--- /dev/null
+++ b/quickpeep_indexer/src/config.rs
@@ -0,0 +1,66 @@
+use anyhow::Context;
+use serde::{Deserialize, Serialize};
+use std::path::{Path, PathBuf};
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+/// Config for an indexer. All paths are relative to the config file if needed, but will be resolved
+/// when loading.
+pub struct IndexerConfig {
+    /// Path to seeds
+    pub seed_dir: PathBuf,
+
+    /// Configuration about which backend to use.
+    pub backend: BackendConfig,
+}
+
+impl IndexerConfig {
+    /// Loads a config at the specified path.
+    /// Will resolve all the paths in the IndexerConfig for you.
+    ///
+    /// # Errors
+    /// Fails if `path` has no parent, if the file can't be read, or if it isn't valid TOML for an
+    /// `IndexerConfig`.
+    pub fn load(path: &Path) -> anyhow::Result<Self> {
+        let config_dir = path.parent().context("Can't get parent of config file.")?;
+        let bytes = std::fs::read(path)?;
+        let mut indexer_config: IndexerConfig = toml::from_slice(&bytes)?;
+
+        indexer_config.seed_dir = config_dir.join(indexer_config.seed_dir);
+        match &mut indexer_config.backend {
+            BackendConfig::Tantivy(tantivy) => {
+                tantivy.index_dir = config_dir.join(&tantivy.index_dir);
+            }
+            // Meilisearch is configured by URL and token only; there are no paths to resolve.
+            BackendConfig::Meili(_) => {}
+        }
+
+        Ok(indexer_config)
+    }
+}
+
+/// Selects the search index backend and carries its settings.
+///
+/// Variant names are (de)serialised in snake_case so that the `[backend.tantivy]` and
+/// `[backend.meili]` tables shown in `qp_indexer.sample.toml` are accepted.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum BackendConfig {
+    Tantivy(TantivyBackendConfig),
+    Meili(MeiliBackendConfig),
+}
+
+/// Settings for the Tantivy backend.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct TantivyBackendConfig {
+    /// Directory of the index; resolved against the config file's directory on load.
+    pub index_dir: PathBuf,
+}
+
+/// Settings for the Meilisearch backend.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct MeiliBackendConfig {
+    /// URL of the Meilisearch instance.
+    pub url: String,
+    /// Token for the instance (presumably an API key; TODO confirm). Empty in the sample config.
+    pub token: String,
+}
diff --git a/quickpeep_indexer/src/lib.rs b/quickpeep_indexer/src/lib.rs
new file mode 100644
index 0000000..27b6d15
--- /dev/null
+++ b/quickpeep_indexer/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod config;
+
+pub mod backend;
diff --git a/quickpeep_indexer/src/main.rs b/quickpeep_indexer/src/main.rs
deleted file mode 100644
index e7a11a9..0000000
--- a/quickpeep_indexer/src/main.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-fn main() {
-    println!("Hello, world!");
-}