diff --git a/Cargo.lock b/Cargo.lock index 079d0c3..d21361f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -597,7 +597,7 @@ checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" dependencies = [ "cfg-if", "libc", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi 0.10.0+wasi-snapshot-preview1", ] [[package]] @@ -2139,6 +2139,39 @@ dependencies = [ [[package]] name = "quickpeep" version = "0.1.0" + +[[package]] +name = "quickpeep_densedoc" +version = "0.1.0" +dependencies = [ + "anyhow", + "html5ever", + "kuchiki", + "lazy_static", + "regex", + "serde", +] + +[[package]] +name = "quickpeep_indexer" +version = "0.1.0" + +[[package]] +name = "quickpeep_moz_readability" +version = "0.1.0" +dependencies = [ + "anyhow", + "html5ever", + "kuchiki", + "lazy_static", + "log", + "regex", + "url", +] + +[[package]] +name = "quickpeep_raker" +version = "0.1.0" dependencies = [ "adblock", "anyhow", @@ -2169,31 +2202,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "quickpeep_densedoc" -version = "0.1.0" -dependencies = [ - "anyhow", - "html5ever", - "kuchiki", - "lazy_static", - "regex", - "serde", -] - -[[package]] -name = "quickpeep_moz_readability" -version = "0.1.0" -dependencies = [ - "anyhow", - "html5ever", - "kuchiki", - "lazy_static", - "log", - "regex", - "url", -] - [[package]] name = "quickpeep_structs" version = "0.1.0" @@ -2330,9 +2338,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f242f1488a539a79bac6dbe7c8609ae43b7914b7736210f239a37cccb32525" +checksum = "46a1f7aa4f35e5e8b4160449f51afc758f0ce6454315a9fa7d0d113e958c41eb" dependencies = [ "base64", "bytes", @@ -2791,11 +2799,12 @@ dependencies = [ [[package]] name = "time" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", + "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] @@ -3018,9 +3027,9 @@ checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasi" @@ -3199,9 +3208,9 @@ checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" [[package]] name = "winreg" -version = "0.7.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" dependencies = [ "winapi", ] diff --git a/Cargo.toml b/Cargo.toml index 0940d25..522d085 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [workspace] members = [ "quickpeep", + "quickpeep_indexer", + "quickpeep_raker", "quickpeep_densedoc", "quickpeep_moz_readability", "quickpeep_structs", diff --git a/quickpeep/Cargo.toml b/quickpeep/Cargo.toml index 3f59bb6..3a37869 100644 --- a/quickpeep/Cargo.toml +++ b/quickpeep/Cargo.toml @@ -6,51 +6,3 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -### Subcrates -quickpeep_moz_readability = { path = "../quickpeep_moz_readability" } -quickpeep_densedoc = { path = "../quickpeep_densedoc" } -quickpeep_structs = { path = "../quickpeep_structs" } -quickpeep_utils = { path = "../quickpeep_utils" } - -### CLI Helpers -clap = { version = "3.1.6", features = ["derive"] } -colour = "0.6.0" - -### Document Parsing -kuchiki = "0.8.1" -html5ever = "0.25.1" -serde = { version = "1.0.136", features = ["derive"] } -serde_bare = "0.5.0" - -### Dates -chrono = "0.4.19" - -### Utils -lazy_static = "1.4.0" -bytes = "1.1.0" -itertools = "0.10.3" -ipnetwork = "0.18.0" -futures-util = "0.3.21" -tokio = { version = "1.17.0", features = ["full"] } -anyhow = "1.0.55" -log = "0.4.14" -env_logger = "0.9.0" - -### Raking helpers -# HTTP Requests -reqwest = { version = "0.11.9", features = ["stream"] } -# Gemini Requests -# N.B. TODO gemfeeds are Atom feeds for Gemini. Should support those. -gemini-fetch = "0.2.1" -# Robots.txt -cylon = { version = "0.2.0", features = ["crawl-delay"] } -# RSS/Atom/JSON feeds -feed-rs = "1.0.0" -# Sitemaps -sitemap = "0.4.1" - -### Filtering helpers -# AdBlock -adblock = "0.5.0" -# Language detection -lingua = "1.3.3" \ No newline at end of file diff --git a/quickpeep/src/main.rs b/quickpeep/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/quickpeep/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/quickpeep_indexer/Cargo.toml b/quickpeep_indexer/Cargo.toml new file mode 100644 index 0000000..5f573bd --- /dev/null +++ b/quickpeep_indexer/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "quickpeep_indexer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/quickpeep_indexer/src/main.rs b/quickpeep_indexer/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/quickpeep_indexer/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/quickpeep_raker/Cargo.toml b/quickpeep_raker/Cargo.toml new file mode 100644 index 0000000..e11c875 --- /dev/null +++ b/quickpeep_raker/Cargo.toml @@ -0,0 +1,56 @@ +[package] +name = "quickpeep_raker" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +### Subcrates +quickpeep_moz_readability = { path = "../quickpeep_moz_readability" } +quickpeep_densedoc = { path = "../quickpeep_densedoc" } +quickpeep_structs = { path = "../quickpeep_structs" } +quickpeep_utils = { path = "../quickpeep_utils" } + +### CLI Helpers +clap = { version = "3.1.6", features = ["derive"] } +colour = "0.6.0" + +### Document Parsing +kuchiki = "0.8.1" +html5ever = "0.25.1" +serde = { version = "1.0.136", features = ["derive"] } +serde_bare = "0.5.0" + +### Dates +chrono = "0.4.19" + +### Utils +lazy_static = "1.4.0" +bytes = "1.1.0" +itertools = "0.10.3" +ipnetwork = "0.18.0" +futures-util = "0.3.21" +tokio = { version = "1.17.0", features = ["full"] } +anyhow = "1.0.55" +log = "0.4.14" +env_logger = "0.9.0" + +### Raking helpers +# HTTP Requests +reqwest = { version = "0.11.9", features = ["stream"] } +# Gemini Requests +# N.B. TODO gemfeeds are Atom feeds for Gemini. Should support those. +gemini-fetch = "0.2.1" +# Robots.txt +cylon = { version = "0.2.0", features = ["crawl-delay"] } +# RSS/Atom/JSON feeds +feed-rs = "1.0.0" +# Sitemaps +sitemap = "0.4.1" + +### Filtering helpers +# AdBlock +adblock = "0.5.0" +# Language detection +lingua = "1.3.3" \ No newline at end of file diff --git a/quickpeep/src/bin/qp-rake1.rs b/quickpeep_raker/src/bin/qp-rake1.rs similarity index 93% rename from quickpeep/src/bin/qp-rake1.rs rename to quickpeep_raker/src/bin/qp-rake1.rs index 6930b03..8e26d9c 100644 --- a/quickpeep/src/bin/qp-rake1.rs +++ b/quickpeep_raker/src/bin/qp-rake1.rs @@ -4,10 +4,10 @@ use clap::Parser; use colour::{blue_ln, green_ln, red_ln, yellow_ln}; use env_logger::Env; use log::warn; -use quickpeep::raking::analysis::{load_adblock_engine, IpSet}; -use quickpeep::raking::references::references_from_urlrakes; -use quickpeep::raking::{RakeIntent, RakeOutcome}; -use quickpeep::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT}; +use quickpeep_raker::raking::analysis::{load_adblock_engine, IpSet}; +use quickpeep_raker::raking::references::references_from_urlrakes; +use quickpeep_raker::raking::{RakeIntent, RakeOutcome}; +use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT}; use quickpeep_structs::rake_entries::{AnalysisAntifeatures, RakedReference, ReferenceKind}; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::redirect::Policy; diff --git a/quickpeep/src/lib.rs b/quickpeep_raker/src/lib.rs similarity index 100% rename from quickpeep/src/lib.rs rename to quickpeep_raker/src/lib.rs diff --git a/quickpeep/src/raking.rs b/quickpeep_raker/src/raking.rs similarity index 100% rename from quickpeep/src/raking.rs rename to quickpeep_raker/src/raking.rs diff --git a/quickpeep/src/raking/analysis.rs b/quickpeep_raker/src/raking/analysis.rs similarity index 100% rename from quickpeep/src/raking/analysis.rs rename to quickpeep_raker/src/raking/analysis.rs diff --git a/quickpeep/src/raking/references.rs b/quickpeep_raker/src/raking/references.rs similarity index 100% rename from quickpeep/src/raking/references.rs rename to quickpeep_raker/src/raking/references.rs diff --git a/quickpeep/src/test.rs b/quickpeep_raker/src/test.rs similarity index 100% rename from quickpeep/src/test.rs rename to quickpeep_raker/src/test.rs