Rearrange quickpeep crates

This commit is contained in:
Olivier 'reivilibre' 2022-03-14 23:36:37 +00:00
parent 403cc2a994
commit 1f681164c9
13 changed files with 119 additions and 86 deletions

77
Cargo.lock generated
View File

@ -597,7 +597,7 @@ checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"wasi 0.10.2+wasi-snapshot-preview1", "wasi 0.10.0+wasi-snapshot-preview1",
] ]
[[package]] [[package]]
@ -2139,6 +2139,39 @@ dependencies = [
[[package]] [[package]]
name = "quickpeep" name = "quickpeep"
version = "0.1.0" version = "0.1.0"
[[package]]
name = "quickpeep_densedoc"
version = "0.1.0"
dependencies = [
"anyhow",
"html5ever",
"kuchiki",
"lazy_static",
"regex",
"serde",
]
[[package]]
name = "quickpeep_indexer"
version = "0.1.0"
[[package]]
name = "quickpeep_moz_readability"
version = "0.1.0"
dependencies = [
"anyhow",
"html5ever",
"kuchiki",
"lazy_static",
"log",
"regex",
"url",
]
[[package]]
name = "quickpeep_raker"
version = "0.1.0"
dependencies = [ dependencies = [
"adblock", "adblock",
"anyhow", "anyhow",
@ -2169,31 +2202,6 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "quickpeep_densedoc"
version = "0.1.0"
dependencies = [
"anyhow",
"html5ever",
"kuchiki",
"lazy_static",
"regex",
"serde",
]
[[package]]
name = "quickpeep_moz_readability"
version = "0.1.0"
dependencies = [
"anyhow",
"html5ever",
"kuchiki",
"lazy_static",
"log",
"regex",
"url",
]
[[package]] [[package]]
name = "quickpeep_structs" name = "quickpeep_structs"
version = "0.1.0" version = "0.1.0"
@ -2330,9 +2338,9 @@ dependencies = [
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.11.9" version = "0.11.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f242f1488a539a79bac6dbe7c8609ae43b7914b7736210f239a37cccb32525" checksum = "46a1f7aa4f35e5e8b4160449f51afc758f0ce6454315a9fa7d0d113e958c41eb"
dependencies = [ dependencies = [
"base64", "base64",
"bytes", "bytes",
@ -2791,11 +2799,12 @@ dependencies = [
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.43" version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [ dependencies = [
"libc", "libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi", "winapi",
] ]
@ -3018,9 +3027,9 @@ checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.10.2+wasi-snapshot-preview1" version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]] [[package]]
name = "wasi" name = "wasi"
@ -3199,9 +3208,9 @@ checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316"
[[package]] [[package]]
name = "winreg" name = "winreg"
version = "0.7.0" version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
dependencies = [ dependencies = [
"winapi", "winapi",
] ]

View File

@ -1,6 +1,8 @@
[workspace] [workspace]
members = [ members = [
"quickpeep", "quickpeep",
"quickpeep_indexer",
"quickpeep_raker",
"quickpeep_densedoc", "quickpeep_densedoc",
"quickpeep_moz_readability", "quickpeep_moz_readability",
"quickpeep_structs", "quickpeep_structs",

View File

@ -6,51 +6,3 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
### Subcrates
quickpeep_moz_readability = { path = "../quickpeep_moz_readability" }
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
quickpeep_structs = { path = "../quickpeep_structs" }
quickpeep_utils = { path = "../quickpeep_utils" }
### CLI Helpers
clap = { version = "3.1.6", features = ["derive"] }
colour = "0.6.0"
### Document Parsing
kuchiki = "0.8.1"
html5ever = "0.25.1"
serde = { version = "1.0.136", features = ["derive"] }
serde_bare = "0.5.0"
### Dates
chrono = "0.4.19"
### Utils
lazy_static = "1.4.0"
bytes = "1.1.0"
itertools = "0.10.3"
ipnetwork = "0.18.0"
futures-util = "0.3.21"
tokio = { version = "1.17.0", features = ["full"] }
anyhow = "1.0.55"
log = "0.4.14"
env_logger = "0.9.0"
### Raking helpers
# HTTP Requests
reqwest = { version = "0.11.9", features = ["stream"] }
# Gemini Requests
# N.B. TODO gemfeeds are Atom feeds for Gemini. Should support those.
gemini-fetch = "0.2.1"
# Robots.txt
cylon = { version = "0.2.0", features = ["crawl-delay"] }
# RSS/Atom/JSON feeds
feed-rs = "1.0.0"
# Sitemaps
sitemap = "0.4.1"
### Filtering helpers
# AdBlock
adblock = "0.5.0"
# Language detection
lingua = "1.3.3"

3
quickpeep/src/main.rs Normal file
View File

@ -0,0 +1,3 @@
fn main() {
println!("Hello, world!");
}

View File

@ -0,0 +1,8 @@
[package]
name = "quickpeep_indexer"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@ -0,0 +1,3 @@
fn main() {
println!("Hello, world!");
}

View File

@ -0,0 +1,56 @@
[package]
name = "quickpeep_raker"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
### Subcrates
quickpeep_moz_readability = { path = "../quickpeep_moz_readability" }
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
quickpeep_structs = { path = "../quickpeep_structs" }
quickpeep_utils = { path = "../quickpeep_utils" }
### CLI Helpers
clap = { version = "3.1.6", features = ["derive"] }
colour = "0.6.0"
### Document Parsing
kuchiki = "0.8.1"
html5ever = "0.25.1"
serde = { version = "1.0.136", features = ["derive"] }
serde_bare = "0.5.0"
### Dates
chrono = "0.4.19"
### Utils
lazy_static = "1.4.0"
bytes = "1.1.0"
itertools = "0.10.3"
ipnetwork = "0.18.0"
futures-util = "0.3.21"
tokio = { version = "1.17.0", features = ["full"] }
anyhow = "1.0.55"
log = "0.4.14"
env_logger = "0.9.0"
### Raking helpers
# HTTP Requests
reqwest = { version = "0.11.9", features = ["stream"] }
# Gemini Requests
# N.B. TODO gemfeeds are Atom feeds for Gemini. Should support those.
gemini-fetch = "0.2.1"
# Robots.txt
cylon = { version = "0.2.0", features = ["crawl-delay"] }
# RSS/Atom/JSON feeds
feed-rs = "1.0.0"
# Sitemaps
sitemap = "0.4.1"
### Filtering helpers
# AdBlock
adblock = "0.5.0"
# Language detection
lingua = "1.3.3"

View File

@ -4,10 +4,10 @@ use clap::Parser;
use colour::{blue_ln, green_ln, red_ln, yellow_ln}; use colour::{blue_ln, green_ln, red_ln, yellow_ln};
use env_logger::Env; use env_logger::Env;
use log::warn; use log::warn;
use quickpeep::raking::analysis::{load_adblock_engine, IpSet}; use quickpeep_raker::raking::analysis::{load_adblock_engine, IpSet};
use quickpeep::raking::references::references_from_urlrakes; use quickpeep_raker::raking::references::references_from_urlrakes;
use quickpeep::raking::{RakeIntent, RakeOutcome}; use quickpeep_raker::raking::{RakeIntent, RakeOutcome};
use quickpeep::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT}; use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
use quickpeep_structs::rake_entries::{AnalysisAntifeatures, RakedReference, ReferenceKind}; use quickpeep_structs::rake_entries::{AnalysisAntifeatures, RakedReference, ReferenceKind};
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::redirect::Policy; use reqwest::redirect::Policy;