104 lines
3.2 KiB
Rust
104 lines
3.2 KiB
Rust
use adblock::lists::RuleTypes;
|
|
use anyhow::Context;
|
|
use clap::Parser;
|
|
use colour::{blue_ln, green_ln, red_ln, yellow_ln};
|
|
use env_logger::Env;
|
|
use log::warn;
|
|
use quickpeep::raking::analysis::{load_adblock_engine, IpSet};
|
|
use quickpeep::raking::{RakeIntent, RakeOutcome};
|
|
use quickpeep::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
|
use quickpeep_structs::rake_entries::AnalysisAntifeatures;
|
|
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
|
use reqwest::redirect::Policy;
|
|
use reqwest::Url;
|
|
use std::path::PathBuf;
|
|
use tokio::fs::File;
|
|
|
|
pub const ADBLOCK_FILTER_PATHS: [(AnalysisAntifeatures, &'static str); 4] = [
|
|
(AnalysisAntifeatures::COOKIE_NAG, "cookie_nag"),
|
|
(AnalysisAntifeatures::ANNOYANCE, "annoyance"),
|
|
(AnalysisAntifeatures::PRIVACY, "privacy"),
|
|
(AnalysisAntifeatures::ADVERTS, "adverts"),
|
|
];
|
|
|
|
/// Rakes one URL and prints out the description of it.
|
|
#[derive(Clone, Debug, Parser)]
|
|
pub struct Opts {
|
|
url: Url,
|
|
}
|
|
|
|
#[tokio::main]
|
|
pub async fn main() -> anyhow::Result<()> {
|
|
env_logger::Builder::from_env(Env::default().default_filter_or("info,quickpeep=debug")).init();
|
|
|
|
let opts: Opts = Opts::parse();
|
|
|
|
let mut header_map = HeaderMap::new();
|
|
header_map.insert(USER_AGENT, HeaderValue::from_static(RAKER_USER_AGENT));
|
|
|
|
let client = reqwest::ClientBuilder::new()
|
|
.timeout(TIME_LIMIT)
|
|
.default_headers(header_map)
|
|
// TODO We want to handle redirects ourselves so we can track them...
|
|
.redirect(Policy::none())
|
|
.build()?;
|
|
|
|
let mut adblock_engines = Vec::new();
|
|
|
|
for (antifeature, name) in &ADBLOCK_FILTER_PATHS {
|
|
// TODO Don't hardcode these paths in quite as bad a way...
|
|
let path = PathBuf::from(format!("./data/{}.adblock", name));
|
|
if !path.exists() {
|
|
warn!("Missing adblock rules: {:?}.", path);
|
|
continue;
|
|
}
|
|
let file = File::open(&path).await?;
|
|
adblock_engines.push((
|
|
*antifeature,
|
|
load_adblock_engine(file, RuleTypes::All).await?,
|
|
));
|
|
}
|
|
|
|
let mut antifeature_ip_set = IpSet::new();
|
|
|
|
let ips_file = File::open("./data/cf_ips.txt")
|
|
.await
|
|
.context("Failed to open CF IPs file")?;
|
|
antifeature_ip_set.add_all_from_file(ips_file).await?;
|
|
|
|
let raker = Raker {
|
|
adblock_engines,
|
|
antifeature_ip_set,
|
|
};
|
|
|
|
let outcome = raker.rake(&opts.url, RakeIntent::Any, &client).await?;
|
|
|
|
match outcome {
|
|
RakeOutcome::RakedPage(page) => {
|
|
let content_size = serde_bare::to_vec(&page)?.len();
|
|
green_ln!("Page ({} bytes)", content_size);
|
|
// TODO
|
|
}
|
|
RakeOutcome::RakedFeed(feed) => {
|
|
green_ln!("Feed");
|
|
// TODO
|
|
}
|
|
RakeOutcome::RakedSitemap(sitemap) => {
|
|
green_ln!("Sitemap");
|
|
// TODO
|
|
}
|
|
RakeOutcome::Redirect { reason, new_url } => {
|
|
blue_ln!("Redirect ({:?})", reason);
|
|
println!(" → {}", new_url.as_str());
|
|
}
|
|
RakeOutcome::TemporaryFailure(fail) => {
|
|
yellow_ln!("Temporary Failure\n\t{:?}", &fail.reason);
|
|
}
|
|
RakeOutcome::PermanentFailure(fail) => {
|
|
red_ln!("Permanent Failure\n\t{:?}", &fail.reason)
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|