Add a lot more foundational work for raking
This commit is contained in:
parent
210e8ef10a
commit
a1097ef183
|
@ -1,2 +1,3 @@
|
|||
|
||||
.idea
|
||||
.idea
|
||||
data/cf_ips.txt
|
|
@ -612,6 +612,15 @@ version = "2.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35e70ee094dc02fd9c13fdad4940090f22dbd6ac7c9e7094a46cf0232a50bc7c"
|
||||
|
||||
[[package]]
|
||||
name = "ipnetwork"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4088d739b183546b239688ddbc79891831df421773df95e236daf7867866d355"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.3"
|
||||
|
@ -1066,18 +1075,36 @@ dependencies = [
|
|||
"cylon",
|
||||
"env_logger",
|
||||
"feed-rs",
|
||||
"futures-util",
|
||||
"gemini-fetch",
|
||||
"html5ever",
|
||||
"ipnetwork",
|
||||
"itertools",
|
||||
"kuchiki",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"quickpeep_densedoc",
|
||||
"quickpeep_moz_readability",
|
||||
"quickpeep_structs",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_bare",
|
||||
"sitemap",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quickpeep_densedoc"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"html5ever",
|
||||
"kuchiki",
|
||||
"lazy_static",
|
||||
"regex",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quickpeep_moz_readability"
|
||||
version = "0.1.0"
|
||||
|
@ -1091,6 +1118,14 @@ dependencies = [
|
|||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quickpeep_structs"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"quickpeep_densedoc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.15"
|
||||
|
@ -1388,6 +1423,15 @@ dependencies = [
|
|||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_bare"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51c55386eed0f1ae957b091dc2ca8122f287b60c79c774cbe3d5f2b69fded660"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.136"
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
[workspace]
|
||||
members = [
|
||||
"quickpeep",
|
||||
"quickpeep_moz_readability"
|
||||
"quickpeep_densedoc",
|
||||
"quickpeep_moz_readability",
|
||||
"quickpeep_structs"
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -11,12 +11,14 @@ anyhow = "1.0.55"
|
|||
log = "0.4.14"
|
||||
env_logger = "0.9.0"
|
||||
quickpeep_moz_readability = { path = "../quickpeep_moz_readability" }
|
||||
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
||||
|
||||
# TODO: why do we need these here?
|
||||
kuchiki = "0.8.1"
|
||||
html5ever = "0.25.1"
|
||||
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_bare = "0.5.0"
|
||||
|
||||
chrono = "0.4.19"
|
||||
|
||||
|
@ -24,9 +26,12 @@ lazy_static = "1.4.0"
|
|||
|
||||
bytes = "1.1.0"
|
||||
|
||||
# TODO: rkyv and memmap2 should be an efficient way to load index packs into processes.
|
||||
# rkyv = "0.7.35"
|
||||
# memmap2 = "0.5.3"
|
||||
itertools = "0.10.3"
|
||||
|
||||
quickpeep_structs = { path = "../quickpeep_structs" }
|
||||
ipnetwork = "0.18.0"
|
||||
|
||||
futures-util = "0.3.21"
|
||||
|
||||
### Raking helpers
|
||||
# HTTP Requests
|
||||
|
|
|
@ -1,25 +1,77 @@
|
|||
use quickpeep::raking::rake;
|
||||
use adblock::lists::RuleTypes;
|
||||
use anyhow::Context;
|
||||
use quickpeep::raking::analysis::{load_adblock_engine, IpSet};
|
||||
use quickpeep::raking::RakeIntent;
|
||||
use quickpeep::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
||||
use quickpeep_structs::rake_entries::AnalysisAntifeatures;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use reqwest::redirect::Policy;
|
||||
use reqwest::Url;
|
||||
use std::str::FromStr;
|
||||
use tokio::fs::File;
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn main() -> anyhow::Result<()> {
|
||||
let client = reqwest::Client::new();
|
||||
// TODO max timeout, max body size
|
||||
rake(
|
||||
&Url::from_str("http://nothings.org/gamedev/ssao/")?,
|
||||
RakeIntent::Page,
|
||||
&client,
|
||||
)
|
||||
.await?;
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(USER_AGENT, HeaderValue::from_static(RAKER_USER_AGENT));
|
||||
|
||||
rake(
|
||||
&Url::from_str("https://github.com/kuchiki-rs/kuchiki")?,
|
||||
RakeIntent::Page,
|
||||
&client,
|
||||
)
|
||||
.await?;
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.timeout(TIME_LIMIT)
|
||||
.default_headers(header_map)
|
||||
// TODO We want to handle redirects ourselves so we can track them...
|
||||
.redirect(Policy::none())
|
||||
.build()?;
|
||||
|
||||
// TODO Don't hardcode these paths in quite as bad a way...
|
||||
let adblock_file = File::open("./cosmetic_filters.adblock")
|
||||
.await
|
||||
.context("Failed to open cosmetic filters file")?;
|
||||
let adblock_engines = vec![(
|
||||
AnalysisAntifeatures::ANNOYANCE,
|
||||
load_adblock_engine(adblock_file, RuleTypes::CosmeticOnly).await?,
|
||||
)];
|
||||
|
||||
let mut antifeature_ip_set = IpSet::new();
|
||||
|
||||
let ips_file = File::open("./data/cf_ips.txt")
|
||||
.await
|
||||
.context("Failed to open CF IPs file")?;
|
||||
antifeature_ip_set.add_all_from_file(ips_file).await?;
|
||||
|
||||
let raker = Raker {
|
||||
adblock_engines,
|
||||
antifeature_ip_set,
|
||||
};
|
||||
|
||||
// raker.rake(
|
||||
// &Url::from_str("http://nothings.org/gamedev/ssao/")?,
|
||||
// RakeIntent::Page,
|
||||
// &client,
|
||||
// )
|
||||
// .await?;
|
||||
//
|
||||
// raker.rake(
|
||||
// &Url::from_str("https://github.com/kuchiki-rs/kuchiki")?,
|
||||
// RakeIntent::Page,
|
||||
// &client,
|
||||
// )
|
||||
// .await?;
|
||||
|
||||
raker
|
||||
.rake(
|
||||
&Url::from_str("https://www.thesprucepets.com/")?,
|
||||
RakeIntent::Page,
|
||||
&client,
|
||||
)
|
||||
.await?;
|
||||
|
||||
raker
|
||||
.rake(
|
||||
&Url::from_str("https://matrix.org/")?,
|
||||
RakeIntent::Page,
|
||||
&client,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,17 +1,33 @@
|
|||
use crate::raking::analysis::{analyse_with_ad_block_cosmetic_filter, IpSet};
|
||||
use adblock::engine::Engine;
|
||||
use anyhow::{bail, Context};
|
||||
use bytes::Bytes;
|
||||
use chrono::{DateTime, FixedOffset, Utc};
|
||||
use cylon::Cylon;
|
||||
use futures_util::stream::StreamExt;
|
||||
use html5ever::tendril::fmt::Slice;
|
||||
use html5ever::QualName;
|
||||
use kuchiki::traits::TendrilSink;
|
||||
use kuchiki::NodeRef;
|
||||
use lazy_static::lazy_static;
|
||||
use log::debug;
|
||||
use reqwest::{Client, Url};
|
||||
use quickpeep_densedoc::DenseTree;
|
||||
use quickpeep_structs::rake_entries::AnalysisAntifeatures;
|
||||
use reqwest::{Client, Response, Url};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sitemap::reader::SiteMapEntity;
|
||||
use std::collections::HashSet;
|
||||
use std::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
|
||||
mod analysis;
|
||||
pub mod analysis;
|
||||
|
||||
pub const USER_AGENT: &'static str = "QuickPeepBot";
|
||||
/// 4 MiB ought to be enough for anybody.
|
||||
pub const SIZE_LIMIT: usize = 4 * 1024 * 1024;
|
||||
/// If it's not loaded in ten seconds, that's pretty severe.
|
||||
/// 10 seconds is almost too generous (assuming that the best of things can run slowly sometimes).
|
||||
pub const TIME_LIMIT: Duration = Duration::from_secs(10);
|
||||
pub const RAKER_USER_AGENT: &'static str = "QuickPeepBot";
|
||||
|
||||
pub enum RakeOutcome {
|
||||
RakedPage(RakedPage),
|
||||
|
@ -81,85 +97,169 @@ lazy_static! {
|
|||
]);
|
||||
}
|
||||
|
||||
pub async fn rake(url: &Url, intent: RakeIntent, client: &Client) -> anyhow::Result<RakeOutcome> {
|
||||
let response = client.get(url.clone()).send().await?;
|
||||
async fn response_to_bytes_limited(
|
||||
mut response: Response,
|
||||
size_limit: usize,
|
||||
time_limit: Duration,
|
||||
) -> anyhow::Result<Vec<u8>> {
|
||||
let deadline = Instant::now() + time_limit;
|
||||
let mut buffer = Vec::new();
|
||||
let mut bytestream = response.bytes_stream();
|
||||
|
||||
if !response.status().is_success() {
|
||||
bail!("Not successful: {:?}", response.status().as_u16());
|
||||
}
|
||||
|
||||
let content_type = if let Some(content_type) = response.headers().get("content-type") {
|
||||
let content_type = content_type
|
||||
.to_str()
|
||||
.context("Can't convert content-type to str")?;
|
||||
eprintln!("CT {:?}", content_type);
|
||||
content_type.split(";").next().unwrap().trim().to_owned()
|
||||
} else {
|
||||
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||
reason: TemporaryFailureReason::MissingInformation("content-type".to_owned()),
|
||||
backoff_sec: 86400 * 7,
|
||||
}));
|
||||
};
|
||||
|
||||
let content = response.bytes().await?;
|
||||
|
||||
if content_type == "text/html" && (intent == RakeIntent::Any || intent == RakeIntent::Page) {
|
||||
match rake_html_page(&content, url) {
|
||||
Ok(page_rake) => {
|
||||
return Ok(RakeOutcome::RakedPage(page_rake));
|
||||
}
|
||||
Err(error) => {
|
||||
debug!("Failed to rake HTML page: {:?}", error);
|
||||
loop {
|
||||
tokio::select! {
|
||||
next_chunk = bytestream.next() => {
|
||||
match next_chunk {
|
||||
Some(next_chunk) => {
|
||||
buffer.extend_from_slice(next_chunk?.as_bytes());
|
||||
if buffer.len() > size_limit {
|
||||
bail!("Exceeds size limit");
|
||||
}
|
||||
},
|
||||
None => {
|
||||
// Finished! :)
|
||||
break;
|
||||
}
|
||||
}
|
||||
},
|
||||
_ = tokio::time::sleep_until(deadline) => {
|
||||
bail!("Exceeded time limit");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if FEED_MIME_TYPES.contains(content_type.as_str())
|
||||
&& (intent == RakeIntent::Any || intent == RakeIntent::Feed)
|
||||
{
|
||||
match rake_feed(&content, url) {
|
||||
Ok(feed) => {
|
||||
return Ok(RakeOutcome::RakedFeed(feed));
|
||||
}
|
||||
Err(error) => {
|
||||
debug!("Failed to rake as feed: {:?}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if SITEMAP_MIME_TYPES.contains(content_type.as_str())
|
||||
&& (intent == RakeIntent::Any || intent == RakeIntent::SiteMap)
|
||||
{
|
||||
match rake_sitemap(&content) {
|
||||
Ok(sitemap) => {
|
||||
return Ok(RakeOutcome::RakedSitemap(sitemap));
|
||||
}
|
||||
Err(error) => {
|
||||
debug!("Failed to rake as sitemap: {:?}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||
reason: PermanentFailureReason::UnknownContentType(content_type.to_owned()),
|
||||
}));
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
pub fn rake_html_page(content: &[u8], url: &Url) -> anyhow::Result<RakedPage> {
|
||||
let content_str = std::str::from_utf8(content)?;
|
||||
pub struct Raker {
|
||||
pub adblock_engines: Vec<(AnalysisAntifeatures, Engine)>,
|
||||
pub antifeature_ip_set: IpSet,
|
||||
}
|
||||
|
||||
let mut readability = quickpeep_moz_readability::Readability::new(content_str);
|
||||
readability
|
||||
.parse(url.as_str())
|
||||
.context("failed to analyse readability")?;
|
||||
impl Raker {
|
||||
pub async fn rake(
|
||||
&self,
|
||||
url: &Url,
|
||||
intent: RakeIntent,
|
||||
client: &Client,
|
||||
) -> anyhow::Result<RakeOutcome> {
|
||||
let response = client.get(url.clone()).send().await?;
|
||||
|
||||
eprintln!("{:#?}", readability.metadata);
|
||||
if let Some(remote_addr) = response.remote_addr() {
|
||||
eprintln!("rA {:?}", remote_addr);
|
||||
let is_cf = self.antifeature_ip_set.contains(remote_addr.ip());
|
||||
eprintln!("CF? {:?}", is_cf);
|
||||
}
|
||||
|
||||
if let Some(node) = readability.article_node {
|
||||
eprintln!("{}", node.to_string());
|
||||
if !response.status().is_success() {
|
||||
bail!("Not successful: {:?}", response.status().as_u16());
|
||||
}
|
||||
|
||||
let content_type = if let Some(content_type) = response.headers().get("content-type") {
|
||||
let content_type = content_type
|
||||
.to_str()
|
||||
.context("Can't convert content-type to str")?;
|
||||
eprintln!("CT {:?}", content_type);
|
||||
content_type.split(";").next().unwrap().trim().to_owned()
|
||||
} else {
|
||||
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||
reason: TemporaryFailureReason::MissingInformation("content-type".to_owned()),
|
||||
backoff_sec: 86400 * 7,
|
||||
}));
|
||||
};
|
||||
|
||||
let content = response_to_bytes_limited(response, SIZE_LIMIT, TIME_LIMIT).await?;
|
||||
|
||||
if content_type == "text/html" && (intent == RakeIntent::Any || intent == RakeIntent::Page)
|
||||
{
|
||||
match self.rake_html_page(&content, url) {
|
||||
Ok(page_rake) => {
|
||||
return Ok(RakeOutcome::RakedPage(page_rake));
|
||||
}
|
||||
Err(error) => {
|
||||
debug!("Failed to rake HTML page: {:?}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if FEED_MIME_TYPES.contains(content_type.as_str())
|
||||
&& (intent == RakeIntent::Any || intent == RakeIntent::Feed)
|
||||
{
|
||||
match rake_feed(&content, url) {
|
||||
Ok(feed) => {
|
||||
return Ok(RakeOutcome::RakedFeed(feed));
|
||||
}
|
||||
Err(error) => {
|
||||
debug!("Failed to rake as feed: {:?}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if SITEMAP_MIME_TYPES.contains(content_type.as_str())
|
||||
&& (intent == RakeIntent::Any || intent == RakeIntent::SiteMap)
|
||||
{
|
||||
match rake_sitemap(&content) {
|
||||
Ok(sitemap) => {
|
||||
return Ok(RakeOutcome::RakedSitemap(sitemap));
|
||||
}
|
||||
Err(error) => {
|
||||
debug!("Failed to rake as sitemap: {:?}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||
reason: PermanentFailureReason::UnknownContentType(content_type.to_owned()),
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(todo!())
|
||||
pub fn rake_html_page(&self, content: &[u8], url: &Url) -> anyhow::Result<RakedPage> {
|
||||
let content_str = std::str::from_utf8(content)?;
|
||||
|
||||
let root_node: NodeRef = kuchiki::parse_html().one(content_str);
|
||||
|
||||
let mut antifeature_flags = AnalysisAntifeatures::empty();
|
||||
|
||||
for (engine_antifeature_flag, adblock_engine) in &self.adblock_engines {
|
||||
match analyse_with_ad_block_cosmetic_filter(
|
||||
&root_node,
|
||||
adblock_engine,
|
||||
url.as_str(),
|
||||
true,
|
||||
) {
|
||||
Ok(cosmetic_filters_tripped) => {
|
||||
eprintln!("?cosmetic filters tripped: {}", cosmetic_filters_tripped);
|
||||
antifeature_flags |= *engine_antifeature_flag;
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("Cosmetic Filter Err {:?}", err);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let dense_doc = DenseTree::from_body(root_node.clone());
|
||||
let dense_doc_text = DenseTree::generate_textual_format(&dense_doc);
|
||||
eprintln!("~~~~~\n{}\n~~~~~", dense_doc_text);
|
||||
eprintln!("^^^^^\n{:#?}\n^^^^^", dense_doc);
|
||||
|
||||
let mut readability = quickpeep_moz_readability::Readability::new_from_node(root_node);
|
||||
readability
|
||||
.parse(url.as_str())
|
||||
.context("failed to analyse readability")?;
|
||||
|
||||
eprintln!("{:#?}", readability.metadata);
|
||||
|
||||
if let Some(node) = readability.article_node {
|
||||
//eprintln!("{}", node.to_string());
|
||||
}
|
||||
|
||||
let bare_size = serde_bare::to_vec(&dense_doc)?.len();
|
||||
eprintln!("CS {:?} → {:?}", content.len(), bare_size);
|
||||
|
||||
Ok(RakedPage {
|
||||
// TODO
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rake_feed(content: &[u8], url: &Url) -> anyhow::Result<Vec<UrlRaked>> {
|
||||
|
@ -286,7 +386,7 @@ pub async fn decode_robots_txt(bytes: &[u8]) -> anyhow::Result<Option<RobotsTxt>
|
|||
}
|
||||
}
|
||||
|
||||
let rules = cylon::Compiler::new(USER_AGENT)
|
||||
let rules = cylon::Compiler::new(RAKER_USER_AGENT)
|
||||
.compile(bytes.as_bytes())
|
||||
.await?;
|
||||
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
use adblock::filters::cosmetic::CosmeticFilter;
|
||||
use anyhow::anyhow;
|
||||
use adblock::engine::Engine;
|
||||
use adblock::lists::{ParseOptions, RuleTypes};
|
||||
use anyhow::Context;
|
||||
use ipnetwork::IpNetwork;
|
||||
use kuchiki::NodeRef;
|
||||
use log::debug;
|
||||
use std::path::Path;
|
||||
use tokio::fs::File;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::net::IpAddr;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncRead, BufReader};
|
||||
|
||||
pub async fn load_cosmetic_filters<R: AsyncRead + Unpin>(
|
||||
pub async fn load_adblock_engine<R: AsyncRead + Unpin>(
|
||||
reader: R,
|
||||
) -> anyhow::Result<Vec<CosmeticFilter>> {
|
||||
rule_types: RuleTypes,
|
||||
) -> anyhow::Result<Engine> {
|
||||
let mut br = BufReader::new(reader);
|
||||
let mut rules = Vec::new();
|
||||
let mut buf = String::new();
|
||||
|
@ -17,27 +19,172 @@ pub async fn load_cosmetic_filters<R: AsyncRead + Unpin>(
|
|||
if br.read_line(&mut buf).await? == 0 {
|
||||
break;
|
||||
}
|
||||
if let Ok(rule) = CosmeticFilter::parse(&buf, false) {
|
||||
rules.push(rule);
|
||||
rules.push(buf.trim().to_owned());
|
||||
}
|
||||
Ok(Engine::from_rules(
|
||||
&rules,
|
||||
ParseOptions {
|
||||
format: Default::default(),
|
||||
include_redirect_urls: false,
|
||||
rule_types,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
// Relevant:
|
||||
// https://github.com/brave/adblock-rust/issues/152#issuecomment-771259069
|
||||
|
||||
pub struct ExtractedClassesAndIds {
|
||||
classes: Vec<String>,
|
||||
ids: Vec<String>,
|
||||
}
|
||||
|
||||
pub fn extract_classes_and_ids_from_page(root: &NodeRef) -> ExtractedClassesAndIds {
|
||||
let mut class_set = HashSet::new();
|
||||
let mut id_set = HashSet::new();
|
||||
|
||||
for node in root.inclusive_descendants() {
|
||||
if let Some(element) = node.0.as_element() {
|
||||
let attrs = element.attributes.borrow();
|
||||
if let Some(id) = attrs.get("id") {
|
||||
id_set.insert(id.to_owned());
|
||||
}
|
||||
if let Some(classes) = attrs.get("class") {
|
||||
for class in classes.trim().split_whitespace() {
|
||||
class_set.insert(class.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(rules)
|
||||
ExtractedClassesAndIds {
|
||||
classes: class_set.into_iter().collect(),
|
||||
ids: id_set.into_iter().collect(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn analyse_with_ad_block_cosmetic_filter(
|
||||
root: NodeRef,
|
||||
filters: &Vec<CosmeticFilter>,
|
||||
root: &NodeRef,
|
||||
engine: &Engine,
|
||||
url: &str,
|
||||
remove: bool,
|
||||
) -> anyhow::Result<bool> {
|
||||
let mut matches = 0;
|
||||
for rule in filters {
|
||||
for ele in root
|
||||
.select(&rule.selector)
|
||||
.map_err(|_| anyhow!("Failed to select(..)"))?
|
||||
{
|
||||
debug!("Cosmetic Filter {:?} Matches {:?}", rule, ele);
|
||||
matches += 1;
|
||||
|
||||
let url_resources = engine.url_cosmetic_resources(url);
|
||||
let specialist_hide_selectors = if !url_resources.generichide {
|
||||
let ExtractedClassesAndIds { classes, ids } = extract_classes_and_ids_from_page(root);
|
||||
|
||||
//eprintln!("ID {:#?}", ids);
|
||||
//eprintln!("CC {:#?}", classes);
|
||||
|
||||
engine.hidden_class_id_selectors(&classes, &ids, &url_resources.exceptions)
|
||||
} else {
|
||||
Vec::with_capacity(0)
|
||||
};
|
||||
|
||||
//eprintln!("UR {:#?}", url_resources);
|
||||
//eprintln!("sHS {:#?}", specialist_hide_selectors);
|
||||
//eprintln!("----");
|
||||
|
||||
for rule in itertools::chain(specialist_hide_selectors, url_resources.hide_selectors) {
|
||||
if let Ok(result) = root.select(&rule) {
|
||||
for ele in result {
|
||||
eprintln!("Cosmetic Filter {:?} Matches {:?}", rule, ele);
|
||||
matches += 1;
|
||||
if remove {
|
||||
ele.as_node().detach();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//eprintln!("(fail)");
|
||||
}
|
||||
}
|
||||
Ok(matches > 0)
|
||||
}
|
||||
|
||||
// TODO this isn't particularly efficient. Probably want a trie if it's important...
|
||||
pub struct IpSet {
|
||||
ips: BTreeSet<IpNetwork>,
|
||||
}
|
||||
|
||||
impl IpSet {
|
||||
pub fn new() -> IpSet {
|
||||
IpSet {
|
||||
ips: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn add_all_from_file<R: AsyncRead + Unpin>(
|
||||
&mut self,
|
||||
reader: R,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut br = BufReader::new(reader);
|
||||
|
||||
let mut buf = String::new();
|
||||
loop {
|
||||
buf.clear();
|
||||
if br.read_line(&mut buf).await? == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let trimmed = buf.trim();
|
||||
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let ip_net = trimmed
|
||||
.parse::<IpNetwork>()
|
||||
.context("Parsing CIDR IP range")?;
|
||||
self.add(ip_net);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add(&mut self, network: IpNetwork) {
|
||||
// We jump through a couple of hoops to make sure we store the lowest address in the network,
|
||||
// since we use that for sorting.
|
||||
self.ips
|
||||
.insert(IpNetwork::new(network.network(), network.prefix()).unwrap());
|
||||
}
|
||||
|
||||
pub fn contains(&self, addr: IpAddr) -> bool {
|
||||
let prefix = if addr.is_ipv4() {
|
||||
32
|
||||
} else {
|
||||
assert!(addr.is_ipv6());
|
||||
128
|
||||
};
|
||||
let addr_as_net =
|
||||
IpNetwork::new(addr, prefix).expect("Conversion to IpNetwork should be correct");
|
||||
for ipnet in self.ips.range(..=addr_as_net).rev().next() {
|
||||
if ipnet.contains(addr) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::raking::analysis::IpSet;
|
||||
use ipnetwork::IpNetwork;
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[test]
|
||||
pub fn test_ipset_contains() {
|
||||
let mut set = IpSet::new();
|
||||
set.add(IpNetwork::from_str("1.2.3.4/16").unwrap());
|
||||
set.add(IpNetwork::from_str("1.1.2.3/16").unwrap());
|
||||
set.add(IpNetwork::from_str("85.42.36.17/24").unwrap());
|
||||
|
||||
assert!(set.contains(IpAddr::from_str("1.2.42.42").unwrap()));
|
||||
assert!(set.contains(IpAddr::from_str("85.42.36.14").unwrap()));
|
||||
assert!(!set.contains(IpAddr::from_str("85.42.37.14").unwrap()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
[package]
|
||||
name = "quickpeep_densedoc"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.56"
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
kuchiki = "0.8.1"
|
||||
html5ever = "0.25.1"
|
||||
regex = "1.5.5"
|
||||
lazy_static = "1.4.0"
|
|
@ -0,0 +1,403 @@
|
|||
use kuchiki::NodeRef;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Borrow;
|
||||
use std::ops::Deref;
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct DenseDocument {
|
||||
head: DenseHead,
|
||||
body: Vec<DenseTree>,
|
||||
}
|
||||
|
||||
impl DenseDocument {
|
||||
pub fn from_document(root_node: NodeRef) {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct DenseHead {
|
||||
title: String,
|
||||
feed_urls: Vec<String>,
|
||||
// TODO how best to expose this?? We actually don't care about storing it though ...
|
||||
// Probably move to the raker.
|
||||
canonical: (), // TODO I'm sure we'd benefit by digging up some metadata, but that's possibly for later :)
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub enum DenseTree {
|
||||
Heading1(Vec<DenseTree>),
|
||||
Heading2(Vec<DenseTree>),
|
||||
Heading3(Vec<DenseTree>),
|
||||
Heading4(Vec<DenseTree>),
|
||||
Heading5(Vec<DenseTree>),
|
||||
Heading6(Vec<DenseTree>),
|
||||
Link {
|
||||
children: Vec<DenseTree>,
|
||||
href: String,
|
||||
nofollow: bool,
|
||||
},
|
||||
Image {
|
||||
src: String,
|
||||
alt: String,
|
||||
// title? I don't know if it'd be very useful.
|
||||
},
|
||||
Text(String),
|
||||
}
|
||||
|
||||
impl DenseTree {
|
||||
pub fn from_body(body_node: NodeRef) -> Vec<DenseTree> {
|
||||
let mut builder = DenseTreeBuilder::new();
|
||||
builder.add_children_of_node(body_node);
|
||||
builder.into_tree()
|
||||
}
|
||||
|
||||
pub fn is_text(&self) -> bool {
|
||||
match self {
|
||||
DenseTree::Text(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_textual_format(nodes: &Vec<DenseTree>) -> String {
|
||||
let mut buf = String::new();
|
||||
for node in nodes {
|
||||
node.append_in_textual_format(&mut buf);
|
||||
}
|
||||
simplify_newlines(&buf)
|
||||
}
|
||||
|
||||
fn append_in_textual_format(&self, string: &mut String) {
|
||||
match self {
|
||||
DenseTree::Heading1(children) => {
|
||||
string.push_str("\n\n# ");
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str("\n");
|
||||
}
|
||||
DenseTree::Heading2(children) => {
|
||||
string.push_str("\n\n## ");
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str("\n");
|
||||
}
|
||||
DenseTree::Heading3(children) => {
|
||||
string.push_str("\n\n### ");
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str("\n");
|
||||
}
|
||||
DenseTree::Heading4(children) => {
|
||||
string.push_str("\n\n#### ");
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str("\n");
|
||||
}
|
||||
DenseTree::Heading5(children) => {
|
||||
string.push_str("\n\n##### ");
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str("\n");
|
||||
}
|
||||
DenseTree::Heading6(children) => {
|
||||
string.push_str("\n\n###### ");
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str("\n");
|
||||
}
|
||||
DenseTree::Link { children, href, .. } => {
|
||||
string.push('[');
|
||||
for child in children {
|
||||
child.append_in_textual_format(string);
|
||||
}
|
||||
string.push_str(&format!("]({})", href));
|
||||
}
|
||||
DenseTree::Image { .. } => {
|
||||
string.push_str("[IMG]");
|
||||
}
|
||||
DenseTree::Text(text) => {
|
||||
string.push_str(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DenseTreeBuilder {
|
||||
/// Siblings in the buffer.
|
||||
nodes: Vec<DenseTree>,
|
||||
|
||||
/// Number of preceding newlines at the end of the buffer.
|
||||
/// Used for generating text that preserves some vague structure.
|
||||
preceding_newlines: u32,
|
||||
}
|
||||
|
||||
impl DenseTreeBuilder {
|
||||
pub fn new() -> Self {
|
||||
DenseTreeBuilder {
|
||||
nodes: vec![],
|
||||
preceding_newlines: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_tree(mut self) -> Vec<DenseTree> {
|
||||
self.simplify();
|
||||
self.nodes
|
||||
}
|
||||
|
||||
/// Simplify the DenseTree nodes: coalesce Text nodes and
|
||||
pub fn simplify(&mut self) {
|
||||
// First coalesce all text nodes
|
||||
// TODO(perf): Do it in a better way to reduce the cost.
|
||||
let mut idx = 1;
|
||||
while idx < self.nodes.len() {
|
||||
if self.nodes[idx].is_text() && self.nodes[idx - 1].is_text() {
|
||||
// Merge the two text nodes is a text node, consume it and merge it in.
|
||||
match self.nodes.remove(idx) {
|
||||
DenseTree::Text(append_text) => {
|
||||
match &mut self.nodes[idx - 1] {
|
||||
DenseTree::Text(string) => {
|
||||
string.push_str(&append_text);
|
||||
// Continue so we don't advance, as we just moved the list down a
|
||||
// bit.
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
panic!(
|
||||
"Should be unreachable: checked to be text first. ({})",
|
||||
idx - 1
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Should be unreachable: checked to be text first. ({})", idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
for node in &mut self.nodes {
|
||||
match node {
|
||||
DenseTree::Text(text) => {
|
||||
// Coalesce newlines so there are never more than 2 in a row.
|
||||
*text = simplify_newlines(&simplify_whitespace(&text));
|
||||
}
|
||||
_ => { /* nop */ }
|
||||
}
|
||||
}
|
||||
|
||||
match self.nodes.get_mut(0) {
|
||||
Some(DenseTree::Text(text)) => {
|
||||
*text = text.trim_start().to_owned();
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let num_nodes = self.nodes.len();
|
||||
if num_nodes > 1 {
|
||||
match self.nodes.get_mut(num_nodes - 1) {
|
||||
Some(DenseTree::Text(text)) => {
|
||||
*text = text.trim_end().to_owned();
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a HTML node's children into DenseTree nodes.
|
||||
pub fn add_children_of_node(&mut self, node: NodeRef) {
|
||||
for child in node.children() {
|
||||
if let Some(element) = child.as_element() {
|
||||
match element.name.local.deref() {
|
||||
"h1" => {
|
||||
self.nodes
|
||||
.push(DenseTree::Heading1(DenseTree::from_body(child)));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
"h2" => {
|
||||
self.nodes
|
||||
.push(DenseTree::Heading2(DenseTree::from_body(child)));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
"h3" => {
|
||||
self.nodes
|
||||
.push(DenseTree::Heading3(DenseTree::from_body(child)));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
"h4" => {
|
||||
self.nodes
|
||||
.push(DenseTree::Heading4(DenseTree::from_body(child)));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
"h5" => {
|
||||
self.nodes
|
||||
.push(DenseTree::Heading5(DenseTree::from_body(child)));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
"h6" => {
|
||||
self.nodes
|
||||
.push(DenseTree::Heading6(DenseTree::from_body(child)));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
"a" => {
|
||||
let attrs = element.attributes.borrow();
|
||||
let href = attrs.get("href").unwrap_or("").to_owned();
|
||||
|
||||
if href.starts_with("javascript:") || href.starts_with("data:") {
|
||||
// Skip this link. Just unwrap it.
|
||||
self.add_children_of_node(child.clone());
|
||||
continue;
|
||||
}
|
||||
|
||||
let nofollow = attrs
|
||||
.get("rel")
|
||||
.map(|rel: &str| {
|
||||
rel.split_whitespace()
|
||||
.any(|rel_word: &str| rel_word.eq_ignore_ascii_case("nofollow"))
|
||||
})
|
||||
.unwrap_or(false);
|
||||
drop(attrs);
|
||||
|
||||
self.nodes.push(DenseTree::Link {
|
||||
children: DenseTree::from_body(child),
|
||||
href,
|
||||
nofollow,
|
||||
});
|
||||
|
||||
self.preceding_newlines = 0;
|
||||
}
|
||||
"img" => {
|
||||
// TODO Decide if this is worth the space...
|
||||
let attrs = element.attributes.borrow();
|
||||
let src = attrs.get("src").unwrap_or("").to_owned();
|
||||
|
||||
if src.starts_with("javascript:") || src.starts_with("data:") {
|
||||
// Skip this image.
|
||||
continue;
|
||||
}
|
||||
|
||||
let alt = simplify_whitespace(attrs.get("alt").unwrap_or("").trim());
|
||||
|
||||
self.nodes.push(DenseTree::Image { src, alt });
|
||||
}
|
||||
"p" | "pre" => {
|
||||
// Paragraphs must have 2 preceding newlines.
|
||||
if self.preceding_newlines < 2 {
|
||||
self.nodes.push(DenseTree::Text(
|
||||
match self.preceding_newlines {
|
||||
0 => "\n\n",
|
||||
1 => "\n",
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.to_owned(),
|
||||
));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
|
||||
self.add_children_of_node(child);
|
||||
|
||||
// Paragraphs must have 2 trailing newlines.
|
||||
if self.preceding_newlines < 2 {
|
||||
self.nodes.push(DenseTree::Text(
|
||||
match self.preceding_newlines {
|
||||
0 => "\n\n",
|
||||
1 => "\n",
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.to_owned(),
|
||||
));
|
||||
self.preceding_newlines = 2;
|
||||
}
|
||||
}
|
||||
"br" => {
|
||||
self.nodes.push(DenseTree::Text("\n".to_owned()));
|
||||
self.preceding_newlines += 1;
|
||||
}
|
||||
"div" | "li" => {
|
||||
// Divs must have 1 preceding newline.
|
||||
if self.preceding_newlines < 1 {
|
||||
self.nodes.push(DenseTree::Text("\n".to_owned()));
|
||||
self.preceding_newlines = 1;
|
||||
}
|
||||
|
||||
self.add_children_of_node(child);
|
||||
|
||||
// Divs must have 1 trailing newline.
|
||||
if self.preceding_newlines < 1 {
|
||||
self.nodes.push(DenseTree::Text("\n".to_owned()));
|
||||
self.preceding_newlines = 1;
|
||||
}
|
||||
}
|
||||
"script" | "style" | "svg" | "noscript" => {
|
||||
// We just prune these, as we don't want them.
|
||||
// (noscript tends just to be noisy 'enable JS now!!' messages, so prune those too.)
|
||||
continue;
|
||||
}
|
||||
_ => {
|
||||
// Simply unwrap the unknown element.
|
||||
self.add_children_of_node(child);
|
||||
}
|
||||
}
|
||||
//element.name.local
|
||||
} else if let Some(text) = child.as_text() {
|
||||
let text_to_add =
|
||||
simplify_whitespace(&simplify_newlines(&text.borrow().replace("\n", " ")));
|
||||
self.preceding_newlines =
|
||||
text_to_add.chars().rev().take_while(|c| *c == '\n').count() as u32;
|
||||
self.nodes.push(DenseTree::Text(text_to_add));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-compiled regexes used by the whitespace/newline simplifiers below.
// Patterns are string literals, so the `unwrap()`s cannot fail at runtime.
lazy_static! {
    // One or more spaces/tabs (collapsed to a single space).
    static ref MANY_WHITESPACE: Regex = Regex::new(r"[ \t]+").unwrap();
    // A run of 3+ newlines, possibly with blank (space/tab) lines inside it.
    static ref THREE_OR_MORE_NEWLINES: Regex = Regex::new(r"\n+[ \t\n]+\n+").unwrap();
    // Whitespace at line start. NOTE(review): `[ \s]` is equivalent to `\s`
    // (space is already in `\s`), so this also swallows following newlines —
    // possibly `[ \t]` was intended; confirm before changing, as callers may
    // rely on the newline-collapsing side effect.
    static ref UNNECESSARY_LS_WHITESPACE: Regex = Regex::new(r"\n[ \s]+").unwrap();
    // Whitespace at line end. NOTE(review): same `[ \s]` caveat as above.
    static ref UNNECESSARY_LE_WHITESPACE: Regex = Regex::new(r"[ \s]+\n").unwrap();
}
|
||||
|
||||
pub fn simplify_whitespace(input: &str) -> String {
|
||||
let s = MANY_WHITESPACE.replace_all(input, " ");
|
||||
let s = UNNECESSARY_LS_WHITESPACE.replace_all(s.borrow(), "\n");
|
||||
UNNECESSARY_LE_WHITESPACE
|
||||
.replace_all(s.borrow(), "\n")
|
||||
.into_owned()
|
||||
}
|
||||
|
||||
pub fn simplify_newlines(input: &str) -> String {
|
||||
THREE_OR_MORE_NEWLINES
|
||||
.replace_all(&input.replace("\r", ""), "\n\n")
|
||||
.into_owned()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::{simplify_newlines, simplify_whitespace};

    /// Runs of spaces and tabs should collapse to a single space each.
    #[test]
    pub fn test_simplify_whitespace() {
        let input = "hello cat\tdog \t bat";
        assert_eq!(simplify_whitespace(input), "hello cat dog bat");
    }

    /// Three or more newlines — even with blank lines in between — should
    /// become exactly one blank line.
    #[test]
    pub fn test_simplify_newlines() {
        let input = "hello\n\n\n\nare\n\n\nyou\n\n\n\n\n\n\t\n\n\nthere?";
        assert_eq!(simplify_newlines(input), "hello\n\nare\n\nyou\n\nthere?");
    }
}
|
|
@ -60,7 +60,8 @@ const DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [&str; 5] = ["table", "th", "td", "hr", "
|
|||
pub mod regexes;
|
||||
|
||||
pub struct Readability {
|
||||
root_node: NodeRef,
|
||||
/// Left-over document. Note that readable article pieces are detached from the parent.
|
||||
pub root_node: NodeRef,
|
||||
byline: Option<String>,
|
||||
article_title: String,
|
||||
pub article_node: Option<NodeRef>,
|
||||
|
@ -77,8 +78,12 @@ struct SizeInfo {
|
|||
|
||||
impl Readability {
|
||||
pub fn new(html_str: &str) -> Self {
|
||||
Self::new_from_node(kuchiki::parse_html().one(html_str))
|
||||
}
|
||||
|
||||
pub fn new_from_node(root_node: NodeRef) -> Self {
|
||||
Self {
|
||||
root_node: kuchiki::parse_html().one(html_str),
|
||||
root_node,
|
||||
byline: None,
|
||||
article_title: "".into(),
|
||||
article_node: None,
|
||||
|
@ -87,6 +92,7 @@ impl Readability {
|
|||
metadata: MetaData::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(&mut self, url: &str) -> anyhow::Result<()> {
|
||||
self.unwrap_no_script_tags();
|
||||
self.remove_scripts();
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "quickpeep_structs"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
bitflags = "1.3.2"
|
||||
#arc-interner = "0.7.0"
|
||||
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
|
@ -0,0 +1 @@
|
|||
pub mod rake_entries;
|
|
@ -0,0 +1,24 @@
|
|||
use bitflags::bitflags;
|
||||
|
||||
// Compact (one byte) bitmask of undesirable page properties detected while
// raking, so they can be stored and filtered on cheaply.
bitflags! {
    pub struct AnalysisAntifeatures: u8 {
        /// Adverts are present on the page, according to a filter.
        const ADVERTS = 0x01;
        /// Some things are blocked due to privacy concerns, according to a filter.
        const PRIVACY = 0x02;
        /// Annoying cookie nags are present on this page, according to a cosmetic filter.
        const COOKIE_NAG = 0x04;
        /// Unspecified annoyances are present on this page, according to a cosmetic filter.
        const ANNOYANCE = 0x08;

        /// The web page was served over CloudFlare at the time of indexing, which is not in the
        /// spirit of decentralisation.
        const CLOUDFLARE = 0x10;
    }
}
|
||||
|
||||
pub struct RakedPageEntry {
|
||||
pub analysed_antifeatures: AnalysisAntifeatures,
|
||||
//pub article: Option<DenseTree>,
|
||||
//pub non_article: Option<DenseTree>,
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/sh
# Fetch Cloudflare's published IPv4 and IPv6 ranges and combine them into
# a single data/cf_ips.txt (one CIDR per line) for the raker to consume.

set -eu

dir_path="$(dirname "$0")"
# Hoist the repeated target directory into one variable.
data_dir="$dir_path/../data"

mkdir -p "$data_dir"
wget -O "$data_dir/cf_ips_v4.txt" https://www.cloudflare.com/ips-v4
wget -O "$data_dir/cf_ips_v6.txt" https://www.cloudflare.com/ips-v6
# Ensure the v4 list ends with a newline so the two lists don't run together
# when concatenated. Use printf, not `echo "\n"`: echo's backslash handling
# is unspecified in POSIX, and under bash it would write a literal
# backslash-n into the data file, corrupting the CIDR list.
printf '\n' >> "$data_dir/cf_ips_v4.txt"
cat "$data_dir/cf_ips_v4.txt" "$data_dir/cf_ips_v6.txt" > "$data_dir/cf_ips.txt"
rm "$data_dir/cf_ips_v4.txt" "$data_dir/cf_ips_v6.txt"
|
Loading…
Reference in New Issue