Fix weeds slipping in as seeds
This commit is contained in:
parent
be84c0e1cc
commit
753d03327a
|
@ -4,7 +4,6 @@ use colour::{blue, yellow_ln};
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use libmdbx::Database;
|
use libmdbx::Database;
|
||||||
use log::debug;
|
|
||||||
use quickpeep_raker::config;
|
use quickpeep_raker::config;
|
||||||
use quickpeep_raker::storage::mdbx_helper_types::{MdbxBare, MdbxString};
|
use quickpeep_raker::storage::mdbx_helper_types::{MdbxBare, MdbxString};
|
||||||
use quickpeep_raker::storage::records::OnHoldUrlRecord;
|
use quickpeep_raker::storage::records::OnHoldUrlRecord;
|
||||||
|
|
|
@ -75,7 +75,7 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
pub async fn import_seeds(store: RakerStore, config: &RakerConfig) -> anyhow::Result<()> {
|
pub async fn import_seeds(store: RakerStore, config: &RakerConfig) -> anyhow::Result<()> {
|
||||||
let (weed_tx, weed_rx) = mpsc::channel(128);
|
let (weed_tx, weed_rx) = mpsc::channel(128);
|
||||||
|
|
||||||
let weed_files = find_seed_files(config.seed_dir.clone(), WEED_EXTENSION).await?;
|
let weed_files = find_seed_files(config.seed_dir.clone(), SEED_EXTENSION).await?;
|
||||||
|
|
||||||
eprintln!("{:?}", weed_files);
|
eprintln!("{:?}", weed_files);
|
||||||
|
|
||||||
|
@ -370,33 +370,6 @@ async fn import_and_flush_batch_weeds(
|
||||||
let _: &mut WeedDomainRecord = weed_domain_record.borrow_mut();
|
let _: &mut WeedDomainRecord = weed_domain_record.borrow_mut();
|
||||||
}
|
}
|
||||||
|
|
||||||
match &seed.url {
|
|
||||||
UrlOrUrlPattern::Url(url_str) => {
|
|
||||||
let url = Url::parse(url_str.as_str())?;
|
|
||||||
if txn.enqueue_url(url.as_str(), None, RakeIntent::Any)? {
|
|
||||||
stats.new_urls += 1;
|
|
||||||
} else {
|
|
||||||
stats.already_present_urls += 1;
|
|
||||||
}
|
|
||||||
url
|
|
||||||
}
|
|
||||||
UrlOrUrlPattern::UrlPrefix(prefix) => {
|
|
||||||
let prefix_as_url = Url::parse(prefix.as_str())?;
|
|
||||||
if txn.enqueue_url(prefix_as_url.as_str(), None, RakeIntent::Any)? {
|
|
||||||
stats.new_urls += 1;
|
|
||||||
} else {
|
|
||||||
stats.already_present_urls += 1;
|
|
||||||
}
|
|
||||||
if is_domain_new {
|
|
||||||
let weed_domain_record: &mut WeedDomainRecord = weed_domain_record.borrow_mut();
|
|
||||||
weed_domain_record
|
|
||||||
.restricted_prefixes
|
|
||||||
.insert(prefix_as_url.path().to_string());
|
|
||||||
}
|
|
||||||
prefix_as_url
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if weed_domain_record.is_dirty() {
|
if weed_domain_record.is_dirty() {
|
||||||
txn.put_weed_domain_record(domain.borrow(), weed_domain_record.into_inner())?;
|
txn.put_weed_domain_record(domain.borrow(), weed_domain_record.into_inner())?;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue