Fix weeds slipping in as seeds

This commit is contained in:
Olivier 'reivilibre' 2022-03-22 23:23:17 +00:00
parent be84c0e1cc
commit 753d03327a
2 changed files with 1 addition and 29 deletions

View File

@@ -4,7 +4,6 @@ use colour::{blue, yellow_ln};
use env_logger::Env; use env_logger::Env;
use itertools::Itertools; use itertools::Itertools;
use libmdbx::Database; use libmdbx::Database;
use log::debug;
use quickpeep_raker::config; use quickpeep_raker::config;
use quickpeep_raker::storage::mdbx_helper_types::{MdbxBare, MdbxString}; use quickpeep_raker::storage::mdbx_helper_types::{MdbxBare, MdbxString};
use quickpeep_raker::storage::records::OnHoldUrlRecord; use quickpeep_raker::storage::records::OnHoldUrlRecord;

View File

@@ -75,7 +75,7 @@ pub async fn main() -> anyhow::Result<()> {
pub async fn import_seeds(store: RakerStore, config: &RakerConfig) -> anyhow::Result<()> { pub async fn import_seeds(store: RakerStore, config: &RakerConfig) -> anyhow::Result<()> {
let (weed_tx, weed_rx) = mpsc::channel(128); let (weed_tx, weed_rx) = mpsc::channel(128);
let weed_files = find_seed_files(config.seed_dir.clone(), WEED_EXTENSION).await?; let weed_files = find_seed_files(config.seed_dir.clone(), SEED_EXTENSION).await?;
eprintln!("{:?}", weed_files); eprintln!("{:?}", weed_files);
@@ -370,33 +370,6 @@ async fn import_and_flush_batch_weeds(
let _: &mut WeedDomainRecord = weed_domain_record.borrow_mut(); let _: &mut WeedDomainRecord = weed_domain_record.borrow_mut();
} }
match &seed.url {
UrlOrUrlPattern::Url(url_str) => {
let url = Url::parse(url_str.as_str())?;
if txn.enqueue_url(url.as_str(), None, RakeIntent::Any)? {
stats.new_urls += 1;
} else {
stats.already_present_urls += 1;
}
url
}
UrlOrUrlPattern::UrlPrefix(prefix) => {
let prefix_as_url = Url::parse(prefix.as_str())?;
if txn.enqueue_url(prefix_as_url.as_str(), None, RakeIntent::Any)? {
stats.new_urls += 1;
} else {
stats.already_present_urls += 1;
}
if is_domain_new {
let weed_domain_record: &mut WeedDomainRecord = weed_domain_record.borrow_mut();
weed_domain_record
.restricted_prefixes
.insert(prefix_as_url.path().to_string());
}
prefix_as_url
}
};
if weed_domain_record.is_dirty() { if weed_domain_record.is_dirty() {
txn.put_weed_domain_record(domain.borrow(), weed_domain_record.into_inner())?; txn.put_weed_domain_record(domain.borrow(), weed_domain_record.into_inner())?;
} }