81 lines
2.5 KiB
Rust
81 lines
2.5 KiB
Rust
use crate::storage::mdbx_helper_types::{MdbxBare, MdbxString};
|
|
use crate::storage::records::{DomainRecord, OnHoldUrlRecord};
|
|
use crate::storage::RakerTxn;
|
|
use anyhow::Context;
|
|
use libmdbx::{Database, WriteFlags, RW};
|
|
use reqwest::Url;
|
|
|
|
/// Runs one big transaction that:
|
|
/// - scans on-hold URLs
|
|
/// - moves 'allowed' ones to the queue
|
|
/// - deletes 'weeds'
|
|
/// - leaves unknown ones alone
|
|
///
|
|
/// Ideally should be applied after importing seeds and weeds on an existing database.
|
|
pub fn reapply_seeds_and_weeds_to_on_hold_urls(txn: RakerTxn<RW>) -> anyhow::Result<()> {
|
|
struct DomainState {
|
|
pub domain: String,
|
|
pub domain_record: Option<DomainRecord>,
|
|
}
|
|
|
|
let urls_on_hold: &Database = &txn.mdbx.borrow_dbs().urls_on_hold;
|
|
|
|
let mut domain_state = None;
|
|
|
|
// Scan through the on-hold URLs
|
|
let mut cur = txn.mdbx_txn.cursor(urls_on_hold)?;
|
|
let mut first_iteration = true;
|
|
|
|
while let Some((MdbxString(domain_then_url), MdbxBare(record))) = if first_iteration {
|
|
first_iteration = false;
|
|
cur.first::<MdbxString, MdbxBare<OnHoldUrlRecord>>()
|
|
} else {
|
|
cur.next::<MdbxString, MdbxBare<OnHoldUrlRecord>>()
|
|
}? {
|
|
let mut split = domain_then_url.as_ref().split("\n");
|
|
let domain = split.next().context("No first split..?")?;
|
|
let url_str = split.next().context("No URL")?;
|
|
|
|
// Is the domain new?
|
|
if domain_state
|
|
.as_ref()
|
|
.map(|ds: &DomainState| &ds.domain != domain)
|
|
.unwrap_or(true)
|
|
{
|
|
// Then load the relevant records for it.
|
|
domain_state = Some(DomainState {
|
|
domain: domain.to_owned(),
|
|
domain_record: txn.get_domain_record(domain)?,
|
|
});
|
|
}
|
|
|
|
let url = Url::parse(url_str)?;
|
|
|
|
let domain_state = domain_state.as_ref().unwrap();
|
|
|
|
let is_rakeable = domain_state
|
|
.domain_record
|
|
.as_ref()
|
|
.map(|dr: &DomainRecord| dr.is_url_rakeable(&url))
|
|
.flatten();
|
|
|
|
match is_rakeable {
|
|
Some(true) => {
|
|
// ALLOWED
|
|
// Make it a queued URL
|
|
txn.enqueue_url(url_str, None, record.queue_record.intent)?;
|
|
cur.del(WriteFlags::empty())?;
|
|
}
|
|
Some(false) => {
|
|
// WEED
|
|
// Just delete
|
|
cur.del(WriteFlags::empty())?;
|
|
}
|
|
None => { /* nop: neither allowed nor a weed. Keep on hold. */ }
|
|
}
|
|
}
|
|
|
|
txn.commit()?;
|
|
Ok(())
|
|
}
|