diff --git a/quickpeep_raker/src/bin/qp-raker-db.rs b/quickpeep_raker/src/bin/qp-raker-db.rs new file mode 100644 index 0000000..a3ac683 --- /dev/null +++ b/quickpeep_raker/src/bin/qp-raker-db.rs @@ -0,0 +1,114 @@ +use clap::Parser; +use std::borrow::{Borrow, BorrowMut}; +use std::ffi::OsStr; +use std::fmt::Debug; + +use env_logger::Env; + +use anyhow::{anyhow, bail, Context}; + +use colour::{ + dark_green_ln, dark_red_ln, dark_yellow, dark_yellow_ln, green, red, red_ln, yellow_ln, +}; +use libmdbx::{Database, TableObject, RO}; +use log::warn; +use reqwest::{Client, Url}; +use std::path::PathBuf; +use tokio::sync::mpsc; +use tokio::sync::mpsc::{Receiver, Sender}; + +use quickpeep_raker::config; +use quickpeep_raker::config::RakerConfig; +use quickpeep_raker::raking::analysis::get_reduced_domain; +use quickpeep_raker::raking::{get_robots_txt_for, RakeIntent}; +use quickpeep_raker::storage::mdbx_helper_types::MdbxBare; +use quickpeep_raker::storage::records::{AllowedDomainRecord, WeedDomainRecord}; +use quickpeep_raker::storage::{maintenance, RakerStore, RakerTxn}; +use quickpeep_seed_parser::parse_seeds; +use quickpeep_utils::dirty::DirtyTracker; + +/// Seeds a raker's queue with URLs +#[derive(Clone, Debug, Parser)] +pub struct Opts { + #[clap(long = "config")] + config: Option, + + /// Table name + table: String, + + /// Key name to look up + key_name: String, + + /// Search for any prefix, not an exact match. + #[clap(long = "prefix", short = 'p')] + prefix: bool, +} + +#[tokio::main] +pub async fn main() -> anyhow::Result<()> { + env_logger::Builder::from_env(Env::default().default_filter_or("info,quickpeep=debug")).init(); + + let opts: Opts = Opts::parse(); + + let config_path = opts + .config + .unwrap_or_else(|| PathBuf::from("qp_raker.toml")); + let config = config::RakerConfig::load(&config_path).context("Failed to load config")?; + + if !config.workbench_dir.exists() { + bail!( + "Workbench directory ({:?}) doesn't exist.", + config.workbench_dir + ); + } + if !config.seed_dir.exists() { + bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir); + } + + let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?; + + let txn = store.ro_txn()?; + match opts.table.as_ref() { + "allowed_domains" => { + inspect::>( + opts.key_name.as_ref(), + opts.prefix, + &txn.mdbx.borrow_dbs().allowed_domains, + &txn, + )?; + } + other => { + dark_yellow_ln!("Unknown database {:?}", other); + } + } + + Ok(()) +} + +trait Inspectable { + fn inspect(&self) -> String; +} + +impl Inspectable for MdbxBare { + fn inspect(&self) -> String { + format!("{:?}", &self.0) + } +} + +fn inspect<'a, IV: Inspectable + TableObject<'a>>( + key: &str, + prefix: bool, + database: &Database<'a>, + txn: &'a RakerTxn<'a, RO>, +) -> anyhow::Result<()> { + if prefix { + } else { + if let Some(entry) = txn.mdbx_txn.get::(database, key.as_bytes())? { + println!("{}", entry.inspect()); + } else { + red_ln!("no value"); + } + } + + Ok(()) +}