Import the seeds and show stats

This commit is contained in:
Olivier 'reivilibre' 2022-03-20 21:46:50 +00:00
parent abf814550a
commit 179f04b2dd
1 changed file with 20 additions and 3 deletions

View File

@ -7,8 +7,10 @@ use env_logger::Env;
use anyhow::{anyhow, bail, Context}; use anyhow::{anyhow, bail, Context};
use smartstring::alias::CompactString; use smartstring::alias::CompactString;
use colour::{cyan, dark_green_ln, dark_yellow, green, green_ln, yellow, yellow_ln};
use reqwest::{Client, Url}; use reqwest::{Client, Url};
use std::path::PathBuf; use std::path::PathBuf;
use tokio::sync::mpsc;
use tokio::sync::mpsc::{Receiver, Sender}; use tokio::sync::mpsc::{Receiver, Sender};
use quickpeep_raker::config; use quickpeep_raker::config;
@ -53,11 +55,26 @@ pub async fn main() -> anyhow::Result<()> {
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?; let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?;
// TODO progress bar? let (seed_tx, seed_rx) = mpsc::channel(128);
// TODO discover sitemaps at the same time as digging up robots.txt files let seed_files = find_seed_files(config.seed_dir.clone()).await?;
tokio::spawn(async move {
seed_loader(seed_files, &seed_tx).await?;
eprintln!("{:#?}", config); Ok(()) as anyhow::Result<()>
});
let stats = importer(store, seed_rx).await?;
dark_green_ln!("=== Seeds Imported! ===");
green!("New URLs: ");
yellow_ln!("{:?}", stats.new_urls);
green!("New sitemaps: ");
yellow_ln!("{:?}", stats.new_sitemaps);
green!("New domains: ");
yellow_ln!("{:?}", stats.new_domains);
dark_yellow!("Seen URLs: ");
yellow_ln!("{:?}", stats.already_present_urls);
Ok(()) Ok(())
} }