This commit is contained in:
parent
f884324648
commit
4665bfd3a3
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3483,6 +3483,7 @@ dependencies = [
|
||||
"log",
|
||||
"quickpeep_densedoc",
|
||||
"quickpeep_index",
|
||||
"quickpeep_seed_parser",
|
||||
"quickpeep_structs",
|
||||
"serde",
|
||||
"serde_bare",
|
||||
|
@ -23,3 +23,4 @@ zstd = "0.11.1"
|
||||
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
||||
quickpeep_index = { path = "../quickpeep_index" }
|
||||
quickpeep_structs = { path = "../quickpeep_structs" }
|
||||
quickpeep_seed_parser = { path = "../quickpeep_seed_parser" }
|
||||
|
@ -8,6 +8,7 @@ use std::io::{BufRead, BufReader};
|
||||
use quickpeep_densedoc::DenseTree;
|
||||
use quickpeep_index::backend::BackendIndependentDocument;
|
||||
use quickpeep_indexer::config::IndexerConfig;
|
||||
use quickpeep_seed_parser::loader::{find_seed_files, seed_loader, SEED_EXTENSION};
|
||||
use quickpeep_structs::rake_entries::{PackRecord, RakedPageEntry, SCHEMA_RAKED_PAGES};
|
||||
use std::path::PathBuf;
|
||||
|
||||
@ -20,7 +21,8 @@ pub struct Opts {
|
||||
rakepacks: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
pub fn main() -> anyhow::Result<()> {
|
||||
#[tokio::main]
|
||||
pub async fn main() -> anyhow::Result<()> {
|
||||
env_logger::Builder::from_env(Env::default().default_filter_or("info,qp_indexer=debug")).init();
|
||||
|
||||
let opts: Opts = Opts::parse();
|
||||
@ -30,6 +32,20 @@ pub fn main() -> anyhow::Result<()> {
|
||||
.unwrap_or_else(|| PathBuf::from("qp_indexer.toml"));
|
||||
let config = IndexerConfig::load(&config_path).context("Failed to load config")?;
|
||||
|
||||
let seed_files = find_seed_files(config.seed_dir.clone(), SEED_EXTENSION).await?;
|
||||
let (seed_tx, mut seed_rx) = tokio::sync::mpsc::channel(64);
|
||||
let handle = tokio::spawn(async move {
|
||||
seed_loader(seed_files, &seed_tx).await?;
|
||||
Ok(()) as anyhow::Result<()>
|
||||
});
|
||||
|
||||
while let Some(seed) = seed_rx.recv().await {
|
||||
// TODO store this seed in an efficient structure for looking up...
|
||||
todo!();
|
||||
}
|
||||
|
||||
handle.await??;
|
||||
|
||||
let mut indexer_backend = config.open_indexer_backend()?;
|
||||
|
||||
for pack in opts.rakepacks {
|
||||
|
@ -17,12 +17,11 @@ use quickpeep_raker::raking::analysis::get_reduced_domain;
|
||||
use quickpeep_raker::raking::{get_robots_txt_for, RakeIntent};
|
||||
use quickpeep_raker::storage::records::{AllowedDomainRecord, WeedDomainRecord};
|
||||
use quickpeep_raker::storage::{maintenance, RakerStore};
|
||||
use quickpeep_seed_parser::loader::{find_seed_files, seed_loader, Seed, UrlOrUrlPattern};
|
||||
use quickpeep_seed_parser::loader::{
|
||||
find_seed_files, seed_loader, Seed, UrlOrUrlPattern, SEED_EXTENSION, WEED_EXTENSION,
|
||||
};
|
||||
use quickpeep_utils::dirty::DirtyTracker;
|
||||
|
||||
pub const SEED_EXTENSION: &'static str = ".seed";
|
||||
pub const WEED_EXTENSION: &'static str = ".weed";
|
||||
|
||||
/// Seeds a raker's queue with URLs
|
||||
#[derive(Clone, Debug, Parser)]
|
||||
pub struct Opts {
|
||||
|
@ -5,6 +5,9 @@ use std::ffi::OsStr;
|
||||
use std::path::PathBuf;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
|
||||
/// File-name suffix (including the leading dot) that identifies seed files.
pub const SEED_EXTENSION: &str = ".seed";
|
||||
/// File-name suffix (including the leading dot) that identifies weed files.
pub const WEED_EXTENSION: &str = ".weed";
|
||||
|
||||
pub struct Seed {
|
||||
pub url: UrlOrUrlPattern,
|
||||
// TODO(later) These make more sense at the indexer stage. tags: BTreeSet<CompactString>,
|
||||
|
Loading…
Reference in New Issue
Block a user