This commit is contained in:
parent
f884324648
commit
4665bfd3a3
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3483,6 +3483,7 @@ dependencies = [
|
|||||||
"log",
|
"log",
|
||||||
"quickpeep_densedoc",
|
"quickpeep_densedoc",
|
||||||
"quickpeep_index",
|
"quickpeep_index",
|
||||||
|
"quickpeep_seed_parser",
|
||||||
"quickpeep_structs",
|
"quickpeep_structs",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_bare",
|
"serde_bare",
|
||||||
|
@ -23,3 +23,4 @@ zstd = "0.11.1"
|
|||||||
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
||||||
quickpeep_index = { path = "../quickpeep_index" }
|
quickpeep_index = { path = "../quickpeep_index" }
|
||||||
quickpeep_structs = { path = "../quickpeep_structs" }
|
quickpeep_structs = { path = "../quickpeep_structs" }
|
||||||
|
quickpeep_seed_parser = { path = "../quickpeep_seed_parser" }
|
||||||
|
@ -8,6 +8,7 @@ use std::io::{BufRead, BufReader};
|
|||||||
use quickpeep_densedoc::DenseTree;
|
use quickpeep_densedoc::DenseTree;
|
||||||
use quickpeep_index::backend::BackendIndependentDocument;
|
use quickpeep_index::backend::BackendIndependentDocument;
|
||||||
use quickpeep_indexer::config::IndexerConfig;
|
use quickpeep_indexer::config::IndexerConfig;
|
||||||
|
use quickpeep_seed_parser::loader::{find_seed_files, seed_loader, SEED_EXTENSION};
|
||||||
use quickpeep_structs::rake_entries::{PackRecord, RakedPageEntry, SCHEMA_RAKED_PAGES};
|
use quickpeep_structs::rake_entries::{PackRecord, RakedPageEntry, SCHEMA_RAKED_PAGES};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
@ -20,7 +21,8 @@ pub struct Opts {
|
|||||||
rakepacks: Vec<PathBuf>,
|
rakepacks: Vec<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn main() -> anyhow::Result<()> {
|
#[tokio::main]
|
||||||
|
pub async fn main() -> anyhow::Result<()> {
|
||||||
env_logger::Builder::from_env(Env::default().default_filter_or("info,qp_indexer=debug")).init();
|
env_logger::Builder::from_env(Env::default().default_filter_or("info,qp_indexer=debug")).init();
|
||||||
|
|
||||||
let opts: Opts = Opts::parse();
|
let opts: Opts = Opts::parse();
|
||||||
@ -30,6 +32,20 @@ pub fn main() -> anyhow::Result<()> {
|
|||||||
.unwrap_or_else(|| PathBuf::from("qp_indexer.toml"));
|
.unwrap_or_else(|| PathBuf::from("qp_indexer.toml"));
|
||||||
let config = IndexerConfig::load(&config_path).context("Failed to load config")?;
|
let config = IndexerConfig::load(&config_path).context("Failed to load config")?;
|
||||||
|
|
||||||
|
let seed_files = find_seed_files(config.seed_dir.clone(), SEED_EXTENSION).await?;
|
||||||
|
let (seed_tx, mut seed_rx) = tokio::sync::mpsc::channel(64);
|
||||||
|
let handle = tokio::spawn(async move {
|
||||||
|
seed_loader(seed_files, &seed_tx).await?;
|
||||||
|
Ok(()) as anyhow::Result<()>
|
||||||
|
});
|
||||||
|
|
||||||
|
while let Some(seed) = seed_rx.recv().await {
|
||||||
|
// TODO store this seed in an efficient structure for looking up...
|
||||||
|
todo!();
|
||||||
|
}
|
||||||
|
|
||||||
|
handle.await??;
|
||||||
|
|
||||||
let mut indexer_backend = config.open_indexer_backend()?;
|
let mut indexer_backend = config.open_indexer_backend()?;
|
||||||
|
|
||||||
for pack in opts.rakepacks {
|
for pack in opts.rakepacks {
|
||||||
|
@ -17,12 +17,11 @@ use quickpeep_raker::raking::analysis::get_reduced_domain;
|
|||||||
use quickpeep_raker::raking::{get_robots_txt_for, RakeIntent};
|
use quickpeep_raker::raking::{get_robots_txt_for, RakeIntent};
|
||||||
use quickpeep_raker::storage::records::{AllowedDomainRecord, WeedDomainRecord};
|
use quickpeep_raker::storage::records::{AllowedDomainRecord, WeedDomainRecord};
|
||||||
use quickpeep_raker::storage::{maintenance, RakerStore};
|
use quickpeep_raker::storage::{maintenance, RakerStore};
|
||||||
use quickpeep_seed_parser::loader::{find_seed_files, seed_loader, Seed, UrlOrUrlPattern};
|
use quickpeep_seed_parser::loader::{
|
||||||
|
find_seed_files, seed_loader, Seed, UrlOrUrlPattern, SEED_EXTENSION, WEED_EXTENSION,
|
||||||
|
};
|
||||||
use quickpeep_utils::dirty::DirtyTracker;
|
use quickpeep_utils::dirty::DirtyTracker;
|
||||||
|
|
||||||
pub const SEED_EXTENSION: &'static str = ".seed";
|
|
||||||
pub const WEED_EXTENSION: &'static str = ".weed";
|
|
||||||
|
|
||||||
/// Seeds a raker's queue with URLs
|
/// Seeds a raker's queue with URLs
|
||||||
#[derive(Clone, Debug, Parser)]
|
#[derive(Clone, Debug, Parser)]
|
||||||
pub struct Opts {
|
pub struct Opts {
|
||||||
|
@ -5,6 +5,9 @@ use std::ffi::OsStr;
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use tokio::sync::mpsc::Sender;
|
use tokio::sync::mpsc::Sender;
|
||||||
|
|
||||||
|
pub const SEED_EXTENSION: &'static str = ".seed";
|
||||||
|
pub const WEED_EXTENSION: &'static str = ".weed";
|
||||||
|
|
||||||
pub struct Seed {
|
pub struct Seed {
|
||||||
pub url: UrlOrUrlPattern,
|
pub url: UrlOrUrlPattern,
|
||||||
// TODO(later) These make more sense at the indexer stage. tags: BTreeSet<CompactString>,
|
// TODO(later) These make more sense at the indexer stage. tags: BTreeSet<CompactString>,
|
||||||
|
Loading…
Reference in New Issue
Block a user