Use the unified config in the raker

This commit is contained in:
Olivier 'reivilibre' 2022-04-05 22:17:27 +01:00
parent dd0097c1aa
commit 4fd2dc393e
7 changed files with 41 additions and 35 deletions

2
Cargo.lock generated
View File

@ -3813,6 +3813,7 @@ dependencies = [
"rand 0.8.5", "rand 0.8.5",
"reivilibre_fork_cylon", "reivilibre_fork_cylon",
"reqwest", "reqwest",
"ron",
"serde", "serde",
"serde_bare", "serde_bare",
"serde_json", "serde_json",
@ -3820,7 +3821,6 @@ dependencies = [
"sitemap", "sitemap",
"smartstring", "smartstring",
"tokio", "tokio",
"toml",
"webp", "webp",
"zstd", "zstd",
] ]

View File

@ -27,7 +27,7 @@ serde = { version = "1.0.136", features = ["derive"] }
serde_bare = "0.5.0" serde_bare = "0.5.0"
serde_json = "1.0.79" serde_json = "1.0.79"
toml = "0.5.8" ron = "0.7.0"
bytesize = {version = "1.1.0", features = ["serde"]} bytesize = {version = "1.1.0", features = ["serde"]}
### Dates ### Dates

View File

@ -45,17 +45,17 @@ pub async fn main() -> anyhow::Result<()> {
.unwrap_or_else(|| PathBuf::from("qp_raker.toml")); .unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?; let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
if !config.workbench_dir.exists() { if !config.raker.workbench_dir.exists() {
bail!( bail!(
"Workbench directory ({:?}) doesn't exist.", "Workbench directory ({:?}) doesn't exist.",
config.workbench_dir config.raker.workbench_dir
); );
} }
if !config.seed_dir.exists() { if !config.seed_dir.exists() {
bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir); bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
} }
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?; let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
let txn = store.ro_txn()?; let txn = store.ro_txn()?;
match opts.table.as_ref() { match opts.table.as_ref() {

View File

@ -36,14 +36,14 @@ pub async fn main() -> anyhow::Result<()> {
.unwrap_or_else(|| PathBuf::from("qp_raker.toml")); .unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?; let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
if !config.workbench_dir.exists() { if !config.raker.workbench_dir.exists() {
bail!( bail!(
"Workbench directory ({:?}) doesn't exist.", "Workbench directory ({:?}) doesn't exist.",
config.workbench_dir config.raker.workbench_dir
); );
} }
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?; let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
let is_urls = opts.urls; let is_urls = opts.urls;
let counts = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<_>> { let counts = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<_>> {

View File

@ -70,13 +70,13 @@ pub async fn main() -> anyhow::Result<()> {
let config_path = opts let config_path = opts
.config .config
.unwrap_or_else(|| PathBuf::from("qp_raker.toml")); .unwrap_or_else(|| PathBuf::from("quickpeep.ron"));
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?; let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
if !config.workbench_dir.exists() { if !config.raker.workbench_dir.exists() {
bail!( bail!(
"Workbench directory ({:?}) doesn't exist.", "Workbench directory ({:?}) doesn't exist.",
config.workbench_dir config.raker.workbench_dir
); );
} }
@ -101,11 +101,11 @@ pub async fn main() -> anyhow::Result<()> {
.redirect(Policy::limited(5)) .redirect(Policy::limited(5))
.build()?; .build()?;
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?; let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
let mut adblock_engines = Vec::new(); let mut adblock_engines = Vec::new();
for (antifeature, name) in &ADBLOCK_FILTER_PATHS { for (antifeature, name) in &ADBLOCK_FILTER_PATHS {
let path = PathBuf::from(config.data_dir.join(format!("{}.adblock", name))); let path = PathBuf::from(config.raker.data_dir.join(format!("{}.adblock", name)));
if !path.exists() { if !path.exists() {
warn!("Missing adblock rules: {:?}.", path); warn!("Missing adblock rules: {:?}.", path);
continue; continue;
@ -119,7 +119,7 @@ pub async fn main() -> anyhow::Result<()> {
let mut antifeature_ip_set = IpSet::new(); let mut antifeature_ip_set = IpSet::new();
let ips_file = File::open(config.data_dir.join("cf_ips.txt")) let ips_file = File::open(config.raker.data_dir.join("cf_ips.txt"))
.await .await
.context("Failed to open CF IPs file")?; .context("Failed to open CF IPs file")?;
antifeature_ip_set.add_all_from_file(ips_file).await?; antifeature_ip_set.add_all_from_file(ips_file).await?;
@ -129,15 +129,16 @@ pub async fn main() -> anyhow::Result<()> {
page_extraction: PageExtractionService::new(adblock_engines)?, page_extraction: PageExtractionService::new(adblock_engines)?,
}; };
if let Some(addr) = config.metrics.prometheus { if let Some(addr) = config.raker.metrics.prometheus {
PrometheusBuilder::new() PrometheusBuilder::new()
.with_http_listener(addr) .with_http_listener(addr)
.install()?; .install()?;
} else if config.metrics.bare_metrics { } else if config.raker.metrics.bare_metrics {
warn!("BARE Metrics not supported yet, sorry."); warn!("BARE Metrics not supported yet, sorry.");
} }
let metrics_enabled = config.metrics.prometheus.is_some() || config.metrics.bare_metrics; let metrics_enabled =
config.raker.metrics.prometheus.is_some() || config.raker.metrics.bare_metrics;
if metrics_enabled { if metrics_enabled {
metrics_process_promstyle::describe(); metrics_process_promstyle::describe();
@ -158,8 +159,8 @@ pub async fn main() -> anyhow::Result<()> {
let mut emitters = Vec::with_capacity(3); let mut emitters = Vec::with_capacity(3);
{ {
let emit_dir = config.emit_dir.clone(); let emit_dir = config.raker.emit_dir.clone();
let settings = config.pack_emitter.clone(); let settings = config.raker.pack_emitter.clone();
let stop = graceful_stop.clone(); let stop = graceful_stop.clone();
let notify = graceful_stop_notify.clone(); let notify = graceful_stop_notify.clone();
emitters.push( emitters.push(
@ -181,8 +182,8 @@ pub async fn main() -> anyhow::Result<()> {
} }
{ {
let emit_dir = config.emit_dir.clone(); let emit_dir = config.raker.emit_dir.clone();
let settings = config.pack_emitter.clone(); let settings = config.raker.pack_emitter.clone();
let stop = graceful_stop.clone(); let stop = graceful_stop.clone();
let notify = graceful_stop_notify.clone(); let notify = graceful_stop_notify.clone();
emitters.push( emitters.push(
@ -204,8 +205,8 @@ pub async fn main() -> anyhow::Result<()> {
} }
{ {
let emit_dir = config.emit_dir.clone(); let emit_dir = config.raker.emit_dir.clone();
let settings = config.pack_emitter.clone(); let settings = config.raker.pack_emitter.clone();
let stop = graceful_stop.clone(); let stop = graceful_stop.clone();
let notify = graceful_stop_notify.clone(); let notify = graceful_stop_notify.clone();
emitters.push( emitters.push(
@ -227,8 +228,8 @@ pub async fn main() -> anyhow::Result<()> {
} }
{ {
let emit_dir = config.emit_dir.clone(); let emit_dir = config.raker.emit_dir.clone();
let settings = config.pack_emitter.clone(); let settings = config.raker.pack_emitter.clone();
let stop = graceful_stop.clone(); let stop = graceful_stop.clone();
let notify = graceful_stop_notify.clone(); let notify = graceful_stop_notify.clone();
emitters.push( emitters.push(

View File

@ -44,17 +44,17 @@ pub async fn main() -> anyhow::Result<()> {
.unwrap_or_else(|| PathBuf::from("qp_raker.toml")); .unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?; let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
if !config.workbench_dir.exists() { if !config.raker.workbench_dir.exists() {
bail!( bail!(
"Workbench directory ({:?}) doesn't exist.", "Workbench directory ({:?}) doesn't exist.",
config.workbench_dir config.raker.workbench_dir
); );
} }
if !config.seed_dir.exists() { if !config.seed_dir.exists() {
bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir); bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
} }
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?; let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
import_seeds(store.clone(), &config).await?; import_seeds(store.clone(), &config).await?;

View File

@ -8,12 +8,17 @@ use std::path::{Path, PathBuf};
/// Config for a raker. All paths are relative to the config file if needed, but will be resolved /// Config for a raker. All paths are relative to the config file if needed, but will be resolved
/// when loading. /// when loading.
pub struct RakerConfig { pub struct RakerConfig {
/// Path to data files
pub data_dir: PathBuf,
/// Path to seeds /// Path to seeds
pub seed_dir: PathBuf, pub seed_dir: PathBuf,
pub raker: RakerOnlyConfig,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct RakerOnlyConfig {
/// Path to data files
pub data_dir: PathBuf,
/// Path to the raker's workbench (queue etc) /// Path to the raker's workbench (queue etc)
pub workbench_dir: PathBuf, pub workbench_dir: PathBuf,
@ -31,12 +36,12 @@ impl RakerConfig {
pub fn load(path: &Path) -> anyhow::Result<RakerConfig> { pub fn load(path: &Path) -> anyhow::Result<RakerConfig> {
let config_dir = path.parent().context("Can't get parent of config file.")?; let config_dir = path.parent().context("Can't get parent of config file.")?;
let bytes = std::fs::read(path)?; let bytes = std::fs::read(path)?;
let mut raker_config: RakerConfig = toml::from_slice(&bytes)?; let mut raker_config: RakerConfig = ron::de::from_bytes(&bytes)?;
raker_config.data_dir = config_dir.join(raker_config.data_dir); raker_config.raker.data_dir = config_dir.join(raker_config.raker.data_dir);
raker_config.seed_dir = config_dir.join(raker_config.seed_dir); raker_config.seed_dir = config_dir.join(raker_config.seed_dir);
raker_config.workbench_dir = config_dir.join(raker_config.workbench_dir); raker_config.raker.workbench_dir = config_dir.join(raker_config.raker.workbench_dir);
raker_config.emit_dir = config_dir.join(raker_config.emit_dir); raker_config.raker.emit_dir = config_dir.join(raker_config.raker.emit_dir);
Ok(raker_config) Ok(raker_config)
} }