Use the unified config in the raker
This commit is contained in:
parent
dd0097c1aa
commit
4fd2dc393e
|
@ -3813,6 +3813,7 @@ dependencies = [
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
"reivilibre_fork_cylon",
|
"reivilibre_fork_cylon",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
|
"ron",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_bare",
|
"serde_bare",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
@ -3820,7 +3821,6 @@ dependencies = [
|
||||||
"sitemap",
|
"sitemap",
|
||||||
"smartstring",
|
"smartstring",
|
||||||
"tokio",
|
"tokio",
|
||||||
"toml",
|
|
||||||
"webp",
|
"webp",
|
||||||
"zstd",
|
"zstd",
|
||||||
]
|
]
|
||||||
|
|
|
@ -27,7 +27,7 @@ serde = { version = "1.0.136", features = ["derive"] }
|
||||||
serde_bare = "0.5.0"
|
serde_bare = "0.5.0"
|
||||||
serde_json = "1.0.79"
|
serde_json = "1.0.79"
|
||||||
|
|
||||||
toml = "0.5.8"
|
ron = "0.7.0"
|
||||||
bytesize = {version = "1.1.0", features = ["serde"]}
|
bytesize = {version = "1.1.0", features = ["serde"]}
|
||||||
|
|
||||||
### Dates
|
### Dates
|
||||||
|
|
|
@ -45,17 +45,17 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
||||||
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
||||||
|
|
||||||
if !config.workbench_dir.exists() {
|
if !config.raker.workbench_dir.exists() {
|
||||||
bail!(
|
bail!(
|
||||||
"Workbench directory ({:?}) doesn't exist.",
|
"Workbench directory ({:?}) doesn't exist.",
|
||||||
config.workbench_dir
|
config.raker.workbench_dir
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if !config.seed_dir.exists() {
|
if !config.seed_dir.exists() {
|
||||||
bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
|
bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?;
|
let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
|
||||||
|
|
||||||
let txn = store.ro_txn()?;
|
let txn = store.ro_txn()?;
|
||||||
match opts.table.as_ref() {
|
match opts.table.as_ref() {
|
||||||
|
|
|
@ -36,14 +36,14 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
||||||
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
||||||
|
|
||||||
if !config.workbench_dir.exists() {
|
if !config.raker.workbench_dir.exists() {
|
||||||
bail!(
|
bail!(
|
||||||
"Workbench directory ({:?}) doesn't exist.",
|
"Workbench directory ({:?}) doesn't exist.",
|
||||||
config.workbench_dir
|
config.raker.workbench_dir
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?;
|
let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
|
||||||
let is_urls = opts.urls;
|
let is_urls = opts.urls;
|
||||||
|
|
||||||
let counts = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<_>> {
|
let counts = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<_>> {
|
||||||
|
|
|
@ -70,13 +70,13 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
|
|
||||||
let config_path = opts
|
let config_path = opts
|
||||||
.config
|
.config
|
||||||
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
.unwrap_or_else(|| PathBuf::from("quickpeep.ron"));
|
||||||
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
||||||
|
|
||||||
if !config.workbench_dir.exists() {
|
if !config.raker.workbench_dir.exists() {
|
||||||
bail!(
|
bail!(
|
||||||
"Workbench directory ({:?}) doesn't exist.",
|
"Workbench directory ({:?}) doesn't exist.",
|
||||||
config.workbench_dir
|
config.raker.workbench_dir
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,11 +101,11 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
.redirect(Policy::limited(5))
|
.redirect(Policy::limited(5))
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?;
|
let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
|
||||||
|
|
||||||
let mut adblock_engines = Vec::new();
|
let mut adblock_engines = Vec::new();
|
||||||
for (antifeature, name) in &ADBLOCK_FILTER_PATHS {
|
for (antifeature, name) in &ADBLOCK_FILTER_PATHS {
|
||||||
let path = PathBuf::from(config.data_dir.join(format!("{}.adblock", name)));
|
let path = PathBuf::from(config.raker.data_dir.join(format!("{}.adblock", name)));
|
||||||
if !path.exists() {
|
if !path.exists() {
|
||||||
warn!("Missing adblock rules: {:?}.", path);
|
warn!("Missing adblock rules: {:?}.", path);
|
||||||
continue;
|
continue;
|
||||||
|
@ -119,7 +119,7 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
|
|
||||||
let mut antifeature_ip_set = IpSet::new();
|
let mut antifeature_ip_set = IpSet::new();
|
||||||
|
|
||||||
let ips_file = File::open(config.data_dir.join("cf_ips.txt"))
|
let ips_file = File::open(config.raker.data_dir.join("cf_ips.txt"))
|
||||||
.await
|
.await
|
||||||
.context("Failed to open CF IPs file")?;
|
.context("Failed to open CF IPs file")?;
|
||||||
antifeature_ip_set.add_all_from_file(ips_file).await?;
|
antifeature_ip_set.add_all_from_file(ips_file).await?;
|
||||||
|
@ -129,15 +129,16 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
page_extraction: PageExtractionService::new(adblock_engines)?,
|
page_extraction: PageExtractionService::new(adblock_engines)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(addr) = config.metrics.prometheus {
|
if let Some(addr) = config.raker.metrics.prometheus {
|
||||||
PrometheusBuilder::new()
|
PrometheusBuilder::new()
|
||||||
.with_http_listener(addr)
|
.with_http_listener(addr)
|
||||||
.install()?;
|
.install()?;
|
||||||
} else if config.metrics.bare_metrics {
|
} else if config.raker.metrics.bare_metrics {
|
||||||
warn!("BARE Metrics not supported yet, sorry.");
|
warn!("BARE Metrics not supported yet, sorry.");
|
||||||
}
|
}
|
||||||
|
|
||||||
let metrics_enabled = config.metrics.prometheus.is_some() || config.metrics.bare_metrics;
|
let metrics_enabled =
|
||||||
|
config.raker.metrics.prometheus.is_some() || config.raker.metrics.bare_metrics;
|
||||||
|
|
||||||
if metrics_enabled {
|
if metrics_enabled {
|
||||||
metrics_process_promstyle::describe();
|
metrics_process_promstyle::describe();
|
||||||
|
@ -158,8 +159,8 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
let mut emitters = Vec::with_capacity(3);
|
let mut emitters = Vec::with_capacity(3);
|
||||||
|
|
||||||
{
|
{
|
||||||
let emit_dir = config.emit_dir.clone();
|
let emit_dir = config.raker.emit_dir.clone();
|
||||||
let settings = config.pack_emitter.clone();
|
let settings = config.raker.pack_emitter.clone();
|
||||||
let stop = graceful_stop.clone();
|
let stop = graceful_stop.clone();
|
||||||
let notify = graceful_stop_notify.clone();
|
let notify = graceful_stop_notify.clone();
|
||||||
emitters.push(
|
emitters.push(
|
||||||
|
@ -181,8 +182,8 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let emit_dir = config.emit_dir.clone();
|
let emit_dir = config.raker.emit_dir.clone();
|
||||||
let settings = config.pack_emitter.clone();
|
let settings = config.raker.pack_emitter.clone();
|
||||||
let stop = graceful_stop.clone();
|
let stop = graceful_stop.clone();
|
||||||
let notify = graceful_stop_notify.clone();
|
let notify = graceful_stop_notify.clone();
|
||||||
emitters.push(
|
emitters.push(
|
||||||
|
@ -204,8 +205,8 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let emit_dir = config.emit_dir.clone();
|
let emit_dir = config.raker.emit_dir.clone();
|
||||||
let settings = config.pack_emitter.clone();
|
let settings = config.raker.pack_emitter.clone();
|
||||||
let stop = graceful_stop.clone();
|
let stop = graceful_stop.clone();
|
||||||
let notify = graceful_stop_notify.clone();
|
let notify = graceful_stop_notify.clone();
|
||||||
emitters.push(
|
emitters.push(
|
||||||
|
@ -227,8 +228,8 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
let emit_dir = config.emit_dir.clone();
|
let emit_dir = config.raker.emit_dir.clone();
|
||||||
let settings = config.pack_emitter.clone();
|
let settings = config.raker.pack_emitter.clone();
|
||||||
let stop = graceful_stop.clone();
|
let stop = graceful_stop.clone();
|
||||||
let notify = graceful_stop_notify.clone();
|
let notify = graceful_stop_notify.clone();
|
||||||
emitters.push(
|
emitters.push(
|
||||||
|
|
|
@ -44,17 +44,17 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
.unwrap_or_else(|| PathBuf::from("qp_raker.toml"));
|
||||||
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
|
||||||
|
|
||||||
if !config.workbench_dir.exists() {
|
if !config.raker.workbench_dir.exists() {
|
||||||
bail!(
|
bail!(
|
||||||
"Workbench directory ({:?}) doesn't exist.",
|
"Workbench directory ({:?}) doesn't exist.",
|
||||||
config.workbench_dir
|
config.raker.workbench_dir
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if !config.seed_dir.exists() {
|
if !config.seed_dir.exists() {
|
||||||
bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
|
bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
let store = RakerStore::open(&config.workbench_dir.join("raker.mdbx"))?;
|
let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
|
||||||
|
|
||||||
import_seeds(store.clone(), &config).await?;
|
import_seeds(store.clone(), &config).await?;
|
||||||
|
|
||||||
|
|
|
@ -8,12 +8,17 @@ use std::path::{Path, PathBuf};
|
||||||
/// Config for a raker. All paths are relative to the config file if needed, but will be resolved
|
/// Config for a raker. All paths are relative to the config file if needed, but will be resolved
|
||||||
/// when loading.
|
/// when loading.
|
||||||
pub struct RakerConfig {
|
pub struct RakerConfig {
|
||||||
/// Path to data files
|
|
||||||
pub data_dir: PathBuf,
|
|
||||||
|
|
||||||
/// Path to seeds
|
/// Path to seeds
|
||||||
pub seed_dir: PathBuf,
|
pub seed_dir: PathBuf,
|
||||||
|
|
||||||
|
pub raker: RakerOnlyConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
pub struct RakerOnlyConfig {
|
||||||
|
/// Path to data files
|
||||||
|
pub data_dir: PathBuf,
|
||||||
|
|
||||||
/// Path to the raker's workbench (queue etc)
|
/// Path to the raker's workbench (queue etc)
|
||||||
pub workbench_dir: PathBuf,
|
pub workbench_dir: PathBuf,
|
||||||
|
|
||||||
|
@ -31,12 +36,12 @@ impl RakerConfig {
|
||||||
pub fn load(path: &Path) -> anyhow::Result<RakerConfig> {
|
pub fn load(path: &Path) -> anyhow::Result<RakerConfig> {
|
||||||
let config_dir = path.parent().context("Can't get parent of config file.")?;
|
let config_dir = path.parent().context("Can't get parent of config file.")?;
|
||||||
let bytes = std::fs::read(path)?;
|
let bytes = std::fs::read(path)?;
|
||||||
let mut raker_config: RakerConfig = toml::from_slice(&bytes)?;
|
let mut raker_config: RakerConfig = ron::de::from_bytes(&bytes)?;
|
||||||
|
|
||||||
raker_config.data_dir = config_dir.join(raker_config.data_dir);
|
raker_config.raker.data_dir = config_dir.join(raker_config.raker.data_dir);
|
||||||
raker_config.seed_dir = config_dir.join(raker_config.seed_dir);
|
raker_config.seed_dir = config_dir.join(raker_config.seed_dir);
|
||||||
raker_config.workbench_dir = config_dir.join(raker_config.workbench_dir);
|
raker_config.raker.workbench_dir = config_dir.join(raker_config.raker.workbench_dir);
|
||||||
raker_config.emit_dir = config_dir.join(raker_config.emit_dir);
|
raker_config.raker.emit_dir = config_dir.join(raker_config.raker.emit_dir);
|
||||||
|
|
||||||
Ok(raker_config)
|
Ok(raker_config)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue