diff --git a/Cargo.lock b/Cargo.lock index ee06922..3cc2fe7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,6 +148,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic-shim" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cd4b51d303cf3501c301e8125df442128d3c6d7c69f71b27833d253de47e77" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "atty" version = "0.2.14" @@ -209,6 +218,35 @@ dependencies = [ "mime", ] +[[package]] +name = "bare-metrics-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff7b4664025c0967087f03e9f1a9be00b3ce61cc4e99448afa9d4daa757e9954" +dependencies = [ + "hdrhistogram", + "serde", + "serde_bare", +] + +[[package]] +name = "bare-metrics-recorder" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f81335882068ff256d4545eee4c8721e75331d4420e954adf5c88dbceb2dc74" +dependencies = [ + "anyhow", + "bare-metrics-core", + "crossbeam-channel", + "dashmap", + "fxhash", + "hdrhistogram", + "log", + "metrics 0.17.1", + "serde_bare", + "thiserror", +] + [[package]] name = "base64" version = "0.13.0" @@ -572,6 +610,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "dashmap" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" +dependencies = [ + "cfg-if", + "num_cpus", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -941,6 +989,20 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "hdrhistogram" +version = "7.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31672b7011be2c4f7456c4ddbcb40e7e9a4a9fad8efe49a6ebaf5f307d0109c0" +dependencies = [ + "base64", + "byteorder", + "crossbeam-channel", + "flate2", + "nom", + "num-traits", +] + [[package]] name = "heck" version = "0.3.3" @@ -2044,6 +2106,15 @@ version = 
"0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "maplit" version = "1.0.2" @@ -2103,6 +2174,84 @@ dependencies = [ "autocfg", ] +[[package]] +name = "metrics" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55586aa936c35f34ba8aa5d97356d554311206e1ce1f9e68fe7b07288e5ad827" +dependencies = [ + "ahash", + "metrics-macros 0.4.1", +] + +[[package]] +name = "metrics" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e52eb6380b6d2a10eb3434aec0885374490f5b82c8aaf5cd487a183c98be834" +dependencies = [ + "ahash", + "metrics-macros 0.5.1", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b93b470b04c005178058e18ac8bb2eb3fda562cf87af5ea05ba8d44190d458c" +dependencies = [ + "hyper", + "indexmap", + "ipnet", + "metrics 0.18.1", + "metrics-util", + "parking_lot 0.11.2", + "quanta", + "thiserror", + "tokio", +] + +[[package]] +name = "metrics-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0daa0ab3a0ae956d0e2c1f42511422850e577d36a255357d1a7d08d45ee3a2f1" +dependencies = [ + "lazy_static", + "proc-macro2", + "quote", + "regex", + "syn", +] + +[[package]] +name = "metrics-macros" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49e30813093f757be5cf21e50389a24dc7dbb22c49f23b7e8f51d69b508a5ffa" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "metrics-util" +version = "0.12.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "107a38013e91c04ddf31826b0d0dcc2e0d4ebedded8234cc0dc2b7bbd0c121e8" +dependencies = [ + "atomic-shim", + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown", + "metrics 0.18.1", + "num_cpus", + "parking_lot 0.11.2", + "quanta", + "sketches-ddsketch", +] + [[package]] name = "mime" version = "0.3.16" @@ -2630,6 +2779,22 @@ dependencies = [ "psl-types", ] +[[package]] +name = "quanta" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8" +dependencies = [ + "crossbeam-utils", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.0+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quick-xml" version = "0.22.0" @@ -2692,6 +2857,7 @@ version = "0.1.0" dependencies = [ "adblock", "anyhow", + "bare-metrics-recorder", "bytes", "chrono", "clap", @@ -2712,6 +2878,8 @@ dependencies = [ "log", "lru", "mdbx-sys", + "metrics 0.18.1", + "metrics-exporter-prometheus", "ouroboros", "publicsuffix", "quickpeep_densedoc", @@ -2838,6 +3006,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "raw-cpuid" +version = "10.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "738bc47119e3eeccc7e94c4a506901aea5e7b4944ecd0829cbebf4af04ceda12" +dependencies = [ + "bitflags", +] + [[package]] name = "rayon" version = "1.5.1" @@ -3233,6 +3410,12 @@ dependencies = [ "xml-rs", ] +[[package]] +name = "sketches-ddsketch" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a77a8fd93886010f05e7ea0720e569d6d16c65329dbe3ec033bbbccccb017b" + [[package]] name = "slab" version = "0.4.5" diff --git a/qp_raker.sample.toml b/qp_raker.sample.toml index a7fa384..ee37a62 100644 --- a/qp_raker.sample.toml +++ b/qp_raker.sample.toml @@ -2,3 +2,7 @@ data_dir = "./data" seed_dir = "../quickpeep_seeds" workbench_dir = 
"./workbench" emit_dir = "./rakepacks" + +[metrics] +prometheus = "127.0.0.1:9774" +# bare_metrics = true \ No newline at end of file diff --git a/quickpeep_raker/Cargo.toml b/quickpeep_raker/Cargo.toml index c3f9a5d..ce468e8 100644 --- a/quickpeep_raker/Cargo.toml +++ b/quickpeep_raker/Cargo.toml @@ -66,4 +66,9 @@ publicsuffix = "2.1.1" # AdBlock adblock = "0.5.0" # Language detection -lingua = "1.3.3" \ No newline at end of file +lingua = "1.3.3" + +### Metrics +metrics = "0.18.1" +metrics-exporter-prometheus = { version = "0.9.0", default-features = false, features = ["http-listener"] } +bare-metrics-recorder = "0.1.0" diff --git a/quickpeep_raker/src/bin/qp-raker.rs b/quickpeep_raker/src/bin/qp-raker.rs index 5eddb0c..ddc4491 100644 --- a/quickpeep_raker/src/bin/qp-raker.rs +++ b/quickpeep_raker/src/bin/qp-raker.rs @@ -6,6 +6,7 @@ use adblock::lists::RuleTypes; use anyhow::{bail, Context}; use log::{error, warn}; use lru::LruCache; +use metrics_exporter_prometheus::PrometheusBuilder; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::redirect::Policy; use std::path::PathBuf; @@ -107,6 +108,14 @@ pub async fn main() -> anyhow::Result<()> { page_extraction: PageExtractionService::new(adblock_engines)?, }; + if let Some(addr) = config.metrics.prometheus { + PrometheusBuilder::new() + .with_http_listener(addr) + .install()?; + } else if config.metrics.bare_metrics { + warn!("BARE Metrics not supported yet, sorry."); + } + let num_tasks = opts.concurrent_jobs + opts.concurrent_sleepers; let semaphore = Arc::new(Semaphore::new(opts.concurrent_jobs as usize)); @@ -146,7 +155,9 @@ pub async fn main() -> anyhow::Result<()> { drop(task_context); - eprintln!("{:#?}", config); + for task in tasks { + task.await?; + } Ok(()) } diff --git a/quickpeep_raker/src/config.rs b/quickpeep_raker/src/config.rs index f6d4553..a00a11d 100644 --- a/quickpeep_raker/src/config.rs +++ b/quickpeep_raker/src/config.rs @@ -1,5 +1,6 @@ use anyhow::Context; use 
serde::{Deserialize, Serialize}; +use std::net::SocketAddr; use std::path::{Path, PathBuf}; #[derive(Serialize, Deserialize, Debug, Clone)] @@ -17,6 +18,8 @@ pub struct RakerConfig { /// Directory where new rake packs will be emitted pub emit_dir: PathBuf, + + pub metrics: MetricsConfig, } impl RakerConfig { @@ -35,3 +38,11 @@ impl RakerConfig { Ok(raker_config) } } + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct MetricsConfig { + #[serde(default)] + pub bare_metrics: bool, + #[serde(default)] + pub prometheus: Option<SocketAddr>, +}