From ea0b08a3e11da2816e25d597902cb99c148f2c77 Mon Sep 17 00:00:00 2001
From: Olivier
Date: Mon, 28 Mar 2022 22:43:53 +0100
Subject: [PATCH] Add some raking metrics

---
Review notes (everything between the "---" line and the first file diff is
ignored by git-am):
 * Removed the second increment of qprake_rake_status_count that used the
   label status="4xx". The exact status code is already counted just above
   it, so each client error was counted twice and the series no longer
   summed to the number of rakes. Aggregate by status class at query time.
 * Added doc comments to raking/rakemetrics.rs.
 * qprake_rake_content_bytes is described but not incremented anywhere in
   this patch.
 * Hunk counts and the diffstat were recomputed by hand; the post-image blob
   ids on the "index" lines are still those of the original commit.
   Context-line indentation was restored from a whitespace-collapsed copy,
   so pass --ignore-whitespace to git-apply if a hunk fails to match.

 quickpeep_raker/src/bin/qp-raker.rs       |  2 ++
 quickpeep_raker/src/raking.rs             |  9 +++++++++
 quickpeep_raker/src/raking/rakemetrics.rs | 26 ++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100644 quickpeep_raker/src/raking/rakemetrics.rs

diff --git a/quickpeep_raker/src/bin/qp-raker.rs b/quickpeep_raker/src/bin/qp-raker.rs
index 54e5327..2f9a58b 100644
--- a/quickpeep_raker/src/bin/qp-raker.rs
+++ b/quickpeep_raker/src/bin/qp-raker.rs
@@ -23,6 +23,7 @@ use quickpeep_raker::config;
 use quickpeep_raker::rakepack_emitter::pack_emitter;
 use quickpeep_raker::raking::analysis::{preload_adblock_engine, IpSet};
 use quickpeep_raker::raking::page_extraction::PageExtractionService;
+use quickpeep_raker::raking::rakemetrics::describe_raking_metrics;
 use quickpeep_raker::raking::task::{TaskContext, TaskResultSubmission};
 use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
 use quickpeep_raker::storage::RakerStore;
@@ -140,6 +141,7 @@ pub async fn main() -> anyhow::Result<()> {
 
     if metrics_enabled {
         metrics_process_promstyle::describe();
+        describe_raking_metrics();
     }
 
     let num_tasks = opts.concurrent_jobs + opts.concurrent_sleepers;
diff --git a/quickpeep_raker/src/raking.rs b/quickpeep_raker/src/raking.rs
index d4e1c36..f714b41 100644
--- a/quickpeep_raker/src/raking.rs
+++ b/quickpeep_raker/src/raking.rs
@@ -1,5 +1,6 @@
 use crate::raking::analysis::IpSet;
 use crate::raking::page_extraction::{ExtractedPage, PageExtractionService};
+use ::metrics::increment_counter;
 use anyhow::{anyhow, bail, Context};
 use chrono::{DateTime, FixedOffset, Utc};
 use cylon::Cylon;
@@ -23,6 +24,7 @@ use tokio::time::Instant;
 
 pub mod analysis;
 pub mod page_extraction;
+pub mod rakemetrics;
 pub mod references;
 pub mod task;
 
@@ -213,6 +215,7 @@ async fn response_to_bytes_limited(
                     Some(next_chunk) => {
                         buffer.extend_from_slice(next_chunk?.as_bytes());
                         if buffer.len() > size_limit {
+                            increment_counter!("qprake_rake_specific_fail_count", "reason" => "SizeLimit");
                             bail!("Exceeds size limit");
                         }
                     },
@@ -223,6 +226,7 @@ async fn response_to_bytes_limited(
                 }
             },
             _ = tokio::time::sleep_until(deadline) => {
+                increment_counter!("qprake_rake_specific_fail_count", "reason" => "TimeLimit");
                 bail!("Exceeded time limit");
             }
         }
@@ -285,7 +289,11 @@ impl Raker {
             }
         }
 
+        // Labelled by exact status code only; aggregate into classes (4xx, 5xx) at query time.
+        let code = response.status().as_u16().to_string();
+        increment_counter!("qprake_rake_status_count", "status" => code);
+
         if response.status().is_client_error() {
             return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
                 reason: PermanentFailureReason::ResourceDenied(http_code),
             }));
@@ -309,6 +317,7 @@ impl Raker {
                 .context("Can't convert content-type to str")?;
             content_type.split(";").next().unwrap().trim().to_owned()
         } else {
+            increment_counter!("qprake_rake_specific_fail_count", "reason" => "NoCT");
             return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
                 reason: TemporaryFailureReason::MissingInformation("content-type".to_owned()),
                 backoff_sec: 86400 * 7,
diff --git a/quickpeep_raker/src/raking/rakemetrics.rs b/quickpeep_raker/src/raking/rakemetrics.rs
new file mode 100644
index 0000000..cd94c3e
--- /dev/null
+++ b/quickpeep_raker/src/raking/rakemetrics.rs
@@ -0,0 +1,26 @@
+//! Units and descriptions for the counters emitted while raking.
+
+use metrics::{describe_counter, Unit};
+
+/// Registers the unit and description of each raking counter.
+///
+/// Called from `main` in the `qp-raker` binary when metrics are enabled.
+pub fn describe_raking_metrics() {
+    describe_counter!(
+        "qprake_rake_status_count",
+        Unit::Count,
+        "Number of rakes, by status."
+    );
+
+    describe_counter!(
+        "qprake_rake_specific_fail_count",
+        Unit::Count,
+        "Number of specific rake failures."
+    );
+
+    describe_counter!(
+        "qprake_rake_content_bytes",
+        Unit::Bytes,
+        "Number of content bytes raked."
+    );
+}