This commit is contained in:
parent
98c05f59b5
commit
ea0b08a3e1
@ -23,6 +23,7 @@ use quickpeep_raker::config;
|
|||||||
use quickpeep_raker::rakepack_emitter::pack_emitter;
|
use quickpeep_raker::rakepack_emitter::pack_emitter;
|
||||||
use quickpeep_raker::raking::analysis::{preload_adblock_engine, IpSet};
|
use quickpeep_raker::raking::analysis::{preload_adblock_engine, IpSet};
|
||||||
use quickpeep_raker::raking::page_extraction::PageExtractionService;
|
use quickpeep_raker::raking::page_extraction::PageExtractionService;
|
||||||
|
use quickpeep_raker::raking::rakemetrics::describe_raking_metrics;
|
||||||
use quickpeep_raker::raking::task::{TaskContext, TaskResultSubmission};
|
use quickpeep_raker::raking::task::{TaskContext, TaskResultSubmission};
|
||||||
use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
||||||
use quickpeep_raker::storage::RakerStore;
|
use quickpeep_raker::storage::RakerStore;
|
||||||
@ -140,6 +141,7 @@ pub async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
if metrics_enabled {
|
if metrics_enabled {
|
||||||
metrics_process_promstyle::describe();
|
metrics_process_promstyle::describe();
|
||||||
|
describe_raking_metrics();
|
||||||
}
|
}
|
||||||
|
|
||||||
let num_tasks = opts.concurrent_jobs + opts.concurrent_sleepers;
|
let num_tasks = opts.concurrent_jobs + opts.concurrent_sleepers;
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use crate::raking::analysis::IpSet;
|
use crate::raking::analysis::IpSet;
|
||||||
use crate::raking::page_extraction::{ExtractedPage, PageExtractionService};
|
use crate::raking::page_extraction::{ExtractedPage, PageExtractionService};
|
||||||
|
use ::metrics::increment_counter;
|
||||||
use anyhow::{anyhow, bail, Context};
|
use anyhow::{anyhow, bail, Context};
|
||||||
use chrono::{DateTime, FixedOffset, Utc};
|
use chrono::{DateTime, FixedOffset, Utc};
|
||||||
use cylon::Cylon;
|
use cylon::Cylon;
|
||||||
@ -23,6 +24,7 @@ use tokio::time::Instant;
|
|||||||
|
|
||||||
pub mod analysis;
|
pub mod analysis;
|
||||||
pub mod page_extraction;
|
pub mod page_extraction;
|
||||||
|
pub mod rakemetrics;
|
||||||
pub mod references;
|
pub mod references;
|
||||||
pub mod task;
|
pub mod task;
|
||||||
|
|
||||||
@ -213,6 +215,7 @@ async fn response_to_bytes_limited(
|
|||||||
Some(next_chunk) => {
|
Some(next_chunk) => {
|
||||||
buffer.extend_from_slice(next_chunk?.as_bytes());
|
buffer.extend_from_slice(next_chunk?.as_bytes());
|
||||||
if buffer.len() > size_limit {
|
if buffer.len() > size_limit {
|
||||||
|
increment_counter!("qprake_rake_specific_fail_count", "reason" => "SizeLimit");
|
||||||
bail!("Exceeds size limit");
|
bail!("Exceeds size limit");
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -223,6 +226,7 @@ async fn response_to_bytes_limited(
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
_ = tokio::time::sleep_until(deadline) => {
|
_ = tokio::time::sleep_until(deadline) => {
|
||||||
|
increment_counter!("qprake_rake_specific_fail_count", "reason" => "TimeLimit");
|
||||||
bail!("Exceeded time limit");
|
bail!("Exceeded time limit");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -285,7 +289,11 @@ impl Raker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let code = response.status().as_u16().to_string();
|
||||||
|
increment_counter!("qprake_rake_status_count", "status" => code);
|
||||||
|
|
||||||
if response.status().is_client_error() {
|
if response.status().is_client_error() {
|
||||||
|
increment_counter!("qprake_rake_status_count", "status" => "4xx");
|
||||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||||
reason: PermanentFailureReason::ResourceDenied(http_code),
|
reason: PermanentFailureReason::ResourceDenied(http_code),
|
||||||
}));
|
}));
|
||||||
@ -309,6 +317,7 @@ impl Raker {
|
|||||||
.context("Can't convert content-type to str")?;
|
.context("Can't convert content-type to str")?;
|
||||||
content_type.split(";").next().unwrap().trim().to_owned()
|
content_type.split(";").next().unwrap().trim().to_owned()
|
||||||
} else {
|
} else {
|
||||||
|
increment_counter!("qprake_rake_specific_fail_count", "reason" => "NoCT");
|
||||||
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||||
reason: TemporaryFailureReason::MissingInformation("content-type".to_owned()),
|
reason: TemporaryFailureReason::MissingInformation("content-type".to_owned()),
|
||||||
backoff_sec: 86400 * 7,
|
backoff_sec: 86400 * 7,
|
||||||
|
21
quickpeep_raker/src/raking/rakemetrics.rs
Normal file
21
quickpeep_raker/src/raking/rakemetrics.rs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
use metrics::{describe_counter, Unit};
|
||||||
|
|
||||||
|
/// Registers a unit and a human-readable description for each raking counter.
///
/// Three counters are described here:
///
/// * `qprake_rake_status_count` — counted in [`Unit::Count`].
/// * `qprake_rake_specific_fail_count` — counted in [`Unit::Count`].
/// * `qprake_rake_content_bytes` — counted in [`Unit::Bytes`].
///
/// This function only describes the counters; it does not increment any of them.
pub fn describe_raking_metrics() {
    // Rakes, broken down by status.
    describe_counter!(
        "qprake_rake_status_count",
        Unit::Count,
        "Number of rakes, by status."
    );

    // Rakes that failed for a specific, named reason.
    describe_counter!(
        "qprake_rake_specific_fail_count",
        Unit::Count,
        "Number of specific rake failures."
    );

    // Volume of content raked, measured in bytes.
    describe_counter!(
        "qprake_rake_content_bytes",
        Unit::Bytes,
        "Number of content bytes raked."
    );
}
|
Loading…
Reference in New Issue
Block a user