From 438beed86a68726c8dc438876fd95fe6112ca464 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sat, 26 Nov 2022 22:59:14 +0000 Subject: [PATCH] Add more error context --- quickpeep_raker/src/raking/task.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/quickpeep_raker/src/raking/task.rs b/quickpeep_raker/src/raking/task.rs index 45eb46e..3d2fe3e 100644 --- a/quickpeep_raker/src/raking/task.rs +++ b/quickpeep_raker/src/raking/task.rs @@ -1,5 +1,5 @@ use crate::config::RerakeTimings; -use crate::raking::references::references_from_urlrakes; +use crate::raking::references::{clean_url, references_from_urlrakes}; use crate::raking::{ get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent, RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason, @@ -425,7 +425,7 @@ impl TaskContext { RakeOutcome::Redirect { reason, new_url } => { let refs = RakedReferrerEntry { references: [RakedReference { - target: new_url.to_string(), + target: clean_url(&new_url).to_string(), kind: match reason { RedirectReason::Redirected { .. } => ReferenceKind::Redirect, RedirectReason::NotCanonical { .. } => ReferenceKind::CanonicalUrl, @@ -584,12 +584,20 @@ impl EventProcessor<'_> { last_visited_days: datestamp, }, rerake_on, - )?; + ) + .context("failed to mark URL as visited")?; // track all the referred-to URLs! for reference in refs.references { - let ref_url = Url::parse(&reference.target)?; - let domain = get_reduced_domain(&ref_url)?; + let ref_url = Url::parse(&reference.target).with_context(|| { + format!( + "failed to parse target URL of reference: {:?}", + reference.target + ) + })?; + let domain = get_reduced_domain(&ref_url).with_context(|| { + format!("failed to reduce domain: {:?}", reference.target) + })?; // First check if this URL is an allowed URL (hence should be enqueued) let allowed = txn