Add more error context

This commit is contained in:
Olivier 'reivilibre' 2022-11-26 22:59:14 +00:00
parent 08f4b7aeaa
commit 438beed86a
1 changed file with 13 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
use crate::config::RerakeTimings;
use crate::raking::references::references_from_urlrakes;
use crate::raking::references::{clean_url, references_from_urlrakes};
use crate::raking::{
get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent,
RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason,
@@ -425,7 +425,7 @@ impl TaskContext {
RakeOutcome::Redirect { reason, new_url } => {
let refs = RakedReferrerEntry {
references: [RakedReference {
target: new_url.to_string(),
target: clean_url(&new_url).to_string(),
kind: match reason {
RedirectReason::Redirected { .. } => ReferenceKind::Redirect,
RedirectReason::NotCanonical { .. } => ReferenceKind::CanonicalUrl,
@@ -584,12 +584,20 @@ impl EventProcessor<'_> {
last_visited_days: datestamp,
},
rerake_on,
)?;
)
.context("failed to mark URL as visited")?;
// track all the referred-to URLs!
for reference in refs.references {
let ref_url = Url::parse(&reference.target)?;
let domain = get_reduced_domain(&ref_url)?;
let ref_url = Url::parse(&reference.target).with_context(|| {
format!(
"failed to parse target URL of reference: {:?}",
reference.target
)
})?;
let domain = get_reduced_domain(&ref_url).with_context(|| {
format!("failed to reduce domain: {:?}", reference.target)
})?;
// First check if this URL is an allowed URL (hence should be enqueued)
let allowed = txn