Add more error context
This commit is contained in:
parent
08f4b7aeaa
commit
438beed86a
|
@ -1,5 +1,5 @@
|
||||||
use crate::config::RerakeTimings;
|
use crate::config::RerakeTimings;
|
||||||
use crate::raking::references::references_from_urlrakes;
|
use crate::raking::references::{clean_url, references_from_urlrakes};
|
||||||
use crate::raking::{
|
use crate::raking::{
|
||||||
get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent,
|
get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent,
|
||||||
RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason,
|
RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason,
|
||||||
|
@ -425,7 +425,7 @@ impl TaskContext {
|
||||||
RakeOutcome::Redirect { reason, new_url } => {
|
RakeOutcome::Redirect { reason, new_url } => {
|
||||||
let refs = RakedReferrerEntry {
|
let refs = RakedReferrerEntry {
|
||||||
references: [RakedReference {
|
references: [RakedReference {
|
||||||
target: new_url.to_string(),
|
target: clean_url(&new_url).to_string(),
|
||||||
kind: match reason {
|
kind: match reason {
|
||||||
RedirectReason::Redirected { .. } => ReferenceKind::Redirect,
|
RedirectReason::Redirected { .. } => ReferenceKind::Redirect,
|
||||||
RedirectReason::NotCanonical { .. } => ReferenceKind::CanonicalUrl,
|
RedirectReason::NotCanonical { .. } => ReferenceKind::CanonicalUrl,
|
||||||
|
@ -584,12 +584,20 @@ impl EventProcessor<'_> {
|
||||||
last_visited_days: datestamp,
|
last_visited_days: datestamp,
|
||||||
},
|
},
|
||||||
rerake_on,
|
rerake_on,
|
||||||
)?;
|
)
|
||||||
|
.context("failed to mark URL as visited")?;
|
||||||
|
|
||||||
// track all the referred-to URLs!
|
// track all the referred-to URLs!
|
||||||
for reference in refs.references {
|
for reference in refs.references {
|
||||||
let ref_url = Url::parse(&reference.target)?;
|
let ref_url = Url::parse(&reference.target).with_context(|| {
|
||||||
let domain = get_reduced_domain(&ref_url)?;
|
format!(
|
||||||
|
"failed to parse target URL of reference: {:?}",
|
||||||
|
reference.target
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let domain = get_reduced_domain(&ref_url).with_context(|| {
|
||||||
|
format!("failed to reduce domain: {:?}", reference.target)
|
||||||
|
})?;
|
||||||
|
|
||||||
// First check if this URL is an allowed URL (hence should be enqueued)
|
// First check if this URL is an allowed URL (hence should be enqueued)
|
||||||
let allowed = txn
|
let allowed = txn
|
||||||
|
|
Loading…
Reference in New Issue