Add a lot of debug output
Some checks failed
ci/woodpecker/push/check Pipeline was successful
ci/woodpecker/push/manual Pipeline failed
ci/woodpecker/push/release Pipeline was successful

This commit is contained in:
Olivier 'reivilibre' 2022-11-26 22:45:51 +00:00
parent 2ce8e2ba8e
commit 08f4b7aeaa

View File

@ -145,7 +145,8 @@ impl TaskContext {
// Get a URL to process // Get a URL to process
let url = { let url = {
let txn = self.store.ro_txn()?; let txn = self.store.ro_txn()?;
txn.choose_url_for_domain(&domain)? txn.choose_url_for_domain(&domain)
.context("failed to choose URL for domain")?
}; };
let (url_str, url_record) = if let Some(url) = url { let (url_str, url_record) = if let Some(url) = url {
@ -166,12 +167,14 @@ impl TaskContext {
} }
// Delete the active domain from the store // Delete the active domain from the store
txn.remove_active_domain(&domain)?; txn.remove_active_domain(&domain)
.context("failed to remove active domain")?;
txn.commit()?; txn.commit()?;
Ok(true) Ok(true)
}) })
.await?; .await
.context("failed to check if we're out of URLs")?;
if out_of_urls { if out_of_urls {
break; break;
} else { } else {
@ -179,10 +182,12 @@ impl TaskContext {
} }
}; };
let url = Url::parse(&url_str)?; let url = Url::parse(&url_str)
.with_context(|| format!("failed to parse as URL: {url_str:?}"))?;
// Check our robot rules are valid for that URL. // Check our robot rules are valid for that URL.
let robot_url = robots_txt_url_for(&url)?; let robot_url = robots_txt_url_for(&url)
.with_context(|| format!("failed to get robots.txt URL for {url_str:?}"))?;
if Some(&robot_url) != current_robot_rules_url.as_ref() { if Some(&robot_url) != current_robot_rules_url.as_ref() {
// We need to update our robot rules! // We need to update our robot rules!
match self.get_robot_rules(&url).await { match self.get_robot_rules(&url).await {
@ -201,7 +206,8 @@ impl TaskContext {
backoff_sec: 86400, backoff_sec: 86400,
}), }),
) )
.await?; .await
.context("failed to handle TemporaryFailure outcome for robots.txt")?;
// Forcefully change domain // Forcefully change domain
return Ok(()); return Ok(());
} }
@ -219,7 +225,8 @@ impl TaskContext {
reason: PermanentFailureReason::DeniedToRobots, reason: PermanentFailureReason::DeniedToRobots,
}), }),
) )
.await?; .await
.context("failed to process PermanentFailure outcome for robots.txt")?;
continue; continue;
} }
} }
@ -323,7 +330,8 @@ impl TaskContext {
txn.commit()?; txn.commit()?;
Ok(()) Ok(())
}) })
.await?; .await
.context("failure whilst turning long crawl delay into backoff")?;
} }
} }
@ -349,10 +357,12 @@ impl TaskContext {
self.as_event_processor() self.as_event_processor()
.process_page(url.clone(), page.page_entry, today) .process_page(url.clone(), page.page_entry, today)
.await?; .await
.context("failure processing page for RakedPage")?;
self.as_event_processor() self.as_event_processor()
.process_refs(url.clone(), page.referrer_entry, today, false) .process_refs(url.clone(), page.referrer_entry, today, false)
.await?; .await
.context("failure processing refs for RakedPage")?;
Ok(NextAction::Continue) Ok(NextAction::Continue)
} }
@ -369,7 +379,8 @@ impl TaskContext {
self.as_event_processor() self.as_event_processor()
.process_refs(url.clone(), refs, today, true) .process_refs(url.clone(), refs, today, true)
.await?; .await
.context("failure processing refs for RakedFeed")?;
Ok(NextAction::Continue) Ok(NextAction::Continue)
} }
@ -386,7 +397,8 @@ impl TaskContext {
self.as_event_processor() self.as_event_processor()
.process_refs(url.clone(), refs, today, true) .process_refs(url.clone(), refs, today, true)
.await?; .await
.context("failure processing refs for RakedSitemap")?;
Ok(NextAction::Continue) Ok(NextAction::Continue)
} }
@ -405,7 +417,8 @@ impl TaskContext {
self.as_event_processor() self.as_event_processor()
.process_icon(url.clone(), today) .process_icon(url.clone(), today)
.await?; .await
.context("failure processing icon for RakedIcon")?;
Ok(NextAction::Continue) Ok(NextAction::Continue)
} }
@ -431,7 +444,8 @@ impl TaskContext {
self.as_event_processor() self.as_event_processor()
.process_refs(url.clone(), refs, today, false) .process_refs(url.clone(), refs, today, false)
.await?; .await
.context("Failure processing refs for Redirect")?;
Ok(NextAction::Continue) Ok(NextAction::Continue)
} }
@ -452,7 +466,8 @@ impl TaskContext {
txn.commit()?; txn.commit()?;
Ok(()) Ok(())
}) })
.await?; .await
.context("failed to store backoff")?;
// Change domain now // Change domain now
Ok(NextAction::ChangeDomain) Ok(NextAction::ChangeDomain)
@ -465,7 +480,8 @@ impl TaskContext {
.context("Rejection processor shut down; can't stream rejection!!")?; .context("Rejection processor shut down; can't stream rejection!!")?;
self.as_event_processor() self.as_event_processor()
.process_rejection(url.clone(), today) .process_rejection(url.clone(), today)
.await?; .await
.context("failed to process rejection for PermanentFailure")?;
// Reasons for permanent rejection aren't our fault or a site-wide fault; // Reasons for permanent rejection aren't our fault or a site-wide fault;
// so don't worry about carrying on. // so don't worry about carrying on.