Add a lot of debug output
This commit is contained in:
parent
2ce8e2ba8e
commit
08f4b7aeaa
|
@ -145,7 +145,8 @@ impl TaskContext {
|
|||
// Get a URL to process
|
||||
let url = {
|
||||
let txn = self.store.ro_txn()?;
|
||||
txn.choose_url_for_domain(&domain)?
|
||||
txn.choose_url_for_domain(&domain)
|
||||
.context("failed to choose URL for domain")?
|
||||
};
|
||||
|
||||
let (url_str, url_record) = if let Some(url) = url {
|
||||
|
@ -166,12 +167,14 @@ impl TaskContext {
|
|||
}
|
||||
|
||||
// Delete the active domain from the store
|
||||
txn.remove_active_domain(&domain)?;
|
||||
txn.remove_active_domain(&domain)
|
||||
.context("failed to remove active domain")?;
|
||||
|
||||
txn.commit()?;
|
||||
Ok(true)
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.context("failed to check if we're out of URLs")?;
|
||||
if out_of_urls {
|
||||
break;
|
||||
} else {
|
||||
|
@ -179,10 +182,12 @@ impl TaskContext {
|
|||
}
|
||||
};
|
||||
|
||||
let url = Url::parse(&url_str)?;
|
||||
let url = Url::parse(&url_str)
|
||||
.with_context(|| format!("failed to parse as URL: {url_str:?}"))?;
|
||||
|
||||
// Check our robot rules are valid for that URL.
|
||||
let robot_url = robots_txt_url_for(&url)?;
|
||||
let robot_url = robots_txt_url_for(&url)
|
||||
.with_context(|| format!("failed to get robots.txt URL for {url_str:?}"))?;
|
||||
if Some(&robot_url) != current_robot_rules_url.as_ref() {
|
||||
// We need to update our robot rules!
|
||||
match self.get_robot_rules(&url).await {
|
||||
|
@ -201,7 +206,8 @@ impl TaskContext {
|
|||
backoff_sec: 86400,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context("failed to handle TemporaryFailure outcome for robots.txt")?;
|
||||
// Forcefully change domain
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -219,7 +225,8 @@ impl TaskContext {
|
|||
reason: PermanentFailureReason::DeniedToRobots,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.context("failed to process PermanentFailure outcome for robots.txt")?;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -323,7 +330,8 @@ impl TaskContext {
|
|||
txn.commit()?;
|
||||
Ok(())
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.context("failure whilst turning long crawl delay into backoff")?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -349,10 +357,12 @@ impl TaskContext {
|
|||
|
||||
self.as_event_processor()
|
||||
.process_page(url.clone(), page.page_entry, today)
|
||||
.await?;
|
||||
.await
|
||||
.context("failure processing page for RakedPage")?;
|
||||
self.as_event_processor()
|
||||
.process_refs(url.clone(), page.referrer_entry, today, false)
|
||||
.await?;
|
||||
.await
|
||||
.context("failure processing refs for RakedPage")?;
|
||||
|
||||
Ok(NextAction::Continue)
|
||||
}
|
||||
|
@ -369,7 +379,8 @@ impl TaskContext {
|
|||
|
||||
self.as_event_processor()
|
||||
.process_refs(url.clone(), refs, today, true)
|
||||
.await?;
|
||||
.await
|
||||
.context("failure processing refs for RakedFeed")?;
|
||||
|
||||
Ok(NextAction::Continue)
|
||||
}
|
||||
|
@ -386,7 +397,8 @@ impl TaskContext {
|
|||
|
||||
self.as_event_processor()
|
||||
.process_refs(url.clone(), refs, today, true)
|
||||
.await?;
|
||||
.await
|
||||
.context("failure processing refs for RakedSitemap")?;
|
||||
|
||||
Ok(NextAction::Continue)
|
||||
}
|
||||
|
@ -405,7 +417,8 @@ impl TaskContext {
|
|||
|
||||
self.as_event_processor()
|
||||
.process_icon(url.clone(), today)
|
||||
.await?;
|
||||
.await
|
||||
.context("failure processing icon for RakedIcon")?;
|
||||
|
||||
Ok(NextAction::Continue)
|
||||
}
|
||||
|
@ -431,7 +444,8 @@ impl TaskContext {
|
|||
|
||||
self.as_event_processor()
|
||||
.process_refs(url.clone(), refs, today, false)
|
||||
.await?;
|
||||
.await
|
||||
.context("Failure processing refs for Redirect")?;
|
||||
|
||||
Ok(NextAction::Continue)
|
||||
}
|
||||
|
@ -452,7 +466,8 @@ impl TaskContext {
|
|||
txn.commit()?;
|
||||
Ok(())
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.context("failed to store backoff")?;
|
||||
|
||||
// Change domain now
|
||||
Ok(NextAction::ChangeDomain)
|
||||
|
@ -465,7 +480,8 @@ impl TaskContext {
|
|||
.context("Rejection processor shut down; can't stream rejection!!")?;
|
||||
self.as_event_processor()
|
||||
.process_rejection(url.clone(), today)
|
||||
.await?;
|
||||
.await
|
||||
.context("failed to process rejection for PermanentFailure")?;
|
||||
|
||||
// Reasons for permanent rejection aren't our fault or a site-wide fault;
|
||||
// so don't worry about carrying on.
|
||||
|
|
Loading…
Reference in New Issue