Don't hammer robots.txt
This commit is contained in:
parent
d8f4baf9a3
commit
d18d0635d7
@ -182,7 +182,27 @@ impl TaskContext {
|
|||||||
let robot_url = robots_txt_url_for(&url)?;
|
let robot_url = robots_txt_url_for(&url)?;
|
||||||
if Some(&robot_url) != current_robot_rules_url.as_ref() {
|
if Some(&robot_url) != current_robot_rules_url.as_ref() {
|
||||||
// We need to update our robot rules!
|
// We need to update our robot rules!
|
||||||
current_robot_rules = self.get_robot_rules(&url).await?;
|
match self.get_robot_rules(&url).await {
|
||||||
|
Ok(rules) => {
|
||||||
|
current_robot_rules = rules;
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
self.process_outcome(
|
||||||
|
&url,
|
||||||
|
RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||||
|
reason: TemporaryFailureReason::UnknownClientError(format!(
|
||||||
|
"robots.txt failure {:?}: {:?}",
|
||||||
|
url, err
|
||||||
|
)),
|
||||||
|
// Back off for a day: this ought to be enough time for the operator to fix the problem... maybe?
|
||||||
|
backoff_sec: 86400,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
// Forcefully change domain
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
current_robot_rules_url = Some(robot_url);
|
current_robot_rules_url = Some(robot_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user