Don't hammer robots.txt
This commit is contained in:
parent
d8f4baf9a3
commit
d18d0635d7
@@ -182,7 +182,27 @@ impl TaskContext {
|
||||
let robot_url = robots_txt_url_for(&url)?;
|
||||
if Some(&robot_url) != current_robot_rules_url.as_ref() {
|
||||
// We need to update our robot rules!
|
||||
current_robot_rules = self.get_robot_rules(&url).await?;
|
||||
match self.get_robot_rules(&url).await {
|
||||
Ok(rules) => {
|
||||
current_robot_rules = rules;
|
||||
}
|
||||
Err(err) => {
|
||||
self.process_outcome(
|
||||
&url,
|
||||
RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||
reason: TemporaryFailureReason::UnknownClientError(format!(
|
||||
"robots.txt failure {:?}: {:?}",
|
||||
url, err
|
||||
)),
|
||||
// Back off for a day (86400 seconds): hopefully that is enough time for the operator to fix the problem.
|
||||
backoff_sec: 86400,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
// Forcefully change domain: return early instead of continuing with this one.
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
current_robot_rules_url = Some(robot_url);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user