completely isolate crawl-delay code into a feature
This commit is contained in:
parent
e904e91972
commit
f331be990f
|
@ -7,6 +7,7 @@ use serde_derive::{Deserialize, Serialize};
|
|||
pub enum Rule<'a> {
|
||||
Allow(&'a str),
|
||||
Disallow(&'a str),
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
Delay(&'a str),
|
||||
}
|
||||
|
||||
|
@ -15,6 +16,7 @@ impl<'a> Rule<'a> {
|
|||
match self {
|
||||
Rule::Allow(inner) => inner,
|
||||
Rule::Disallow(inner) => inner,
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
Rule::Delay(inner) => inner,
|
||||
}
|
||||
}
|
||||
|
|
17
src/parse.rs
17
src/parse.rs
|
@ -5,6 +5,7 @@ use futures_util::{
|
|||
};
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
const UA_PREFIX: &str = "user-agent:";
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
const DELAY_PREFIX: &str = "crawl-delay:";
|
||||
const ALLOW_PREFIX: &str = "allow:";
|
||||
const DISALLOW_PREFIX: &str = "disallow:";
|
||||
|
@ -13,6 +14,7 @@ const DISALLOW_PREFIX: &str = "disallow:";
|
|||
enum ParsedRule {
|
||||
Allow(String),
|
||||
Disallow(String),
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
Delay(String),
|
||||
}
|
||||
|
||||
|
@ -21,6 +23,7 @@ impl<'a> Into<Rule<'a>> for &'a ParsedRule {
|
|||
match self {
|
||||
ParsedRule::Allow(path) => Rule::Allow(&path[..]),
|
||||
ParsedRule::Disallow(path) => Rule::Disallow(&path[..]),
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
ParsedRule::Delay(delay) => Rule::Delay(delay),
|
||||
}
|
||||
}
|
||||
|
@ -170,12 +173,21 @@ fn parse_line(line: String) -> ParsedLine {
|
|||
|
||||
// This tries to parse lines roughly in order of most frequent kind to
|
||||
// least frequent kind in order to minimize CPU cycles on average.
|
||||
parse_disallow(line)
|
||||
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
return parse_disallow(line)
|
||||
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
||||
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
||||
.or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
|
||||
.unwrap_or(ParsedLine::Nothing)
|
||||
.unwrap_or(ParsedLine::Nothing);
|
||||
|
||||
#[cfg(not(feature = "crawl-delay"))]
|
||||
return parse_disallow(line)
|
||||
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
||||
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
||||
.unwrap_or(ParsedLine::Nothing);
|
||||
}
|
||||
|
||||
fn strip_comments(line: &str) -> &str {
|
||||
|
@ -199,6 +211,7 @@ fn parse_user_agent(line: &str) -> Option<&str> {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
fn parse_delay(line: &str) -> Option<&str> {
|
||||
if line.len() < DELAY_PREFIX.len() {
|
||||
return None;
|
||||
|
|
Loading…
Reference in New Issue