completely isolate crawl-delay code into a feature
This commit is contained in:
parent
e904e91972
commit
f331be990f
|
@ -7,6 +7,7 @@ use serde_derive::{Deserialize, Serialize};
|
||||||
pub enum Rule<'a> {
|
pub enum Rule<'a> {
|
||||||
Allow(&'a str),
|
Allow(&'a str),
|
||||||
Disallow(&'a str),
|
Disallow(&'a str),
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
Delay(&'a str),
|
Delay(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,6 +16,7 @@ impl<'a> Rule<'a> {
|
||||||
match self {
|
match self {
|
||||||
Rule::Allow(inner) => inner,
|
Rule::Allow(inner) => inner,
|
||||||
Rule::Disallow(inner) => inner,
|
Rule::Disallow(inner) => inner,
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
Rule::Delay(inner) => inner,
|
Rule::Delay(inner) => inner,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
17
src/parse.rs
17
src/parse.rs
|
@ -5,6 +5,7 @@ use futures_util::{
|
||||||
};
|
};
|
||||||
use serde_derive::{Deserialize, Serialize};
|
use serde_derive::{Deserialize, Serialize};
|
||||||
const UA_PREFIX: &str = "user-agent:";
|
const UA_PREFIX: &str = "user-agent:";
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
const DELAY_PREFIX: &str = "crawl-delay:";
|
const DELAY_PREFIX: &str = "crawl-delay:";
|
||||||
const ALLOW_PREFIX: &str = "allow:";
|
const ALLOW_PREFIX: &str = "allow:";
|
||||||
const DISALLOW_PREFIX: &str = "disallow:";
|
const DISALLOW_PREFIX: &str = "disallow:";
|
||||||
|
@ -13,6 +14,7 @@ const DISALLOW_PREFIX: &str = "disallow:";
|
||||||
enum ParsedRule {
|
enum ParsedRule {
|
||||||
Allow(String),
|
Allow(String),
|
||||||
Disallow(String),
|
Disallow(String),
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
Delay(String),
|
Delay(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,6 +23,7 @@ impl<'a> Into<Rule<'a>> for &'a ParsedRule {
|
||||||
match self {
|
match self {
|
||||||
ParsedRule::Allow(path) => Rule::Allow(&path[..]),
|
ParsedRule::Allow(path) => Rule::Allow(&path[..]),
|
||||||
ParsedRule::Disallow(path) => Rule::Disallow(&path[..]),
|
ParsedRule::Disallow(path) => Rule::Disallow(&path[..]),
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
ParsedRule::Delay(delay) => Rule::Delay(delay),
|
ParsedRule::Delay(delay) => Rule::Delay(delay),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,12 +173,21 @@ fn parse_line(line: String) -> ParsedLine {
|
||||||
|
|
||||||
// This tries to parse lines roughly in order of most frequent kind to
|
// This tries to parse lines roughly in order of most frequent kind to
|
||||||
// least frequent kind in order to minimize CPU cycles on average.
|
// least frequent kind in order to minimize CPU cycles on average.
|
||||||
parse_disallow(line)
|
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
|
return parse_disallow(line)
|
||||||
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||||
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
||||||
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
||||||
.or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
|
.or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
|
||||||
.unwrap_or(ParsedLine::Nothing)
|
.unwrap_or(ParsedLine::Nothing);
|
||||||
|
|
||||||
|
#[cfg(not(feature = "crawl-delay"))]
|
||||||
|
return parse_disallow(line)
|
||||||
|
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||||
|
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
||||||
|
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
||||||
|
.unwrap_or(ParsedLine::Nothing);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strip_comments(line: &str) -> &str {
|
fn strip_comments(line: &str) -> &str {
|
||||||
|
@ -199,6 +211,7 @@ fn parse_user_agent(line: &str) -> Option<&str> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "crawl-delay")]
|
||||||
fn parse_delay(line: &str) -> Option<&str> {
|
fn parse_delay(line: &str) -> Option<&str> {
|
||||||
if line.len() < DELAY_PREFIX.len() {
|
if line.len() < DELAY_PREFIX.len() {
|
||||||
return None;
|
return None;
|
||||||
|
|
Loading…
Reference in New Issue