Make structs cloneable
This commit is contained in:
parent
56e73e017d
commit
b6b30d01c7
|
@ -215,7 +215,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cylon"
|
name = "cylon"
|
||||||
version = "0.1.2"
|
version = "0.1.3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"criterion",
|
"criterion",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
name = "cylon"
|
name = "cylon"
|
||||||
description = "An efficient compiler for robots.txt files"
|
description = "An efficient compiler for robots.txt files"
|
||||||
version = "0.1.2"
|
version = "0.1.3"
|
||||||
authors = ["Creston Bunch <rust@bunch.im>"]
|
authors = ["Creston Bunch <rust@bunch.im>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
|
|
34
src/dfa.rs
34
src/dfa.rs
|
@ -45,7 +45,7 @@ enum State {
|
||||||
/// file. By providing it a URL path, it can decide whether or not
|
/// file. By providing it a URL path, it can decide whether or not
|
||||||
/// the robots file that compiled it allows or disallows that path in
|
/// the robots file that compiled it allows or disallows that path in
|
||||||
/// roughly O(n) time, where n is the length of the path.
|
/// roughly O(n) time, where n is the length of the path.
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Cylon {
|
pub struct Cylon {
|
||||||
states: Vec<State>,
|
states: Vec<State>,
|
||||||
transitions: Vec<Vec<Transition>>,
|
transitions: Vec<Vec<Transition>>,
|
||||||
|
@ -92,8 +92,7 @@ impl Cylon {
|
||||||
|
|
||||||
// Follow the EoW transition, if necessary
|
// Follow the EoW transition, if necessary
|
||||||
let t = &self.transitions[state];
|
let t = &self.transitions[state];
|
||||||
t
|
t.iter()
|
||||||
.iter()
|
|
||||||
.rev()
|
.rev()
|
||||||
.find(|transition| match transition {
|
.find(|transition| match transition {
|
||||||
Transition(Edge::MatchEow, ..) => true,
|
Transition(Edge::MatchEow, ..) => true,
|
||||||
|
@ -210,7 +209,7 @@ impl Cylon {
|
||||||
|
|
||||||
states.push(match state {
|
states.push(match state {
|
||||||
#[cfg(feature = "crawl-delay")]
|
#[cfg(feature = "crawl-delay")]
|
||||||
State::Allow | State::Disallow | State::Delay => state,
|
State::Allow | State::Disallow | State::Delay => state,
|
||||||
#[cfg(not(feature = "crawl-delay"))]
|
#[cfg(not(feature = "crawl-delay"))]
|
||||||
State::Allow | State::Disallow => state,
|
State::Allow | State::Disallow => state,
|
||||||
State::Intermediate => states[wildcard_state],
|
State::Intermediate => states[wildcard_state],
|
||||||
|
@ -220,22 +219,19 @@ impl Cylon {
|
||||||
|
|
||||||
#[cfg(feature = "crawl-delay")]
|
#[cfg(feature = "crawl-delay")]
|
||||||
{
|
{
|
||||||
let mut delays: Vec<Option<u64>> = rules.iter().filter(|rule| {
|
let mut delays: Vec<Option<u64>> = rules
|
||||||
match rule {
|
.iter()
|
||||||
|
.filter(|rule| match rule {
|
||||||
Rule::Delay(_) => true,
|
Rule::Delay(_) => true,
|
||||||
_ => false
|
_ => false,
|
||||||
}
|
})
|
||||||
}).map(|r| {
|
.map(|r| r.inner().parse::<u64>().ok())
|
||||||
r.inner().parse::<u64>().ok()
|
.collect();
|
||||||
}).collect();
|
delays.sort_unstable_by(|a, b| match (a, b) {
|
||||||
delays.sort_unstable_by(|a, b| {
|
(None, Some(_)) => Ordering::Greater,
|
||||||
match (a, b) {
|
(Some(_), None) => Ordering::Less,
|
||||||
(None, Some(_)) => Ordering::Greater,
|
(None, None) => Ordering::Equal,
|
||||||
(Some(_), None) => Ordering::Less,
|
(Some(aa), Some(bb)) => aa.cmp(bb),
|
||||||
(None, None) => Ordering::Equal,
|
|
||||||
(Some(aa), Some(bb)) => aa.cmp(bb)
|
|
||||||
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
Self {
|
Self {
|
||||||
delay: *delays.get(0).unwrap_or(&None),
|
delay: *delays.get(0).unwrap_or(&None),
|
||||||
|
|
|
@ -39,7 +39,7 @@ enum ParsedLine {
|
||||||
/// A compiler takes an input robots.txt file and outputs a compiled Cylon,
|
/// A compiler takes an input robots.txt file and outputs a compiled Cylon,
|
||||||
/// which can be used to efficiently match a large number of paths against
|
/// which can be used to efficiently match a large number of paths against
|
||||||
/// the robots.txt file.
|
/// the robots.txt file.
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Compiler {
|
pub struct Compiler {
|
||||||
user_agent: String,
|
user_agent: String,
|
||||||
}
|
}
|
||||||
|
@ -173,7 +173,7 @@ fn parse_line(line: String) -> ParsedLine {
|
||||||
|
|
||||||
// This tries to parse lines roughly in order of most frequent kind to
|
// This tries to parse lines roughly in order of most frequent kind to
|
||||||
// least frequent kind in order to minimize CPU cycles on average.
|
// least frequent kind in order to minimize CPU cycles on average.
|
||||||
|
|
||||||
#[cfg(feature = "crawl-delay")]
|
#[cfg(feature = "crawl-delay")]
|
||||||
return parse_disallow(line)
|
return parse_disallow(line)
|
||||||
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||||
|
@ -181,7 +181,7 @@ fn parse_line(line: String) -> ParsedLine {
|
||||||
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
||||||
.or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
|
.or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
|
||||||
.unwrap_or(ParsedLine::Nothing);
|
.unwrap_or(ParsedLine::Nothing);
|
||||||
|
|
||||||
#[cfg(not(feature = "crawl-delay"))]
|
#[cfg(not(feature = "crawl-delay"))]
|
||||||
return parse_disallow(line)
|
return parse_disallow(line)
|
||||||
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||||
|
|
Loading…
Reference in New Issue