Make structs cloneable

This commit is contained in:
Creston Bunch 2021-04-13 21:49:36 -05:00
parent 56e73e017d
commit b6b30d01c7
4 changed files with 20 additions and 24 deletions

2
Cargo.lock generated
View File

@ -215,7 +215,7 @@ dependencies = [
[[package]] [[package]]
name = "cylon" name = "cylon"
version = "0.1.2" version = "0.1.3"
dependencies = [ dependencies = [
"criterion", "criterion",
"futures-util", "futures-util",

View File

@ -1,7 +1,7 @@
[package] [package]
name = "cylon" name = "cylon"
description = "An efficient compiler for robots.txt files" description = "An efficient compiler for robots.txt files"
version = "0.1.2" version = "0.1.3"
authors = ["Creston Bunch <rust@bunch.im>"] authors = ["Creston Bunch <rust@bunch.im>"]
edition = "2018" edition = "2018"

View File

@ -45,7 +45,7 @@ enum State {
/// file. By providing it a URL path, it can decide whether or not /// file. By providing it a URL path, it can decide whether or not
/// the robots file that compiled it allows or disallows that path in /// the robots file that compiled it allows or disallows that path in
/// roughly O(n) time, where n is the length of the path. /// roughly O(n) time, where n is the length of the path.
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Cylon { pub struct Cylon {
states: Vec<State>, states: Vec<State>,
transitions: Vec<Vec<Transition>>, transitions: Vec<Vec<Transition>>,
@ -92,8 +92,7 @@ impl Cylon {
// Follow the EoW transition, if necessary // Follow the EoW transition, if necessary
let t = &self.transitions[state]; let t = &self.transitions[state];
t t.iter()
.iter()
.rev() .rev()
.find(|transition| match transition { .find(|transition| match transition {
Transition(Edge::MatchEow, ..) => true, Transition(Edge::MatchEow, ..) => true,
@ -210,7 +209,7 @@ impl Cylon {
states.push(match state { states.push(match state {
#[cfg(feature = "crawl-delay")] #[cfg(feature = "crawl-delay")]
State::Allow | State::Disallow | State::Delay => state, State::Allow | State::Disallow | State::Delay => state,
#[cfg(not(feature = "crawl-delay"))] #[cfg(not(feature = "crawl-delay"))]
State::Allow | State::Disallow => state, State::Allow | State::Disallow => state,
State::Intermediate => states[wildcard_state], State::Intermediate => states[wildcard_state],
@ -220,22 +219,19 @@ impl Cylon {
#[cfg(feature = "crawl-delay")] #[cfg(feature = "crawl-delay")]
{ {
let mut delays: Vec<Option<u64>> = rules.iter().filter(|rule| { let mut delays: Vec<Option<u64>> = rules
match rule { .iter()
.filter(|rule| match rule {
Rule::Delay(_) => true, Rule::Delay(_) => true,
_ => false _ => false,
} })
}).map(|r| { .map(|r| r.inner().parse::<u64>().ok())
r.inner().parse::<u64>().ok() .collect();
}).collect(); delays.sort_unstable_by(|a, b| match (a, b) {
delays.sort_unstable_by(|a, b| { (None, Some(_)) => Ordering::Greater,
match (a, b) { (Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater, (None, None) => Ordering::Equal,
(Some(_), None) => Ordering::Less, (Some(aa), Some(bb)) => aa.cmp(bb),
(None, None) => Ordering::Equal,
(Some(aa), Some(bb)) => aa.cmp(bb)
}
}); });
Self { Self {
delay: *delays.get(0).unwrap_or(&None), delay: *delays.get(0).unwrap_or(&None),

View File

@ -39,7 +39,7 @@ enum ParsedLine {
/// A compiler takes an input robots.txt file and outputs a compiled Cylon, /// A compiler takes an input robots.txt file and outputs a compiled Cylon,
/// which can be used to efficiently match a large number of paths against /// which can be used to efficiently match a large number of paths against
/// the robots.txt file. /// the robots.txt file.
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Compiler { pub struct Compiler {
user_agent: String, user_agent: String,
} }