Make structs cloneable
commit b6b30d01c7
parent 56e73e017d
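This commit derives Clone on the crate's two public structs (Cylon in src/dfa.rs and Compiler in src/parse.rs), bumps the version from 0.1.2 to 0.1.3 in Cargo.toml and Cargo.lock, and reflows several expressions; the reflowed hunks are formatting-only.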
Cargo.lock
@@ -215,7 +215,7 @@ dependencies = [
 
 [[package]]
 name = "cylon"
-version = "0.1.2"
+version = "0.1.3"
 dependencies = [
  "criterion",
  "futures-util",
Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "cylon"
 description = "An efficient compiler for robots.txt files"
-version = "0.1.2"
+version = "0.1.3"
 authors = ["Creston Bunch <rust@bunch.im>"]
 edition = "2018"
 
src/dfa.rs (34 changed lines)
@@ -45,7 +45,7 @@ enum State {
 /// file. By providing it a URL path, it can decide whether or not
 /// the robots file that compiled it allows or disallows that path in
 /// roughly O(n) time, where n is the length of the path.
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Cylon {
     states: Vec<State>,
     transitions: Vec<Vec<Transition>>,
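Deriving Clone means a compiled Cylon can be duplicated after the one-time compile step, for instance to hand copies to worker threads. A minimal sketch, assuming the crate's documented Compiler::new / compile / allow API and using tokio only to drive the async compile:

    use cylon::Compiler;

    #[tokio::main]
    async fn main() {
        let robots = "User-agent: *\nDisallow: /private\n";
        // Compile once (compile is assumed async and fallible here).
        let cylon = Compiler::new("googlebot")
            .compile(robots.as_bytes())
            .await
            .unwrap();
        // Clone the compiled DFA and move the copy into another thread.
        let cloned = cylon.clone();
        std::thread::spawn(move || {
            assert!(!cloned.allow("/private/page"));
        })
        .join()
        .unwrap();
        assert!(cylon.allow("/public/page")); // the original stays usable
    }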
@@ -92,8 +92,7 @@ impl Cylon {
 
         // Follow the EoW transition, if necessary
         let t = &self.transitions[state];
-        t
-            .iter()
+        t.iter()
             .rev()
             .find(|transition| match transition {
                 Transition(Edge::MatchEow, ..) => true,
@@ -210,7 +209,7 @@ impl Cylon {
 
             states.push(match state {
                 #[cfg(feature = "crawl-delay")]
-                State::Allow | State::Disallow | State::Delay => state,
+                State::Allow | State::Disallow | State::Delay => state,
                 #[cfg(not(feature = "crawl-delay"))]
                 State::Allow | State::Disallow => state,
                 State::Intermediate => states[wildcard_state],
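Note the #[cfg] attributes on individual match arms above: the Delay arm exists only when the crawl-delay feature is enabled, and the match stays exhaustive either way because the gated enum variant disappears with it. A standalone sketch of the pattern (the enum and function here are illustrative, not the crate's):

    enum State {
        Allow,
        Disallow,
        #[cfg(feature = "crawl-delay")]
        Delay,
    }

    fn label(s: &State) -> &'static str {
        match s {
            State::Allow => "allow",
            State::Disallow => "disallow",
            // This arm is compiled out together with the variant.
            #[cfg(feature = "crawl-delay")]
            State::Delay => "delay",
        }
    }

    fn main() {
        println!("{}", label(&State::Allow));
    }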
@@ -220,22 +219,19 @@ impl Cylon {
 
         #[cfg(feature = "crawl-delay")]
         {
-            let mut delays: Vec<Option<u64>> = rules.iter().filter(|rule| {
-                match rule {
+            let mut delays: Vec<Option<u64>> = rules
+                .iter()
+                .filter(|rule| match rule {
                     Rule::Delay(_) => true,
-                    _ => false
-                }
-            }).map(|r| {
-                r.inner().parse::<u64>().ok()
-            }).collect();
-            delays.sort_unstable_by(|a, b| {
-                match (a, b) {
-                    (None, Some(_)) => Ordering::Greater,
-                    (Some(_), None) => Ordering::Less,
-                    (None, None) => Ordering::Equal,
-                    (Some(aa), Some(bb)) => aa.cmp(bb)
-
-                }
-            });
+                    _ => false,
+                })
+                .map(|r| r.inner().parse::<u64>().ok())
+                .collect();
+            delays.sort_unstable_by(|a, b| match (a, b) {
+                (None, Some(_)) => Ordering::Greater,
+                (Some(_), None) => Ordering::Less,
+                (None, None) => Ordering::Equal,
+                (Some(aa), Some(bb)) => aa.cmp(bb),
+            });
             Self {
                 delay: *delays.get(0).unwrap_or(&None),
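The comparator above sorts parsed delays ahead of unparsable ones (Some before None) and ascending within Some, so after the sort, delays.get(0) yields the smallest crawl-delay that parsed. A standalone check of just that ordering:

    use std::cmp::Ordering;

    fn main() {
        let mut delays: Vec<Option<u64>> = vec![None, Some(10), Some(2), None];
        // Same comparator as the diff: Some(_) sorts before None,
        // and numeric delays sort ascending.
        delays.sort_unstable_by(|a, b| match (a, b) {
            (None, Some(_)) => Ordering::Greater,
            (Some(_), None) => Ordering::Less,
            (None, None) => Ordering::Equal,
            (Some(aa), Some(bb)) => aa.cmp(bb),
        });
        assert_eq!(delays, vec![Some(2), Some(10), None, None]);
        assert_eq!(*delays.get(0).unwrap_or(&None), Some(2)); // effective delay
    }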
src/parse.rs
@@ -39,7 +39,7 @@ enum ParsedLine {
 /// A compiler takes an input robots.txt file and outputs a compiled Cylon,
 /// which can be used to efficiently match a large number of paths against
 /// the robots.txt file.
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Compiler {
     user_agent: String,
 }
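Compiler gets the same derive; since it wraps only the user-agent string, cloning a configured compiler is a cheap way to give each crawl task its own handle. A sketch, again assuming the public Compiler::new constructor:

    use cylon::Compiler;

    fn main() {
        let base = Compiler::new("googlebot");
        // The clone owns its own copy of the user_agent String, so it can
        // move into a spawned thread while `base` remains usable here.
        let for_task = base.clone();
        std::thread::spawn(move || {
            let _compiler = for_task; // compile robots.txt files here
        })
        .join()
        .unwrap();
    }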
@@ -173,7 +173,7 @@ fn parse_line(line: String) -> ParsedLine {
 
     // This tries to parse lines roughly in order of most frequent kind to
     // least frequent kind in order to minimize CPU cycles on average.
 
     #[cfg(feature = "crawl-delay")]
     return parse_disallow(line)
-        .map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
+        .map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
@@ -181,7 +181,7 @@ fn parse_line(line: String) -> ParsedLine {
         .or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
-        .or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
+        .or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
         .unwrap_or(ParsedLine::Nothing);
 
     #[cfg(not(feature = "crawl-delay"))]
     return parse_disallow(line)
         .map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
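The comment in this function explains the or_else chain: the most frequent directive (Disallow) is tried first, so a typical robots.txt line returns after a single prefix test. A standalone sketch with hypothetical stub parsers (the real parse_* helpers return the crate's own types):

    // Hypothetical stand-ins for the diff's parse_* helpers.
    fn parse_disallow(line: &str) -> Option<&str> {
        line.strip_prefix("Disallow:").map(str::trim)
    }
    fn parse_allow(line: &str) -> Option<&str> {
        line.strip_prefix("Allow:").map(str::trim)
    }

    fn main() {
        let line = "Disallow: /private";
        // Most common directive first: the chain short-circuits on the
        // first closure that produces Some, mirroring parse_line above.
        let parsed = parse_disallow(line)
            .map(|p| format!("disallow {}", p))
            .or_else(|| parse_allow(line).map(|p| format!("allow {}", p)))
            .unwrap_or_else(|| String::from("nothing"));
        assert_eq!(parsed, "disallow /private");
    }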