test - bug-report
This commit is contained in:
parent
b2bab4873c
commit
86ee746b96
|
@ -247,9 +247,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-channel"
|
name = "futures-channel"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
|
checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-sink",
|
"futures-sink",
|
||||||
|
@ -257,9 +257,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-core"
|
name = "futures-core"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
|
checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-executor"
|
name = "futures-executor"
|
||||||
|
@ -274,15 +274,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-io"
|
name = "futures-io"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
|
checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-macro"
|
name = "futures-macro"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
|
checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro-hack",
|
"proc-macro-hack",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
|
@ -292,24 +292,21 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
|
checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-task"
|
name = "futures-task"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
|
checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
|
||||||
dependencies = [
|
|
||||||
"once_cell",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-util"
|
name = "futures-util"
|
||||||
version = "0.3.12"
|
version = "0.3.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
|
checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
@ -428,12 +425,6 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.5.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "oorandom"
|
name = "oorandom"
|
||||||
version = "11.1.3"
|
version = "11.1.3"
|
||||||
|
@ -606,9 +597,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde"
|
name = "serde"
|
||||||
version = "1.0.123"
|
version = "1.0.124"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
|
checksum = "bd761ff957cb2a45fbb9ab3da6512de9de55872866160b23c25f1a841e99d29f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_cbor"
|
name = "serde_cbor"
|
||||||
|
@ -622,9 +613,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_derive"
|
name = "serde_derive"
|
||||||
version = "1.0.123"
|
version = "1.0.124"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31"
|
checksum = "1800f7693e94e186f5e25a28291ae1570da908aff7d97a095dec1e56ff99069b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
|
|
@ -11,9 +11,9 @@ keywords = ["robots", "txt", "parse", "compile"]
|
||||||
repository = "https://github.com/crestonbunch/cylon"
|
repository = "https://github.com/crestonbunch/cylon"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
futures-util = "0.3"
|
futures-util = "0.3.13"
|
||||||
serde = "1.0"
|
serde = "1.0.124"
|
||||||
serde_derive = "1.0"
|
serde_derive = "1.0.124"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
criterion = { version = "0.3", features = ["async_futures"] }
|
criterion = { version = "0.3", features = ["async_futures"] }
|
||||||
|
|
|
@ -4,6 +4,7 @@ use serde_derive::{Deserialize, Serialize};
|
||||||
pub enum Rule<'a> {
|
pub enum Rule<'a> {
|
||||||
Allow(&'a str),
|
Allow(&'a str),
|
||||||
Disallow(&'a str),
|
Disallow(&'a str),
|
||||||
|
Delay(u64),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Rule<'a> {
|
impl<'a> Rule<'a> {
|
||||||
|
@ -11,6 +12,7 @@ impl<'a> Rule<'a> {
|
||||||
match self {
|
match self {
|
||||||
Rule::Allow(inner) => inner,
|
Rule::Allow(inner) => inner,
|
||||||
Rule::Disallow(inner) => inner,
|
Rule::Disallow(inner) => inner,
|
||||||
|
Rule::Delay(inner) => inner,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
18
src/parse.rs
18
src/parse.rs
|
@ -5,6 +5,7 @@ use futures_util::{
|
||||||
};
|
};
|
||||||
use serde_derive::{Deserialize, Serialize};
|
use serde_derive::{Deserialize, Serialize};
|
||||||
const UA_PREFIX: &str = "user-agent:";
|
const UA_PREFIX: &str = "user-agent:";
|
||||||
|
const DELAY_PREFIX: &str = "crawl-delay:";
|
||||||
const ALLOW_PREFIX: &str = "allow:";
|
const ALLOW_PREFIX: &str = "allow:";
|
||||||
const DISALLOW_PREFIX: &str = "disallow:";
|
const DISALLOW_PREFIX: &str = "disallow:";
|
||||||
|
|
||||||
|
@ -12,6 +13,7 @@ const DISALLOW_PREFIX: &str = "disallow:";
|
||||||
enum ParsedRule {
|
enum ParsedRule {
|
||||||
Allow(String),
|
Allow(String),
|
||||||
Disallow(String),
|
Disallow(String),
|
||||||
|
Delay(u64),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Into<Rule<'a>> for &'a ParsedRule {
|
impl<'a> Into<Rule<'a>> for &'a ParsedRule {
|
||||||
|
@ -19,6 +21,7 @@ impl<'a> Into<Rule<'a>> for &'a ParsedRule {
|
||||||
match self {
|
match self {
|
||||||
ParsedRule::Allow(path) => Rule::Allow(&path[..]),
|
ParsedRule::Allow(path) => Rule::Allow(&path[..]),
|
||||||
ParsedRule::Disallow(path) => Rule::Disallow(&path[..]),
|
ParsedRule::Disallow(path) => Rule::Disallow(&path[..]),
|
||||||
|
ParsedRule::Delay(delay) => Rule.Delay(delay),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -171,6 +174,7 @@ fn parse_line(line: String) -> ParsedLine {
|
||||||
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
.map(|s| ParsedLine::Rule(ParsedRule::Disallow(s.into())))
|
||||||
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
.or_else(|| parse_user_agent(line).map(|s| ParsedLine::UserAgent(s.to_lowercase())))
|
||||||
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
.or_else(|| parse_allow(line).map(|s| ParsedLine::Rule(ParsedRule::Allow(s.into()))))
|
||||||
|
.or_else(|| parse_delay(line).map(|s| ParsedLine::Rule(ParsedRule::Delay(s.into()))))
|
||||||
.unwrap_or(ParsedLine::Nothing)
|
.unwrap_or(ParsedLine::Nothing)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,6 +199,20 @@ fn parse_user_agent(line: &str) -> Option<&str> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_delay(line: &str) -> Option<u64> {
|
||||||
|
if line.len() < DELAY_PREFIX.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let prefix = &line[..DELAY_PREFIX.len()].to_ascii_lowercase();
|
||||||
|
let suffix = &line[DELAY_PREFIX.len()..];
|
||||||
|
if prefix == DELAY_PREFIX {
|
||||||
|
Some(suffix.trim())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_allow(line: &str) -> Option<&str> {
|
fn parse_allow(line: &str) -> Option<&str> {
|
||||||
if line.len() < ALLOW_PREFIX.len() {
|
if line.len() < ALLOW_PREFIX.len() {
|
||||||
return None;
|
return None;
|
||||||
|
|
Loading…
Reference in New Issue