Fix unicode support by operating on bytes
This commit is contained in:
parent
b6b30d01c7
commit
380dbb57a5
|
@ -215,7 +215,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "cylon"
|
||||
version = "0.1.3"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"futures-util",
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "cylon"
|
||||
description = "An efficient compiler for robots.txt files"
|
||||
version = "0.1.3"
|
||||
version = "0.2.0"
|
||||
authors = ["Creston Bunch <rust@bunch.im>"]
|
||||
edition = "2018"
|
||||
|
||||
|
|
203
src/dfa.rs
203
src/dfa.rs
|
@ -3,16 +3,19 @@ use std::cmp::Ordering;
|
|||
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
|
||||
const EOW_BYTE: u8 = 36; // '$'
|
||||
const WILDCARD_BYTE: u8 = 42; // '*'
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Rule<'a> {
|
||||
Allow(&'a str),
|
||||
Disallow(&'a str),
|
||||
Allow(&'a [u8]),
|
||||
Disallow(&'a [u8]),
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
Delay(&'a str),
|
||||
Delay(&'a [u8]),
|
||||
}
|
||||
|
||||
impl<'a> Rule<'a> {
|
||||
fn inner(&self) -> &str {
|
||||
fn inner(&self) -> &[u8] {
|
||||
match self {
|
||||
Rule::Allow(inner) => inner,
|
||||
Rule::Disallow(inner) => inner,
|
||||
|
@ -24,7 +27,7 @@ impl<'a> Rule<'a> {
|
|||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)]
|
||||
enum Edge {
|
||||
MatchChar(char),
|
||||
MatchByte(u8),
|
||||
MatchAny,
|
||||
MatchEow,
|
||||
}
|
||||
|
@ -60,8 +63,8 @@ impl Cylon {
|
|||
}
|
||||
|
||||
/// Match whether the rules allow or disallow the target path.
|
||||
pub fn allow(&self, path: &str) -> bool {
|
||||
match self.states[self.state(path)] {
|
||||
pub fn allow<T: AsRef<[u8]>>(&self, path: T) -> bool {
|
||||
match self.states[self.state(path.as_ref())] {
|
||||
State::Allow => true,
|
||||
State::Disallow => false,
|
||||
// Intermediate states are not preserved in the DFA
|
||||
|
@ -72,8 +75,8 @@ impl Cylon {
|
|||
}
|
||||
}
|
||||
|
||||
fn state(&self, path: &str) -> usize {
|
||||
let state = path.chars().fold(2, |state, path_char| {
|
||||
fn state(&self, path: &[u8]) -> usize {
|
||||
let state = path.into_iter().fold(2, |state, path_char| {
|
||||
let t = &self.transitions[state];
|
||||
t.iter()
|
||||
.rev()
|
||||
|
@ -82,7 +85,7 @@ impl Cylon {
|
|||
.find(|transition| match transition {
|
||||
Transition(Edge::MatchAny, ..) => true,
|
||||
Transition(Edge::MatchEow, ..) => false,
|
||||
Transition(Edge::MatchChar(edge_char), ..) => *edge_char == path_char,
|
||||
Transition(Edge::MatchByte(edge_char), ..) => edge_char == path_char,
|
||||
})
|
||||
.map(|Transition(.., next_state)| *next_state)
|
||||
// We are guaranteed at least one matching state because of
|
||||
|
@ -118,7 +121,7 @@ impl Cylon {
|
|||
|
||||
rules.sort_by(|a, b| Ord::cmp(a.inner(), b.inner()));
|
||||
|
||||
let mut queue = vec![("", 0, 0, State::Intermediate)];
|
||||
let mut queue = vec![(b"" as &[u8], 0, 0, State::Intermediate)];
|
||||
while !queue.is_empty() {
|
||||
// parent_prefix is the "parent node" in the prefix tree. We are
|
||||
// going to visit its children by filtering from the list of
|
||||
|
@ -128,23 +131,23 @@ impl Cylon {
|
|||
// that can match any character legally, but is also a prefix
|
||||
// (read: ancestor) of the current node.
|
||||
let (parent_prefix, mut wildcard_state, parent_state, state) = queue.remove(0);
|
||||
let last_char = parent_prefix.chars().last();
|
||||
let last_byte = parent_prefix.last();
|
||||
|
||||
wildcard_state = match state {
|
||||
State::Allow => 0,
|
||||
State::Disallow if last_char == Some('$') => wildcard_state,
|
||||
State::Disallow if last_byte == Some(&EOW_BYTE) => wildcard_state,
|
||||
State::Disallow => 1,
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
State::Delay => 1,
|
||||
State::Intermediate => wildcard_state,
|
||||
};
|
||||
|
||||
let mut t = match last_char {
|
||||
Some('$') => {
|
||||
let mut t = match last_byte {
|
||||
Some(&EOW_BYTE) => {
|
||||
// The EOW character cannot match anything else
|
||||
vec![Transition(Edge::MatchAny, wildcard_state)]
|
||||
}
|
||||
Some('*') => {
|
||||
Some(&WILDCARD_BYTE) => {
|
||||
// The wildcard character overrides the wildcard state
|
||||
vec![Transition(Edge::MatchAny, transitions.len())]
|
||||
}
|
||||
|
@ -154,7 +157,7 @@ impl Cylon {
|
|||
}
|
||||
};
|
||||
|
||||
let mut curr_prefix = "";
|
||||
let mut curr_prefix: &[u8] = b"";
|
||||
rules
|
||||
.iter()
|
||||
.map(Rule::inner)
|
||||
|
@ -187,19 +190,19 @@ impl Cylon {
|
|||
// NB: we can predict what state index the child
|
||||
// will have before it's even pushed onto the state vec.
|
||||
let child_index = transitions.len() + queue.len();
|
||||
let edge_char = child_prefix.chars().last().unwrap();
|
||||
let edge_char = child_prefix.last().unwrap();
|
||||
let transition = Transition(
|
||||
match edge_char {
|
||||
'*' => Edge::MatchAny,
|
||||
'$' => Edge::MatchEow,
|
||||
c => Edge::MatchChar(c),
|
||||
match *edge_char {
|
||||
WILDCARD_BYTE => Edge::MatchAny,
|
||||
EOW_BYTE => Edge::MatchEow,
|
||||
c => Edge::MatchByte(c),
|
||||
},
|
||||
child_index,
|
||||
);
|
||||
|
||||
// Add transitions from the parent state to the child state
|
||||
// so that the wildcard character matches are optional.
|
||||
if last_char == Some('*') {
|
||||
if last_byte == Some(&WILDCARD_BYTE) {
|
||||
let parent_t = &mut transitions[parent_state];
|
||||
parent_t.push(transition);
|
||||
}
|
||||
|
@ -225,7 +228,9 @@ impl Cylon {
|
|||
Rule::Delay(_) => true,
|
||||
_ => false,
|
||||
})
|
||||
.map(|r| r.inner().parse::<u64>().ok())
|
||||
.map(|r| r.inner())
|
||||
.flat_map(|r| std::str::from_utf8(r).ok())
|
||||
.map(|r| r.parse::<u64>().ok())
|
||||
.collect();
|
||||
delays.sort_unstable_by(|a, b| match (a, b) {
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
|
@ -260,28 +265,55 @@ mod tests {
|
|||
Transition(Edge::MatchEow, $x)
|
||||
};
|
||||
($x:expr => $y:expr) => {
|
||||
Transition(Edge::MatchChar($x), $y)
|
||||
Transition(Edge::MatchByte($x), $y)
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! b {
|
||||
('.') => {
|
||||
46
|
||||
};
|
||||
('/') => {
|
||||
47
|
||||
};
|
||||
('a') => {
|
||||
97
|
||||
};
|
||||
('b') => {
|
||||
98
|
||||
};
|
||||
('c') => {
|
||||
99
|
||||
};
|
||||
('d') => {
|
||||
100
|
||||
};
|
||||
('x') => {
|
||||
120
|
||||
};
|
||||
('y') => {
|
||||
121
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compile() {
|
||||
let rules = vec![
|
||||
Rule::Disallow("/"),
|
||||
Rule::Allow("/a"),
|
||||
Rule::Allow("/abc"),
|
||||
Rule::Allow("/b"),
|
||||
Rule::Disallow(b"/"),
|
||||
Rule::Allow(b"/a"),
|
||||
Rule::Allow(b"/abc"),
|
||||
Rule::Allow(b"/b"),
|
||||
];
|
||||
|
||||
let expect_transitions = vec![
|
||||
vec![t!('*' => 0)],
|
||||
vec![t!('*' => 1)],
|
||||
vec![t!('*' => 0), t!('/' => 3)], // ""
|
||||
vec![t!('*' => 1), t!('a' => 4), t!('b' => 5)], // "/"
|
||||
vec![t!('*' => 0), t!('b' => 6)], // "/a"
|
||||
vec![t!('*' => 0)], // "/b"
|
||||
vec![t!('*' => 0), t!('c' => 7)], // "/ab"
|
||||
vec![t!('*' => 0)], // "/abc"
|
||||
vec![t!('*' => 0), t!(b!('/') => 3)], // ""
|
||||
vec![t!('*' => 1), t!(b!('a') => 4), t!(b!('b') => 5)], // "/"
|
||||
vec![t!('*' => 0), t!(b!('b') => 6)], // "/a"
|
||||
vec![t!('*' => 0)], // "/b"
|
||||
vec![t!('*' => 0), t!(b!('c') => 7)], // "/ab"
|
||||
vec![t!('*' => 0)], // "/abc"
|
||||
];
|
||||
|
||||
let expect_states = vec![
|
||||
|
@ -302,17 +334,26 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_compile_with_wildcard() {
|
||||
let rules = vec![Rule::Disallow("/"), Rule::Allow("/a"), Rule::Allow("/*.b")];
|
||||
let rules = vec![
|
||||
Rule::Disallow(b"/"),
|
||||
Rule::Allow(b"/a"),
|
||||
Rule::Allow(b"/*.b"),
|
||||
];
|
||||
|
||||
let expect_transitions = vec![
|
||||
vec![t!('*' => 0)],
|
||||
vec![t!('*' => 1)],
|
||||
vec![t!('*' => 0), t!('/' => 3)], // ""
|
||||
vec![t!('*' => 1), t!('*' => 4), t!('a' => 5), t!('.' => 6)], // "/"
|
||||
vec![t!('*' => 4), t!('.' => 6)], // "/*"
|
||||
vec![t!('*' => 0)], // "/a"
|
||||
vec![t!('*' => 1), t!('b' => 7)], // "/*."
|
||||
vec![t!('*' => 0)], // "/*.b"
|
||||
vec![t!('*' => 0), t!(b!('/') => 3)], // ""
|
||||
vec![
|
||||
t!('*' => 1),
|
||||
t!('*' => 4),
|
||||
t!(b!('a') => 5),
|
||||
t!(b!('.') => 6),
|
||||
], // "/"
|
||||
vec![t!('*' => 4), t!(b!('.') => 6)], // "/*"
|
||||
vec![t!('*' => 0)], // "/a"
|
||||
vec![t!('*' => 1), t!(b!('b') => 7)], // "/*."
|
||||
vec![t!('*' => 0)], // "/*.b"
|
||||
];
|
||||
|
||||
let expect_states = vec![
|
||||
|
@ -333,15 +374,15 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_compile_tricky_wildcard() {
|
||||
let rules = vec![Rule::Disallow("/"), Rule::Allow("/*.")];
|
||||
let rules = vec![Rule::Disallow(b"/"), Rule::Allow(b"/*.")];
|
||||
|
||||
let expect_transitions = vec![
|
||||
vec![t!('*' => 0)],
|
||||
vec![t!('*' => 1)],
|
||||
vec![t!('*' => 0), t!('/' => 3)], // ""
|
||||
vec![t!('*' => 1), t!('*' => 4), t!('.' => 5)], // "/"
|
||||
vec![t!('*' => 4), t!('.' => 5)], // "/*"
|
||||
vec![t!('*' => 0)], // "/*."
|
||||
vec![t!('*' => 0), t!(b!('/') => 3)], // ""
|
||||
vec![t!('*' => 1), t!('*' => 4), t!(b!('.') => 5)], // "/"
|
||||
vec![t!('*' => 4), t!(b!('.') => 5)], // "/*"
|
||||
vec![t!('*' => 0)], // "/*."
|
||||
];
|
||||
|
||||
let expect_states = vec![
|
||||
|
@ -361,24 +402,24 @@ mod tests {
|
|||
#[test]
|
||||
fn test_compile_with_eow() {
|
||||
let rules = vec![
|
||||
Rule::Allow("/"),
|
||||
Rule::Disallow("/a$"),
|
||||
Rule::Allow(b"/"),
|
||||
Rule::Disallow(b"/a$"),
|
||||
// Note that this rule is nonsensical. It will compile, but
|
||||
// no guarantees are made as to how it's matched. Rules should
|
||||
// use url-encoded strings to escape $.
|
||||
Rule::Disallow("/x$y"),
|
||||
Rule::Disallow(b"/x$y"),
|
||||
];
|
||||
|
||||
let expect_transitions = vec![
|
||||
vec![t!('*' => 0)],
|
||||
vec![t!('*' => 1)],
|
||||
vec![t!('*' => 0), t!('/' => 3)], // ""
|
||||
vec![t!('*' => 0), t!('a' => 4), t!('x' => 5)], // "/"
|
||||
vec![t!('*' => 0), t!('$' => 6)], // "/a"
|
||||
vec![t!('*' => 0), t!('$' => 7)], // "/x"
|
||||
vec![t!('*' => 0)], // "/a$"
|
||||
vec![t!('*' => 0), t!('y' => 8)], // "/x$"
|
||||
vec![t!('*' => 1)], // "/x$y"
|
||||
vec![t!('*' => 0), t!(b!('/') => 3)], // ""
|
||||
vec![t!('*' => 0), t!(b!('a') => 4), t!(b!('x') => 5)], // "/"
|
||||
vec![t!('*' => 0), t!('$' => 6)], // "/a"
|
||||
vec![t!('*' => 0), t!('$' => 7)], // "/x"
|
||||
vec![t!('*' => 0)], // "/a$"
|
||||
vec![t!('*' => 0), t!(b!('y') => 8)], // "/x$"
|
||||
vec![t!('*' => 1)], // "/x$y"
|
||||
];
|
||||
|
||||
let expect_states = vec![
|
||||
|
@ -401,10 +442,10 @@ mod tests {
|
|||
#[test]
|
||||
fn test_allow() {
|
||||
let rules = vec![
|
||||
Rule::Disallow("/"),
|
||||
Rule::Allow("/a"),
|
||||
Rule::Allow("/abc"),
|
||||
Rule::Allow("/b"),
|
||||
Rule::Disallow(b"/"),
|
||||
Rule::Allow(b"/a"),
|
||||
Rule::Allow(b"/abc"),
|
||||
Rule::Allow(b"/b"),
|
||||
];
|
||||
|
||||
let machine = Cylon::compile(rules);
|
||||
|
@ -421,9 +462,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_allow_match_any() {
|
||||
let rules = vec![
|
||||
Rule::Allow("/"),
|
||||
Rule::Disallow("/secret/*.txt"),
|
||||
Rule::Disallow("/private/*"),
|
||||
Rule::Allow(b"/"),
|
||||
Rule::Disallow(b"/secret/*.txt"),
|
||||
Rule::Disallow(b"/private/*"),
|
||||
];
|
||||
|
||||
let machine = Cylon::compile(rules);
|
||||
|
@ -442,9 +483,9 @@ mod tests {
|
|||
#[test]
|
||||
fn test_allow_match_eow() {
|
||||
let rules = vec![
|
||||
Rule::Allow("/"),
|
||||
Rule::Disallow("/ignore$"),
|
||||
Rule::Disallow("/foo$bar"),
|
||||
Rule::Allow(b"/"),
|
||||
Rule::Disallow(b"/ignore$"),
|
||||
Rule::Disallow(b"/foo$bar"),
|
||||
];
|
||||
|
||||
let machine = Cylon::compile(rules);
|
||||
|
@ -463,14 +504,14 @@ mod tests {
|
|||
#[test]
|
||||
fn test_allow_more_complicated() {
|
||||
let rules = vec![
|
||||
Rule::Allow("/"),
|
||||
Rule::Disallow("/a$"),
|
||||
Rule::Disallow("/abc"),
|
||||
Rule::Allow("/abc/*"),
|
||||
Rule::Disallow("/foo/bar"),
|
||||
Rule::Allow("/*/bar"),
|
||||
Rule::Disallow("/www/*/images"),
|
||||
Rule::Allow("/www/public/images"),
|
||||
Rule::Allow(b"/"),
|
||||
Rule::Disallow(b"/a$"),
|
||||
Rule::Disallow(b"/abc"),
|
||||
Rule::Allow(b"/abc/*"),
|
||||
Rule::Disallow(b"/foo/bar"),
|
||||
Rule::Allow(b"/*/bar"),
|
||||
Rule::Disallow(b"/www/*/images"),
|
||||
Rule::Allow(b"/www/public/images"),
|
||||
];
|
||||
|
||||
let machine = Cylon::compile(rules);
|
||||
|
@ -494,7 +535,7 @@ mod tests {
|
|||
// Test cases from:
|
||||
// https://developers.google.com/search/reference/robots_txt#group-member-rules
|
||||
|
||||
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish")]);
|
||||
let machine = Cylon::compile(vec![Rule::Disallow(b"/"), Rule::Allow(b"/fish")]);
|
||||
assert_eq!(true, machine.allow("/fish"));
|
||||
assert_eq!(true, machine.allow("/fish.html"));
|
||||
assert_eq!(true, machine.allow("/fish/salmon.html"));
|
||||
|
@ -505,7 +546,7 @@ mod tests {
|
|||
assert_eq!(false, machine.allow("/catfish"));
|
||||
assert_eq!(false, machine.allow("/?id=fish"));
|
||||
|
||||
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish*")]);
|
||||
let machine = Cylon::compile(vec![Rule::Disallow(b"/"), Rule::Allow(b"/fish*")]);
|
||||
assert_eq!(true, machine.allow("/fish"));
|
||||
assert_eq!(true, machine.allow("/fish.html"));
|
||||
assert_eq!(true, machine.allow("/fish/salmon.html"));
|
||||
|
@ -516,7 +557,7 @@ mod tests {
|
|||
assert_eq!(false, machine.allow("/catfish"));
|
||||
assert_eq!(false, machine.allow("/?id=fish"));
|
||||
|
||||
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish/")]);
|
||||
let machine = Cylon::compile(vec![Rule::Disallow(b"/"), Rule::Allow(b"/fish/")]);
|
||||
assert_eq!(true, machine.allow("/fish/"));
|
||||
assert_eq!(true, machine.allow("/fish/?id=anything"));
|
||||
assert_eq!(true, machine.allow("/fish/salmon.htm"));
|
||||
|
@ -524,7 +565,7 @@ mod tests {
|
|||
assert_eq!(false, machine.allow("/fish.html"));
|
||||
assert_eq!(false, machine.allow("/Fish/Salmon.asp"));
|
||||
|
||||
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/*.php")]);
|
||||
let machine = Cylon::compile(vec![Rule::Disallow(b"/"), Rule::Allow(b"/*.php")]);
|
||||
assert_eq!(true, machine.allow("/filename.php"));
|
||||
assert_eq!(true, machine.allow("/folder/filename.php"));
|
||||
assert_eq!(true, machine.allow("/folder/filename.php?parameters"));
|
||||
|
@ -533,7 +574,7 @@ mod tests {
|
|||
assert_eq!(false, machine.allow("/"));
|
||||
assert_eq!(false, machine.allow("/windows.PHP"));
|
||||
|
||||
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/*.php$")]);
|
||||
let machine = Cylon::compile(vec![Rule::Disallow(b"/"), Rule::Allow(b"/*.php$")]);
|
||||
assert_eq!(true, machine.allow("/filename.php"));
|
||||
assert_eq!(true, machine.allow("/folder/filename.php"));
|
||||
assert_eq!(false, machine.allow("/filename.php?parameters"));
|
||||
|
@ -541,7 +582,7 @@ mod tests {
|
|||
assert_eq!(false, machine.allow("/filename.php5"));
|
||||
assert_eq!(false, machine.allow("/windows.PHP"));
|
||||
|
||||
let machine = Cylon::compile(vec![Rule::Disallow("/"), Rule::Allow("/fish*.php")]);
|
||||
let machine = Cylon::compile(vec![Rule::Disallow(b"/"), Rule::Allow(b"/fish*.php")]);
|
||||
assert_eq!(true, machine.allow("/fish.php"));
|
||||
assert_eq!(true, machine.allow("/fishheads/catfish.php?parameters"));
|
||||
assert_eq!(false, machine.allow("/Fish.PHP"));
|
||||
|
|
38
src/parse.rs
38
src/parse.rs
|
@ -21,10 +21,10 @@ enum ParsedRule {
|
|||
impl<'a> Into<Rule<'a>> for &'a ParsedRule {
|
||||
fn into(self) -> Rule<'a> {
|
||||
match self {
|
||||
ParsedRule::Allow(path) => Rule::Allow(&path[..]),
|
||||
ParsedRule::Disallow(path) => Rule::Disallow(&path[..]),
|
||||
ParsedRule::Allow(path) => Rule::Allow(path.as_bytes()),
|
||||
ParsedRule::Disallow(path) => Rule::Disallow(path.as_bytes()),
|
||||
#[cfg(feature = "crawl-delay")]
|
||||
ParsedRule::Delay(delay) => Rule::Delay(delay),
|
||||
ParsedRule::Delay(delay) => Rule::Delay(delay.as_bytes()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -453,4 +453,36 @@ mod tests {
|
|||
assert_eq!(true, imabot_machine.allow("/index.html"));
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unicode_support() {
|
||||
tokio_test::block_on(async {
|
||||
// From: wikipedia.org/robots.txt
|
||||
let example_robots = r#"
|
||||
User-agent: test
|
||||
Disallow: /wiki/ויקיפדיה:רשימת_מועמדים_למחיקה/
|
||||
Disallow: /wiki/ויקיפדיה%3Aרשימת_מועמדים_למחיקה/
|
||||
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A8%D7%A9%D7%99%D7%9E%D7%AA_%D7%9E%D7%95%D7%A2%D7%9E%D7%93%D7%99%D7%9D_%D7%9C%D7%9E%D7%97%D7%99%D7%A7%D7%94/
|
||||
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%A8%D7%A9%D7%99%D7%9E%D7%AA_%D7%9E%D7%95%D7%A2%D7%9E%D7%93%D7%99%D7%9D_%D7%9C%D7%9E%D7%97%D7%99%D7%A7%D7%94/
|
||||
Disallow: /wiki/ויקיפדיה:ערכים_לא_קיימים_ומוגנים
|
||||
Disallow: /wiki/ויקיפדיה%3Aערכים_לא_קיימים_ומוגנים
|
||||
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A2%D7%A8%D7%9B%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
|
||||
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%A2%D7%A8%D7%9B%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
|
||||
Disallow: /wiki/ויקיפדיה:דפים_לא_קיימים_ומוגנים
|
||||
Disallow: /wiki/ויקיפדיה%3Aדפים_לא_קיימים_ומוגנים
|
||||
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%93%D7%A4%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
|
||||
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%93%D7%A4%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
|
||||
"#
|
||||
.as_bytes();
|
||||
|
||||
let parser = Compiler::new("test");
|
||||
let machine = parser.compile(example_robots).await.unwrap();
|
||||
|
||||
assert_eq!(true, machine.allow("/index.html"));
|
||||
assert_eq!(
|
||||
false,
|
||||
machine.allow("/wiki/ויקיפדיה:ערכים_לא_קיימים_ומוגנים")
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue