This commit is contained in:
Olivier 'reivilibre' 2022-03-14 19:47:03 +00:00
parent f1ce8b2c62
commit df50f3607d
2 changed files with 28 additions and 17 deletions

View File

@ -43,7 +43,7 @@ pub enum RedirectReason {
/// The page redirected somewhere else.
Redirected {
/// HTTP Status Code of the redirect
http_code: u16
http_code: u16,
},
/// The page was not canonical, and should not be indexed.
NotCanonical,
@ -163,33 +163,38 @@ impl Raker {
if response.status().is_redirection() {
if let Some(redirect_target) = response.headers().get("location") {
let new_url = url.join(redirect_target.to_str()
.context("Failed to convert Location header to str")?)
let new_url = url
.join(
redirect_target
.to_str()
.context("Failed to convert Location header to str")?,
)
.context("Failed to resolve Location header target")?;
return Ok(RakeOutcome::Redirect {
reason: RedirectReason::Redirected {
http_code
},
new_url
reason: RedirectReason::Redirected { http_code },
new_url,
});
} else {
bail!("Redirection {:?} received, but no Location header.", response.status());
bail!(
"Redirection {:?} received, but no Location header.",
response.status()
);
}
}
if response.status().is_client_error() {
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
reason: PermanentFailureReason::ResourceDenied(http_code)
}))
reason: PermanentFailureReason::ResourceDenied(http_code),
}));
}
if response.status().is_server_error() {
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
reason: TemporaryFailureReason::ServerError(http_code),
// Try again tomorrow. Maybe the server is overloaded?
backoff_sec: 86400
}))
backoff_sec: 86400,
}));
}
if !response.status().is_success() {
@ -254,7 +259,12 @@ impl Raker {
}));
}
pub fn rake_html_page(&self, content: &[u8], url: &Url, is_cf: bool) -> anyhow::Result<RakeOutcome> {
pub fn rake_html_page(
&self,
content: &[u8],
url: &Url,
is_cf: bool,
) -> anyhow::Result<RakeOutcome> {
let content_str = std::str::from_utf8(content)?;
let root_node: NodeRef = kuchiki::parse_html().one(content_str);
@ -263,13 +273,14 @@ impl Raker {
// If it's not, then we redirect the raker to the canonical URL.
if let Ok(canonical_link_node) = root_node.select_first("head link[rel=canonical]") {
if let Some(canonical_href) = canonical_link_node.attributes.borrow().get("href") {
let canonical_url = url.join(canonical_href)
let canonical_url = url
.join(canonical_href)
.context("Failed to resolve or parse canonical URL")?;
if &canonical_url != url {
return Ok(RakeOutcome::Redirect {
reason: RedirectReason::NotCanonical,
new_url: canonical_url
new_url: canonical_url,
});
}
}
@ -310,7 +321,7 @@ impl Raker {
eprintln!("{:#?}", readability.metadata);
if let Some(node) = readability.article_node {
if let Some(_node) = readability.article_node {
//eprintln!("{}", node.to_string());
}

View File

@ -12,7 +12,7 @@ pub struct DenseDocument {
}
impl DenseDocument {
pub fn from_document(root_node: NodeRef) {
pub fn from_document(_root_node: NodeRef) {
todo!()
}
}