Reformat
This commit is contained in:
parent
f1ce8b2c62
commit
df50f3607d
|
@ -43,7 +43,7 @@ pub enum RedirectReason {
|
||||||
/// The page redirected somewhere else.
|
/// The page redirected somewhere else.
|
||||||
Redirected {
|
Redirected {
|
||||||
/// HTTP Status Code of the redirect
|
/// HTTP Status Code of the redirect
|
||||||
http_code: u16
|
http_code: u16,
|
||||||
},
|
},
|
||||||
/// The page was not canonical, and should not be indexed.
|
/// The page was not canonical, and should not be indexed.
|
||||||
NotCanonical,
|
NotCanonical,
|
||||||
|
@ -163,33 +163,38 @@ impl Raker {
|
||||||
|
|
||||||
if response.status().is_redirection() {
|
if response.status().is_redirection() {
|
||||||
if let Some(redirect_target) = response.headers().get("location") {
|
if let Some(redirect_target) = response.headers().get("location") {
|
||||||
let new_url = url.join(redirect_target.to_str()
|
let new_url = url
|
||||||
.context("Failed to convert Location header to str")?)
|
.join(
|
||||||
|
redirect_target
|
||||||
|
.to_str()
|
||||||
|
.context("Failed to convert Location header to str")?,
|
||||||
|
)
|
||||||
.context("Failed to resolve Location header target")?;
|
.context("Failed to resolve Location header target")?;
|
||||||
|
|
||||||
return Ok(RakeOutcome::Redirect {
|
return Ok(RakeOutcome::Redirect {
|
||||||
reason: RedirectReason::Redirected {
|
reason: RedirectReason::Redirected { http_code },
|
||||||
http_code
|
new_url,
|
||||||
},
|
|
||||||
new_url
|
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
bail!("Redirection {:?} received, but no Location header.", response.status());
|
bail!(
|
||||||
|
"Redirection {:?} received, but no Location header.",
|
||||||
|
response.status()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if response.status().is_client_error() {
|
if response.status().is_client_error() {
|
||||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||||
reason: PermanentFailureReason::ResourceDenied(http_code)
|
reason: PermanentFailureReason::ResourceDenied(http_code),
|
||||||
}))
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
if response.status().is_server_error() {
|
if response.status().is_server_error() {
|
||||||
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||||
reason: TemporaryFailureReason::ServerError(http_code),
|
reason: TemporaryFailureReason::ServerError(http_code),
|
||||||
// Try again tomorrow. Maybe the server is overloaded?
|
// Try again tomorrow. Maybe the server is overloaded?
|
||||||
backoff_sec: 86400
|
backoff_sec: 86400,
|
||||||
}))
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
if !response.status().is_success() {
|
if !response.status().is_success() {
|
||||||
|
@ -254,7 +259,12 @@ impl Raker {
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn rake_html_page(&self, content: &[u8], url: &Url, is_cf: bool) -> anyhow::Result<RakeOutcome> {
|
pub fn rake_html_page(
|
||||||
|
&self,
|
||||||
|
content: &[u8],
|
||||||
|
url: &Url,
|
||||||
|
is_cf: bool,
|
||||||
|
) -> anyhow::Result<RakeOutcome> {
|
||||||
let content_str = std::str::from_utf8(content)?;
|
let content_str = std::str::from_utf8(content)?;
|
||||||
|
|
||||||
let root_node: NodeRef = kuchiki::parse_html().one(content_str);
|
let root_node: NodeRef = kuchiki::parse_html().one(content_str);
|
||||||
|
@ -263,13 +273,14 @@ impl Raker {
|
||||||
// If it's not, then we redirect the raker to the canonical URL.
|
// If it's not, then we redirect the raker to the canonical URL.
|
||||||
if let Ok(canonical_link_node) = root_node.select_first("head link[rel=canonical]") {
|
if let Ok(canonical_link_node) = root_node.select_first("head link[rel=canonical]") {
|
||||||
if let Some(canonical_href) = canonical_link_node.attributes.borrow().get("href") {
|
if let Some(canonical_href) = canonical_link_node.attributes.borrow().get("href") {
|
||||||
let canonical_url = url.join(canonical_href)
|
let canonical_url = url
|
||||||
|
.join(canonical_href)
|
||||||
.context("Failed to resolve or parse canonical URL")?;
|
.context("Failed to resolve or parse canonical URL")?;
|
||||||
|
|
||||||
if &canonical_url != url {
|
if &canonical_url != url {
|
||||||
return Ok(RakeOutcome::Redirect {
|
return Ok(RakeOutcome::Redirect {
|
||||||
reason: RedirectReason::NotCanonical,
|
reason: RedirectReason::NotCanonical,
|
||||||
new_url: canonical_url
|
new_url: canonical_url,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -310,7 +321,7 @@ impl Raker {
|
||||||
|
|
||||||
eprintln!("{:#?}", readability.metadata);
|
eprintln!("{:#?}", readability.metadata);
|
||||||
|
|
||||||
if let Some(node) = readability.article_node {
|
if let Some(_node) = readability.article_node {
|
||||||
//eprintln!("{}", node.to_string());
|
//eprintln!("{}", node.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ pub struct DenseDocument {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DenseDocument {
|
impl DenseDocument {
|
||||||
pub fn from_document(root_node: NodeRef) {
|
pub fn from_document(_root_node: NodeRef) {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue