Minor clean ups, plus redirect support

This commit is contained in:
Olivier 'reivilibre' 2022-03-14 19:44:53 +00:00
parent 6d3d7c5f47
commit 7a0cd15018
3 changed files with 50 additions and 9 deletions

View File

@ -31,16 +31,24 @@ pub enum RakeOutcome {
RakedPage(RakedPage),
RakedFeed(Vec<UrlRaked>),
RakedSitemap(Vec<UrlRaked>),
/// The page was not canonical, and should not be indexed.
/// However here is the URL of the canonical page.
// TODO call this a Redirect and also use for 3xx redirects?
NotCanonical {
Redirect {
reason: RedirectReason,
new_url: Url,
},
TemporaryFailure(TemporaryFailure),
PermanentFailure(PermanentFailure),
}
/// Why a rake produced [`RakeOutcome::Redirect`] instead of page content.
///
/// Derives the same traits as `UrlRaked` so that values can be compared,
/// cloned and debug-printed.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum RedirectReason {
    /// The page redirected somewhere else.
    Redirected {
        /// HTTP Status Code of the redirect
        http_code: u16,
    },
    /// The page was not canonical, and should not be indexed.
    NotCanonical,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UrlRaked {
pub url: Url,
@ -66,10 +74,11 @@ pub struct PermanentFailure {
/// Reason carried in the `reason` field of a `TemporaryFailure`.
pub enum TemporaryFailureReason {
/// NOTE(review): the meaning of the `String` payload is not visible here —
/// presumably a description of what was missing; confirm against the code that constructs it.
MissingInformation(String),
/// The server answered with a server-error status; holds that HTTP status code
/// (taken from `response.status().as_u16()` by the raker).
ServerError(u16),
}
pub enum PermanentFailureReason {
ResourceDenied(u32),
ResourceDenied(u16),
WrongLanguage(String),
UnknownContentType(String),
}
@ -150,8 +159,41 @@ impl Raker {
eprintln!("CF? {:?}", is_cf);
}
let http_code = response.status().as_u16();
if response.status().is_redirection() {
if let Some(redirect_target) = response.headers().get("location") {
let new_url = url.join(redirect_target.to_str()
.context("Failed to convert Location header to str")?)
.context("Failed to resolve Location header target")?;
return Ok(RakeOutcome::Redirect {
reason: RedirectReason::Redirected {
http_code
},
new_url
});
} else {
bail!("Redirection {:?} received, but no Location header.", response.status());
}
}
if response.status().is_client_error() {
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
reason: PermanentFailureReason::ResourceDenied(http_code)
}))
}
if response.status().is_server_error() {
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
reason: TemporaryFailureReason::ServerError(http_code),
// Try again tomorrow. Maybe the server is overloaded?
backoff_sec: 86400
}))
}
if !response.status().is_success() {
bail!("Not successful: {:?}", response.status().as_u16());
bail!("Unknown failure code: {:?}", response.status());
}
let content_type = if let Some(content_type) = response.headers().get("content-type") {

View File

@ -21,9 +21,8 @@ impl DenseDocument {
pub struct DenseHead {
title: String,
feed_urls: Vec<String>,
// TODO how best to expose this?? We actually don't care about storing it though ...
// Probably move to the raker.
canonical: (), // TODO I'm sure we'd benefit by digging up some metadata, but that's possibly for later :)
/// URL to icon of the page. May be empty if none were discovered.
icon: String,
}
#[derive(Serialize, Deserialize, Clone, Debug)]