Minor clean ups, plus redirect support

This commit is contained in:
Olivier 'reivilibre' 2022-03-14 19:44:53 +00:00
parent 6d3d7c5f47
commit 7a0cd15018
3 changed files with 50 additions and 9 deletions

View File

@ -31,16 +31,24 @@ pub enum RakeOutcome {
RakedPage(RakedPage), RakedPage(RakedPage),
RakedFeed(Vec<UrlRaked>), RakedFeed(Vec<UrlRaked>),
RakedSitemap(Vec<UrlRaked>), RakedSitemap(Vec<UrlRaked>),
/// The page was not canonical, and should not be indexed. Redirect {
/// However here is the URL of the canonical page. reason: RedirectReason,
// TODO call this a Redirect and also use for 3xx redirects?
NotCanonical {
new_url: Url, new_url: Url,
}, },
TemporaryFailure(TemporaryFailure), TemporaryFailure(TemporaryFailure),
PermanentFailure(PermanentFailure), PermanentFailure(PermanentFailure),
} }
/// Explains why a rake yielded a redirect to another URL rather than content.
pub enum RedirectReason {
    /// The server answered with a redirect pointing somewhere else.
    Redirected {
        /// The HTTP status code that accompanied the redirect.
        http_code: u16,
    },

    /// The page is not the canonical one and therefore should not be indexed.
    NotCanonical,
}
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct UrlRaked { pub struct UrlRaked {
pub url: Url, pub url: Url,
@ -66,10 +74,11 @@ pub struct PermanentFailure {
pub enum TemporaryFailureReason { pub enum TemporaryFailureReason {
MissingInformation(String), MissingInformation(String),
ServerError(u16),
} }
pub enum PermanentFailureReason { pub enum PermanentFailureReason {
ResourceDenied(u32), ResourceDenied(u16),
WrongLanguage(String), WrongLanguage(String),
UnknownContentType(String), UnknownContentType(String),
} }
@ -150,8 +159,41 @@ impl Raker {
eprintln!("CF? {:?}", is_cf); eprintln!("CF? {:?}", is_cf);
} }
let http_code = response.status().as_u16();
if response.status().is_redirection() {
if let Some(redirect_target) = response.headers().get("location") {
let new_url = url.join(redirect_target.to_str()
.context("Failed to convert Location header to str")?)
.context("Failed to resolve Location header target")?;
return Ok(RakeOutcome::Redirect {
reason: RedirectReason::Redirected {
http_code
},
new_url
});
} else {
bail!("Redirection {:?} received, but no Location header.", response.status());
}
}
if response.status().is_client_error() {
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
reason: PermanentFailureReason::ResourceDenied(http_code)
}))
}
if response.status().is_server_error() {
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
reason: TemporaryFailureReason::ServerError(http_code),
// Try again tomorrow. Maybe the server is overloaded?
backoff_sec: 86400
}))
}
if !response.status().is_success() { if !response.status().is_success() {
bail!("Not successful: {:?}", response.status().as_u16()); bail!("Unknown failure code: {:?}", response.status());
} }
let content_type = if let Some(content_type) = response.headers().get("content-type") { let content_type = if let Some(content_type) = response.headers().get("content-type") {

View File

@ -21,9 +21,8 @@ impl DenseDocument {
pub struct DenseHead { pub struct DenseHead {
title: String, title: String,
feed_urls: Vec<String>, feed_urls: Vec<String>,
// TODO how best to expose this?? We actually don't care about storing it though ... /// URL to icon of the page. May be empty if none were discovered.
// Probably move to the raker. icon: String,
canonical: (), // TODO I'm sure we'd benefit by digging up some metadata, but that's possibly for later :)
} }
#[derive(Serialize, Deserialize, Clone, Debug)] #[derive(Serialize, Deserialize, Clone, Debug)]