Minor clean ups, plus redirect support
This commit is contained in:
parent
6d3d7c5f47
commit
7a0cd15018
|
@ -31,16 +31,24 @@ pub enum RakeOutcome {
|
||||||
RakedPage(RakedPage),
|
RakedPage(RakedPage),
|
||||||
RakedFeed(Vec<UrlRaked>),
|
RakedFeed(Vec<UrlRaked>),
|
||||||
RakedSitemap(Vec<UrlRaked>),
|
RakedSitemap(Vec<UrlRaked>),
|
||||||
/// The page was not canonical, and should not be indexed.
|
Redirect {
|
||||||
/// However here is the URL of the canonical page.
|
reason: RedirectReason,
|
||||||
// TODO call this a Redirect and also use for 3xx redirects?
|
|
||||||
NotCanonical {
|
|
||||||
new_url: Url,
|
new_url: Url,
|
||||||
},
|
},
|
||||||
TemporaryFailure(TemporaryFailure),
|
TemporaryFailure(TemporaryFailure),
|
||||||
PermanentFailure(PermanentFailure),
|
PermanentFailure(PermanentFailure),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub enum RedirectReason {
|
||||||
|
/// The page redirected somewhere else.
|
||||||
|
Redirected {
|
||||||
|
/// HTTP Status Code of the redirect
|
||||||
|
http_code: u16
|
||||||
|
},
|
||||||
|
/// The page was not canonical, and should not be indexed.
|
||||||
|
NotCanonical,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct UrlRaked {
|
pub struct UrlRaked {
|
||||||
pub url: Url,
|
pub url: Url,
|
||||||
|
@ -66,10 +74,11 @@ pub struct PermanentFailure {
|
||||||
|
|
||||||
pub enum TemporaryFailureReason {
|
pub enum TemporaryFailureReason {
|
||||||
MissingInformation(String),
|
MissingInformation(String),
|
||||||
|
ServerError(u16),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum PermanentFailureReason {
|
pub enum PermanentFailureReason {
|
||||||
ResourceDenied(u32),
|
ResourceDenied(u16),
|
||||||
WrongLanguage(String),
|
WrongLanguage(String),
|
||||||
UnknownContentType(String),
|
UnknownContentType(String),
|
||||||
}
|
}
|
||||||
|
@ -150,8 +159,41 @@ impl Raker {
|
||||||
eprintln!("CF? {:?}", is_cf);
|
eprintln!("CF? {:?}", is_cf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let http_code = response.status().as_u16();
|
||||||
|
|
||||||
|
if response.status().is_redirection() {
|
||||||
|
if let Some(redirect_target) = response.headers().get("location") {
|
||||||
|
let new_url = url.join(redirect_target.to_str()
|
||||||
|
.context("Failed to convert Location header to str")?)
|
||||||
|
.context("Failed to resolve Location header target")?;
|
||||||
|
|
||||||
|
return Ok(RakeOutcome::Redirect {
|
||||||
|
reason: RedirectReason::Redirected {
|
||||||
|
http_code
|
||||||
|
},
|
||||||
|
new_url
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
bail!("Redirection {:?} received, but no Location header.", response.status());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if response.status().is_client_error() {
|
||||||
|
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||||
|
reason: PermanentFailureReason::ResourceDenied(http_code)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
if response.status().is_server_error() {
|
||||||
|
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||||
|
reason: TemporaryFailureReason::ServerError(http_code),
|
||||||
|
// Try again tomorrow. Maybe the server is overloaded?
|
||||||
|
backoff_sec: 86400
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
if !response.status().is_success() {
|
if !response.status().is_success() {
|
||||||
bail!("Not successful: {:?}", response.status().as_u16());
|
bail!("Unknown failure code: {:?}", response.status());
|
||||||
}
|
}
|
||||||
|
|
||||||
let content_type = if let Some(content_type) = response.headers().get("content-type") {
|
let content_type = if let Some(content_type) = response.headers().get("content-type") {
|
||||||
|
|
|
@ -21,9 +21,8 @@ impl DenseDocument {
|
||||||
pub struct DenseHead {
|
pub struct DenseHead {
|
||||||
title: String,
|
title: String,
|
||||||
feed_urls: Vec<String>,
|
feed_urls: Vec<String>,
|
||||||
// TODO how best to expose this?? We actually don't care about storing it though ...
|
/// URL to icon of the page. May be empty if none were discovered.
|
||||||
// Probably move to the raker.
|
icon: String,
|
||||||
canonical: (), // TODO I'm sure we'd benefit by digging up some metadata, but that's possibly for later :)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
|
Loading…
Reference in New Issue