Minor clean ups, plus redirect support
This commit is contained in:
parent
6d3d7c5f47
commit
7a0cd15018
|
@ -31,16 +31,24 @@ pub enum RakeOutcome {
|
|||
RakedPage(RakedPage),
|
||||
RakedFeed(Vec<UrlRaked>),
|
||||
RakedSitemap(Vec<UrlRaked>),
|
||||
/// The page was not canonical, and should not be indexed.
|
||||
/// However here is the URL of the canonical page.
|
||||
// TODO call this a Redirect and also use for 3xx redirects?
|
||||
NotCanonical {
|
||||
Redirect {
|
||||
reason: RedirectReason,
|
||||
new_url: Url,
|
||||
},
|
||||
TemporaryFailure(TemporaryFailure),
|
||||
PermanentFailure(PermanentFailure),
|
||||
}
|
||||
|
||||
/// Why a rake ended in `RakeOutcome::Redirect` rather than in content.
///
/// Carried in the `reason` field of `RakeOutcome::Redirect`, alongside the
/// `new_url` that the redirect points to.
pub enum RedirectReason {
|
||||
/// The page redirected somewhere else.
///
/// Produced when the response status is a redirection and a `Location`
/// header is present; the header value is resolved against the request URL.
|
||||
Redirected {
|
||||
/// HTTP status code of the redirect response.
|
||||
http_code: u16
|
||||
},
|
||||
/// The page was not canonical, and should not be indexed.
|
||||
NotCanonical,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct UrlRaked {
|
||||
pub url: Url,
|
||||
|
@ -66,10 +74,11 @@ pub struct PermanentFailure {
|
|||
|
||||
/// Reason attached to a `TemporaryFailure` (its `reason` field).
pub enum TemporaryFailureReason {
|
||||
// NOTE(review): the meaning of the String payload is not visible in this
// diff; presumably a description of what was missing. Confirm at call sites.
MissingInformation(String),
|
||||
/// The server answered with a server-error HTTP status; the payload is
/// that status code. The visible call site pairs this with a one-day
/// (86400 s) backoff.
ServerError(u16),
|
||||
}
|
||||
|
||||
/// Reason attached to a `PermanentFailure` (its `reason` field).
pub enum PermanentFailureReason {
|
||||
// NOTE(review): `ResourceDenied` appears twice because this diff view shows
// both the pre-change and post-change line. The visible call site passes
// `response.status().as_u16()`, which matches the `u16` form. Confirm that
// only the `u16` line exists in the actual file.
ResourceDenied(u32),
|
||||
/// The server answered with a client-error HTTP status; the payload is
/// that status code.
ResourceDenied(u16),
|
||||
// NOTE(review): presumably the detected language of the page. Confirm.
WrongLanguage(String),
|
||||
// NOTE(review): presumably the unrecognised Content-Type value. Confirm.
UnknownContentType(String),
|
||||
}
|
||||
|
@ -150,8 +159,41 @@ impl Raker {
|
|||
eprintln!("CF? {:?}", is_cf);
|
||||
}
|
||||
|
||||
let http_code = response.status().as_u16();
|
||||
|
||||
if response.status().is_redirection() {
|
||||
if let Some(redirect_target) = response.headers().get("location") {
|
||||
let new_url = url.join(redirect_target.to_str()
|
||||
.context("Failed to convert Location header to str")?)
|
||||
.context("Failed to resolve Location header target")?;
|
||||
|
||||
return Ok(RakeOutcome::Redirect {
|
||||
reason: RedirectReason::Redirected {
|
||||
http_code
|
||||
},
|
||||
new_url
|
||||
});
|
||||
} else {
|
||||
bail!("Redirection {:?} received, but no Location header.", response.status());
|
||||
}
|
||||
}
|
||||
|
||||
if response.status().is_client_error() {
|
||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||
reason: PermanentFailureReason::ResourceDenied(http_code)
|
||||
}))
|
||||
}
|
||||
|
||||
if response.status().is_server_error() {
|
||||
return Ok(RakeOutcome::TemporaryFailure(TemporaryFailure {
|
||||
reason: TemporaryFailureReason::ServerError(http_code),
|
||||
// Try again tomorrow. Maybe the server is overloaded?
|
||||
backoff_sec: 86400
|
||||
}))
|
||||
}
|
||||
|
||||
if !response.status().is_success() {
|
||||
bail!("Not successful: {:?}", response.status().as_u16());
|
||||
bail!("Unknown failure code: {:?}", response.status());
|
||||
}
|
||||
|
||||
let content_type = if let Some(content_type) = response.headers().get("content-type") {
|
||||
|
|
|
@ -21,9 +21,8 @@ impl DenseDocument {
|
|||
// NOTE(review): presumably the head metadata of a dense document; no usage
// is visible in this hunk. The hunk header says one line is removed from this
// region, so some of the lines below may be pre-change only. Confirm against
// the actual file.
pub struct DenseHead {
|
||||
// NOTE(review): presumably the page title. Confirm.
title: String,
|
||||
// NOTE(review): presumably URLs of feeds discovered on the page. Confirm.
feed_urls: Vec<String>,
|
||||
// TODO how best to expose this?? We actually don't care about storing it though ...
|
||||
// Probably move to the raker.
|
||||
canonical: (), // TODO I'm sure we'd benefit by digging up some metadata, but that's possibly for later :)
|
||||
/// URL to icon of the page. May be empty if none were discovered.
|
||||
icon: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
|
|
Loading…
Reference in New Issue