Deny content based on content-type before downloading it
This commit is contained in:
parent
5d1f35a8ee
commit
504be33b8a
|
@ -37,24 +37,6 @@ pub const SIZE_LIMIT: usize = 4 * 1024 * 1024;
|
||||||
pub const TIME_LIMIT: Duration = Duration::from_secs(10);
|
pub const TIME_LIMIT: Duration = Duration::from_secs(10);
|
||||||
pub const RAKER_USER_AGENT: &'static str = "QuickPeepBot";
|
pub const RAKER_USER_AGENT: &'static str = "QuickPeepBot";
|
||||||
|
|
||||||
lazy_static! {
|
|
||||||
pub static ref IMAGE_MIME_TYPES: HashMap<&'static str, ImageFormat> = {
|
|
||||||
[
|
|
||||||
("image/png", ImageFormat::Png),
|
|
||||||
("image/webp", ImageFormat::WebP),
|
|
||||||
("image/jpeg", ImageFormat::Jpeg),
|
|
||||||
("image/gif", ImageFormat::Gif),
|
|
||||||
("image/vnd.microsoft.icon", ImageFormat::Ico),
|
|
||||||
("image/x-icon", ImageFormat::Ico),
|
|
||||||
("image/icon", ImageFormat::Ico),
|
|
||||||
("image/ico", ImageFormat::Ico),
|
|
||||||
("application/ico", ImageFormat::Ico),
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.collect()
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum RakeOutcome {
|
pub enum RakeOutcome {
|
||||||
RakedPage(RakedPage),
|
RakedPage(RakedPage),
|
||||||
RakedFeed(Vec<UrlRaked>),
|
RakedFeed(Vec<UrlRaked>),
|
||||||
|
@ -186,7 +168,22 @@ impl From<ReferenceKind> for RakeIntent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl RakeIntent {
|
||||||
|
/// Reports whether `mime_type` is an acceptable content type for this intent.
///
/// Each intent is checked against its own allow-list: `Any` against
/// `ALL_MIME_TYPES`, `Page` against `PAGE_MIME_TYPES`, `Feed` against
/// `FEED_MIME_TYPES`, `SiteMap` against `SITEMAP_MIME_TYPES`, and `Icon`
/// against the keys of `IMAGE_MIME_TYPES`.
///
/// The lookup is an exact string match, so no normalisation happens here.
/// NOTE(review): the caller visible in this diff strips any `;` parameters,
/// trims and lowercases the header value before calling — confirm that every
/// other caller does the same.
pub fn supports_mime_type(&self, mime_type: &str) -> bool {
|
||||||
|
match self {
|
||||||
|
RakeIntent::Any => ALL_MIME_TYPES.contains(mime_type),
|
||||||
|
RakeIntent::Page => PAGE_MIME_TYPES.contains(mime_type),
|
||||||
|
RakeIntent::Feed => FEED_MIME_TYPES.contains(mime_type),
|
||||||
|
RakeIntent::SiteMap => SITEMAP_MIME_TYPES.contains(mime_type),
|
||||||
|
// IMAGE_MIME_TYPES is a map from MIME type to image format; only the key matters here.
RakeIntent::Icon => IMAGE_MIME_TYPES.contains_key(mime_type),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
|
static ref PAGE_MIME_TYPES: HashSet<&'static str> =
|
||||||
|
HashSet::from_iter(vec!["text/html", "text/gemini",]);
|
||||||
|
|
||||||
static ref SITEMAP_MIME_TYPES: HashSet<&'static str> =
|
static ref SITEMAP_MIME_TYPES: HashSet<&'static str> =
|
||||||
HashSet::from_iter(vec!["text/xml", "application/xml",]);
|
HashSet::from_iter(vec!["text/xml", "application/xml",]);
|
||||||
|
|
||||||
|
@ -208,6 +205,30 @@ lazy_static! {
|
||||||
"application/rdf+xml",
|
"application/rdf+xml",
|
||||||
"application/feed+json"
|
"application/feed+json"
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
pub static ref IMAGE_MIME_TYPES: HashMap<&'static str, ImageFormat> = {
|
||||||
|
[
|
||||||
|
("image/png", ImageFormat::Png),
|
||||||
|
("image/webp", ImageFormat::WebP),
|
||||||
|
("image/jpeg", ImageFormat::Jpeg),
|
||||||
|
("image/gif", ImageFormat::Gif),
|
||||||
|
("image/vnd.microsoft.icon", ImageFormat::Ico),
|
||||||
|
("image/x-icon", ImageFormat::Ico),
|
||||||
|
("image/icon", ImageFormat::Ico),
|
||||||
|
("image/ico", ImageFormat::Ico),
|
||||||
|
("application/ico", ImageFormat::Ico),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
|
pub static ref ALL_MIME_TYPES: HashSet<&'static str> = HashSet::from_iter(
|
||||||
|
PAGE_MIME_TYPES.iter().cloned()
|
||||||
|
.chain(SITEMAP_MIME_TYPES.iter().cloned())
|
||||||
|
.chain(FEED_MIME_TYPES.iter().cloned())
|
||||||
|
.chain(FEED_LINK_MIME_TYPES.iter().cloned())
|
||||||
|
.chain(IMAGE_MIME_TYPES.keys().cloned())
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn response_to_bytes_limited(
|
async fn response_to_bytes_limited(
|
||||||
|
@ -348,7 +369,12 @@ impl Raker {
|
||||||
let content_type = content_type
|
let content_type = content_type
|
||||||
.to_str()
|
.to_str()
|
||||||
.context("Can't convert content-type to str")?;
|
.context("Can't convert content-type to str")?;
|
||||||
content_type.split(";").next().unwrap().trim().to_owned()
|
content_type
|
||||||
|
.split(";")
|
||||||
|
.next()
|
||||||
|
.unwrap()
|
||||||
|
.trim()
|
||||||
|
.to_lowercase()
|
||||||
} else {
|
} else {
|
||||||
increment_counter!("qprake_rake_specific_fail_count", "reason" => "NoCT");
|
increment_counter!("qprake_rake_specific_fail_count", "reason" => "NoCT");
|
||||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||||
|
@ -356,6 +382,13 @@ impl Raker {
|
||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if !intent.supports_mime_type(&content_type) {
|
||||||
|
increment_counter!("qprake_rake_specific_fail_count", "reason" => "OtherCT");
|
||||||
|
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||||
|
reason: PermanentFailureReason::UnknownContentType(content_type.to_owned()),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
let headers = response.headers().clone();
|
let headers = response.headers().clone();
|
||||||
let content = response_to_bytes_limited(response, SIZE_LIMIT, TIME_LIMIT).await?;
|
let content = response_to_bytes_limited(response, SIZE_LIMIT, TIME_LIMIT).await?;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue