Deny content based on content-type before downloading it

This commit is contained in:
Olivier 'reivilibre' 2022-06-11 00:57:24 +01:00
parent 5d1f35a8ee
commit 504be33b8a

View File

@ -37,24 +37,6 @@ pub const SIZE_LIMIT: usize = 4 * 1024 * 1024;
pub const TIME_LIMIT: Duration = Duration::from_secs(10);
pub const RAKER_USER_AGENT: &'static str = "QuickPeepBot";
lazy_static! {
    /// Lookup table from image MIME type strings to the matching `ImageFormat`.
    ///
    /// Five different MIME strings all map to `ImageFormat::Ico`.
    pub static ref IMAGE_MIME_TYPES: HashMap<&'static str, ImageFormat> = HashMap::from([
        ("image/png", ImageFormat::Png),
        ("image/webp", ImageFormat::WebP),
        ("image/jpeg", ImageFormat::Jpeg),
        ("image/gif", ImageFormat::Gif),
        ("image/vnd.microsoft.icon", ImageFormat::Ico),
        ("image/x-icon", ImageFormat::Ico),
        ("image/icon", ImageFormat::Ico),
        ("image/ico", ImageFormat::Ico),
        ("application/ico", ImageFormat::Ico),
    ]);
}
pub enum RakeOutcome {
RakedPage(RakedPage),
RakedFeed(Vec<UrlRaked>),
@ -186,7 +168,22 @@ impl From<ReferenceKind> for RakeIntent {
}
}
impl RakeIntent {
    /// Reports whether `mime_type` appears in the MIME type table associated
    /// with this intent.
    ///
    /// The lookup is an exact string match against the table's entries; no
    /// normalisation of `mime_type` is performed here.
    pub fn supports_mime_type(&self, mime_type: &str) -> bool {
        match self {
            Self::Any => ALL_MIME_TYPES.contains(mime_type),
            Self::Page => PAGE_MIME_TYPES.contains(mime_type),
            Self::Feed => FEED_MIME_TYPES.contains(mime_type),
            Self::SiteMap => SITEMAP_MIME_TYPES.contains(mime_type),
            // The image table is a map (MIME type -> format), so test its keys.
            Self::Icon => IMAGE_MIME_TYPES.contains_key(mime_type),
        }
    }
}
lazy_static! {
/// MIME types checked by `RakeIntent::Page` in `supports_mime_type`.
static ref PAGE_MIME_TYPES: HashSet<&'static str> = HashSet::from(["text/html", "text/gemini"]);
/// MIME types checked by `RakeIntent::SiteMap` in `supports_mime_type`.
static ref SITEMAP_MIME_TYPES: HashSet<&'static str> = HashSet::from(["text/xml", "application/xml"]);
@ -208,6 +205,30 @@ lazy_static! {
"application/rdf+xml",
"application/feed+json"
]);
/// Lookup table from image MIME type strings to the matching `ImageFormat`.
///
/// Five different MIME strings all map to `ImageFormat::Ico`.
pub static ref IMAGE_MIME_TYPES: HashMap<&'static str, ImageFormat> = HashMap::from([
    ("image/png", ImageFormat::Png),
    ("image/webp", ImageFormat::WebP),
    ("image/jpeg", ImageFormat::Jpeg),
    ("image/gif", ImageFormat::Gif),
    ("image/vnd.microsoft.icon", ImageFormat::Ico),
    ("image/x-icon", ImageFormat::Ico),
    ("image/icon", ImageFormat::Ico),
    ("image/ico", ImageFormat::Ico),
    ("application/ico", ImageFormat::Ico),
]);
/// Union of every entry in the page, sitemap, feed and feed-link tables,
/// plus every key of the image table.
pub static ref ALL_MIME_TYPES: HashSet<&'static str> = PAGE_MIME_TYPES
    .iter()
    .copied()
    .chain(SITEMAP_MIME_TYPES.iter().copied())
    .chain(FEED_MIME_TYPES.iter().copied())
    .chain(FEED_LINK_MIME_TYPES.iter().copied())
    .chain(IMAGE_MIME_TYPES.keys().copied())
    .collect();
}
async fn response_to_bytes_limited(
@ -348,7 +369,12 @@ impl Raker {
let content_type = content_type
.to_str()
.context("Can't convert content-type to str")?;
content_type.split(";").next().unwrap().trim().to_owned()
content_type
.split(";")
.next()
.unwrap()
.trim()
.to_lowercase()
} else {
increment_counter!("qprake_rake_specific_fail_count", "reason" => "NoCT");
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
@ -356,6 +382,13 @@ impl Raker {
}));
};
if !intent.supports_mime_type(&content_type) {
increment_counter!("qprake_rake_specific_fail_count", "reason" => "OtherCT");
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
reason: PermanentFailureReason::UnknownContentType(content_type.to_owned()),
}));
}
let headers = response.headers().clone();
let content = response_to_bytes_limited(response, SIZE_LIMIT, TIME_LIMIT).await?;