Deny content based on content-type before downloading it
This commit is contained in:
parent
5d1f35a8ee
commit
504be33b8a
|
@ -37,24 +37,6 @@ pub const SIZE_LIMIT: usize = 4 * 1024 * 1024;
|
|||
pub const TIME_LIMIT: Duration = Duration::from_secs(10);
|
||||
pub const RAKER_USER_AGENT: &'static str = "QuickPeepBot";
|
||||
|
||||
lazy_static! {
|
||||
/// Lookup table from an image content-type string to the `ImageFormat` it denotes.
///
/// Five different spellings of the ICO content type (`image/vnd.microsoft.icon`,
/// `image/x-icon`, `image/icon`, `image/ico`, `application/ico`) all map to
/// `ImageFormat::Ico`. All keys are written in lowercase.
pub static ref IMAGE_MIME_TYPES: HashMap<&'static str, ImageFormat> = {
|
||||
[
|
||||
("image/png", ImageFormat::Png),
|
||||
("image/webp", ImageFormat::WebP),
|
||||
("image/jpeg", ImageFormat::Jpeg),
|
||||
("image/gif", ImageFormat::Gif),
|
||||
("image/vnd.microsoft.icon", ImageFormat::Ico),
|
||||
("image/x-icon", ImageFormat::Ico),
|
||||
("image/icon", ImageFormat::Ico),
|
||||
("image/ico", ImageFormat::Ico),
|
||||
("application/ico", ImageFormat::Ico),
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
};
|
||||
}
|
||||
|
||||
pub enum RakeOutcome {
|
||||
RakedPage(RakedPage),
|
||||
RakedFeed(Vec<UrlRaked>),
|
||||
|
@ -186,7 +168,22 @@ impl From<ReferenceKind> for RakeIntent {
|
|||
}
|
||||
}
|
||||
|
||||
impl RakeIntent {
|
||||
/// Reports whether `mime_type` is an acceptable content type for this intent.
///
/// Each intent is checked against its own table: `Any` against `ALL_MIME_TYPES`,
/// `Page` against `PAGE_MIME_TYPES`, `Feed` against `FEED_MIME_TYPES`, `SiteMap`
/// against `SITEMAP_MIME_TYPES`, and `Icon` against the keys of `IMAGE_MIME_TYPES`.
///
/// The check is a plain membership lookup on the string as given, with no
/// normalisation performed here.
/// NOTE(review): the table entries visible in this file are lowercase and carry no
/// `;` parameters, so callers presumably need to pass an already-normalised type;
/// confirm against every call site.
pub fn supports_mime_type(&self, mime_type: &str) -> bool {
|
||||
match self {
|
||||
RakeIntent::Any => ALL_MIME_TYPES.contains(mime_type),
|
||||
RakeIntent::Page => PAGE_MIME_TYPES.contains(mime_type),
|
||||
RakeIntent::Feed => FEED_MIME_TYPES.contains(mime_type),
|
||||
RakeIntent::SiteMap => SITEMAP_MIME_TYPES.contains(mime_type),
|
||||
// Only the keys matter here; the mapped `ImageFormat` is not consulted.
RakeIntent::Icon => IMAGE_MIME_TYPES.contains_key(mime_type),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/// Content types accepted for `RakeIntent::Page`: `text/html` and `text/gemini`.
static ref PAGE_MIME_TYPES: HashSet<&'static str> =
|
||||
HashSet::from_iter(vec!["text/html", "text/gemini",]);
|
||||
|
||||
/// Content types accepted for `RakeIntent::SiteMap`: `text/xml` and `application/xml`.
static ref SITEMAP_MIME_TYPES: HashSet<&'static str> =
|
||||
HashSet::from_iter(vec!["text/xml", "application/xml",]);
|
||||
|
||||
|
@ -208,6 +205,30 @@ lazy_static! {
|
|||
"application/rdf+xml",
|
||||
"application/feed+json"
|
||||
]);
|
||||
|
||||
/// Lookup table from an image content-type string to the `ImageFormat` it denotes.
///
/// Five different spellings of the ICO content type (`image/vnd.microsoft.icon`,
/// `image/x-icon`, `image/icon`, `image/ico`, `application/ico`) all map to
/// `ImageFormat::Ico`. All keys are written in lowercase.
pub static ref IMAGE_MIME_TYPES: HashMap<&'static str, ImageFormat> = {
|
||||
[
|
||||
("image/png", ImageFormat::Png),
|
||||
("image/webp", ImageFormat::WebP),
|
||||
("image/jpeg", ImageFormat::Jpeg),
|
||||
("image/gif", ImageFormat::Gif),
|
||||
("image/vnd.microsoft.icon", ImageFormat::Ico),
|
||||
("image/x-icon", ImageFormat::Ico),
|
||||
("image/icon", ImageFormat::Ico),
|
||||
("image/ico", ImageFormat::Ico),
|
||||
("application/ico", ImageFormat::Ico),
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
};
|
||||
|
||||
/// Union of every content-type table in this block: page, sitemap, feed, feed-link
/// and image types (the image types are taken from the keys of `IMAGE_MIME_TYPES`).
///
/// Collecting into a `HashSet` means a type that appears in more than one source
/// table is stored only once.
pub static ref ALL_MIME_TYPES: HashSet<&'static str> = HashSet::from_iter(
|
||||
PAGE_MIME_TYPES.iter().cloned()
|
||||
.chain(SITEMAP_MIME_TYPES.iter().cloned())
|
||||
.chain(FEED_MIME_TYPES.iter().cloned())
|
||||
.chain(FEED_LINK_MIME_TYPES.iter().cloned())
|
||||
.chain(IMAGE_MIME_TYPES.keys().cloned())
|
||||
);
|
||||
}
|
||||
|
||||
async fn response_to_bytes_limited(
|
||||
|
@ -348,7 +369,12 @@ impl Raker {
|
|||
let content_type = content_type
|
||||
.to_str()
|
||||
.context("Can't convert content-type to str")?;
|
||||
content_type.split(";").next().unwrap().trim().to_owned()
|
||||
content_type
|
||||
.split(";")
|
||||
.next()
|
||||
.unwrap()
|
||||
.trim()
|
||||
.to_lowercase()
|
||||
} else {
|
||||
increment_counter!("qprake_rake_specific_fail_count", "reason" => "NoCT");
|
||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||
|
@ -356,6 +382,13 @@ impl Raker {
|
|||
}));
|
||||
};
|
||||
|
||||
if !intent.supports_mime_type(&content_type) {
|
||||
increment_counter!("qprake_rake_specific_fail_count", "reason" => "OtherCT");
|
||||
return Ok(RakeOutcome::PermanentFailure(PermanentFailure {
|
||||
reason: PermanentFailureReason::UnknownContentType(content_type.to_owned()),
|
||||
}));
|
||||
}
|
||||
|
||||
let headers = response.headers().clone();
|
||||
let content = response_to_bytes_limited(response, SIZE_LIMIT, TIME_LIMIT).await?;
|
||||
|
||||
|
|
Loading…
Reference in New Issue