Fix some places where we enqueue URLs without checking that they have supported schemes
This commit is contained in:
parent
1e8aa95e7a
commit
1c10cb203a
@ -12,6 +12,7 @@ use tokio::sync::mpsc;
|
||||
use tokio::sync::mpsc::Receiver;
|
||||
|
||||
use quickpeep_raker::config::RakerConfig;
|
||||
use quickpeep_raker::raking::references::SUPPORTED_SCHEMES;
|
||||
use quickpeep_raker::raking::{get_robots_txt_for, RakeIntent};
|
||||
use quickpeep_raker::storage::records::{AllowedDomainRecord, WeedDomainRecord};
|
||||
use quickpeep_raker::storage::{maintenance, RakerStore};
|
||||
@ -226,8 +227,10 @@ async fn import_and_flush_batch_seeds(
|
||||
// look at robots.txt and discover sitemaps!
|
||||
if let Some(robots_txt) = get_robots_txt_for(&url_like, &client).await? {
|
||||
for sitemap in robots_txt.sitemaps {
|
||||
txn.enqueue_url(sitemap.url.as_str(), None, RakeIntent::SiteMap)?;
|
||||
stats.new_sitemaps += 1;
|
||||
if SUPPORTED_SCHEMES.contains(&sitemap.url.scheme()) {
|
||||
txn.enqueue_url(sitemap.url.as_str(), None, RakeIntent::SiteMap)?;
|
||||
stats.new_sitemaps += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ pub fn find_references(
|
||||
debug!(
|
||||
"ignoring reference {:?}: not a supported scheme",
|
||||
full_url.as_str()
|
||||
)
|
||||
);
|
||||
}
|
||||
} else {
|
||||
debug!("Can't join {:?} + {:?} to get full URL", page_url, href);
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::config::RerakeTimings;
|
||||
use crate::raking::references::{clean_url, references_from_urlrakes};
|
||||
use crate::raking::references::{clean_url, references_from_urlrakes, SUPPORTED_SCHEMES};
|
||||
use crate::raking::{
|
||||
get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent,
|
||||
RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason,
|
||||
@ -489,7 +489,9 @@ impl EventProcessor<'_> {
|
||||
// If there's a favicon to be tried, add it to the list...
|
||||
let favicon_url_rel = page.document.head.effective_favicon_url();
|
||||
if let Ok(favicon_url) = url.join(favicon_url_rel) {
|
||||
txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon)?;
|
||||
if SUPPORTED_SCHEMES.contains(&favicon_url.scheme()) {
|
||||
txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon)?;
|
||||
}
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
|
Loading…
Reference in New Issue
Block a user