diff --git a/quickpeep_raker/src/raking.rs b/quickpeep_raker/src/raking.rs index f6a0d42..bd6a293 100644 --- a/quickpeep_raker/src/raking.rs +++ b/quickpeep_raker/src/raking.rs @@ -103,6 +103,7 @@ pub enum RakeIntent { Page, Feed, SiteMap, + Icon, } impl From for RakeIntent { diff --git a/quickpeep_raker/src/raking/task.rs b/quickpeep_raker/src/raking/task.rs index 256ed27..8083422 100644 --- a/quickpeep_raker/src/raking/task.rs +++ b/quickpeep_raker/src/raking/task.rs @@ -1,8 +1,8 @@ use crate::raking::analysis::get_reduced_domain; use crate::raking::references::references_from_urlrakes; use crate::raking::{ - get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeOutcome, - Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason, + get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent, + RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason, }; use crate::storage::records::{AllowedDomainRecord, UrlVisitedRecord, WeedDomainRecord}; use crate::storage::{RakerStore, RandomActiveDomainAcquisition}; @@ -429,7 +429,12 @@ pub struct EventProcessor<'a> { } impl EventProcessor<'_> { - pub async fn process_page(&self, url: Url, datestamp: u16) -> anyhow::Result<()> { + pub async fn process_page( + &self, + url: Url, + page: &RakedPageEntry, + datestamp: u16, + ) -> anyhow::Result<()> { self.store .as_ref() .async_rw_txn(move |txn| { @@ -441,6 +446,18 @@ impl EventProcessor<'_> { last_visited_days: datestamp, }, )?; + + // If there's a favicon to be tried, add it to the list... + let favicon_url_rel = if page.document.head.icon.is_empty() { + "/favicon.ico" + } else { + page.document.head.icon.as_str() + }; + + if let Ok(favicon_url) = url.join(favicon_url_rel) { + txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon)?; + } + txn.commit()?; Ok(()) })