Emit icons from the raker
continuous-integration/drone the build failed
Details
continuous-integration/drone the build failed
Details
This commit is contained in:
parent
bdfacc643e
commit
27c3218097
|
@ -27,7 +27,8 @@ use quickpeep_raker::raking::task::{TaskContext, TaskResultSubmission};
|
|||
use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
||||
use quickpeep_raker::storage::RakerStore;
|
||||
use quickpeep_structs::rake_entries::{
|
||||
AnalysisAntifeatures, SCHEMA_RAKED_PAGES, SCHEMA_RAKED_REFERENCES, SCHEMA_RAKED_REJECTIONS,
|
||||
AnalysisAntifeatures, SCHEMA_RAKED_ICONS, SCHEMA_RAKED_PAGES, SCHEMA_RAKED_REFERENCES,
|
||||
SCHEMA_RAKED_REJECTIONS,
|
||||
};
|
||||
|
||||
/// The ordering is slightly important on these: more specific things should come first.
|
||||
|
@ -147,6 +148,7 @@ pub async fn main() -> anyhow::Result<()> {
|
|||
let (pages_tx, pages_rx) = mpsc::channel(32);
|
||||
let (refs_tx, refs_rx) = mpsc::channel(32);
|
||||
let (rejections_tx, rejections_rx) = mpsc::channel(32);
|
||||
let (icons_tx, icons_rx) = mpsc::channel(32);
|
||||
|
||||
let mut emitters = Vec::with_capacity(3);
|
||||
|
||||
|
@ -201,10 +203,24 @@ pub async fn main() -> anyhow::Result<()> {
|
|||
);
|
||||
}
|
||||
|
||||
{
|
||||
let emit_dir = config.emit_dir.clone();
|
||||
let settings = config.pack_emitter.clone();
|
||||
emitters.push(
|
||||
std::thread::Builder::new()
|
||||
.name("icons emitter".to_owned())
|
||||
.spawn(move || -> anyhow::Result<()> {
|
||||
pack_emitter(&emit_dir, "icons", SCHEMA_RAKED_ICONS, icons_rx, &settings)?;
|
||||
Ok(())
|
||||
})?,
|
||||
);
|
||||
}
|
||||
|
||||
let submission = TaskResultSubmission {
|
||||
pages: pages_tx,
|
||||
references: refs_tx,
|
||||
rejections: rejections_tx,
|
||||
icons: icons_tx,
|
||||
};
|
||||
|
||||
let graceful_stop = Arc::new(AtomicBool::new(false));
|
||||
|
|
|
@ -12,7 +12,7 @@ use cylon::Cylon;
|
|||
use log::warn;
|
||||
use lru::LruCache;
|
||||
use quickpeep_structs::rake_entries::{
|
||||
RakedPageEntry, RakedReference, RakedReferrerEntry, ReferenceKind,
|
||||
IconEntry, RakedPageEntry, RakedReference, RakedReferrerEntry, ReferenceKind,
|
||||
};
|
||||
use quickpeep_utils::dates::date_to_quickpeep_days;
|
||||
use reqwest::{Client, Url};
|
||||
|
@ -44,6 +44,7 @@ pub struct TaskResultSubmission {
|
|||
pub pages: Sender<(Url, RakedPageEntry)>,
|
||||
pub references: Sender<(Url, RakedReferrerEntry)>,
|
||||
pub rejections: Sender<(Url, PermanentFailure)>,
|
||||
pub icons: Sender<(Url, IconEntry)>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -359,7 +360,22 @@ impl TaskContext {
|
|||
}
|
||||
RakeOutcome::RakedIcon(icon) => {
|
||||
// Store icon to icon store
|
||||
todo!();
|
||||
|
||||
self.submission
|
||||
.icons
|
||||
.send((
|
||||
url.clone(),
|
||||
IconEntry {
|
||||
webp_bytes: icon.webp_bytes,
|
||||
},
|
||||
))
|
||||
.await?;
|
||||
|
||||
self.as_event_processor()
|
||||
.process_icon(url.clone(), today)
|
||||
.await?;
|
||||
|
||||
Ok(NextAction::Continue)
|
||||
}
|
||||
RakeOutcome::Redirect { reason, new_url } => {
|
||||
let refs = RakedReferrerEntry {
|
||||
|
@ -476,6 +492,25 @@ impl EventProcessor<'_> {
|
|||
.await
|
||||
}
|
||||
|
||||
pub async fn process_icon(&self, url: Url, datestamp: u16) -> anyhow::Result<()> {
|
||||
self.store
|
||||
.as_ref()
|
||||
.async_rw_txn(move |txn| {
|
||||
let domain = get_reduced_domain(&url)?;
|
||||
txn.mark_url_as_visited(
|
||||
domain.as_ref(),
|
||||
url.as_ref(),
|
||||
UrlVisitedRecord {
|
||||
last_visited_days: datestamp,
|
||||
},
|
||||
)?;
|
||||
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn process_refs(
|
||||
&self,
|
||||
url: Url,
|
||||
|
|
|
@ -29,6 +29,7 @@ impl_serde_for_bitflags!(AnalysisAntifeatures);
|
|||
pub const SCHEMA_RAKED_PAGES: &str = "quickpeep_pages:0.1.0";
|
||||
pub const SCHEMA_RAKED_REFERENCES: &str = "quickpeep_references:0.1.0";
|
||||
pub const SCHEMA_RAKED_REJECTIONS: &str = "quickpeep_rejections:0.1.0";
|
||||
pub const SCHEMA_RAKED_ICONS: &str = "quickpeep_icons:0.1.0";
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct RakedPageEntry {
|
||||
|
@ -36,6 +37,12 @@ pub struct RakedPageEntry {
|
|||
pub document: DenseDocument,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct IconEntry {
|
||||
/// Densely-packed WebP bytes (with low quality).
|
||||
pub webp_bytes: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct RakedReferrerEntry {
|
||||
pub references: BTreeSet<RakedReference>,
|
||||
|
|
Loading…
Reference in New Issue