Emit icons from the raker
continuous-integration/drone the build failed
Details
continuous-integration/drone the build failed
Details
This commit is contained in:
parent
bdfacc643e
commit
27c3218097
|
@ -27,7 +27,8 @@ use quickpeep_raker::raking::task::{TaskContext, TaskResultSubmission};
|
||||||
use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
use quickpeep_raker::raking::{Raker, RAKER_USER_AGENT, TIME_LIMIT};
|
||||||
use quickpeep_raker::storage::RakerStore;
|
use quickpeep_raker::storage::RakerStore;
|
||||||
use quickpeep_structs::rake_entries::{
|
use quickpeep_structs::rake_entries::{
|
||||||
AnalysisAntifeatures, SCHEMA_RAKED_PAGES, SCHEMA_RAKED_REFERENCES, SCHEMA_RAKED_REJECTIONS,
|
AnalysisAntifeatures, SCHEMA_RAKED_ICONS, SCHEMA_RAKED_PAGES, SCHEMA_RAKED_REFERENCES,
|
||||||
|
SCHEMA_RAKED_REJECTIONS,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The ordering is slightly important on these: more specific things should come first.
|
/// The ordering is slightly important on these: more specific things should come first.
|
||||||
|
@ -147,6 +148,7 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
let (pages_tx, pages_rx) = mpsc::channel(32);
|
let (pages_tx, pages_rx) = mpsc::channel(32);
|
||||||
let (refs_tx, refs_rx) = mpsc::channel(32);
|
let (refs_tx, refs_rx) = mpsc::channel(32);
|
||||||
let (rejections_tx, rejections_rx) = mpsc::channel(32);
|
let (rejections_tx, rejections_rx) = mpsc::channel(32);
|
||||||
|
let (icons_tx, icons_rx) = mpsc::channel(32);
|
||||||
|
|
||||||
let mut emitters = Vec::with_capacity(3);
|
let mut emitters = Vec::with_capacity(3);
|
||||||
|
|
||||||
|
@ -201,10 +203,24 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
let emit_dir = config.emit_dir.clone();
|
||||||
|
let settings = config.pack_emitter.clone();
|
||||||
|
emitters.push(
|
||||||
|
std::thread::Builder::new()
|
||||||
|
.name("icons emitter".to_owned())
|
||||||
|
.spawn(move || -> anyhow::Result<()> {
|
||||||
|
pack_emitter(&emit_dir, "icons", SCHEMA_RAKED_ICONS, icons_rx, &settings)?;
|
||||||
|
Ok(())
|
||||||
|
})?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let submission = TaskResultSubmission {
|
let submission = TaskResultSubmission {
|
||||||
pages: pages_tx,
|
pages: pages_tx,
|
||||||
references: refs_tx,
|
references: refs_tx,
|
||||||
rejections: rejections_tx,
|
rejections: rejections_tx,
|
||||||
|
icons: icons_tx,
|
||||||
};
|
};
|
||||||
|
|
||||||
let graceful_stop = Arc::new(AtomicBool::new(false));
|
let graceful_stop = Arc::new(AtomicBool::new(false));
|
||||||
|
|
|
@ -12,7 +12,7 @@ use cylon::Cylon;
|
||||||
use log::warn;
|
use log::warn;
|
||||||
use lru::LruCache;
|
use lru::LruCache;
|
||||||
use quickpeep_structs::rake_entries::{
|
use quickpeep_structs::rake_entries::{
|
||||||
RakedPageEntry, RakedReference, RakedReferrerEntry, ReferenceKind,
|
IconEntry, RakedPageEntry, RakedReference, RakedReferrerEntry, ReferenceKind,
|
||||||
};
|
};
|
||||||
use quickpeep_utils::dates::date_to_quickpeep_days;
|
use quickpeep_utils::dates::date_to_quickpeep_days;
|
||||||
use reqwest::{Client, Url};
|
use reqwest::{Client, Url};
|
||||||
|
@ -44,6 +44,7 @@ pub struct TaskResultSubmission {
|
||||||
pub pages: Sender<(Url, RakedPageEntry)>,
|
pub pages: Sender<(Url, RakedPageEntry)>,
|
||||||
pub references: Sender<(Url, RakedReferrerEntry)>,
|
pub references: Sender<(Url, RakedReferrerEntry)>,
|
||||||
pub rejections: Sender<(Url, PermanentFailure)>,
|
pub rejections: Sender<(Url, PermanentFailure)>,
|
||||||
|
pub icons: Sender<(Url, IconEntry)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -359,7 +360,22 @@ impl TaskContext {
|
||||||
}
|
}
|
||||||
RakeOutcome::RakedIcon(icon) => {
|
RakeOutcome::RakedIcon(icon) => {
|
||||||
// Store icon to icon store
|
// Store icon to icon store
|
||||||
todo!();
|
|
||||||
|
self.submission
|
||||||
|
.icons
|
||||||
|
.send((
|
||||||
|
url.clone(),
|
||||||
|
IconEntry {
|
||||||
|
webp_bytes: icon.webp_bytes,
|
||||||
|
},
|
||||||
|
))
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
self.as_event_processor()
|
||||||
|
.process_icon(url.clone(), today)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(NextAction::Continue)
|
||||||
}
|
}
|
||||||
RakeOutcome::Redirect { reason, new_url } => {
|
RakeOutcome::Redirect { reason, new_url } => {
|
||||||
let refs = RakedReferrerEntry {
|
let refs = RakedReferrerEntry {
|
||||||
|
@ -476,6 +492,25 @@ impl EventProcessor<'_> {
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn process_icon(&self, url: Url, datestamp: u16) -> anyhow::Result<()> {
|
||||||
|
self.store
|
||||||
|
.as_ref()
|
||||||
|
.async_rw_txn(move |txn| {
|
||||||
|
let domain = get_reduced_domain(&url)?;
|
||||||
|
txn.mark_url_as_visited(
|
||||||
|
domain.as_ref(),
|
||||||
|
url.as_ref(),
|
||||||
|
UrlVisitedRecord {
|
||||||
|
last_visited_days: datestamp,
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
|
||||||
|
txn.commit()?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn process_refs(
|
pub async fn process_refs(
|
||||||
&self,
|
&self,
|
||||||
url: Url,
|
url: Url,
|
||||||
|
|
|
@ -29,6 +29,7 @@ impl_serde_for_bitflags!(AnalysisAntifeatures);
|
||||||
pub const SCHEMA_RAKED_PAGES: &str = "quickpeep_pages:0.1.0";
|
pub const SCHEMA_RAKED_PAGES: &str = "quickpeep_pages:0.1.0";
|
||||||
pub const SCHEMA_RAKED_REFERENCES: &str = "quickpeep_references:0.1.0";
|
pub const SCHEMA_RAKED_REFERENCES: &str = "quickpeep_references:0.1.0";
|
||||||
pub const SCHEMA_RAKED_REJECTIONS: &str = "quickpeep_rejections:0.1.0";
|
pub const SCHEMA_RAKED_REJECTIONS: &str = "quickpeep_rejections:0.1.0";
|
||||||
|
pub const SCHEMA_RAKED_ICONS: &str = "quickpeep_icons:0.1.0";
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
pub struct RakedPageEntry {
|
pub struct RakedPageEntry {
|
||||||
|
@ -36,6 +37,12 @@ pub struct RakedPageEntry {
|
||||||
pub document: DenseDocument,
|
pub document: DenseDocument,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
pub struct IconEntry {
|
||||||
|
/// Densely-packed WebP bytes (with low quality).
|
||||||
|
pub webp_bytes: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
pub struct RakedReferrerEntry {
|
pub struct RakedReferrerEntry {
|
||||||
pub references: BTreeSet<RakedReference>,
|
pub references: BTreeSet<RakedReference>,
|
||||||
|
|
Loading…
Reference in New Issue