Some bugfixes that get the raker mostly going

This commit is contained in:
Olivier 'reivilibre' 2022-03-21 19:24:20 +00:00
parent 51d5b9208b
commit e0fb714f7a
5 changed files with 26 additions and 22 deletions

3
.gitignore vendored
View File

@ -12,4 +12,5 @@ quickpeep/testdb.sqlite
qp_web.ron qp_web.ron
dist dist
book book
workbench workbench
rakepacks

View File

@ -5,4 +5,6 @@ emit_dir = "./rakepacks"
[metrics] [metrics]
prometheus = "127.0.0.1:9774" prometheus = "127.0.0.1:9774"
# bare_metrics = true # bare_metrics = true
[pack_emitter]

View File

@ -4,7 +4,7 @@ use env_logger::Env;
use adblock::lists::RuleTypes; use adblock::lists::RuleTypes;
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use log::{error, warn}; use log::{debug, error, warn};
use lru::LruCache; use lru::LruCache;
use metrics_exporter_prometheus::PrometheusBuilder; use metrics_exporter_prometheus::PrometheusBuilder;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
@ -56,7 +56,11 @@ pub struct Opts {
#[tokio::main] #[tokio::main]
pub async fn main() -> anyhow::Result<()> { pub async fn main() -> anyhow::Result<()> {
env_logger::Builder::from_env(Env::default().default_filter_or("info,quickpeep=debug")).init(); env_logger::Builder::from_env(
Env::default().default_filter_or("info,quickpeep_raker=debug,qp_raker=debug"),
)
.init();
debug!("Starting up...");
let opts: Opts = Opts::parse(); let opts: Opts = Opts::parse();
@ -233,6 +237,12 @@ pub async fn main() -> anyhow::Result<()> {
for task in tasks { for task in tasks {
task.await?; task.await?;
} }
for task in emitters {
let result = task.join().expect("Can't join thread");
result?;
}
let _ = dsmu_cancel_tx.send(()); let _ = dsmu_cancel_tx.send(());
datastore_metrics_updater.await??; datastore_metrics_updater.await??;

View File

@ -231,20 +231,8 @@ impl TaskContext {
match self.process_outcome(&url, rake_outcome).await? { match self.process_outcome(&url, rake_outcome).await? {
NextAction::Continue => { NextAction::Continue => {
// Take that URL off the queue // The URL has already been taken off the queue.
let now = Utc::today(); // We just need to continue!
let record = UrlVisitedRecord {
last_visited_days: date_to_quickpeep_days(&now)?,
};
let domain = domain.clone();
self.store
.async_rw_txn(move |txn| {
txn.mark_url_as_visited(&domain, &url_str, record)?;
txn.commit()?;
Ok(())
})
.await?;
} }
NextAction::ChangeDomain => { NextAction::ChangeDomain => {
let mut cache = self let mut cache = self

View File

@ -324,10 +324,13 @@ impl<'a> RakerTxn<'a, RW> {
let queue_key = format!("{}\n{}", domain, url_str); let queue_key = format!("{}\n{}", domain, url_str);
ensure!( // We legitimately want this to NOP when already dequeued; so don't ensure the opposite.
self.mdbx_txn.del(&queue_urls, queue_key.as_bytes(), None)?, // ensure!(
"No queued URL to delete" // self.mdbx_txn.del(&queue_urls, queue_key.as_bytes(), None)?,
); // "No queued URL to delete ({})", queue_key
// );
self.mdbx_txn.del(&queue_urls, queue_key.as_bytes(), None)?;
self.mdbx_txn.put( self.mdbx_txn.put(
visited_urls, visited_urls,