From 0060ec07640ecdb0217f69c845a1940d10226fa3 Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre
Date: Tue, 22 Mar 2022 23:54:28 +0000
Subject: [PATCH] Add seed sorting tool in order to approach first proof of concept

---
 Cargo.lock                                    |   1 +
 quickpeep/Cargo.toml                          |   1 +
 .../20220322223532_seed_sorting.sql           |  16 ++
 quickpeep/src/bin/qp-seedcoll-sort.rs         | 220 ++++++++++++++++++
 quickpeep/src/{main.rs => bin/quickpeep.rs}   |  10 +-
 quickpeep/src/lib.rs                          |   5 +
 quickpeep/src/web/seed_collector.rs           |   2 +-
 7 files changed, 246 insertions(+), 9 deletions(-)
 create mode 100644 quickpeep/migrations/20220322223532_seed_sorting.sql
 create mode 100644 quickpeep/src/bin/qp-seedcoll-sort.rs
 rename quickpeep/src/{main.rs => bin/quickpeep.rs} (94%)
 create mode 100644 quickpeep/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0e7e67f..826c119 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2996,6 +2996,7 @@ dependencies = [
  "anyhow",
  "askama",
  "axum",
+ "colour",
  "env_logger",
  "itertools",
  "log",
diff --git a/quickpeep/Cargo.toml b/quickpeep/Cargo.toml
index ca58190..1773886 100644
--- a/quickpeep/Cargo.toml
+++ b/quickpeep/Cargo.toml
@@ -17,3 +17,4 @@ log = "0.4.14"
 env_logger = "0.9.0"
 sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] }
 itertools = "0.10.3"
+colour = "0.6.0"
diff --git a/quickpeep/migrations/20220322223532_seed_sorting.sql b/quickpeep/migrations/20220322223532_seed_sorting.sql
new file mode 100644
index 0000000..b700ee6
--- /dev/null
+++ b/quickpeep/migrations/20220322223532_seed_sorting.sql
@@ -0,0 +1,16 @@
+-- Support post-processing the seeds with a manual sorting action
+
+CREATE TABLE sorted_seeds (
+    collected_seed_id INTEGER NOT NULL PRIMARY KEY REFERENCES collected_seeds(collected_seed_id),
+
+    -- See qp-seedcoll-sort for definitions.
+    decision INTEGER NOT NULL,
+
+    -- Specified tags, as a diff of what was there.
+    -- + means 'add', - means 'remove', '-*' means 'remove all specified'.
+    -- e.g. '-*+cat' or '-manual+blog'.
+    tag_diff TEXT NOT NULL,
+
+    -- Any comment that will be published as a line comment preceding this entry
+    comment_published TEXT NOT NULL
+)
diff --git a/quickpeep/src/bin/qp-seedcoll-sort.rs b/quickpeep/src/bin/qp-seedcoll-sort.rs
new file mode 100644
index 0000000..eda90ed
--- /dev/null
+++ b/quickpeep/src/bin/qp-seedcoll-sort.rs
@@ -0,0 +1,220 @@
+use anyhow::{bail, Context};
+use colour::{blue_ln, dark_red, dark_yellow, dark_yellow_ln, grey_ln, magenta_ln, yellow};
+use env_logger::Env;
+use itertools::Itertools;
+use quickpeep::config::WebConfig;
+use sqlx::sqlite::SqlitePoolOptions;
+use sqlx::SqlitePool;
+use std::path::PathBuf;
+use tokio::io;
+use tokio::io::{AsyncBufReadExt, BufReader};
+
+/// Decisions that can be entered at the sorting prompt.
+///
+/// The index of the chosen entry is what gets stored in `sorted_seeds.decision`,
+/// so reordering or removing entries changes the meaning of rows already stored.
+pub const SORTING_OUTCOMES: [&str; 5] = [
+    // Included
+    "y",
+    // Not included (generic)
+    "n",
+    // Duplicates an existing entry
+    "dupe",
+    // Spam entry
+    "spam",
+    // Invalid format for some reason (e.g. pattern or URL invalid)
+    "inv",
+];
+
+/// Loads the web config, opens the SQLite pool, runs migrations and then
+/// starts the interactive sorting loop.
+///
+/// The config is read from the path in the `QP_WEB_CONFIG` environment
+/// variable, falling back to `qp_web.ron`.
+#[tokio::main]
+pub async fn main() -> anyhow::Result<()> {
+    env_logger::Builder::from_env(
+        Env::default().default_filter_or("info,quickpeep=debug,sqlx=warn"),
+    )
+    .init();
+
+    let config_path =
+        PathBuf::from(std::env::var("QP_WEB_CONFIG").unwrap_or_else(|_| "qp_web.ron".to_owned()));
+
+    if !config_path.exists() {
+        bail!(
+            "Config path {:?} doesn't exist. QP_WEB_CONFIG env var overrides.",
+            config_path
+        );
+    }
+
+    let file_bytes = std::fs::read(&config_path).context("Failed to read web config file")?;
+    let web_config: WebConfig =
+        ron::de::from_bytes(&file_bytes).context("Failed to parse web config")?;
+
+    let pool = SqlitePoolOptions::new()
+        .min_connections(1)
+        .after_connect(|conn| {
+            Box::pin(async move {
+                // Use the WAL because it just makes more sense :)
+                sqlx::query("PRAGMA journal_mode = WAL")
+                    .execute(&mut *conn)
+                    .await?;
+
+                // Enable foreign keys because we like them!
+                sqlx::query("PRAGMA foreign_keys = ON")
+                    .execute(&mut *conn)
+                    .await?;
+
+                Ok(())
+            })
+        })
+        .connect(
+            &web_config
+                .sqlite_db_path
+                .to_str()
+                .context("SQLite DB path should be UTF-8")?,
+        )
+        .await?;
+
+    sqlx::migrate!().run(&pool).await?;
+
+    seed_sorter_loop(&pool).await?;
+
+    Ok(())
+}
+
+/// Interactively sorts every collected seed that has no `sorted_seeds` row yet.
+///
+/// For each seed (lowest `collected_seed_id` first) the seed is printed and one
+/// line is read from stdin in the form `<decision> [+tag|-tag ...] [# comment]`,
+/// where `<decision>` is one of [`SORTING_OUTCOMES`]. A line with no decision,
+/// an unknown decision or a malformed diff tag is reported on stderr and the
+/// same seed is offered again. The loop ends when no unsorted seeds remain or
+/// stdin reaches end-of-file.
+pub async fn seed_sorter_loop(pool: &SqlitePool) -> anyhow::Result<()> {
+    let count_row = sqlx::query!(
+        "
+        SELECT COALESCE(COUNT(collected_seed_id), 0) AS numseeds
+        FROM collected_seeds cs
+        WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
+    "
+    )
+    .fetch_one(pool)
+    .await?;
+
+    grey_ln!("{} seeds to be sorted.", count_row.numseeds);
+
+    let mut stdin = BufReader::new(io::stdin());
+
+    loop {
+        let next_sortable_seed = sqlx::query!(
+            "
+            SELECT collected_seed_id, collected_ts, url, tags, extra_tags, remarks_private
+            FROM collected_seeds cs
+            WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
+            ORDER BY collected_seed_id ASC
+            LIMIT 1
+        "
+        )
+        .fetch_optional(pool)
+        .await?;
+
+        let next_sortable_seed = match next_sortable_seed {
+            Some(nss) => nss,
+            None => break,
+        };
+
+        println!();
+        println!();
+        yellow!("URL: ");
+        blue_ln!("{}", next_sortable_seed.url);
+        yellow!("Tags: ");
+        magenta_ln!("{}", next_sortable_seed.tags.replace(",", ", "));
+
+        if !next_sortable_seed.extra_tags.is_empty() {
+            dark_yellow!("Suggested extra tags: ");
+            blue_ln!("{}", next_sortable_seed.extra_tags.replace(",", ", "));
+        }
+
+        if !next_sortable_seed.remarks_private.is_empty() {
+            dark_yellow_ln!("Remarks (private): ");
+            blue_ln!(
+                "\t{}",
+                next_sortable_seed.remarks_private.replace("\n", "\n\t")
+            );
+        }
+
+        dark_red!("? ");
+
+        let mut buf = String::new();
+        if stdin.read_line(&mut buf).await? == 0 {
+            // End of input: stop sorting.
+            break;
+        }
+
+        // Everything after the first '#' is the comment to be published.
+        let mut comment_split = buf.trim().splitn(2, '#');
+        let action_part = comment_split.next().unwrap_or("").trim();
+        let comment = comment_split.next().unwrap_or("").trim();
+
+        let mut action_pieces = action_part.split_whitespace();
+        let action = match action_pieces.next() {
+            Some(action) => action,
+            None => {
+                // Blank or comment-only line: ask again instead of panicking.
+                eprintln!(
+                    "No decision given. Try one of {}",
+                    SORTING_OUTCOMES.iter().join(", ")
+                );
+                continue;
+            }
+        };
+
+        let diff_tags = action_pieces.collect_vec();
+        let mut diff_tags_valid = true;
+        for diff_tag in &diff_tags {
+            if !diff_tag.starts_with('+') && !diff_tag.starts_with('-') {
+                eprintln!("Diff tag doesn't start with + or -: {}", diff_tag);
+                diff_tags_valid = false;
+            }
+        }
+        if !diff_tags_valid {
+            // Nothing was stored, so the same seed is offered again.
+            continue;
+        }
+
+        let diff_tags_joined = diff_tags.join(",");
+
+        let decision_id = match SORTING_OUTCOMES
+            .iter()
+            .position(|outcome| *outcome == action)
+        {
+            Some(di) => di,
+            None => {
+                eprintln!(
+                    "Decision {:?} not found. Try one of {}",
+                    action,
+                    SORTING_OUTCOMES.iter().join(", ")
+                );
+                continue;
+            }
+        } as i64;
+
+        // TODO export the seed too...? or is that a separate pass...?
+        sqlx::query!(
+            "
+            INSERT INTO sorted_seeds (
+                collected_seed_id, decision, tag_diff, comment_published
+            ) VALUES (?, ?, ?, ?)
+        ",
+            next_sortable_seed.collected_seed_id,
+            decision_id,
+            diff_tags_joined,
+            comment
+        )
+        .execute(pool)
+        .await?;
+    }
+    Ok(())
+}
diff --git a/quickpeep/src/main.rs b/quickpeep/src/bin/quickpeep.rs
similarity index 94%
rename from quickpeep/src/main.rs
rename to quickpeep/src/bin/quickpeep.rs
index 0659e72..9dbfaae 100644
--- a/quickpeep/src/main.rs
+++ b/quickpeep/src/bin/quickpeep.rs
@@ -1,21 +1,15 @@
-use crate::config::WebConfig;
-use crate::web::seed_collector::{seed_collection_root, seed_collection_root_post};
 use anyhow::{bail, Context};
 use axum::extract::Extension;
 use axum::http::StatusCode;
 use axum::routing::{get, get_service, post};
 use axum::Router;
 use env_logger::Env;
+use quickpeep::config::WebConfig;
+use quickpeep::web::seed_collector::{seed_collection_root, seed_collection_root_post};
 use sqlx::sqlite::SqlitePoolOptions;
 use std::path::PathBuf;
 use tower_http::services::ServeDir;
 
-mod config;
-
-mod web;
-
-mod webutil;
-
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
     env_logger::Builder::from_env(
diff --git a/quickpeep/src/lib.rs b/quickpeep/src/lib.rs
new file mode 100644
index 0000000..f2a7ab8
--- /dev/null
+++ b/quickpeep/src/lib.rs
@@ -0,0 +1,5 @@
+pub mod config;
+
+pub mod web;
+
+pub mod webutil;
diff --git a/quickpeep/src/web/seed_collector.rs b/quickpeep/src/web/seed_collector.rs
index 832f44a..a0fa8c3 100644
--- a/quickpeep/src/web/seed_collector.rs
+++ b/quickpeep/src/web/seed_collector.rs
@@ -1,5 +1,5 @@
+use crate::config::WebConfig;
 use crate::webutil::{internal_error, TemplatedHtml};
-use crate::WebConfig;
 use askama::Template;
 use axum::extract::{Extension, Form};
 use axum::response::IntoResponse;