Add seed sorting tool in order to approach first proof of concept
Some checks failed
continuous-integration/drone the build failed

This commit is contained in:
Olivier 'reivilibre' 2022-03-22 23:54:28 +00:00
parent 528e0bbf43
commit 0060ec0764
7 changed files with 207 additions and 9 deletions

1
Cargo.lock generated
View File

@ -2996,6 +2996,7 @@ dependencies = [
"anyhow", "anyhow",
"askama", "askama",
"axum", "axum",
"colour",
"env_logger", "env_logger",
"itertools", "itertools",
"log", "log",

View File

@ -17,3 +17,4 @@ log = "0.4.14"
env_logger = "0.9.0" env_logger = "0.9.0"
sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] } sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] }
itertools = "0.10.3" itertools = "0.10.3"
colour = "0.6.0"

View File

@ -0,0 +1,16 @@
-- Support post-processing the seeds with a manual sorting action.
-- Each row records the outcome of manually sorting one collected seed.
CREATE TABLE sorted_seeds (
    -- The collected seed that was sorted. Being the primary key, each seed can
    -- have at most one sorting decision.
    collected_seed_id INTEGER NOT NULL PRIMARY KEY REFERENCES collected_seeds(collected_seed_id),
    -- Numeric code of the decision that was taken.
    -- See qp-seedcoll-sort for definitions.
    decision INTEGER NOT NULL,
    -- Specified tags, as a diff of what was there.
    -- + means 'add', - means 'remove', '-*' means 'remove all specified'.
    -- e.g. '-*+cat' or '-manual+blog'.
    -- NOTE(review): confirm the exact separator (if any) between entries
    -- against the tool that writes this column.
    tag_diff TEXT NOT NULL,
    -- Any comment that will be published as a line comment preceding this entry
    comment_published TEXT NOT NULL
)

View File

@ -0,0 +1,181 @@
use anyhow::{bail, Context};
use colour::{blue_ln, dark_red, dark_yellow, dark_yellow_ln, grey_ln, magenta_ln, yellow};
use env_logger::Env;
use itertools::Itertools;
use quickpeep::config::WebConfig;
use sqlx::sqlite::SqlitePoolOptions;
use sqlx::SqlitePool;
use std::path::PathBuf;
use tokio::io;
use tokio::io::{AsyncBufReadExt, BufReader};
/// Keywords the operator can type to record a decision about a seed.
///
/// The zero-based position of a keyword in this array is the value that
/// `seed_sorter_loop` stores in `sorted_seeds.decision`, so the order of the
/// entries must stay stable: reordering or removing one would change the
/// meaning of rows that were already written.
pub const SORTING_OUTCOMES: [&str; 5] = [
    // Included
    "y",
    // Not included (generic)
    "n",
    // Duplicates an existing entry
    "dupe",
    // Spam entry
    "spam",
    // Invalid format for some reason (e.g. pattern or URL invalid)
    "inv",
];
/// Entry point of the seed sorting tool.
///
/// Initialises logging, loads the web config (from the path in the
/// `QP_WEB_CONFIG` environment variable, falling back to `qp_web.ron`), opens
/// the SQLite pool, runs the migrations and then starts the interactive
/// sorting loop.
///
/// # Errors
///
/// Fails if the config file is missing, unreadable or unparseable, if the
/// database path is not valid UTF-8, or if connecting, migrating or sorting
/// fails.
#[tokio::main]
pub async fn main() -> anyhow::Result<()> {
    let log_env = Env::default().default_filter_or("info,quickpeep=debug,sqlx=warn");
    env_logger::Builder::from_env(log_env).init();

    // The environment variable wins; otherwise look next to the working directory.
    let config_path = match std::env::var("QP_WEB_CONFIG") {
        Ok(overridden) => PathBuf::from(overridden),
        Err(_) => PathBuf::from("qp_web.ron"),
    };

    if !config_path.exists() {
        bail!(
            "Config path {:?} doesn't exist. QP_WEB_CONFIG env var overrides.",
            config_path
        );
    }

    let raw_config = std::fs::read(&config_path).context("Failed to read web config file")?;
    let web_config: WebConfig =
        ron::de::from_bytes(&raw_config).context("Failed to parse web config")?;

    let db_path = web_config
        .sqlite_db_path
        .to_str()
        .context("SQLite DB path should be UTF-8")?;

    let pool_options = SqlitePoolOptions::new()
        .min_connections(1)
        .after_connect(|conn| {
            Box::pin(async move {
                // Every new connection switches to write-ahead logging...
                sqlx::query("PRAGMA journal_mode = WAL")
                    .execute(&mut *conn)
                    .await?;

                // ...and turns on foreign key enforcement.
                sqlx::query("PRAGMA foreign_keys = ON")
                    .execute(&mut *conn)
                    .await?;

                Ok(())
            })
        });
    let pool = pool_options.connect(db_path).await?;

    sqlx::migrate!().run(&pool).await?;

    seed_sorter_loop(&pool).await
}
/// Interactively sorts every collected seed that has no `sorted_seeds` row yet.
///
/// Prints how many seeds are waiting, then repeatedly shows the unsorted seed
/// with the lowest `collected_seed_id` and reads one line from stdin of the
/// form:
///
/// ```text
/// <decision> [+tag|-tag ...] [# comment]
/// ```
///
/// `<decision>` must be one of [`SORTING_OUTCOMES`]. A valid line is stored as
/// a new `sorted_seeds` row. An invalid line (no decision, unknown decision,
/// or a tag without a `+`/`-` prefix) is reported on stderr and the same seed
/// is presented again without anything being written.
///
/// The loop ends when no unsorted seeds remain or stdin reaches end of file.
///
/// # Errors
///
/// Returns an error if a database query or reading from stdin fails.
pub async fn seed_sorter_loop(pool: &SqlitePool) -> anyhow::Result<()> {
    let count_row = sqlx::query!(
        "
SELECT COALESCE(COUNT(collected_seed_id), 0) AS numseeds
FROM collected_seeds cs
WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
"
    )
    .fetch_one(pool)
    .await?;

    grey_ln!("{} seeds to be sorted.", count_row.numseeds);

    let mut stdin = BufReader::new(io::stdin());
    // One input buffer, reused for every prompt.
    let mut input = String::new();

    loop {
        let next_sortable_seed = sqlx::query!(
            "
SELECT collected_seed_id, collected_ts, url, tags, extra_tags, remarks_private
FROM collected_seeds cs
WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
ORDER BY collected_seed_id ASC
LIMIT 1
"
        )
        .fetch_optional(pool)
        .await?;

        let next_sortable_seed = match next_sortable_seed {
            Some(nss) => nss,
            // Nothing left to sort.
            None => break,
        };

        println!();
        println!();
        yellow!("URL: ");
        blue_ln!("{}", next_sortable_seed.url);
        yellow!("Tags: ");
        magenta_ln!("{}", next_sortable_seed.tags.replace(",", ", "));

        if !next_sortable_seed.extra_tags.is_empty() {
            dark_yellow!("Suggested extra tags: ");
            blue_ln!("{}", next_sortable_seed.extra_tags.replace(",", ", "));
        }

        if !next_sortable_seed.remarks_private.is_empty() {
            dark_yellow_ln!("Remarks (private): ");
            blue_ln!(
                "\t{}",
                next_sortable_seed.remarks_private.replace("\n", "\n\t")
            );
        }

        dark_red!("? ");

        input.clear();
        if stdin.read_line(&mut input).await? == 0 {
            // End of input: stop sorting.
            break;
        }

        // Any invalid input re-prompts for the same seed; nothing is written
        // until the whole line has been validated.
        let (decision_id, diff_tags_joined, comment) = match parse_sorting_input(&input) {
            Ok(parsed) => parsed,
            Err(message) => {
                eprintln!("{}", message);
                continue;
            }
        };

        // TODO export the seed too...? or is that a separate pass...?
        sqlx::query!(
            "
INSERT INTO sorted_seeds (
collected_seed_id, decision, tag_diff, comment_published
) VALUES (?, ?, ?, ?)
",
            next_sortable_seed.collected_seed_id,
            decision_id,
            diff_tags_joined,
            comment
        )
        .execute(pool)
        .await?;
    }

    Ok(())
}

/// Parses one line of operator input into `(decision id, tag diff, comment)`.
///
/// Everything after the first `#` is the comment. Before it, the first
/// whitespace-separated word is the decision keyword and the remaining words
/// are tag diffs, each of which must start with `+` or `-`. The decision id is
/// the keyword's index in [`SORTING_OUTCOMES`]; the tag diffs are joined with
/// `,`.
///
/// Returns a human-readable message describing the problem if the line is
/// empty, names an unknown decision, or contains a malformed tag diff.
fn parse_sorting_input(line: &str) -> Result<(i64, String, &str), String> {
    let mut comment_split = line.trim().splitn(2, '#');
    let action_part = comment_split.next().unwrap_or("").trim();
    let comment = comment_split.next().unwrap_or("").trim();

    let mut action_pieces = action_part.split_whitespace();
    // An empty (or comment-only) line has no decision keyword at all.
    let action = action_pieces.next().ok_or_else(|| {
        format!(
            "No decision given. Try one of {}",
            SORTING_OUTCOMES.iter().join(", ")
        )
    })?;

    let diff_tags = action_pieces.collect_vec();
    if let Some(bad_tag) = diff_tags
        .iter()
        .find(|tag| !tag.starts_with('+') && !tag.starts_with('-'))
    {
        return Err(format!(
            "Diff tag doesn't start with + or -: {}",
            bad_tag
        ));
    }

    let decision_id = SORTING_OUTCOMES
        .iter()
        .position(|outcome| *outcome == action)
        .ok_or_else(|| {
            format!(
                "Decision {:?} not found. Try one of {}",
                action,
                SORTING_OUTCOMES.iter().join(", ")
            )
        })?;

    Ok((decision_id as i64, diff_tags.join(","), comment))
}

View File

@ -1,21 +1,15 @@
use crate::config::WebConfig;
use crate::web::seed_collector::{seed_collection_root, seed_collection_root_post};
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use axum::extract::Extension; use axum::extract::Extension;
use axum::http::StatusCode; use axum::http::StatusCode;
use axum::routing::{get, get_service, post}; use axum::routing::{get, get_service, post};
use axum::Router; use axum::Router;
use env_logger::Env; use env_logger::Env;
use quickpeep::config::WebConfig;
use quickpeep::web::seed_collector::{seed_collection_root, seed_collection_root_post};
use sqlx::sqlite::SqlitePoolOptions; use sqlx::sqlite::SqlitePoolOptions;
use std::path::PathBuf; use std::path::PathBuf;
use tower_http::services::ServeDir; use tower_http::services::ServeDir;
mod config;
mod web;
mod webutil;
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
env_logger::Builder::from_env( env_logger::Builder::from_env(

5
quickpeep/src/lib.rs Normal file
View File

@ -0,0 +1,5 @@
pub mod config;
pub mod web;
pub mod webutil;

View File

@ -1,5 +1,5 @@
use crate::config::WebConfig;
use crate::webutil::{internal_error, TemplatedHtml}; use crate::webutil::{internal_error, TemplatedHtml};
use crate::WebConfig;
use askama::Template; use askama::Template;
use axum::extract::{Extension, Form}; use axum::extract::{Extension, Form};
use axum::response::IntoResponse; use axum::response::IntoResponse;