Add seed sorting tool in order to approach first proof of concept
continuous-integration/drone the build failed
Details
continuous-integration/drone the build failed
Details
This commit is contained in:
parent
528e0bbf43
commit
0060ec0764
|
@ -2996,6 +2996,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"askama",
|
"askama",
|
||||||
"axum",
|
"axum",
|
||||||
|
"colour",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"itertools",
|
"itertools",
|
||||||
"log",
|
"log",
|
||||||
|
|
|
@ -17,3 +17,4 @@ log = "0.4.14"
|
||||||
env_logger = "0.9.0"
|
env_logger = "0.9.0"
|
||||||
sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] }
|
sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] }
|
||||||
itertools = "0.10.3"
|
itertools = "0.10.3"
|
||||||
|
colour = "0.6.0"
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
-- Support post-processing the seeds with a manual sorting action
-- One row per collected seed that has been through the manual sort; a seed with no
-- row here is still waiting to be sorted.
|
||||||
|
|
||||||
|
CREATE TABLE sorted_seeds (
|
||||||
|
collected_seed_id INTEGER NOT NULL PRIMARY KEY REFERENCES collected_seeds(collected_seed_id),
|
||||||
|
|
||||||
|
-- See qp-seedcoll-sort for definitions.
-- The seed sorter stores the zero-based index of the chosen entry of its
-- SORTING_OUTCOMES array here: 0 = 'y', 1 = 'n', 2 = 'dupe', 3 = 'spam', 4 = 'inv'.
|
||||||
|
decision INTEGER NOT NULL,
|
||||||
|
|
||||||
|
-- Specified tags, as a diff of what was there.
|
||||||
|
-- + means 'add', - means 'remove', '-*' means 'remove all specified'.
|
||||||
|
-- e.g. '-*+cat' or '-manual+blog'.
-- The seed sorter joins the whitespace-separated tokens typed at its prompt with ','
-- before storing them; the value is empty when no tag changes were given.
|
||||||
|
tag_diff TEXT NOT NULL,
|
||||||
|
|
||||||
|
-- Any comment that will be published as a line comment preceding this entry
-- The seed sorter stores the text typed after the first '#' on the prompt line
-- (trimmed); the value is empty when no comment was given.
|
||||||
|
comment_published TEXT NOT NULL
|
||||||
|
)
|
|
@ -0,0 +1,181 @@
|
||||||
|
use anyhow::{bail, Context};
|
||||||
|
use colour::{blue_ln, dark_red, dark_yellow, dark_yellow_ln, grey_ln, magenta_ln, yellow};
|
||||||
|
use env_logger::Env;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use quickpeep::config::WebConfig;
|
||||||
|
use sqlx::sqlite::SqlitePoolOptions;
|
||||||
|
use sqlx::SqlitePool;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use tokio::io;
|
||||||
|
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||||
|
|
||||||
|
/// Decisions the sorter accepts at its prompt.
///
/// The zero-based position of the chosen entry is what gets written to
/// `sorted_seeds.decision`, so reordering or removing entries would change the meaning
/// of rows that have already been stored.
pub const SORTING_OUTCOMES: [&str; 5] = [
    // Included
    "y",
    // Not included (generic)
    "n",
    // Duplicates an existing entry
    "dupe",
    // Spam entry
    "spam",
    // Invalid format for some reason (e.g. pattern or URL invalid)
    "inv",
];
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
pub async fn main() -> anyhow::Result<()> {
|
||||||
|
env_logger::Builder::from_env(
|
||||||
|
Env::default().default_filter_or("info,quickpeep=debug,sqlx=warn"),
|
||||||
|
)
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let config_path =
|
||||||
|
PathBuf::from(std::env::var("QP_WEB_CONFIG").unwrap_or_else(|_| "qp_web.ron".to_owned()));
|
||||||
|
|
||||||
|
if !config_path.exists() {
|
||||||
|
bail!(
|
||||||
|
"Config path {:?} doesn't exist. QP_WEB_CONFIG env var overrides.",
|
||||||
|
config_path
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let file_bytes = std::fs::read(&config_path).context("Failed to read web config file")?;
|
||||||
|
let web_config: WebConfig =
|
||||||
|
ron::de::from_bytes(&file_bytes).context("Failed to parse web config")?;
|
||||||
|
|
||||||
|
let pool = SqlitePoolOptions::new()
|
||||||
|
.min_connections(1)
|
||||||
|
.after_connect(|conn| {
|
||||||
|
Box::pin(async move {
|
||||||
|
// Use the WAL because it just makes more sense :)
|
||||||
|
sqlx::query("PRAGMA journal_mode = WAL")
|
||||||
|
.execute(&mut *conn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Enable foreign keys because we like them!
|
||||||
|
sqlx::query("PRAGMA foreign_keys = ON")
|
||||||
|
.execute(&mut *conn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.connect(
|
||||||
|
&web_config
|
||||||
|
.sqlite_db_path
|
||||||
|
.to_str()
|
||||||
|
.context("SQLite DB path should be UTF-8")?,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
sqlx::migrate!().run(&pool).await?;
|
||||||
|
|
||||||
|
seed_sorter_loop(&pool).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn seed_sorter_loop(pool: &SqlitePool) -> anyhow::Result<()> {
|
||||||
|
let count_row = sqlx::query!(
|
||||||
|
"
|
||||||
|
SELECT COALESCE(COUNT(collected_seed_id), 0) AS numseeds
|
||||||
|
FROM collected_seeds cs
|
||||||
|
WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
|
||||||
|
"
|
||||||
|
)
|
||||||
|
.fetch_one(&*pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
grey_ln!("{} seeds to be sorted.", count_row.numseeds);
|
||||||
|
|
||||||
|
let mut stdin = BufReader::new(io::stdin());
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let next_sortable_seed = sqlx::query!(
|
||||||
|
"
|
||||||
|
SELECT collected_seed_id, collected_ts, url, tags, extra_tags, remarks_private
|
||||||
|
FROM collected_seeds cs
|
||||||
|
WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
|
||||||
|
ORDER BY collected_seed_id ASC
|
||||||
|
LIMIT 1
|
||||||
|
"
|
||||||
|
)
|
||||||
|
.fetch_optional(&*pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let next_sortable_seed = match next_sortable_seed {
|
||||||
|
Some(nss) => nss,
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!();
|
||||||
|
yellow!("URL: ");
|
||||||
|
blue_ln!("{}", next_sortable_seed.url);
|
||||||
|
yellow!("Tags: ");
|
||||||
|
magenta_ln!("{}", next_sortable_seed.tags.replace(",", ", "));
|
||||||
|
|
||||||
|
if !next_sortable_seed.extra_tags.is_empty() {
|
||||||
|
dark_yellow!("Suggested extra tags: ");
|
||||||
|
blue_ln!("{}", next_sortable_seed.extra_tags.replace(",", ", "));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !next_sortable_seed.remarks_private.is_empty() {
|
||||||
|
dark_yellow_ln!("Remarks (private): ");
|
||||||
|
blue_ln!(
|
||||||
|
"\t{}",
|
||||||
|
next_sortable_seed.remarks_private.replace("\n", "\n\t")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
dark_red!("? ");
|
||||||
|
|
||||||
|
let mut buf = String::with_capacity(0);
|
||||||
|
if stdin.read_line(&mut buf).await? == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut comment_split = buf.trim().splitn(2, '#');
|
||||||
|
let action_part = comment_split.next().unwrap().trim();
|
||||||
|
let comment = comment_split.next().unwrap_or("").trim();
|
||||||
|
|
||||||
|
let mut action_pieces = action_part.split_whitespace();
|
||||||
|
let action = action_pieces.next().unwrap();
|
||||||
|
let diff_tags = action_pieces.collect_vec();
|
||||||
|
for diff_tag in &diff_tags {
|
||||||
|
if !diff_tag.starts_with('+') && !diff_tag.starts_with('-') {
|
||||||
|
eprintln!("Diff tag doesn't start with + or -: {}", diff_tag);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let diff_tags_joined = diff_tags.join(",");
|
||||||
|
|
||||||
|
let decision_id = match SORTING_OUTCOMES
|
||||||
|
.iter()
|
||||||
|
.position(|outcome| *outcome == action)
|
||||||
|
{
|
||||||
|
Some(di) => di,
|
||||||
|
None => {
|
||||||
|
eprintln!(
|
||||||
|
"Decision {:?} not found. Try one of {}",
|
||||||
|
action,
|
||||||
|
SORTING_OUTCOMES.iter().join(", ")
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} as i64;
|
||||||
|
|
||||||
|
// TODO export the seed too...? or is that a separate pass...?
|
||||||
|
sqlx::query!(
|
||||||
|
"
|
||||||
|
INSERT INTO sorted_seeds (
|
||||||
|
collected_seed_id, decision, tag_diff, comment_published
|
||||||
|
) VALUES (?, ?, ?, ?)
|
||||||
|
",
|
||||||
|
next_sortable_seed.collected_seed_id,
|
||||||
|
decision_id,
|
||||||
|
diff_tags_joined,
|
||||||
|
comment
|
||||||
|
)
|
||||||
|
.execute(&*pool)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -1,21 +1,15 @@
|
||||||
use crate::config::WebConfig;
|
|
||||||
use crate::web::seed_collector::{seed_collection_root, seed_collection_root_post};
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
use axum::extract::Extension;
|
use axum::extract::Extension;
|
||||||
use axum::http::StatusCode;
|
use axum::http::StatusCode;
|
||||||
use axum::routing::{get, get_service, post};
|
use axum::routing::{get, get_service, post};
|
||||||
use axum::Router;
|
use axum::Router;
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
|
use quickpeep::config::WebConfig;
|
||||||
|
use quickpeep::web::seed_collector::{seed_collection_root, seed_collection_root_post};
|
||||||
use sqlx::sqlite::SqlitePoolOptions;
|
use sqlx::sqlite::SqlitePoolOptions;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use tower_http::services::ServeDir;
|
use tower_http::services::ServeDir;
|
||||||
|
|
||||||
mod config;
|
|
||||||
|
|
||||||
mod web;
|
|
||||||
|
|
||||||
mod webutil;
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
env_logger::Builder::from_env(
|
env_logger::Builder::from_env(
|
|
@ -0,0 +1,5 @@
|
||||||
|
pub mod config;
|
||||||
|
|
||||||
|
pub mod web;
|
||||||
|
|
||||||
|
pub mod webutil;
|
|
@ -1,5 +1,5 @@
|
||||||
|
use crate::config::WebConfig;
|
||||||
use crate::webutil::{internal_error, TemplatedHtml};
|
use crate::webutil::{internal_error, TemplatedHtml};
|
||||||
use crate::WebConfig;
|
|
||||||
use askama::Template;
|
use askama::Template;
|
||||||
use axum::extract::{Extension, Form};
|
use axum::extract::{Extension, Form};
|
||||||
use axum::response::IntoResponse;
|
use axum::response::IntoResponse;
|
||||||
|
|
Loading…
Reference in New Issue