Add seed sorting tool in order to approach first proof of concept
continuous-integration/drone the build failed
Details
continuous-integration/drone the build failed
Details
This commit is contained in:
parent
528e0bbf43
commit
0060ec0764
|
@ -2996,6 +2996,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"askama",
|
||||
"axum",
|
||||
"colour",
|
||||
"env_logger",
|
||||
"itertools",
|
||||
"log",
|
||||
|
|
|
@ -17,3 +17,4 @@ log = "0.4.14"
|
|||
env_logger = "0.9.0"
|
||||
sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] }
|
||||
itertools = "0.10.3"
|
||||
colour = "0.6.0"
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
-- Support post-processing the seeds with a manual sorting action
|
||||
|
||||
CREATE TABLE sorted_seeds (
|
||||
collected_seed_id INTEGER NOT NULL PRIMARY KEY REFERENCES collected_seeds(collected_seed_id),
|
||||
|
||||
-- See qp-seedcoll-sort for definitions.
|
||||
decision INTEGER NOT NULL,
|
||||
|
||||
-- Specified tags, as a diff of what was there.
|
||||
-- + means 'add', - means 'remove', '-*' means 'remove all specified'.
|
||||
-- e.g. '-*+cat' or '-manual+blog'.
|
||||
tag_diff TEXT NOT NULL,
|
||||
|
||||
-- Any comment that will be published as a line comment preceding this entry
|
||||
comment_published TEXT NOT NULL
|
||||
)
|
|
@ -0,0 +1,181 @@
|
|||
use anyhow::{bail, Context};
|
||||
use colour::{blue_ln, dark_red, dark_yellow, dark_yellow_ln, grey_ln, magenta_ln, yellow};
|
||||
use env_logger::Env;
|
||||
use itertools::Itertools;
|
||||
use quickpeep::config::WebConfig;
|
||||
use sqlx::sqlite::SqlitePoolOptions;
|
||||
use sqlx::SqlitePool;
|
||||
use std::path::PathBuf;
|
||||
use tokio::io;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
|
||||
/// The decisions an operator can enter when sorting a seed.
///
/// `seed_sorter_loop` stores the *index* of the chosen outcome in this array as the
/// `decision` column of `sorted_seeds`, so reordering or removing entries would change the
/// meaning of decisions that have already been stored.
pub const SORTING_OUTCOMES: [&str; 5] = [
    // Included
    "y",
    // Not included (generic)
    "n",
    // Duplicates an existing entry
    "dupe",
    // Spam entry
    "spam",
    // Invalid format for some reason (e.g. pattern or URL invalid)
    "inv",
];
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn main() -> anyhow::Result<()> {
|
||||
env_logger::Builder::from_env(
|
||||
Env::default().default_filter_or("info,quickpeep=debug,sqlx=warn"),
|
||||
)
|
||||
.init();
|
||||
|
||||
let config_path =
|
||||
PathBuf::from(std::env::var("QP_WEB_CONFIG").unwrap_or_else(|_| "qp_web.ron".to_owned()));
|
||||
|
||||
if !config_path.exists() {
|
||||
bail!(
|
||||
"Config path {:?} doesn't exist. QP_WEB_CONFIG env var overrides.",
|
||||
config_path
|
||||
);
|
||||
}
|
||||
|
||||
let file_bytes = std::fs::read(&config_path).context("Failed to read web config file")?;
|
||||
let web_config: WebConfig =
|
||||
ron::de::from_bytes(&file_bytes).context("Failed to parse web config")?;
|
||||
|
||||
let pool = SqlitePoolOptions::new()
|
||||
.min_connections(1)
|
||||
.after_connect(|conn| {
|
||||
Box::pin(async move {
|
||||
// Use the WAL because it just makes more sense :)
|
||||
sqlx::query("PRAGMA journal_mode = WAL")
|
||||
.execute(&mut *conn)
|
||||
.await?;
|
||||
|
||||
// Enable foreign keys because we like them!
|
||||
sqlx::query("PRAGMA foreign_keys = ON")
|
||||
.execute(&mut *conn)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
})
|
||||
.connect(
|
||||
&web_config
|
||||
.sqlite_db_path
|
||||
.to_str()
|
||||
.context("SQLite DB path should be UTF-8")?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
sqlx::migrate!().run(&pool).await?;
|
||||
|
||||
seed_sorter_loop(&pool).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn seed_sorter_loop(pool: &SqlitePool) -> anyhow::Result<()> {
|
||||
let count_row = sqlx::query!(
|
||||
"
|
||||
SELECT COALESCE(COUNT(collected_seed_id), 0) AS numseeds
|
||||
FROM collected_seeds cs
|
||||
WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
|
||||
"
|
||||
)
|
||||
.fetch_one(&*pool)
|
||||
.await?;
|
||||
|
||||
grey_ln!("{} seeds to be sorted.", count_row.numseeds);
|
||||
|
||||
let mut stdin = BufReader::new(io::stdin());
|
||||
|
||||
loop {
|
||||
let next_sortable_seed = sqlx::query!(
|
||||
"
|
||||
SELECT collected_seed_id, collected_ts, url, tags, extra_tags, remarks_private
|
||||
FROM collected_seeds cs
|
||||
WHERE collected_seed_id NOT IN (SELECT collected_seed_id FROM sorted_seeds)
|
||||
ORDER BY collected_seed_id ASC
|
||||
LIMIT 1
|
||||
"
|
||||
)
|
||||
.fetch_optional(&*pool)
|
||||
.await?;
|
||||
|
||||
let next_sortable_seed = match next_sortable_seed {
|
||||
Some(nss) => nss,
|
||||
None => break,
|
||||
};
|
||||
|
||||
println!();
|
||||
println!();
|
||||
yellow!("URL: ");
|
||||
blue_ln!("{}", next_sortable_seed.url);
|
||||
yellow!("Tags: ");
|
||||
magenta_ln!("{}", next_sortable_seed.tags.replace(",", ", "));
|
||||
|
||||
if !next_sortable_seed.extra_tags.is_empty() {
|
||||
dark_yellow!("Suggested extra tags: ");
|
||||
blue_ln!("{}", next_sortable_seed.extra_tags.replace(",", ", "));
|
||||
}
|
||||
|
||||
if !next_sortable_seed.remarks_private.is_empty() {
|
||||
dark_yellow_ln!("Remarks (private): ");
|
||||
blue_ln!(
|
||||
"\t{}",
|
||||
next_sortable_seed.remarks_private.replace("\n", "\n\t")
|
||||
);
|
||||
}
|
||||
|
||||
dark_red!("? ");
|
||||
|
||||
let mut buf = String::with_capacity(0);
|
||||
if stdin.read_line(&mut buf).await? == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut comment_split = buf.trim().splitn(2, '#');
|
||||
let action_part = comment_split.next().unwrap().trim();
|
||||
let comment = comment_split.next().unwrap_or("").trim();
|
||||
|
||||
let mut action_pieces = action_part.split_whitespace();
|
||||
let action = action_pieces.next().unwrap();
|
||||
let diff_tags = action_pieces.collect_vec();
|
||||
for diff_tag in &diff_tags {
|
||||
if !diff_tag.starts_with('+') && !diff_tag.starts_with('-') {
|
||||
eprintln!("Diff tag doesn't start with + or -: {}", diff_tag);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let diff_tags_joined = diff_tags.join(",");
|
||||
|
||||
let decision_id = match SORTING_OUTCOMES
|
||||
.iter()
|
||||
.position(|outcome| *outcome == action)
|
||||
{
|
||||
Some(di) => di,
|
||||
None => {
|
||||
eprintln!(
|
||||
"Decision {:?} not found. Try one of {}",
|
||||
action,
|
||||
SORTING_OUTCOMES.iter().join(", ")
|
||||
);
|
||||
continue;
|
||||
}
|
||||
} as i64;
|
||||
|
||||
// TODO export the seed too...? or is that a separate pass...?
|
||||
sqlx::query!(
|
||||
"
|
||||
INSERT INTO sorted_seeds (
|
||||
collected_seed_id, decision, tag_diff, comment_published
|
||||
) VALUES (?, ?, ?, ?)
|
||||
",
|
||||
next_sortable_seed.collected_seed_id,
|
||||
decision_id,
|
||||
diff_tags_joined,
|
||||
comment
|
||||
)
|
||||
.execute(&*pool)
|
||||
.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
|
@ -1,21 +1,15 @@
|
|||
use crate::config::WebConfig;
|
||||
use crate::web::seed_collector::{seed_collection_root, seed_collection_root_post};
|
||||
use anyhow::{bail, Context};
|
||||
use axum::extract::Extension;
|
||||
use axum::http::StatusCode;
|
||||
use axum::routing::{get, get_service, post};
|
||||
use axum::Router;
|
||||
use env_logger::Env;
|
||||
use quickpeep::config::WebConfig;
|
||||
use quickpeep::web::seed_collector::{seed_collection_root, seed_collection_root_post};
|
||||
use sqlx::sqlite::SqlitePoolOptions;
|
||||
use std::path::PathBuf;
|
||||
use tower_http::services::ServeDir;
|
||||
|
||||
mod config;
|
||||
|
||||
mod web;
|
||||
|
||||
mod webutil;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
env_logger::Builder::from_env(
|
|
@ -0,0 +1,5 @@
|
|||
pub mod config;
|
||||
|
||||
pub mod web;
|
||||
|
||||
pub mod webutil;
|
|
@ -1,5 +1,5 @@
|
|||
use crate::config::WebConfig;
|
||||
use crate::webutil::{internal_error, TemplatedHtml};
|
||||
use crate::WebConfig;
|
||||
use askama::Template;
|
||||
use axum::extract::{Extension, Form};
|
||||
use axum::response::IntoResponse;
|
||||
|
|
Loading…
Reference in New Issue