Fix the seed dump script

This commit is contained in:
Olivier 'reivilibre' 2022-03-28 23:17:45 +01:00
parent 61398f6153
commit de610e5aab
1 changed file with 29 additions and 15 deletions

View File

@@ -1,14 +1,15 @@
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use env_logger::Env; use env_logger::Env;
use futures_util::stream::StreamExt; use futures_util::stream::StreamExt;
use itertools::Itertools;
use quickpeep::config::WebConfig; use quickpeep::config::WebConfig;
use smartstring::alias::CompactString; use smartstring::alias::CompactString;
use sqlx::sqlite::SqlitePoolOptions; use sqlx::sqlite::SqlitePoolOptions;
use sqlx::{Connection, SqlitePool}; use sqlx::{Connection, SqlitePool};
use std::collections::{BTreeSet, HashMap}; use std::collections::{BTreeSet, HashMap};
use std::fs::OpenOptions;
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tokio::fs::OpenOptions;
use tokio::io::{AsyncWriteExt, BufWriter};
pub const DECISION_INCLUDED: i64 = 0; pub const DECISION_INCLUDED: i64 = 0;
@@ -94,7 +95,7 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
.fetch_optional(&mut *txn) .fetch_optional(&mut *txn)
.await?; .await?;
let file = OpenOptions::new().create_new(true).open(path).await?; let file = OpenOptions::new().create_new(true).write(true).open(path)?;
let mut last_processed_position: Option<i64> = None; let mut last_processed_position: Option<i64> = None;
@@ -108,8 +109,8 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
WHERE collected_seed_id > ? AND decision = ? WHERE collected_seed_id > ? AND decision = ?
ORDER BY collected_seed_id ASC ORDER BY collected_seed_id ASC
", ",
process_from,
DECISION_INCLUDED, DECISION_INCLUDED,
process_from
) )
.fetch(&mut *txn); .fetch(&mut *txn);
@@ -122,7 +123,7 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
let mut tags: BTreeSet<&str> = row.tags.split(",").collect(); let mut tags: BTreeSet<&str> = row.tags.split(",").collect();
let diff_tags: Vec<&str> = row.tag_diff.split(",").collect(); let diff_tags: Vec<&str> = row.tag_diff.split(",").collect();
for diff_tag in diff_tags { for diff_tag in diff_tags.into_iter().filter(|s| !s.is_empty()) {
if diff_tag.starts_with('+') { if diff_tag.starts_with('+') {
tags.insert(&diff_tag[1..]); tags.insert(&diff_tag[1..]);
} else if diff_tag.starts_with('-') { } else if diff_tag.starts_with('-') {
@@ -146,17 +147,30 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
let mut buf_writer = BufWriter::new(file); let mut buf_writer = BufWriter::new(file);
buf_writer.flush().await?; for (tags, seeds) in seed_sets_to_seeds {
writeln!(buf_writer, "{}:", tags.iter().join(", "))?;
for seed in seeds {
if !seed.comment.is_empty() {
writeln!(buf_writer, "# {}", seed.comment.replace('\n', "\n# "))?;
}
writeln!(buf_writer, "{}", seed.url)?;
}
writeln!(buf_writer)?;
}
buf_writer.flush()?;
if let Some(last_processed) = last_processed_position {
sqlx::query!( sqlx::query!(
" "
REPLACE INTO seed_processing_positions (name, last_processed) REPLACE INTO seed_processing_positions (name, last_processed)
VALUES ('dumped', ?) VALUES ('dumped', ?)
", ",
last_processed_position last_processed
) )
.execute(&mut *txn) .execute(&mut *txn)
.await?; .await?;
}
txn.commit().await?; txn.commit().await?;
Ok(()) Ok(())