Fix the seed dump script

This commit is contained in:
Olivier 'reivilibre' 2022-03-28 23:17:45 +01:00
parent 61398f6153
commit de610e5aab
1 changed file with 29 additions and 15 deletions

View File

@ -1,14 +1,15 @@
use anyhow::{bail, Context};
use env_logger::Env;
use futures_util::stream::StreamExt;
use itertools::Itertools;
use quickpeep::config::WebConfig;
use smartstring::alias::CompactString;
use sqlx::sqlite::SqlitePoolOptions;
use sqlx::{Connection, SqlitePool};
use std::collections::{BTreeSet, HashMap};
use std::fs::OpenOptions;
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use tokio::fs::OpenOptions;
use tokio::io::{AsyncWriteExt, BufWriter};
/// Value bound to the `decision = ?` filter of the seed dump query, so only
/// rows whose `decision` equals this constant are selected.
/// NOTE(review): presumably marks a collected seed as accepted for inclusion —
/// confirm against the schema / wherever decisions are written.
pub const DECISION_INCLUDED: i64 = 0;
@ -94,7 +95,7 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
.fetch_optional(&mut *txn)
.await?;
let file = OpenOptions::new().create_new(true).open(path).await?;
let file = OpenOptions::new().create_new(true).write(true).open(path)?;
let mut last_processed_position: Option<i64> = None;
@ -108,8 +109,8 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
WHERE collected_seed_id > ? AND decision = ?
ORDER BY collected_seed_id ASC
",
process_from,
DECISION_INCLUDED,
process_from
)
.fetch(&mut *txn);
@ -122,7 +123,7 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
let mut tags: BTreeSet<&str> = row.tags.split(",").collect();
let diff_tags: Vec<&str> = row.tag_diff.split(",").collect();
for diff_tag in diff_tags {
for diff_tag in diff_tags.into_iter().filter(|s| !s.is_empty()) {
if diff_tag.starts_with('+') {
tags.insert(&diff_tag[1..]);
} else if diff_tag.starts_with('-') {
@ -146,17 +147,30 @@ pub async fn seed_dump(pool: &SqlitePool, path: &Path) -> anyhow::Result<()> {
let mut buf_writer = BufWriter::new(file);
buf_writer.flush().await?;
for (tags, seeds) in seed_sets_to_seeds {
writeln!(buf_writer, "{}:", tags.iter().join(", "))?;
for seed in seeds {
if !seed.comment.is_empty() {
writeln!(buf_writer, "# {}", seed.comment.replace('\n', "\n# "))?;
}
writeln!(buf_writer, "{}", seed.url)?;
}
writeln!(buf_writer)?;
}
buf_writer.flush()?;
if let Some(last_processed) = last_processed_position {
sqlx::query!(
"
REPLACE INTO seed_processing_positions (name, last_processed)
VALUES ('dumped', ?)
",
last_processed_position
last_processed
)
.execute(&mut *txn)
.await?;
}
txn.commit().await?;
Ok(())