Add size hints for Datman Backup on dir trees

Olivier 'reivilibre' 2023-08-13 22:12:45 +01:00
parent 9c3ea26ea6
commit d07351d465
1 changed file with 42 additions and 4 deletions


@@ -7,6 +7,7 @@ use dashmap::DashSet;
 use eyre::{bail, ensure, eyre, Context, ContextCompat};
 use indicatif::ProgressStyle;
 use patricia_tree::PatriciaMap;
+use std::cmp::max;
 use std::collections::{BTreeMap, BTreeSet, HashMap};
 use std::io::Write;
 use std::path::PathBuf;
@@ -250,6 +251,11 @@ async fn scan_dir_sources(
         let options = options.clone();
         joinset.spawn_blocking(move || -> eyre::Result<DirSourcePrep> {
             let scan_entry_map = scan::scan(&path, &ignore).context("Failed to scan")?;
+            info!(
+                "size estimate for {:?} (full scan): {}",
+                path,
+                summarise_scan_entry_map_size(&scan_entry_map)
+            );

             // TODO This whole section is messy.
             // Maybe we should consider combining prepopulate_unmodified and limit_scan_entry_map_to_size
@@ -275,6 +281,12 @@ async fn scan_dir_sources(
                     bail!("bug: intersecting prepop and prune keys: {ix_keys:?}");
                 }
+
+                info!(
+                    "size estimate for {:?} (differential): {}",
+                    path,
+                    summarise_scan_entry_map_size(&pruned)
+                );
                 (cfm, pruned, prepopulated)
             } else {
                 (
@@ -285,10 +297,18 @@ async fn scan_dir_sources(
             };

             let pruned_scan_entry_map = match options.gradual {
-                Some(gradual_size_limit) => limit_scan_entry_map_to_size(
-                    pruned_scan_entry_map,
-                    gradual_size_limit.as_u64(),
-                ),
+                Some(gradual_size_limit) => {
+                    let limited = limit_scan_entry_map_to_size(
+                        pruned_scan_entry_map,
+                        gradual_size_limit.as_u64(),
+                    );
+                    info!(
+                        "size estimate for {:?} (gradual/limited): {}",
+                        path,
+                        summarise_scan_entry_map_size(&limited)
+                    );
+                    limited
+                }
                 None => pruned_scan_entry_map,
             };
@@ -313,6 +333,24 @@ async fn scan_dir_sources(
     Ok(result)
 }

+fn summarise_scan_entry_map_size(scan_entry_map: &PatriciaMap<ScanEntry>) -> String {
+    let mut num_bytes = 0u64;
+    for (_, entry) in scan_entry_map.iter() {
+        num_bytes += match entry {
+            ScanEntry::NormalFile { size, .. } => max(*size, 4096),
+            _ => 4096,
+        };
+    }
+    let num_files = scan_entry_map.len();
+    format!(
+        "{num_files} files ({})",
+        ByteSize(num_bytes).to_string_as(true)
+    )
+}
+
 struct BackupDirSourcesReturn {
     pub chunkmaps: BTreeMap<BloblogId, IndexBloblogEntry>,
     pub dir_source_returns: Vec<(DirSourcePrep, PatriciaMap<Option<(RecursiveChunkRef, u64)>>)>,
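
For context on the heuristic: the new summarise_scan_entry_map_size helper charges every scanned entry at least 4096 bytes (roughly one filesystem block), so directories, symlinks, and tiny files are not estimated as free; only normal files larger than a block contribute their actual size. Below is a minimal standalone sketch of the same idea, using a simplified Entry enum and sample data that are illustrative assumptions, not taken from the Datman codebase:

use std::cmp::max;

/// Simplified stand-in for Datman's `ScanEntry` (illustrative only).
enum Entry {
    NormalFile { size: u64 },
    Directory,
    Symlink,
}

/// Same heuristic as `summarise_scan_entry_map_size`: every entry costs at
/// least one 4096-byte block; larger regular files count at their real size.
fn estimate_bytes(entries: &[Entry]) -> u64 {
    entries
        .iter()
        .map(|entry| match entry {
            Entry::NormalFile { size } => max(*size, 4096),
            _ => 4096,
        })
        .sum()
}

fn main() {
    let entries = vec![
        Entry::NormalFile { size: 10 },        // tiny file  -> charged 4096
        Entry::NormalFile { size: 1_048_576 }, // 1 MiB file -> charged 1 MiB
        Entry::Directory,                      // directory  -> charged 4096
        Entry::Symlink,                        // symlink    -> charged 4096
    ];
    // 4096 + 1048576 + 4096 + 4096 = 1060864 bytes
    println!(
        "{} entries, estimated {} bytes",
        entries.len(),
        estimate_bytes(&entries)
    );
}

Rounding every entry up to a block presumably keeps the estimate pessimistic rather than optimistic, which suits the gradual size limit: a backup sized against this estimate is more likely to stay under the cap than to overshoot it.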