diff --git a/datman/src/backup.rs b/datman/src/backup.rs
index 19b2d39..6dc6f0e 100644
--- a/datman/src/backup.rs
+++ b/datman/src/backup.rs
@@ -7,6 +7,7 @@ use dashmap::DashSet;
 use eyre::{bail, ensure, eyre, Context, ContextCompat};
 use indicatif::ProgressStyle;
 use patricia_tree::PatriciaMap;
+use std::cmp::max;
 use std::collections::{BTreeMap, BTreeSet, HashMap};
 use std::io::Write;
 use std::path::PathBuf;
@@ -250,6 +251,11 @@ async fn scan_dir_sources(
         let options = options.clone();
         joinset.spawn_blocking(move || -> eyre::Result {
             let scan_entry_map = scan::scan(&path, &ignore).context("Failed to scan")?;
+            info!(
+                "size estimate for {:?} (full scan): {}",
+                path,
+                summarise_scan_entry_map_size(&scan_entry_map)
+            );
 
             // TODO This whole section is messy.
             //  Maybe we should consider combining prepopulate_unmodified and limit_scan_entry_map_to_size
@@ -275,6 +281,12 @@
                     bail!("bug: intersecting prepop and prune keys: {ix_keys:?}");
                 }
 
+                info!(
+                    "size estimate for {:?} (differential): {}",
+                    path,
+                    summarise_scan_entry_map_size(&pruned)
+                );
+
                 (cfm, pruned, prepopulated)
             } else {
                 (
@@ -285,10 +297,18 @@
             };
 
             let pruned_scan_entry_map = match options.gradual {
-                Some(gradual_size_limit) => limit_scan_entry_map_to_size(
-                    pruned_scan_entry_map,
-                    gradual_size_limit.as_u64(),
-                ),
+                Some(gradual_size_limit) => {
+                    let limited = limit_scan_entry_map_to_size(
+                        pruned_scan_entry_map,
+                        gradual_size_limit.as_u64(),
+                    );
+                    info!(
+                        "size estimate for {:?} (gradual/limited): {}",
+                        path,
+                        summarise_scan_entry_map_size(&limited)
+                    );
+                    limited
+                }
                 None => pruned_scan_entry_map,
             };
 
@@ -313,6 +333,24 @@
     Ok(result)
 }
 
+fn summarise_scan_entry_map_size(scan_entry_map: &PatriciaMap<ScanEntry>) -> String {
+    let mut num_bytes = 0u64;
+
+    for (_, entry) in scan_entry_map.iter() {
+        num_bytes += match entry {
+            ScanEntry::NormalFile { size, .. } => max(*size, 4096),
+            _ => 4096,
+        };
+    }
+
+    let num_files = scan_entry_map.len();
+
+    format!(
+        "{num_files} files ({})",
+        ByteSize(num_bytes).to_string_as(true)
+    )
+}
+
 struct BackupDirSourcesReturn {
     pub chunkmaps: BTreeMap,
     pub dir_source_returns: Vec<(DirSourcePrep, PatriciaMap>)>,
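
The helper added at the end of this patch estimates size by charging every scan entry at least 4096 bytes: normal files contribute max(size, 4096), and everything else (directories, symlinks, and so on) contributes a flat 4096, presumably as a minimum per-entry overhead allowance. Below is a minimal, dependency-free sketch of that rule only; `Entry` and `estimate_bytes` are illustrative stand-ins, not datman's real `ScanEntry` type, `PatriciaMap`, or `summarise_scan_entry_map_size`.

// Sketch only: `Entry` and `estimate_bytes` are hypothetical stand-ins for
// datman's `ScanEntry` and `summarise_scan_entry_map_size`.
enum Entry {
    NormalFile { size: u64 },
    Other, // directory, symlink, ...
}

fn estimate_bytes<'a>(entries: impl IntoIterator<Item = &'a Entry>) -> u64 {
    entries
        .into_iter()
        .map(|entry| match entry {
            // Small files are rounded up to 4096 bytes...
            Entry::NormalFile { size } => (*size).max(4096),
            // ...and non-file entries are charged a flat 4096 bytes.
            Entry::Other => 4096,
        })
        .sum()
}

fn main() {
    let entries = [
        Entry::NormalFile { size: 10 },      // counted as 4096
        Entry::NormalFile { size: 1 << 20 }, // counted as 1_048_576
        Entry::Other,                        // counted as 4096
    ];
    // 4096 + 1_048_576 + 4096 = 1_056_768 bytes
    println!("estimated size: {} bytes", estimate_bytes(&entries));
}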