Add a command to show a report of the Datman system
This commit is contained in:
parent
438af9164e
commit
948ca3f2b5
|
@ -233,7 +233,7 @@ version = "3.1.18"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro-error",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -249,6 +249,18 @@ dependencies = [
|
|||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "6.0.0-rc.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a1a275e66c69adb0600a13650aed718c99337d9a185d353efa13ff1e05576c4"
|
||||
dependencies = [
|
||||
"crossterm",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.15.0"
|
||||
|
@ -340,6 +352,31 @@ dependencies = [
|
|||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"crossterm_winapi",
|
||||
"libc",
|
||||
"mio",
|
||||
"parking_lot",
|
||||
"signal-hook",
|
||||
"signal-hook-mio",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm_winapi"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crunchy"
|
||||
version = "0.2.2"
|
||||
|
@ -378,6 +415,7 @@ dependencies = [
|
|||
"byteorder",
|
||||
"chrono",
|
||||
"clap",
|
||||
"comfy-table",
|
||||
"crossbeam-channel",
|
||||
"env_logger",
|
||||
"glob",
|
||||
|
@ -578,6 +616,15 @@ dependencies = [
|
|||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.0"
|
||||
|
@ -1096,6 +1143,12 @@ dependencies = [
|
|||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f"
|
||||
|
||||
[[package]]
|
||||
name = "rustyline"
|
||||
version = "7.1.0"
|
||||
|
@ -1187,6 +1240,27 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"signal-hook-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-mio"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
"signal-hook",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.0"
|
||||
|
@ -1242,6 +1316,25 @@ version = "0.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.23.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb"
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.23.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38"
|
||||
dependencies = [
|
||||
"heck 0.3.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.95"
|
||||
|
|
|
@ -33,3 +33,4 @@ hostname = "0.3.1"
|
|||
yama = { path = "../yama", version = "0.6.0-alpha.1" }
|
||||
metrics = "0.17.1"
|
||||
bare-metrics-recorder = { version = "0.1.0" }
|
||||
comfy-table = "6.0.0-rc.1"
|
|
@ -111,6 +111,11 @@ pub enum DatmanCommand {
|
|||
skip_metadata: bool,
|
||||
},
|
||||
|
||||
Report {
|
||||
/// Name of the pile to report on.
|
||||
pile_name: String,
|
||||
},
|
||||
|
||||
#[clap(name = "_backup_source_responder")]
|
||||
InternalBackupSourceResponder,
|
||||
}
|
||||
|
@ -307,6 +312,15 @@ fn main() -> anyhow::Result<()> {
|
|||
info!("Datman responder at {:?}", std::env::current_exe()?);
|
||||
backup_source_responder::handler_stdio()?;
|
||||
}
|
||||
|
||||
DatmanCommand::Report { pile_name } => {
|
||||
let descriptor = load_descriptor(Path::new(".")).unwrap();
|
||||
let destination = &descriptor.piles[&pile_name];
|
||||
let report = datman::commands::report::generate_report(destination, &descriptor)?;
|
||||
// TODO Display report
|
||||
// TODO E-mail report (Can just pipe through aha and then apprise though!)
|
||||
datman::commands::report::print_report(&report)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ pub mod backup;
|
|||
pub mod extract;
|
||||
pub mod ibrowse;
|
||||
pub mod ilabel;
|
||||
pub mod report;
|
||||
|
||||
pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
|
||||
std::fs::create_dir_all(path)?;
|
||||
|
|
|
@ -21,7 +21,7 @@ use crate::labelling::{label_node, load_labelling_rules, str_to_label, Label, St
|
|||
use crate::tree::{scan, FileTree, FileTree1};
|
||||
use anyhow::{anyhow, bail};
|
||||
use arc_interner::ArcIntern;
|
||||
use chrono::{DateTime, Utc};
|
||||
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
|
||||
use log::{info, warn};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::Debug;
|
||||
|
@ -49,6 +49,13 @@ pub fn get_pointer_name_at(source_name: &str, datetime: DateTime<Utc>) -> String
|
|||
)
|
||||
}
|
||||
|
||||
pub fn split_pointer_name(pointer_name: &str) -> Option<(String, DateTime<Utc>)> {
|
||||
let (source_name, date_time_str) = pointer_name.rsplit_once("+")?;
|
||||
let date_time = NaiveDateTime::parse_from_str(date_time_str, POINTER_DATETIME_FORMAT).ok()?;
|
||||
let date_time = Utc.from_utc_datetime(&date_time);
|
||||
Some((source_name.to_owned(), date_time))
|
||||
}
|
||||
|
||||
pub fn open_stdout_backup_process(
|
||||
extra_args: &HashMap<String, toml::Value>,
|
||||
program_name: &str,
|
||||
|
|
|
@ -0,0 +1,260 @@
|
|||
use crate::commands::backup::split_pointer_name;
|
||||
use crate::descriptor::{Descriptor, DestPileDescriptor};
|
||||
use anyhow::Context;
|
||||
use chrono::{DateTime, Utc};
|
||||
use comfy_table::presets::UTF8_FULL;
|
||||
use comfy_table::{Cell, Color, ContentArrangement, Table};
|
||||
use humansize::FileSize;
|
||||
use log::info;
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::io::Read;
|
||||
use std::mem::size_of;
|
||||
use yama::chunking::RecursiveUnchunker;
|
||||
use yama::commands::{load_pile_descriptor, open_pile, retrieve_tree_node};
|
||||
use yama::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
|
||||
use yama::pile::{DebugStatistics, Pile, RawPile};
|
||||
|
||||
// This module generates reports for a Datman system.
|
||||
// Referenced Chunk IDs are counted and used to give an indication of size.
|
||||
// Chunk IDs are summarised into u32s to reduce memory usage. Since the report is approximate,
|
||||
// it doesn't matter if there are a few collisions (although they are still fairly unlikely to
|
||||
// affect much).
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Report {
|
||||
pub last_source_backups: BTreeMap<String, Option<DateTime<Utc>>>,
|
||||
|
||||
pub chunk_usage: BTreeMap<String, Sizes>,
|
||||
|
||||
pub debug_stats: Option<DebugStatistics>,
|
||||
}
|
||||
|
||||
/// Chunk-count statistics for a single pointer.
///
/// All figures are counts of *condensed* chunk IDs (see the module comment), taken over the
/// pointer's own chunks plus those of every pointer on its chain of parents.
#[derive(Clone, Default)]
pub struct Sizes {
    /// Number of distinct chunks this pointer refers to.
    pub total: u32,

    /// Our 'moral' share of the chunks we refer to: every chunk that only we refer to counts in
    /// full, and every shared chunk counts as one divided by its number of sharers (the summed
    /// shared portion is rounded up).
    pub moral: u32,

    /// Number of chunks that no other pointer refers to.
    pub unique: u32,

    /// Number of chunks credited to this pointer when pointers are walked in reverse name order
    /// and each chunk is credited to the first pointer found referring to it.
    // NOTE(review): this was previously described as counting chunks for which we are the
    // "oldest (lexicographically earliest)" pointer, but `generate_report` iterates
    // `keys().rev()`, which credits the lexicographically *latest* pointer — confirm which of
    // the two is intended.
    pub rollup: u32,
}
|
||||
|
||||
type CondensedChunkId = u32;
|
||||
|
||||
fn condense_chunk_id(chunk_id: ChunkId) -> CondensedChunkId {
|
||||
CondensedChunkId::from_be_bytes(
|
||||
chunk_id[0..size_of::<CondensedChunkId>()]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn generate_report(
|
||||
dest_pile_descriptor: &DestPileDescriptor,
|
||||
descriptor: &Descriptor,
|
||||
) -> anyhow::Result<Report> {
|
||||
let pile_descriptor = load_pile_descriptor(&dest_pile_descriptor.path)?;
|
||||
let pile = open_pile(&dest_pile_descriptor.path, &pile_descriptor)?;
|
||||
|
||||
let debug_stats = pile.raw_pile.debug_statistics()?;
|
||||
|
||||
let mut pointers_to_parent_and_chunkids = BTreeMap::new();
|
||||
|
||||
info!("Collecting chunk IDs... This will probably be slow.");
|
||||
for pointer_name in pile.list_pointers()? {
|
||||
let pointer = pile
|
||||
.read_pointer(&pointer_name)?
|
||||
.context("listed pointer doesn't exist")?;
|
||||
let root_node = retrieve_tree_node(&pile, pointer.chunk_ref)?;
|
||||
let pointer_chunk_ids = collect_chunk_ids(&pile, &root_node.node)?;
|
||||
pointers_to_parent_and_chunkids
|
||||
.insert(pointer_name, (pointer.parent_pointer, pointer_chunk_ids));
|
||||
}
|
||||
|
||||
// Now we iterate in reverse order, making a list of count of Chunk IDs.
|
||||
// At the same time, we can also calculate 'rollup' sizes.
|
||||
let mut chunk_sharer_counts: BTreeMap<CondensedChunkId, u16> = BTreeMap::new();
|
||||
|
||||
let mut pointer_stats: BTreeMap<String, Sizes> = BTreeMap::new();
|
||||
|
||||
for pointer_name in pointers_to_parent_and_chunkids.keys().rev() {
|
||||
let deduped_chunks: BTreeSet<CondensedChunkId> =
|
||||
iter_over_all_chunkids_incl_parents(&pointers_to_parent_and_chunkids, &pointer_name)
|
||||
.collect();
|
||||
let mut rollup_count = 0;
|
||||
for chunk in deduped_chunks {
|
||||
let count = chunk_sharer_counts.entry(chunk).or_default();
|
||||
*count += 1;
|
||||
if *count == 1 {
|
||||
rollup_count += 1;
|
||||
}
|
||||
}
|
||||
let entry = pointer_stats.entry(pointer_name.to_owned()).or_default();
|
||||
entry.rollup = rollup_count;
|
||||
}
|
||||
|
||||
// Now go through again and update all the stats!
|
||||
for pointer_name in pointers_to_parent_and_chunkids.keys().rev() {
|
||||
let deduped_chunks: BTreeSet<CondensedChunkId> =
|
||||
iter_over_all_chunkids_incl_parents(&pointers_to_parent_and_chunkids, &pointer_name)
|
||||
.collect();
|
||||
let mut unique_count = 0;
|
||||
let mut shared_count_by_sharers = [0u32; 256];
|
||||
let total_count = deduped_chunks.len();
|
||||
for chunk in deduped_chunks {
|
||||
let count = chunk_sharer_counts[&chunk];
|
||||
if count == 1 {
|
||||
unique_count += 1;
|
||||
} else {
|
||||
let num_sharers = (count as usize).min(256);
|
||||
shared_count_by_sharers[num_sharers - 1] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut sharers_sum: f64 = 0.0;
|
||||
for (sharers_minus_one, count) in shared_count_by_sharers.into_iter().enumerate() {
|
||||
sharers_sum += (count as f64) / (sharers_minus_one + 1) as f64;
|
||||
}
|
||||
|
||||
let entry = pointer_stats.entry(pointer_name.to_owned()).or_default();
|
||||
entry.moral = (sharers_sum.ceil() as u32) + unique_count;
|
||||
entry.unique = unique_count;
|
||||
entry.total = total_count as u32;
|
||||
}
|
||||
|
||||
let mut last_backed_up = BTreeMap::new();
|
||||
for source_name in descriptor.sources.keys().cloned() {
|
||||
last_backed_up.insert(source_name, None);
|
||||
}
|
||||
|
||||
for pointer_name in pointers_to_parent_and_chunkids.keys() {
|
||||
if let Some((source_name, date_time)) = split_pointer_name(&pointer_name) {
|
||||
last_backed_up.insert(source_name, Some(date_time));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Report {
|
||||
last_source_backups: last_backed_up,
|
||||
chunk_usage: pointer_stats,
|
||||
debug_stats,
|
||||
})
|
||||
}
|
||||
|
||||
// Does not filter duplicates...
|
||||
fn iter_over_all_chunkids_incl_parents<'a>(
|
||||
pointers_to_parent_and_chunkids: &'a BTreeMap<
|
||||
String,
|
||||
(Option<String>, BTreeSet<CondensedChunkId>),
|
||||
>,
|
||||
pointer_name: &'a str,
|
||||
) -> Box<dyn Iterator<Item = CondensedChunkId> + 'a> {
|
||||
let (parent, chunks) = &pointers_to_parent_and_chunkids[pointer_name];
|
||||
match parent {
|
||||
None => Box::new(chunks.iter().copied()),
|
||||
Some(parent) => Box::new(chunks.iter().copied().chain(
|
||||
iter_over_all_chunkids_incl_parents(pointers_to_parent_and_chunkids, &parent),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_chunk_ids<RP: RawPile>(
|
||||
pile: &Pile<RP>,
|
||||
root: &TreeNode,
|
||||
) -> anyhow::Result<BTreeSet<CondensedChunkId>> {
|
||||
let mut chunk_ids = BTreeSet::new();
|
||||
root.visit(
|
||||
&mut |tree_node, _| {
|
||||
match tree_node {
|
||||
TreeNode::NormalFile { content, .. } => {
|
||||
collect_chunk_ids_from_chunkref(pile, content, &mut chunk_ids)?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
"".to_owned(),
|
||||
)?;
|
||||
Ok(chunk_ids)
|
||||
}
|
||||
|
||||
fn collect_chunk_ids_from_chunkref<RP: RawPile>(
|
||||
pile: &Pile<RP>,
|
||||
chunk_ref: &RecursiveChunkRef,
|
||||
collection: &mut BTreeSet<CondensedChunkId>,
|
||||
) -> anyhow::Result<()> {
|
||||
if chunk_ref.depth == 0 {
|
||||
collection.insert(condense_chunk_id(chunk_ref.chunk_id));
|
||||
} else {
|
||||
let shallower_chunk_ref = RecursiveChunkRef {
|
||||
chunk_id: chunk_ref.chunk_id,
|
||||
depth: chunk_ref.depth - 1,
|
||||
};
|
||||
let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
|
||||
let mut next_chunk_id: ChunkId = Default::default();
|
||||
loop {
|
||||
let read = unchunker.read(&mut next_chunk_id[..])?;
|
||||
if read == 0 {
|
||||
break;
|
||||
} else if read < next_chunk_id.len() {
|
||||
unchunker.read_exact(&mut next_chunk_id[read..])?;
|
||||
}
|
||||
collection.insert(condense_chunk_id(next_chunk_id));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn print_report(report: &Report) -> anyhow::Result<()> {
|
||||
let mut table = Table::new();
|
||||
table
|
||||
.load_preset(UTF8_FULL)
|
||||
.set_content_arrangement(ContentArrangement::DynamicFullWidth);
|
||||
//.set_width(100);
|
||||
table.set_header(vec![
|
||||
Cell::new("Pointer name").fg(Color::Cyan),
|
||||
Cell::new("Rollup size").fg(Color::Magenta),
|
||||
Cell::new("Unique size").fg(Color::Magenta),
|
||||
Cell::new("Moral size").fg(Color::Magenta),
|
||||
Cell::new("Total size").fg(Color::Magenta),
|
||||
]);
|
||||
|
||||
let average_chunk_size = report
|
||||
.debug_stats
|
||||
.as_ref()
|
||||
.map(|stats| stats.total_chunk_size as f64 / stats.number_of_chunks as f64);
|
||||
for (pointer_name, sizes) in &report.chunk_usage {
|
||||
table.add_row(vec![
|
||||
Cell::new(pointer_name).fg(Color::Blue),
|
||||
Cell::new(format_size(sizes.rollup, average_chunk_size)).fg(Color::Yellow),
|
||||
Cell::new(format_size(sizes.unique, average_chunk_size)).fg(Color::Yellow),
|
||||
Cell::new(format_size(sizes.moral, average_chunk_size)).fg(Color::Yellow),
|
||||
Cell::new(format_size(sizes.total, average_chunk_size)).fg(Color::Yellow),
|
||||
]);
|
||||
}
|
||||
|
||||
println!("{table}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn format_size(chunks: u32, average_chunk_size: Option<f64>) -> String {
|
||||
let est_size_suffix = average_chunk_size
|
||||
.map(|bytes_per_chunk| {
|
||||
let num_bytes = (chunks as f64 * bytes_per_chunk) as u64;
|
||||
format!(
|
||||
" ~{}",
|
||||
num_bytes
|
||||
.file_size(humansize::file_size_opts::BINARY)
|
||||
.unwrap()
|
||||
)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
format!("{} c{}", chunks, est_size_suffix)
|
||||
}
|
Loading…
Reference in New Issue