Add a command to show a report of the Datman system
parent 438af9164e
commit 948ca3f2b5

Cargo.lock (generated): 95 lines
@@ -233,7 +233,7 @@ version = "3.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c"
dependencies = [
 "heck",
 "heck 0.4.0",
 "proc-macro-error",
 "proc-macro2",
 "quote",
@@ -249,6 +249,18 @@ dependencies = [
 "os_str_bytes",
]

[[package]]
name = "comfy-table"
version = "6.0.0-rc.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a1a275e66c69adb0600a13650aed718c99337d9a185d353efa13ff1e05576c4"
dependencies = [
 "crossterm",
 "strum",
 "strum_macros",
 "unicode-width",
]

[[package]]
name = "console"
version = "0.15.0"
@@ -340,6 +352,31 @@ dependencies = [
 "lazy_static",
]

[[package]]
name = "crossterm"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17"
dependencies = [
 "bitflags",
 "crossterm_winapi",
 "libc",
 "mio",
 "parking_lot",
 "signal-hook",
 "signal-hook-mio",
 "winapi",
]

[[package]]
name = "crossterm_winapi"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
dependencies = [
 "winapi",
]

[[package]]
name = "crunchy"
version = "0.2.2"
@@ -378,6 +415,7 @@ dependencies = [
 "byteorder",
 "chrono",
 "clap",
 "comfy-table",
 "crossbeam-channel",
 "env_logger",
 "glob",
@@ -578,6 +616,15 @@ dependencies = [
 "num-traits",
]

[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
 "unicode-segmentation",
]

[[package]]
name = "heck"
version = "0.4.0"
@@ -1096,6 +1143,12 @@ dependencies = [
 "smallvec",
]

[[package]]
name = "rustversion"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f"

[[package]]
name = "rustyline"
version = "7.1.0"
@@ -1187,6 +1240,27 @@ dependencies = [
 "serde",
]

[[package]]
name = "signal-hook"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d"
dependencies = [
 "libc",
 "signal-hook-registry",
]

[[package]]
name = "signal-hook-mio"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
dependencies = [
 "libc",
 "mio",
 "signal-hook",
]

[[package]]
name = "signal-hook-registry"
version = "1.4.0"
@@ -1242,6 +1316,25 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"

[[package]]
name = "strum"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb"

[[package]]
name = "strum_macros"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38"
dependencies = [
 "heck 0.3.3",
 "proc-macro2",
 "quote",
 "rustversion",
 "syn",
]

[[package]]
name = "syn"
version = "1.0.95"

@@ -33,3 +33,4 @@ hostname = "0.3.1"
yama = { path = "../yama", version = "0.6.0-alpha.1" }
metrics = "0.17.1"
bare-metrics-recorder = { version = "0.1.0" }
comfy-table = "6.0.0-rc.1"
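
The new dependency here is comfy-table, which this commit uses to render the report. As a quick orientation (not part of the commit), a minimal sketch of the comfy-table 6.x calls that print_report relies on later in this diff; the header text and row values below are made up for illustration:

use comfy_table::presets::UTF8_FULL;
use comfy_table::{Cell, Color, ContentArrangement, Table};

fn main() {
    // Build a small coloured table using the same calls as print_report below.
    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL)
        .set_content_arrangement(ContentArrangement::DynamicFullWidth);
    table.set_header(vec![
        Cell::new("Pointer name").fg(Color::Cyan),
        Cell::new("Total size").fg(Color::Magenta),
    ]);
    table.add_row(vec![
        Cell::new("some-pointer").fg(Color::Blue),
        Cell::new("123 c ~1.4 MiB").fg(Color::Yellow),
    ]);
    // Table implements Display, so it can be printed directly.
    println!("{table}");
}
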
@@ -111,6 +111,11 @@ pub enum DatmanCommand {
        skip_metadata: bool,
    },

    Report {
        /// Name of the pile to report on.
        pile_name: String,
    },

    #[clap(name = "_backup_source_responder")]
    InternalBackupSourceResponder,
}
@@ -307,6 +312,15 @@ fn main() -> anyhow::Result<()> {
            info!("Datman responder at {:?}", std::env::current_exe()?);
            backup_source_responder::handler_stdio()?;
        }

        DatmanCommand::Report { pile_name } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let destination = &descriptor.piles[&pile_name];
            let report = datman::commands::report::generate_report(destination, &descriptor)?;
            // TODO Display report
            // TODO E-mail report (Can just pipe through aha and then apprise though!)
            datman::commands::report::print_report(&report)?;
        }
    }
    Ok(())
}

@@ -26,6 +26,7 @@ pub mod backup;
pub mod extract;
pub mod ibrowse;
pub mod ilabel;
pub mod report;

pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
    std::fs::create_dir_all(path)?;

@@ -21,7 +21,7 @@ use crate::labelling::{label_node, load_labelling_rules, str_to_label, Label, St
use crate::tree::{scan, FileTree, FileTree1};
use anyhow::{anyhow, bail};
use arc_interner::ArcIntern;
use chrono::{DateTime, Utc};
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use log::{info, warn};
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
@@ -49,6 +49,13 @@ pub fn get_pointer_name_at(source_name: &str, datetime: DateTime<Utc>) -> String
    )
}

pub fn split_pointer_name(pointer_name: &str) -> Option<(String, DateTime<Utc>)> {
    let (source_name, date_time_str) = pointer_name.rsplit_once("+")?;
    let date_time = NaiveDateTime::parse_from_str(date_time_str, POINTER_DATETIME_FORMAT).ok()?;
    let date_time = Utc.from_utc_datetime(&date_time);
    Some((source_name.to_owned(), date_time))
}

pub fn open_stdout_backup_process(
    extra_args: &HashMap<String, toml::Value>,
    program_name: &str,

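The new split_pointer_name helper is the inverse of get_pointer_name_at: pointer names are apparently of the form <source>+<timestamp>, so splitting on the last '+' and parsing the suffix with POINTER_DATETIME_FORMAT recovers the source name and backup time. A rough usage sketch, not part of the commit, assuming POINTER_DATETIME_FORMAT contains no '+' and round-trips whole seconds:

#[cfg(test)]
mod split_pointer_name_sketch {
    use super::{get_pointer_name_at, split_pointer_name};
    use chrono::{TimeZone, Utc};

    #[test]
    fn round_trips_a_generated_pointer_name() {
        let when = Utc.ymd(2022, 5, 28).and_hms(12, 34, 56);
        let name = get_pointer_name_at("mysource", when);
        // Splitting the generated name should give back the source and timestamp.
        assert_eq!(
            split_pointer_name(&name),
            Some(("mysource".to_owned(), when))
        );
    }
}
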
							
								
								
									
										260
									
								
								datman/src/commands/report.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										260
									
								
								datman/src/commands/report.rs
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,260 @@ | ||||
use crate::commands::backup::split_pointer_name;
use crate::descriptor::{Descriptor, DestPileDescriptor};
use anyhow::Context;
use chrono::{DateTime, Utc};
use comfy_table::presets::UTF8_FULL;
use comfy_table::{Cell, Color, ContentArrangement, Table};
use humansize::FileSize;
use log::info;
use std::collections::{BTreeMap, BTreeSet};
use std::io::Read;
use std::mem::size_of;
use yama::chunking::RecursiveUnchunker;
use yama::commands::{load_pile_descriptor, open_pile, retrieve_tree_node};
use yama::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
use yama::pile::{DebugStatistics, Pile, RawPile};

// This module generates reports for a Datman system.
// Referenced Chunk IDs are counted and used to give an indication of size.
// Chunk IDs are summarised into u32s to reduce memory usage. Since the report is approximate,
// it doesn't matter if there are a few collisions (although they are still fairly unlikely to
// affect much).

#[derive(Clone)]
pub struct Report {
    pub last_source_backups: BTreeMap<String, Option<DateTime<Utc>>>,

    pub chunk_usage: BTreeMap<String, Sizes>,

    pub debug_stats: Option<DebugStatistics>,
}

#[derive(Clone, Default)]
pub struct Sizes {
    /// Total number of chunks that we refer to.
    pub total: u32,

    /// Each referred chunk is counted once here, but divided by the number of sharers.
    /// We are 'morally responsible' for this many chunks.
    pub moral: u32,

    /// Number of chunks that only we point to.
    pub unique: u32,

    /// Number of chunks for which we are the oldest (lexicographically earliest) pointer to point
    /// to those chunks.
    pub rollup: u32,
}

type CondensedChunkId = u32;

fn condense_chunk_id(chunk_id: ChunkId) -> CondensedChunkId {
    CondensedChunkId::from_be_bytes(
        chunk_id[0..size_of::<CondensedChunkId>()]
            .try_into()
            .unwrap(),
    )
}

pub fn generate_report(
    dest_pile_descriptor: &DestPileDescriptor,
    descriptor: &Descriptor,
) -> anyhow::Result<Report> {
    let pile_descriptor = load_pile_descriptor(&dest_pile_descriptor.path)?;
    let pile = open_pile(&dest_pile_descriptor.path, &pile_descriptor)?;

    let debug_stats = pile.raw_pile.debug_statistics()?;

    let mut pointers_to_parent_and_chunkids = BTreeMap::new();

    info!("Collecting chunk IDs... This will probably be slow.");
    for pointer_name in pile.list_pointers()? {
        let pointer = pile
            .read_pointer(&pointer_name)?
            .context("listed pointer doesn't exist")?;
        let root_node = retrieve_tree_node(&pile, pointer.chunk_ref)?;
        let pointer_chunk_ids = collect_chunk_ids(&pile, &root_node.node)?;
        pointers_to_parent_and_chunkids
            .insert(pointer_name, (pointer.parent_pointer, pointer_chunk_ids));
    }

    // Now we iterate in reverse order, making a list of count of Chunk IDs.
    // At the same time, we can also calculate 'rollup' sizes.
    let mut chunk_sharer_counts: BTreeMap<CondensedChunkId, u16> = BTreeMap::new();

    let mut pointer_stats: BTreeMap<String, Sizes> = BTreeMap::new();

    for pointer_name in pointers_to_parent_and_chunkids.keys().rev() {
        let deduped_chunks: BTreeSet<CondensedChunkId> =
            iter_over_all_chunkids_incl_parents(&pointers_to_parent_and_chunkids, &pointer_name)
                .collect();
        let mut rollup_count = 0;
        for chunk in deduped_chunks {
            let count = chunk_sharer_counts.entry(chunk).or_default();
            *count += 1;
            if *count == 1 {
                rollup_count += 1;
            }
        }
        let entry = pointer_stats.entry(pointer_name.to_owned()).or_default();
        entry.rollup = rollup_count;
    }

    // Now go through again and update all the stats!
    for pointer_name in pointers_to_parent_and_chunkids.keys().rev() {
        let deduped_chunks: BTreeSet<CondensedChunkId> =
            iter_over_all_chunkids_incl_parents(&pointers_to_parent_and_chunkids, &pointer_name)
                .collect();
        let mut unique_count = 0;
        let mut shared_count_by_sharers = [0u32; 256];
        let total_count = deduped_chunks.len();
        for chunk in deduped_chunks {
            let count = chunk_sharer_counts[&chunk];
            if count == 1 {
                unique_count += 1;
            } else {
                let num_sharers = (count as usize).min(256);
                shared_count_by_sharers[num_sharers - 1] += 1;
            }
        }

        let mut sharers_sum: f64 = 0.0;
        for (sharers_minus_one, count) in shared_count_by_sharers.into_iter().enumerate() {
            sharers_sum += (count as f64) / (sharers_minus_one + 1) as f64;
        }

        let entry = pointer_stats.entry(pointer_name.to_owned()).or_default();
        entry.moral = (sharers_sum.ceil() as u32) + unique_count;
        entry.unique = unique_count;
        entry.total = total_count as u32;
    }

    let mut last_backed_up = BTreeMap::new();
    for source_name in descriptor.sources.keys().cloned() {
        last_backed_up.insert(source_name, None);
    }

    for pointer_name in pointers_to_parent_and_chunkids.keys() {
        if let Some((source_name, date_time)) = split_pointer_name(&pointer_name) {
            last_backed_up.insert(source_name, Some(date_time));
        }
    }

    Ok(Report {
        last_source_backups: last_backed_up,
        chunk_usage: pointer_stats,
        debug_stats,
    })
}

// Does not filter duplicates...
fn iter_over_all_chunkids_incl_parents<'a>(
    pointers_to_parent_and_chunkids: &'a BTreeMap<
        String,
        (Option<String>, BTreeSet<CondensedChunkId>),
    >,
    pointer_name: &'a str,
) -> Box<dyn Iterator<Item = CondensedChunkId> + 'a> {
    let (parent, chunks) = &pointers_to_parent_and_chunkids[pointer_name];
    match parent {
        None => Box::new(chunks.iter().copied()),
        Some(parent) => Box::new(chunks.iter().copied().chain(
            iter_over_all_chunkids_incl_parents(pointers_to_parent_and_chunkids, &parent),
        )),
    }
}

fn collect_chunk_ids<RP: RawPile>(
    pile: &Pile<RP>,
    root: &TreeNode,
) -> anyhow::Result<BTreeSet<CondensedChunkId>> {
    let mut chunk_ids = BTreeSet::new();
    root.visit(
        &mut |tree_node, _| {
            match tree_node {
                TreeNode::NormalFile { content, .. } => {
                    collect_chunk_ids_from_chunkref(pile, content, &mut chunk_ids)?;
                }
                _ => {}
            }
            Ok(())
        },
        "".to_owned(),
    )?;
    Ok(chunk_ids)
}

fn collect_chunk_ids_from_chunkref<RP: RawPile>(
    pile: &Pile<RP>,
    chunk_ref: &RecursiveChunkRef,
    collection: &mut BTreeSet<CondensedChunkId>,
) -> anyhow::Result<()> {
    if chunk_ref.depth == 0 {
        collection.insert(condense_chunk_id(chunk_ref.chunk_id));
    } else {
        let shallower_chunk_ref = RecursiveChunkRef {
            chunk_id: chunk_ref.chunk_id,
            depth: chunk_ref.depth - 1,
        };
        let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
        let mut next_chunk_id: ChunkId = Default::default();
        loop {
            let read = unchunker.read(&mut next_chunk_id[..])?;
            if read == 0 {
                break;
            } else if read < next_chunk_id.len() {
                unchunker.read_exact(&mut next_chunk_id[read..])?;
            }
            collection.insert(condense_chunk_id(next_chunk_id));
        }
    }
    Ok(())
}

pub fn print_report(report: &Report) -> anyhow::Result<()> {
    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL)
        .set_content_arrangement(ContentArrangement::DynamicFullWidth);
    //.set_width(100);
    table.set_header(vec![
        Cell::new("Pointer name").fg(Color::Cyan),
        Cell::new("Rollup size").fg(Color::Magenta),
        Cell::new("Unique size").fg(Color::Magenta),
        Cell::new("Moral size").fg(Color::Magenta),
        Cell::new("Total size").fg(Color::Magenta),
    ]);

    let average_chunk_size = report
        .debug_stats
        .as_ref()
        .map(|stats| stats.total_chunk_size as f64 / stats.number_of_chunks as f64);
    for (pointer_name, sizes) in &report.chunk_usage {
        table.add_row(vec![
            Cell::new(pointer_name).fg(Color::Blue),
            Cell::new(format_size(sizes.rollup, average_chunk_size)).fg(Color::Yellow),
            Cell::new(format_size(sizes.unique, average_chunk_size)).fg(Color::Yellow),
            Cell::new(format_size(sizes.moral, average_chunk_size)).fg(Color::Yellow),
            Cell::new(format_size(sizes.total, average_chunk_size)).fg(Color::Yellow),
        ]);
    }

    println!("{table}");

    Ok(())
}

fn format_size(chunks: u32, average_chunk_size: Option<f64>) -> String {
    let est_size_suffix = average_chunk_size
        .map(|bytes_per_chunk| {
            let num_bytes = (chunks as f64 * bytes_per_chunk) as u64;
            format!(
                " ~{}",
                num_bytes
                    .file_size(humansize::file_size_opts::BINARY)
                    .unwrap()
            )
        })
        .unwrap_or_default();
    format!("{} c{}", chunks, est_size_suffix)
}
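
To make the "moral size" column in the report concrete: a chunk referenced by n pointers contributes 1/n of a chunk to each of them, chunks only one pointer holds count fully, and the fractional total is rounded up, so the moral sizes of all pointers add up to roughly the number of distinct chunks referenced in the pile. A standalone sketch of that arithmetic (illustrative only, not code from the commit; it mirrors the loop in generate_report but ignores its 256-sharer cap):

// Given the number of sharers for each chunk a pointer references, compute the
// pointer's "moral" chunk count: unique chunks count fully, shared chunks count as 1/n.
fn moral_size(sharer_counts: &[u16]) -> u32 {
    let unique = sharer_counts.iter().filter(|&&n| n == 1).count() as u32;
    let shared: f64 = sharer_counts
        .iter()
        .filter(|&&n| n > 1)
        .map(|&n| 1.0 / f64::from(n))
        .sum();
    unique + shared.ceil() as u32
}

fn main() {
    // Three chunks only we hold, plus four chunks each shared with one other pointer:
    // moral size = 3 + ceil(4 * 0.5) = 5 chunks.
    assert_eq!(moral_size(&[1, 1, 1, 2, 2, 2, 2]), 5);
    println!("ok");
}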