Track statistics when integrating pointers

Signed-off-by: Olivier <olivier@librepush.net>
This commit is contained in:
Olivier 'reivilibre' 2024-09-29 09:53:24 +01:00
parent 9d06016d06
commit 0873997f1e
5 changed files with 61 additions and 14 deletions

View File

@ -19,7 +19,7 @@ use tokio::task::JoinSet;
use tracing::{debug, info, info_span, Instrument, Span}; use tracing::{debug, info, info_span, Instrument, Span};
use tracing_indicatif::span_ext::IndicatifSpanExt; use tracing_indicatif::span_ext::IndicatifSpanExt;
use users::{get_current_gid, get_current_uid}; use users::{get_current_gid, get_current_uid};
use yama::pile_with_cache::PileWithCache; use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size}; use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size};
use yama::storing::{ use yama::storing::{
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState, assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
@ -211,11 +211,13 @@ async fn find_suitable_parent_pointers(
.next() .next()
{ {
debug!("for {source_name:?}, using parent {most_recent_pointer:?}"); debug!("for {source_name:?}, using parent {most_recent_pointer:?}");
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc let pointer = pwc
.read_pointer_fully_integrated(&most_recent_pointer) .read_pointer_fully_integrated(&most_recent_pointer, &mut stats)
.await .await
.context("failed to read parent pointer")? .context("failed to read parent pointer")?
.context("no parent pointer despite having just listed it")?; .context("no parent pointer despite having just listed it")?;
debug!("when loading parent, stats = {stats:?}");
result.insert( result.insert(
source_name.to_owned(), source_name.to_owned(),
(most_recent_pointer.clone(), pointer), (most_recent_pointer.clone(), pointer),

View File

@ -9,7 +9,7 @@ use std::sync::Arc;
use tracing::{info_span, warn, Instrument}; use tracing::{info_span, warn, Instrument};
use yama::extract; use yama::extract;
use yama::extract::flatten_treenode; use yama::extract::flatten_treenode;
use yama::pile_with_cache::PileWithCache; use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode}; use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode};
use yama_wormfile::boxed::BoxedWormFileProvider; use yama_wormfile::boxed::BoxedWormFileProvider;
@ -114,8 +114,9 @@ pub async fn load_pointers_for_extraction(
) -> eyre::Result<BTreeMap<String, RootTreeNode>> { ) -> eyre::Result<BTreeMap<String, RootTreeNode>> {
let mut result = BTreeMap::new(); let mut result = BTreeMap::new();
for (source_name, pointer_name) in &what_to_extract { for (source_name, pointer_name) in &what_to_extract {
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc let pointer = pwc
.read_pointer_fully_integrated(&pointer_name) .read_pointer_fully_integrated(&pointer_name, &mut stats)
.await? .await?
.context("pointer doesn't exist??")?; .context("pointer doesn't exist??")?;
// TODO(ownership): adapt uid/gids here // TODO(ownership): adapt uid/gids here

View File

@ -42,6 +42,7 @@ use yama::open::{
open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache, open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache,
}; };
use yama::pile_connector::PileConnectionScheme; use yama::pile_connector::PileConnectionScheme;
use yama::pile_with_cache::PointerIntegrationStatistics;
use yama::scan::create_uidgid_lookup_tables; use yama::scan::create_uidgid_lookup_tables;
use yama::storing::{ use yama::storing::{
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState, assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
@ -416,8 +417,9 @@ async fn main() -> eyre::Result<()> {
update_cache(&pwc).await?; update_cache(&pwc).await?;
let parent_pointer = if let Some(ref parent) = parent { let parent_pointer = if let Some(ref parent) = parent {
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc let pointer = pwc
.read_pointer_fully_integrated(parent.0.as_str()) .read_pointer_fully_integrated(parent.0.as_str(), &mut stats)
.await .await
.with_context(|| format!("whilst reading parent pointer: {parent:?}"))? .with_context(|| format!("whilst reading parent pointer: {parent:?}"))?
.with_context(|| { .with_context(|| {
@ -700,8 +702,9 @@ async fn main() -> eyre::Result<()> {
); );
update_cache(&pwc).await?; update_cache(&pwc).await?;
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc let pointer = pwc
.read_pointer_fully_integrated(source.pointer.0.as_str()) .read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
.await .await
.context("failed to read pointer")? .context("failed to read pointer")?
.with_context(|| { .with_context(|| {
@ -762,8 +765,9 @@ async fn main() -> eyre::Result<()> {
); );
update_cache(&pwc).await?; update_cache(&pwc).await?;
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc let pointer = pwc
.read_pointer_fully_integrated(source.pointer.0.as_str()) .read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
.await .await
.context("failed to read pointer")? .context("failed to read pointer")?
.with_context(|| { .with_context(|| {

View File

@ -1,6 +1,6 @@
use yama_localcache::Store; use yama_localcache::Store;
use yama_pile::pointers::Pointer;
use yama_pile::Pile; use yama_pile::Pile;
use yama_pile::{pointers::Pointer, tree::IntegrationStats};
use yama_wormfile::WormFileProvider; use yama_wormfile::WormFileProvider;
use crate::scan::integrate_uid_or_gid_map; use crate::scan::integrate_uid_or_gid_map;
@ -13,14 +13,22 @@ pub struct PileWithCache<WFP: WormFileProvider> {
pub localcache: Store, pub localcache: Store,
} }
/// Statistics collected while fully integrating a pointer with its parent(s).
///
/// Callers create one with `Default::default()` and pass it by `&mut` to
/// `PileWithCache::read_pointer_fully_integrated` or
/// `PileWithCache::fully_integrate_pointer_in_place`, which add to it.
#[derive(Clone, Debug, Default)]
pub struct PointerIntegrationStatistics {
/// Running total of the tree-level statistics returned by each
/// `integrate_node_in_place` call made during integration.
pub integration: IntegrationStats,
/// Number of pointers that were integrated to get here.
/// Incremented by one each time a pointer is merged with its parent.
pub depth: u64,
}
impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> { impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
pub async fn fully_integrate_pointer_in_place( pub async fn fully_integrate_pointer_in_place(
&self, &self,
pointer: &mut Pointer, pointer: &mut Pointer,
stats: &mut PointerIntegrationStatistics,
) -> eyre::Result<()> { ) -> eyre::Result<()> {
if let Some(parent_pointer_name) = pointer.parent.as_ref() { if let Some(parent_pointer_name) = pointer.parent.as_ref() {
let parent_pointer = self let parent_pointer = self
.read_pointer_fully_integrated(parent_pointer_name) .read_pointer_fully_integrated(parent_pointer_name, stats)
.await .await
.with_context(|| { .with_context(|| {
format!("failed to read pointer {parent_pointer_name} whilst integrating") format!("failed to read pointer {parent_pointer_name} whilst integrating")
@ -29,19 +37,27 @@ impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
format!("whilst integrating, expected pointer {parent_pointer_name} to exist") format!("whilst integrating, expected pointer {parent_pointer_name} to exist")
})?; })?;
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node); stats.integration +=
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids); integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids);
integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids); integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids);
pointer.parent = None; pointer.parent = None;
stats.depth += 1;
} }
Ok(()) Ok(())
} }
#[async_recursion] #[async_recursion]
pub async fn read_pointer_fully_integrated(&self, name: &str) -> eyre::Result<Option<Pointer>> { pub async fn read_pointer_fully_integrated(
&self,
name: &str,
stats: &mut PointerIntegrationStatistics,
) -> eyre::Result<Option<Pointer>> {
match self.pile.read_pointer(name).await? { match self.pile.read_pointer(name).await? {
Some(mut pointer) => { Some(mut pointer) => {
self.fully_integrate_pointer_in_place(&mut pointer).await?; self.fully_integrate_pointer_in_place(&mut pointer, stats)
.await?;
Ok(Some(pointer)) Ok(Some(pointer))
} }
None => Ok(None), None => Ok(None),

View File

@ -20,6 +20,7 @@ use std::collections::btree_map::Entry;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::fs::Metadata; use std::fs::Metadata;
use std::ops::{Add, AddAssign};
use std::os::unix::fs::MetadataExt; use std::os::unix::fs::MetadataExt;
use crate::definitions::RecursiveChunkRef; use crate::definitions::RecursiveChunkRef;
@ -350,6 +351,23 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
Ok(()) Ok(())
} }
/// Counters describing the work done by `integrate_node_in_place`.
///
/// All counters start at zero (`Default`). Two sets of statistics can be combined
/// field by field with `+=` or `+`, which is how the totals of recursive calls are
/// folded into the caller's totals.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct IntegrationStats {
    /// Presumably the number of recursive descents into child nodes.
    /// NOTE(review): no increment of this counter is visible in the integration
    /// code shown alongside this type — confirm it is actually being counted.
    pub descends: u64,
    /// Number of entries copied from the old (parent) tree into a vacant slot of
    /// the new tree.
    pub inserts: u64,
    /// Number of `Deleted` marker entries removed from the new tree.
    pub deletes: u64,
    /// Number of times integration deliberately left a node as it was.
    pub nops: u64,
}

impl AddAssign for IntegrationStats {
    /// Adds every counter of `rhs` onto the matching counter of `self`.
    fn add_assign(&mut self, rhs: Self) {
        self.descends += rhs.descends;
        self.inserts += rhs.inserts;
        self.deletes += rhs.deletes;
        self.nops += rhs.nops;
    }
}

impl Add for IntegrationStats {
    type Output = Self;

    /// Returns the field-wise sum of `self` and `rhs`.
    ///
    /// Delegates to `AddAssign` so the two operators can never disagree.
    fn add(mut self, rhs: Self) -> Self::Output {
        self += rhs;
        self
    }
}
/// Integrates a node in place. /// Integrates a node in place.
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating /// This makes `new` no longer have a parent (remember, the caller is responsible for updating
/// `PointerData` appropriately if needed to reflect this). /// `PointerData` appropriately if needed to reflect this).
@ -360,7 +378,8 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
/// Preconditions: /// Preconditions:
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.) /// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
/// - `old` is the parent of `new` /// - `old` is the parent of `new`
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) { pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> IntegrationStats {
let mut stats = IntegrationStats::default();
if let TreeNode::Directory { children, .. } = new { if let TreeNode::Directory { children, .. } = new {
if let TreeNode::Directory { if let TreeNode::Directory {
children: old_children, children: old_children,
@ -371,14 +390,16 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
match children.entry(name.clone()) { match children.entry(name.clone()) {
Entry::Vacant(entry) => { Entry::Vacant(entry) => {
entry.insert(node.clone()); entry.insert(node.clone());
stats.inserts += 1;
} }
Entry::Occupied(entry) => { Entry::Occupied(entry) => {
if entry.get() == &TreeNode::Deleted { if entry.get() == &TreeNode::Deleted {
// We don't insert the old node but we do remove the 'deleted' marker // We don't insert the old node but we do remove the 'deleted' marker
// node! // node!
entry.remove(); entry.remove();
stats.deletes += 1;
} else { } else {
integrate_node_in_place(entry.into_mut(), node); stats += integrate_node_in_place(entry.into_mut(), node);
} }
} }
} }
@ -387,7 +408,10 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
} else { } else {
// the node stays the same... // the node stays the same...
// intentional NOP! // intentional NOP!
stats.nops += 1;
} }
stats
} }
/// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories /// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories