Track statistics when integrating pointers

Signed-off-by: Olivier <olivier@librepush.net>
This commit is contained in:
Olivier 'reivilibre' 2024-09-29 09:53:24 +01:00
parent 9d06016d06
commit 0873997f1e
5 changed files with 61 additions and 14 deletions

View File

@ -19,7 +19,7 @@ use tokio::task::JoinSet;
use tracing::{debug, info, info_span, Instrument, Span};
use tracing_indicatif::span_ext::IndicatifSpanExt;
use users::{get_current_gid, get_current_uid};
use yama::pile_with_cache::PileWithCache;
use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size};
use yama::storing::{
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
@ -211,11 +211,13 @@ async fn find_suitable_parent_pointers(
.next()
{
debug!("for {source_name:?}, using parent {most_recent_pointer:?}");
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc
.read_pointer_fully_integrated(&most_recent_pointer)
.read_pointer_fully_integrated(&most_recent_pointer, &mut stats)
.await
.context("failed to read parent pointer")?
.context("no parent pointer despite having just listed it")?;
debug!("when loading parent, stats = {stats:?}");
result.insert(
source_name.to_owned(),
(most_recent_pointer.clone(), pointer),

View File

@ -9,7 +9,7 @@ use std::sync::Arc;
use tracing::{info_span, warn, Instrument};
use yama::extract;
use yama::extract::flatten_treenode;
use yama::pile_with_cache::PileWithCache;
use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode};
use yama_wormfile::boxed::BoxedWormFileProvider;
@ -114,8 +114,9 @@ pub async fn load_pointers_for_extraction(
) -> eyre::Result<BTreeMap<String, RootTreeNode>> {
let mut result = BTreeMap::new();
for (source_name, pointer_name) in &what_to_extract {
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc
.read_pointer_fully_integrated(&pointer_name)
.read_pointer_fully_integrated(&pointer_name, &mut stats)
.await?
.context("pointer doesn't exist??")?;
// TODO(ownership): adapt uid/gids here

View File

@ -42,6 +42,7 @@ use yama::open::{
open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache,
};
use yama::pile_connector::PileConnectionScheme;
use yama::pile_with_cache::PointerIntegrationStatistics;
use yama::scan::create_uidgid_lookup_tables;
use yama::storing::{
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
@ -416,8 +417,9 @@ async fn main() -> eyre::Result<()> {
update_cache(&pwc).await?;
let parent_pointer = if let Some(ref parent) = parent {
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc
.read_pointer_fully_integrated(parent.0.as_str())
.read_pointer_fully_integrated(parent.0.as_str(), &mut stats)
.await
.with_context(|| format!("whilst reading parent pointer: {parent:?}"))?
.with_context(|| {
@ -700,8 +702,9 @@ async fn main() -> eyre::Result<()> {
);
update_cache(&pwc).await?;
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc
.read_pointer_fully_integrated(source.pointer.0.as_str())
.read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
.await
.context("failed to read pointer")?
.with_context(|| {
@ -762,8 +765,9 @@ async fn main() -> eyre::Result<()> {
);
update_cache(&pwc).await?;
let mut stats = PointerIntegrationStatistics::default();
let pointer = pwc
.read_pointer_fully_integrated(source.pointer.0.as_str())
.read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
.await
.context("failed to read pointer")?
.with_context(|| {

View File

@ -1,6 +1,6 @@
use yama_localcache::Store;
use yama_pile::pointers::Pointer;
use yama_pile::Pile;
use yama_pile::{pointers::Pointer, tree::IntegrationStats};
use yama_wormfile::WormFileProvider;
use crate::scan::integrate_uid_or_gid_map;
@ -13,14 +13,22 @@ pub struct PileWithCache<WFP: WormFileProvider> {
pub localcache: Store,
}
/// Statistics gathered while fully integrating a pointer with its chain of parents.
///
/// Callers create one with `Default::default()` and pass it by `&mut` to
/// `read_pointer_fully_integrated` / `fully_integrate_pointer_in_place`, which add to it.
#[derive(Clone, Debug, Default)]
pub struct PointerIntegrationStatistics {
/// Tree-level counters, summed from each `integrate_node_in_place` call made while
/// integrating the pointer chain.
pub integration: IntegrationStats,
/// Number of pointers that were integrated to get here.
/// Incremented by one each time a pointer is integrated with its parent.
pub depth: u64,
}
impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
pub async fn fully_integrate_pointer_in_place(
&self,
pointer: &mut Pointer,
stats: &mut PointerIntegrationStatistics,
) -> eyre::Result<()> {
if let Some(parent_pointer_name) = pointer.parent.as_ref() {
let parent_pointer = self
.read_pointer_fully_integrated(parent_pointer_name)
.read_pointer_fully_integrated(parent_pointer_name, stats)
.await
.with_context(|| {
format!("failed to read pointer {parent_pointer_name} whilst integrating")
@ -29,19 +37,27 @@ impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
format!("whilst integrating, expected pointer {parent_pointer_name} to exist")
})?;
stats.integration +=
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids);
integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids);
pointer.parent = None;
stats.depth += 1;
}
Ok(())
}
#[async_recursion]
pub async fn read_pointer_fully_integrated(&self, name: &str) -> eyre::Result<Option<Pointer>> {
pub async fn read_pointer_fully_integrated(
&self,
name: &str,
stats: &mut PointerIntegrationStatistics,
) -> eyre::Result<Option<Pointer>> {
match self.pile.read_pointer(name).await? {
Some(mut pointer) => {
self.fully_integrate_pointer_in_place(&mut pointer).await?;
self.fully_integrate_pointer_in_place(&mut pointer, stats)
.await?;
Ok(Some(pointer))
}
None => Ok(None),

View File

@ -20,6 +20,7 @@ use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use std::fs::Metadata;
use std::ops::{Add, AddAssign};
use std::os::unix::fs::MetadataExt;
use crate::definitions::RecursiveChunkRef;
@ -350,6 +351,23 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
Ok(())
}
/// Counters describing the work performed by [`integrate_node_in_place`].
///
/// Statistics from separate integrations can be combined with `+` or `+=`; each counter is
/// summed independently of the others.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct IntegrationStats {
    /// NOTE(review): presumably the number of recursive descents into an existing child node,
    /// but no increment of this counter is visible in the reviewed hunks — confirm that it is
    /// actually updated somewhere.
    pub descends: u64,
    /// Number of nodes copied from the old tree into a vacant entry of the new tree.
    pub inserts: u64,
    /// Number of `Deleted` marker nodes removed from the new tree.
    pub deletes: u64,
    /// Number of times integration hit the intentional no-op branch (the node stays the same).
    pub nops: u64,
}

impl AddAssign for IntegrationStats {
    /// Adds every counter of `rhs` onto the matching counter of `self`.
    fn add_assign(&mut self, rhs: Self) {
        self.descends += rhs.descends;
        self.inserts += rhs.inserts;
        self.deletes += rhs.deletes;
        self.nops += rhs.nops;
    }
}

impl Add for IntegrationStats {
    type Output = Self;

    /// Returns the field-wise sum of `self` and `rhs`.
    ///
    /// Delegates to [`AddAssign`] so the two operators can never disagree.
    fn add(mut self, rhs: Self) -> Self::Output {
        self += rhs;
        self
    }
}
/// Integrates a node in place.
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
/// `PointerData` appropriately if needed to reflect this).
@ -360,7 +378,8 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
/// Preconditions:
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
/// - `old` is the parent of `new`
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> IntegrationStats {
let mut stats = IntegrationStats::default();
if let TreeNode::Directory { children, .. } = new {
if let TreeNode::Directory {
children: old_children,
@ -371,14 +390,16 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
match children.entry(name.clone()) {
Entry::Vacant(entry) => {
entry.insert(node.clone());
stats.inserts += 1;
}
Entry::Occupied(entry) => {
if entry.get() == &TreeNode::Deleted {
// We don't insert the old node but we do remove the 'deleted' marker
// node!
entry.remove();
stats.deletes += 1;
} else {
integrate_node_in_place(entry.into_mut(), node);
stats += integrate_node_in_place(entry.into_mut(), node);
}
}
}
@ -387,7 +408,10 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
} else {
// the node stays the same...
// intentional NOP!
stats.nops += 1;
}
stats
}
/// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories