Track statistics when integrating pointers
Signed-off-by: Olivier <olivier@librepush.net>
This commit is contained in:
parent
9d06016d06
commit
0873997f1e
|
@ -19,7 +19,7 @@ use tokio::task::JoinSet;
|
|||
use tracing::{debug, info, info_span, Instrument, Span};
|
||||
use tracing_indicatif::span_ext::IndicatifSpanExt;
|
||||
use users::{get_current_gid, get_current_uid};
|
||||
use yama::pile_with_cache::PileWithCache;
|
||||
use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
|
||||
use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size};
|
||||
use yama::storing::{
|
||||
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
|
||||
|
@ -211,11 +211,13 @@ async fn find_suitable_parent_pointers(
|
|||
.next()
|
||||
{
|
||||
debug!("for {source_name:?}, using parent {most_recent_pointer:?}");
|
||||
let mut stats = PointerIntegrationStatistics::default();
|
||||
let pointer = pwc
|
||||
.read_pointer_fully_integrated(&most_recent_pointer)
|
||||
.read_pointer_fully_integrated(&most_recent_pointer, &mut stats)
|
||||
.await
|
||||
.context("failed to read parent pointer")?
|
||||
.context("no parent pointer despite having just listed it")?;
|
||||
debug!("when loading parent, stats = {stats:?}");
|
||||
result.insert(
|
||||
source_name.to_owned(),
|
||||
(most_recent_pointer.clone(), pointer),
|
||||
|
|
|
@ -9,7 +9,7 @@ use std::sync::Arc;
|
|||
use tracing::{info_span, warn, Instrument};
|
||||
use yama::extract;
|
||||
use yama::extract::flatten_treenode;
|
||||
use yama::pile_with_cache::PileWithCache;
|
||||
use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
|
||||
use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode};
|
||||
use yama_wormfile::boxed::BoxedWormFileProvider;
|
||||
|
||||
|
@ -114,8 +114,9 @@ pub async fn load_pointers_for_extraction(
|
|||
) -> eyre::Result<BTreeMap<String, RootTreeNode>> {
|
||||
let mut result = BTreeMap::new();
|
||||
for (source_name, pointer_name) in &what_to_extract {
|
||||
let mut stats = PointerIntegrationStatistics::default();
|
||||
let pointer = pwc
|
||||
.read_pointer_fully_integrated(&pointer_name)
|
||||
.read_pointer_fully_integrated(&pointer_name, &mut stats)
|
||||
.await?
|
||||
.context("pointer doesn't exist??")?;
|
||||
// TODO(ownership): adapt uid/gids here
|
||||
|
|
|
@ -42,6 +42,7 @@ use yama::open::{
|
|||
open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache,
|
||||
};
|
||||
use yama::pile_connector::PileConnectionScheme;
|
||||
use yama::pile_with_cache::PointerIntegrationStatistics;
|
||||
use yama::scan::create_uidgid_lookup_tables;
|
||||
use yama::storing::{
|
||||
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
|
||||
|
@ -416,8 +417,9 @@ async fn main() -> eyre::Result<()> {
|
|||
update_cache(&pwc).await?;
|
||||
|
||||
let parent_pointer = if let Some(ref parent) = parent {
|
||||
let mut stats = PointerIntegrationStatistics::default();
|
||||
let pointer = pwc
|
||||
.read_pointer_fully_integrated(parent.0.as_str())
|
||||
.read_pointer_fully_integrated(parent.0.as_str(), &mut stats)
|
||||
.await
|
||||
.with_context(|| format!("whilst reading parent pointer: {parent:?}"))?
|
||||
.with_context(|| {
|
||||
|
@ -700,8 +702,9 @@ async fn main() -> eyre::Result<()> {
|
|||
);
|
||||
update_cache(&pwc).await?;
|
||||
|
||||
let mut stats = PointerIntegrationStatistics::default();
|
||||
let pointer = pwc
|
||||
.read_pointer_fully_integrated(source.pointer.0.as_str())
|
||||
.read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
|
||||
.await
|
||||
.context("failed to read pointer")?
|
||||
.with_context(|| {
|
||||
|
@ -762,8 +765,9 @@ async fn main() -> eyre::Result<()> {
|
|||
);
|
||||
update_cache(&pwc).await?;
|
||||
|
||||
let mut stats = PointerIntegrationStatistics::default();
|
||||
let pointer = pwc
|
||||
.read_pointer_fully_integrated(source.pointer.0.as_str())
|
||||
.read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
|
||||
.await
|
||||
.context("failed to read pointer")?
|
||||
.with_context(|| {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use yama_localcache::Store;
|
||||
use yama_pile::pointers::Pointer;
|
||||
use yama_pile::Pile;
|
||||
use yama_pile::{pointers::Pointer, tree::IntegrationStats};
|
||||
use yama_wormfile::WormFileProvider;
|
||||
|
||||
use crate::scan::integrate_uid_or_gid_map;
|
||||
|
@ -13,14 +13,22 @@ pub struct PileWithCache<WFP: WormFileProvider> {
|
|||
pub localcache: Store,
|
||||
}
|
||||
|
||||
/// Statistics collected while fully integrating a pointer.
///
/// Callers start from `PointerIntegrationStatistics::default()` and pass a mutable
/// reference down; each parent pointer that gets folded in adds to the totals.
#[derive(Clone, Debug, Default)]
|
||||
pub struct PointerIntegrationStatistics {
|
||||
/// Running total of the per-tree counters returned by `integrate_node_in_place`.
pub integration: IntegrationStats,
|
||||
/// Number of pointers that were integrated to get here.
|
||||
pub depth: u64,
|
||||
}
|
||||
|
||||
impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
|
||||
pub async fn fully_integrate_pointer_in_place(
|
||||
&self,
|
||||
pointer: &mut Pointer,
|
||||
stats: &mut PointerIntegrationStatistics,
|
||||
) -> eyre::Result<()> {
|
||||
if let Some(parent_pointer_name) = pointer.parent.as_ref() {
|
||||
let parent_pointer = self
|
||||
.read_pointer_fully_integrated(parent_pointer_name)
|
||||
.read_pointer_fully_integrated(parent_pointer_name, stats)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("failed to read pointer {parent_pointer_name} whilst integrating")
|
||||
|
@ -29,19 +37,27 @@ impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
|
|||
format!("whilst integrating, expected pointer {parent_pointer_name} to exist")
|
||||
})?;
|
||||
|
||||
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
|
||||
stats.integration +=
|
||||
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
|
||||
integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids);
|
||||
integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids);
|
||||
pointer.parent = None;
|
||||
|
||||
stats.depth += 1;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[async_recursion]
|
||||
pub async fn read_pointer_fully_integrated(&self, name: &str) -> eyre::Result<Option<Pointer>> {
|
||||
pub async fn read_pointer_fully_integrated(
|
||||
&self,
|
||||
name: &str,
|
||||
stats: &mut PointerIntegrationStatistics,
|
||||
) -> eyre::Result<Option<Pointer>> {
|
||||
match self.pile.read_pointer(name).await? {
|
||||
Some(mut pointer) => {
|
||||
self.fully_integrate_pointer_in_place(&mut pointer).await?;
|
||||
self.fully_integrate_pointer_in_place(&mut pointer, stats)
|
||||
.await?;
|
||||
Ok(Some(pointer))
|
||||
}
|
||||
None => Ok(None),
|
||||
|
|
|
@ -20,6 +20,7 @@ use std::collections::btree_map::Entry;
|
|||
use std::collections::BTreeMap;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::fs::Metadata;
|
||||
use std::ops::{Add, AddAssign};
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
|
||||
use crate::definitions::RecursiveChunkRef;
|
||||
|
@ -350,6 +351,23 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Counters describing the work performed while integrating one tree node into another.
///
/// Values are combined field by field with `+=` (or `+`), so the statistics of nested
/// integrations can be rolled up into a single total.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct IntegrationStats {
    /// Number of recursive descents into child nodes.
    /// NOTE(review): nothing visible in this change increments this counter — confirm
    /// whether the recursive call site is meant to bump it.
    pub descends: u64,
    /// Number of child entries copied across from the parent because they were absent.
    pub inserts: u64,
    /// Number of 'deleted' marker entries that were removed.
    pub deletes: u64,
    /// Number of nodes that were left untouched.
    pub nops: u64,
}

impl AddAssign for IntegrationStats {
    /// Adds every counter of `rhs` onto the matching counter of `self`.
    fn add_assign(&mut self, rhs: Self) {
        self.descends += rhs.descends;
        self.inserts += rhs.inserts;
        self.deletes += rhs.deletes;
        self.nops += rhs.nops;
    }
}

impl Add for IntegrationStats {
    type Output = Self;

    /// Returns the field-wise sum of the two sets of counters.
    fn add(mut self, rhs: Self) -> Self::Output {
        // Delegate to `AddAssign` so the two operators can never disagree.
        self += rhs;
        self
    }
}
|
||||
|
||||
/// Integrates a node in place.
|
||||
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
|
||||
/// `PointerData` appropriately if needed to reflect this).
|
||||
|
@ -360,7 +378,8 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
|
|||
/// Preconditions:
|
||||
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
|
||||
/// - `old` is the parent of `new`
|
||||
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
||||
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> IntegrationStats {
|
||||
let mut stats = IntegrationStats::default();
|
||||
if let TreeNode::Directory { children, .. } = new {
|
||||
if let TreeNode::Directory {
|
||||
children: old_children,
|
||||
|
@ -371,14 +390,16 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
|||
match children.entry(name.clone()) {
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(node.clone());
|
||||
stats.inserts += 1;
|
||||
}
|
||||
Entry::Occupied(entry) => {
|
||||
if entry.get() == &TreeNode::Deleted {
|
||||
// We don't insert the old node but we do remove the 'deleted' marker
|
||||
// node!
|
||||
entry.remove();
|
||||
stats.deletes += 1;
|
||||
} else {
|
||||
integrate_node_in_place(entry.into_mut(), node);
|
||||
stats += integrate_node_in_place(entry.into_mut(), node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -387,7 +408,10 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
|||
} else {
|
||||
// the node stays the same...
|
||||
// intentional NOP!
|
||||
stats.nops += 1;
|
||||
}
|
||||
|
||||
stats
|
||||
}
|
||||
|
||||
/// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories
|
||||
|
|
Loading…
Reference in New Issue