Track statistics when integrating pointers
Signed-off-by: Olivier <olivier@librepush.net>
This commit is contained in:
parent
9d06016d06
commit
0873997f1e
|
@ -19,7 +19,7 @@ use tokio::task::JoinSet;
|
||||||
use tracing::{debug, info, info_span, Instrument, Span};
|
use tracing::{debug, info, info_span, Instrument, Span};
|
||||||
use tracing_indicatif::span_ext::IndicatifSpanExt;
|
use tracing_indicatif::span_ext::IndicatifSpanExt;
|
||||||
use users::{get_current_gid, get_current_uid};
|
use users::{get_current_gid, get_current_uid};
|
||||||
use yama::pile_with_cache::PileWithCache;
|
use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
|
||||||
use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size};
|
use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size};
|
||||||
use yama::storing::{
|
use yama::storing::{
|
||||||
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
|
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
|
||||||
|
@ -211,11 +211,13 @@ async fn find_suitable_parent_pointers(
|
||||||
.next()
|
.next()
|
||||||
{
|
{
|
||||||
debug!("for {source_name:?}, using parent {most_recent_pointer:?}");
|
debug!("for {source_name:?}, using parent {most_recent_pointer:?}");
|
||||||
|
let mut stats = PointerIntegrationStatistics::default();
|
||||||
let pointer = pwc
|
let pointer = pwc
|
||||||
.read_pointer_fully_integrated(&most_recent_pointer)
|
.read_pointer_fully_integrated(&most_recent_pointer, &mut stats)
|
||||||
.await
|
.await
|
||||||
.context("failed to read parent pointer")?
|
.context("failed to read parent pointer")?
|
||||||
.context("no parent pointer despite having just listed it")?;
|
.context("no parent pointer despite having just listed it")?;
|
||||||
|
debug!("when loading parent, stats = {stats:?}");
|
||||||
result.insert(
|
result.insert(
|
||||||
source_name.to_owned(),
|
source_name.to_owned(),
|
||||||
(most_recent_pointer.clone(), pointer),
|
(most_recent_pointer.clone(), pointer),
|
||||||
|
|
|
@ -9,7 +9,7 @@ use std::sync::Arc;
|
||||||
use tracing::{info_span, warn, Instrument};
|
use tracing::{info_span, warn, Instrument};
|
||||||
use yama::extract;
|
use yama::extract;
|
||||||
use yama::extract::flatten_treenode;
|
use yama::extract::flatten_treenode;
|
||||||
use yama::pile_with_cache::PileWithCache;
|
use yama::pile_with_cache::{PileWithCache, PointerIntegrationStatistics};
|
||||||
use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode};
|
use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode};
|
||||||
use yama_wormfile::boxed::BoxedWormFileProvider;
|
use yama_wormfile::boxed::BoxedWormFileProvider;
|
||||||
|
|
||||||
|
@ -114,8 +114,9 @@ pub async fn load_pointers_for_extraction(
|
||||||
) -> eyre::Result<BTreeMap<String, RootTreeNode>> {
|
) -> eyre::Result<BTreeMap<String, RootTreeNode>> {
|
||||||
let mut result = BTreeMap::new();
|
let mut result = BTreeMap::new();
|
||||||
for (source_name, pointer_name) in &what_to_extract {
|
for (source_name, pointer_name) in &what_to_extract {
|
||||||
|
let mut stats = PointerIntegrationStatistics::default();
|
||||||
let pointer = pwc
|
let pointer = pwc
|
||||||
.read_pointer_fully_integrated(&pointer_name)
|
.read_pointer_fully_integrated(&pointer_name, &mut stats)
|
||||||
.await?
|
.await?
|
||||||
.context("pointer doesn't exist??")?;
|
.context("pointer doesn't exist??")?;
|
||||||
// TODO(ownership): adapt uid/gids here
|
// TODO(ownership): adapt uid/gids here
|
||||||
|
|
|
@ -42,6 +42,7 @@ use yama::open::{
|
||||||
open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache,
|
open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache,
|
||||||
};
|
};
|
||||||
use yama::pile_connector::PileConnectionScheme;
|
use yama::pile_connector::PileConnectionScheme;
|
||||||
|
use yama::pile_with_cache::PointerIntegrationStatistics;
|
||||||
use yama::scan::create_uidgid_lookup_tables;
|
use yama::scan::create_uidgid_lookup_tables;
|
||||||
use yama::storing::{
|
use yama::storing::{
|
||||||
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
|
assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState,
|
||||||
|
@ -416,8 +417,9 @@ async fn main() -> eyre::Result<()> {
|
||||||
update_cache(&pwc).await?;
|
update_cache(&pwc).await?;
|
||||||
|
|
||||||
let parent_pointer = if let Some(ref parent) = parent {
|
let parent_pointer = if let Some(ref parent) = parent {
|
||||||
|
let mut stats = PointerIntegrationStatistics::default();
|
||||||
let pointer = pwc
|
let pointer = pwc
|
||||||
.read_pointer_fully_integrated(parent.0.as_str())
|
.read_pointer_fully_integrated(parent.0.as_str(), &mut stats)
|
||||||
.await
|
.await
|
||||||
.with_context(|| format!("whilst reading parent pointer: {parent:?}"))?
|
.with_context(|| format!("whilst reading parent pointer: {parent:?}"))?
|
||||||
.with_context(|| {
|
.with_context(|| {
|
||||||
|
@ -700,8 +702,9 @@ async fn main() -> eyre::Result<()> {
|
||||||
);
|
);
|
||||||
update_cache(&pwc).await?;
|
update_cache(&pwc).await?;
|
||||||
|
|
||||||
|
let mut stats = PointerIntegrationStatistics::default();
|
||||||
let pointer = pwc
|
let pointer = pwc
|
||||||
.read_pointer_fully_integrated(source.pointer.0.as_str())
|
.read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
|
||||||
.await
|
.await
|
||||||
.context("failed to read pointer")?
|
.context("failed to read pointer")?
|
||||||
.with_context(|| {
|
.with_context(|| {
|
||||||
|
@ -762,8 +765,9 @@ async fn main() -> eyre::Result<()> {
|
||||||
);
|
);
|
||||||
update_cache(&pwc).await?;
|
update_cache(&pwc).await?;
|
||||||
|
|
||||||
|
let mut stats = PointerIntegrationStatistics::default();
|
||||||
let pointer = pwc
|
let pointer = pwc
|
||||||
.read_pointer_fully_integrated(source.pointer.0.as_str())
|
.read_pointer_fully_integrated(source.pointer.0.as_str(), &mut stats)
|
||||||
.await
|
.await
|
||||||
.context("failed to read pointer")?
|
.context("failed to read pointer")?
|
||||||
.with_context(|| {
|
.with_context(|| {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use yama_localcache::Store;
|
use yama_localcache::Store;
|
||||||
use yama_pile::pointers::Pointer;
|
|
||||||
use yama_pile::Pile;
|
use yama_pile::Pile;
|
||||||
|
use yama_pile::{pointers::Pointer, tree::IntegrationStats};
|
||||||
use yama_wormfile::WormFileProvider;
|
use yama_wormfile::WormFileProvider;
|
||||||
|
|
||||||
use crate::scan::integrate_uid_or_gid_map;
|
use crate::scan::integrate_uid_or_gid_map;
|
||||||
|
@ -13,14 +13,22 @@ pub struct PileWithCache<WFP: WormFileProvider> {
|
||||||
pub localcache: Store,
|
pub localcache: Store,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct PointerIntegrationStatistics {
|
||||||
|
pub integration: IntegrationStats,
|
||||||
|
/// Number of pointers that were integrated to get here.
|
||||||
|
pub depth: u64,
|
||||||
|
}
|
||||||
|
|
||||||
impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
|
impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
|
||||||
pub async fn fully_integrate_pointer_in_place(
|
pub async fn fully_integrate_pointer_in_place(
|
||||||
&self,
|
&self,
|
||||||
pointer: &mut Pointer,
|
pointer: &mut Pointer,
|
||||||
|
stats: &mut PointerIntegrationStatistics,
|
||||||
) -> eyre::Result<()> {
|
) -> eyre::Result<()> {
|
||||||
if let Some(parent_pointer_name) = pointer.parent.as_ref() {
|
if let Some(parent_pointer_name) = pointer.parent.as_ref() {
|
||||||
let parent_pointer = self
|
let parent_pointer = self
|
||||||
.read_pointer_fully_integrated(parent_pointer_name)
|
.read_pointer_fully_integrated(parent_pointer_name, stats)
|
||||||
.await
|
.await
|
||||||
.with_context(|| {
|
.with_context(|| {
|
||||||
format!("failed to read pointer {parent_pointer_name} whilst integrating")
|
format!("failed to read pointer {parent_pointer_name} whilst integrating")
|
||||||
|
@ -29,19 +37,27 @@ impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
|
||||||
format!("whilst integrating, expected pointer {parent_pointer_name} to exist")
|
format!("whilst integrating, expected pointer {parent_pointer_name} to exist")
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
|
stats.integration +=
|
||||||
|
integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
|
||||||
integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids);
|
integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids);
|
||||||
integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids);
|
integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids);
|
||||||
pointer.parent = None;
|
pointer.parent = None;
|
||||||
|
|
||||||
|
stats.depth += 1;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_recursion]
|
#[async_recursion]
|
||||||
pub async fn read_pointer_fully_integrated(&self, name: &str) -> eyre::Result<Option<Pointer>> {
|
pub async fn read_pointer_fully_integrated(
|
||||||
|
&self,
|
||||||
|
name: &str,
|
||||||
|
stats: &mut PointerIntegrationStatistics,
|
||||||
|
) -> eyre::Result<Option<Pointer>> {
|
||||||
match self.pile.read_pointer(name).await? {
|
match self.pile.read_pointer(name).await? {
|
||||||
Some(mut pointer) => {
|
Some(mut pointer) => {
|
||||||
self.fully_integrate_pointer_in_place(&mut pointer).await?;
|
self.fully_integrate_pointer_in_place(&mut pointer, stats)
|
||||||
|
.await?;
|
||||||
Ok(Some(pointer))
|
Ok(Some(pointer))
|
||||||
}
|
}
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
|
|
|
@ -20,6 +20,7 @@ use std::collections::btree_map::Entry;
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::fmt::{Debug, Formatter};
|
use std::fmt::{Debug, Formatter};
|
||||||
use std::fs::Metadata;
|
use std::fs::Metadata;
|
||||||
|
use std::ops::{Add, AddAssign};
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
|
||||||
use crate::definitions::RecursiveChunkRef;
|
use crate::definitions::RecursiveChunkRef;
|
||||||
|
@ -350,6 +351,23 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Counters describing the work performed by one or more `integrate_node_in_place` calls.
///
/// Every counter starts at zero (`Default`). Two values combine field by field with either
/// `+` or `+=`, so the result of a nested integration can be folded into a running total.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct IntegrationStats {
    /// NOTE(review): presumably the number of directories recursed into; no increment of this
    /// counter is visible alongside the other counters — confirm against the full function.
    pub descends: u64,
    /// Children copied from the parent tree into a vacant slot of the new tree.
    pub inserts: u64,
    /// `Deleted` marker nodes removed from the new tree instead of restoring the old node.
    pub deletes: u64,
    /// Nodes for which integration was an intentional no-op (the node stays the same).
    pub nops: u64,
}

impl Add for IntegrationStats {
    type Output = Self;

    /// Returns the field-wise sum of `self` and `rhs`.
    fn add(mut self, rhs: Self) -> Self::Output {
        // Delegate so that `+` and `+=` can never disagree.
        self += rhs;
        self
    }
}

impl AddAssign for IntegrationStats {
    /// Adds every counter of `rhs` onto the matching counter of `self`.
    fn add_assign(&mut self, rhs: Self) {
        // Exhaustive destructuring: adding a field to the struct without summing it here
        // becomes a compile error rather than a silently dropped counter.
        let Self {
            descends,
            inserts,
            deletes,
            nops,
        } = rhs;

        self.descends += descends;
        self.inserts += inserts;
        self.deletes += deletes;
        self.nops += nops;
    }
}
|
||||||
|
|
||||||
/// Integrates a node in place.
|
/// Integrates a node in place.
|
||||||
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
|
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
|
||||||
/// `PointerData` appropriately if needed to reflect this).
|
/// `PointerData` appropriately if needed to reflect this).
|
||||||
|
@ -360,7 +378,8 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::
|
||||||
/// Preconditions:
|
/// Preconditions:
|
||||||
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
|
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
|
||||||
/// - `old` is the parent of `new`
|
/// - `old` is the parent of `new`
|
||||||
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> IntegrationStats {
|
||||||
|
let mut stats = IntegrationStats::default();
|
||||||
if let TreeNode::Directory { children, .. } = new {
|
if let TreeNode::Directory { children, .. } = new {
|
||||||
if let TreeNode::Directory {
|
if let TreeNode::Directory {
|
||||||
children: old_children,
|
children: old_children,
|
||||||
|
@ -371,14 +390,16 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
||||||
match children.entry(name.clone()) {
|
match children.entry(name.clone()) {
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(node.clone());
|
entry.insert(node.clone());
|
||||||
|
stats.inserts += 1;
|
||||||
}
|
}
|
||||||
Entry::Occupied(entry) => {
|
Entry::Occupied(entry) => {
|
||||||
if entry.get() == &TreeNode::Deleted {
|
if entry.get() == &TreeNode::Deleted {
|
||||||
// We don't insert the old node but we do remove the 'deleted' marker
|
// We don't insert the old node but we do remove the 'deleted' marker
|
||||||
// node!
|
// node!
|
||||||
entry.remove();
|
entry.remove();
|
||||||
|
stats.deletes += 1;
|
||||||
} else {
|
} else {
|
||||||
integrate_node_in_place(entry.into_mut(), node);
|
stats += integrate_node_in_place(entry.into_mut(), node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -387,7 +408,10 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
||||||
} else {
|
} else {
|
||||||
// the node stays the same...
|
// the node stays the same...
|
||||||
// intentional NOP!
|
// intentional NOP!
|
||||||
|
stats.nops += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stats
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories
|
/// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories
|
||||||
|
|
Loading…
Reference in New Issue