Add some comments to help maintainability
Some checks failed
continuous-integration/drone the build failed
Some checks failed
continuous-integration/drone the build failed
Closes #42
This commit is contained in:
parent
5f0e6bf18c
commit
1ce0478b2c
@ -38,7 +38,8 @@ pub const FASTCDC_MAX: usize = 8 * 1024 * 1024;
|
||||
/// A chunker that will generate nested chunks of chunk references if there is that much data
|
||||
/// to store.
|
||||
/// The root RecursiveChunker is fed data bytes.
|
||||
/// If it exceeds the nominated threshold, it grows a child RecursiveChunker
|
||||
/// If it exceeds the nominated threshold, it grows a child RecursiveChunker (which may do the same).
|
||||
/// When done, `finish` should be called to flush the buffers and obtain a `RecursiveChunkRef`.
|
||||
pub struct RecursiveChunker<'pile, RP: RawPile> {
|
||||
/// The pile to submit chunks to.
|
||||
pile: &'pile Pile<RP>,
|
||||
|
||||
@ -132,6 +132,8 @@ pub fn retrieve_tree_node<RP: RawPile>(
|
||||
*/
|
||||
}
|
||||
|
||||
/// Given a pointer, fully integrates it in-place. The pointer will no longer have a parent when
|
||||
/// this operation is finished.
|
||||
pub fn fully_integrate_pointer_node<RP: RawPile>(
|
||||
pile: &Pile<RP>,
|
||||
tree_node: &mut TreeNode,
|
||||
@ -165,6 +167,8 @@ pub fn fully_integrate_pointer_node<RP: RawPile>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Loads a pointer and fully integrates it.
|
||||
/// The result will be a fully-integrated pointer (it won't have a parent).
|
||||
pub fn fully_load_pointer<RP: RawPile>(
|
||||
pile: &Pile<RP>,
|
||||
pointer_name: &str,
|
||||
|
||||
@ -37,7 +37,13 @@ pub struct PointerData {
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct RecursiveChunkRef {
|
||||
/// The root Chunk ID.
|
||||
pub chunk_id: ChunkId,
|
||||
/// The depth of the data bytes.
|
||||
/// 0 means that the chunk addressed by `chunk_id` contains data bytes.
|
||||
/// 1 means that the chunk addressed by `chunk_id` contains references to chunks that contain
|
||||
/// data bytes.
|
||||
/// (and so on)
|
||||
pub depth: u32,
|
||||
}
|
||||
|
||||
|
||||
@ -46,6 +46,8 @@ impl Write for NullWriter {
|
||||
}
|
||||
|
||||
/// Mark-and-sweep style vacuuming system.
|
||||
/// We mark all the chunks that we run into (following the structure of all the pointers and
|
||||
/// recursive chunk references) and sweep the chunks that have not been read.
|
||||
pub struct VacuumRawPile<RP: RawPile> {
|
||||
underlying: RP,
|
||||
vacuum_tracking_enabled: bool,
|
||||
@ -120,6 +122,11 @@ impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
|
||||
/// This is also capable of finding and vacuuming unused chunks.
|
||||
/// This checks:
|
||||
/// - the integrity of each chunk (assuming an integrity-aware raw pile is used)
|
||||
/// - the structure of pointers and multi-level chunk references
|
||||
pub fn check_deep<RP: RawPile>(
|
||||
pile: Pile<RP>,
|
||||
vacuum: VacuumMode,
|
||||
|
||||
@ -36,6 +36,7 @@ use crate::definitions::{FilesystemOwnership, RecursiveChunkRef, TreeNode};
|
||||
use crate::pile::{Pile, RawPile};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
/// Given a fully-integrated root node, extracts the files from the pile.
|
||||
pub fn extract<RP: RawPile>(
|
||||
target_path: &Path,
|
||||
root: &mut TreeNode,
|
||||
@ -107,6 +108,7 @@ pub fn extract<RP: RawPile>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Given the name of a pointer, extracts it.
|
||||
pub fn extract_from_pointer_name<RP: RawPile>(
|
||||
target_path: &Path,
|
||||
pointer_name: &str,
|
||||
@ -218,6 +220,7 @@ pub fn apply_uid_and_gid_translation_tables(
|
||||
.expect("Can't fail since we don't fail.");
|
||||
}
|
||||
|
||||
/// A worker thread for extracting files.
|
||||
pub fn extract_worker<RP: RawPile>(
|
||||
pile: &Pile<RP>,
|
||||
paths: Receiver<(PathBuf, RecursiveChunkRef)>,
|
||||
@ -240,6 +243,7 @@ pub fn extract_worker<RP: RawPile>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// A single thread that manages the workers
|
||||
pub fn manager(
|
||||
root: &mut TreeNode,
|
||||
target_path: &Path,
|
||||
@ -286,6 +290,7 @@ pub fn manager(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Applies metadata (permissions, mtime, ownership) to files from a tree node.
|
||||
pub fn apply_metadata(
|
||||
root: &TreeNode,
|
||||
target: &Path,
|
||||
|
||||
@ -21,6 +21,8 @@ use std::sync::Arc;
|
||||
|
||||
/// Pushes chunks (and pointers) from one pile to another.
|
||||
/// This is a thorough implementation that could be slow but at least should give good confidence.
|
||||
/// (Presumably we could do better by looking at the pointers that already exist on the destination
|
||||
/// and only integrating as much as we need to.)
|
||||
pub fn push_to(
|
||||
from_pile: Arc<Pile<Arc<Box<dyn RawPile>>>>,
|
||||
from_rp_bypass: Arc<Box<dyn RawPile>>,
|
||||
|
||||
@ -239,6 +239,9 @@ pub fn manager(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stores files into the pile, potentially differentiating using a parent pointer (which will be
|
||||
/// loaded and fully-integrated).
|
||||
/// This also creates a pointer (which is why this is called `store_fully`).
|
||||
pub fn store_fully(
|
||||
pile: &Pile<Box<dyn RawPile>>,
|
||||
root_dir: &PathBuf,
|
||||
|
||||
@ -30,9 +30,13 @@ pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CompressionSettings {
|
||||
/// Raw dictionary to pass to Zstd for compression and decompression
|
||||
pub dictionary: Arc<Vec<u8>>,
|
||||
/// The compression level, passed to Zstd.
|
||||
pub level: i32,
|
||||
/// The number of compressor threads to use.
|
||||
pub num_compressors: u32,
|
||||
/// The number of decompressor threads to use.
|
||||
pub num_decompressors: u32,
|
||||
}
|
||||
|
||||
|
||||
@ -21,6 +21,14 @@ use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES};
|
||||
|
||||
use crate::pile::{Keyspace, RawPile};
|
||||
|
||||
/// A RawPile that provides encryption of chunk contents.
|
||||
/// Please note that keys are not currently encrypted, so this scheme is not CPA-secure.
|
||||
/// It seems easily possible to test the pile for inclusion of a known file (by first chunking it and
|
||||
/// looking for matching chunk IDs).
|
||||
/// Use of compression with a custom Zstd dictionary may make that harder but in general it seems dubious
|
||||
/// to rely on that.
|
||||
/// This feature will be revisited soon...
|
||||
/// Notably, keys should be passed through a secure permutation first.
|
||||
pub struct RawPileEncryptor<R: RawPile> {
|
||||
underlying: R,
|
||||
secret_key: Key,
|
||||
@ -28,6 +36,9 @@ pub struct RawPileEncryptor<R: RawPile> {
|
||||
|
||||
impl<R: RawPile> RawPileEncryptor<R> {
|
||||
pub fn new(underlying: R, key: Key) -> Self {
|
||||
warn!(
|
||||
"WARNING! Encrypted RawPiles are not CPA secure. Do not rely on them for security yet!"
|
||||
);
|
||||
RawPileEncryptor {
|
||||
underlying,
|
||||
secret_key: key,
|
||||
|
||||
@ -23,6 +23,8 @@ use crate::definitions::XXH64_SEED;
|
||||
use crate::pile::{Keyspace, RawPile};
|
||||
use crate::utils::bytes_to_hexstring;
|
||||
|
||||
/// This RawPile enables checking the integrity of stored chunks.
|
||||
/// This is done by storing a hash along with the chunk contents, which can later be verified.
|
||||
pub struct RawPileIntegrityChecker<RP: RawPile> {
|
||||
underlying: RP,
|
||||
}
|
||||
|
||||
@ -132,14 +132,25 @@ pub struct Inner {
|
||||
writers_in_progress: u16,
|
||||
}
|
||||
|
||||
/// A Pile built on the idea of SQLite-indexed 'blob logs'.
|
||||
/// 'Blob logs' are append-only binary files which contain simple concatenations of chunks (with a
|
||||
/// small header). This format is very dense but does not inherently provide random access.
|
||||
/// Granularity of deletes is also impacted by this structure, so vacuuming steps may involve needing
|
||||
/// to re-write bloblogs to remove deleted chunks.
|
||||
/// Because random access is important for performance, an additional SQLite database is used
|
||||
/// as a map from chunk IDs to their positions in the blob logs, allowing readers to seek to the
|
||||
/// appropriate place and read a chunk randomly.
|
||||
pub struct SqliteBloblogPile {
|
||||
inner: Arc<Mutex<Inner>>,
|
||||
path: PathBuf,
|
||||
writers_reach_zero: Condvar,
|
||||
}
|
||||
|
||||
/// A pointer to a blob in a 'blob log'.
|
||||
pub struct BloblogPointer {
|
||||
/// Which blob log the blob is stored in.
|
||||
bloblog: BloblogId,
|
||||
/// The seek offset at which the blob is located in the log.
|
||||
offset: u64,
|
||||
}
|
||||
|
||||
|
||||
@ -12,6 +12,9 @@ use crate::pile::{Keyspace, RawPile};
|
||||
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
|
||||
|
||||
/// A kind of RawPile which can make requests to a RawPile over a pipe (e.g. TCP socket or an
|
||||
/// SSH connection).
|
||||
/// The requests are handled by a `Responder` on the other end of the pipe.
|
||||
pub struct Requester {
|
||||
commands: Sender<(RequestBody, Sender<ResponseBody>)>,
|
||||
}
|
||||
|
||||
@ -13,6 +13,8 @@ use crate::pile::RawPile;
|
||||
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
|
||||
|
||||
#[derive(Clone)]
|
||||
/// A wrapper for a RawPile which allows a `Requester` to access it over a pipe (e.g. TCP socket or
|
||||
/// an SSH connection).
|
||||
pub struct Responder {
|
||||
continuation_tokens: Arc<Mutex<HashMap<u16, Sender<u16>>>>,
|
||||
}
|
||||
|
||||
@ -144,6 +144,12 @@ pub fn scan_with_progress_bar(
|
||||
}
|
||||
}
|
||||
|
||||
/// Differentiates a node in place.
|
||||
/// This makes `old` the parent of `new` (though it is up to the caller to properly update the
|
||||
/// `PointerData` to reflect this!).
|
||||
/// Loosely speaking, `new` is modified to contain the differences that, when applied to `old`, will
|
||||
/// result in the original value of `new`.
|
||||
/// See `integrate_node_in_place` for the inverse of this operation.
|
||||
pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
|
||||
if let TreeNode::Directory { children, .. } = new {
|
||||
if let TreeNode::Directory {
|
||||
@ -171,7 +177,16 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `old` must be an integrated pointer.
|
||||
/// Integrates a node in place.
|
||||
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
|
||||
/// `PointerData` appropriately if needed to reflect this).
|
||||
///
|
||||
/// Loosely speaking, `new` is treated as a set of differences that are applied to `old`, though the
|
||||
/// result is in-place.
|
||||
///
|
||||
/// Preconditions:
|
||||
/// - `old` must be an integrated pointer.
|
||||
/// - `old` is the parent of `new`
|
||||
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
|
||||
if let TreeNode::Directory { children, .. } = new {
|
||||
if let TreeNode::Directory {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user