Remove old yama and datman from tree

This commit is contained in:
Olivier 'reivilibre' 2023-08-13 16:42:32 +01:00
parent 2c14654d29
commit 96deadd270
43 changed files with 0 additions and 11121 deletions

View File

@ -1,38 +0,0 @@
[package]
name = "datman"
version = "0.7.0-alpha.1"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
edition = "2021"
repository = "https://bics.ga/reivilibre/yama"
license = "GPL-3.0-or-later"
description = "A chunked and deduplicated backup system using Yama"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# CLI parsing via the derive API.
clap = { version = "3.1.18", features = ["derive"] }
crossbeam-channel = "0.5.1"
# Error handling: anyhow for application-level errors, thiserror for typed ones.
anyhow = "1.0"
thiserror = "1.0"
serde = { version = "1.0.104", features = ["derive"] }
serde_json = "1.0.64"
toml = "0.5.5"
log = "0.4"
env_logger = "0.7.1"
# Terminal progress bars.
indicatif = "0.14.0"
arc-interner = "0.5.1"
zstd = "0.11.2" # 0.11.2+zstd.1.5.2
byteorder = "1"
termion = "1.5.6"
glob = "0.3.0"
humansize = "1.1.1"
chrono = "0.4.19"
itertools = "0.10.1"
hostname = "0.3.1"
# The underlying backup engine (sibling crate in this workspace).
yama = { path = "../yama", version = "0.7.0-alpha.1" }
metrics = "0.17.1"
bare-metrics-recorder = { version = "0.1.0" }
comfy-table = "6.0.0-rc.1"
libc = "0.2.126"
io-streams = "0.11.0"

View File

@ -1,13 +0,0 @@
# datman: DATa MANager
Datman is a tool to make it easier to use Yama for backups.
Features:
* Chunk-based deduplication
* (optional) Compression using Zstd and a specifiable dictionary
* (optional) Encryption
* Ability to back up to remote machines over SSH
* Labelling of files in a backup source; different destinations can choose to backup either all or a subset of the labels.
See the documentation for more information.

View File

@ -1,468 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use clap::Parser;
use env_logger::Env;
use anyhow::{bail, Context};
use bare_metrics_recorder::recording::BareMetricsRecorderCore;
use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination};
use datman::commands::ilabel::interactive_labelling_session;
use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy};
use datman::commands::{init_descriptor, pushpull};
use datman::descriptor::{load_descriptor, SourceDescriptor};
use datman::get_hostname;
use datman::remote::backup_source_requester::backup_remote_source_to_destination;
use datman::remote::backup_source_responder;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use itertools::Itertools;
use log::info;
use std::str::FromStr;
use yama::commands::load_pile_descriptor;
use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel};
// ANSI escape sequences used when printing success/failure banners.
// \x1b[5m = blink on, \x1b[31m = red, \x1b[25m = blink off, \x1b[22m = normal intensity.
pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠ \x1b[25m\x1b[22m";
pub const BOLD: &str = "\x1b[1m";
pub const BOLD_OFF: &str = "\x1b[22m";
pub const WHITE: &str = "\x1b[37m";
pub const RED: &str = "\x1b[31m";
pub const GREEN: &str = "\x1b[32m";
// CLI definition for datman; clap derives the parser from this enum.
// NOTE: `///` doc comments on variants/fields become user-visible help text,
// so the reviewer notes below deliberately use plain `//` comments to avoid
// changing the CLI output.
#[derive(Parser)]
pub enum DatmanCommand {
    /// Initialise a datman descriptor in this directory.
    Init {},
    ///
    Status {},
    // Interactive, terminal-based labelling of files within a source.
    #[clap(name = "ilabel")]
    InteractiveLabelling {
        /// Name of the source to label.
        source_name: String,
    },
    // Interactive, terminal-based browsing of a source's labelled tree.
    #[clap(name = "ibrowse")]
    InteractiveBrowsing {
        /// Name of the source to browse.
        source_name: String,
    },
    /// Back up a source locally or over the network.
    BackupOne {
        /// Name of the source to back up.
        source_name: String,
        /// Name of the destination to back up to.
        destination_name: String,
    },
    // Back up every source matching the remote filter to one destination.
    BackupAll {
        /// Name of the remote to back up.
        /// Special value 'self' means 'this host only'.
        /// Special value 'all' means 'all hosts'.
        remote_name: String,
        /// Name of the destination to back up to.
        destination_name: String,
    },
    // Extract backups from a pile into a (not-yet-existing) local directory.
    Extract {
        /// Name of the 'source' to extract
        /// Omit for 'all'.
        #[clap(short)]
        source_name: Option<String>,
        /// If specified, will get the first backup after this date.
        #[clap(long)]
        after: Option<HumanDateTime>,
        /// If specified, will get the last backup before this date. The default behaviour is to get the latest.
        #[clap(long)]
        before: Option<HumanDateTime>,
        /// If not specified, time-restricted extractions that don't have a pointer for every source
        /// will instead lead to an error.
        #[clap(long)]
        accept_partial: bool, // TODO unimplemented.
        /// Name of the pile to extract from
        pile_name: String,
        /// Place to extract to.
        destination: PathBuf,
        /// Skip applying metadata. Might be needed to extract without superuser privileges.
        #[clap(long)]
        skip_metadata: bool,
    },
    // Internal subcommand: the remote end of a network backup, spoken to over
    // stdio (spawned on the remote host, e.g. via SSH). Underscore-prefixed to
    // signal it is not for direct human use.
    #[clap(name = "_backup_source_responder")]
    InternalBackupSourceResponder,
    /// Pulls all pointers from a remote pile to a local pile.
    /// Does not yet support label filtering, but will do in the future.
    Pull {
        /// e.g. 'myserver:main'
        remote_and_remote_pile: String,
        pile_name: String,
    },
    /// Applies a retention policy by removing unnecessary backups.
    /// Does not reclaim space by itself: use
    /// `yama check --apply-gc --shallow`
    /// & `yama compact`
    /// to do that.
    Prune { pile_name: String },
    // Internal subcommand: the remote end of `Pull`, offering pointers over stdio.
    #[clap(name = "_pull_responder_offerer")]
    InternalPullResponderOfferer {
        datman_path: PathBuf,
        pile_name: String,
    },
}
/// A local-timezone date-and-time parsed from one of several human-friendly formats.
pub struct HumanDateTime(pub DateTime<Local>);

impl FromStr for HumanDateTime {
    type Err = anyhow::Error;

    /// Accepts `YYYY-MM-DD` (interpreted as local midnight),
    /// `YYYY-MM-DDTHH:MM:SS` or `YYYY-MM-DD HH:MM:SS`, all in local time.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // A bare date means midnight at the start of that (local) day.
        if let Ok(date_only) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
            let midnight = chrono::offset::Local
                .from_local_date(&date_only)
                .unwrap()
                .and_hms(0, 0, 0);
            return Ok(HumanDateTime(midnight));
        }
        // Otherwise, try the two supported date+time shapes in order.
        for format in ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"] {
            if let Ok(naive) = NaiveDateTime::parse_from_str(s, format) {
                let local = chrono::offset::Local.from_local_datetime(&naive).unwrap();
                return Ok(HumanDateTime(local));
            }
        }
        bail!("Couldn't parse using either format. Use one of: 2021-05-16 OR 2021-05-16T17:42:14 OR 2021-05-16 17:42:14");
    }
}
/// Print a loud success/failure banner for `result` to stderr, then pass the
/// result through unchanged so callers can continue handling it.
fn with_obvious_successfail_message<R>(result: anyhow::Result<R>) -> anyhow::Result<R> {
    if let Err(error) = &result {
        eprintln!("{:?}", error);
        eprintln!(
            "{}{}Operation {}{}FAILED{}!{}",
            FAILURE_SYMBOL_OBNOXIOUS_FLASHING, WHITE, RED, BOLD, WHITE, BOLD_OFF
        );
    } else {
        eprintln!("Operation {}successful{}.", GREEN, WHITE);
    }
    result
}
/// Terminate the process: exit code 0 on success, 5 on failure.
/// Never returns.
fn with_exitcode<R>(result: anyhow::Result<R>) {
    let code = if result.is_ok() { 0 } else { 5 };
    std::process::exit(code);
}
/// Entry point: set up logging and metrics, parse the CLI, and dispatch to the
/// relevant datman subcommand.
fn main() -> anyhow::Result<()> {
    // Log at INFO by default; RUST_LOG can override.
    env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
    let now = Utc::now();
    // Record metrics for this invocation into a timestamped file under /tmp.
    // The `_stopper` guard keeps the recorder alive for the duration of the run.
    let (shard, _stopper) = BareMetricsRecorderCore::new(File::create(format!(
        "/tmp/datman_{}.baremetrics",
        now.format("%F_%H%M%S")
    ))?)
    .start("datman".to_string())?;
    shard.install_as_metrics_recorder()?;
    let opts: DatmanCommand = DatmanCommand::parse();
    match opts {
        DatmanCommand::Init {} => {
            // Scaffold a descriptor in the current working directory.
            init_descriptor(Path::new(".")).unwrap();
        }
        DatmanCommand::Status { .. } => {
            unimplemented!();
        }
        DatmanCommand::InteractiveLabelling { source_name } => {
            interactive_labelling_session(Path::new("."), source_name)?;
        }
        DatmanCommand::InteractiveBrowsing { source_name } => {
            datman::commands::ibrowse::session(Path::new("."), source_name)?;
        }
        DatmanCommand::BackupOne {
            source_name,
            destination_name,
        } => {
            let my_hostname = get_hostname();
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let source = &descriptor.sources[&source_name];
            let destination = &descriptor.piles[&destination_name];
            let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("storing");
            // A directory source on a different host must be backed up over the network.
            let is_remote = if let SourceDescriptor::DirectorySource { hostname, .. } = source {
                hostname != &my_hostname
            } else {
                false
            };
            let result = if is_remote {
                backup_remote_source_to_destination(
                    source,
                    destination,
                    &descriptor,
                    Path::new("."),
                    &source_name,
                    &destination_name,
                    yama::utils::get_number_of_workers("YAMA_CHUNKERS"),
                    pbar,
                )
            } else {
                backup_source_to_destination(
                    source,
                    destination,
                    &descriptor,
                    Path::new("."),
                    &source_name,
                    &destination_name,
                    yama::utils::get_number_of_workers("YAMA_CHUNKERS"),
                    &mut pbar,
                )
            };
            // Print a loud banner, then exit(0)/exit(5) based on the outcome.
            with_exitcode(with_obvious_successfail_message(result))
        }
        DatmanCommand::BackupAll {
            remote_name,
            destination_name,
        } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let destination = &descriptor.piles[&destination_name];
            let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("storing");
            backup_all_sources_to_destination(
                destination,
                &descriptor,
                Path::new("."),
                &destination_name,
                yama::utils::get_number_of_workers("YAMA_CHUNKERS"),
                &mut pbar,
                remote_name,
            )
            .unwrap();
        }
        DatmanCommand::Extract {
            source_name,
            after,
            before,
            accept_partial,
            pile_name,
            destination,
            skip_metadata,
        } => {
            // Only partial-accepting extraction is implemented so far.
            if !accept_partial {
                bail!("Specify --accept-partial until running without it is supported.");
            }
            if after.is_some() && before.is_some() {
                bail!("Can't specify both before and after!");
            }
            // Convert the local-time bounds to UTC for comparison against pointer timestamps.
            let before = before.map(|dt| dt.0.with_timezone(&Utc));
            let after = after.map(|dt| dt.0.with_timezone(&Utc));
            datman::commands::extract::extract(
                &destination,
                Path::new("."),
                source_name.as_ref().map(|x| x.as_ref()),
                &pile_name,
                before.into(),
                after.into(),
                // skip_metadata disables permissions, mtime and ownership application alike.
                !skip_metadata,
                !skip_metadata,
                !skip_metadata,
                yama::utils::get_number_of_workers("YAMA_EXTRACTORS"),
            )?;
        }
        DatmanCommand::InternalBackupSourceResponder => {
            // Remote end of a network backup: serve requests over stdio.
            info!("Datman responder at {:?}", std::env::current_exe()?);
            backup_source_responder::handler_stdio()?;
        }
        DatmanCommand::Report {
            pile_name,
            individual,
        } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let destination = &descriptor.piles[&pile_name];
            let report =
                datman::commands::report::generate_report(destination, &descriptor, !individual)?;
            datman::commands::report::print_filesystem_space(&destination.path)?;
            datman::commands::report::print_report(&report)?;
        }
        DatmanCommand::Pull {
            remote_and_remote_pile,
            pile_name,
        } => {
            // Expect exactly three ':'-separated parts: host, remote datman path, remote pile.
            let (hostname, remote_datman_path, remote_pile_name) = remote_and_remote_pile
                .split(':')
                .collect_tuple()
                .context("You must pull from a remote pile specified as remote:path:pile.")?;
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let source = &descriptor.piles[&pile_name];
            let pile_desc = load_pile_descriptor(&source.path)?;
            // Bypass compression so chunks can be transferred in their stored form.
            let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
                &source.path,
                &pile_desc,
                BypassLevel::CompressionBypass,
            )?;
            let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("pulling");
            let remote_host_descriptor = descriptor
                .remote_hosts
                .get(hostname)
                .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?;
            // Spawn the remote datman in pull-responder mode over SSH; speak to
            // it through its piped stdio, leaving stderr attached for diagnostics.
            let mut connection = Command::new("ssh")
                .arg(&remote_host_descriptor.user_at_host)
                .arg("--")
                .arg(
                    &remote_host_descriptor
                        .path_to_datman
                        .as_ref()
                        .map(|x| x.as_str())
                        .unwrap_or("datman"),
                )
                .arg("_pull_responder_offerer")
                .arg(remote_datman_path)
                .arg(remote_pile_name)
                .stdin(Stdio::piped())
                .stdout(Stdio::piped())
                .stderr(Stdio::inherit())
                .spawn()?;
            let mut reader = BufReader::new(connection.stdout.take().unwrap());
            let mut writer = BufWriter::new(connection.stdin.take().unwrap());
            pushpull::accepting_side(
                &pile,
                &bypass_raw_pile,
                &mut reader,
                &mut writer,
                Box::new(pbar),
            )?;
        }
        DatmanCommand::Prune { pile_name } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let retention_policy = descriptor
                .retention
                .context("No retention policy set in descriptor")?;
            let dest_desc = &descriptor.piles[&pile_name];
            let pile_desc = load_pile_descriptor(&dest_desc.path)?;
            prune_with_retention_policy(
                &dest_desc.path,
                &pile_desc,
                &RetentionPolicy::from_config(retention_policy),
                // NOTE(review): presumably "really delete" rather than dry-run — confirm
                // against prune_with_retention_policy's signature.
                true,
            )?;
        }
        DatmanCommand::InternalPullResponderOfferer {
            datman_path,
            pile_name,
        } => {
            // Counterpart of `Pull`, run on the remote host with stdio as transport.
            let descriptor = load_descriptor(&datman_path).unwrap();
            let source = &descriptor.piles[&pile_name];
            let pile_desc = load_pile_descriptor(&source.path)?;
            let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
                &source.path,
                &pile_desc,
                BypassLevel::CompressionBypass,
            )?;
            let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?);
            let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?);
            pushpull::offering_side(
                &pile,
                &bypass_raw_pile,
                &mut stdin,
                &mut stdout,
                Box::new(()),
            )?;
            stdout.flush()?;
        }
    }
    Ok(())
}

View File

@ -1,66 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use crate::descriptor::{Descriptor, RetentionPolicyConfig, SourceDescriptor};
pub mod backup;
pub mod extract;
pub mod ibrowse;
pub mod ilabel;
pub mod prune;
pub mod pushpull;
pub mod report;
/// Create a fresh datman descriptor directory at `path`.
///
/// Makes the directory (and any missing parents), an empty `labelling/`
/// subdirectory, and writes a starter `datman.toml` with example labels,
/// no sources or piles, and a default retention policy.
///
/// Errors if `labelling` (or `datman.toml`'s parent) cannot be created —
/// in particular, re-running init over an existing descriptor fails on the
/// `create_dir` call below.
pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
    std::fs::create_dir_all(path)?;
    // Not create_dir_all: a pre-existing `labelling` dir is treated as an error.
    std::fs::create_dir(path.join("labelling"))?;
    let mut datman_toml_file = File::create(path.join("datman.toml"))?;
    // Start with no sources; the commented-out block below shows the intended shape.
    let source: HashMap<String, SourceDescriptor> = Default::default();
    /*source.insert("demo1".to_owned(), SourceDescriptor::DirectorySource {
    hostname: "demohost1".to_string(),
    directory: PathBuf::from("/dev/null")
    });
    source.insert("demo2".to_owned(), SourceDescriptor::VirtualSource { blah: "".to_string(), label: "wat".to_string() });*/
    let bytes = toml::to_vec(&Descriptor {
        // Example label set; users are expected to customise these.
        labels: vec![
            "pocket".to_owned(),
            "precious".to_owned(),
            "bulky".to_owned(),
        ],
        sources: source,
        piles: Default::default(),
        remote_hosts: Default::default(),
        // Defaults: 14 daily, 12 weekly, 24 monthly, effectively-unlimited yearly.
        retention: Some(RetentionPolicyConfig {
            daily: 14,
            weekly: 12,
            monthly: 24,
            yearly: 9001,
        }),
    })?;
    datman_toml_file.write_all(&bytes)?;
    Ok(())
}

View File

@ -1,391 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor, VirtualSourceKind};
use crate::get_hostname;
use crate::labelling::{
label_node, load_labelling_rules, str_to_label, Label, LabellingRules, State,
};
use crate::tree::{scan, FileTree, FileTree1};
use anyhow::{anyhow, bail};
use arc_interner::ArcIntern;
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use log::{info, warn};
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::io::Write;
use std::path::Path;
use std::process::{Child, Command, Stdio};
use std::sync::Arc;
use yama::chunking::SENSIBLE_THRESHOLD;
use yama::commands::{load_pile_descriptor, open_pile, store_tree_node};
use yama::definitions::{
FilesystemOwnership, FilesystemPermissions, PointerData, RecursiveChunkRef, RootTreeNode,
TreeNode,
};
use yama::progress::ProgressTracker;
/// chrono format for the timestamp part of a pointer name:
/// `%F_%T` → `YYYY-MM-DD_HH:MM:SS` (lexicographic order == chronological order).
/// Note: `'static` is implied for const string references, so the explicit
/// lifetime was removed (clippy::redundant_static_lifetimes).
pub const POINTER_DATETIME_FORMAT: &str = "%F_%T";
/// Separator between the source name and the timestamp in a pointer name.
pub const POINTER_FIELD_SEPARATOR: char = '+';
/// Build a pointer name of the form `<source>+<timestamp>` for the given instant,
/// using [`POINTER_DATETIME_FORMAT`] for the timestamp.
pub fn get_pointer_name_at(source_name: &str, datetime: DateTime<Utc>) -> String {
    let stamp = datetime.format(POINTER_DATETIME_FORMAT);
    format!("{}{}{}", source_name, POINTER_FIELD_SEPARATOR, stamp)
}
pub fn split_pointer_name(pointer_name: &str) -> Option<(String, DateTime<Utc>)> {
let (source_name, date_time_str) = pointer_name.rsplit_once("+")?;
let date_time = NaiveDateTime::parse_from_str(date_time_str, POINTER_DATETIME_FORMAT).ok()?;
let date_time = Utc.from_utc_datetime(&date_time);
Some((source_name.to_owned(), date_time))
}
/// Spawn the backup helper for `program_name` (found on the PATH as
/// `datman-helper-<name>-backup`), feed it `extra_args` as JSON on stdin,
/// then close its stdin and hand back the running child.
///
/// The child's stdout is piped (the caller streams the backup data from it);
/// stderr is inherited so diagnostics reach the user.
pub fn open_stdout_backup_process(
    extra_args: &HashMap<String, toml::Value>,
    program_name: &str,
) -> anyhow::Result<Child> {
    let helper = format!("datman-helper-{}-backup", program_name);
    let mut child = Command::new(helper)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::inherit())
        .spawn()?;
    {
        let stdin = child.stdin.as_mut().unwrap();
        serde_json::to_writer(&mut *stdin, extra_args)?;
        stdin.flush()?;
    }
    // Dropping the handle closes the helper's stdin so it knows the args are complete.
    child.stdin = None;
    Ok(child)
}
/// Label a freshly scanned tree, filter it down to the labels this destination
/// wants, and convert the survivors into a yama `TreeNode`.
///
/// Returns `Ok(None)` when the filter removes everything (nothing to store).
/// Unlabelled nodes are *included* (fail-safe) and trigger a warning telling
/// the user to run `datman ilabel`.
pub fn label_filter_and_convert(
    tree: FileTree1<()>,
    descriptor: &Descriptor,
    source_name: &str,
    rules: &LabellingRules,
    dest: &DestPileDescriptor,
) -> anyhow::Result<Option<TreeNode>> {
    info!("Labelling.");
    // Re-meta the tree as Option<State>: None = not yet labelled.
    let mut tree = tree.replace_meta(&None);
    let labels = descriptor
        .labels
        .iter()
        .map(|l| Label(ArcIntern::new(l.clone())))
        .collect();
    label_node("".to_owned(), None, &mut tree, &labels, rules)?;
    let included_labels: HashSet<Label> = dest.included_labels.iter().map(str_to_label).collect();
    info!("Filtering.");
    let mut unlabelled_included = false;
    // filter_inclusive includes directories that must exist for the sake of their children.
    if !tree.filter_inclusive(&mut |node| {
        match node.get_metadata().unwrap() {
            None => {
                // unlabelled -- include by default for safety
                unlabelled_included = true;
                true
            }
            Some(State::Excluded) => {
                // don't include excluded things
                false
            }
            Some(State::Labelled(label)) => {
                // include things only if we want the label
                included_labels.contains(&label)
            }
            Some(State::Split) => {
                // no point retaining this directory if its children aren't going to be!
                assert!(
                    node.is_dir(),
                    "Non-directories should not be labelled for Split!"
                );
                false
            }
        }
    }) {
        info!("Empty filter. Stopping.");
        return Ok(None);
    }
    if unlabelled_included {
        warn!("Unlabelled nodes. They have been included for safety, but you should consider running\n\t'datman ilabel {}'\nat some point to assign labels.", source_name);
    }
    let root = convert_filetree_to_yamatree(&tree);
    Ok(Some(root))
}
/// Back up a single source into the destination pile.
///
/// Directory sources are scanned, labelled/filtered, then stored via yama with
/// the most recent previous pointer for the same source used as a parent (for
/// deduplication). Virtual sources run a helper process and chunk its stdout
/// into the pile as a single synthetic file.
///
/// Returns early (Ok) when filtering leaves nothing to store, or when a
/// virtual source's label is not wanted by this destination. Errors if a
/// pointer with the generated name already exists.
pub fn backup_source_to_destination<PT: ProgressTracker>(
    source: &SourceDescriptor,
    dest: &DestPileDescriptor,
    descriptor: &Descriptor,
    desc_path: &Path,
    source_name: &str,
    dest_name: &str,
    num_workers: u8,
    progress_bar: &mut PT,
) -> anyhow::Result<()> {
    match source {
        SourceDescriptor::DirectorySource {
            hostname: _,
            directory,
            cross_filesystems,
        } => {
            info!("Looking to backup {} to {}", source_name, dest_name);
            let rules = load_labelling_rules(desc_path, source_name)?;
            let exclusions = rules.get_exclusions_set(directory);
            info!("Scanning.");
            // `!cross_filesystems` == stay on one filesystem during the scan.
            let tree = scan(directory, !*cross_filesystems, &exclusions)?
                .ok_or_else(|| anyhow!("Source does not exist."))?;
            let absolute_source_path = desc_path.join(directory);
            let absolute_dest_path = desc_path.join(&dest.path);
            let pile_descriptor = load_pile_descriptor(&absolute_dest_path)?;
            let pile = open_pile(&absolute_dest_path, &pile_descriptor)?;
            // Apply labelling rules; None means the filter removed everything.
            let root = if let Some(root) =
                label_filter_and_convert(tree, descriptor, source_name, &rules, dest)?
            {
                root
            } else {
                return Ok(());
            };
            let pointer_name = get_pointer_name_at(&source_name, Utc::now());
            if pile.read_pointer(pointer_name.as_str())?.is_some() {
                bail!(
                    "Pointer by name {:?} already exists; refusing to overwrite.",
                    pointer_name
                );
            }
            info!("Will write as pointer {:?}.", pointer_name);
            info!("Searching for suitable parents.");
            // Pick the lexicographically-greatest pointer with this source's
            // prefix; the suffix is a sortable timestamp, so that is the most
            // recent prior backup.
            let mut parent: Option<String> = None;
            let prefix = format!("{}+", source_name);
            for pointer in pile.list_pointers()?.iter() {
                if pointer.starts_with(&prefix) {
                    match parent.as_ref() {
                        None => {
                            parent = Some(pointer.to_owned());
                        }
                        Some(cur_parent) => {
                            if cur_parent < pointer {
                                parent = Some(pointer.to_owned());
                            }
                        }
                    }
                }
            }
            match parent.as_ref() {
                Some(parent) => {
                    info!("Using parent: {}", parent);
                }
                None => {
                    info!("No suitable parent found.");
                }
            }
            info!("Storing using yama.");
            yama::operations::storing::store_fully(
                Arc::new(pile),
                &absolute_source_path,
                &pointer_name,
                root,
                parent,
                num_workers,
                progress_bar,
            )?;
            info!("Stored!");
        }
        SourceDescriptor::VirtualSource {
            helper,
            label,
            kind: VirtualSourceKind::Stdout { filename },
            extra_args,
        } => {
            // A virtual source has exactly one label; skip when this
            // destination doesn't include it.
            if !dest.included_labels.contains(label) {
                info!("Skipping because the source's label is not included in this destination!");
                return Ok(());
            }
            info!("Starting up process and writing to yama store.");
            let absolute_dest_path = desc_path.join(&dest.path);
            let pile_descriptor = load_pile_descriptor(&absolute_dest_path)?;
            let pile = open_pile(&absolute_dest_path, &pile_descriptor)?;
            let pointer_name = get_pointer_name_at(&source_name, Utc::now());
            if pile.read_pointer(pointer_name.as_str())?.is_some() {
                bail!(
                    "Pointer by name {:?} already exists; refusing to overwrite.",
                    pointer_name
                );
            }
            info!("Will write as pointer {:?}.", pointer_name);
            let mut chunker = yama::chunking::RecursiveChunker::new(SENSIBLE_THRESHOLD, &pile);
            let mut process = open_stdout_backup_process(extra_args, helper)?;
            info!("Storing. No progress bar is available for this style of backup yet.");
            // this bit does all the magic: stream the helper's stdout straight
            // through the chunker into the pile.
            // TODO(feature): progress bar for
            std::io::copy(process.stdout.as_mut().unwrap(), &mut chunker)?;
            let exit_status = process.wait()?;
            if !exit_status.success() {
                bail!(
                    "The process was not successful (exit code {}). Exiting.",
                    exit_status.code().unwrap()
                );
            }
            let data_chunk_ref = chunker.finish()?;
            eprintln!("Stored data! Now writing a pointer...");
            // Present the chunked stream as a single regular file: no real
            // owner (u16::MAX uid/gid) and conservative 0600 permissions.
            let root = TreeNode::NormalFile {
                mtime: Utc::now().timestamp_millis() as u64,
                ownership: FilesystemOwnership {
                    uid: u16::MAX,
                    gid: u16::MAX,
                },
                permissions: FilesystemPermissions { mode: 0o600 },
                content: data_chunk_ref,
            };
            // very important: store the pointer!
            let pointer_chunk_ref = store_tree_node(
                &pile,
                &RootTreeNode {
                    name: filename.to_owned(),
                    node: root,
                },
            )?;
            let pointer_data = PointerData {
                chunk_ref: pointer_chunk_ref,
                parent_pointer: None,
                uid_lookup: Default::default(),
                gid_lookup: Default::default(),
            };
            pile.write_pointer(&pointer_name, &pointer_data)?;
            pile.flush()?;
            eprintln!("Pointer saved!");
        }
    }
    Ok(())
}
/// Recursively convert a datman `FileTree` into a yama `TreeNode`,
/// discarding the per-node metadata.
///
/// File `content` refs are filled with a placeholder `RecursiveChunkRef`
/// (default chunk id, depth 0) — presumably replaced with real chunk refs
/// during storing; TODO confirm against yama's storing code.
///
/// # Panics
/// Panics on `FileTree::Other` nodes, which are expected to have been
/// excluded before conversion.
pub fn convert_filetree_to_yamatree<A, B, C, D>(
    filetree: &FileTree<A, B, C, D>,
) -> yama::definitions::TreeNode
where
    A: Debug + Clone + Eq + PartialEq,
    B: Debug + Clone + Eq + PartialEq,
    C: Debug + Clone + Eq + PartialEq,
    D: Debug + Clone + Eq + PartialEq,
{
    match filetree {
        FileTree::NormalFile {
            mtime,
            ownership,
            permissions,
            meta: _,
        } => TreeNode::NormalFile {
            mtime: *mtime,
            ownership: *ownership,
            permissions: *permissions,
            // Placeholder content ref; see doc comment above.
            content: RecursiveChunkRef {
                chunk_id: Default::default(),
                depth: 0,
            },
        },
        FileTree::Directory {
            ownership,
            permissions,
            children,
            meta: _,
        } => TreeNode::Directory {
            ownership: *ownership,
            permissions: *permissions,
            children: children
                .iter()
                .map(|(k, v)| (k.clone(), convert_filetree_to_yamatree(v)))
                .collect(),
        },
        FileTree::SymbolicLink {
            ownership,
            target,
            meta: _,
        } => TreeNode::SymbolicLink {
            ownership: *ownership,
            target: target.clone(),
        },
        FileTree::Other(_) => {
            panic!("Shouldn't be any Others in the tree.");
        }
    }
}
/// Back up every source in the descriptor to `dest`, optionally restricted to
/// sources on one host.
///
/// `restricted_remote_name` is the CLI selector: "all" backs up every source;
/// "self"/"this"/"here" restrict to this machine's hostname; any other value
/// restricts to that hostname. Sources without a remote hostname are never
/// filtered out.
pub fn backup_all_sources_to_destination<PT: ProgressTracker>(
    dest: &DestPileDescriptor,
    descriptor: &Descriptor,
    desc_path: &Path,
    dest_name: &str,
    num_workers: u8,
    progress_bar: &mut PT,
    restricted_remote_name: String,
) -> anyhow::Result<()> {
    // Translate the CLI selector into an optional hostname filter.
    let host_filter = match restricted_remote_name.as_str() {
        "all" => None,
        "self" | "this" | "here" => Some(get_hostname()),
        other => Some(other.to_string()),
    };
    for (source_name, source_descriptor) in &descriptor.sources {
        let skip = match (source_descriptor.get_remote_hostname(), host_filter.as_ref()) {
            // A filter is active and this source lives on a different host.
            (Some(source_host), Some(wanted_host)) => source_host != wanted_host,
            // No filter, or the source has no remote hostname: always back up.
            _ => false,
        };
        if skip {
            continue;
        }
        backup_source_to_destination(
            source_descriptor,
            dest,
            descriptor,
            desc_path,
            source_name.as_str(),
            dest_name,
            num_workers,
            progress_bar,
        )?;
    }
    Ok(())
}

View File

@ -1,182 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::commands::backup::POINTER_DATETIME_FORMAT;
use crate::descriptor::load_descriptor;
use anyhow::bail;
use chrono::{DateTime, NaiveDateTime, Utc};
use itertools::Itertools;
use log::{info, warn};
use std::path::Path;
use yama::commands::{load_pile_descriptor, open_pile};
use yama::pile::{Pile, RawPile};
/// Convenience alias: a pile backed by any boxed raw-pile implementation.
pub type PileT = Pile<Box<dyn RawPile>>;
/// Extract the most suitable backup pointer for one source (or, with
/// `source_name == None`, for every source) from a pile into `destination`.
///
/// `destination` must not exist yet; it is created here and each extracted
/// pointer lands in its own subdirectory named after the pointer.
/// `before`/`after` restrict which backup is chosen — see
/// [`find_pointer_for_source`] for the selection rules.
pub fn extract(
    destination: &Path,
    descriptor_path: &Path,
    source_name: Option<&str>,
    pile_name: &str,
    before: Option<DateTime<Utc>>,
    after: Option<DateTime<Utc>>,
    apply_permissions: bool,
    apply_mtime: bool,
    apply_ownership: bool,
    num_workers: u8,
) -> anyhow::Result<()> {
    if destination.exists() {
        bail!("For now, the destination is not allowed to exist prior to extraction.");
    }
    let descriptor = load_descriptor(descriptor_path)?;
    let dest_descriptor = &descriptor.piles[pile_name];
    let dest_pile_path = descriptor_path.join(&dest_descriptor.path);
    let pile_descriptor = load_pile_descriptor(&dest_pile_path)?;
    let pile = open_pile(&dest_pile_path, &pile_descriptor)?;
    std::fs::create_dir_all(&destination)?;
    let mut pointers_to_extract = Vec::new();
    match source_name {
        // A single named source must resolve to a pointer, else it's an error.
        Some(source_name) => match find_pointer_for_source(source_name, &pile, &before, &after)? {
            None => {
                bail!(
                    "No pointer found for {:?} and it's the only one requested.",
                    source_name
                );
            }
            Some(pointer) => {
                pointers_to_extract.push(pointer);
            }
        },
        // All-sources mode: sources with no matching pointer only warn.
        None => {
            for source in descriptor.sources.keys() {
                match find_pointer_for_source(source, &pile, &before, &after)? {
                    None => {
                        warn!("No pointer found for {:?}! Carrying on anyway...", source);
                    }
                    Some(pointer) => {
                        pointers_to_extract.push(pointer);
                    }
                }
            }
        }
    }
    extract_pointers_into_already_created_directory(
        destination,
        pointers_to_extract,
        &pile,
        apply_permissions,
        apply_mtime,
        apply_ownership,
        num_workers,
    )?;
    Ok(())
}
/// Choose the pointer for `source_name` that best satisfies the time bounds:
/// with `before`, the latest backup at/before that time; with `after`, the
/// earliest backup at/after it; with neither, simply the latest backup.
/// Pointers with unparseable timestamps are skipped with a warning.
///
/// NOTE(review): the bounds are checked with `if/else if`, so `after` is
/// ignored whenever `before` is set — the CLI bails when both are given, so
/// this branch is currently unreachable; confirm before relying on it.
/// NOTE(review): the name is split on '+' expecting exactly two parts, unlike
/// backup.rs's `split_pointer_name` which uses `rsplit_once` — a source name
/// containing '+' would be silently skipped here.
fn find_pointer_for_source(
    source_name: &str,
    pile: &PileT,
    before: &Option<DateTime<Utc>>,
    after: &Option<DateTime<Utc>>,
) -> anyhow::Result<Option<String>> {
    // Best candidate so far: (pointer name, its decoded UTC timestamp).
    let mut current_choice: Option<(String, DateTime<Utc>)> = None;
    for pointer_name in pile.list_pointers()? {
        if let Some((pointer_source_name, encoded_datetime)) =
            pointer_name.split('+').collect_tuple()
        {
            if source_name != pointer_source_name {
                // don't accept pointers for other sources!
                continue;
            }
            match NaiveDateTime::parse_from_str(encoded_datetime, POINTER_DATETIME_FORMAT) {
                Ok(decoded_datetime) => {
                    let datetime = DateTime::from_utc(decoded_datetime, Utc);
                    if let Some(before) = before {
                        if before < &datetime {
                            // datetime is after the 'before' time
                            continue;
                        }
                    } else if let Some(after) = after {
                        if &datetime < after {
                            // datetime is before the 'after' time
                            continue;
                        }
                    }
                    match current_choice.as_ref() {
                        None => current_choice = Some((pointer_name, datetime)),
                        Some((_current_name, current_datetime)) => {
                            let should_replace = if after.is_some() {
                                // if we want the first one after a time, we want the earliest option!
                                // so replace if new datetime is earlier than current
                                &datetime < current_datetime
                            } else {
                                // replace if new datetime is after current datetime
                                current_datetime < &datetime
                            };
                            if should_replace {
                                current_choice = Some((pointer_name, datetime));
                            }
                        }
                    }
                }
                Err(e) => {
                    warn!(
                        "Ignoring {:?} because it seems to have a bad datetime: {:?}",
                        pointer_name, e
                    );
                }
            }
        }
    }
    // Drop the timestamp; the caller only needs the winning pointer's name.
    Ok(current_choice.map(|(a, _)| a))
}
/// Extract each pointer into its own subdirectory of `target` (which must
/// already exist), delegating the actual unpacking to yama.
fn extract_pointers_into_already_created_directory(
    target: &Path,
    pointers: Vec<String>,
    pile: &PileT,
    apply_permissions: bool,
    apply_mtime: bool,
    apply_ownership: bool,
    num_workers: u8,
) -> anyhow::Result<()> {
    for pointer in pointers {
        info!("Extracting {:?} now.", pointer);
        // One subdirectory per pointer, named after the pointer itself.
        let pointer_dir = target.join(&pointer);
        std::fs::create_dir(&pointer_dir)?;
        yama::operations::extracting::extract_from_pointer_name(
            &pointer_dir,
            &pointer,
            pile,
            true,
            num_workers,
            apply_permissions,
            apply_mtime,
            apply_ownership,
        )?;
    }
    Ok(())
}

View File

@ -1,253 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::BTreeSet;
use std::path::Path;
use anyhow::{anyhow, bail};
use crate::descriptor::{load_descriptor, SourceDescriptor};
use crate::labelling::{
label_node, load_labelling_rules, save_labelling_rules, GlobRule, Label, State,
};
use crate::tree::{scan, FileTree, FileTree1};
use arc_interner::ArcIntern;
use humansize::FileSize;
use std::io::{stdin, stdout, Write};
/// Recursively annotate `node`'s metadata with sizes (in bytes) and return the
/// subtree's total: file length from the filesystem at `real_path`, directory
/// sizes as the sum of their children, symlinks as the length of their target
/// string, and zero for anything else.
pub fn calculate_sizes(node: &mut FileTree1<u64>, real_path: &Path) -> anyhow::Result<u64> {
    let size = match node {
        FileTree::NormalFile { meta, .. } => {
            let file_size = std::fs::metadata(real_path)?.len();
            *meta = file_size;
            file_size
        }
        FileTree::Directory { children, meta, .. } => {
            let mut total = 0;
            for (name, child) in children.iter_mut() {
                total += calculate_sizes(child, &real_path.join(name))?;
            }
            *meta = total;
            total
        }
        FileTree::SymbolicLink { meta, target, .. } => {
            let len = target.len() as u64;
            *meta = len;
            len
        }
        FileTree::Other(_) => 0,
    };
    Ok(size)
}
/// Translate a user's one-letter labelling command into a `State`:
/// "s" = split, "x" = excluded, anything else is taken as a label name.
pub fn string_to_outcome(s: &str) -> State {
    if s == "s" {
        State::Split
    } else if s == "x" {
        State::Excluded
    } else {
        State::Labelled(Label(ArcIntern::new(s.to_owned())))
    }
}
/// Runs an interactive, shell-like browsing session over a backup source,
/// letting the user attach labelling rules to entries, then saving the rules.
///
/// Commands accepted at the prompt:
///   `<index>`               — descend into the directory at that listing index
///   `x <index>`             — exclude an entry
///   `s <index>`             — split an entry
///   `p <outcome> <pattern>` — add a glob rule (`s`/`x`/label name as outcome)
///   `<word> <label> <index>`— label an entry (any other non-numeric command)
///   `q`                     — quit (rules are saved on exit either way)
pub fn session(path: &Path, source_name: String) -> anyhow::Result<()> {
    // Path within the scanned tree of the directory currently being browsed.
    let mut current_path = String::from("");

    let descriptor = load_descriptor(path)?;
    let source_descriptor = descriptor
        .sources
        .get(&source_name)
        .ok_or_else(|| anyhow!("Could not find source {:?}!", source_name))?;

    // Only real directory sources can be browsed interactively.
    let (directory, one_filesystem) = match source_descriptor {
        SourceDescriptor::DirectorySource {
            directory,
            cross_filesystems,
            ..
        } => (directory, !*cross_filesystems),
        SourceDescriptor::VirtualSource { .. } => {
            bail!("Cannot browse virtual source.");
        }
    };

    println!("Scanning source; this might take a little while...");
    let mut dir_scan: FileTree1<Option<State>> = scan(directory, one_filesystem, &BTreeSet::new())?
        .ok_or_else(|| anyhow!("Empty source."))?
        .replace_meta(&None);

    // Parallel tree annotated with on-disk sizes, so listings can show them.
    let mut size_dir_scan: FileTree1<u64> = dir_scan.replace_meta(&0);
    calculate_sizes(&mut size_dir_scan, directory)?;

    let mut rules = load_labelling_rules(path, &source_name)?;

    let labels = descriptor
        .labels
        .iter()
        .map(|l| Label(ArcIntern::new(l.clone())))
        .collect();
    // Apply the existing rules so the listing shows current labelling state.
    label_node("".to_owned(), None, &mut dir_scan, &labels, &rules)?;

    loop {
        println!("---------------------------------------------------------");
        println!("| {}", current_path);
        println!("----");

        // If the current path vanished from the tree or isn't a directory,
        // there is nothing to browse: end the session (rules still saved).
        let dir_node = match dir_scan.get_by_path(&current_path) {
            Some(node) => node,
            None => break,
        };
        let children = match dir_node {
            FileTree::Directory { children, .. } => children,
            _ => break,
        };

        // List every child with its index, labelling state and size.
        let size_node = size_dir_scan.get_by_path(&current_path).unwrap();
        for (idx, (child_name, child)) in children.iter().enumerate() {
            let size_child = size_node
                .get_by_path(child_name)
                .unwrap()
                .get_metadata()
                .unwrap();
            if child.is_dir() {
                println!("{}/", child_name);
            } else if child.is_symlink() {
                println!("{} (symlink)", child_name);
            } else {
                println!("{}", child_name);
            }
            print!("\t[{:3}] ", idx);
            match child.get_metadata().unwrap() {
                None => {
                    print!("unlabelled ");
                }
                Some(state) => match state {
                    State::Labelled(label) => {
                        print!("l:{} ", label.0.as_ref());
                    }
                    State::Split => {
                        print!("split ");
                    }
                    State::Excluded => {
                        print!("excluded ");
                    }
                },
            }
            println!(
                "({})",
                size_child
                    .file_size(humansize::file_size_opts::BINARY)
                    .unwrap()
            );
        }

        print!("\n> ");
        stdout().flush()?;
        let mut next_command = String::new();
        if stdin().read_line(&mut next_command)? == 0 {
            // EOF on stdin: stop the session.
            println!("ending.");
            break;
        }
        let split: Vec<&str> = next_command.trim_end_matches('\n').split(' ').collect();

        // Looks up a child by its listing index.
        let child_at = |id: usize| children.iter().nth(id);

        match split[0] {
            // Exclude ("x") or split ("s") the entry at the given index.
            cmd @ ("x" | "s") => {
                let state = if cmd == "x" {
                    State::Excluded
                } else {
                    State::Split
                };
                // Guard against missing/malformed arguments rather than
                // indexing `split[1]` unconditionally (which panicked).
                match split.get(1).map(|arg| arg.parse::<usize>()) {
                    Some(Ok(id)) => {
                        if let Some((name, _entry)) = child_at(id) {
                            let entry_path = format!("{}/{}", &current_path, name);
                            rules.position_based_rules.insert(entry_path, state);
                        } else {
                            eprintln!("not found.");
                        }
                    }
                    Some(Err(_)) => {
                        eprintln!("bad int :(");
                    }
                    None => {
                        eprintln!("missing index argument.");
                    }
                }
            }
            // Add a glob-based rule: p <outcome> <pattern>
            "p" => {
                if let (Some(outcome), Some(pattern)) = (split.get(1), split.get(2)) {
                    match glob::Pattern::new(pattern) {
                        Ok(glob) => {
                            rules.glob_based_rules.push(GlobRule {
                                pattern: (*pattern).to_owned(),
                                glob,
                                outcome: string_to_outcome(outcome),
                            });
                        }
                        Err(e) => {
                            eprintln!("{:?}", e);
                        }
                    }
                } else {
                    eprintln!("usage: p <outcome> <pattern>");
                }
            }
            "q" => {
                break;
            }
            other => {
                if other.chars().all(char::is_numeric) {
                    // A bare number descends into that child directory.
                    // (Note: an empty line lands here too — `all` is true for
                    // an empty string — so the parse must not unwrap.)
                    if let Ok(id) = other.parse::<usize>() {
                        if let Some((name, entry)) = child_at(id) {
                            if entry.is_dir() {
                                current_path.push('/');
                                current_path.push_str(name);
                            } else {
                                eprintln!("not a dir.");
                            }
                        } else {
                            eprintln!("not found.");
                        }
                    } else {
                        eprintln!("bad int :(");
                    }
                } else {
                    // Any other word labels an entry: <word> <label> <index>
                    match (split.get(1), split.get(2).map(|arg| arg.parse::<usize>())) {
                        (Some(label), Some(Ok(id))) => {
                            if let Some((name, _entry)) = child_at(id) {
                                let entry_path = format!("{}/{}", &current_path, name);
                                rules.position_based_rules.insert(
                                    entry_path,
                                    State::Labelled(Label(ArcIntern::new((*label).to_owned()))),
                                );
                            }
                        }
                        _ => {
                            eprintln!("usage: <command> <label> <index>");
                        }
                    }
                }
            }
        }
    }

    save_labelling_rules(path, &source_name, &rules)?;
    Ok(())
}

View File

@ -1,267 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::BTreeSet;
use std::io;
use std::io::{StdinLock, Stdout, Write};
use std::path::Path;
use arc_interner::ArcIntern;
use byteorder::ReadBytesExt;
use termion::input::TermRead;
use termion::raw::{IntoRawMode, RawTerminal};
use crate::descriptor::{load_descriptor, Descriptor, SourceDescriptor};
use crate::labelling::State::{Excluded, Labelled, Split};
use crate::labelling::{
load_labelling_rules, save_labelling_rules, GlobRule, Label, LabellingRules, State,
};
use crate::tree::{scan, FileTree, FileTree1};
use log::info;
use crate::get_hostname;
use crate::remote::backup_source_requester;
use crate::remote::backup_source_requester::connect_to_remote;
use anyhow::{anyhow, bail};
/// Walks the scanned tree depth-first, prompting the user (in raw terminal
/// mode, one keypress at a time) for a labelling decision on every node that
/// doesn't already have one via `rules` or inheritance.
///
/// Keys: '1'..'9' pick a label by index, 'x' excludes, 's' splits (dirs only),
/// 'p' enters glob-pattern mode, 'q' aborts the walk early (returns Ok).
/// Decisions are recorded into `rules` and stamped onto each node's metadata.
pub fn interactive_label_node(
    path: String,
    current_state: Option<State>,
    node: &mut FileTree1<Option<State>>,
    labels: &Vec<Label>,
    rules: &mut LabellingRules,
    stdin: &mut StdinLock,
    stdout: &mut RawTerminal<Stdout>,
) -> anyhow::Result<()> {
    let mut next_state = current_state;
    // An explicit rule for this path always wins; otherwise only prompt when
    // the inherited state (if any) is not one that should propagate down.
    if let Some(rule_state) = rules.apply(&path) {
        next_state = Some(rule_state.clone());
    } else if !next_state
        .as_ref()
        .map(|s| s.should_inherit())
        .unwrap_or(false)
    {
        // Raw mode: '\r' returns to column 0; '_' is the cursor placeholder.
        if node.is_dir() {
            stdout.write_all(format!("\r{}/: _", path).as_bytes())?;
        } else if node.is_symlink() {
            stdout.write_all(format!("\r{} (symlink): _", path).as_bytes())?;
        } else {
            stdout.write_all(format!("\r{}: _", path).as_bytes())?;
        }
        stdout.flush()?;
        // Loop until a key produces a decision ('\x08' backspaces over the
        // '_' placeholder before echoing the chosen key).
        let user_input_state = loop {
            let next_char = stdin.read_u8()? as char;
            if next_char >= '1' && next_char <= '9' {
                let index = next_char as usize - '1' as usize;
                if let Some(label) = labels.get(index) {
                    rules
                        .position_based_rules
                        .insert(path.clone(), Labelled(label.clone()));
                    print!("\x08{}\r\n", label.0);
                    break Some(Labelled(label.clone()));
                }
            } else if next_char == 'x' {
                rules.position_based_rules.insert(path.clone(), Excluded);
                print!("\x08{}\r\n", next_char);
                break Some(Excluded);
            } else if next_char == 's' {
                // Split only makes sense on directories; '!' signals refusal.
                if node.is_dir() {
                    rules.position_based_rules.insert(path.clone(), Split);
                    print!("\x08{}\r\n", next_char);
                    break Some(Split);
                } else {
                    print!("\x08!");
                    stdout.flush()?;
                }
            } else if next_char == 'p' {
                // Pattern mode: first pick the outcome, then type a glob.
                print!("\x08p\r\n\tPattern mode. Choose a label or other effect to apply to the pattern matches: _");
                stdout.flush()?;
                let rule_apply_state = loop {
                    let next_char = stdin.read_u8()? as char;
                    if next_char >= '1' && next_char <= '9' {
                        let index = next_char as usize - '1' as usize;
                        if let Some(label) = labels.get(index) {
                            print!("\x08{}\r\n", label.0);
                            break Labelled(label.clone());
                        }
                    } else if next_char == 'x' {
                        print!("\x08{}\r\n", next_char);
                        break Excluded;
                    } else if next_char == 's' {
                        print!("\x08{}\r\n", next_char);
                        break Split;
                    }
                };
                stdout.flush()?;
                // Leave raw mode so the user can type a whole line normally.
                stdout.suspend_raw_mode()?;
                print!("\tEnter a glob pattern to match on:\n\t");
                stdout.flush()?;
                // Re-prompt until the glob parses AND matches the current
                // path (a sanity check that the rule will apply here).
                let (pattern, glob) = loop {
                    let pattern = stdin
                        .read_line()?
                        .ok_or_else(|| anyhow!("EOT? when reading glob pattern"))?;
                    match glob::Pattern::new(&pattern) {
                        Ok(glob) => {
                            if !glob.matches(&path) {
                                println!("Doesn't match the path in question.");
                                continue;
                            }
                            break (pattern, glob);
                        }
                        Err(error) => {
                            println!("Error: {:?}. Try again.", error);
                        }
                    }
                };
                stdout.activate_raw_mode()?;
                rules.glob_based_rules.push(GlobRule {
                    pattern,
                    glob,
                    outcome: rule_apply_state.clone(),
                });
                break Some(rule_apply_state);
            } else if next_char == 'q' {
                // Quit: abandon the rest of the walk without error.
                return Ok(());
            }
        };
        next_state = user_input_state;
    }
    // Stamp the decided state onto this node; recurse into directories so
    // children inherit (or get prompted for) their own state.
    match node {
        FileTree::NormalFile { meta, .. } => {
            *meta = next_state;
        }
        FileTree::Directory { meta, children, .. } => {
            *meta = next_state.clone();
            for (child_name, child) in children.iter_mut() {
                let child_path = format!("{}/{}", path, child_name);
                interactive_label_node(
                    child_path,
                    next_state.clone(),
                    child,
                    labels,
                    rules,
                    stdin,
                    stdout,
                )?;
            }
        }
        FileTree::SymbolicLink { meta, .. } => {
            *meta = next_state;
        }
        FileTree::Other(_) => {
            // The scan is expected to have filtered these out already.
            panic!("Other() nodes shouldn't be present here.");
        }
    }
    Ok(())
}
/// Entry point for the interactive labelling flow: scans the source (locally,
/// or over the network when the source's hostname isn't ours), then walks the
/// tree with `interactive_label_node` and saves the resulting rules.
pub fn interactive_labelling_session(path: &Path, source_name: String) -> anyhow::Result<()> {
    let descriptor: Descriptor = load_descriptor(path)?;
    let source = descriptor
        .sources
        .get(&source_name)
        .ok_or_else(|| anyhow!("No source found by that name!"))?;
    if let SourceDescriptor::DirectorySource {
        hostname,
        directory,
        cross_filesystems,
    } = source
    {
        let my_hostname = get_hostname();
        // Scan locally when the source lives on this host; otherwise drive
        // the remote scanning protocol over the connection's stdin/stdout.
        let mut dir_scan = if &my_hostname == hostname {
            info!("Scanning source; this might take a little while...");
            scan(directory, !*cross_filesystems, &BTreeSet::new())?
                .ok_or_else(|| anyhow!("Empty source."))?
                .replace_meta(&None)
        } else {
            info!("Requesting scan over network. This might take a while.");
            let connection = connect_to_remote(&descriptor, hostname)?;
            let mut read = connection.stdout.expect("Requested stdout");
            let mut write = connection.stdin.expect("Requested stdin");
            // Protocol order matters: introduction, then scan, then quit.
            // first start off with an introduction
            info!("Connecting to remote source...");
            backup_source_requester::introduction(&mut read, &mut write)?;
            // then request to scan
            info!("Requesting scan from remote source... (this may take some time)");
            let scan = backup_source_requester::scanning(
                &mut read,
                &mut write,
                directory.as_ref(),
                !*cross_filesystems,
                &BTreeSet::new(),
            )?
            .ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?
            .replace_meta(&None);
            backup_source_requester::quit(&mut read, &mut write)?;
            scan
        };
        let mut rules = load_labelling_rules(path, &source_name)?;
        let labels: Vec<Label> = descriptor
            .labels
            .iter()
            .map(|label| Label(ArcIntern::new(label.clone())))
            .collect();
        // Show the key bindings before entering raw mode.
        println!("The following label mappings are available:");
        for (idx, label) in labels.iter().enumerate() {
            println!("\tFor {:?}, press {}!", label.0.as_ref(), idx + 1);
        }
        println!("\tTo split a directory, press 's'!");
        println!("\tTo exclude an entry, press 'x'!");
        println!("\tTo apply a pattern, press 'p'...");
        // Set terminal to raw mode to allow reading stdin one key at a time
        let mut stdout = io::stdout().into_raw_mode().unwrap();
        let stdin_unlocked = io::stdin();
        let mut stdin = stdin_unlocked.lock();
        interactive_label_node(
            "".to_owned(),
            None,
            &mut dir_scan,
            &labels,
            &mut rules,
            &mut stdin,
            &mut stdout,
        )?;
        // Dropping stdout restores the terminal out of raw mode.
        drop(stdout);
        drop(stdin);
        println!("\nLabelling completed!");
        // save rules
        save_labelling_rules(path, &source_name, &rules)?;
    } else {
        bail!("Can't do interactive labelling on a non-directory source.");
    }
    Ok(())
}

View File

@ -1,220 +0,0 @@
use crate::commands::backup::split_pointer_name;
use crate::descriptor::RetentionPolicyConfig;
use anyhow::{bail, Context};
use log::info;
use std::collections::{BTreeMap, BTreeSet};
use std::io;
use std::path::Path;
use yama::commands::open_pile;
use yama::operations::remove_pointer_safely;
use yama::pile::PileDescriptor;
/// One band of a retention policy: retain `number_to_retain` snapshots, each
/// spaced `interval_s` seconds before the most recent one.
pub struct RetentionBand {
    /// Spacing between retained snapshots, in seconds.
    pub interval_s: u64,
    /// How many snapshots to retain at this spacing.
    pub number_to_retain: u32,
}

/// A retention policy composed of multiple bands (daily/weekly/monthly/yearly).
pub struct RetentionPolicy {
    pub retention_bands: Vec<RetentionBand>,
}

// Approximate interval lengths in seconds. A month is taken as 31 days and a
// year as 365 days; exact calendar arithmetic is not needed for bucketing.
const DAY: u64 = 86400;
const WEEK: u64 = 7 * DAY;
const MONTH: u64 = 31 * DAY;
const YEAR: u64 = 365 * DAY;

impl RetentionPolicy {
    /// Builds a policy from user configuration; bands with a zero count are
    /// omitted entirely.
    pub fn from_config(descriptor: RetentionPolicyConfig) -> RetentionPolicy {
        let mut policy = RetentionPolicy {
            retention_bands: vec![],
        };
        if descriptor.daily != 0 {
            policy.retention_bands.push(RetentionBand {
                interval_s: DAY,
                number_to_retain: descriptor.daily,
            });
        }
        if descriptor.weekly != 0 {
            policy.retention_bands.push(RetentionBand {
                interval_s: WEEK,
                number_to_retain: descriptor.weekly,
            });
        }
        if descriptor.monthly != 0 {
            policy.retention_bands.push(RetentionBand {
                interval_s: MONTH,
                number_to_retain: descriptor.monthly,
            });
        }
        if descriptor.yearly != 0 {
            policy.retention_bands.push(RetentionBand {
                interval_s: YEAR,
                number_to_retain: descriptor.yearly,
            });
        }
        policy
    }

    /// Returns the set of snapshots to remove.
    ///
    /// The most recent snapshot is always retained and also serves as the
    /// reference 'now' for measuring each band's target times; for every
    /// target time, the newest snapshot at or before it is retained.
    pub fn apply_returning_prunable(
        &self,
        snapshots_by_unix_time: BTreeMap<u64, String>,
    ) -> BTreeSet<String> {
        if snapshots_by_unix_time.is_empty() {
            return BTreeSet::new();
        }
        let mut snapshots_included: BTreeSet<u64> = BTreeSet::new();
        // Always mark the most recent snapshot as retained!
        let last_snapshot = snapshots_by_unix_time.keys().rev().next().unwrap();
        snapshots_included.insert(*last_snapshot);
        let now_time = *last_snapshot;
        for band in &self.retention_bands {
            for multiple in 1..=band.number_to_retain {
                // saturating_sub: a band can reach back before the Unix epoch
                // (e.g. many yearly slots against a small timestamp); plain
                // subtraction would underflow — panicking in debug builds and
                // wrapping to a huge value in release builds.
                let target_time = now_time.saturating_sub((multiple as u64) * band.interval_s);
                if let Some((k, _)) = snapshots_by_unix_time.range(0..=target_time).rev().next() {
                    snapshots_included.insert(*k);
                }
            }
        }
        // Find all prunable (unincluded) snapshots.
        snapshots_by_unix_time
            .into_iter()
            .filter(|(k, _v)| !snapshots_included.contains(k))
            .map(|(_k, v)| v)
            .collect()
    }
}
/// Applies `policy` to the pointers of the pile at `pile_path`, optionally
/// asking the user for confirmation, then removes the prunable pointers one
/// by one (flushing the pile after each removal).
pub fn prune_with_retention_policy(
    pile_path: &Path,
    pile_desc: &PileDescriptor,
    policy: &RetentionPolicy,
    prompt_first: bool,
) -> anyhow::Result<()> {
    let pile = open_pile(&pile_path, &pile_desc).context("Failed to open pile")?;
    let pointers = pile
        .list_pointers()
        .context("Failed to list pointers in pile")?;
    // Keep-set = all pointers minus the prunable ones (for reporting only).
    let mut pointers_to_keep: BTreeSet<String> = pointers.iter().cloned().collect();
    let pointers_to_remove = get_prunable_pointers(&policy, pointers);
    for remove in &pointers_to_remove {
        pointers_to_keep.remove(remove);
    }
    info!("Gory details:\n---\nKeep: {pointers_to_keep:?}\n---\nRemove: {pointers_to_remove:?}");
    info!(
        "{} pointers to remove ({} to keep) based on retention policy.",
        pointers_to_remove.len(),
        pointers_to_keep.len()
    );
    // Interactive safety check: anything other than "y"/"Y" aborts.
    if prompt_first {
        println!("Would you like to proceed? [y/N]: ");
        let mut buffer = String::new();
        let stdin = io::stdin(); // We get `Stdin` here.
        stdin.read_line(&mut buffer)?;
        if buffer.trim().to_ascii_lowercase() != "y" {
            bail!("Aborted by user.");
        }
    }
    for to_remove in pointers_to_remove {
        // Flush even when removal failed, so partial work is persisted
        // before the error propagates.
        let res = remove_pointer_safely(&pile, &to_remove).context("removing prunable pointers");
        pile.flush()
            .context("flushing pile after removing pointers")?;
        res?;
    }
    Ok(())
}
/// Groups pointers by their base name, applies the retention policy to each
/// group's timestamped snapshots, and returns the union of prunable pointers.
fn get_prunable_pointers(policy: &RetentionPolicy, pointers: Vec<String>) -> BTreeSet<String> {
    let mut grouped: BTreeMap<String, BTreeMap<u64, String>> = BTreeMap::new();
    for pointer in pointers {
        // Pointers that don't follow the `name+datetime` convention are
        // skipped entirely (and therefore never pruned).
        if let Some((name, datetime)) = split_pointer_name(&pointer) {
            let timestamp: u64 = datetime.timestamp().try_into().unwrap();
            grouped.entry(name).or_default().insert(timestamp, pointer);
        }
    }
    grouped
        .into_values()
        .flat_map(|snapshots_by_time| policy.apply_returning_prunable(snapshots_by_time))
        .collect()
}
#[cfg(test)]
mod test {
    use crate::commands::prune::{get_prunable_pointers, RetentionPolicy};
    use crate::descriptor::RetentionPolicyConfig;
    /// With a weekly-only policy (3 slots), the newest snapshot of each
    /// source plus the snapshots nearest each weekly target survive; the
    /// rest — older extras and same-day duplicates — are prunable.
    #[test]
    fn test_prunable_pointers() {
        let pointers = vec![
            "alice+2022-09-28_05:00:00",
            "alice+2022-09-28_02:00:00",
            "alice+2022-09-21_05:00:00",
            "alice+2022-09-14_05:00:00",
            "alice+2022-09-08_05:00:00",
            "alice+2022-09-07_05:00:00",
            "alice+2022-09-01_05:00:00",
            "bob+2022-09-28_06:00:00",
            "bob+2022-09-28_03:00:00",
            "bob+2022-09-21_06:00:00",
            "bob+2022-09-14_06:00:00",
            "bob+2022-09-08_06:00:00",
            "bob+2022-09-07_06:00:00",
            "bob+2022-09-01_06:00:00",
        ]
        .into_iter()
        .map(|s| s.to_owned())
        .collect();
        let policy = RetentionPolicy::from_config(RetentionPolicyConfig {
            daily: 0,
            weekly: 3,
            monthly: 0,
            yearly: 0,
        });
        assert_eq!(
            get_prunable_pointers(&policy, pointers)
                .into_iter()
                .collect::<Vec<_>>(),
            vec![
                "alice+2022-09-01_05:00:00",
                "alice+2022-09-08_05:00:00",
                "alice+2022-09-28_02:00:00",
                "bob+2022-09-01_06:00:00",
                "bob+2022-09-08_06:00:00",
                "bob+2022-09-28_03:00:00",
            ]
        );
    }
}

View File

@ -1,306 +0,0 @@
// Push and Pull support for Datman
use anyhow::{bail, ensure, Context};
use log::info;
use std::collections::{BTreeMap, BTreeSet};
use std::io::{Read, Write};
use std::sync::Arc;
use std::time::Instant;
use yama::chunking::RecursiveUnchunker;
use yama::commands::retrieve_tree_node;
use yama::definitions::{ChunkId, PointerData, RecursiveChunkRef, TreeNode};
use yama::pile::{Keyspace, Pile, PipelineDescription, RawPile};
use yama::progress::ProgressTracker;
use yama::remote::{read_message, write_message};
/// Collects every pointer in `pile` (name → data) and sends the whole map to
/// the remote peer as a single message; returns the map that was offered.
pub fn offer_pointers<W: Write, RP: RawPile>(
    pile: &Pile<RP>,
    writer: &mut W,
) -> anyhow::Result<BTreeMap<String, PointerData>> {
    let mut offered: BTreeMap<String, PointerData> = BTreeMap::new();
    for name in pile.list_pointers()? {
        // A pointer listed but unreadable indicates an inconsistent pile.
        let data = pile
            .read_pointer(&name)?
            .context("Listed pointer not present")?;
        offered.insert(name, data);
    }
    write_message(writer, &offered)?;
    Ok(offered)
}
/// Verifies that the two sides may safely exchange chunks at the bypass
/// level: each side's bypass pipeline must be a prefix of its full pipeline,
/// and the remaining (bypassed) pipeline stages must match exactly.
pub fn ensure_compatible_bypasses(
    my_full: &Vec<PipelineDescription>,
    my_bypass: &Vec<PipelineDescription>,
    their_full: &Vec<PipelineDescription>,
    their_bypass: &Vec<PipelineDescription>,
) -> anyhow::Result<()> {
    ensure!(
        my_full.starts_with(my_bypass),
        "Our full pipeline is not an extension of the bypass pipeline."
    );
    ensure!(
        their_full.starts_with(their_bypass),
        "Their full pipeline is not an extension of their bypass pipeline."
    );

    // The stages skipped by each bypass must be identical on both sides,
    // otherwise raw chunk bytes would be interpreted differently.
    let my_suffix = &my_full[my_bypass.len()..];
    let their_suffix = &their_full[their_bypass.len()..];
    ensure!(
        my_suffix == their_suffix,
        "Our bypassed parts and their bypassed parts are not the same.\nOurs: {:?}\nTheirs: {:?}",
        my_suffix,
        their_suffix
    );
    Ok(())
}
/// Exchanges pipeline descriptions with the peer and checks (via
/// `ensure_compatible_bypasses`) that both sides can use their bypass piles.
/// Wire order matters: both descriptions are written (and flushed) before
/// reading the peer's two descriptions, to avoid a deadlock.
pub fn negotiate_bypassed_pile<R: Read, W: Write>(
    pile: &Pile<Arc<Box<dyn RawPile>>>,
    bypass_pile: &Box<dyn RawPile>,
    reader: &mut R,
    writer: &mut W,
) -> anyhow::Result<()> {
    let my_full_pipeline = pile.raw_pile.describe_pipeline()?;
    let my_bypass_pipeline = bypass_pile.describe_pipeline()?;
    write_message(writer, &my_full_pipeline)?;
    write_message(writer, &my_bypass_pipeline)?;
    writer.flush()?;
    let their_full_pipeline = read_message::<_, Vec<PipelineDescription>>(reader)?;
    let their_bypass_pipeline = read_message::<_, Vec<PipelineDescription>>(reader)?;
    ensure_compatible_bypasses(
        &my_full_pipeline,
        &my_bypass_pipeline,
        &their_full_pipeline,
        &their_bypass_pipeline,
    )?;
    Ok(())
}
/// Walks `root`, adding to `chunk_ids` every chunk ID referenced by the
/// content of each normal file (including indirection chunks, via
/// `collect_chunk_ids_from_chunkref`).
fn collect_chunk_ids(
    pile: &Pile<Arc<Box<dyn RawPile>>>,
    root: &TreeNode,
    chunk_ids: &mut BTreeSet<ChunkId>,
) -> anyhow::Result<()> {
    root.visit(
        &mut |tree_node, _| {
            // Only normal files carry chunk references; every other node
            // kind contributes nothing.
            if let TreeNode::NormalFile { content, .. } = tree_node {
                collect_chunk_ids_from_chunkref(pile, content, chunk_ids)?;
            }
            Ok(())
        },
        "".to_owned(),
    )?;
    Ok(())
}
/// Adds to `collection` all the chunk IDs reachable from `chunk_ref`.
/// Depth 0 means the reference IS a data chunk; otherwise the referenced
/// stream (unchunked one level shallower) is itself a sequence of chunk IDs,
/// which are read out fixed-size record by record.
fn collect_chunk_ids_from_chunkref(
    pile: &Pile<Arc<Box<dyn RawPile>>>,
    chunk_ref: &RecursiveChunkRef,
    collection: &mut BTreeSet<ChunkId>,
) -> anyhow::Result<()> {
    if chunk_ref.depth == 0 {
        collection.insert(chunk_ref.chunk_id);
    } else {
        // Unchunk one level less deep than the reference, so the stream we
        // read consists of the raw chunk IDs at the next level down.
        let shallower_chunk_ref = RecursiveChunkRef {
            chunk_id: chunk_ref.chunk_id,
            depth: chunk_ref.depth - 1,
        };
        let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
        let mut next_chunk_id: ChunkId = Default::default();
        loop {
            let read = unchunker.read(&mut next_chunk_id[..])?;
            if read == 0 {
                // Clean EOF: stream ends exactly on a record boundary.
                break;
            } else if read < next_chunk_id.len() {
                // Partial read: fill the rest of this chunk ID record.
                unchunker.read_exact(&mut next_chunk_id[read..])?;
            }
            collection.insert(next_chunk_id);
        }
    }
    Ok(())
}
/// Runs the offering (sending) half of the Datman pull protocol:
/// version handshake → bypass negotiation → offer pointers → learn which
/// pointers/chunks the peer wants → stream the missing chunks (read via the
/// bypass pile so compressed bytes travel as-is) → send a `None` terminator.
pub fn offering_side<R: Read, W: Write>(
    pile: &Pile<Arc<Box<dyn RawPile>>>,
    bypass_pile: &Box<dyn RawPile>,
    reader: &mut R,
    writer: &mut W,
    mut progress: Box<dyn ProgressTracker>,
) -> anyhow::Result<()> {
    // Both sides must run the exact same version of the protocol/program.
    let version = env!("CARGO_PKG_VERSION");
    let expecting = format!("Datman Pull Accepter {}", version);
    write_message(writer, &format!("Datman Pull Offerer {}", version))?;
    writer.flush()?;
    let found: String = read_message(reader)?;
    ensure!(
        found == expecting,
        "Version mismatch. Expecting {:?} got {:?}",
        expecting,
        found
    );
    // First 'negotiate' (for now: assert) a pile bypass.
    // This lets us avoid decompressing things before recompressing them at the other end,
    // assuming both ends use the same dictionary.
    negotiate_bypassed_pile(pile, &bypass_pile, reader, writer)?;
    let offered_pointers = offer_pointers(pile, writer)?;
    let wanted_pointers = read_message::<_, BTreeSet<String>>(reader)?;
    // Gather every chunk reachable from each wanted pointer: the pointer's
    // own chunk-ref plus all chunks referenced by its tree.
    let mut chunks_to_offer: BTreeSet<ChunkId> = BTreeSet::new();
    for pointer_name in &wanted_pointers {
        let pointer_data = offered_pointers
            .get(pointer_name)
            .with_context(|| format!("Requested pointer {:?} was not offered", pointer_name))?;
        collect_chunk_ids_from_chunkref(pile, &pointer_data.chunk_ref, &mut chunks_to_offer)?;
        let root_node = retrieve_tree_node(pile, pointer_data.chunk_ref.clone())?;
        collect_chunk_ids(pile, &root_node.node, &mut chunks_to_offer)?;
    }
    write_message(writer, &chunks_to_offer)?;
    writer.flush()?;
    // The peer replies with chunks it already has; send only the difference.
    let chunks_to_skip: BTreeSet<ChunkId> = read_message(reader)?;
    let chunks_to_send: Vec<ChunkId> = chunks_to_offer
        .difference(&chunks_to_skip)
        .cloned()
        .collect();
    drop(chunks_to_offer);
    drop(chunks_to_skip);
    // Order chunks by the raw pile's transfer hint (the BTreeSet sorts by
    // the hint first) so reads are friendlier to the underlying storage.
    let start_sort_by_hints = Instant::now();
    let chunks_to_send_with_hints: BTreeSet<(u64, ChunkId)> = chunks_to_send
        .into_iter()
        .map(|chunk_id| {
            pile.raw_pile
                .chunk_id_transfer_ordering_hint(&chunk_id)
                .map(|hint| (hint, chunk_id))
        })
        .collect::<anyhow::Result<_>>()?;
    let time_to_sort_by_hints = Instant::now() - start_sort_by_hints;
    info!(
        "{} s to sort {} chunks by their hints",
        time_to_sort_by_hints.as_secs_f32(),
        chunks_to_send_with_hints.len()
    );
    progress.set_max_size(chunks_to_send_with_hints.len() as u64);
    progress.set_current(0);
    for (_hint, chunk_id) in chunks_to_send_with_hints {
        // Read through the bypass pile so the bytes stay in their stored
        // (e.g. compressed) form for the wire.
        let chunk_data = bypass_pile
            .read(Keyspace::Chunk, &chunk_id)?
            .context("Chunk vanished")?;
        write_message(writer, &Some((chunk_id, chunk_data)))?;
        progress.inc_progress(1);
    }
    // NOTE(review): the terminator is `None::<Option<…>>` while items are
    // sent as `Some((…))` of `Option<(…)>` — presumably these serialize
    // identically for the wire format in use; confirm.
    write_message(writer, &None::<Option<(ChunkId, Vec<u8>)>>)?;
    writer.flush()?;
    Ok(())
}
/// Runs the accepting (receiving) half of the Datman pull protocol:
/// version handshake → bypass negotiation → pick wanted pointers from the
/// offer (checking parent pointers are available) → report already-present
/// chunks → receive the remaining chunks into the bypass pile → store the
/// pointers and flush.
pub fn accepting_side<R: Read, W: Write>(
    pile: &Pile<Arc<Box<dyn RawPile>>>,
    bypass_pile: &Box<dyn RawPile>,
    reader: &mut R,
    writer: &mut W,
    mut progress: Box<dyn ProgressTracker>,
) -> anyhow::Result<()> {
    // Both sides must run the exact same version of the protocol/program.
    let version = env!("CARGO_PKG_VERSION");
    let expecting = format!("Datman Pull Offerer {}", version);
    write_message(writer, &format!("Datman Pull Accepter {}", version))?;
    writer.flush()?;
    let found: String = read_message(reader)?;
    ensure!(
        found == expecting,
        "Version mismatch. Expecting {:?} got {:?}",
        expecting,
        found
    );
    // First 'negotiate' (for now: assert) a pile bypass.
    // This lets us avoid decompressing things before recompressing them at the other end,
    // assuming both ends use the same dictionary.
    negotiate_bypassed_pile(pile, &bypass_pile, reader, writer)?;
    let offered_pointers: BTreeMap<String, PointerData> = read_message(reader)?;
    // Want every offered pointer we don't already have — but a pointer with
    // a parent we neither hold nor were offered can never be completed.
    let mut wanted_pointers: BTreeSet<String> = BTreeSet::new();
    for (pointer_name, pointer_data) in &offered_pointers {
        if pile.read_pointer(pointer_name)?.is_none() {
            wanted_pointers.insert(pointer_name.clone());
            if let Some(parent) = &pointer_data.parent_pointer {
                if pile.read_pointer(parent)?.is_none() && !offered_pointers.contains_key(parent) {
                    bail!("Offered pointer {:?} requires parent {:?} which we don't have and isn't offered.", pointer_name, parent);
                }
            }
        }
    }
    write_message(writer, &wanted_pointers)?;
    writer.flush()?;
    // Tell the offerer which of its chunks we already store, so it only
    // transfers the missing ones.
    let offered_chunks: BTreeSet<ChunkId> = read_message(reader)?;
    let mut chunks_to_skip: BTreeSet<ChunkId> = BTreeSet::new();
    for chunk_id in &offered_chunks {
        if pile.chunk_exists(chunk_id)? {
            chunks_to_skip.insert(*chunk_id);
        }
    }
    write_message(writer, &chunks_to_skip)?;
    writer.flush()?;
    let num_chunks_to_recv = offered_chunks.len() - chunks_to_skip.len();
    let mut chunks_to_recv: BTreeSet<ChunkId> = offered_chunks
        .difference(&chunks_to_skip)
        .cloned()
        .collect();
    drop(offered_chunks);
    drop(chunks_to_skip);
    progress.set_max_size(num_chunks_to_recv as u64);
    progress.set_current(0);
    // Receive chunks until the None terminator; each must be one we asked
    // for, and is written via the bypass pile (bytes arrive pre-processed).
    while let Some((chunk_id, chunk_data)) = read_message::<_, Option<(ChunkId, Vec<u8>)>>(reader)?
    {
        ensure!(
            chunks_to_recv.remove(&chunk_id),
            "Received unexpected chunk"
        );
        bypass_pile.write(Keyspace::Chunk, &chunk_id, &chunk_data)?;
        progress.inc_progress(1);
    }
    ensure!(chunks_to_recv.is_empty(), "Unreceived chunks.");
    // Only store the pointers once all their chunks have arrived.
    for (pointer_name, pointer_data) in &offered_pointers {
        pile.write_pointer(pointer_name, pointer_data)?;
    }
    pile.flush()?;
    Ok(())
}

View File

@ -1,456 +0,0 @@
use crate::commands::backup::split_pointer_name;
use crate::descriptor::{Descriptor, DestPileDescriptor};
use anyhow::Context;
use chrono::{Date, DateTime, Utc};
use comfy_table::presets::UTF8_FULL;
use comfy_table::{Attribute, Cell, Color, ContentArrangement, Table};
use humansize::FileSize;
use itertools::Itertools;
use log::info;
use std::collections::{BTreeMap, BTreeSet};
use std::ffi::CString;
use std::io::Read;
use std::mem;
use std::mem::size_of;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use yama::chunking::RecursiveUnchunker;
use yama::commands::{load_pile_descriptor, open_pile, retrieve_tree_node};
use yama::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
use yama::pile::{DebugStatistics, Pile, RawPile};
// This module generates reports for a Datman system.
// Referenced Chunk IDs are counted and used to give an indication of size.
// Chunk IDs are summarised into u32s to reduce memory usage. Since the report is approximate,
// it doesn't matter if there are a few collisions (although they are still fairly unlikely to
// affect much).
/// A generated report over a Datman destination pile.
#[derive(Clone)]
pub struct Report {
    /// For each source name, the time of its most recent backup pointer
    /// (`None` if the source has never been backed up).
    pub last_source_backups: BTreeMap<String, Option<DateTime<Utc>>>,
    /// True if `chunk_usage` keys are month-aggregated pointer groups rather
    /// than individual pointer names.
    pub chunk_usages_aggregated: bool,
    /// Approximate chunk-usage statistics per pointer (or pointer group).
    pub chunk_usage: BTreeMap<String, Sizes>,
    /// Low-level pile statistics, when the raw pile can provide them.
    pub debug_stats: Option<DebugStatistics>,
}
/// Approximate size statistics for one pointer (or pointer group).
/// All values are counts of (condensed) chunks, not bytes.
#[derive(Clone, Default)]
pub struct Sizes {
    /// Total number of chunks that we refer to.
    pub total: u32,
    /// Each referred chunk is counted once here, but divided by the number of sharers.
    /// We are 'morally responsible' for this many chunks.
    pub moral: u32,
    /// Number of chunks that only we point to.
    pub unique: u32,
    /// Number of chunks for which we are the oldest (lexicographically earliest) pointer to point
    /// to those chunks.
    pub rollup: u32,
}
/// Truncated chunk ID used for memory-frugal approximate counting; see the
/// module comment — rare collisions are acceptable for a report.
type CondensedChunkId = u32;
/// Condenses a full chunk ID down to its first `size_of::<u32>()` bytes,
/// interpreted big-endian.
fn condense_chunk_id(chunk_id: ChunkId) -> CondensedChunkId {
    CondensedChunkId::from_be_bytes(
        chunk_id[0..size_of::<CondensedChunkId>()]
            .try_into()
            .unwrap(),
    )
}
/// Builds a `Report` for the given destination pile: per-source last-backup
/// times plus approximate chunk-usage stats per pointer (or per month-group
/// when `aggregate_chunk_usage_by_month` is set).
pub fn generate_report(
    dest_pile_descriptor: &DestPileDescriptor,
    descriptor: &Descriptor,
    aggregate_chunk_usage_by_month: bool,
) -> anyhow::Result<Report> {
    let pile_descriptor = load_pile_descriptor(&dest_pile_descriptor.path)?;
    let pile = open_pile(&dest_pile_descriptor.path, &pile_descriptor)?;
    let debug_stats = pile.raw_pile.debug_statistics()?;
    // Phase 1: for every pointer, gather its (condensed) chunk IDs and group
    // pointers (either 1:1 or by `name+YYYY-MM`).
    let mut pointers_to_parent_and_chunkids = BTreeMap::new();
    let mut pointergroups_to_pointers: BTreeMap<String, Vec<String>> = BTreeMap::new();
    info!("Collecting chunk IDs... This will probably be slow.");
    for pointer_name in pile.list_pointers()? {
        let pointer = pile
            .read_pointer(&pointer_name)?
            .context("listed pointer doesn't exist")?;
        let root_node = retrieve_tree_node(&pile, pointer.chunk_ref)?;
        let pointer_chunk_ids = collect_chunk_ids(&pile, &root_node.node)?;
        let pointergroup = if aggregate_chunk_usage_by_month {
            let (base, date_time) =
                split_pointer_name(&pointer_name).context("Can't split pointer name")?;
            format!("{}+{}", base, date_time.format("%Y-%m"))
        } else {
            pointer_name.clone()
        };
        pointergroups_to_pointers
            .entry(pointergroup)
            .or_default()
            .push(pointer_name.clone());
        pointers_to_parent_and_chunkids
            .insert(pointer_name, (pointer.parent_pointer, pointer_chunk_ids));
    }
    // Now we iterate in reverse order, making a list of count of Chunk IDs.
    // At the same time, we can also calculate 'rollup' sizes.
    // Phase 2: a group's rollup count = chunks first claimed by it during
    // this reverse scan (i.e. not seen in any later-named group).
    // NOTE(review): `.rev()` scans in descending name order, so the first
    // claimant is the lexicographically LATEST group — this appears to
    // conflict with `Sizes::rollup`'s doc ('oldest pointer'); confirm which
    // direction is intended.
    let mut chunk_sharer_counts: BTreeMap<CondensedChunkId, u16> = BTreeMap::new();
    let mut pointergroup_stats: BTreeMap<String, Sizes> = BTreeMap::new();
    for (pointergroup_name, pointers_in_group) in pointergroups_to_pointers.iter().rev() {
        // Dedupe across the group's pointers and their parent chains.
        let mut deduped_chunks = BTreeSet::new();
        for pointer_name in pointers_in_group {
            deduped_chunks.extend(iter_over_all_chunkids_incl_parents(
                &pointers_to_parent_and_chunkids,
                &pointer_name,
            ))
        }
        let mut rollup_count = 0;
        for chunk in deduped_chunks {
            let count = chunk_sharer_counts.entry(chunk).or_default();
            *count += 1;
            if *count == 1 {
                rollup_count += 1;
            }
        }
        let entry = pointergroup_stats
            .entry(pointergroup_name.to_owned())
            .or_default();
        entry.rollup = rollup_count;
    }
    // Now go through again and update all the stats!
    // Phase 3: with the final sharer counts, compute total/unique/moral.
    for (pointergroup_name, pointers_in_group) in &pointergroups_to_pointers {
        let mut deduped_chunks = BTreeSet::new();
        for pointer_name in pointers_in_group {
            deduped_chunks.extend(iter_over_all_chunkids_incl_parents(
                &pointers_to_parent_and_chunkids,
                &pointer_name,
            ))
        }
        let mut unique_count = 0;
        // Histogram of shared chunks by sharer count, capped at 256 sharers.
        let mut shared_count_by_sharers = [0u32; 256];
        let total_count = deduped_chunks.len();
        for chunk in deduped_chunks {
            let count = chunk_sharer_counts[&chunk];
            if count == 1 {
                unique_count += 1;
            } else {
                let num_sharers = (count as usize).min(256);
                shared_count_by_sharers[num_sharers - 1] += 1;
            }
        }
        // 'Moral' responsibility: each shared chunk counts as 1/num_sharers.
        let mut sharers_sum: f64 = 0.0;
        for (sharers_minus_one, count) in shared_count_by_sharers.into_iter().enumerate() {
            sharers_sum += (count as f64) / (sharers_minus_one + 1) as f64;
        }
        let entry = pointergroup_stats
            .entry(pointergroup_name.to_owned())
            .or_default();
        entry.moral = (sharers_sum.ceil() as u32) + unique_count;
        entry.unique = unique_count;
        entry.total = total_count as u32;
    }
    // Phase 4: last-backup time per source; sources with no parseable
    // pointer keep `None`.
    let mut last_backed_up = BTreeMap::new();
    for source_name in descriptor.sources.keys().cloned() {
        last_backed_up.insert(source_name, None);
    }
    for pointer_name in pointers_to_parent_and_chunkids.keys() {
        if let Some((source_name, date_time)) = split_pointer_name(&pointer_name) {
            last_backed_up.insert(source_name, Some(date_time));
        }
    }
    Ok(Report {
        last_source_backups: last_backed_up,
        chunk_usage: pointergroup_stats,
        chunk_usages_aggregated: aggregate_chunk_usage_by_month,
        debug_stats,
    })
}
// Does not filter duplicates...
fn iter_over_all_chunkids_incl_parents<'a>(
pointers_to_parent_and_chunkids: &'a BTreeMap<
String,
(Option<String>, BTreeSet<CondensedChunkId>),
>,
pointer_name: &'a str,
) -> Box<dyn Iterator<Item = CondensedChunkId> + 'a> {
let (parent, chunks) = &pointers_to_parent_and_chunkids[pointer_name];
match parent {
None => Box::new(chunks.iter().copied()),
Some(parent) => Box::new(chunks.iter().copied().chain(
iter_over_all_chunkids_incl_parents(pointers_to_parent_and_chunkids, &parent),
)),
}
}
/// Gathers the condensed chunk IDs referenced by every normal file under
/// `root` (following indirection chunks via
/// `collect_chunk_ids_from_chunkref`).
fn collect_chunk_ids<RP: RawPile>(
    pile: &Pile<RP>,
    root: &TreeNode,
) -> anyhow::Result<BTreeSet<CondensedChunkId>> {
    let mut chunk_ids = BTreeSet::new();
    root.visit(
        &mut |tree_node, _| {
            // Chunk references live only on normal files.
            if let TreeNode::NormalFile { content, .. } = tree_node {
                collect_chunk_ids_from_chunkref(pile, content, &mut chunk_ids)?;
            }
            Ok(())
        },
        "".to_owned(),
    )?;
    Ok(chunk_ids)
}
/// Adds to `collection` the condensed form of every chunk ID reachable from
/// `chunk_ref`. Depth 0 means the reference IS a data chunk; otherwise the
/// stream unchunked one level shallower is a sequence of full chunk IDs,
/// read out fixed-size record by record and condensed.
fn collect_chunk_ids_from_chunkref<RP: RawPile>(
    pile: &Pile<RP>,
    chunk_ref: &RecursiveChunkRef,
    collection: &mut BTreeSet<CondensedChunkId>,
) -> anyhow::Result<()> {
    if chunk_ref.depth == 0 {
        collection.insert(condense_chunk_id(chunk_ref.chunk_id));
    } else {
        // Unchunk one level less deep, exposing the next level's chunk IDs.
        let shallower_chunk_ref = RecursiveChunkRef {
            chunk_id: chunk_ref.chunk_id,
            depth: chunk_ref.depth - 1,
        };
        let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
        let mut next_chunk_id: ChunkId = Default::default();
        loop {
            let read = unchunker.read(&mut next_chunk_id[..])?;
            if read == 0 {
                // Clean EOF on a record boundary.
                break;
            } else if read < next_chunk_id.len() {
                // Partial read: fill the rest of this chunk ID record.
                unchunker.read_exact(&mut next_chunk_id[read..])?;
            }
            collection.insert(condense_chunk_id(next_chunk_id));
        }
    }
    Ok(())
}
/// Prints the whole report: backup recency table first, then pile sizes.
pub fn print_report(report: &Report) -> anyhow::Result<()> {
    print_time_report(report)?;
    print_size_report(report)
}
/// Prints a coloured table of when each source was last backed up, sorted by
/// date (never-backed-up sources first, as `None` sorts lowest).
/// NOTE(review): `Utc::today()` and `Date<Utc>` are deprecated in newer
/// chrono releases — consider `Utc::now().date_naive()` if chrono is bumped.
pub fn print_time_report(report: &Report) -> anyhow::Result<()> {
    println!("\nBackup times");
    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL)
        .set_content_arrangement(ContentArrangement::DynamicFullWidth)
        .enforce_styling();
    table.set_header(vec![
        Cell::new("Source name").fg(Color::Cyan),
        Cell::new("Last backed up").fg(Color::Cyan),
    ]);
    let today = Utc::today();
    // Sort rows by (date, name); `None` dates sort before any `Some`.
    let sort_by_dates: Vec<(Option<Date<Utc>>, String)> = report
        .last_source_backups
        .iter()
        .map(|(name, datetime)| (datetime.map(|dt| dt.date()), name.to_owned()))
        .sorted()
        .collect();
    for (date, source_name) in sort_by_dates {
        match date {
            None => {
                // Never backed up: maximum alarm (blinking bold red).
                table.add_row(vec![
                    Cell::new(source_name).fg(Color::Magenta),
                    Cell::new("NEVER").fg(Color::Red).add_attributes(vec![
                        Attribute::SlowBlink,
                        Attribute::RapidBlink,
                        Attribute::Bold,
                    ]),
                ]);
            }
            Some(date) => {
                let number_of_days = today.signed_duration_since(date).num_days();
                let num_days_human = if number_of_days > 0 {
                    format!("{number_of_days} days ago")
                } else {
                    format!("today")
                };
                // Colour by staleness: <2 days green, <14 yellow, else red;
                // >28 days also blinks.
                let colour = if number_of_days < 2 {
                    Color::Green
                } else if number_of_days < 14 {
                    Color::Yellow
                } else {
                    Color::Red
                };
                let formatted_date = date.format("%F");
                let mut val_cell =
                    Cell::new(format!("{formatted_date} {num_days_human}")).fg(colour);
                if number_of_days > 28 {
                    val_cell = val_cell.add_attribute(Attribute::SlowBlink);
                }
                table.add_row(vec![Cell::new(source_name).fg(Color::Magenta), val_cell]);
            }
        }
    }
    println!("{table}");
    Ok(())
}
/// Prints a per-pointer table of chunk usage (rollup / unique / moral /
/// total). Sizes are shown as chunk counts with an approximate byte size
/// when the mean chunk size is available from the debug statistics.
pub fn print_size_report(report: &Report) -> anyhow::Result<()> {
    println!("\nPile size");
    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL)
        .set_content_arrangement(ContentArrangement::DynamicFullWidth)
        .enforce_styling();
    table.set_header(vec![
        Cell::new("Pointer name").fg(Color::Cyan),
        Cell::new("Rollup size").fg(Color::Magenta),
        Cell::new("Unique size").fg(Color::Magenta),
        Cell::new("Moral size").fg(Color::Magenta),
        Cell::new("Total size").fg(Color::Magenta),
    ]);
    // Mean chunk size lets us turn chunk counts into approximate byte sizes.
    let average_chunk_size = report
        .debug_stats
        .as_ref()
        .map(|stats| stats.total_chunk_size as f64 / stats.number_of_chunks as f64);
    let size_cell = |count| Cell::new(format_size(count, average_chunk_size)).fg(Color::Yellow);
    for (pointer_name, sizes) in &report.chunk_usage {
        table.add_row(vec![
            Cell::new(pointer_name).fg(Color::Blue),
            size_cell(sizes.rollup),
            size_cell(sizes.unique),
            size_cell(sizes.moral),
            size_cell(sizes.total),
        ]);
    }
    println!("{table}");
    Ok(())
}
/// Formats a chunk count as e.g. `123 c ~1.2 MiB`; the approximate byte size
/// suffix is omitted when no average chunk size is known.
fn format_size(chunks: u32, average_chunk_size: Option<f64>) -> String {
    let mut suffix = String::new();
    if let Some(bytes_per_chunk) = average_chunk_size {
        let approx_bytes = (chunks as f64 * bytes_per_chunk) as u64;
        let mut opts = humansize::file_size_opts::BINARY;
        opts.decimal_places = 1;
        suffix = format!(" ~{}", approx_bytes.file_size(opts).unwrap());
    }
    format!("{} c{}", chunks, suffix)
}
/// Recursively sums `Metadata::size()` over everything under `dir`,
/// including the reported size of directory entries themselves.
fn calculate_total_filesize_of_dir(dir: &Path) -> anyhow::Result<u64> {
    let mut running_total = 0u64;
    for entry in std::fs::read_dir(dir)? {
        let entry = entry?;
        let meta = entry.metadata()?;
        running_total += meta.size();
        if meta.is_dir() {
            running_total += calculate_total_filesize_of_dir(&entry.path())?;
        }
    }
    Ok(running_total)
}
/// Prints a table describing the filesystem holding `pile_path`: theoretical
/// size, usable size, used bytes, bytes used by the pile itself, and
/// available bytes (colour-coded: red < 8 GiB, yellow < 64 GiB).
pub fn print_filesystem_space(pile_path: &Path) -> anyhow::Result<()> {
    let usage_for_pile = calculate_total_filesize_of_dir(&pile_path)?;
    let path_c = CString::new(pile_path.as_os_str().as_bytes()).unwrap();
    let stats = unsafe {
        // SAFETY: `stats` is zero-initialised and owned by us; statfs only
        // writes into it, and `path_c` is a valid NUL-terminated string.
        let mut stats: libc::statfs = mem::zeroed();
        match libc::statfs(path_c.as_ptr(), &mut stats) {
            0 => Ok(stats),
            // statfs returns -1 on failure with the real error code in errno,
            // so capture errno rather than the -1 return value.
            _ => Err(std::io::Error::last_os_error()),
        }
    }?;
    // On a BTRFS system with 2 disks in RAID1, note (about df -h):
    // - 'Size' shows the average size of the two disks. I think of it as 'ideal size'.
    // - 'Avail' seems to show the actual number of bytes usable.
    // - 'Used' seems to show the actual number of bytes used.
    // In short: probably avoid relying on 'size'.
    let block_size = stats.f_bsize as i64;
    let used_bytes = (stats.f_blocks - stats.f_bfree) as i64 * block_size;
    let avail_bytes = stats.f_bavail as i64 * block_size;
    let usable_bytes = used_bytes + avail_bytes;
    let theoretical_size = stats.f_blocks as i64 * block_size;
    let mut format = humansize::file_size_opts::BINARY;
    format.decimal_places = 1;
    format.decimal_zeroes = 1;
    println!("\nFilesystem Information");
    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL)
        .set_content_arrangement(ContentArrangement::DynamicFullWidth)
        .enforce_styling();
    table.set_header(vec![
        Cell::new("Theoretical Size").fg(Color::Cyan),
        Cell::new("Usable Size").fg(Color::Cyan),
        Cell::new("Used").fg(Color::Cyan),
        Cell::new("Used for Pile").fg(Color::Cyan),
        Cell::new("Available").fg(Color::Cyan),
    ]);
    let available_space_colour = if avail_bytes < 8 * 1024 * 1024 * 1024 {
        Color::Red
    } else if avail_bytes < 64 * 1024 * 1024 * 1024 {
        Color::Yellow
    } else {
        Color::Green
    };
    table.add_row(vec![
        Cell::new(format!(
            "{:>9}",
            theoretical_size.file_size(&format).unwrap()
        ))
        .fg(Color::Blue),
        Cell::new(format!("{:>9}", usable_bytes.file_size(&format).unwrap())).fg(Color::Blue),
        Cell::new(format!("{:>9}", used_bytes.file_size(&format).unwrap())).fg(Color::Blue),
        Cell::new(format!("{:>9}", usage_for_pile.file_size(&format).unwrap())).fg(Color::Blue),
        Cell::new(format!("{:>9}", avail_bytes.file_size(&format).unwrap()))
            .fg(available_space_colour),
    ]);
    print!("{table}");
    Ok(())
}

View File

@ -1,116 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
// TODO how do we handle?:
// - (important) yama push of one pile to another
// - backup policy stuff like 'minimum backup frequency' ... show when it's not been done
// - backup policy stuff like 'minimum on two different disks, not powered at the same time...'
/// Top-level Datman configuration, loaded from `datman.toml`
/// (see `load_descriptor`).
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct Descriptor {
    /// Dataset labels
    pub labels: Vec<String>,
    /// Sources
    pub sources: HashMap<String, SourceDescriptor>,
    /// Paths to destination Yama Piles. Remote Piles need a local virtual pile to specify the layers.
    pub piles: HashMap<String, DestPileDescriptor>,
    /// Remote hosts that sources can be backed up from over SSH, by name.
    pub remote_hosts: HashMap<String, RemoteHostDescriptor>,
    /// Optional retention policy; omitted from serialized output when absent.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub retention: Option<RetentionPolicyConfig>,
}
/// How to reach a remote host that sources are backed up from.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct RemoteHostDescriptor {
    /// SSH target in `user@host` form.
    pub user_at_host: String,
    /// Path to the `datman` binary on the remote; when unset, `datman` is
    /// invoked from the remote PATH.
    pub path_to_datman: Option<String>,
}
/// Retention policy configuration.
/// NOTE(review): the counts appear to be "number of daily/weekly/monthly/
/// yearly backups to keep" — confirm against the retention logic.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct RetentionPolicyConfig {
    pub daily: u32,
    pub weekly: u32,
    pub monthly: u32,
    pub yearly: u32,
}
/// A single backup source: either a directory on some host, or the output of
/// a helper program (a "virtual source").
///
/// NOTE: serde `untagged` tries the variants in declaration order during
/// deserialization — do not reorder them.
#[derive(Clone, Serialize, Deserialize, Debug)]
#[serde(untagged)]
pub enum SourceDescriptor {
    /// A directory tree to back up.
    DirectorySource {
        /// Host the directory lives on; used to look up the matching entry
        /// in `Descriptor::remote_hosts`.
        hostname: String,
        /// The directory to back up.
        directory: PathBuf,
        /// Whether the scan may cross filesystem boundaries
        /// (defaults to false: stay on one filesystem).
        #[serde(default)]
        cross_filesystems: bool,
    },
    VirtualSource {
        /// The name of the helper program that will be used to do this backup.
        helper: String,
        /// The label that will be assigned to this source.
        label: String,
        /// The kind of virtual source (how it operates).
        kind: VirtualSourceKind,
        /// Extra helper-specific settings, passed through verbatim.
        #[serde(flatten)]
        extra_args: HashMap<String, toml::Value>,
    },
}
impl SourceDescriptor {
    /// Gets the hostname that this source descriptor is for, if possible.
    /// Virtual sources run locally, so they have no associated remote host.
    pub fn get_remote_hostname(&self) -> Option<&str> {
        if let SourceDescriptor::DirectorySource { hostname, .. } = self {
            Some(hostname.as_str())
        } else {
            None
        }
    }
}
/// How a virtual source produces its data.
#[derive(Clone, Serialize, Deserialize, Debug)]
#[serde(untagged)]
pub enum VirtualSourceKind {
    /// The helper emits the backup on stdout; `filename` presumably names
    /// the resulting file in the backup — confirm with the backup code.
    Stdout {
        #[serde(rename = "stdout")]
        filename: String,
    },
    // TODO(feature) TempDir
}
/// A destination Yama pile and which labels get backed up into it.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct DestPileDescriptor {
    /// Path to the pile, resolved relative to the descriptor directory.
    pub path: PathBuf,
    /// Only data carrying these labels is stored in this pile.
    pub included_labels: Vec<String>,
}
/// Loads the Datman descriptor from `<path>/datman.toml`.
pub fn load_descriptor(path: &Path) -> anyhow::Result<Descriptor> {
    let descriptor_file = path.join("datman.toml");
    let bytes = std::fs::read(descriptor_file)?;
    Ok(toml::de::from_slice(&bytes)?)
}

View File

@ -1,288 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use anyhow::anyhow;
use anyhow::Context;
use arc_interner::ArcIntern;
use byteorder::WriteBytesExt;
use glob::Pattern;
use log::warn;
use serde::{Deserialize, Serialize};
use crate::labelling::State::{Excluded, Labelled, Split};
use crate::tree::{FileTree, FileTree1};
/// Loads the labelling rules for `source_name` from
/// `<path>/labelling/<source>.zst`, returning empty default rules when no
/// rules file exists yet.
pub fn load_labelling_rules(path: &Path, source_name: &str) -> anyhow::Result<LabellingRules> {
    let rule_path = path.join("labelling").join(format!("{}.zst", source_name));
    if !rule_path.exists() {
        return Ok(LabellingRules::default());
    }
    let rule_file = File::open(&rule_path)?;
    let decoder = zstd::stream::read::Decoder::new(rule_file)?;
    Ok(LabellingRules::load(BufReader::new(decoder))?)
}
/// Writes `rules` for `source_name` to `<path>/labelling/<source>.zst`,
/// keeping the previous file (if any) as a `<source>.zst~` backup.
pub fn save_labelling_rules(
    path: &Path,
    source_name: &str,
    rules: &LabellingRules,
) -> anyhow::Result<()> {
    let labelling_dir = path.join("labelling");
    let rule_path = labelling_dir.join(format!("{}.zst", source_name));
    if rule_path.exists() {
        // Preserve the old rules as a backup before overwriting.
        let backup_rule_path = labelling_dir.join(format!("{}.zst~", source_name));
        std::fs::rename(&rule_path, &backup_rule_path)?;
    }
    let rule_file = File::create(rule_path)?;
    let mut zstd_writer = zstd::stream::write::Encoder::new(rule_file, 18)?;
    rules.save(&mut zstd_writer)?;
    // finish() must be called, or the zstd stream is left truncated.
    zstd_writer.finish()?;
    Ok(())
}
/// An interned label name (cheap to clone and compare).
#[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialOrd, PartialEq, Hash)]
pub struct Label(pub ArcIntern<String>);
/// The labelling state of a node in the source tree.
#[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialOrd, PartialEq)]
pub enum State {
    /// Carries the given label.
    Labelled(Label),
    /// Children are labelled individually; this state is not inherited.
    Split,
    /// Excluded from the backup.
    Excluded,
}
impl State {
pub fn should_inherit(&self) -> bool {
match self {
Labelled(_) => true,
Split => false,
Excluded => true,
}
}
}
/// A labelling rule that applies to any path matching a glob pattern.
#[derive(Clone, Debug)]
pub struct GlobRule {
    /// The original pattern text (kept for serialization).
    pub pattern: String,
    /// The compiled form of `pattern`.
    pub glob: Pattern,
    /// State assigned to matching paths.
    pub outcome: State,
}
/// The labelling rules for one source: exact-path rules plus ordered glob
/// rules (exact paths take precedence; see `apply`).
#[derive(Clone, Debug, Default)]
pub struct LabellingRules {
    /// State keyed by exact (source-relative) path.
    pub position_based_rules: HashMap<String, State>,
    /// Glob rules, tried in declaration order after the position rules.
    pub glob_based_rules: Vec<GlobRule>,
}
impl LabellingRules {
pub fn load<R: BufRead>(mut input: R) -> anyhow::Result<Self> {
let mut result = LabellingRules {
position_based_rules: Default::default(),
glob_based_rules: Default::default(),
};
let mut str = String::new();
loop {
str.clear();
let line_len = input.read_line(&mut str)?;
if line_len == 0 {
break;
}
if &str == "---\n" {
// start reading glob patterns now.
break;
}
let pieces: Vec<&str> = str.trim_end_matches('\n').split('\t').collect();
if pieces.len() == 2 {
match pieces[1] {
"?" => {
result
.position_based_rules
.insert(pieces[0].to_owned(), Split);
}
"!" => {
result
.position_based_rules
.insert(pieces[0].to_owned(), Excluded);
}
label_str => {
result.position_based_rules.insert(
pieces[0].to_owned(),
Labelled(Label(ArcIntern::new(label_str.to_owned()))),
);
}
}
} else {
warn!("not 2 pieces: {:?}", str);
}
}
loop {
str.clear();
let line_len = input.read_line(&mut str)?;
if line_len == 0 {
break;
}
let pieces: Vec<&str> = str.trim().split('\t').collect();
if pieces.len() == 2 {
let outcome = match pieces[1] {
"?" => Split,
"!" => Excluded,
label_str => Labelled(Label(ArcIntern::new(label_str.to_owned()))),
};
let pattern = pieces[0].to_owned();
let glob = Pattern::new(&pattern)
.with_context(|| anyhow!("Whilst compiling glob: {:?}", pattern))?;
result.glob_based_rules.push(GlobRule {
pattern,
glob,
outcome,
});
} else {
warn!("not 2 pieces: {:?}", str);
}
}
Ok(result)
}
pub fn save<W: Write>(&self, mut output: W) -> anyhow::Result<()> {
for (path, rule) in self.position_based_rules.iter() {
output.write_all(path.as_bytes())?;
output.write_u8('\t' as u8)?;
match rule {
Labelled(label) => {
output.write_all(label.0.as_bytes())?;
}
Split => {
output.write_u8('?' as u8)?;
}
Excluded => {
output.write_u8('!' as u8)?;
}
}
output.write_u8('\n' as u8)?;
}
output.write_all("---\n".as_bytes())?;
for glob_rule in self.glob_based_rules.iter() {
output.write_all(glob_rule.pattern.as_bytes())?;
output.write_u8('\t' as u8)?;
match &glob_rule.outcome {
Labelled(label) => {
output.write_all(label.0.as_bytes())?;
}
Split => {
output.write_u8('?' as u8)?;
}
Excluded => {
output.write_u8('!' as u8)?;
}
}
output.write_u8('\n' as u8)?;
}
output.flush()?;
Ok(())
}
pub fn apply(&self, path: &str) -> Option<State> {
if let Some(rule_state) = self.position_based_rules.get(path) {
return Some(rule_state.clone());
}
for glob_rule in self.glob_based_rules.iter() {
if glob_rule.glob.matches(path) {
return Some(glob_rule.outcome.clone());
}
}
None
}
pub fn get_exclusions_set(&self, base: &Path) -> BTreeSet<PathBuf> {
let mut exclusions = BTreeSet::new();
for (ext_path, state) in &self.position_based_rules {
assert!(ext_path.is_empty() || ext_path.starts_with('/'));
let full_path = PathBuf::from(format!(
"{}{ext_path}",
base.to_str().expect("base path must always be utf-8")
));
if state == &Excluded {
exclusions.insert(full_path);
}
}
exclusions
}
}
/// Uninteractively label the nodes.
///
/// Walks the tree depth-first, assigning each node's metadata: an explicit
/// rule match always wins; otherwise the parent's state is inherited when it
/// is inheritable (`Split` stops inheritance), else the node gets `None`.
/// `labels` is currently unused here but kept for interface stability.
pub fn label_node(
    path: String,
    current_state: Option<State>,
    node: &mut FileTree1<Option<State>>,
    labels: &Vec<Label>,
    rules: &LabellingRules,
) -> anyhow::Result<()> {
    let mut next_state = current_state;
    if let Some(rule_state) = rules.apply(&path) {
        // An explicit rule overrides whatever would have been inherited.
        next_state = Some(rule_state);
    } else if !next_state
        .as_ref()
        .map(|s| s.should_inherit())
        .unwrap_or(false)
    {
        next_state = None;
    }
    match node {
        FileTree::NormalFile { meta, .. } => {
            *meta = next_state;
        }
        FileTree::Directory { meta, children, .. } => {
            *meta = next_state.clone();
            for (child_name, child) in children.iter_mut() {
                let child_path = format!("{}/{}", path, child_name);
                label_node(child_path, next_state.clone(), child, labels, rules)?;
            }
        }
        FileTree::SymbolicLink { meta, .. } => {
            *meta = next_state;
        }
        FileTree::Other(_) => {
            panic!("Other() nodes shouldn't be present here.");
        }
    }
    Ok(())
}
/// Interns the given string as a [`Label`].
pub fn str_to_label<I: AsRef<str>>(input: I) -> Label {
    let owned = input.as_ref().to_owned();
    Label(ArcIntern::new(owned))
}

View File

@ -1,12 +0,0 @@
pub mod commands;
pub mod descriptor;
pub mod labelling;
pub mod remote;
pub mod tree;
/// Returns this machine's hostname as a `String`.
///
/// Panics if the hostname cannot be fetched or is not valid Unicode.
pub fn get_hostname() -> String {
    let raw = hostname::get().expect("No hostname");
    raw.into_string()
        .expect("Hostname string must be sensible.")
}

View File

@ -1,2 +0,0 @@
pub mod backup_source_requester;
pub mod backup_source_responder;

View File

@ -1,304 +0,0 @@
use crate::commands::backup::{get_pointer_name_at, label_filter_and_convert};
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor};
use crate::labelling::load_labelling_rules;
use crate::tree::FileTree;
use anyhow::{anyhow, bail};
use chrono::Utc;
use log::info;
use std::collections::BTreeSet;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::process::{Child, Command, Stdio};
use std::sync::Arc;
use yama::commands::{load_pile_descriptor, open_pile};
use yama::definitions::{PartialPointerData, TreeNode};
use yama::operations::storing::{pointer_ops_prepare_to_store, pointers_ops_after_store};
use yama::pile::access_guard::PileGuard;
use yama::pile::{Pile, RawPile, StoragePipelineSettings};
use yama::progress::ProgressTracker;
use yama::remote::responder::{Responder, ResponderWritingPipeline};
use yama::remote::{read_message, write_message};
use yama::utils::get_number_of_workers;
// SECURITY WARNING: the system you connect to using this mechanism will receive full access to
// your Yama pile. Do NOT connect to untrusted or compromised systems using this mechanism (yet).
/// Exchanges version banners with the remote backup source responder and
/// verifies that both sides run exactly the same Datman version.
///
/// Returns an error on a banner mismatch.
pub fn introduction<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
    info!("Introduction.");
    let version = env!("CARGO_PKG_VERSION");
    write_message(
        write,
        &format!("Datman v{} Backup Source Requester", version),
    )?;
    write.flush()?;
    let foreign_side: String = read_message(read)?;
    let expected_foreign_side = format!("Datman v{} Backup Source Responder", version);
    if foreign_side != expected_foreign_side {
        bail!(
            "Datman version mismatch. Expected {:?}, got {:?}",
            expected_foreign_side,
            foreign_side
        );
    }
    Ok(())
}
/// Asks the remote to scan `path` and returns the resulting tree
/// (`None` if the remote scan failed).
pub fn scanning<R: Read, W: Write>(
    read: &mut R,
    write: &mut W,
    path: &Path,
    one_filesystem: bool,
    exclusions: &BTreeSet<PathBuf>,
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
    info!("Scanning.");
    // Send the command followed by its parameters, in protocol order.
    write_message(write, &"scan")?;
    write_message(write, &path)?;
    write_message(write, &one_filesystem)?;
    write_message(write, exclusions)?;
    write.flush()?;
    Ok(read_message(read)?)
}
/// Drives the remote 'chunk' operation: asks the remote to chunk `tree_node`
/// rooted at `path`, while this side services the remote's pile requests by
/// acting as a `Responder` on the same connection.
///
/// Returns the reclaimed read/write halves of the connection (for further
/// commands) together with the pointer data produced by the remote.
pub fn chunking<
    R: Read + Send + 'static,
    W: Write + Send + 'static,
    RP: RawPile + 'static,
    PT: ProgressTracker + Send + 'static,
>(
    read: R,
    mut write: W,
    path: &Path,
    tree_node: &TreeNode,
    raw_pile: Arc<RP>,
    progress_bar: PT,
    use_writing_pipeline: bool,
) -> anyhow::Result<(R, W, PartialPointerData)> {
    info!("Chunking.");
    write_message(&mut write, &"chunk")?;
    write_message(&mut write, &path)?;
    write_message(&mut write, tree_node)?;
    write.flush()?;
    // Optionally set up a parallel storage pipeline for incoming chunks.
    let (writing_pipeline, control_rx) = if use_writing_pipeline {
        let sps = StoragePipelineSettings {
            num_compressors: get_number_of_workers("YAMA_PL_COMPRESSORS") as u32,
            compressor_input_bound: 32,
            writer_input_bound: 32,
        };
        let (control_tx, control_rx) = crossbeam_channel::unbounded();
        let pipeline = raw_pile.build_storage_pipeline(sps, control_tx)?;
        (
            Some(ResponderWritingPipeline {
                pipeline_submission: pipeline,
            }),
            Some(control_rx),
        )
    } else {
        (None, None)
    };
    // Wrap the pile in a guard before exposing it to the responder workers
    // (the `true` flag presumably limits what the remote may do — see
    // PileGuard for the exact semantics).
    let guarded_pile = PileGuard::new(Arc::clone(&raw_pile), true);
    let (r_handle, w_handle, join_handles) = Responder::start(
        read,
        write,
        get_number_of_workers("YAMA_RESPONDERS") as u16,
        Arc::new(guarded_pile),
        writing_pipeline,
        progress_bar,
    );
    info!("Waiting for remote to finish chunking.");
    // Join the worker threads first, then reclaim the connection halves.
    for handle in join_handles {
        handle.join().expect("Join handle should not fail");
    }
    let mut read = r_handle.join().unwrap();
    let write = w_handle.join().unwrap();
    if let Some(control_rx) = control_rx {
        // Drain pipeline control messages until all senders hang up.
        while let Ok(_) = control_rx.recv() {
            // TODO nop
        }
    }
    info!("Remote finished chunking.");
    let pointer_data: PartialPointerData = read_message(&mut read)?;
    Ok((read, write, pointer_data))
}
/// Asks the remote to exit and waits for its "exit" acknowledgement.
pub fn quit<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
    write_message(write, &"exit")?;
    write.flush()?;
    let reply: String = read_message(read)?;
    if reply != "exit" {
        bail!("Exit failed");
    }
    Ok(())
}
/// Spawns an SSH connection to the named remote host, starting the Datman
/// backup source responder there with piped stdin/stdout for the protocol
/// (stderr is passed through for diagnostics).
///
/// The remote binary path defaults to `datman` when not configured.
pub fn connect_to_remote(descriptor: &Descriptor, hostname: &str) -> anyhow::Result<Child> {
    let remote_host_descriptor = descriptor
        .remote_hosts
        .get(hostname)
        .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?;
    let connection = Command::new("ssh")
        .arg(&remote_host_descriptor.user_at_host)
        // `--` stops ssh from parsing the remote command as options.
        .arg("--")
        .arg(
            remote_host_descriptor
                .path_to_datman
                .as_deref()
                .unwrap_or("datman"),
        )
        .arg("_backup_source_responder")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::inherit())
        .spawn()?;
    Ok(connection)
}
/// Backs up a remote directory source into a local destination pile.
///
/// Protocol sequence: connect over SSH, exchange version banners, request a
/// remote scan, filter/label the resulting tree locally, pick the newest
/// existing pointer with the `<source>+` prefix as the parent, then drive the
/// remote 'chunk' operation and finalize the new pointer.
///
/// Fails if a pointer with today's generated name already exists, and
/// virtual sources are not yet supported remotely.
pub fn backup_remote_source_to_destination<PT: ProgressTracker + Send + 'static>(
    source: &SourceDescriptor,
    dest: &DestPileDescriptor,
    descriptor: &Descriptor,
    desc_path: &Path,
    source_name: &str,
    dest_name: &str,
    _num_workers: u8,
    progress_bar: PT,
) -> anyhow::Result<()> {
    match source {
        SourceDescriptor::DirectorySource {
            hostname,
            directory,
            cross_filesystems,
        } => {
            let remote_host_descriptor = descriptor
                .remote_hosts
                .get(hostname)
                .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?;
            info!(
                "Looking to backup {} (from {}) to {}",
                source_name, remote_host_descriptor.user_at_host, dest_name
            );
            let connection = connect_to_remote(descriptor, hostname)?;
            let mut read = connection.stdout.expect("Requested stdout");
            let mut write = connection.stdin.expect("Requested stdin");
            // first start off with an introduction
            info!("Connecting...");
            introduction(&mut read, &mut write)?;
            // Exclusions are computed locally and sent with the scan request.
            let rules = load_labelling_rules(desc_path, source_name)?;
            let exclusions = rules.get_exclusions_set(directory);
            // then request to scan
            info!("Requesting scan... (this may take some time)");
            let scan_result = scanning(
                &mut read,
                &mut write,
                directory.as_ref(),
                !*cross_filesystems,
                &exclusions,
            )?
            .ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?;
            // Apply labels and drop anything this destination doesn't want.
            let mut root =
                label_filter_and_convert(scan_result, descriptor, source_name, &rules, dest)?
                    .ok_or_else(|| anyhow!("Empty filter..."))?;
            let absolute_dest_path = desc_path.join(&dest.path);
            let pile_descriptor = load_pile_descriptor(&absolute_dest_path)?;
            let pile = open_pile(&absolute_dest_path, &pile_descriptor)?;
            let pointer_name = get_pointer_name_at(&source_name, Utc::now());
            if pile.read_pointer(pointer_name.as_str())?.is_some() {
                bail!(
                    "Pointer by name {:?} already exists; refusing to overwrite.",
                    pointer_name
                );
            }
            info!("Will write as pointer {:?}.", pointer_name);
            info!("Searching for suitable parents.");
            // The parent is the lexicographically-greatest existing pointer
            // with the `<source>+` prefix (i.e. the most recent backup, given
            // the timestamped naming scheme).
            let mut parent: Option<String> = None;
            let prefix = format!("{}+", source_name);
            for pointer in pile.list_pointers()?.iter() {
                if pointer.starts_with(&prefix) {
                    match parent.as_ref() {
                        None => {
                            parent = Some(pointer.to_owned());
                        }
                        Some(cur_parent) => {
                            if cur_parent < pointer {
                                parent = Some(pointer.to_owned());
                            }
                        }
                    }
                }
            }
            match parent.as_ref() {
                Some(parent) => {
                    info!("Using parent: {}", parent);
                }
                None => {
                    info!("No suitable parent found.");
                }
            }
            info!("Storing remote using Yama (this may take some time)...");
            let raw_pile = Arc::new(pile.raw_pile);
            let pile = Pile::new(raw_pile.clone());
            pointer_ops_prepare_to_store(&pile, &mut root, &parent)?;
            info!(
                "Have pointer_name = {:?}, parent = {:?}",
                pointer_name, parent
            );
            // Drive the remote chunking; we keep the connection halves so we
            // can send a clean 'exit' afterwards.
            let (mut read, mut write, pointer_data) = chunking(
                read,
                write,
                directory.as_ref(),
                &root,
                raw_pile,
                progress_bar,
                true,
            )?;
            quit(&mut read, &mut write)?;
            // Only after the remote is done do we commit the pointer.
            pointers_ops_after_store(&pile, &pointer_name, &pointer_data.complete(parent))?;
            pile.flush()?;
            info!("Stored! Checking for existence...");
            if pile.list_pointers()?.contains(&pointer_name) {
                info!("Exists!");
            } else {
                bail!("Pointer {:?} does not exist...", &pointer_name);
            }
        }
        SourceDescriptor::VirtualSource { .. } => {
            unimplemented!("Can't currently back up virtualsources on remotes...")
        }
    }
    Ok(())
}

View File

@ -1,187 +0,0 @@
// This file implements the responder side of the backup source protocol -- the protocol used
// to connect to remote backup sources.
use std::collections::BTreeSet;
use std::io::{stdin, stdout, Read, Write};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use anyhow::bail;
use crossbeam_channel::Sender;
use log::info;
use yama::definitions::{PartialPointerData, TreeNode};
use yama::pile::{Pile, RawPile};
use yama::progress::ProgressTracker;
use yama::remote::requester::Requester;
use yama::remote::{read_message, write_message, RequestBody, ResponseBody};
use yama::utils::get_number_of_workers;
use crate::tree::scan;
/// Exchanges version banners with the remote backup source requester and
/// verifies that both sides run exactly the same Datman version.
///
/// Returns an error on a banner mismatch.
pub fn introduction<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
    let version = env!("CARGO_PKG_VERSION");
    write_message(
        write,
        &format!("Datman v{} Backup Source Responder", version),
    )?;
    write.flush()?;
    let foreign_side: String = read_message(read)?;
    let expected_foreign_side = format!("Datman v{} Backup Source Requester", version);
    if foreign_side != expected_foreign_side {
        bail!(
            "Datman version mismatch. Expected {:?}, got {:?}",
            expected_foreign_side,
            foreign_side
        );
    }
    Ok(())
}
/// Services a 'scan' request: receives the scan parameters, performs the
/// local scan and sends back the resulting tree (`None` on failure).
pub fn scanning<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
    // Parameters arrive in the order the requester sends them.
    let path: PathBuf = read_message(read)?;
    let one_filesystem: bool = read_message(read)?;
    let exclusions: BTreeSet<PathBuf> = read_message(read)?;
    let result = scan(&path, one_filesystem, &exclusions)?;
    write_message(write, &result)?;
    write.flush()?;
    Ok(())
}
/// A `ProgressTracker` that forwards progress to the requester over its
/// command channel instead of drawing anything locally.
pub struct ProgressSender {
    /// When the last update was sent (used for rate limiting).
    pub last_sent: Instant,
    pub current_progress: u64,
    pub current_max: u64,
    // TODO actually propagate this
    pub current_message: String,
    /// Channel into the requester; updates are fire-and-forget
    /// (no response channel attached).
    pub sender: Sender<(RequestBody, Option<Sender<ResponseBody>>)>,
}
impl ProgressSender {
    /// Pushes a progress update to the requester immediately and records the
    /// send time for rate limiting.
    pub fn send_now(&mut self, _include_message: bool) {
        let body = RequestBody::Progress {
            current: self.current_progress,
            max: self.current_max,
        };
        self.sender
            .send((body, None))
            .expect("Progress sender failed");
        self.last_sent = Instant::now();
    }
    /// Sends a progress update only if none has been sent for roughly a
    /// second, keeping protocol traffic bounded.
    pub fn send_if_overdue(&mut self) {
        if self.last_sent.elapsed().as_millis() >= 1024 {
            self.send_now(false);
        }
    }
}
impl ProgressTracker for ProgressSender {
    // Each setter updates the local counters, then forwards them to the
    // requester (rate-limited by send_if_overdue()).
    fn inc_progress(&mut self, delta_progress: u64) {
        self.current_progress += delta_progress;
        self.send_if_overdue();
    }
    fn set_current(&mut self, current_progress: u64) {
        self.current_progress = current_progress;
        self.send_if_overdue();
    }
    fn set_max_size(&mut self, max_size: u64) {
        self.current_max = max_size;
        self.send_if_overdue();
    }
}
// TODO use io-streams crate and get rid of the duplication!!
/// Performs the responder side of the 'chunk' command over stdin/stdout:
/// reads the job (path + tree) from stdin, then stores the tree into the
/// requester's pile (reached back over the same stdio connection) and
/// returns the resulting pointer data.
pub fn chunking_stdio() -> anyhow::Result<PartialPointerData> {
    // Read the job description inside a scope so the stdin lock is released
    // before the Requester takes over the stream.
    let (path, tree_node) = {
        let stdin = stdin();
        let mut read = stdin.lock();
        let path: PathBuf = read_message(&mut read)?;
        let tree_node: TreeNode = read_message(&mut read)?;
        (path, tree_node)
    };
    let (pointer_data, requester_join_handles) = {
        let (yama_requester, requester_join_handles) = Requester::new_from_stdio();
        let command_sender = yama_requester.clone_command_sender();
        info!("progress sender in use");
        // Progress is reported back over the same command channel.
        let mut progress_bar = ProgressSender {
            last_sent: Instant::now(),
            current_progress: 0,
            current_max: 0,
            current_message: "".to_string(),
            sender: command_sender,
        };
        // The requester itself acts as the pile: chunk writes go back over
        // stdio to the other side.
        let raw_pile: Box<dyn RawPile> = Box::new(yama_requester);
        let pile = Pile::new(raw_pile);
        let pointer_data = yama::operations::storing::store_without_pointer_ops(
            &Arc::new(pile),
            &path,
            tree_node,
            get_number_of_workers("YAMA_CHUNKERS"),
            &mut progress_bar,
        )?;
        (pointer_data, requester_join_handles)
    };
    info!("Waiting to join.");
    for join_handle in requester_join_handles {
        join_handle.join().expect("Expected to join handle");
    }
    info!("Chunking completed.");
    Ok(pointer_data)
}
/// Main loop of the backup source responder: performs the version handshake,
/// then services "scan" / "chunk" / "exit" commands from stdin until asked
/// to exit. Unknown commands abort with an error.
pub fn handler_stdio() -> anyhow::Result<()> {
    let stdin = stdin();
    let stdout = stdout();
    let mut read = stdin.lock();
    let mut write = stdout.lock();
    info!("Introduction.");
    introduction(&mut read, &mut write)?;
    loop {
        let command: String = read_message(&mut read)?;
        match command.as_str() {
            "scan" => {
                info!("Scanning.");
                scanning(&mut read, &mut write)?;
            }
            "chunk" => {
                info!("Chunking.");
                // chunking_stdio() locks stdin/stdout itself, so release our
                // locks for its duration and retake them afterwards.
                drop(read);
                drop(write);
                let pointer_data = chunking_stdio()?;
                read = stdin.lock();
                write = stdout.lock();
                write_message(&mut write, &pointer_data)?;
                write.flush()?;
            }
            "exit" => {
                // Echo the exit acknowledgement so the peer can verify a
                // clean shutdown.
                write_message(&mut write, &"exit")?;
                write.flush()?;
                break;
            }
            othercommand => {
                bail!("Don't understand {:?}", othercommand);
            }
        }
    }
    Ok(())
}

View File

@ -1,359 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Debug;
use std::fs::{read_link, symlink_metadata, DirEntry, Metadata};
use std::io::ErrorKind;
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use anyhow::anyhow;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::{debug, info, warn};
use serde::{Deserialize, Serialize};
pub use yama::definitions::FilesystemOwnership;
pub use yama::definitions::FilesystemPermissions;
/// A filesystem tree annotated with per-node metadata.
///
/// The four type parameters are the metadata types carried by normal files
/// (`NMeta`), directories (`DMeta`), symbolic links (`SMeta`) and
/// unsupported/other nodes (`Other`) respectively.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum FileTree<NMeta, DMeta, SMeta, Other>
where
    NMeta: Debug + Clone + Eq + PartialEq,
    DMeta: Debug + Clone + Eq + PartialEq,
    SMeta: Debug + Clone + Eq + PartialEq,
    Other: Debug + Clone + Eq + PartialEq,
{
    /// A regular file.
    NormalFile {
        /// modification time in ms
        mtime: u64,
        ownership: FilesystemOwnership,
        permissions: FilesystemPermissions,
        meta: NMeta,
    },
    /// A directory; children are keyed by name.
    Directory {
        ownership: FilesystemOwnership,
        permissions: FilesystemPermissions,
        children: BTreeMap<String, FileTree<NMeta, DMeta, SMeta, Other>>,
        meta: DMeta,
    },
    /// A symbolic link (stored as its target string, not followed).
    SymbolicLink {
        ownership: FilesystemOwnership,
        target: String,
        meta: SMeta,
    },
    /// Any other kind of node (e.g. not a file/dir/symlink).
    Other(Other),
}
/// A `FileTree` whose file, directory and symlink metadata share one type.
pub type FileTree1<A> = FileTree<A, A, A, ()>;
impl<NMeta, DMeta, SMeta, Other> FileTree<NMeta, DMeta, SMeta, Other>
where
    NMeta: Debug + Clone + Eq + PartialEq,
    DMeta: Debug + Clone + Eq + PartialEq,
    SMeta: Debug + Clone + Eq + PartialEq,
    Other: Debug + Clone + Eq + PartialEq,
{
    /// True iff this node is a directory.
    pub fn is_dir(&self) -> bool {
        matches!(self, FileTree::Directory { .. })
    }

    /// True iff this node is a symbolic link.
    pub fn is_symlink(&self) -> bool {
        matches!(self, FileTree::SymbolicLink { .. })
    }

    /// Looks up a descendant by a `/`-separated path. Empty components are
    /// skipped, so "", "/" and "//" all resolve to `self`. Returns `None` if
    /// a component is missing or a non-directory is reached early.
    pub fn get_by_path(&self, path: &String) -> Option<&FileTree<NMeta, DMeta, SMeta, Other>> {
        let mut node = self;
        for piece in path.split('/') {
            if piece.is_empty() {
                continue;
            }
            match node {
                FileTree::Directory { children, .. } => match children.get(piece) {
                    None => return None,
                    Some(child) => node = child,
                },
                // Only directories have children to descend into.
                _ => return None,
            }
        }
        Some(node)
    }

    /// Returns a copy of this tree with every node's metadata replaced by a
    /// clone of `replacement`.
    pub fn replace_meta<Replacement: Clone + Debug + Eq + PartialEq>(
        &self,
        replacement: &Replacement,
    ) -> FileTree<Replacement, Replacement, Replacement, Other> {
        match self {
            FileTree::NormalFile {
                mtime,
                ownership,
                permissions,
                ..
            } => FileTree::NormalFile {
                mtime: *mtime,
                ownership: *ownership,
                permissions: *permissions,
                meta: replacement.clone(),
            },
            FileTree::Directory {
                ownership,
                permissions,
                children,
                ..
            } => FileTree::Directory {
                ownership: *ownership,
                permissions: *permissions,
                children: children
                    .iter()
                    .map(|(name, subtree)| (name.clone(), subtree.replace_meta(replacement)))
                    .collect(),
                meta: replacement.clone(),
            },
            FileTree::SymbolicLink {
                ownership, target, ..
            } => FileTree::SymbolicLink {
                ownership: *ownership,
                target: target.clone(),
                meta: replacement.clone(),
            },
            FileTree::Other(other) => FileTree::Other(other.clone()),
        }
    }

    /// Filters the tree in-place by removing nodes that do not satisfy the predicate.
    /// 'Inclusive' in the sense that if a directory does not satisfy the predicate but one of its
    /// descendants does, then the directory will be included anyway.
    /// (So nodes that satisfy the predicate will never be excluded because of a parent not doing so.)
    ///
    /// Returns true if this node should be included, and false if it should not be.
    pub fn filter_inclusive<F>(&mut self, predicate: &mut F) -> bool
    where
        F: FnMut(&Self) -> bool,
    {
        match self {
            FileTree::Directory { children, .. } => {
                // Recurse first, collecting the names of rejected children;
                // we cannot remove from the map while iterating it.
                let mut to_remove = Vec::new();
                for (name, child) in children.iter_mut() {
                    if !child.filter_inclusive(predicate) {
                        to_remove.push(name.clone());
                    }
                }
                for name in to_remove {
                    children.remove(&name);
                }
                // Keep the directory if any descendant survived, even when
                // the directory itself fails the predicate.
                !children.is_empty() || predicate(&self)
            }
            _ => predicate(&self),
        }
    }
}
impl<X: Debug + Clone + Eq, YAny: Debug + Clone + Eq> FileTree<X, X, X, YAny> {
    /// Borrows this node's metadata; `Other` nodes carry none, so `None`.
    pub fn get_metadata(&self) -> Option<&X> {
        match self {
            FileTree::NormalFile { meta, .. }
            | FileTree::Directory { meta, .. }
            | FileTree::SymbolicLink { meta, .. } => Some(meta),
            FileTree::Other(_) => None,
        }
    }

    /// Overwrites this node's metadata. `Other` nodes have no metadata slot,
    /// so the new value is silently discarded for them (as before).
    pub fn set_metadata(&mut self, new_meta: X) {
        match self {
            FileTree::NormalFile { meta, .. }
            | FileTree::Directory { meta, .. }
            | FileTree::SymbolicLink { meta, .. } => {
                *meta = new_meta;
            }
            FileTree::Other(_) => {
                // nothing to update
            }
        }
    }
}
/// Given a file's metadata, returns the mtime in milliseconds.
pub fn mtime_msec(metadata: &Metadata) -> u64 {
    // Whole seconds scaled to ms, plus the millisecond part of the nanoseconds.
    let whole_seconds_as_msec = metadata.mtime() * 1000;
    let subsecond_msec = metadata.mtime_nsec() / 1_000_000;
    (whole_seconds_as_msec + subsecond_msec) as u64
}
/// Scan the filesystem to produce a Tree, using a default progress bar
/// (a spinner drawn to stdout at 2 Hz).
pub fn scan(
    path: &Path,
    one_filesystem: bool,
    exclusions: &BTreeSet<PathBuf>,
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
    let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(2));
    pbar.set_style(ProgressStyle::default_spinner().template("{spinner} {pos:7} {msg}"));
    pbar.set_message("dir scan");

    // `Some(None)` = stay on one filesystem whose device ID is not yet known;
    // `None` = allowed to cross filesystem boundaries.
    let one_filesystem = one_filesystem.then(|| None);

    let result = scan_with_progress_bar(path, &pbar, one_filesystem, exclusions);
    pbar.finish_at_current_pos();
    result
}
/// Scan the filesystem to produce a Tree, using the specified progress bar.
///
/// `one_filesystem`: `None` means filesystem boundaries may be crossed;
/// `Some(None)` means stay on one filesystem whose device ID is not yet known
/// (it is captured from the first entry scanned); `Some(Some(dev))` pins the
/// scan to device `dev`.
///
/// Returns `Ok(None)` when the path produces no node: excluded, vanished,
/// permission-denied, on another filesystem, skipped via `.datmanskip`, or an
/// unsupported file type (socket, FIFO, device, ...).
pub fn scan_with_progress_bar(
    path: &Path,
    progress_bar: &ProgressBar,
    mut one_filesystem: Option<Option<u64>>,
    exclusions: &BTreeSet<PathBuf>,
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
    if exclusions.contains(path) {
        // Don't enter excluded paths.
        debug!("Not descending into excluded path: {:?}", path);
        return Ok(None);
    }

    // symlink_metadata: describe symlinks themselves rather than following them.
    let metadata_res = symlink_metadata(path);
    progress_bar.inc(1);
    if let Err(e) = &metadata_res {
        match e.kind() {
            ErrorKind::NotFound => {
                // Raced with deletion: skip this entry, don't fail the scan.
                warn!("vanished: {:?}", path);
                return Ok(None);
            }
            ErrorKind::PermissionDenied => {
                warn!("permission denied: {:?}", path);
                return Ok(None);
            }
            _ => { /* nop */ }
        }
    }
    // Any error other than the two tolerated kinds above is fatal here.
    let metadata = metadata_res?;
    let filetype = metadata.file_type();

    if let Some(one_filesystem) = one_filesystem.as_mut() {
        // Capture the device ID on first use; refuse to cross onto others.
        let this_fs = metadata.dev();
        if *one_filesystem.get_or_insert(this_fs) != this_fs {
            info!("Stopping at filesystem boundary: {:?}", path);
            return Ok(None);
        }
    }

    /*let name = path
    .file_name()
    .ok_or(anyhow!("No filename, wat"))?
    .to_str()
    .ok_or(anyhow!("Filename can't be to_str()d"))?
    .to_owned();*/

    // NOTE(review): uid/gid are truncated to u16 here — assumes IDs fit in
    // 16 bits; TODO confirm this is acceptable for systems with larger IDs.
    let ownership = FilesystemOwnership {
        uid: metadata.uid() as u16,
        gid: metadata.gid() as u16,
    };

    let permissions = FilesystemPermissions {
        mode: metadata.mode(),
    };

    if filetype.is_file() {
        // Leave an unpopulated file node. It's not my responsibility to chunk it right now.
        Ok(Some(FileTree::NormalFile {
            mtime: mtime_msec(&metadata),
            ownership,
            permissions,
            meta: (),
        }))
    } else if filetype.is_dir() {
        let mut children = BTreeMap::new();
        progress_bar.set_message(&format!("{:?}", path));

        let dir_read = path.read_dir();

        if let Err(e) = &dir_read {
            match e.kind() {
                ErrorKind::NotFound => {
                    warn!("vanished/: {:?}", path);
                    return Ok(None);
                }
                ErrorKind::PermissionDenied => {
                    warn!("permission denied/: {:?}", path);
                    return Ok(None);
                }
                _ => { /* nop */ }
            }
        }

        for entry in dir_read? {
            let entry: DirEntry = entry?;
            if entry.file_name() == ".datmanskip" {
                // Directories with .datmanskip in them are to be skipped entirely.
                // TODO(perf): should this be checked upfront before some children may already
                // have been scanned?
                debug!("Skipping {path:?} because it has a .datmanskip file.");
                return Ok(None);
            }
            let scanned =
                scan_with_progress_bar(&entry.path(), progress_bar, one_filesystem, exclusions)?;
            if let Some(scanned) = scanned {
                // Only UTF-8-representable names can be stored in the tree.
                if let Ok(filename) = entry.file_name().into_string() {
                    children.insert(filename, scanned);
                } else {
                    warn!("Non-UTF-8 filename; ignoring: {:?}", entry.file_name())
                }
            }
        }

        Ok(Some(FileTree::Directory {
            ownership,
            permissions,
            children,
            meta: (),
        }))
    } else if filetype.is_symlink() {
        let target = read_link(path)?
            .to_str()
            .ok_or(anyhow!("target path cannot be to_str()d"))?
            .to_owned();
        Ok(Some(FileTree::SymbolicLink {
            ownership,
            target,
            meta: (),
        }))
    } else {
        // Sockets, FIFOs, devices etc. are not represented in the tree.
        Ok(None)
    }
}

View File

@ -1,44 +0,0 @@
[package]
name = "yama"
version = "0.7.0-alpha.1"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
edition = "2018"
description = "Deduplicated, compressed and encrypted content pile manager"
repository = "https://bics.ga/reivilibre/yama"
license = "GPL-3.0-or-later"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
fastcdc = "1.0.6"
zstd = "0.11.2" # 0.11.2+zstd.1.5.2
clap = { version = "3.1.18", features = ["derive"] }
blake = "2.0.2"
twox-hash = "1.5.0"
serde = { version = "1.0.104", features = ["derive"] }
serde_bare = "0.3.0"
users = "0.9.1"
crossbeam-channel = "0.5.1"
crossbeam-utils = "0.8.5"
toml = "0.5.5"
glob = "0.3.0"
nix = "0.17.0"
log = "0.4"
env_logger = "0.7.1"
indicatif = "0.14.0"
num_cpus = "1"
anyhow = "1.0"
thiserror = "1.0"
byteorder = "1"
itertools = "0.9.0"
rayon = "1.5.0"
chrono = "0.4.19"
rustyline = "7.1.0"
derivative = "2.2.0"
metrics = "0.17.1"
[dev-dependencies]
temp-dir = "0.1.11"

View File

@ -1,25 +0,0 @@
# 山 (yama): deduplicated heap repository
Yama is a system for storing files and directory trees in 'piles'. The data stored is deduplicated (by using content-defined chunking) and can be compressed and encrypted, too.
Not yet implemented: ~~Yama also permits storing to piles on remote computers, using SSH.~~ (Storing to remote piles over SSH is planned but not yet available.)
Yama is intended for use as a storage mechanism for backups. Datman is a tool to make it easier to use Yama for backups.
The documentation is currently the best source of information about Yama, see the `docs` directory.
Yama can be used as a library for your own programs; further information about this is yet to be provided but the API documentation (Rustdocs) may be useful.
## Other, unpolished, notes
### Training a Zstd Dictionary
`zstd --train FILEs -o zstd.dict`
* Candidate size: `find ~/Programming -size -4k -size +64c -type f -exec grep -Iq . {} \; -printf "%s\n" | jq -s 'add'`
* Want to sample:
* `find ~/Programming -size -4k -size +64c -type f -exec grep -Iq . {} \; -exec cp {} -t /tmp/d/ \;`
* `du -sh`
* `find > file.list`
* `wc -l < file.list` → gives a № lines
* `shuf -n 4242 file.list | xargs -x zstd --train -o zstd.dict` for 4242 files. This chokes if it receives a filename containing a space; just re-run it until you get a working set.

View File

@ -1,311 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::path::{Path, PathBuf};
use anyhow::{bail, Context};
use log::info;
use clap::Parser;
use env_logger::Env;
use std::sync::Arc;
use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile};
use yama::debug::{debug_command, DebugCommand};
use yama::operations::checking::VacuumMode;
use yama::operations::legacy_pushpull::{
determine_bypass_level, open_pile_with_work_bypass, push_to,
};
use yama::operations::{checking, cleanup, extracting};
use yama::pile::local_sqlitebloblogs::CompactionThresholds;
use yama::pile::{Pile, PileDescriptor, RawPile};
use yama::{commands, debug};
// Top-level command-line arguments for the `yama` binary.
// (clap turns `///` doc comments into user-visible CLI help text, so the
// commentary added here uses plain `//` to leave the help output unchanged.)
#[derive(Parser)]
#[clap(version = env!("CARGO_PKG_VERSION"), author = env!("CARGO_PKG_AUTHORS"), about = env!("CARGO_PKG_DESCRIPTION"))]
struct Opts {
    /// Chooses a different pile to be the working pile.
    /// If specified, must be the name of a remote in yama.toml.
    // TODO OBS? #[clap(short, long)]
    // with: Option<String>,
    #[clap(subcommand)]
    command: PileCommand,
}
// Subcommands of the `yama` binary. `///` doc comments below are clap help
// text, so added review commentary uses plain `//` to avoid changing the CLI.
#[derive(Parser)]
enum PileCommand {
    /// Initialise a yama pile in this directory.
    Init {},
    /// Retrieve a pointer from the yama pile, using a named pointer name.
    Retrieve {
        /// Name of the pointer to retrieve.
        pointer_name: String,

        /// Limited expression(s) of files to retrieve.
        /// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE.
        #[clap(short, long)]
        subset: Option<String>,

        destination: PathBuf,

        /// Number of extraction workers to use. Ideal value varies, but probably not much more than
        /// the number of CPU threads.
        #[clap(long)]
        num_workers: Option<u8>,
    },
    /// Check this yama pile for corruption.
    Check {
        // The three GC/depth flags below are combined in wrapped_main:
        // --dry-run-gc wins over --apply-gc; exactly one of --deep/--shallow
        // must be given.
        #[clap(long)]
        apply_gc: bool,
        #[clap(long)]
        dry_run_gc: bool,
        #[clap(long)]
        deep: bool,
        #[clap(long)]
        shallow: bool,
    },
    // NOTE(review): no `///` doc here, so this subcommand has no about text.
    Compact {
        /// Don't actually perform any compaction; just plan it out.
        #[clap(long)]
        dry_run: bool,

        /// Allocated size under which a bloblog is considered small.
        #[clap(long = "small")]
        small_thresh: Option<u64>,

        /// Minimum amount of space to reclaim in order to run compaction for reclaim.
        #[clap(long = "reclaim")]
        min_reclaim: Option<u64>,

        /// Maximum amount of space that can be deallocated in a bloblog before we consider it
        /// worthwhile to replace.
        #[clap(long = "max-dealloc")]
        max_deallocated: Option<u64>,

        /// Minimum number of mergeable small bloblogs in order to run compaction for merge.
        #[clap(long)]
        mergeable: Option<u32>,
    },
    /// Enter a debug prompt for manually operating on the yama pile.
    Debug { supplied_command: Vec<String> },
    /// Pushes a pointer from this pile to another pile.
    Push {
        /// The name of the pointer to push.
        pointer_name: String,

        /// The path to the other pile to push the pointer to.
        other_pile_path: PathBuf,
    },
}
/// Entry point: delegates to `wrapped_main` and converts its return value
/// into a process exit code.
fn main() -> anyhow::Result<()> {
    let exit_code = wrapped_main()?;
    std::process::exit(exit_code);
}
/// Wrapped main, returning the exit code.
/// DO NOT USE exit() in this function, because it can interfere with destructors.
/// (Destructors are needed to ensure some piles are flushed, for example.)
///
/// Exit codes: 0 success, 1 check errors found, 2 debug command failed,
/// 3 debug command unparseable.
fn wrapped_main() -> anyhow::Result<i32> {
    env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();

    let opts: Opts = Opts::parse();

    // Helper: load the descriptor and open the pile in the current directory.
    let open_pile = || -> anyhow::Result<(PileDescriptor, Pile<Box<dyn RawPile>>)> {
        let this_dir = Path::new(".");
        let descriptor =
            load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
        let pile = open_pile(this_dir, &descriptor).context("Failed to open pile")?;
        Ok((descriptor, pile))
    };

    match &opts.command {
        PileCommand::Retrieve {
            pointer_name,
            subset,
            destination,
            num_workers: workers,
        } => {
            let (_pdesc, pile) = open_pile()?;
            let mut pointer = pile
                .read_pointer(pointer_name)?
                .expect("No pointer by that name!"); // todo be nice

            if destination.exists() {
                bail!("The destination already exists. Overwriting not allowed (yet).");
            }

            let mut root_tree_node =
                commands::retrieve_tree_node(&pile, pointer.chunk_ref.clone())?;

            // Resolve any differential parent pointers before extraction.
            fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?;

            let mut node_to_extract = &mut root_tree_node.node;

            // Narrow down to the requested subtree, one path component at a time.
            if let Some(subset) = subset {
                for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) {
                    match node_to_extract.child(path_to_descend) {
                        Ok(new_node) => {
                            node_to_extract = new_node;
                        }
                        Err(msg) => {
                            bail!("Can't descend into {path_to_descend:?}: {msg}");
                        }
                    }
                }
            }

            // todo allow disabling apply metadata
            extracting::extract(
                destination,
                node_to_extract,
                &pile,
                true,
                workers.unwrap_or(2),
                true,
                true,
                true,
            )?;
        }
        PileCommand::Check {
            apply_gc,
            dry_run_gc,
            deep,
            shallow,
        } => {
            // --dry-run-gc takes precedence over --apply-gc.
            let vacuum_mode = if *dry_run_gc {
                VacuumMode::DryRunVacuum
            } else if *apply_gc {
                VacuumMode::Vacuum
            } else {
                VacuumMode::NoVacuum
            };
            let (_pdesc, pile) = open_pile()?;
            let error_count = if *deep {
                checking::check_deep(pile, vacuum_mode, true)?
            } else if *shallow {
                checking::check_shallow(pile, vacuum_mode, true, true)?
            } else {
                bail!("You need to specify either --shallow or --deep.");
            };

            if error_count > 0 {
                eprintln!("THERE ARE {} ERRORS.", error_count);
                return Ok(1);
            }
        }
        PileCommand::Compact {
            dry_run,
            small_thresh,
            min_reclaim,
            max_deallocated,
            mergeable,
        } => {
            let this_dir = Path::new(".");
            let descriptor =
                load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
            // Defaults: reclaim >= 2 GiB, merge >= 64 small bloblogs,
            // deallocated > 256 MiB, "small" < 64 MiB.
            cleanup::compact(
                this_dir,
                &descriptor,
                !*dry_run,
                true,
                CompactionThresholds {
                    minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024),
                    minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64),
                    cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024),
                    cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024),
                },
            )?;
        }
        PileCommand::Init {} => {
            commands::init(".".as_ref())?;
        }
        PileCommand::Debug { supplied_command } => {
            let (pdesc, pile) = open_pile()?;
            if supplied_command.is_empty() {
                // No arguments: drop into the interactive debug REPL.
                debug::debug_prompt(pdesc, pile)?;
            } else {
                // One-shot mode: prepend a dummy argv[0] so clap can parse
                // the supplied words as a DebugCommand.
                let mut prefixed_command = vec![String::from("yama-debug")];
                prefixed_command.extend(supplied_command.iter().cloned());
                match DebugCommand::try_parse_from(prefixed_command) {
                    Ok(command) => {
                        if let Err(e) = debug_command(&pdesc, &pile, command) {
                            eprintln!("Failed {:?}", e);
                            pile.flush()?;
                            return Ok(2);
                        } else {
                            pile.flush()?;
                            return Ok(0);
                        }
                    }
                    Err(err) => {
                        eprintln!("Invalid {:?}", err);
                        return Ok(3);
                    }
                }
            }
        }
        PileCommand::Push {
            pointer_name,
            other_pile_path,
        } => {
            let this_pile_path = PathBuf::from(".");
            let descriptor_this = load_pile_descriptor(".".as_ref())
                .context("Failed to load this pile descriptor")?;

            let descriptor_other = load_pile_descriptor(other_pile_path)
                .context("Failed to load foreign pile descriptor.")?;

            // NOTE(review): the bypass level presumably selects how much of
            // the pile layering can be skipped during the copy — confirm in
            // operations::legacy_pushpull.
            let bypass_level = determine_bypass_level(
                &descriptor_this,
                &this_pile_path,
                &descriptor_other,
                &other_pile_path,
            )?;

            info!("Using bypass level: {:?}", bypass_level);

            let (this_pile, this_rp_bypass) =
                open_pile_with_work_bypass(&this_pile_path, &descriptor_this, bypass_level)?;
            let (other_pile, other_rp_bypass) =
                open_pile_with_work_bypass(&other_pile_path, &descriptor_other, bypass_level)?;

            // TODO flush the pile after here yet
            push_to(
                Arc::new(this_pile),
                this_rp_bypass,
                Arc::new(other_pile),
                other_rp_bypass,
                vec![pointer_name.clone()],
                true,
                32,
            )?;
        }
    }

    Ok(0)
}

View File

@ -1,241 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::cmp::min;
use std::io;
use std::io::{Cursor, Read, Write};
use anyhow::anyhow;
use fastcdc::FastCDC;
use crate::definitions::{ChunkId, RecursiveChunkRef};
use crate::pile::{Pile, RawPile};
use crate::utils::bytes_to_hexstring;
/// Buffer size (1 MiB) above which a `RecursiveChunker` starts recursively
/// chunking its data instead of emitting it as one chunk.
pub const SENSIBLE_THRESHOLD: usize = 1024 * 1024;

// FastCDC content-defined chunking bounds (min / average / max chunk size).
// 256 kiB
pub const FASTCDC_MIN: usize = 256 * 1024;
// 1 MiB
pub const FASTCDC_AVG: usize = 1024 * 1024;
// 8 MiB
pub const FASTCDC_MAX: usize = 8 * 1024 * 1024;
/// A sink that produced chunks are submitted to (e.g. a pile, or a channel
/// feeding a storage pipeline). `Sync` so it can be shared between chunkers.
pub trait ChunkSubmissionTarget: Sync {
    /// Stores (or forwards) one chunk under its content-derived ID.
    fn submit(&self, chunk_id: ChunkId, chunk_data: &[u8]) -> anyhow::Result<()>;
}
/// A pile accepts chunk submissions directly.
impl<RP: RawPile> ChunkSubmissionTarget for Pile<RP> {
    fn submit(&self, chunk_id: ChunkId, chunk_data: &[u8]) -> anyhow::Result<()> {
        self.submit_chunk(chunk_id, chunk_data)
    }
}
/// A channel sender forwards (ID, bytes) pairs to a worker pipeline.
/// The chunk data is copied into an owned Vec for sending.
impl ChunkSubmissionTarget for crossbeam_channel::Sender<(ChunkId, Vec<u8>)> {
    fn submit(&self, chunk_id: ChunkId, chunk_data: &[u8]) -> anyhow::Result<()> {
        // A send error means the receiving end hung up.
        self.send((chunk_id, chunk_data.to_vec()))
            .map_err(|_| anyhow::anyhow!("Failed to send to pipeline."))
    }
}
/// A chunker that will generate nested chunks of chunk references if there is that much data
/// to store.
/// The root RecursiveChunker is fed data bytes.
/// If it exceeds the nominated threshold, it grows a child RecursiveChunker (which may do the same).
/// When done, `finish` should be called to flush the buffers and obtain a `RecursiveChunkRef`.
pub struct RecursiveChunker<'cst, CST: ChunkSubmissionTarget> {
    /// The target to submit chunks to (not necessarily a pile; see
    /// `ChunkSubmissionTarget`).
    target: &'cst CST,
    /// Buffer of data at this level.
    buffer: Vec<u8>,
    /// The next-layer recursive chunker. This is where this chunker will submit chunk IDs to for
    /// recursive chunking. Created lazily the first time the buffer overflows.
    next_layer: Option<Box<RecursiveChunker<'cst, CST>>>,
    /// The size at which this chunker will perform recursive chunking.
    threshold: usize,
}
impl<'cst, CST: ChunkSubmissionTarget> RecursiveChunker<'cst, CST> {
    /// Creates a recursive chunker that submits chunks to `target` and starts
    /// recursing once its buffer exceeds `threshold` bytes.
    pub fn new(threshold: usize, target: &'cst CST) -> Self {
        RecursiveChunker {
            target,
            buffer: vec![],
            next_layer: None,
            threshold,
        }
    }

    /// Runs FastCDC over the internal buffer, submits the cut chunks and
    /// returns the concatenation of the emitted chunk IDs.
    ///
    /// finalise: true iff this is the last chunk (we will not reject a chunk which may have been
    /// truncated)
    fn do_chunking(&mut self, finalise: bool) -> anyhow::Result<Vec<u8>> {
        let fastcdc = FastCDC::new(&self.buffer, FASTCDC_MIN, FASTCDC_AVG, FASTCDC_MAX);
        let mut new_chunks: Vec<u8> = Vec::new();
        let mut consumed_until: Option<usize> = None;
        for chunk in fastcdc {
            // The last cut point may just be where the buffer currently ends
            // rather than a real content-defined boundary, so skip it unless
            // we are finalising.
            let is_final = chunk.offset + chunk.length == self.buffer.len();
            if !is_final || finalise {
                consumed_until = Some(chunk.offset + chunk.length);
                let chunk_data = &self.buffer[chunk.offset..chunk.offset + chunk.length];
                let chunk_id = calculate_chunkid(chunk_data);
                new_chunks.extend_from_slice(&chunk_id);
                self.target.submit(chunk_id, chunk_data)?;
            }
        }
        // Drop the consumed prefix; any unconsumed tail stays buffered for
        // the next round.
        if let Some(consumed_until) = consumed_until {
            if consumed_until > 0 {
                self.buffer.drain(0..consumed_until);
            }
        }
        Ok(new_chunks)
    }

    /// Flushes all layers and returns a reference to the root chunk; `depth`
    /// counts how many levels of chunk-ID indirection were created.
    pub fn finish(mut self) -> anyhow::Result<RecursiveChunkRef> {
        if self.next_layer.is_some() {
            // we are chunking so make this the last chunk
            let new_chunks = self.do_chunking(true)?;
            let mut subchunker = self.next_layer.unwrap();
            subchunker.write(&new_chunks)?;
            let mut rcr = subchunker.finish()?;
            // as there is a level of chunking, increase the depth
            rcr.depth += 1;
            Ok(rcr)
        } else {
            // no chunking, so depth=0 (raw) and just emit our unchunked data
            let chunk_id = calculate_chunkid(&self.buffer);
            self.target.submit(chunk_id, &self.buffer)?;
            Ok(RecursiveChunkRef { chunk_id, depth: 0 })
        }
    }

    /// Buffers `buf` and performs a round of chunking if the buffer is over
    /// threshold. Always reports the whole of `buf` as written.
    fn write_impl(&mut self, buf: &[u8]) -> anyhow::Result<usize> {
        self.buffer.extend_from_slice(buf);
        if self.buffer.len() > self.threshold {
            if self.next_layer.is_none() {
                // start chunking
                // (Fix: was `self.target.clone()`, which only copied the
                // `&CST` reference — clippy clone_on_copy — so pass the
                // reference directly.)
                self.next_layer = Some(Box::new(RecursiveChunker::new(
                    self.threshold,
                    self.target,
                )));
            }
            let new_chunks = self.do_chunking(false)?;
            self.next_layer.as_mut().unwrap().write(&new_chunks)?;
        }
        Ok(buf.len())
    }
}
impl<'cst, CST: ChunkSubmissionTarget> Write for RecursiveChunker<'cst, CST> {
    /// Feeds bytes into the recursive chunker, translating internal errors
    /// into `io::Error` so the chunker can be used as an ordinary writer.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.write_impl(buf)
            .map_err(|err| io::Error::new(io::ErrorKind::Other, err))
    }

    fn flush(&mut self) -> io::Result<()> {
        // Deliberately a no-op: chunks are only cut once enough data has
        // accumulated, or when `finish` is called.
        Ok(())
    }
}
/// Computes the content-derived ID of a chunk: the BLAKE-256 hash of its bytes.
#[inline]
pub fn calculate_chunkid(chunk: &[u8]) -> ChunkId {
    // TODO(newver) Allow pluggable chunkID calculations so that encrypted storage can work without
    // leaking contents.
    let mut chunk_id: ChunkId = Default::default();
    // (Fix: passed `&chunk` (a needless extra borrow, `&&[u8]`); pass the
    // slice directly.)
    blake::hash(256, chunk, &mut chunk_id).expect("BLAKE problem");
    chunk_id
}
/// Reads back data stored via recursive chunking: each layer reads chunk IDs
/// from the layer below (or, at depth 0, from the root chunk ID itself) and
/// resolves them against the pile.
pub struct RecursiveUnchunker<'pile, RP: RawPile> {
    /// Source of chunk IDs: a cursor over the root ID, or a nested unchunker.
    sub_reader: Box<dyn Read + 'pile>,
    /// Bytes of the currently-loaded chunk not yet handed to the caller.
    buffer: Vec<u8>,
    /// The pile that chunks are fetched from.
    pile: &'pile Pile<RP>,
}
impl<'pile, RP: RawPile> RecursiveUnchunker<'pile, RP> {
/// Create a new recursive unchunker.
/// This will automatically create sub-unchunkers as needed.
pub fn new(pile: &'pile Pile<RP>, reference: RecursiveChunkRef) -> Self {
if reference.depth == 0 {
// this unchunker only needs to unchunk the one chunk
RecursiveUnchunker {
sub_reader: Box::new(Cursor::new(reference.chunk_id.to_vec())),
buffer: vec![],
pile: &pile,
}
} else {
let next_ref = RecursiveChunkRef {
depth: reference.depth - 1,
..reference
};
let sub_unchunker = RecursiveUnchunker::new(pile, next_ref);
RecursiveUnchunker {
sub_reader: Box::new(sub_unchunker),
buffer: vec![],
pile: &pile,
}
}
}
}
impl<'pile, RP: RawPile> Read for RecursiveUnchunker<'pile, RP> {
    /// Refills the internal buffer from the next referenced chunk when it is
    /// empty, then serves bytes from it. `Ok(0)` only at the true end of data.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        while self.buffer.is_empty() {
            // Internal buffer is empty, so we need to load another chunk.
            // (we use 'while' in case we load an empty chunk...)
            let mut next_chunk_id: ChunkId = Default::default();
            let read_bytes = self.sub_reader.read(&mut next_chunk_id)?;
            if read_bytes == 0 {
                // end of chunks, because return of zero here means EOF
                return Ok(0);
            }
            if read_bytes < next_chunk_id.len() {
                // Got only part of a chunk ID: read the remainder exactly.
                // any error, including EOF at this point, is an error
                self.sub_reader
                    .read_exact(&mut next_chunk_id[read_bytes..])?;
            }
            let chunk = self
                .pile
                .read_chunk(&next_chunk_id)
                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            if let Some(chunk) = chunk {
                self.buffer.extend_from_slice(&chunk);
            } else {
                // A referenced chunk being absent means the data is unreadable.
                return Err(io::Error::new(
                    io::ErrorKind::NotFound,
                    anyhow!("Chunk {} not found", bytes_to_hexstring(&next_chunk_id)),
                ));
            }
        }
        // Hand out as much of the buffered chunk as fits in `buf`.
        let to_read = min(self.buffer.len(), buf.len());
        buf[0..to_read].copy_from_slice(&self.buffer[0..to_read]);
        self.buffer.drain(0..to_read);
        Ok(to_read)
    }
}

View File

@ -1,183 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::fs::File;
use std::io;
use std::io::{Read, Write};
use std::path::Path;
use std::sync::Arc;
use anyhow::{anyhow, bail, Context};
use log::warn;
use crate::chunking::{RecursiveChunker, RecursiveUnchunker, SENSIBLE_THRESHOLD};
use crate::definitions::{PointerData, RecursiveChunkRef, RootTreeNode, TreeNode};
use crate::pile::compression::{CompressionSettings, RawPileCompressor};
use crate::pile::integrity::RawPileIntegrityChecker;
use crate::pile::local_sqlitebloblogs::SqliteBloblogPile;
use crate::pile::{Pile, PileDescriptor, PileStorage, RawPile};
use crate::tree::{integrate_node_in_place, merge_uid_or_gid_tables};
use crate::utils::get_number_of_workers;
/// Initialises a fresh yama pile in `dir` by writing a `yama.toml` descriptor
/// (SQLite-indexed bloblog storage, compression level 12).
/// Fails if a `yama.toml` already exists there.
pub fn init(dir: &Path) -> anyhow::Result<()> {
    let yama_toml = dir.join("yama.toml");
    if yama_toml.exists() {
        bail!("yama.toml already exists. Cannot create yama pile here.");
    }

    let desc = PileDescriptor {
        yama_version: env!("CARGO_PKG_VERSION").to_owned(),
        storage: PileStorage::SqliteIndexedBloblog,
        compression: Some(12),
    };
    File::create(yama_toml)?.write_all(&toml::to_vec(&desc)?)?;

    Ok(())
}
/// Loads the pile descriptor (`yama.toml`) from the given pile directory.
///
/// # Errors
/// Fails if `yama.toml` is absent (probably not a pile directory), cannot be
/// read, or is not valid TOML for a `PileDescriptor`.
pub fn load_pile_descriptor(dir: &Path) -> anyhow::Result<PileDescriptor> {
    let yama_toml = dir.join("yama.toml");
    if !yama_toml.exists() {
        bail!("yama.toml does not exist here. Is this an existing pile?");
    }
    // fs::read sizes its buffer from file metadata, replacing the manual
    // File::open + read_to_end sequence.
    let buf = std::fs::read(yama_toml)?;
    Ok(toml::from_slice(&buf)?)
}
/// Opens the pile stored in `dir` as described by `desc`, assembling the raw
/// pile layers: bloblog storage, wrapped in an integrity checker, optionally
/// wrapped in zstd compression (when `desc.compression` is set).
///
/// Worker counts for (de)compression come from the YAMA_COMPRESSORS /
/// YAMA_DECOMPRESSORS environment variables via `get_number_of_workers`.
pub fn open_pile(dir: &Path, desc: &PileDescriptor) -> anyhow::Result<Pile<Box<dyn RawPile>>> {
    let num_compressors = get_number_of_workers("YAMA_COMPRESSORS");
    let num_decompressors = get_number_of_workers("YAMA_DECOMPRESSORS");

    match desc.storage {
        PileStorage::RemoteOnly => {
            bail!("This is a remote-only pile. No local storage allowed.");
        }
        PileStorage::SqliteIndexedBloblog => {
            let blob_raw_pile = RawPileIntegrityChecker::new(SqliteBloblogPile::open(dir)?);
            let raw_pile: Box<dyn RawPile> = match desc.compression {
                None => Box::new(blob_raw_pile),
                Some(comp_level) => {
                    // Compression requires the pile's shared zstd dictionary
                    // file to be present alongside the data.
                    let mut dictionary = Vec::new();
                    let dict_path = dir.join("important_zstd.dict");
                    File::open(dict_path)
                        .context("You need important_zstd.dict in your pile folder.")?
                        .read_to_end(&mut dictionary)?;

                    // `_handles` (worker thread handles) are intentionally
                    // dropped here.
                    let (compressed_pile, _handles) = RawPileCompressor::new(
                        blob_raw_pile,
                        CompressionSettings {
                            dictionary: Arc::new(dictionary),
                            level: comp_level as i32,
                            num_compressors: num_compressors as u32,
                            num_decompressors: num_decompressors as u32,
                        },
                    )?;

                    Box::new(compressed_pile)
                }
            };
            Ok(Pile::new(raw_pile))
        }
    }
}
pub fn store_tree_node<RP: RawPile>(
pile: &Pile<RP>,
root_tree_node: &RootTreeNode,
) -> anyhow::Result<RecursiveChunkRef> {
let serialised = serde_bare::to_vec(root_tree_node)?;
let mut chunker = RecursiveChunker::new(SENSIBLE_THRESHOLD, pile);
io::copy(&mut (&serialised[..]), &mut chunker)?;
let chunk_ref = chunker.finish()?;
Ok(chunk_ref)
}
pub fn retrieve_tree_node<RP: RawPile>(
pile: &Pile<RP>,
chunk_ref: RecursiveChunkRef,
) -> anyhow::Result<RootTreeNode> {
let mut serialised = Vec::new();
let mut unchunker = RecursiveUnchunker::new(pile, chunk_ref);
io::copy(&mut unchunker, &mut serialised)?;
Ok(serde_bare::from_slice(&serialised)?)
/*
let unchunker = RecursiveUnchunker::new(pile, chunk_ref);
Ok(serde_bare::from_reader(unchunker)?)
*/
}
/// Given a pointer, fully integrates it in-place. The pointer will no longer have a parent when
/// this operation is finished.
///
/// Works recursively: each parent in the chain is itself fully integrated
/// first, then its tree is merged into `tree_node` and its UID/GID tables
/// into `pointer`'s, bottom-up.
pub fn fully_integrate_pointer_node<RP: RawPile>(
    pile: &Pile<RP>,
    tree_node: &mut TreeNode,
    pointer: &mut PointerData,
) -> anyhow::Result<()> {
    if let Some(parent_name) = &pointer.parent_pointer {
        let mut parent = pile
            .read_pointer(parent_name.as_str())?
            .ok_or_else(|| anyhow!("Parent pointer {:?} not found.", parent_name))?;
        let mut parent_node = retrieve_tree_node(pile, parent.chunk_ref.clone())?.node;
        // Ensure the parent itself has no remaining parent before merging.
        fully_integrate_pointer_node(pile, &mut parent_node, &mut parent)?;
        integrate_node_in_place(tree_node, &mut parent_node)?;

        // merge in the UID and GID tables when integrating.
        // (`false` from the merge signals a conflicting entry; it is only
        // warned about, not treated as fatal.)
        if !merge_uid_or_gid_tables(&mut pointer.uid_lookup, &parent.uid_lookup) {
            warn!(
                "Overlap when merging parent:{:?}'s UID table into child.",
                parent_name
            );
        }
        if !merge_uid_or_gid_tables(&mut pointer.gid_lookup, &parent.gid_lookup) {
            warn!(
                "Overlap when merging parent:{:?}'s GID table into child.",
                parent_name
            );
        }

        // The parent's contents are now folded in, so drop the parent link.
        pointer.parent_pointer = None;
    }
    Ok(())
}
/// Loads a pointer and fully integrates it.
/// The result will be a fully-integrated pointer (it won't have a parent).
pub fn fully_load_pointer<RP: RawPile>(
    pile: &Pile<RP>,
    pointer_name: &str,
) -> anyhow::Result<(PointerData, RootTreeNode)> {
    // Read the pointer record itself; absence is an error here.
    let mut pointer_data = match pile.read_pointer(pointer_name)? {
        Some(found) => found,
        None => return Err(anyhow!("Pointer {:?} not found.", pointer_name)),
    };
    // Fetch its tree and fold any chain of parent pointers into it.
    let mut root_node = retrieve_tree_node(pile, pointer_data.chunk_ref.clone())?;
    fully_integrate_pointer_node(pile, &mut root_node.node, &mut pointer_data)?;
    Ok((pointer_data, root_node))
}

View File

@ -1,213 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::commands::retrieve_tree_node;
use crate::definitions::{FilesystemOwnership, FilesystemPermissions, TreeNode};
use crate::operations::remove_pointer_safely;
use crate::pile::{Pile, PileDescriptor, RawPile};
use anyhow::anyhow;
use clap::Parser;
use rustyline::error::ReadlineError;
use rustyline::Editor;
// Commands accepted by the yama debug prompt and `yama debug` one-shot mode.
// (clap turns `///` doc comments into CLI help text, so the commentary added
// here uses plain `//` to leave the user-visible help unchanged.)
#[derive(Parser)]
pub enum DebugCommand {
    /// List the pointers that are stored in this yama pile.
    #[clap(name = "lsp")]
    ListPointers {
        /// List details about each pointer.
        #[clap(short)]
        verbose: bool,
    },
    /// Delete a pointer from the yama pile.
    #[clap(name = "rmp")]
    DeletePointer {
        /// Name of the pointer to delete.
        name: String,
    },
    /// Reads the information on a pointer.
    #[clap(name = "infop")]
    PointerInfo {
        /// Name of the pointer to read.
        name: String,
    },
    /// Reads statistics from the Pile
    #[clap(name = "stats")]
    Statistics {},
}
/// Runs the interactive debug REPL over the pile, parsing each input line as
/// a `DebugCommand`. History is loaded from and saved to `.yama-history`,
/// and the pile is flushed when the loop exits.
pub fn debug_prompt<RP: RawPile>(pdesc: PileDescriptor, pile: Pile<RP>) -> anyhow::Result<()> {
    let mut rl = Editor::<()>::new();
    if rl.load_history(".yama-history").is_err() {
        // no previous history...
    }
    loop {
        let readline = rl.readline("debug 山 ");
        match readline {
            Ok(line) => {
                rl.add_history_entry(line.as_str());
                // Prepend a dummy argv[0] so clap can parse the line's words.
                let mut args = vec![""];
                args.extend(line.split_ascii_whitespace());
                match DebugCommand::try_parse_from(args) {
                    Ok(command) => {
                        // Report command failures but keep the REPL alive.
                        if let Err(e) = debug_command(&pdesc, &pile, command) {
                            eprintln!("Failed {:?}", e);
                        }
                    }
                    Err(e) => {
                        // Parse/usage errors are printed (includes help text).
                        eprintln!("{}", e);
                    }
                }
            }
            Err(ReadlineError::Interrupted) => {
                eprintln!("^C");
                break;
            }
            Err(ReadlineError::Eof) => {
                eprintln!("^D");
                break;
            }
            Err(err) => {
                eprintln!("Error: {:?}", err);
                break;
            }
        }
    }
    pile.flush()?;
    // NOTE(review): save failure panics via unwrap — consider `?` instead.
    rl.save_history(".yama-history").unwrap();
    Ok(())
}
/// Executes one debug command against the pile.
/// (`_pdesc` is currently unused but kept in the signature for future use.)
pub fn debug_command<RP: RawPile>(
    _pdesc: &PileDescriptor,
    pile: &Pile<RP>,
    command: DebugCommand,
) -> anyhow::Result<()> {
    match &command {
        DebugCommand::ListPointers { verbose } => {
            for pointer in pile.list_pointers()?.iter() {
                if *verbose {
                    // Verbose: show each pointer's chunk ref and parent link.
                    let pointer_data = pile.read_pointer(pointer.as_str())?;
                    match pointer_data {
                        None => {
                            println!("{} → ??? MISSING DATA", pointer);
                        }
                        Some(data) => {
                            println!(
                                "{} → {:?} [parent={:?}]",
                                pointer, data.chunk_ref, data.parent_pointer
                            );
                        }
                    }
                } else {
                    println!("{}", pointer);
                }
            }
        }
        DebugCommand::DeletePointer { name } => {
            remove_pointer_safely(pile, name)?;
        }
        DebugCommand::PointerInfo { name } => {
            let this_pointer = pile
                .read_pointer(name.as_str())?
                .ok_or_else(|| anyhow!("Pointer {:?} does not exist.", name))?;
            let this_node = retrieve_tree_node(&pile, this_pointer.chunk_ref.clone())?;
            eprintln!(" --- Pointer data --- ");
            eprintln!("{:#?}", this_pointer);
            eprintln!(" --- Tree node --- ");
            //eprintln!("{:#?}", this_node.node);
            tree_node_printer(&this_node.name, &this_node.node, 0);
        }
        DebugCommand::Statistics { .. } => {
            if let Some(stats) = pile.raw_pile.debug_statistics()? {
                println!("Statistics for this pile");
                println!(" chunk count: {} chunks", stats.number_of_chunks);
                println!(
                    " total chunk stored space: {} bytes (may exclude deleted chunks)",
                    stats.total_chunk_size
                );
                let average_size =
                    ((stats.total_chunk_size as f64) / (stats.number_of_chunks as f64)) as u64;
                println!(" (average chunk size: {} bytes)", average_size);
            } else {
                // Not every RawPile implementation can report statistics.
                eprintln!("{:?}", pile);
                eprintln!("Statistics appear not to be supported on this kind of pile?");
            }
        }
    }
    Ok(())
}
/// Renders ownership compactly for display, e.g. `uid=1000, gid=1000`.
pub fn compact_ownership(ownership: &FilesystemOwnership) -> String {
    format!("uid={}, gid={}", ownership.uid, ownership.gid)
}
/// Renders the permission mode in octal, space-padded to a minimum width of 4.
pub fn compact_permissions(perms: &FilesystemPermissions) -> String {
    format!("{:4o}", perms.mode)
}
/// Recursively pretty-prints a tree node to stderr, indenting one space per
/// level of depth. `name` is this node's label within its parent.
pub fn tree_node_printer(name: &str, node: &TreeNode, depth: usize) {
    match node {
        TreeNode::NormalFile {
            mtime,
            ownership,
            permissions,
            content,
        } => {
            eprintln!(
                "{}{} = {:?} ({}, {}, mtime={})",
                " ".repeat(depth),
                name,
                content,
                compact_ownership(ownership),
                compact_permissions(permissions),
                mtime
            );
        }
        TreeNode::Directory {
            ownership,
            permissions,
            children,
        } => {
            eprintln!(
                "{}{}/ ({}, {})",
                " ".repeat(depth),
                name,
                compact_ownership(ownership),
                compact_permissions(permissions)
            );
            // Recurse into children with increased indentation.
            for (name, child) in children.iter() {
                tree_node_printer(name, child, depth + 1);
            }
        }
        TreeNode::SymbolicLink { ownership, target } => {
            eprintln!(
                "{}{} → {} ({})",
                " ".repeat(depth),
                name,
                target,
                compact_ownership(ownership)
            );
        }
        TreeNode::Deleted => {
            // Tombstone left by differential pointers.
            eprintln!("{}{} DELETED", " ".repeat(depth), name);
        }
    }
}

View File

@ -1,357 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::BTreeMap;
use std::fmt::{Debug, Formatter};
use anyhow::bail;
use serde::{Deserialize, Serialize};
use crate::utils::bytes_to_hexstring;
/// Identifier of a chunk: a fixed 32-byte value.
// NOTE(review): presumably a content hash of the chunk — confirm at the chunking site.
pub type ChunkId = [u8; 32];
/// A 64-bit xxHash value.
pub type XXHash = u64;
/// Fixed seed for xxHash so hash values are stable across runs/machines.
pub const XXH64_SEED: u64 = 424242;
/// Metadata stored for a named pointer (a named root into the pile).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PointerData {
    /// Reference to the serialised root tree node.
    pub chunk_ref: RecursiveChunkRef,
    /// Name of the pointer this one is stored as a delta against, if any.
    pub parent_pointer: Option<String>,
    /// Maps stored UIDs to user names (`None` when no name was recorded),
    /// used to remap IDs on restore — see `build_uid_translation_table`.
    pub uid_lookup: BTreeMap<u16, Option<String>>,
    /// As `uid_lookup`, but for group IDs.
    pub gid_lookup: BTreeMap<u16, Option<String>>,
}
/// Pointer data that is not yet associated with a parent pointer.
/// Turned into a full [`PointerData`] via `complete`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartialPointerData {
    /// Reference to the serialised root tree node.
    pub chunk_ref: RecursiveChunkRef,
    /// UID → user-name lookup (see `PointerData::uid_lookup`).
    pub uid_lookup: BTreeMap<u16, Option<String>>,
    /// GID → group-name lookup (see `PointerData::gid_lookup`).
    pub gid_lookup: BTreeMap<u16, Option<String>>,
}
impl PartialPointerData {
pub fn complete(self, parent_pointer: Option<String>) -> PointerData {
PointerData {
chunk_ref: self.chunk_ref,
parent_pointer,
uid_lookup: self.uid_lookup,
gid_lookup: self.gid_lookup,
}
}
}
/// A reference to arbitrary-length data, stored as a tree of chunks:
/// the root chunk plus the number of indirection levels beneath it.
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct RecursiveChunkRef {
    /// The root Chunk ID.
    pub chunk_id: ChunkId,
    /// The depth of the data bytes.
    /// 0 means that the chunk addressed by `chunk_id` contains data bytes.
    /// 1 means that the chunk addressed by `chunk_id` contains references to chunk that contain
    /// data bytes.
    /// (and so on)
    pub depth: u32,
}
impl Debug for RecursiveChunkRef {
    /// Formats as `<hex chunk id><depth>`, e.g. `00ab…ff<2>`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let hex_id = bytes_to_hexstring(&self.chunk_id);
        write!(f, "{}<{}>", hex_id, self.depth)
    }
}
/// A tree node together with its own name (the root has no parent directory
/// entry to carry its name, so it is stored alongside).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RootTreeNode {
    /// Name of the root entry.
    pub name: String,
    /// The tree itself.
    pub node: TreeNode,
}
/// A node in a stored directory tree: a regular file, directory, symlink, or a
/// deletion marker.
// NOTE(review): `Deleted` appears to mark, in a differential pointer, an entry
// removed relative to the parent — confirm against the differentiate/integrate
// operations.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum TreeNode {
    NormalFile {
        /// modification time in ms
        mtime: u64,
        ownership: FilesystemOwnership,
        permissions: FilesystemPermissions,
        // TODO size: u64 or not
        // can perhaps cache chunk-wise (but not sure.)
        /// Reference to the (recursively chunked) file contents.
        content: RecursiveChunkRef,
    },
    Directory {
        ownership: FilesystemOwnership,
        permissions: FilesystemPermissions,
        /// Child entries keyed by file name (sorted, giving a stable visit order).
        children: BTreeMap<String, TreeNode>,
    },
    SymbolicLink {
        ownership: FilesystemOwnership,
        /// Literal link target string (not resolved).
        target: String,
    },
    // TODO is there any other kind of file we need to store?
    Deleted,
}
impl TreeNode {
pub fn metadata_invalidates(&self, other: &TreeNode) -> anyhow::Result<bool> {
Ok(match self {
TreeNode::NormalFile {
mtime,
ownership,
permissions,
..
} => {
if let TreeNode::NormalFile {
mtime: other_mtime,
ownership: other_ownership,
permissions: other_permissions,
..
} = other
{
mtime != other_mtime
|| ownership != other_ownership
|| permissions != other_permissions
} else {
true
}
}
TreeNode::Directory {
ownership,
permissions,
children,
} => {
if let TreeNode::Directory {
ownership: other_ownership,
permissions: other_permissions,
children: other_children,
} = other
{
if ownership != other_ownership || permissions != other_permissions {
return Ok(true);
}
if children.len() != other_children.len() {
return Ok(true);
}
for ((left_name, left_node), (right_name, right_node)) in
children.iter().zip(other_children.iter())
{
if left_name != right_name || left_node.metadata_invalidates(right_node)? {
return Ok(true);
}
}
false
} else {
true
}
}
TreeNode::SymbolicLink { ownership, target } => {
if let TreeNode::SymbolicLink {
ownership: other_ownership,
target: other_target,
} = other
{
ownership != other_ownership || target != other_target
} else {
true
}
}
TreeNode::Deleted => {
// unreachable
bail!("Why is Deleted here?");
}
})
}
//
///// Guarantees consistent visit order.
// pub fn visit_mut<F>(
// &mut self,
// visitor: &mut F,
// path_prefix: &str,
// skip_components: u32,
// ) -> anyhow::Result<()>
// where
// F: FnMut(&mut Self, &str) -> anyhow::Result<()>,
// {
// let mut my_path_buf = String::new();
// my_path_buf.push_str(path_prefix);
//
// if skip_components == 0 {
// if !my_path_buf.is_empty() {
// my_path_buf.push('/');
// }
// my_path_buf.push_str(&self.name);
// }
//
// visitor(self, &my_path_buf)?;
//
// if let TreeNode::Directory { children, .. } = &mut self.content {
// for child in children.iter_mut() {
// let new_skip = if skip_components > 0 {
// skip_components - 1
// } else {
// 0
// };
// child.visit_mut(visitor, &my_path_buf, new_skip)?;
// }
// }
// Ok(())
// }
pub fn count_normal_files(&self) -> u32 {
match self {
TreeNode::NormalFile { .. } => 1,
TreeNode::Directory { children, .. } => children
.values()
.map(|child| child.count_normal_files())
.sum(),
_ => 0,
}
}
pub fn visit<F: FnMut(&TreeNode, &str) -> anyhow::Result<()>>(
&self,
func: &mut F,
prefix: String,
) -> anyhow::Result<()> {
func(self, &prefix)?;
if let TreeNode::Directory { children, .. } = &self {
for (name, child) in children.iter() {
if prefix.is_empty() {
// don't want a slash prefix
child.visit(func, name.clone())?;
} else {
child.visit(func, format!("{}/{}", prefix, name))?;
}
}
}
Ok(())
}
pub fn visit_mut<F: FnMut(&mut TreeNode, &str) -> anyhow::Result<()>>(
&mut self,
func: &mut F,
prefix: String,
) -> anyhow::Result<()> {
func(self, &prefix)?;
if let TreeNode::Directory { children, .. } = self {
for (name, child) in children.iter_mut() {
if prefix.is_empty() {
// don't want a slash prefix
child.visit_mut(func, name.clone())?;
} else {
child.visit_mut(func, format!("{}/{}", prefix, name))?;
}
}
}
Ok(())
}
pub fn exists(&self, pieces: &[&str]) -> bool {
match pieces.first() {
None => true,
Some(subpath) => {
if let TreeNode::Directory { children, .. } = self {
match children.get(*subpath) {
None => false,
Some(child) => child.exists(&pieces[1..]),
}
} else {
false
}
}
}
}
/// Recurses into a child by name, or returns Err with a reason.
pub fn child(&mut self, name: &str) -> Result<&mut TreeNode, &'static str> {
match self {
TreeNode::NormalFile { .. } => Err("not a directory: normal file"),
TreeNode::Directory { children, .. } => match children.get_mut(name) {
None => Err("child not in directory"),
Some(node) => Ok(node),
},
TreeNode::SymbolicLink { .. } => Err("not a directory: symlink"),
TreeNode::Deleted => Err("not a directory: deleted"),
}
}
}
/// Unix owner of a filesystem entry.
// NOTE(review): uid/gid are stored as u16 — IDs above 65535 cannot be
// represented; confirm this is an accepted limitation.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FilesystemOwnership {
    /// Owning user ID.
    pub uid: u16,
    /// Owning group ID.
    pub gid: u16,
}
/// Unix permission bits of a filesystem entry.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FilesystemPermissions {
    /// Mode bits as passed to/from chmod-style APIs (octal, e.g. 0o760).
    pub mode: u32,
}
#[cfg(test)]
pub mod tests {
    use crate::definitions::{
        FilesystemOwnership, FilesystemPermissions, RecursiveChunkRef, TreeNode,
    };
    use std::collections::BTreeMap;

    /// Fixture: a file node with fixed metadata and an all-zero chunk reference.
    pub fn example_file() -> TreeNode {
        TreeNode::NormalFile {
            mtime: 424242,
            ownership: FilesystemOwnership {
                uid: 1042,
                gid: 1043,
            },
            permissions: FilesystemPermissions { mode: 0o760 },
            content: RecursiveChunkRef {
                chunk_id: Default::default(),
                depth: 0,
            },
        }
    }

    /// Fixture: a directory node containing up to two named children.
    pub fn example_dir(
        file1: Option<(&str, TreeNode)>,
        file2: Option<(&str, TreeNode)>,
    ) -> TreeNode {
        // Build the child map from however many entries were supplied.
        let children: BTreeMap<String, TreeNode> = file1
            .into_iter()
            .chain(file2)
            .map(|(name, node)| (name.to_owned(), node))
            .collect();
        TreeNode::Directory {
            ownership: FilesystemOwnership {
                uid: 1042,
                gid: 1043,
            },
            permissions: FilesystemPermissions { mode: 0o770 },
            children,
        }
    }

    #[test]
    pub fn test_exists() {
        // A plain file exists at the empty path, but has no children.
        let file = example_file();
        assert!(file.exists(&[]));
        assert!(!file.exists(&["anything"]));

        // dir/{boot.img, etc/fetchmailrc}
        let subdir = example_dir(Some(("fetchmailrc", example_file())), None);
        let dir = example_dir(Some(("boot.img", example_file())), Some(("etc", subdir)));
        assert!(dir.exists(&[]));
        assert!(dir.exists(&["boot.img"]));
        assert!(dir.exists(&["etc", "fetchmailrc"]));
        assert!(!dir.exists(&["bin"]));
        assert!(!dir.exists(&["etc", "resolv.conf"]));
        // Can't descend through a file.
        assert!(!dir.exists(&["boot.img", "hehe"]));
    }
}

View File

@ -1,10 +0,0 @@
// Public module layout of the yama library crate.
pub mod chunking;
pub mod commands;
pub mod debug;
// Core data types: chunk IDs, recursive chunk refs, tree nodes, pointers.
pub mod definitions;
// Operations: checking, cleanup, extracting, legacy push/pull, storing.
pub mod operations;
pub mod pile;
pub mod progress;
pub mod remote;
pub mod tree;
pub mod utils;

View File

@ -1,80 +0,0 @@
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node, store_tree_node};
use crate::pile::{Pile, RawPile};
use crate::tree::{differentiate_node_in_place, integrate_node_in_place};
use anyhow::{anyhow, Context};
use log::info;
// Sub-operations on piles and pointers.
pub mod checking; // deep/shallow consistency checks and vacuuming
pub mod cleanup; // bloblog compaction
pub mod extracting; // restoring trees from a pile to the filesystem
pub mod legacy_pushpull;
pub mod storing;
/// Deletes the pointer `name` from the pile without orphaning any other pointer
/// stored as a delta against it.
///
/// Every other pointer whose `parent_pointer` is `name` is first rewritten: it
/// is integrated with this pointer's fully-integrated tree and — if this
/// pointer itself had a parent — re-differentiated against that parent, which
/// it inherits as its new `parent_pointer`. Chunks are flushed before the
/// pointer record is finally deleted.
///
/// Returns an error if `name` (or its parent) does not exist, or on any
/// read/write failure.
pub fn remove_pointer_safely<P: RawPile>(pile: &Pile<P>, name: &str) -> anyhow::Result<()> {
    // retrieve this pointer
    let mut this_pointer = pile
        .read_pointer(name)?
        .ok_or_else(|| anyhow!("Pointer {:?} does not exist so can not be deleted.", name))?;
    let mut this_node = retrieve_tree_node(&pile, this_pointer.chunk_ref.clone())
        .context("retrieving 'this' node")?;
    let new_parent_name = this_pointer.parent_pointer.clone();
    fully_integrate_pointer_node(pile, &mut this_node.node, &mut this_pointer)
        .context("integrating new parent")?;
    // Load and fully integrate the parent (if any); dependants of the pointer
    // being deleted will be re-based onto it.
    let new_parent = if let Some(ref new_parent_name) = new_parent_name {
        let mut new_parent_pointer = pile
            .read_pointer(new_parent_name.as_str())?
            // Fixed: report the name of the *parent* pointer that is missing,
            // not the name of the pointer being deleted.
            .ok_or_else(|| anyhow!("Parent pointer {:?} does not exist.", new_parent_name))?;
        let mut new_parent_node = retrieve_tree_node(&pile, new_parent_pointer.chunk_ref.clone())?;
        fully_integrate_pointer_node(pile, &mut new_parent_node.node, &mut new_parent_pointer)?;
        Some((new_parent_pointer, new_parent_node))
    } else {
        None
    };
    // now integrate any pointers that rely on this one
    // so that they no longer rely on this one.
    for pointer in pile.list_pointers()?.iter() {
        if pointer == name {
            continue;
        }
        if let Some(mut pointer_data) = pile.read_pointer(pointer.as_str())? {
            if let Some(parent_pointer) = pointer_data.parent_pointer.as_ref() {
                if parent_pointer == name {
                    info!("Pointer would be orphaned: {:?}; integrating", pointer);
                    // need to integrate this node, so retrieve it
                    let mut node = retrieve_tree_node(&pile, pointer_data.chunk_ref)?;
                    // integrate it in-place
                    integrate_node_in_place(&mut node.node, &this_node.node)?;
                    if let Some((_, ref new_parent_node)) = new_parent {
                        // then differentiate with respect to the NEW parent
                        differentiate_node_in_place(&mut node.node, &new_parent_node.node)?;
                    }
                    // pass through the parent
                    pointer_data.parent_pointer = new_parent_name.clone();
                    // store the updated version of the pointer
                    let new_chunk_ref = store_tree_node(&pile, &node)?;
                    // associate the new node with the new version of the pointer
                    pointer_data.chunk_ref = new_chunk_ref;
                    // write the pointer back.
                    pile.write_pointer(pointer.as_str(), &pointer_data)?;
                    // we must flush chunks before deleting the pointer
                    pile.flush()
                        .context("flushing after writing pointer back")?;
                }
            }
        }
    }
    // then delete the pointer
    pile.delete_pointer(name)?;
    info!("Deleted pointer: {:?}", name);
    Ok(())
}

View File

@ -1,438 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::chunking::RecursiveUnchunker;
use crate::commands::retrieve_tree_node;
use crate::definitions::{ChunkId, TreeNode};
use crate::pile::{
ControllerMessage, Keyspace, Pile, PipelineDescription, RawPile, StoragePipelineSettings,
};
use anyhow::bail;
use crossbeam_channel::Sender;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use itertools::Itertools;
use log::{error, info, warn};
use std::collections::HashSet;
use std::convert::TryInto;
use std::io::{Read, Write};
use std::sync::Mutex;
/// How a check run should treat unreferenced chunks.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum VacuumMode {
    /// Do not track or remove unreferenced chunks.
    NoVacuum,
    /// Track and report how many chunks could be vacuumed, but delete nothing.
    DryRunVacuum,
    /// Track unreferenced chunks and actually delete them.
    Vacuum,
}
/// An `io::Write` sink that discards everything written to it.
/// Used to drive a reader to EOF (e.g. via `std::io::copy`) purely for the
/// side effect of reading, without retaining any data.
pub struct NullWriter {}
impl Write for NullWriter {
    /// Claims to have consumed the whole buffer while discarding it.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let discarded = data.len();
        Ok(discarded)
    }
    /// Nothing is buffered, so flushing trivially succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// Mark-and-sweep style vacuuming system.
/// We mark all the chunks that we run into (following the structure of all the pointers and
/// recursive chunk references) and sweep the chunks that have not been read.
#[derive(Debug)]
pub struct VacuumRawPile<RP: RawPile> {
    /// The wrapped pile; all operations delegate to it.
    underlying: RP,
    /// When false, reads are not recorded and sweeping is refused.
    vacuum_tracking_enabled: bool,
    /// The "mark" set: every chunk ID read through this wrapper so far.
    pub retrieved_chunks: Mutex<HashSet<ChunkId>>,
}
impl<RP: RawPile> VacuumRawPile<RP> {
    /// Wraps `underlying`, starting with an empty mark set.
    /// If `vacuum_tracking_enabled` is false, reads are not recorded and
    /// `calculate_vacuum_for_sweeping` will refuse to run.
    pub fn new(underlying: RP, vacuum_tracking_enabled: bool) -> Self {
        VacuumRawPile {
            underlying,
            vacuum_tracking_enabled,
            retrieved_chunks: Default::default(),
        }
    }
    /// Returns the sweep set: every chunk key in the pile that was never read
    /// through this wrapper. Only meaningful after all reachable data has been
    /// traversed (the mark phase); errors if tracking was not enabled.
    pub fn calculate_vacuum_for_sweeping(&self) -> anyhow::Result<HashSet<ChunkId>> {
        if !self.vacuum_tracking_enabled {
            bail!("Vacuum tracking not enabled, you can't calculate the vacuum set!");
        }
        let mut to_sweep = HashSet::new();
        let retrieved_chunks = self.retrieved_chunks.lock().unwrap();
        let mut chunk_id: ChunkId = Default::default();
        for key in self.list_keys(Keyspace::Chunk)? {
            // NOTE(review): clone_from_slice panics if a key is not exactly
            // 32 bytes — assumes all Chunk-keyspace keys are ChunkIds; confirm.
            chunk_id.clone_from_slice(&key?);
            if !retrieved_chunks.contains(&chunk_id) {
                to_sweep.insert(chunk_id.clone());
            }
        }
        Ok(to_sweep)
    }
}
/// Transparent `RawPile` delegation, except that chunk reads are additionally
/// recorded into `retrieved_chunks` (the mark phase) when tracking is enabled.
impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        self.underlying.exists(kind, key)
    }
    /// Delegates to the underlying pile, first marking the chunk as retrieved
    /// (Chunk keyspace only, and only when vacuum tracking is enabled).
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        if self.vacuum_tracking_enabled && kind == Keyspace::Chunk {
            let mut chunk_id: ChunkId = Default::default();
            // NOTE(review): panics if `key` is not exactly 32 bytes; assumes all
            // Chunk-keyspace keys are ChunkIds — confirm at call sites.
            chunk_id.clone_from_slice(key);
            self.retrieved_chunks.lock().unwrap().insert(chunk_id);
        }
        self.underlying.read(kind, key)
    }
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        self.underlying.write(kind, key, value)
    }
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        self.underlying.delete(kind, key)
    }
    fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
        self.underlying.delete_many(kind, keys)
    }
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        self.underlying.list_keys(kind)
    }
    fn flush(&self) -> anyhow::Result<()> {
        self.underlying.flush()
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        self.underlying.check_lowlevel()
    }
    // Note: chunks stored through the pipeline bypass this wrapper's read
    // tracking entirely; only reads are marked.
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        self.underlying
            .build_storage_pipeline(settings, controller_send)
    }
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        self.underlying.describe_pipeline()
    }
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
        self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
    }
}
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
/// This is also capable of finding and vacuuming unused chunks.
/// This checks:
/// - the integrity of each chunk (assuming an integrity-aware raw pile is used)
/// - the structure of pointers and multi-level chunk references
///
/// Returns the number of errors found (0 = consistent). Vacuuming only proceeds
/// when zero errors were found, since an error could mean the mark set is
/// incomplete and reachable chunks would be swept.
pub fn check_deep<RP: RawPile>(
    pile: Pile<RP>,
    vacuum: VacuumMode,
    make_progress_bar: bool,
) -> anyhow::Result<u32> {
    // Wrap the pile so every chunk read is recorded (the 'mark' phase of
    // mark-and-sweep); tracking is off entirely when not vacuuming.
    let pile = Pile::new(VacuumRawPile::new(
        pile.raw_pile,
        vacuum != VacuumMode::NoVacuum,
    ));
    let mut errors = 0;
    let mut to_check = Vec::new();
    let pointer_list = pile.list_pointers()?;
    for pointer in pointer_list.iter() {
        info!("Checking pointer {:?}", pointer);
        match pile.read_pointer(&pointer)? {
            Some(pointer_data) => {
                // A pointer's parent must itself exist, or deltas can't resolve.
                if let Some(parent) = pointer_data.parent_pointer {
                    if !pointer_list.contains(&parent) {
                        errors += 1;
                        error!(
                            "Pointer {:?} has a parent {:?} which does not exist.",
                            pointer, parent
                        );
                    }
                }
                // Reading the tree node also validates the pointer's own chunk ref.
                let tree_node = retrieve_tree_node(&pile, pointer_data.chunk_ref.clone())?;
                // Queue every normal file's content chunk ref for a full read-through.
                tree_node.node.visit(
                    &mut |node, _| {
                        if let TreeNode::NormalFile { content, .. } = node {
                            to_check.push(content.clone());
                        }
                        Ok(())
                    },
                    "".to_owned(),
                )?;
            }
            None => {
                errors += 1;
                error!("Pointer {:?} does not seem to exist.", pointer);
            }
        }
    }
    // The initial length (1000) is a placeholder; the true total is maintained
    // below as `done + remaining` each iteration.
    let pbar = if make_progress_bar {
        ProgressBar::with_draw_target(1000 as u64, ProgressDrawTarget::stdout_with_hz(10))
    } else {
        ProgressBar::hidden()
    };
    pbar.set_style(
        ProgressStyle::default_bar()
            .template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
    );
    pbar.set_message("checking");
    let mut done = 0;
    while let Some(next_to_check) = to_check.pop() {
        done += 1;
        pbar.set_length(done + to_check.len() as u64);
        pbar.set_position(done);
        // Fully decode the recursive chunk stream, discarding the bytes; any
        // integrity/decoding failure while reading surfaces as an Err here.
        let mut unchunker = RecursiveUnchunker::new(&pile, next_to_check.clone());
        match std::io::copy(&mut unchunker, &mut NullWriter {}) {
            Ok(_) => {}
            Err(err) => {
                errors += 1;
                warn!(
                    "Error occurred when reading {:?}: {:?}.",
                    next_to_check, err
                );
            }
        }
    }
    pbar.finish_and_clear();
    if errors > 0 {
        error!("There were {:?}", errors);
    } else {
        info!("No errors.");
    }
    if errors == 0 && vacuum != VacuumMode::NoVacuum {
        info!("Calculating sweep set for vacuuming.");
        // Sweep phase: anything never read during the checks above is garbage.
        let to_vacuum = pile.raw_pile.calculate_vacuum_for_sweeping()?;
        info!("{} chunks are ready to be vacuumed.", to_vacuum.len());
        if vacuum == VacuumMode::Vacuum {
            let pbar = if make_progress_bar {
                ProgressBar::with_draw_target(
                    to_vacuum.len() as u64,
                    ProgressDrawTarget::stdout_with_hz(10),
                )
            } else {
                ProgressBar::hidden()
            };
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("vacuuming");
            // actually do the vacuum!
            info!("Going to vacuum them up.");
            for vacuum_id in to_vacuum {
                pile.raw_pile.delete(Keyspace::Chunk, &vacuum_id)?;
                pbar.inc(1);
            }
            pile.flush()?;
            pbar.finish_and_clear();
        }
    }
    Ok(errors)
}
/// A shallower check than the deep one. This avoids reading the last layer of chunks.
/// (they are simply assumed to be OK.).
/// This leads to much faster performance and is mostly intended for GC.
/// We can check existence for those leaf chunks if desired. This still avoids the
/// overhead of decryption, decompression and reading from disk/network.
///
/// Returns the number of errors found (0 = consistent). As in `check_deep`,
/// vacuuming only runs when no errors were found.
pub fn check_shallow<RP: RawPile>(
    pile: Pile<RP>,
    vacuum: VacuumMode,
    make_progress_bar: bool,
    check_existence: bool,
) -> anyhow::Result<u32> {
    // Record chunk reads ('mark' phase) so unread chunks can be swept later.
    let pile = Pile::new(VacuumRawPile::new(
        pile.raw_pile,
        vacuum != VacuumMode::NoVacuum,
    ));
    // Leaf chunk IDs we saw referenced but deliberately did not read; these must
    // be subtracted from the sweep set since the tracker never saw them.
    let mut additional_seen: HashSet<ChunkId> = HashSet::new();
    let mut errors = 0;
    let mut to_check = Vec::new();
    let pointer_list = pile.list_pointers()?;
    for pointer in pointer_list.iter() {
        info!("Checking pointer {:?}", pointer);
        match pile.read_pointer(&pointer)? {
            Some(pointer_data) => {
                // A pointer's parent must itself exist, or deltas can't resolve.
                if let Some(parent) = pointer_data.parent_pointer {
                    if !pointer_list.contains(&parent) {
                        errors += 1;
                        error!(
                            "Pointer {:?} has a parent {:?} which does not exist.",
                            pointer, parent
                        );
                    }
                }
                let tree_node = retrieve_tree_node(&pile, pointer_data.chunk_ref.clone())?;
                // Queue every normal file's content chunk ref for traversal.
                tree_node.node.visit(
                    &mut |node, _| {
                        if let TreeNode::NormalFile { content, .. } = node {
                            to_check.push(content.clone());
                        }
                        Ok(())
                    },
                    "".to_owned(),
                )?;
            }
            None => {
                errors += 1;
                error!("Pointer {:?} does not seem to exist.", pointer);
            }
        }
    }
    // Placeholder length; the real total is maintained as `done + remaining`.
    let pbar = if make_progress_bar {
        ProgressBar::with_draw_target(1000 as u64, ProgressDrawTarget::stdout_with_hz(10))
    } else {
        ProgressBar::hidden()
    };
    pbar.set_style(
        ProgressStyle::default_bar()
            .template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
    );
    pbar.set_message("checking");
    let mut done = 0;
    while let Some(next_to_check) = to_check.pop() {
        done += 1;
        pbar.set_length(done + to_check.len() as u64);
        pbar.set_position(done);
        if next_to_check.depth > 0 {
            // Unchunk at depth-1: the byte stream we read is then the
            // concatenated list of leaf chunk IDs, which we never dereference.
            let mut reduced_height = next_to_check.clone();
            reduced_height.depth -= 1;
            let mut chunk_id_buf: ChunkId = Default::default();
            let mut unchunker = RecursiveUnchunker::new(&pile, reduced_height);
            loop {
                let read_bytes = unchunker.read(&mut chunk_id_buf)?;
                if read_bytes == 0 {
                    // end of chunks, because return of zero here means EOF
                    break;
                }
                if read_bytes < chunk_id_buf.len() {
                    // any error, including EOF at this point, is an error
                    unchunker.read_exact(&mut chunk_id_buf[read_bytes..])?;
                }
                if check_existence && !pile.chunk_exists(&chunk_id_buf)? {
                    errors += 1;
                    warn!("Chunk missing: {:?}", &chunk_id_buf);
                }
                additional_seen.insert(chunk_id_buf.clone());
            }
        } else {
            // already shallowest, just add the reference to the seen list.
            additional_seen.insert(next_to_check.chunk_id);
        }
    }
    pbar.finish_and_clear();
    if errors > 0 {
        error!("There were {:?}", errors);
    } else {
        info!("No errors.");
    }
    if errors == 0 && vacuum != VacuumMode::NoVacuum {
        info!("Calculating sweep set for vacuuming.");
        let mut to_vacuum = pile.raw_pile.calculate_vacuum_for_sweeping()?;
        // don't forget to include the leaves that we didn't actually visit!
        for element in additional_seen {
            to_vacuum.remove(&element);
        }
        info!("{} chunks are ready to be vacuumed.", to_vacuum.len());
        if vacuum == VacuumMode::Vacuum {
            let pbar = if make_progress_bar {
                ProgressBar::with_draw_target(
                    to_vacuum.len() as u64,
                    ProgressDrawTarget::stdout_with_hz(10),
                )
            } else {
                ProgressBar::hidden()
            };
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("vacuuming");
            // actually do the vacuum!
            info!("Going to vacuum them up.");
            // Delete in batches of 512 to amortise per-call overhead.
            for vacuum_ids_chunk in to_vacuum
                .into_iter()
                .chunks(512)
                .into_iter()
                .map(|c| c.collect::<Vec<ChunkId>>())
            {
                pile.raw_pile.delete_many(
                    Keyspace::Chunk,
                    vacuum_ids_chunk
                        .iter()
                        .map(|ci| ci.as_slice())
                        .collect::<Vec<&[u8]>>()
                        .as_slice(),
                )?;
                pbar.inc(vacuum_ids_chunk.len().try_into().unwrap());
            }
            pile.flush()?;
            pbar.finish_and_clear();
        }
    }
    Ok(errors)
}

View File

@ -1,64 +0,0 @@
use crate::pile::local_sqlitebloblogs::{CompactionThresholds, SqliteBloblogPile};
use crate::pile::{PileDescriptor, PileStorage};
use anyhow::{bail, Context};
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::info;
use std::path::Path;
/// Runs (or, with `actually_run == false`, merely plans) compaction of the pile
/// at `pile_path`, according to `thresholds`.
///
/// Only `SqliteIndexedBloblog` piles support compaction; other storage kinds
/// produce an error.
pub fn compact(
    pile_path: &Path,
    pile_desc: &PileDescriptor,
    actually_run: bool,
    make_progress_bar: bool,
    thresholds: CompactionThresholds,
) -> anyhow::Result<()> {
    // Placeholder length of 1000; the real length is set during compaction.
    let pbar = match make_progress_bar {
        true => ProgressBar::with_draw_target(1000 as u64, ProgressDrawTarget::stdout_with_hz(10)),
        false => ProgressBar::hidden(),
    };
    pbar.set_style(
        ProgressStyle::default_bar()
            .template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
    );
    pbar.set_message("compacting");
    match pile_desc.storage {
        PileStorage::SqliteIndexedBloblog => {
            let bloblog_pile = SqliteBloblogPile::open(&pile_path)
                .context("Failed to open SQLite-indexed Bloblog Pile")?;
            compact_bloblogs(bloblog_pile, pbar, actually_run, thresholds)
        }
        other @ PileStorage::RemoteOnly => {
            bail!("Cannot use compaction on this kind of pile: {other:?}!");
        }
    }
}
/// Analyses the bloblog pile, plans a compaction against `thresholds`, logs the
/// plan, and — only when `actually_run` is set — performs it.
fn compact_bloblogs(
    bloblog_pile: SqliteBloblogPile,
    pbar: ProgressBar,
    actually_run: bool,
    thresholds: CompactionThresholds,
) -> anyhow::Result<()> {
    info!("=== Analysing for compaction ===");
    let analysis = bloblog_pile.analyse_for_compaction()?;
    // Aggregate the per-bloblog statistics in a single pass.
    let mut chunks_total: u64 = 0;
    let mut chunks_deleted: u64 = 0;
    let mut bytes_total: u64 = 0;
    let mut bytes_deleted: u64 = 0;
    for bs in analysis.values() {
        chunks_total += bs.chunks_total;
        chunks_deleted += bs.chunks_deleted;
        bytes_total += bs.bytes_total;
        bytes_deleted += bs.bytes_deleted;
    }
    info!("{} bloblogs in this pile, with {chunks_total} chunks ({bytes_total} B) of which {chunks_deleted} ({bytes_deleted} B) are deleted.", analysis.len());
    info!("=== Planning compaction ===");
    let plan = bloblog_pile.plan_compaction(&thresholds, analysis)?;
    info!("Planned compaction: replace {} bloblogs (of which {} are small), freeing up {} B and rewriting {} B", plan.bloblogs_to_replace.len(), plan.small_bloblogs, plan.reclaimable_space, plan.bytes_to_write);
    if actually_run {
        info!("=== Compacting ===");
        bloblog_pile.perform_compaction(Box::new(pbar), plan)?;
    }
    Ok(())
}

View File

@ -1,370 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::convert::TryInto;
use std::fs::OpenOptions;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU32, Ordering};
use std::{fs, io};
use anyhow::{anyhow, Context};
use crossbeam_channel::{Receiver, Sender};
use crossbeam_utils::thread;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::error;
use nix::sys::time::{TimeVal, TimeValLike};
use nix::unistd::{Gid, Uid};
use crate::chunking::RecursiveUnchunker;
use crate::commands::fully_load_pointer;
use crate::definitions::{FilesystemOwnership, RecursiveChunkRef, TreeNode};
use crate::pile::{Pile, RawPile};
use std::collections::{BTreeMap, HashMap};
/// Given a fully-integrated root node, extracts the files from the pile.
///
/// Pipeline (within one scoped-thread region): a manager thread walks the tree,
/// creating directories/symlinks and queueing each normal file's chunk ref to
/// `num_workers` worker threads, which unchunk the contents into newly-created
/// files. Afterwards, permissions/mtimes/ownership are applied in a final
/// single-threaded pass according to the `apply_*` flags.
// NOTE(review): `failures` is incremented on worker/manager errors but never
// inspected — this function returns Ok even if extraction partially failed
// (errors are only logged). Consider surfacing it as an Err.
pub fn extract<RP: RawPile>(
    target_path: &Path,
    root: &mut TreeNode,
    pile: &Pile<RP>,
    make_progress_bar: bool,
    num_workers: u8,
    apply_permissions: bool,
    apply_mtime: bool,
    apply_ownership: bool,
) -> anyhow::Result<()> {
    let pbar = if make_progress_bar {
        ProgressBar::with_draw_target(
            root.count_normal_files() as u64,
            ProgressDrawTarget::stdout_with_hz(10),
        )
    } else {
        ProgressBar::hidden()
    };
    pbar.set_style(
        ProgressStyle::default_bar()
            .template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
    );
    pbar.set_message("extracting");
    // Unbounded path queue (manager must never block); bounded results channel
    // is only used for progress ticks.
    let (paths_send, paths_recv) = crossbeam_channel::unbounded();
    let (results_send, results_recv) = crossbeam_channel::bounded(16);
    let failures = AtomicU32::new(0);
    thread::scope(|s| {
        for worker in 0..num_workers {
            let paths_recv = paths_recv.clone();
            let results_send = results_send.clone();
            let failures = &failures; // needed because of move
            s.builder()
                .name(format!("yama unchunker {}", worker))
                .spawn(move |_| {
                    if let Err(e) = extract_worker(pile, paths_recv, results_send) {
                        error!("Extraction worker {} failed: {:?}!", worker, e);
                        failures.fetch_add(1, Ordering::Relaxed);
                    }
                })
                .expect("Failed to start thread");
        }
        // Needed to allow the manager to join once the workers finish and drop their senders.
        drop(results_send);
        drop(paths_recv);
        s.spawn(|_| {
            if let Err(e) = manager(root, target_path, paths_send, results_recv, &pbar) {
                error!("Extraction manager failed: {:?}!", e);
                failures.fetch_add(1, Ordering::Relaxed);
            }
        });
    })
    .expect("join issue");
    // Metadata is applied only after all file contents exist on disk.
    pbar.set_message("applying metadata");
    apply_metadata(
        root,
        target_path,
        apply_permissions,
        apply_mtime,
        apply_ownership,
    )?;
    Ok(())
}
/// Loads the pointer called `pointer_name` from `pile` and extracts its tree
/// into `target_path` (the tree's root name becomes a child of `target_path`).
/// Stored UIDs/GIDs are first remapped, by name, to the IDs in use on this
/// system. The remaining flags are forwarded to [`extract`].
pub fn extract_from_pointer_name<RP: RawPile>(
    target_path: &Path,
    pointer_name: &str,
    pile: &Pile<RP>,
    make_progress_bar: bool,
    num_workers: u8,
    apply_permissions: bool,
    apply_mtime: bool,
    apply_ownership: bool,
) -> anyhow::Result<()> {
    let (pointer_data, mut root_node) = fully_load_pointer(pile, pointer_name.as_ref())?;
    // Convert the stored UIDs and GIDs to match this system, which may assign
    // different numeric IDs to the same user/group names.
    apply_uid_and_gid_translation_tables(
        &mut root_node.node,
        &build_uid_translation_table(&pointer_data.uid_lookup),
        &build_gid_translation_table(&pointer_data.gid_lookup),
    );
    let destination = target_path.join(&root_node.name);
    extract(
        &destination,
        &mut root_node.node,
        pile,
        make_progress_bar,
        num_workers,
        apply_permissions,
        apply_mtime,
        apply_ownership,
    )
}
/// Builds a stored-UID → local-UID remapping table by looking up each recorded
/// user name on this system. Entries with no recorded name, names unknown
/// locally, or identical IDs are omitted (no remap needed).
pub fn build_uid_translation_table(
    uid_lookup: &BTreeMap<u16, Option<String>>,
) -> HashMap<u16, u16> {
    uid_lookup
        .iter()
        .filter_map(|(&old_uid, maybe_name)| {
            let name = maybe_name.as_ref()?;
            let user = users::get_user_by_name(name)?;
            let new_uid = user.uid() as u16;
            if new_uid != old_uid {
                Some((old_uid, new_uid))
            } else {
                None
            }
        })
        .collect()
}
/// Builds a stored-GID → local-GID remapping table by looking up each recorded
/// group name on this system. Entries with no recorded name, names unknown
/// locally, or identical IDs are omitted (no remap needed).
pub fn build_gid_translation_table(
    gid_lookup: &BTreeMap<u16, Option<String>>,
) -> HashMap<u16, u16> {
    gid_lookup
        .iter()
        .filter_map(|(&old_gid, maybe_name)| {
            let name = maybe_name.as_ref()?;
            let group = users::get_group_by_name(name)?;
            let new_gid = group.gid() as u16;
            if new_gid != old_gid {
                Some((old_gid, new_gid))
            } else {
                None
            }
        })
        .collect()
}
/// Rewrites, in place, the UID/GID of every node in the tree according to the
/// given translation tables. IDs absent from a table are left untouched.
pub fn apply_uid_and_gid_translation_tables(
    node: &mut TreeNode,
    uid_translation: &HashMap<u16, u16>,
    gid_translation: &HashMap<u16, u16>,
) {
    if uid_translation.is_empty() && gid_translation.is_empty() {
        // Nothing would change: skip the whole tree walk.
        return;
    }
    let remap = |ownership: &mut FilesystemOwnership| {
        if let Some(&new_uid) = uid_translation.get(&ownership.uid) {
            ownership.uid = new_uid;
        }
        if let Some(&new_gid) = gid_translation.get(&ownership.gid) {
            ownership.gid = new_gid;
        }
    };
    node.visit_mut(
        &mut |node, _| {
            match node {
                // All three ownership-bearing variants are handled identically.
                TreeNode::NormalFile { ownership, .. }
                | TreeNode::Directory { ownership, .. }
                | TreeNode::SymbolicLink { ownership, .. } => remap(ownership),
                TreeNode::Deleted => {}
            }
            Ok(())
        },
        "".to_owned(),
    )
    .expect("Can't fail since we don't fail.");
}
/// A worker thread for extracting: receives (path, chunk ref) jobs, streams each
/// file's contents out of the pile into a freshly-created file, and reports one
/// unit `()` per completed file for progress accounting.
///
/// Errors if a destination file already exists (`create_new`) or on any I/O or
/// unchunking failure. Terminates cleanly once all senders hang up.
pub fn extract_worker<RP: RawPile>(
    pile: &Pile<RP>,
    paths: Receiver<(PathBuf, RecursiveChunkRef)>,
    results: Sender<()>,
) -> anyhow::Result<()> {
    // Iterating the Receiver yields jobs until the channel is disconnected
    // (equivalent to looping on recv()).
    for (path, chunk_ref) in paths {
        let mut extractor = RecursiveUnchunker::new(pile, chunk_ref);
        let mut file = OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&path)
            .with_context(|| format!("Failed to open {:?}", path))?;
        io::copy(&mut extractor, &mut file)?;
        results
            .send(())
            .map_err(|_| anyhow!("Failed to send result"))?;
    }
    Ok(())
}
/// A single thread that manages the workers: walks the tree, creating
/// directories and symlinks directly and sending each normal file's
/// (path, chunk ref) to the worker pool, then drains completion messages to
/// drive the progress bar until all workers have finished.
pub fn manager(
    root: &mut TreeNode,
    target_path: &Path,
    paths_sender: Sender<(PathBuf, RecursiveChunkRef)>,
    results_receiver: Receiver<()>,
    progress_bar: &ProgressBar,
) -> anyhow::Result<()> {
    // visit() calls parents before children, so each directory is created
    // before anything is placed inside it.
    root.visit(
        &mut |tree_node, name| {
            // An empty name means the root node itself maps to target_path.
            let final_path = if name.is_empty() {
                target_path.to_path_buf()
            } else {
                target_path.join(name)
            };
            match tree_node {
                TreeNode::NormalFile { content, .. } => {
                    paths_sender
                        .send((final_path, content.clone()))
                        .expect("Unable to send to should-be unbounded channel");
                }
                TreeNode::Directory { .. } => {
                    fs::create_dir(&final_path)?;
                }
                TreeNode::SymbolicLink { target, .. } => {
                    // TODO may want to perform rewrites ...?
                    std::os::unix::fs::symlink(target, &final_path)?;
                }
                TreeNode::Deleted => {
                    panic!("should not be extracting 'Deleted!' --- BUG.");
                }
            };
            Ok(())
        },
        "".to_string(),
    )?;
    // Needed to allow the workers to finish; otherwise we never join.
    drop(paths_sender);
    // Drain until every worker has dropped its result sender (recv then errs).
    while let Ok(()) = results_receiver.recv() {
        progress_bar.inc(1);
    }
    Ok(())
}
/// Applies metadata (permissions, mtime, ownership) to files from a tree node.
///
/// Recurses through directories, applying the requested subset of metadata to
/// `target` and its children. Symbolic links only ever receive ownership
/// changes here.
///
/// Panics on a `Deleted` node — callers must have stripped those beforehand.
pub fn apply_metadata(
    root: &TreeNode,
    target: &Path,
    apply_permissions: bool,
    apply_mtime: bool,
    apply_owner: bool,
) -> anyhow::Result<()> {
    match root {
        TreeNode::NormalFile {
            mtime,
            ownership,
            permissions,
            ..
        } => {
            if apply_permissions {
                let mut perms = fs::metadata(&target)?.permissions();
                perms.set_mode(permissions.mode);
                fs::set_permissions(&target, perms)?;
            }
            if apply_owner {
                nix::unistd::chown(
                    target,
                    Some(Uid::from_raw(ownership.uid as u32)),
                    Some(Gid::from_raw(ownership.gid as u32)),
                )?;
            }
            if apply_mtime {
                // Silently skip if the stored mtime doesn't fit the TimeVal type.
                if let Ok(mtime) = (*mtime).try_into() {
                    let tv = TimeVal::milliseconds(mtime);
                    nix::sys::stat::lutimes(target, &tv, &tv)?;
                }
            }
        }
        TreeNode::Directory {
            ownership,
            permissions,
            children,
        } => {
            if apply_permissions {
                let mut perms = fs::metadata(&target)?.permissions();
                perms.set_mode(permissions.mode);
                fs::set_permissions(&target, perms)?;
            }
            if apply_owner {
                nix::unistd::chown(
                    target,
                    Some(Uid::from_raw(ownership.uid as u32)),
                    Some(Gid::from_raw(ownership.gid as u32)),
                )?;
            }
            // Recurse into children after fixing up the directory itself.
            for (name, child) in children.iter() {
                let child_path = target.join(name);
                apply_metadata(
                    child,
                    &child_path,
                    apply_permissions,
                    apply_mtime,
                    apply_owner,
                )?;
            }
        }
        TreeNode::SymbolicLink { ownership, .. } => {
            if apply_owner {
                nix::unistd::chown(
                    target,
                    Some(Uid::from_raw(ownership.uid as u32)),
                    Some(Gid::from_raw(ownership.gid as u32)),
                )?;
            }
        }
        TreeNode::Deleted => {
            panic!("Deleted is not meant to be reachable here.");
        }
    }
    Ok(())
}

View File

@ -1,333 +0,0 @@
use crate::chunking::RecursiveUnchunker;
use crate::commands::fully_load_pointer;
use crate::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
use crate::operations::checking::VacuumRawPile;
use crate::operations::legacy_pushpull::PushWorkerToManagerMessage::{NewTask, TaskDone};
use crate::pile::compression::{CompressionSettings, RawPileCompressor};
use crate::pile::integrity::RawPileIntegrityChecker;
use crate::pile::local_sqlitebloblogs::SqliteBloblogPile;
use crate::pile::{Keyspace, Pile, PileDescriptor, PileStorage, RawPile};
use crate::utils::get_number_of_workers;
use anyhow::{bail, Context};
use crossbeam_channel::{Receiver, Sender};
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::error;
use std::collections::HashSet;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
/// Pushes chunks (and pointers) from one pile to another.
/// This is a thorough implementation that could be slow but at least should give good confidence.
/// (Presumably we could do better by looking at the pointers that already exist on the destination
/// and only integrating as much as we need to.)
///
/// For each pointer: the pointer record is copied to the destination and the
/// root chunk ref plus every file's chunk ref are scheduled as tasks. Workers
/// copy chunks via the bypass piles and discover subchunks, which are fed back
/// to the manager for deduplicated scheduling.
pub fn push_to(
    from_pile: Arc<Pile<Arc<Box<dyn RawPile>>>>,
    from_rp_bypass: Arc<Box<dyn RawPile>>,
    to_pile: Arc<Pile<Arc<Box<dyn RawPile>>>>,
    to_rp_bypass: Arc<Box<dyn RawPile>>,
    pointers: Vec<String>,
    make_progress_bar: bool,
    num_workers: u32,
) -> anyhow::Result<()> {
    let pbar = if make_progress_bar {
        ProgressBar::with_draw_target(
            1, // TODO
            ProgressDrawTarget::stdout_with_hz(10),
        )
    } else {
        ProgressBar::hidden()
    };
    pbar.set_style(
        ProgressStyle::default_bar()
            .template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
    );
    pbar.set_message("push/pull");
    let (jobs_tx, jobs_rx) = crossbeam_channel::unbounded();
    let (stat_tx, stat_rx) = crossbeam_channel::bounded(32);
    let mut to_process = Vec::new();
    for pointer in pointers {
        let (pointer_data, root_node) = fully_load_pointer(&from_pile, &pointer)?;
        // schedule storing the pointer chunks
        to_process.push(pointer_data.chunk_ref.clone());
        if to_pile.read_pointer(&pointer)?.is_some() {
            unimplemented!("pointer in target exists.");
        }
        // copy across the pointer data
        to_pile.write_pointer(&pointer, &pointer_data)?;
        // Collect the chunk ref of every regular file under this pointer.
        root_node
            .node
            .visit(
                &mut |node, _path| {
                    match node {
                        TreeNode::NormalFile { content, .. } => {
                            to_process.push(content.clone());
                        }
                        _ => {} // nop
                    }
                    Ok(())
                },
                String::new(),
            )
            .expect("No fail");
    }
    // start the work
    let critical_failures = Arc::new(AtomicU32::new(0));
    for worker_num in 0..num_workers {
        let jobs_rx = jobs_rx.clone();
        let stat_tx = stat_tx.clone();
        let critical_failures = critical_failures.clone();
        let from_pile = from_pile.clone();
        let from_rp_bypass = from_rp_bypass.clone();
        let to_pile = to_pile.clone();
        let to_rp_bypass = to_rp_bypass.clone();
        std::thread::Builder::new()
            .name(format!("yama pusher {}", worker_num))
            .spawn(move || {
                if let Err(e) = pusher_worker(
                    from_pile,
                    from_rp_bypass,
                    to_pile,
                    to_rp_bypass,
                    jobs_rx,
                    stat_tx,
                ) {
                    error!("[critical!] Push worker {} FAILED: {:?}", worker_num, e);
                    critical_failures.fetch_add(1, Ordering::Relaxed);
                }
            })
            .expect("Failed to start thread");
    }
    // Seed the manager with the initially-known tasks.
    for task in to_process {
        stat_tx
            .send(NewTask(task))
            .expect("unbounded so should be able to send");
    }
    // must drop here for ending to happen
    drop(jobs_rx);
    drop(stat_tx);
    pbar.set_length(0);
    if let Err(e) = pusher_manager(&pbar, stat_rx, jobs_tx) {
        error!("[critical!] Push manager FAILED: {:?}", e);
        critical_failures.fetch_add(1, Ordering::Relaxed);
    }
    Ok(())
}
/// Messages sent from pusher workers (and the initial seeding loop) to the manager.
enum PushWorkerToManagerMessage {
    // A newly-discovered chunk reference to schedule (manager deduplicates).
    NewTask(RecursiveChunkRef),
    // A previously-scheduled task has been completed by a worker.
    TaskDone,
}
/// Manager loop for the push operation.
///
/// Deduplicates incoming `NewTask` messages (via `already_done`) and forwards
/// fresh ones to the worker job queue; on `TaskDone` it ticks the progress bar
/// and stops once the number of outstanding tasks returns to zero.
fn pusher_manager(
    pbar: &ProgressBar,
    update_receiver: Receiver<PushWorkerToManagerMessage>,
    job_queue: Sender<RecursiveChunkRef>,
) -> anyhow::Result<()> {
    let mut outstanding = 0;
    let mut already_done = HashSet::new();
    while let Ok(status) = update_receiver.recv() {
        match status {
            PushWorkerToManagerMessage::NewTask(task) => {
                // insert() returns true only the first time we see this task.
                if already_done.insert(task.clone()) {
                    job_queue.send(task)?;
                    pbar.inc_length(1);
                    outstanding += 1;
                }
            }
            PushWorkerToManagerMessage::TaskDone => {
                pbar.inc(1);
                outstanding -= 1;
                // All scheduled work is finished; stop even though senders may
                // still be alive.
                if outstanding == 0 {
                    break;
                }
            }
        }
    }
    Ok(())
}
/// Worker loop for the push operation.
///
/// For each job: copies the chunk across via the bypass piles if the target
/// doesn't have it, then (for recursive refs with depth > 0) unchunks one
/// level to discover and report subchunk tasks back to the manager.
fn pusher_worker(
    from_pile: Arc<Pile<Arc<Box<dyn RawPile>>>>,
    from_rp_bypass: Arc<Box<dyn RawPile>>,
    to_pile: Arc<Pile<Arc<Box<dyn RawPile>>>>,
    to_rp_bypass: Arc<Box<dyn RawPile>>,
    jobs_rx: Receiver<RecursiveChunkRef>,
    stat_tx: Sender<PushWorkerToManagerMessage>,
) -> anyhow::Result<()> {
    while let Ok(job) = jobs_rx.recv() {
        if !to_pile.chunk_exists(&job.chunk_id)? {
            // Copy the raw (e.g. still-compressed) bytes through the bypass piles.
            if let Some(bypass_chunk_data) = from_rp_bypass.read(Keyspace::Chunk, &job.chunk_id)? {
                to_rp_bypass.write(Keyspace::Chunk, &job.chunk_id, &bypass_chunk_data)?;
            } else {
                bail!("Chunk cannot be copied because doesn't exist (in bypass pile).");
            }
        }
        if job.depth > 0 {
            // we want to (partially) unchunk this and submit all subchunks.
            let vacuum_rp = VacuumRawPile::new(from_pile.raw_pile.clone(), true);
            let vacuum_pile = Pile::new(vacuum_rp);
            // First read the bottom-level chunk IDs
            let mut reduced_height = job.clone();
            reduced_height.depth -= 1;
            let mut chunk_id_buf: ChunkId = Default::default();
            let mut unchunker = RecursiveUnchunker::new(&vacuum_pile, reduced_height);
            loop {
                let read_bytes = unchunker.read(&mut chunk_id_buf)?;
                if read_bytes == 0 {
                    // end of chunks, because return of zero here means EOF
                    break;
                }
                if read_bytes < chunk_id_buf.len() {
                    // any error, including EOF at this point, is an error
                    unchunker.read_exact(&mut chunk_id_buf[read_bytes..])?;
                }
                stat_tx
                    .send(NewTask(RecursiveChunkRef {
                        chunk_id: chunk_id_buf.clone(),
                        depth: 0,
                    }))
                    .expect("Should be able to send");
            }
            // Then track the chunks that we read whilst doing the above
            for needed_chunk_id in vacuum_pile
                .raw_pile
                .retrieved_chunks
                .lock()
                .expect("Should be able to lock")
                .iter()
            {
                if needed_chunk_id != &job.chunk_id {
                    // only track them if they're not the same as the one on this job.
                    stat_tx
                        .send(NewTask(RecursiveChunkRef {
                            chunk_id: needed_chunk_id.clone(),
                            depth: 0,
                        }))
                        .expect("Should be able to send");
                }
            }
        }
        stat_tx.send(TaskDone)?;
    }
    Ok(())
}
/// How much of the storage pipeline may be skipped when copying chunks
/// directly between two piles.
#[derive(Copy, Clone, Debug)]
pub enum BypassLevel {
    // Chunks must go through the full decompress/recompress path.
    NoBypass,
    // Chunks can be copied in compressed form (both piles share a dictionary).
    CompressionBypass,
}
/// Decides whether two piles can exchange chunks without recompression.
///
/// Compression may only be bypassed when both piles are compressed AND share
/// the exact same Zstd dictionary file; in every other case the chunks must be
/// decompressed and recompressed in transit.
pub fn determine_bypass_level(
    desc1: &PileDescriptor,
    dir1: &Path,
    desc2: &PileDescriptor,
    dir2: &Path,
) -> anyhow::Result<BypassLevel> {
    // If either side is uncompressed, there is nothing to bypass.
    if desc1.compression.is_none() || desc2.compression.is_none() {
        return Ok(BypassLevel::NoBypass);
    }
    // Load a pile's Zstd dictionary from its folder.
    let load_dictionary = |dir: &Path| -> anyhow::Result<Vec<u8>> {
        let mut dictionary = Vec::new();
        File::open(dir.join("important_zstd.dict"))
            .context("You need important_zstd.dict in your pile folder.")?
            .read_to_end(&mut dictionary)?;
        Ok(dictionary)
    };
    // we can only bypass if both dictionaries are the same
    if load_dictionary(dir1)? == load_dictionary(dir2)? {
        Ok(BypassLevel::CompressionBypass)
    } else {
        Ok(BypassLevel::NoBypass)
    }
}
/// Opens a pile with potential for returning a 'complete' pile as well as a lower-level 'bypass'
/// pile, which, for example, skips performing compression operations.
///
/// Return tuple: (actual pile, bypass raw pile)
pub fn open_pile_with_work_bypass(
    dir: &Path,
    desc: &PileDescriptor,
    bypass_level: BypassLevel,
) -> anyhow::Result<(Pile<Arc<Box<dyn RawPile>>>, Arc<Box<dyn RawPile>>)> {
    let num_compressors = get_number_of_workers("YAMA_COMPRESSORS");
    let num_decompressors = get_number_of_workers("YAMA_DECOMPRESSORS");
    match desc.storage {
        PileStorage::RemoteOnly => {
            bail!("This is a remote-only pile. No local storage allowed.");
        }
        PileStorage::SqliteIndexedBloblog => {
            let blob_raw_pile = RawPileIntegrityChecker::new(SqliteBloblogPile::open(dir)?);
            match bypass_level {
                BypassLevel::NoBypass => {
                    unimplemented!()
                }
                BypassLevel::CompressionBypass => {
                    // The bypass pile sits *below* the compression layer, so
                    // reads/writes through it see compressed bytes directly.
                    let common_raw_pile: Arc<Box<dyn RawPile>> = Arc::new(Box::new(blob_raw_pile));
                    let raw_pile: Arc<Box<dyn RawPile>> = match desc.compression {
                        None => common_raw_pile.clone(),
                        Some(comp_level) => {
                            let mut dictionary = Vec::new();
                            let dict_path = dir.join("important_zstd.dict");
                            File::open(dict_path)
                                .context("You need important_zstd.dict in your pile folder.")?
                                .read_to_end(&mut dictionary)?;
                            let (compressed_pile, _handles) = RawPileCompressor::new(
                                common_raw_pile.clone(),
                                CompressionSettings {
                                    dictionary: Arc::new(dictionary),
                                    level: comp_level as i32,
                                    num_compressors: num_compressors as u32,
                                    num_decompressors: num_decompressors as u32,
                                },
                            )?;
                            Arc::new(Box::new(compressed_pile))
                        }
                    };
                    Ok((Pile::new(raw_pile), common_raw_pile))
                }
            }
        }
    }
}

View File

@ -1,342 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::fs::File;
use std::io;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU32, Ordering};
use anyhow::{anyhow, bail, Context};
use crossbeam_channel::{Receiver, Sender};
use crossbeam_utils::thread;
use log::{error, warn};
use crate::chunking::{ChunkSubmissionTarget, RecursiveChunker, SENSIBLE_THRESHOLD};
use crate::commands;
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node};
use crate::definitions::{
PartialPointerData, PointerData, RecursiveChunkRef, RootTreeNode, TreeNode,
};
use crate::pile::{existence_checker_stage, Pile, RawPile, StoragePipelineSettings};
use crate::progress::ProgressTracker;
use crate::tree::{create_uidgid_lookup_tables, differentiate_node_in_place};
use crate::utils::get_number_of_workers;
use std::collections::BTreeMap;
use std::sync::Arc;
/// Stores the files referenced by `root` (relative to `root_path`) into `target`.
///
/// Spawns `num_workers` scoped chunker threads plus an in-scope manager; the
/// manager feeds file paths in and patches chunk refs back into the tree.
/// Fails if any worker or the manager reported a critical failure.
pub fn store<CST: ChunkSubmissionTarget, PT: ProgressTracker>(
    root_path: &Path,
    root: &mut TreeNode,
    target: &CST,
    progress_bar: &mut PT,
    num_workers: u8,
) -> anyhow::Result<()> {
    let (paths_send, paths_recv) = crossbeam_channel::unbounded();
    let (results_send, results_recv) = crossbeam_channel::bounded(16);
    progress_bar.set_max_size(root.count_normal_files() as u64);
    let critical_failures = AtomicU32::new(0);
    thread::scope(|s| {
        for worker_num in 0..num_workers {
            let paths_recv = paths_recv.clone();
            let results_send = results_send.clone();
            let critical_failures = &critical_failures; // needed because of move
            s.builder()
                .name(format!("yama chunker {}", worker_num))
                .spawn(move |_| {
                    if let Err(e) = store_worker(root_path, target, paths_recv, results_send) {
                        error!("[critical!] Storage worker {} FAILED: {:?}", worker_num, e);
                        critical_failures.fetch_add(1, Ordering::Relaxed);
                    }
                })
                .expect("Failed to start thread");
        }
        // Drop the original channel ends so only the worker clones keep them
        // alive; otherwise the channels would never disconnect.
        drop(results_send);
        drop(paths_recv);
        if let Err(e) = manager(root, paths_send, results_recv, progress_bar) {
            error!("[critical!] Storage manager FAILED: {:?}", e);
            critical_failures.fetch_add(1, Ordering::Relaxed);
        }
    })
    .expect("thread scope failed");
    let critical_failures = critical_failures.load(Ordering::SeqCst);
    if critical_failures > 0 {
        bail!("There were {} critical failures.", critical_failures);
    } else {
        Ok(())
    }
}
/// Worker loop: chunks files received on `paths` into `target`.
///
/// For each relative path, the file is opened under `root` and streamed
/// through a `RecursiveChunker`; the resulting chunk ref is reported back on
/// `results`. A vanished file (`NotFound`) reports `None` so the manager can
/// prune it from the tree; any other open/read error aborts the worker.
pub fn store_worker<CST: ChunkSubmissionTarget>(
    root: &Path,
    target: &CST,
    paths: Receiver<String>,
    results: Sender<(String, Option<RecursiveChunkRef>)>,
) -> anyhow::Result<()> {
    while let Ok(path) = paths.recv() {
        let full_path = root.join(&path);
        match File::open(&full_path) {
            Ok(mut file) => {
                let mut chunker = RecursiveChunker::new(SENSIBLE_THRESHOLD, target);
                // streaming copy from file to chunker, really cool :)
                io::copy(&mut file, &mut chunker)?;
                let chunk_ref = chunker.finish()?;
                // map_err instead of or(Err(...)): avoids constructing the
                // error eagerly on every successful send (clippy: or_fun_call).
                results
                    .send((path, Some(chunk_ref)))
                    .map_err(|_| anyhow!("Failed to send result."))?;
            }
            Err(err) => match err.kind() {
                ErrorKind::NotFound => {
                    warn!("File vanished: {:?}. Will ignore.", full_path);
                    // send None so the manager knows to remove this from the tree.
                    results
                        .send((path, None))
                        .map_err(|_| anyhow!("Failed to send result."))?;
                }
                ErrorKind::PermissionDenied => {
                    // TODO think about if we want a 'skip failed permissions' mode ...
                    error!(
                        "Permission denied to read {:?}; do you need to change user?",
                        full_path
                    );
                    Err(err)?;
                }
                _ => {
                    Err(err)?;
                }
            },
        };
    }
    Ok(())
}
/// Removes the entry named by `child_path` (a `/`-separated path) from the tree.
///
/// Fails if any intermediate component is missing or is not a directory, or if
/// the final parent is not a directory.
fn delete_node(root: &mut TreeNode, child_path: &str) -> anyhow::Result<()> {
    let components: Vec<&str> = child_path.split('/').collect();
    let (leaf, parents) = components
        .split_last()
        .expect("split('/') always yields at least one piece");
    // Walk down to the parent directory of the entry being deleted.
    let mut cursor = root;
    for &component in parents {
        match cursor {
            TreeNode::Directory { children, .. } => match children.get_mut(component) {
                Some(next) => cursor = next,
                None => bail!(
                    "Tried to delete {} but {} does not exist.",
                    child_path,
                    component
                ),
            },
            _ => bail!(
                "Tried to delete {} from tree node but '{}' not a directory.",
                child_path,
                component
            ),
        }
    }
    match cursor {
        TreeNode::Directory { children, .. } => {
            children.remove(*leaf);
        }
        _ => bail!(
            "Tried to delete {} from tree node but parent not a directory.",
            child_path
        ),
    }
    Ok(())
}
/// Replaces the chunk reference of the `NormalFile` at `child_path` with `new_ref`.
///
/// Fails if any path component is missing, an intermediate node is not a
/// directory, or the final node is not a regular file.
fn update_node(
    root: &mut TreeNode,
    child_path: &str,
    new_ref: RecursiveChunkRef,
) -> anyhow::Result<()> {
    // Walk down the tree one path component at a time.
    let mut cursor = root;
    for component in child_path.split('/') {
        match cursor {
            TreeNode::Directory { children, .. } => {
                cursor = children.get_mut(component).ok_or_else(|| {
                    anyhow!("Tried to update {} but {} not found", child_path, component)
                })?;
            }
            _ => bail!(
                "Tried to update {} but {} not a directory.",
                child_path,
                component
            ),
        }
    }
    match cursor {
        TreeNode::NormalFile { content, .. } => *content = new_ref,
        _ => bail!("Tried to update {} but it's not a NormalFile.", child_path),
    }
    Ok(())
}
/// Manager for the storing operation.
///
/// Queues every regular file's path to the workers, then drops the sender so
/// workers can terminate, and finally applies each worker result to the tree:
/// `Some(ref)` patches the file's chunk ref in place; `None` means the file
/// vanished and is pruned from the tree.
pub fn manager<PT: ProgressTracker>(
    root: &mut TreeNode,
    paths_sender: Sender<String>,
    results_receiver: Receiver<(String, Option<RecursiveChunkRef>)>,
    progress_bar: &mut PT,
) -> anyhow::Result<()> {
    root.visit(
        &mut |tree_node, name| {
            if let TreeNode::NormalFile { .. } = tree_node {
                // map_err instead of or_else(|_| Err(...)): same behavior,
                // idiomatic form.
                paths_sender
                    .send(name.to_string())
                    .map_err(|_| anyhow!("Unable to send to should-be unbounded channel"))?;
            }
            Ok(())
        },
        "".to_string(),
    )?;
    // Dropping the sender lets workers observe channel disconnection and exit.
    drop(paths_sender);
    while let Ok((path, opt_chunk_ref)) = results_receiver.recv() {
        progress_bar.inc_progress(1);
        match opt_chunk_ref {
            None => {
                delete_node(root, &path)?;
            }
            Some(new_chunk_ref) => {
                update_node(root, &path, new_chunk_ref)?;
            }
        }
    }
    Ok(())
}
/// Stores files into the pile, potentially differentiating using a parent pointer (which will be
/// loaded and fully-integrated).
/// This also creates a pointer (which is why this is called `store_fully`).
///
/// Three phases: differentiate against the parent, store the (differentiated)
/// tree, then write the new pointer and flush the pile.
pub fn store_fully<PT: ProgressTracker>(
    pile: Arc<Pile<Box<dyn RawPile>>>,
    root_dir: &PathBuf,
    new_pointer_name: &String,
    mut root_node: TreeNode,
    parent: Option<String>,
    num_workers: u8,
    progress_bar: &mut PT,
) -> anyhow::Result<()> {
    pointer_ops_prepare_to_store(&pile, &mut root_node, &parent)?;
    let pointer_data =
        store_without_pointer_ops(&pile, &root_dir, root_node, num_workers, progress_bar)?
            .complete(parent);
    pointers_ops_after_store(&pile, &new_pointer_name, &pointer_data)?;
    Ok(())
}
/// Persists a freshly-created pointer and flushes the pile so the write is durable.
pub fn pointers_ops_after_store(
    pile: &Pile<impl RawPile>,
    new_pointer_name: &str,
    pointer_data: &PointerData,
) -> anyhow::Result<()> {
    pile.write_pointer(new_pointer_name, pointer_data)?;
    pile.flush()
}
/// If a parent pointer is given, loads and fully integrates it, then
/// differentiates `root_node` against it in place so only changes are stored.
/// With no parent, this is a no-op.
pub fn pointer_ops_prepare_to_store(
    pile: &Pile<impl RawPile>,
    mut root_node: &mut TreeNode,
    parent: &Option<String>,
) -> anyhow::Result<()> {
    if let Some(parent) = parent.as_ref() {
        let mut parent_pointer = pile.read_pointer(parent)?.ok_or_else(|| {
            anyhow!(
                "Selected parent pointer {:?} didn't exist when tried to retrieve it.",
                parent
            )
        })?;
        let mut parent_node = retrieve_tree_node(&pile, parent_pointer.chunk_ref.clone())?;
        // The parent itself may be differential; integrate it first so we
        // differentiate against a complete tree.
        fully_integrate_pointer_node(&pile, &mut parent_node.node, &mut parent_pointer)?;
        differentiate_node_in_place(&mut root_node, &parent_node.node)?;
    }
    Ok(())
}
/// Stores `root_node`'s files through the pile's storage pipeline and returns
/// the resulting partial pointer data (chunk ref + UID/GID lookup tables),
/// without performing any pointer read/write operations.
pub fn store_without_pointer_ops<PT: ProgressTracker>(
    pile: &Arc<Pile<Box<dyn RawPile>>>,
    root_dir: &PathBuf,
    mut root_node: TreeNode,
    num_workers: u8,
    progress_bar: &mut PT,
) -> anyhow::Result<PartialPointerData> {
    // TODO make these configurable
    let sps = StoragePipelineSettings {
        num_compressors: get_number_of_workers("YAMA_PL_COMPRESSORS") as u32,
        compressor_input_bound: 64,
        writer_input_bound: 64,
    };
    let (control_tx, control_rx) = crossbeam_channel::unbounded();
    let pile2 = pile.clone();
    let pipeline = pile.raw_pile.build_storage_pipeline(sps, control_tx)?;
    // TODO(newver) The existence checker stage should be able to be swapped between different implementations.
    let pipeline = existence_checker_stage(pile2, pipeline);
    store(
        &root_dir,
        &mut root_node,
        &pipeline,
        progress_bar,
        num_workers,
    )?;
    // must drop the pipeline to allow the threads to close
    drop(pipeline);
    // Drain the control channel until the pipeline threads have all gone away.
    while let Ok(_) = control_rx.recv() {
        // TODO nothing for now.
    }
    let mut uid_lookup = BTreeMap::new();
    let mut gid_lookup = BTreeMap::new();
    create_uidgid_lookup_tables(&root_node, &mut uid_lookup, &mut gid_lookup)
        .context("Failed to build UID and GID lookup tables :(.")?;
    let chunk_ref = commands::store_tree_node(
        &pile,
        &RootTreeNode {
            // Fall back to "_root" when the directory name is absent or non-UTF-8.
            name: root_dir
                .file_name()
                .map(|s| s.to_str())
                .flatten()
                .unwrap_or("_root")
                .to_owned(),
            node: root_node,
        },
    )?;
    let pointer_data = PartialPointerData {
        chunk_ref,
        uid_lookup,
        gid_lookup,
    };
    Ok(pointer_data)
}

View File

@ -1,393 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use crate::definitions::{ChunkId, PointerData};
use crate::utils::get_number_of_workers;
use crossbeam_channel::Sender;
use std::collections::HashSet;
use std::fmt::Debug;
use std::sync::{Arc, Condvar, Mutex};
pub mod access_guard;
pub mod compression;
pub mod integrity;
pub mod local_sqlitebloblogs;
/// On-disk descriptor for a pile: records how the pile is stored and
/// configured so later invocations can open it correctly.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct PileDescriptor {
    /// The last version of yama that was used with this pile.
    pub yama_version: String,
    /// The storage backend to use.
    pub storage: PileStorage,
    /// If specified, the compression level of the pile.
    pub compression: Option<u16>,
}
/// The available local storage backends for a pile.
#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub enum PileStorage {
    /// No local storage. Pile is only usable for remotes.
    RemoteOnly,
    /// Local storage backed by bloblogs that are indexed by a SQLite database.
    SqliteIndexedBloblog,
    // Local temporary storage in which chunks are only kept for long enough to send them to
    // remotes. Unimplemented at present.
    // TODO THIS IS NOT THE CORRECT NAME ANYWAY BarePushSled,
}
/// Describes a remote pile that this pile can push to / pull from.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct RemoteDescriptor {
    // Whether the remote pile is encrypted.
    pub encrypted: bool,
    // Remote host to connect to; None presumably means local — TODO confirm.
    pub host: Option<String>,
    // User to connect as on the remote host, if not the default.
    pub user: Option<String>,
    // Path of the pile on the remote side.
    pub path: PathBuf,
}
/// The distinct key-value namespaces within a raw pile.
#[derive(PartialOrd, PartialEq, Copy, Clone, Serialize, Deserialize, Eq)]
pub enum Keyspace {
    // Chunk ID -> chunk contents.
    Chunk,
    // Chunk ID -> chunk hash (used for integrity verification).
    ChunkHash,
    // Pointer name -> serialised pointer data.
    Pointer,
}
/// Useful information for humans. Doesn't need to be spot on, but kind of interesting.
#[derive(Debug, Clone)]
pub struct DebugStatistics {
    // Total count of chunks stored.
    pub number_of_chunks: u64,
    // Smallest stored chunk size in bytes, if any chunks exist.
    pub minimum_chunk_size: Option<u32>,
    // Largest stored chunk size in bytes, if any chunks exist.
    pub maximum_chunk_size: Option<u32>,
    // Sum of all stored chunk sizes in bytes.
    pub total_chunk_size: u64,
}
/// Tuning knobs for a pile's storage pipeline.
#[derive(Debug, Clone)]
pub struct StoragePipelineSettings {
    // Number of parallel compressor workers.
    pub num_compressors: u32,
    // Channel capacity feeding the compressor stage.
    pub compressor_input_bound: u32,
    // Channel capacity feeding the writer stage.
    pub writer_input_bound: u32,
}
/// Inserts a deduplication/existence-check stage in front of `next_stage`.
///
/// Returns a sender; chunks sent to it are dropped if already seen in this
/// session or already present in the pile, and forwarded otherwise. Worker
/// threads exit when the returned sender (and its clones) are dropped.
pub fn existence_checker_stage<RP: RawPile>(
    pile: Arc<Pile<RP>>,
    next_stage: Sender<(ChunkId, Vec<u8>)>,
) -> Sender<(ChunkId, Vec<u8>)> {
    // TODO(newver) Do better than this.
    let shared_seen_set: Arc<Mutex<HashSet<ChunkId>>> = Default::default();
    let (tx, rx) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(32);
    // TODO would like something better for the networked case
    for _ in 0..get_number_of_workers("YAMA_EXISTENCE_CHECKERS") {
        let shared_seen_set = shared_seen_set.clone();
        let next_stage = next_stage.clone();
        let rx = rx.clone();
        let pile = pile.clone();
        std::thread::Builder::new()
            .name("yama exist?er".to_string())
            .spawn(move || {
                while let Ok((chunk_id, chunk)) = rx.recv() {
                    // TODO handle errors properly
                    // First de-duplicate within this session...
                    let is_new = { shared_seen_set.lock().unwrap().insert(chunk_id) };
                    if !is_new {
                        continue;
                    }
                    // ...then skip chunks the pile already has.
                    if !pile.chunk_exists(&chunk_id).unwrap() {
                        next_stage.send((chunk_id, chunk)).unwrap();
                    }
                }
            })
            .unwrap();
    }
    tx
}
/// Out-of-band messages from pipeline workers to the pipeline controller.
pub enum ControllerMessage {
    /// A worker failed; identifies the worker and carries a description of the error.
    Failure {
        worker_id: Arc<String>,
        error_message: String,
    },
}
/// One stage in a raw pile's processing pipeline, used to describe (and
/// compare) pile configurations.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub enum PipelineDescription {
    Store,
    Remote,
    Integrity,
    // Fingerprint lets two compression stages be compared for dictionary equality.
    Compression { dictionary_fingerprint: u64 },
    Encryption,
}
/// Low-level key-value storage interface underlying a `Pile`.
///
/// Implementations may be layered (compression, integrity, guards, …); each
/// layer wraps another `RawPile`.
pub trait RawPile: Send + Sync + Debug + 'static {
    // TODO expose verification errors?
    /// Returns whether `key` exists in the given keyspace.
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool>;
    /// Reads the value for `key`, or `None` if absent.
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>>;
    /// Writes `value` under `key`.
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()>;
    /// Deletes the entry for `key`.
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()>;
    /// Deletes multiple entries in one call.
    fn delete_many(&self, kind: Keyspace, key: &[&[u8]]) -> anyhow::Result<()>;
    /// Iterates over all keys in the given keyspace.
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>>;
    /*
    fn list_keyvalue_pairs(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<(Vec<u8>, Vec<u8>)>>>>;
     */
    /// Flushes buffered writes to durable storage.
    fn flush(&self) -> anyhow::Result<()>;
    // TODO return a progress Receiver
    /// Runs a low-level integrity check; `true` means the check passed.
    fn check_lowlevel(&self) -> anyhow::Result<bool>;
    /// Return a few statistics, if possible.
    fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
        Ok(None)
    }
    /// Builds this pile's storage pipeline; returns the sender chunks are fed into.
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>>;
    /// Describes the stack of pipeline stages this pile is composed of.
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>>;
    /// Return a u64 order token that indicates the optimum order to read this chunk in
    /// compared to other chunks.
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64>;
}
// Blanket delegation: a boxed trait object is itself a RawPile, so generic
// code can take `Box<dyn RawPile>` where a concrete `RP: RawPile` is expected.
impl RawPile for Box<dyn RawPile> {
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        self.as_ref().exists(kind, key)
    }
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        self.as_ref().read(kind, key)
    }
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        self.as_ref().write(kind, key, value)
    }
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        self.as_ref().delete(kind, key)
    }
    fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
        self.as_ref().delete_many(kind, keys)
    }
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        self.as_ref().list_keys(kind)
    }
    fn flush(&self) -> anyhow::Result<()> {
        self.as_ref().flush()
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        self.as_ref().check_lowlevel()
    }
    fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
        self.as_ref().debug_statistics()
    }
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        self.as_ref()
            .build_storage_pipeline(settings, controller_send)
    }
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        self.as_ref().describe_pipeline()
    }
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
        self.as_ref().chunk_id_transfer_ordering_hint(chunk_id)
    }
}
// Blanket delegation: an Arc-shared RawPile is itself a RawPile, so piles can
// be shared between threads without changing the generic bounds.
impl<RP: RawPile> RawPile for Arc<RP> {
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        self.as_ref().exists(kind, key)
    }
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        self.as_ref().read(kind, key)
    }
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        self.as_ref().write(kind, key, value)
    }
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        self.as_ref().delete(kind, key)
    }
    fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
        self.as_ref().delete_many(kind, keys)
    }
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        self.as_ref().list_keys(kind)
    }
    fn flush(&self) -> anyhow::Result<()> {
        self.as_ref().flush()
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        self.as_ref().check_lowlevel()
    }
    fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
        self.as_ref().debug_statistics()
    }
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        self.as_ref()
            .build_storage_pipeline(settings, controller_send)
    }
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        self.as_ref().describe_pipeline()
    }
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
        self.as_ref().chunk_id_transfer_ordering_hint(chunk_id)
    }
}
/// High-level pile interface wrapping a `RawPile`: chunk and pointer
/// operations plus duplicate-submission coordination across threads.
#[derive(Debug)]
pub struct Pile<R: RawPile> {
    pub raw_pile: R,
    // Chunk IDs currently being written by some thread; used with the condvar
    // below so concurrent submitters of the same chunk wait instead of racing.
    pub racy_submission_mutex: Mutex<HashSet<ChunkId>>,
    pub racy_submission_condvar: Condvar,
}
impl<R: RawPile> Pile<R> {
    /// Wraps a raw pile with an empty in-flight-submission set.
    pub fn new(raw_pile: R) -> Self {
        Pile {
            raw_pile,
            racy_submission_mutex: Mutex::new(Default::default()),
            racy_submission_condvar: Default::default(),
        }
    }
    // TODO(clarity, features): have a special kind of error for verification failures
    //  may be wanted for best-effort restores
    /// Reads a chunk's contents by ID, or `None` if absent.
    pub fn read_chunk(&self, key: &ChunkId) -> anyhow::Result<Option<Vec<u8>>> {
        self.raw_pile.read(Keyspace::Chunk, key)
        /*
        let result = self.raw_pile.read(Keyspace::Chunk, &key)?;
        if let Some(chunk) = result {
            if verify {
                let hash = self
                    .raw_pile
                    .read(Keyspace::ChunkHash, &key)?
                    .ok_or_else(|| {
                        anyhow!(
                            "Hash not found for chunk {}; can't verify",
                            bytes_to_hexstring(&key)
                        )
                    })?;
                let mut hasher = twox_hash::XxHash64::with_seed(XXH64_SEED);
                hasher.write(&chunk);
                let computed_hash = hasher.finish().to_be_bytes();
                if &computed_hash[..] != &hash {
                    bail!(
                        "Hash mismatch for chunk {}: expected {} computed {}",
                        bytes_to_hexstring(&key),
                        bytes_to_hexstring(&hash),
                        bytes_to_hexstring(&computed_hash),
                    );
                }
            }
            Ok(Some(chunk))
        } else {
            Ok(None)
        }
         */
    }
    /// Writes a chunk's contents under its ID.
    pub fn write_chunk(&self, key: &ChunkId, value: &[u8]) -> anyhow::Result<()> {
        self.raw_pile.write(Keyspace::Chunk, key, value)
    }
    /// Returns whether a chunk with this ID is present.
    pub fn chunk_exists(&self, key: &ChunkId) -> anyhow::Result<bool> {
        self.raw_pile.exists(Keyspace::Chunk, key)
    }
    /// Reads and deserialises the pointer with the given name, if it exists.
    pub fn read_pointer(&self, key: &str) -> anyhow::Result<Option<PointerData>> {
        Ok(
            if let Some(pointer_data_raw) = self.raw_pile.read(Keyspace::Pointer, key.as_bytes())? {
                Some(serde_bare::from_slice(&pointer_data_raw)?)
            } else {
                None
            },
        )
    }
    /// Serialises and writes a pointer under the given name.
    pub fn write_pointer(&self, key: &str, pointer: &PointerData) -> anyhow::Result<()> {
        self.raw_pile.write(
            Keyspace::Pointer,
            key.as_bytes(),
            &serde_bare::to_vec(pointer)?,
        )
    }
    /// Deletes the pointer with the given name.
    pub fn delete_pointer(&self, key: &str) -> anyhow::Result<()> {
        self.raw_pile.delete(Keyspace::Pointer, key.as_bytes())
    }
    /// Lists the names of all pointers in the pile.
    pub fn list_pointers(&self) -> anyhow::Result<Vec<String>> {
        let mut result = Vec::new();
        for key in self.raw_pile.list_keys(Keyspace::Pointer)? {
            result.push(String::from_utf8(key?)?);
        }
        Ok(result)
    }
    /// Submits a chunk for storage, coordinating with concurrent submitters:
    /// the first thread to claim a chunk ID writes it (if not already stored);
    /// any other thread submitting the same ID blocks until that write is done.
    pub fn submit_chunk(&self, chunk_id: ChunkId, chunk_data: &[u8]) -> anyhow::Result<()> {
        let mut racy_submissions = self.racy_submission_mutex.lock().unwrap();
        if racy_submissions.insert(chunk_id) {
            // We claimed this chunk; release the lock while doing the I/O.
            drop(racy_submissions);
            if !self.chunk_exists(&chunk_id)? {
                self.write_chunk(&chunk_id, chunk_data)?;
            }
            racy_submissions = self.racy_submission_mutex.lock().unwrap();
            racy_submissions.remove(&chunk_id);
            // wake up anyone who might be waiting for this chunk
            self.racy_submission_condvar.notify_all();
        } else {
            // Someone else is writing this chunk; wait until it disappears
            // from the in-flight set (condvars can wake spuriously, hence loop).
            loop {
                racy_submissions = self.racy_submission_condvar.wait(racy_submissions).unwrap();
                if !racy_submissions.contains(&chunk_id) {
                    break;
                }
            }
        }
        Ok(())
    }
    /// Flushes buffered writes. Should really run this before exiting, so I can sleep better at
    /// night (rather than relying on the destructor).
    pub fn flush(&self) -> anyhow::Result<()> {
        self.raw_pile.flush()
    }
}

View File

@ -1,141 +0,0 @@
use crate::chunking::calculate_chunkid;
use crate::definitions::ChunkId;
use crate::pile::{
ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings,
};
use anyhow::{anyhow, bail};
use crossbeam_channel::{Receiver, Sender};
use derivative::Derivative;
use std::sync::Arc;
use std::thread;
/// PileGuard is a wrapper around a pile that prevents data exfiltration and malicious corruption.
/// It's basically a firewall for a Pile?
/// Preventing malicious corruption requires the chunks to be unprocessed. This way, their ID can be
/// checked by this module.
#[derive(Debug, Derivative)]
#[derivative(Clone(bound = ""))]
// we need to use derivative's Clone impl because Arc<R> causes R to have a bound on Clone
// even though that's not needed. https://github.com/rust-lang/rust/issues/26925
pub struct PileGuard<R: Clone + RawPile> {
    // The pile being protected; only whitelisted operations reach it.
    underlying: R,
    /// Whether to verify chunk IDs to prevent malicious corruption
    verify_chunk_ids: bool,
}
/// Verification stage: recomputes each chunk's ID from its contents and bails
/// on mismatch (which would indicate forgery or a malicious sender); verified
/// chunks are forwarded to the next pipeline stage.
fn pipeline(
    subsequent_pipeline: Sender<(ChunkId, Vec<u8>)>,
    input: Receiver<(ChunkId, Vec<u8>)>,
) -> anyhow::Result<()> {
    while let Ok((claimed_chunk_id, chunk)) = input.recv() {
        let actual_chunk_id = calculate_chunkid(&chunk);
        if actual_chunk_id != claimed_chunk_id {
            bail!("CHUNK ID MISMATCH — is this forgery? (malicious storage process?) claimed{:?} actually{:?}", claimed_chunk_id, actual_chunk_id);
        }
        subsequent_pipeline
            .send((claimed_chunk_id, chunk))
            .map_err(|_| anyhow!("Subsequent step closed"))?;
    }
    Ok(())
}
impl<R: Clone + RawPile> PileGuard<R> {
    /// Wraps `underlying` in a guard, optionally enabling chunk ID verification.
    pub fn new(underlying: R, verify_chunk_ids: bool) -> Self {
        Self {
            underlying,
            verify_chunk_ids,
        }
    }
}
// Deny-by-default RawPile implementation: only chunk existence checks, chunk
// writes (via the verifying pipeline), flush and low-level checks are allowed;
// everything else is "Access denied".
impl<R: Clone + RawPile> RawPile for PileGuard<R> {
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        match kind {
            // Existence checks on chunks are safe to allow (needed for dedup).
            Keyspace::Chunk => self.underlying.exists(kind, key),
            Keyspace::ChunkHash => {
                bail!("Access denied");
            }
            Keyspace::Pointer => {
                bail!("Access denied");
            }
        }
    }
    // Reads are denied entirely to prevent data exfiltration.
    fn read(&self, _kind: Keyspace, _key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        bail!("Access denied");
    }
    fn write(&self, kind: Keyspace, _key: &[u8], _value: &[u8]) -> anyhow::Result<()> {
        match kind {
            Keyspace::Chunk => {
                // Direct chunk writes not implemented yet; writes go through
                // build_storage_pipeline instead.
                todo!()
            }
            Keyspace::ChunkHash => {
                bail!("Access denied");
            }
            Keyspace::Pointer => {
                bail!("Access denied");
            }
        }
    }
    fn delete(&self, _kind: Keyspace, _key: &[u8]) -> anyhow::Result<()> {
        bail!("Access denied");
    }
    fn delete_many(&self, _kind: Keyspace, _keys: &[&[u8]]) -> anyhow::Result<()> {
        bail!("Access denied");
    }
    fn list_keys(
        &self,
        _kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        bail!("Access denied");
    }
    fn flush(&self) -> anyhow::Result<()> {
        self.underlying.flush()
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        self.underlying.check_lowlevel()
    }
    // Prepends the chunk-ID-verifying stage to the underlying pile's pipeline.
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        let subsequent_pipeline = self
            .underlying
            .build_storage_pipeline(settings.clone(), controller_send.clone())?;
        let (input_to_this_stage, receiver) = crossbeam_channel::bounded(8);
        thread::Builder::new()
            .name("yama Aguard".to_owned())
            .spawn(move || {
                if let Err(err) = pipeline(subsequent_pipeline, receiver) {
                    controller_send
                        .send(ControllerMessage::Failure {
                            worker_id: Arc::new(String::from("accessguard")),
                            error_message: format!("err {:?}", err),
                        })
                        .expect("This is BAD: failed to send failure message to controller.");
                }
            })
            .unwrap();
        Ok(input_to_this_stage)
    }
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        // TODO(question) Should we be described in the pipeline?
        self.underlying.describe_pipeline()
    }
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
        self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
    }
}

View File

@ -1,359 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::convert::TryInto;
use std::sync::Arc;
use std::thread;
use std::thread::JoinHandle;
use anyhow::anyhow;
use crossbeam_channel::{Receiver, Sender};
use derivative::Derivative;
use log::error;
use metrics::{register_counter, Unit};
use zstd::bulk::{Compressor, Decompressor};
use crate::definitions::ChunkId;
use crate::pile::{
ControllerMessage, DebugStatistics, Keyspace, PipelineDescription, RawPile,
StoragePipelineSettings,
};
/// Capacity (32 MiB) passed to the Zstd decompressor as the upper bound on a single
/// decompressed chunk's size.
pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
/// Configuration for `RawPileCompressor`: the Zstd dictionary, the compression level and
/// how many worker threads to spawn for each direction.
#[derive(Clone, Debug)]
pub struct CompressionSettings {
    /// Raw dictionary to pass to Zstd for compression and decompression
    pub dictionary: Arc<Vec<u8>>,
    /// The compression level, passed to Zstd.
    pub level: i32,
    /// The number of compressor threads to use.
    pub num_compressors: u32,
    /// The number of decompressor threads to use.
    pub num_decompressors: u32,
}
/// A `RawPile` layer that transparently Zstd-compresses values on write and decompresses
/// them on read, delegating storage to `underlying`.
#[derive(Debug, Derivative)]
#[derivative(Clone(bound = ""))]
// we need to use derivative's Clone impl because Arc<R> causes R to have a bound on Clone
// even though that's not needed. https://github.com/rust-lang/rust/issues/26925
pub struct RawPileCompressor<R: RawPile> {
    underlying: Arc<R>,
    /// Job queue for compressor worker threads; each job is (input bytes, reply channel).
    /// `None` when no worker threads were started (pipeline-only construction).
    compressor: Option<Sender<(Vec<u8>, Sender<Vec<u8>>)>>,
    /// Job queue for decompressor worker threads; same shape as `compressor`.
    decompressor: Option<Sender<(Vec<u8>, Sender<Vec<u8>>)>>,
    settings: Arc<CompressionSettings>,
}
impl<R: RawPile> RawPileCompressor<R> {
    /// Wrap `underlying` with Zstd (de)compression and spawn the requested number of
    /// compressor/decompressor worker threads.
    ///
    /// Returns the wrapper plus the spawned worker thread handles (empty when both worker
    /// counts are zero, e.g. when only a storage pipeline will be built).
    pub fn new(
        underlying: R,
        settings: CompressionSettings,
    ) -> anyhow::Result<(Self, Vec<JoinHandle<()>>)> {
        register_counter!(
            "compressor_in_bytes",
            Unit::Bytes,
            "Number of bytes that have been fed into the compressor"
        );
        register_counter!(
            "compressor_out_bytes",
            Unit::Bytes,
            "Number of bytes that have come out of the compressor"
        );
        register_counter!(
            "compressor_chunks",
            Unit::Count,
            "Number of chunks that have been compressed"
        );
        register_counter!(
            "decompressor_in_bytes",
            Unit::Bytes,
            "Number of bytes that have been fed into the decompressor"
        );
        register_counter!(
            "decompressor_out_bytes",
            Unit::Bytes,
            "Number of bytes that have come out of the decompressor"
        );
        register_counter!(
            "decompressor_chunks",
            Unit::Count,
            "Number of chunks that have been decompressed"
        );
        if settings.num_compressors == 0 && settings.num_decompressors == 0 {
            // optimisation for when we're only building a pipeline: we don't want to
            // spawn worker threads that nothing will ever submit work to.
            return Ok((
                RawPileCompressor {
                    underlying: Arc::new(underlying),
                    compressor: None,
                    decompressor: None,
                    settings: Arc::new(settings),
                },
                Vec::with_capacity(0),
            ));
        }
        let (com_s, com_r) = crossbeam_channel::bounded(4);
        let (dec_s, dec_r) = crossbeam_channel::bounded(4);
        let mut handles = Vec::new();
        for worker in 0..settings.num_compressors {
            let settings = settings.clone();
            let com_r = com_r.clone();
            let builder = thread::Builder::new().name(format!("yama compressor {}", worker));
            handles.push(builder.spawn(move || {
                if let Err(e) = Self::compressor_worker(com_r, settings) {
                    error!("compressor worker failed: {:?}", e);
                }
            })?);
        }
        for worker in 0..settings.num_decompressors {
            let settings = settings.clone();
            let dec_r = dec_r.clone();
            let builder = thread::Builder::new().name(format!("yama decompressor {}", worker));
            handles.push(builder.spawn(move || {
                if let Err(e) = Self::decompressor_worker(dec_r, settings) {
                    error!("decompressor worker failed: {:?}", e);
                }
            })?);
        }
        // NOTE(review): if exactly one of num_compressors/num_decompressors is zero, the
        // corresponding Sender is still stored (Some) with no worker draining it, so using
        // that direction would block forever — confirm callers never do that.
        Ok((
            RawPileCompressor {
                underlying: Arc::new(underlying),
                compressor: Some(com_s),
                decompressor: Some(dec_s),
                settings: Arc::new(settings),
            },
            handles,
        ))
    }
    /// Worker loop: compress each submitted job (using a thread-local dictionary compressor)
    /// and send the result back on the job's reply channel. Exits when the queue closes.
    fn compressor_worker(
        queue: Receiver<(Vec<u8>, Sender<Vec<u8>>)>,
        settings: CompressionSettings,
    ) -> anyhow::Result<()> {
        let mut compressor =
            Compressor::with_dictionary(settings.level, settings.dictionary.as_ref())?;
        while let Ok((job, response_sender)) = queue.recv() {
            let result = compressor.compress(&job)?;
            response_sender
                .send(result)
                .or(Err(anyhow!("Couldn't send compression result")))?;
        }
        Ok(())
    }
    /// Worker loop: decompress each submitted job and send the result back on the job's
    /// reply channel. Exits when the queue closes.
    fn decompressor_worker(
        queue: Receiver<(Vec<u8>, Sender<Vec<u8>>)>,
        settings: CompressionSettings,
    ) -> anyhow::Result<()> {
        let mut decompressor = Decompressor::with_dictionary(settings.dictionary.as_ref())?;
        while let Ok((job, response_sender)) = queue.recv() {
            let result = decompressor.decompress(&job, DECOMPRESS_CAPACITY)?;
            response_sender
                .send(result)
                .or(Err(anyhow!("Couldn't send decompression result")))?;
        }
        Ok(())
    }
    /// Decompress `data` by handing it to a decompressor worker and blocking for the reply.
    ///
    /// Panics if the pile was constructed without decompressor threads.
    fn decompress(&self, data: &[u8]) -> anyhow::Result<Vec<u8>> {
        let (ret_s, ret_r) = crossbeam_channel::bounded(0);
        self.decompressor
            .as_ref()
            .expect("No decompressors configured")
            .send((data.to_vec(), ret_s))
            .or(Err(anyhow!("couldn't send to decompressor")))?;
        Ok(ret_r.recv().or(Err(anyhow!("couldn't receive result")))?)
    }
    /// Compress `uncompressed_data` by handing it to a compressor worker and blocking for
    /// the reply.
    ///
    /// Panics if the pile was constructed without compressor threads.
    fn compress(&self, uncompressed_data: &[u8]) -> anyhow::Result<Vec<u8>> {
        let (ret_s, ret_r) = crossbeam_channel::bounded(0);
        self.compressor
            .as_ref()
            .expect("No compressors configured")
            .send((uncompressed_data.to_vec(), ret_s))
            .or(Err(anyhow!("couldn't send to compressor")))?;
        Ok(ret_r.recv().or(Err(anyhow!("couldn't receive result")))?)
    }
    /// Storage-pipeline stage body: compress each incoming chunk with a worker-local Zstd
    /// compressor, forward it to `next_stage` and record per-worker plus global metrics.
    fn storage_pipeline_worker(
        &self,
        next_stage: Sender<(ChunkId, Vec<u8>)>,
        input: Receiver<(ChunkId, Vec<u8>)>,
        worker_id: String,
    ) -> anyhow::Result<()> {
        // the worker ID has to live forever (metrics labels are 'static), so we leak it :/
        let worker_id: &'static str = Box::leak(worker_id.into_boxed_str());
        metrics::register_histogram!(
            "compressor_idle_time",
            metrics::Unit::Seconds,
            "Time spent waiting between chunks",
            "id" => worker_id
        );
        metrics::register_counter!(
            "compressor_bytes_input",
            metrics::Unit::Bytes,
            "Number of bytes input into the compressor.",
            "id" => worker_id
        );
        metrics::register_counter!(
            "compressor_bytes_output",
            metrics::Unit::Bytes,
            "Number of bytes output from the compressor.",
            "id" => worker_id
        );
        metrics::register_counter!(
            "compressor_chunks_processed",
            metrics::Unit::Count,
            // Fixed: this description was copy-pasted from compressor_bytes_input.
            "Number of chunks processed by the compressor.",
            "id" => worker_id
        );
        let mut compressor =
            Compressor::with_dictionary(self.settings.level, self.settings.dictionary.as_ref())?;
        while let Ok((chunk_id, bytes)) = input.recv() {
            let in_bytes = bytes.len();
            let bytes = compressor.compress(&bytes)?;
            let out_bytes = bytes.len();
            next_stage.send((chunk_id, bytes))?;
            // Per-worker metrics
            // TODO rename
            metrics::counter!("compressor_bytes_input", in_bytes as u64, "id" => worker_id);
            metrics::counter!("compressor_bytes_output", out_bytes as u64, "id" => worker_id);
            // Global metrics
            metrics::counter!("compressor_in_bytes", in_bytes as u64);
            metrics::counter!("compressor_out_bytes", out_bytes as u64);
            metrics::increment_counter!("compressor_chunks");
        }
        Ok(())
    }
}
impl<R: RawPile> RawPile for RawPileCompressor<R> {
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        self.underlying.exists(kind, key)
    }
    /// Read the stored (compressed) value and decompress it before returning.
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        if let Some(data) = self.underlying.read(kind, key)? {
            Ok(Some(self.decompress(&data)?))
        } else {
            Ok(None)
        }
    }
    /// Compress the value before handing it to the underlying pile.
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        let compressed = self.compress(value)?;
        self.underlying.write(kind, key, &compressed)
    }
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        self.underlying.delete(kind, key)
    }
    fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
        self.underlying.delete_many(kind, keys)
    }
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        self.underlying.list_keys(kind)
    }
    fn flush(&self) -> anyhow::Result<()> {
        self.underlying.flush()
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        self.underlying.check_lowlevel()
    }
    fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
        self.underlying.debug_statistics()
    }
    /// Build a compression stage on top of the underlying pile's pipeline, spawning
    /// `settings.num_compressors` worker threads ("yama PcompN") that all share one
    /// bounded input channel and forward compressed chunks downstream.
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        // this one should have a few threads behind it! yarr!
        let subsequent_pipeline = self
            .underlying
            .build_storage_pipeline(settings.clone(), controller_send.clone())?;
        let (input_to_this_stage, receiver) =
            crossbeam_channel::bounded(settings.compressor_input_bound as usize);
        for compressor_number in 0..settings.num_compressors {
            let subsequent_pipeline = subsequent_pipeline.clone();
            let receiver = receiver.clone();
            let controller_send = controller_send.clone();
            let this = (*self).clone();
            thread::Builder::new()
                .name(format!("yama Pcomp{}", compressor_number))
                .spawn(move || {
                    let worker_id = Arc::new(format!("compressor-{}", compressor_number));
                    if let Err(err) = this.storage_pipeline_worker(
                        subsequent_pipeline,
                        receiver,
                        worker_id.to_string(),
                    ) {
                        // Report failure to the controller instead of dying silently.
                        controller_send
                            .send(ControllerMessage::Failure {
                                worker_id,
                                error_message: format!("err {:?}", err),
                            })
                            .expect("This is BAD: failed to send failure message to controller.");
                    }
                })
                .unwrap();
        }
        Ok(input_to_this_stage)
    }
    /// Append a Compression entry (fingerprinted by the first 8 bytes of the dictionary's
    /// BLAKE-256 hash) to the underlying pile's pipeline description.
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        let mut underlying = self.underlying.describe_pipeline()?;
        let mut dict_fingerprint_u256 = [0; 32];
        blake::hash(256, &self.settings.dictionary, &mut dict_fingerprint_u256)?;
        let dictionary_fingerprint: u64 =
            u64::from_be_bytes(dict_fingerprint_u256[0..8].try_into().unwrap());
        underlying.push(PipelineDescription::Compression {
            dictionary_fingerprint,
        });
        Ok(underlying)
    }
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
        self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
    }
}

View File

@ -1,160 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::hash::Hasher;
use thiserror::Error;
use crate::definitions::{ChunkId, XXH64_SEED};
use crate::pile::{
ControllerMessage, DebugStatistics, Keyspace, PipelineDescription, RawPile,
StoragePipelineSettings,
};
use crate::utils::bytes_to_hexstring;
use crossbeam_channel::Sender;
/// This RawPile enables checking the integrity of stored chunks.
/// This is done by storing a hash along with the chunk contents, which can later be verified.
/// Concretely: an 8-byte big-endian XXHash64 of the value is appended on write and stripped
/// (after verification) on read.
#[derive(Debug)]
pub struct RawPileIntegrityChecker<RP: RawPile> {
    underlying: RP,
}
impl<RP: RawPile> RawPileIntegrityChecker<RP> {
    /// Wrap `underlying` so that values are checksummed on write and verified on read.
    pub fn new(underlying: RP) -> Self {
        Self { underlying }
    }
}
/// Error raised when a stored chunk's appended XXHash64 does not match the hash recomputed
/// from its contents. All fields are hex strings for display.
#[derive(Error, Debug)]
#[error("Integrity error for chunk {chunk_id}; expected XXHash {expected_hash} but computed {computed_hash}!")]
pub struct IntegrityError {
    // Hex string of the chunk's key.
    pub chunk_id: String,
    // Hex string of the hash stored alongside the chunk.
    pub expected_hash: String,
    // Hex string of the hash recomputed from the chunk contents.
    pub computed_hash: String,
}
impl<RP: RawPile> RawPile for RawPileIntegrityChecker<RP> {
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        self.underlying.exists(kind, key)
    }
    /// Read a record, verify its trailing 8-byte big-endian XXHash64 against the contents,
    /// then return the contents with the hash trailer stripped.
    ///
    /// Returns an `IntegrityError` if the hash does not match, and an error (rather than
    /// panicking) if the stored record is too short to even contain a hash trailer.
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        match self.underlying.read(kind, key)? {
            None => Ok(None),
            Some(mut data_then_hash) => {
                let len = data_then_hash.len();
                // Fix: previously a record shorter than 8 bytes caused a panic
                // (slice index underflow). Treat it as corruption instead.
                if len < 8 {
                    anyhow::bail!(
                        "Integrity error for chunk {}: record too short ({} bytes) to contain a hash trailer",
                        bytes_to_hexstring(key),
                        len
                    );
                }
                let data_only = &data_then_hash[..len - 8];
                let xxhash = &data_then_hash[len - 8..];
                let mut hasher = twox_hash::XxHash64::with_seed(XXH64_SEED);
                hasher.write(&data_only);
                let computed_hash = hasher.finish().to_be_bytes();
                if computed_hash != xxhash {
                    Err(IntegrityError {
                        chunk_id: bytes_to_hexstring(key),
                        expected_hash: bytes_to_hexstring(&xxhash),
                        computed_hash: bytes_to_hexstring(&computed_hash),
                    })?;
                }
                // remove hash from end
                data_then_hash.drain(len - 8..);
                Ok(Some(data_then_hash))
            }
        }
    }
    /// Append the value's 8-byte big-endian XXHash64 and store value+hash.
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        // start with the data
        let mut buf = Vec::with_capacity(value.len() + 8);
        buf.extend_from_slice(&value[..]);
        // then append the hash
        let mut hasher = twox_hash::XxHash64::with_seed(XXH64_SEED);
        hasher.write(&value);
        let computed_hash = hasher.finish().to_be_bytes();
        buf.extend_from_slice(&computed_hash);
        self.underlying.write(kind, key, &buf)
    }
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        self.underlying.delete(kind, key)
    }
    fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
        self.underlying.delete_many(kind, keys)
    }
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        self.underlying.list_keys(kind)
    }
    fn flush(&self) -> anyhow::Result<()> {
        self.underlying.flush()
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        // TODO integrity check ...?
        self.underlying.check_lowlevel()
    }
    fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
        self.underlying.debug_statistics()
    }
    /// Insert a stage that appends the XXHash64 trailer to each chunk before it reaches
    /// the underlying pile's pipeline.
    fn build_storage_pipeline(
        &self,
        settings: StoragePipelineSettings,
        controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        // TODO primitive implementation but good enough for now.
        // May want metrics later?
        let next_stage = self
            .underlying
            .build_storage_pipeline(settings, controller_send)?;
        let (input, receiver) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(64);
        std::thread::Builder::new()
            .name("yama integrity".to_string())
            .spawn(move || {
                while let Ok((chunk_id, mut chunk)) = receiver.recv() {
                    let mut hasher = twox_hash::XxHash64::with_seed(XXH64_SEED);
                    hasher.write(&chunk);
                    let computed_hash = hasher.finish().to_be_bytes();
                    chunk.extend_from_slice(&computed_hash);
                    next_stage.send((chunk_id, chunk)).unwrap();
                }
            })
            .unwrap();
        Ok(input)
    }
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        let mut underlying = self.underlying.describe_pipeline()?;
        underlying.push(PipelineDescription::Integrity);
        Ok(underlying)
    }
    fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
        self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +0,0 @@
use indicatif::ProgressBar;
/// Abstraction over progress reporting, so long-running operations can drive a real
/// progress bar or nothing at all.
pub trait ProgressTracker {
    /// Advance the progress position by `delta_progress` units.
    fn inc_progress(&mut self, delta_progress: u64);
    /// Set the absolute progress position.
    fn set_current(&mut self, current_progress: u64);
    /// Set the total number of units that represents completion.
    fn set_max_size(&mut self, max_size: u64);
}
/// Forward all progress events straight to an indicatif progress bar.
impl ProgressTracker for ProgressBar {
    #[inline]
    fn inc_progress(&mut self, delta_progress: u64) {
        self.inc(delta_progress);
    }

    #[inline]
    fn set_current(&mut self, current_progress: u64) {
        self.set_position(current_progress);
    }

    #[inline]
    fn set_max_size(&mut self, max_size: u64) {
        self.set_length(max_size);
    }
}
/// No-operation progress tracker: silently discards every update.
/// Useful when a caller requires a `ProgressTracker` but no UI is wanted.
impl ProgressTracker for () {
    #[inline]
    fn inc_progress(&mut self, _delta_progress: u64) {}

    #[inline]
    fn set_current(&mut self, _current_progress: u64) {}

    #[inline]
    fn set_max_size(&mut self, _max_size: u64) {}
}

View File

@ -1,102 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::convert::TryInto;
use std::io::{Read, Write};
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use crate::pile::{Keyspace, PipelineDescription};
pub mod requester;
pub mod responder;
/// A framed request sent from a `Requester` to a `Responder`.
#[derive(Serialize, Deserialize, Clone)]
pub struct Request {
    // Correlation ID; the matching Response carries it in `response_to`.
    // NOTE(review): ID 0 appears to be used for requests with no reply channel — confirm.
    id: u16,
    body: RequestBody,
}
/// The operation being requested of the remote `Responder`.
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq)]
pub enum RequestBody {
    /// Read the value stored under `key` in `kind`.
    Read {
        kind: Keyspace,
        key: Vec<u8>,
    },
    /// Check whether `key` exists in `kind`.
    CheckExists {
        kind: Keyspace,
        key: Vec<u8>,
    },
    /// Store `value` under `key` in `kind`.
    Write {
        kind: Keyspace,
        key: Vec<u8>,
        value: Vec<u8>,
    },
    /// Delete `key` from `kind`.
    Delete {
        kind: Keyspace,
        key: Vec<u8>,
    },
    /// Begin listing all keys in `kind`; results arrive in batches.
    List {
        kind: Keyspace,
    },
    /// Fetch the next batch of a listing, identified by the continuation `token`.
    NextBatch {
        token: u16,
    },
    /// Flush the remote pile.
    Flush,
    /// Run the remote pile's low-level check.
    LowLevelCheck,
    /// Ask the remote pile to describe its storage pipeline.
    Describe,
    /// Ask the remote end to shut down.
    Shutdown,
    /// Progress report (current out of max).
    Progress {
        current: u64,
        max: u64,
    },
}
/// A framed response sent from a `Responder` back to a `Requester`.
#[derive(Serialize, Deserialize, Clone)]
pub struct Response {
    // The `id` of the Request this answers.
    response_to: u16,
    body: ResponseBody,
}
/// The result of a remote operation.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum ResponseBody {
    /// The operation succeeded (also doubles as `true` for existence/check queries).
    Success,
    /// The operation failed; the string is the remote error message.
    Failed(String),
    /// The requested key does not exist (also doubles as `false` for existence checks).
    NotExists,
    /// The value read for a `Read` request.
    Data(Vec<u8>),
    /// One batch of keys for a `List`/`NextBatch` request, plus the next continuation token.
    BatchData {
        batch: Vec<Vec<u8>>,
        next_token: u16,
    },
    /// The remote pipeline description for a `Describe` request.
    Description(Vec<PipelineDescription>),
}
/// Read one length-prefixed message from `read`: a big-endian u32 byte count followed by
/// that many bytes of BARE-encoded payload, which is deserialized into `D`.
pub fn read_message<R: Read, D: DeserializeOwned>(read: &mut R) -> anyhow::Result<D> {
    let mut len_bytes = [0u8; 4];
    read.read_exact(&mut len_bytes)?;
    let mut payload = vec![0u8; u32::from_be_bytes(len_bytes) as usize];
    read.read_exact(&mut payload)?;
    Ok(serde_bare::from_slice(&payload)?)
}
/// Write one length-prefixed message to `write`: BARE-encode `message`, then emit its
/// length as a big-endian u32 followed by the encoded bytes.
pub fn write_message<W: Write, S: Serialize>(write: &mut W, message: &S) -> anyhow::Result<()> {
    let payload = serde_bare::to_vec(&message)?;
    let len: u32 = payload.len().try_into()?;
    write.write_all(&len.to_be_bytes())?;
    write.write_all(&payload)?;
    Ok(())
}

View File

@ -1,495 +0,0 @@
use std::collections::HashMap;
use std::io::{stdin, stdout, Read, Write};
use std::sync::{Arc, Mutex};
use std::thread;
use std::thread::JoinHandle;
use anyhow::{anyhow, bail};
use crossbeam_channel::{Receiver, Sender};
use log::{error, info};
use crate::definitions::ChunkId;
use crate::pile::{
ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings,
};
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
use metrics::{
gauge, histogram, increment_counter, register_counter, register_gauge, register_histogram, Unit,
};
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
use std::time::Instant;
/// A kind of RawPile which can make requests to a RawPile over a pipe (e.g. TCP socket or an
/// SSH connection).
/// The requests are handled by a `Responder` on the other end of the pipe.
#[derive(Debug)]
pub struct Requester {
    // Queue feeding the writer thread: a request body plus an optional reply channel.
    // `None` means fire-and-forget (the request is sent with ID 0 and gets no response).
    commands: Sender<(RequestBody, Option<Sender<ResponseBody>>)>,
}
impl Requester {
    /// Start a Requester over an arbitrary read/write pipe, spawning a reader thread and a
    /// writer thread. Returns the Requester and the spawned thread handles.
    pub fn new<R: Read + Send + 'static, W: Write + Send + 'static>(
        read: R,
        write: W,
    ) -> (Self, Vec<JoinHandle<()>>) {
        register_histogram!(
            "requester_cmd_response_time_ms",
            Unit::Milliseconds,
            "Time between request being issued and a response being received"
        );
        // Map of outstanding request ID -> (reply channel, time the request was issued).
        let in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>> =
            Arc::new(Mutex::new(HashMap::new()));
        let (command_sender, command_receiver) = crossbeam_channel::bounded(16);
        let mut handles = Vec::new();
        // (shutdown request ID, shutdown-in-progress flag) shared between reader and writer.
        let shutdown_signal: Arc<(AtomicU16, AtomicBool)> = Default::default();
        {
            // Spawn a reader
            let in_flight = in_flight.clone();
            let shutdown_signal = shutdown_signal.clone();
            handles.push(
                thread::Builder::new()
                    .name("ReqstrReader".to_string())
                    .spawn(move || {
                        if let Err(e) = Self::reader(read, in_flight, shutdown_signal) {
                            error!("reader failed: {:?}", e);
                        }
                    })
                    .unwrap(),
            );
        }
        {
            // Spawn a writer
            let in_flight = in_flight.clone();
            let command_receiver = command_receiver.clone();
            handles.push(
                thread::Builder::new()
                    .name("ReqstrWriter".to_string())
                    .spawn(move || {
                        if let Err(e) =
                            Self::writer(write, in_flight, command_receiver, shutdown_signal)
                        {
                            error!("writer failed: {:?}", e);
                        }
                    })
                    .unwrap(),
            );
        }
        (
            Requester {
                commands: command_sender,
            },
            handles,
        )
    }
    /// Like `new`, but communicates over this process's stdin/stdout (locked for the
    /// lifetime of the threads). Used when the Requester runs as a subprocess.
    pub fn new_from_stdio() -> (Self, Vec<JoinHandle<()>>) {
        let in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>> =
            Arc::new(Mutex::new(HashMap::new()));
        let (command_sender, command_receiver) = crossbeam_channel::bounded(16);
        let mut handles = Vec::new();
        let shutdown_signal: Arc<(AtomicU16, AtomicBool)> = Default::default();
        {
            // Spawn a reader
            let in_flight = in_flight.clone();
            let shutdown_signal = shutdown_signal.clone();
            handles.push(
                thread::Builder::new()
                    .name("ReqstrReaderSI".to_string())
                    .spawn(move || {
                        let stdin = stdin();
                        let read = stdin.lock();
                        if let Err(e) = Self::reader(read, in_flight, shutdown_signal) {
                            error!("reader failed: {:?}", e);
                        }
                    })
                    .unwrap(),
            );
        }
        {
            // Spawn a writer
            let in_flight = in_flight.clone();
            let command_receiver = command_receiver.clone();
            handles.push(
                thread::Builder::new()
                    .name("ReqstrWriterSO".to_string())
                    .spawn(move || {
                        let stdout = stdout();
                        let write = stdout.lock();
                        if let Err(e) =
                            Self::writer(write, in_flight, command_receiver, shutdown_signal)
                        {
                            error!("writer failed: {:?}", e);
                        }
                    })
                    .unwrap(),
            );
        }
        (
            Requester {
                commands: command_sender,
            },
            handles,
        )
    }
    /// Clone the command queue, allowing callers to submit requests directly
    /// (used by the storage pipeline).
    pub fn clone_command_sender(&self) -> Sender<(RequestBody, Option<Sender<ResponseBody>>)> {
        self.commands.clone()
    }
    /// Thread that reads messages and sends them along.
    /// Dispatches each incoming Response to the in-flight reply channel matching its
    /// `response_to` ID, recording the round-trip latency; exits cleanly once the response
    /// to the flagged shutdown request arrives.
    fn reader<R: Read>(
        mut read: R,
        in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>>,
        shutdown_request_channel: Arc<(AtomicU16, AtomicBool)>,
    ) -> anyhow::Result<()> {
        loop {
            let response: Response = read_message(&mut read)?;
            // If shutdown was requested and this is its response, stop reading.
            if shutdown_request_channel.1.load(Ordering::Relaxed)
                && response.response_to == shutdown_request_channel.0.load(Ordering::Relaxed)
            {
                return Ok(());
            }
            let mut map = in_flight.lock().or(Err(anyhow!("Mutex poisoned")))?;
            // We free up the ID as we get the sender out of the map.
            let (resp_sender, req_instant) = map
                .remove(&response.response_to)
                .ok_or(anyhow!("Didn't find response channel..."))?;
            let req_resp_time_in_millis =
                Instant::now().duration_since(req_instant).as_millis() as f64;
            histogram!("requester_cmd_response_time_ms", req_resp_time_in_millis);
            resp_sender
                .send(response.body)
                .or(Err(anyhow!("Failed to send response to channel")))?;
        }
    }
    /// Thread that writes messages.
    /// Assigns a free request ID (0 is used for fire-and-forget commands with no reply
    /// channel), registers the reply channel in `in_flight`, and serialises the request.
    /// When the command queue closes without an explicit Shutdown, one is issued.
    fn writer<W: Write>(
        mut write: W,
        in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>>,
        command_receiver: Receiver<(RequestBody, Option<Sender<ResponseBody>>)>,
        shutdown_request_channel: Arc<(AtomicU16, AtomicBool)>,
    ) -> anyhow::Result<()> {
        while let Ok((req_body, response_channel)) = command_receiver.recv() {
            let request_id = if let Some(response_channel) = response_channel {
                let mut map = in_flight.lock().or(Err(anyhow!("Mutex poisoned")))?;
                // IDs start at 1; 0 is reserved for requests without a reply channel.
                let request_id = (1u16..u16::MAX)
                    .into_iter()
                    .find(|id| !map.contains_key(&id))
                    .expect("No ID found");
                let now = Instant::now();
                map.insert(request_id, (response_channel, now));
                request_id
            } else {
                0
            };
            let shutting_down = &req_body == &RequestBody::Shutdown;
            if shutting_down {
                // Tell the reader which response ID signals it may stop.
                shutdown_request_channel
                    .0
                    .store(request_id, Ordering::SeqCst);
                shutdown_request_channel.1.store(true, Ordering::SeqCst);
            }
            write_message(
                &mut write,
                &Request {
                    id: request_id,
                    body: req_body,
                },
            )?;
            write.flush()?;
            if shutting_down {
                return Ok(());
            }
        }
        info!("Exited send loop without shutdown message, issuing Shutdown.");
        // shutdown ourselves
        let request_id = {
            let map = in_flight.lock().or(Err(anyhow!("Mutex poisoned")))?;
            // NOTE(review): this search starts at 0, unlike the 1.. search above, even though
            // 0 is otherwise reserved for no-reply requests — confirm this is intentional.
            let request_id = (0u16..u16::MAX)
                .into_iter()
                .find(|id| !map.contains_key(&id))
                .expect("No ID found");
            request_id
        };
        shutdown_request_channel
            .0
            .store(request_id, Ordering::SeqCst);
        shutdown_request_channel.1.store(true, Ordering::SeqCst);
        write_message(
            &mut write,
            &Request {
                id: request_id,
                body: RequestBody::Shutdown,
            },
        )?;
        write.flush()?;
        Ok(())
    }
    /// Helper to make a request and wait for the result.
    fn request(&self, req: RequestBody) -> anyhow::Result<ResponseBody> {
        // Rendezvous channel: recv blocks until the reader thread delivers the response.
        let (sender, receiver) = crossbeam_channel::bounded(0);
        self.commands
            .send((req, Some(sender)))
            .or(Err(anyhow!("Failed to queue request")))?;
        Ok(receiver
            .recv()
            .or(Err(anyhow!("Failed to receive response")))?)
    }
}
impl RawPile for Requester {
    /// Existence is encoded as Success (= exists) / NotExists (= does not exist).
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        match self.request(RequestBody::CheckExists {
            kind,
            key: key.to_vec(),
        })? {
            ResponseBody::Success => Ok(true),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            ResponseBody::NotExists => Ok(false),
            other => Err(anyhow!("Received {:?} for Exists", other)),
        }
    }
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        match self.request(RequestBody::Read {
            kind,
            key: key.to_vec(),
        })? {
            ResponseBody::Success => Err(anyhow!("Received Success for read.")),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            ResponseBody::NotExists => Ok(None),
            ResponseBody::Data(data) => Ok(Some(data)),
            other => Err(anyhow!("Received {:?} for Read", other)),
        }
    }
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        match self.request(RequestBody::Write {
            kind,
            key: key.to_vec(),
            value: value.to_vec(),
        })? {
            ResponseBody::Success => Ok(()),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            other => Err(anyhow!("Received {:?} for Write", other)),
        }
    }
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        match self.request(RequestBody::Delete {
            kind,
            key: key.to_vec(),
        })? {
            ResponseBody::Success => Ok(()),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            other => Err(anyhow!("Received {:?} for Delete", other)),
        }
    }
    /// No batched delete in the wire protocol: issue one Delete request per key.
    fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
        for &key in keys {
            self.delete(kind, key)?;
        }
        Ok(())
    }
    /// Start a remote key listing; batches are pulled lazily by `ListKeyIterator` via
    /// NextBatch requests.
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        match self.request(RequestBody::List { kind })? {
            // Success with no data: an empty listing.
            ResponseBody::Success => Ok(Box::new(ListKeyIterator {
                command_sender: self.commands.clone(),
                batch_token: None,
                buffer: Vec::with_capacity(0),
            })),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            ResponseBody::BatchData { batch, next_token } => Ok(Box::new(ListKeyIterator {
                command_sender: self.commands.clone(),
                batch_token: Some(next_token),
                buffer: batch,
            })),
            other => Err(anyhow!("Received {:?} for List", other)),
        }
    }
    fn flush(&self) -> anyhow::Result<()> {
        match self.request(RequestBody::Flush)? {
            ResponseBody::Success => Ok(()),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            other => Err(anyhow!("Received {:?} for Flush", other)),
        }
    }
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        match self.request(RequestBody::LowLevelCheck)? {
            ResponseBody::Success => Ok(true),
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            other => Err(anyhow!("Received {:?} for LowLevelCheck", other)),
        }
    }
    /// Pipeline stage that issues Write requests directly on the command queue, keeping up
    /// to MAX_IN_FLIGHT_WRITES outstanding so backpressure is applied without stalling.
    fn build_storage_pipeline(
        &self,
        _settings: StoragePipelineSettings,
        _controller_send: Sender<ControllerMessage>,
    ) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
        // this one is a little bit more complex.
        // We want to be able to send off multiple write requests at once, but not too many, so we
        // need to be able to apply backpressure.
        let (input, receiver) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(128);
        let command_sender = self.commands.clone();
        register_counter!(
            "requester_pipeline_cmds_issued",
            Unit::Count,
            "Number of write commands issued by the Requester's storage pipeline"
        );
        register_gauge!(
            "requester_pipeline_writes_inflight",
            Unit::Count,
            "Number of write commands in-flight"
        );
        std::thread::Builder::new()
            .name("ReqStPpln".to_string())
            .spawn(move || {
                // All in-flight writes share this reply channel; each reply retires one write.
                let (response_tx, response_rx) = crossbeam_channel::bounded::<ResponseBody>(32);
                let mut in_flight_writes = 0;
                const MAX_IN_FLIGHT_WRITES: u32 = 32;
                let mut pipeline_still_going = true;
                // Run until the input closes AND every outstanding write has been confirmed.
                while pipeline_still_going || in_flight_writes > 0 {
                    gauge!(
                        "requester_pipeline_writes_inflight",
                        in_flight_writes as f64
                    );
                    // TODO this won't handle channel closure properly.
                    if in_flight_writes < MAX_IN_FLIGHT_WRITES && pipeline_still_going {
                        // Room for more writes: accept either a response or a new chunk.
                        crossbeam_channel::select! {
                            recv(response_rx) -> resp => {
                                in_flight_writes -= 1;
                                match resp.unwrap() {
                                    ResponseBody::Success => {
                                        // nop
                                    }
                                    ResponseBody::Failed(string) => {
                                        panic!("Requester pipeline fail {}", string);
                                    }
                                    other => panic!("wtf {:?}", other),
                                }
                            }
                            recv(receiver) -> resp => {
                                if let Ok((chunk_id, write)) = resp {
                                    in_flight_writes += 1;
                                    increment_counter!("requester_pipeline_cmds_issued");
                                    command_sender.send((RequestBody::Write {
                                        kind: Keyspace::Chunk,
                                        key: chunk_id.to_vec(),
                                        value: write
                                    }, Some(response_tx.clone()))).unwrap();
                                } else {
                                    // the input has stopped
                                    pipeline_still_going = false;
                                }
                            }
                        }
                    } else {
                        // Either the pipeline is stopping or we are too busy to accept new chunks,
                        // so only process responses.
                        let resp = response_rx.recv().unwrap();
                        in_flight_writes -= 1;
                        match resp {
                            ResponseBody::Success => {
                                // nop
                            }
                            ResponseBody::Failed(string) => {
                                panic!("Requester pipeline fail {}", string);
                            }
                            other => panic!("wtf {:?}", other),
                        }
                    }
                }
            })
            .unwrap();
        Ok(input)
    }
    /// Fetch the remote pipeline description and append a Remote marker for this hop.
    fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
        match self.request(RequestBody::Describe)? {
            ResponseBody::Description(mut description) => {
                description.push(PipelineDescription::Remote);
                Ok(description)
            }
            ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
            other => Err(anyhow!("Received {:?} for Describe", other)),
        }
    }
    fn chunk_id_transfer_ordering_hint(&self, _chunk_id: &ChunkId) -> anyhow::Result<u64> {
        bail!("You probably shouldn't be using chunk ID transfer ordering hints with a remote.");
    }
}
/// Iterator over a remote key listing; pulls batches from the Responder on demand
/// using NextBatch requests.
pub struct ListKeyIterator {
    // Queue into the Requester's writer thread, used to issue NextBatch requests.
    pub(crate) command_sender: Sender<(RequestBody, Option<Sender<ResponseBody>>)>,
    // Continuation token for the next batch; `None` once the listing is exhausted.
    pub(crate) batch_token: Option<u16>,
    /// in reverse order
    pub(crate) buffer: Vec<Vec<u8>>,
}
impl Iterator for ListKeyIterator {
    type Item = anyhow::Result<Vec<u8>>;

    /// Pop the next buffered key; when the buffer runs dry, request another batch from
    /// the remote end (blocking), ending the iteration once the remote reports Success
    /// (no more batches) or the token is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(key) = self.buffer.pop() {
            return Some(Ok(key));
        }
        // No continuation token means the listing is finished.
        let token = self.batch_token?;
        let (send, recv) = crossbeam_channel::bounded(0);
        self.command_sender
            .send((RequestBody::NextBatch { token }, Some(send)))
            .expect("Unable to send");
        match recv.recv().expect("Unable to recv") {
            ResponseBody::Success => {
                self.batch_token = None;
                None
            }
            ResponseBody::Failed(err_msg) => Some(Err(anyhow!("Remote failure: {}", err_msg))),
            ResponseBody::BatchData { batch, next_token } => {
                self.batch_token = Some(next_token);
                // Buffer holds keys in reverse so `pop` yields them in order.
                self.buffer = batch;
                self.buffer.reverse();
                self.buffer.pop().map(Ok)
            }
            other => Some(Err(anyhow!("Received {:?} for NextBatch", other))),
        }
    }
}

View File

@ -1,374 +0,0 @@
use std::collections::HashMap;
use std::io::{Read, Write};
use std::sync::{Arc, Mutex};
use std::thread;
use std::thread::JoinHandle;
use anyhow::anyhow;
use crossbeam_channel::{Receiver, Sender};
use itertools::Itertools;
use log::{error, info, warn};
use crate::definitions::ChunkId;
use crate::pile::{Keyspace, RawPile};
use crate::progress::ProgressTracker;
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
#[derive(Clone)]
/// Sink that diverts chunk writes to an out-of-band writing pipeline instead
/// of writing them directly to the pile (see `Responder::worker`'s Write arm).
pub struct ResponderWritingPipeline {
    // Chunk ID + raw chunk bytes are submitted here for asynchronous writing.
    pub pipeline_submission: Sender<(ChunkId, Vec<u8>)>,
}
#[derive(Clone)]
/// A wrapper for a RawPile which allows a `Requester` to access it over a pipe (e.g. TCP socket or
/// an SSH connection).
pub struct Responder {
    /// Live `List` continuations: batch token → channel used to hand the next
    /// request ID over to the worker that is streaming that listing.
    continuation_tokens: Arc<Mutex<HashMap<u16, Sender<u16>>>>,
    /// Optional out-of-band sink for chunk writes.
    writing_pipeline: Option<ResponderWritingPipeline>,
}
impl Responder {
/// Start a 'responder' (command processor).
pub fn start<
RP: RawPile + 'static,
R: Read + Send + 'static,
W: Write + Send + 'static,
PT: ProgressTracker + Send + 'static,
>(
read: R,
write: W,
num_workers: u16,
pile: Arc<RP>,
writing_pipeline: Option<ResponderWritingPipeline>,
mut progress_bar: PT,
) -> (JoinHandle<R>, JoinHandle<W>, Vec<JoinHandle<()>>) {
let mut handles = Vec::new();
let (work_queue_send, work_queue_recv) = crossbeam_channel::bounded::<Request>(16);
let (resp_send, resp_recv) = crossbeam_channel::bounded::<Response>(4);
let responder = Responder {
continuation_tokens: Arc::new(Mutex::new(Default::default())),
writing_pipeline,
};
let r_handle = {
// spawn the reader
let work_queue_send = work_queue_send.clone();
let responder = responder.clone();
thread::Builder::new()
.name("RespdrReader".to_string())
.spawn(move || {
let mut read = read;
if let Err(e) = responder.reader(&mut read, work_queue_send, &mut progress_bar)
{
error!("reader failed: {:?}", e);
}
read
})
.unwrap()
};
let w_handle = {
// spawn the writer
let resp_recv = resp_recv.clone();
let responder = responder.clone();
thread::Builder::new()
.name("RespdrWriter".to_string())
.spawn(move || {
let mut write = write;
if let Err(e) = responder.writer(&mut write, resp_recv) {
error!("writer failed: {:?}", e);
}
write
})
.unwrap()
};
for worker_num in 0..num_workers {
// spawn a worker
let responder = responder.clone();
let work_queue_recv = work_queue_recv.clone();
let resp_send = resp_send.clone();
let pile = pile.clone();
handles.push(
thread::Builder::new()
.name("RespdrWorker".to_string())
.spawn(move || {
if let Err(e) = responder.worker(pile.as_ref(), work_queue_recv, resp_send)
{
error!("worker {} failed: {:?}", worker_num, e);
}
})
.unwrap(),
);
}
(r_handle, w_handle, handles)
}
/// Thread that reads messages and sends them along.
fn reader<R: Read + Send + 'static, PT: ProgressTracker>(
&self,
read: &mut R,
worker_queue_send: Sender<Request>,
progress_tracker: &mut PT,
) -> anyhow::Result<()> {
loop {
let request: Request = read_message(read)?;
match request.body {
RequestBody::Shutdown => {
worker_queue_send.send(request)?;
info!("Shutting down responder");
return Ok(());
}
RequestBody::NextBatch { token } => {
let tokens = self
.continuation_tokens
.lock()
.or(Err(anyhow!("Mutex poisoned")))?;
tokens
.get(&token)
.ok_or(anyhow!("Could not find that token."))?
.send(request.id)
.or(Err(anyhow!(
"Failed to send continuation token to continuer"
)))?;
}
RequestBody::Progress { current, max } => {
progress_tracker.set_max_size(max);
progress_tracker.set_current(current);
}
_ => {
worker_queue_send.send(request)?;
}
}
}
}
/// Thread that writes messages.
fn writer<W: Write + Send>(
&self,
mut write: W,
responses: Receiver<Response>,
) -> anyhow::Result<()> {
while let Ok(response) = responses.recv() {
write_message(&mut write, &response)?;
write.flush()?;
}
Ok(())
}
/// Thread which performs the actual work using the pile.
fn worker<RP: RawPile>(
&self,
pile: &RP,
requests: Receiver<Request>,
responses: Sender<Response>,
) -> anyhow::Result<()> {
while let Ok(request) = requests.recv() {
let response = match request.body {
RequestBody::Read { kind, key } => match pile.read(kind, &key) {
Ok(Some(data)) => Response {
response_to: request.id,
body: ResponseBody::Data(data),
},
Ok(None) => Response {
response_to: request.id,
body: ResponseBody::NotExists,
},
Err(err) => {
warn!("Error whilst doing a raw read: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
RequestBody::CheckExists { kind, key } => match pile.exists(kind, &key) {
Ok(true) => Response {
response_to: request.id,
body: ResponseBody::Success,
},
Ok(false) => Response {
response_to: request.id,
body: ResponseBody::NotExists,
},
Err(err) => {
warn!("Error whilst doing a raw exists: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
RequestBody::Write { kind, key, value } => {
if let Some(writing_pipeline) = self
.writing_pipeline
.as_ref()
.filter(|_| kind == Keyspace::Chunk)
{
let mut chunk_id = ChunkId::default();
chunk_id.copy_from_slice(&key[..]);
writing_pipeline
.pipeline_submission
.send((chunk_id, value))?;
// We lie and say it was successful once we submit.
// We'll complain on our side if anything goes wrong, anyway.
Response {
response_to: request.id,
body: ResponseBody::Success,
}
} else {
match pile.write(kind, &key, &value) {
Ok(_) => Response {
response_to: request.id,
body: ResponseBody::Success,
},
Err(err) => {
warn!("Error whilst doing a raw write: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
}
}
}
RequestBody::Delete { kind, key } => match pile.delete(kind, &key) {
Ok(_) => Response {
response_to: request.id,
body: ResponseBody::Success,
},
Err(err) => {
warn!("Error whilst doing a raw delete: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
RequestBody::List { kind } => match pile.list_keys(kind) {
Ok(iterator) => {
let mut resp_id = request.id;
let (cont_sender, cont_receiver) = crossbeam_channel::bounded(1);
let batch_token = {
let mut map = self
.continuation_tokens
.lock()
.or(Err(anyhow!("Mutex poisoned")))?;
let batch_token = (0u16..u16::MAX)
.into_iter()
.find(|id| !map.contains_key(&id))
.expect("No ID found");
map.insert(batch_token, cont_sender);
batch_token
};
for chunk in &iterator.chunks(32) {
let mut results = Vec::with_capacity(32);
for result in chunk {
results.push(result?);
}
responses
.send(Response {
response_to: resp_id,
body: ResponseBody::BatchData {
batch: results,
next_token: batch_token,
},
})
.or(Err(anyhow!("Failed to queue response")))?;
resp_id = cont_receiver
.recv()
.or(Err(anyhow!("Failed to receive continuation response ID")))?;
}
let mut map = self
.continuation_tokens
.lock()
.or(Err(anyhow!("Mutex poisoned")))?;
map.remove(&batch_token);
Response {
response_to: resp_id,
body: ResponseBody::Success,
}
}
Err(err) => {
warn!("Error whilst doing a raw list_keys: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
RequestBody::NextBatch { .. } => {
unreachable!("because this is handled by the reader")
}
RequestBody::Flush => match pile.flush() {
Ok(_) => Response {
response_to: request.id,
body: ResponseBody::Success,
},
Err(err) => {
warn!("Error whilst doing a raw flush: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
RequestBody::LowLevelCheck => match pile.check_lowlevel() {
Ok(_) => Response {
response_to: request.id,
body: ResponseBody::Success,
},
Err(err) => {
warn!("Error whilst doing a raw check_lowlevel: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
RequestBody::Shutdown => Response {
response_to: request.id,
body: ResponseBody::Success,
},
RequestBody::Progress { .. } => {
unreachable!("handled by readea")
}
RequestBody::Describe => match pile.describe_pipeline() {
Ok(description) => Response {
response_to: request.id,
body: ResponseBody::Description(description),
},
Err(err) => {
warn!("Error whilst doing a raw describe_pipeline: {:?}", err);
let err = format!("{:?}", err);
Response {
response_to: request.id,
body: ResponseBody::Failed(err),
}
}
},
};
responses
.send(response)
.or(Err(anyhow!("Failed to queue response")))?;
}
Ok(())
}
}

View File

@ -1,718 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::fs::{read_link, symlink_metadata, DirEntry, Metadata};
use std::io::ErrorKind;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use anyhow::anyhow;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::warn;
use crate::definitions::{FilesystemOwnership, FilesystemPermissions, RecursiveChunkRef, TreeNode};
/// Given a file's metadata, returns the mtime in milliseconds.
pub fn mtime_msec(metadata: &Metadata) -> u64 {
    let whole_seconds_as_msec = metadata.mtime() * 1000;
    let subsecond_msec = metadata.mtime_nsec() / 1_000_000;
    (whole_seconds_as_msec + subsecond_msec) as u64
}
/// Scan the filesystem to produce a Tree, using a default progress bar.
pub fn scan(path: &Path) -> anyhow::Result<Option<TreeNode>> {
    // Spinner with unknown total, redrawn at most twice per second on stdout.
    let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(2));
    pbar.set_style(ProgressStyle::default_spinner().template("{spinner} {pos:7} {msg}"));
    pbar.set_message("dir scan");
    let result = scan_with_progress_bar(path, &pbar);
    // Keep whatever count was reached rather than jumping to a fake total.
    pbar.finish_at_current_pos();
    result
}
/// Scan the filesystem to produce a Tree, using the specified progress bar.
///
/// Returns `Ok(None)` when the path vanished mid-scan, was permission-denied,
/// or is of a file type that is not backed up (neither regular file, directory
/// nor symlink). Other I/O errors propagate as `Err`.
pub fn scan_with_progress_bar(
    path: &Path,
    progress_bar: &ProgressBar,
) -> anyhow::Result<Option<TreeNode>> {
    // symlink_metadata: do NOT follow symlinks; we record them as links.
    let metadata_res = symlink_metadata(path);
    progress_bar.inc(1);
    if let Err(e) = &metadata_res {
        match e.kind() {
            ErrorKind::NotFound => {
                warn!("vanished: {:?}", path);
                return Ok(None);
            }
            ErrorKind::PermissionDenied => {
                warn!("permission denied: {:?}", path);
                return Ok(None);
            }
            // Any other error falls through to the `?` below and propagates.
            _ => { /* nop */ }
        }
    }
    let metadata = metadata_res?;
    let filetype = metadata.file_type();
    /*let name = path
        .file_name()
        .ok_or(anyhow!("No filename, wat"))?
        .to_str()
        .ok_or(anyhow!("Filename can't be to_str()d"))?
        .to_owned();*/
    // NOTE(review): uid/gid are truncated to u16 here — assumes IDs fit in
    // 16 bits; confirm against the FilesystemOwnership definition.
    let ownership = FilesystemOwnership {
        uid: metadata.uid() as u16,
        gid: metadata.gid() as u16,
    };
    let permissions = FilesystemPermissions {
        mode: metadata.mode(),
    };
    if filetype.is_file() {
        // Leave an unpopulated file node. It's not my responsibility to chunk it right now.
        Ok(Some(TreeNode::NormalFile {
            mtime: mtime_msec(&metadata),
            ownership,
            permissions,
            content: RecursiveChunkRef {
                chunk_id: [0; 32],
                depth: 0,
            },
        }))
    } else if filetype.is_dir() {
        let mut children = BTreeMap::new();
        progress_bar.set_message(&format!("{:?}", path));
        let dir_read = path.read_dir();
        // Same vanish/permission tolerance as above, for the directory listing.
        if let Err(e) = &dir_read {
            match e.kind() {
                ErrorKind::NotFound => {
                    warn!("vanished/: {:?}", path);
                    return Ok(None);
                }
                ErrorKind::PermissionDenied => {
                    warn!("permission denied/: {:?}", path);
                    return Ok(None);
                }
                _ => { /* nop */ }
            }
        }
        for entry in dir_read? {
            let entry: DirEntry = entry?;
            // Recurse; entries that returned None (vanished/skipped) are omitted.
            let scanned = scan_with_progress_bar(&entry.path(), progress_bar)?;
            if let Some(scanned) = scanned {
                children.insert(
                    entry
                        .file_name()
                        .into_string()
                        .expect("OsString not String"),
                    scanned,
                );
            }
        }
        Ok(Some(TreeNode::Directory {
            ownership,
            permissions,
            children,
        }))
    } else if filetype.is_symlink() {
        let target = read_link(path)?
            .to_str()
            .ok_or(anyhow!("target path cannot be to_str()d"))?
            .to_owned();
        Ok(Some(TreeNode::SymbolicLink { ownership, target }))
    } else {
        // Sockets, FIFOs, devices etc. are not represented in the tree.
        Ok(None)
    }
}
/// Differentiates a node in place.
/// This makes `old` the parent of `new` (though it is up to the caller to properly update the
/// `PointerData` to reflect this!).
/// Loosely speaking, `new` is modified to contain the differences that, when applied to `old`, will
/// result in the original value of `new`.
/// See `integrate_node_in_place` for the inverse of this operation.
pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
    if let (
        TreeNode::Directory { children, .. },
        TreeNode::Directory {
            children: old_children,
            ..
        },
    ) = (new, old)
    {
        for (name, old_node) in old_children.iter() {
            match children.entry(name.clone()) {
                Entry::Vacant(slot) => {
                    // Present in the parent but gone here: record the deletion.
                    slot.insert(TreeNode::Deleted);
                }
                Entry::Occupied(slot) => {
                    if slot.get().metadata_invalidates(old_node)? {
                        // Changed: recurse so grandchildren are differentiated too.
                        differentiate_node_in_place(slot.into_mut(), old_node)?;
                    } else {
                        // The entry is the same, so we don't need to store it!
                        slot.remove_entry();
                    }
                }
            }
        }
    }
    Ok(())
}
/// Integrates a node in place.
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
/// `PointerData` appropriately if needed to reflect this).
///
/// Loosely speaking, `new` is treated as a set of differences that are applied to `old`, though the
/// result is in-place.
///
/// Preconditions:
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
/// - `old` is the parent of `new`
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
    if let (
        TreeNode::Directory { children, .. },
        TreeNode::Directory {
            children: old_children,
            ..
        },
    ) = (new, old)
    {
        for (name, old_node) in old_children.iter() {
            match children.entry(name.clone()) {
                Entry::Vacant(slot) => {
                    // Absent in the diff means unchanged: copy the parent's node.
                    slot.insert(old_node.clone());
                }
                Entry::Occupied(slot) => {
                    if slot.get() == &TreeNode::Deleted {
                        // We don't insert the old node but we do remove the 'deleted' marker
                        // node!
                        slot.remove();
                    } else {
                        integrate_node_in_place(slot.into_mut(), old_node)?;
                    }
                }
            }
        }
    }
    // Non-directory nodes stay exactly as they are — intentional NOP.
    Ok(())
}
/// Given a node, recursively constructs a UID and GID lookup table based on THIS system's
/// users and groups.
///
/// IDs already present in `uids`/`gids` are left untouched; unknown IDs map to
/// `None`.
pub fn create_uidgid_lookup_tables(
    node: &TreeNode,
    uids: &mut BTreeMap<u16, Option<String>>,
    gids: &mut BTreeMap<u16, Option<String>>,
) -> anyhow::Result<()> {
    let ownership = match node {
        TreeNode::NormalFile { ownership, .. }
        | TreeNode::Directory { ownership, .. }
        | TreeNode::SymbolicLink { ownership, .. } => Some(ownership),
        TreeNode::Deleted => None,
    };
    if let Some(ownership) = ownership {
        if !uids.contains_key(&ownership.uid) {
            // Resolve the UID against this system's user database.
            let name = match users::get_user_by_uid(ownership.uid.into()) {
                Some(user) => Some(
                    user.name()
                        .to_str()
                        .ok_or(anyhow!("uid leads to non-String name"))?
                        .to_owned(),
                ),
                None => None,
            };
            uids.insert(ownership.uid, name);
        }
        if !gids.contains_key(&ownership.gid) {
            // Resolve the GID against this system's group database.
            let name = match users::get_group_by_gid(ownership.gid.into()) {
                Some(group) => Some(
                    group
                        .name()
                        .to_str()
                        .ok_or(anyhow!("gid leads to non-String name"))?
                        .to_owned(),
                ),
                None => None,
            };
            gids.insert(ownership.gid, name);
        }
    }
    if let TreeNode::Directory { children, .. } = node {
        for child in children.values() {
            create_uidgid_lookup_tables(child, uids, gids)?;
        }
    }
    Ok(())
}
#[must_use]
/// Accepts a target UID/GID map and a source UID/GID map, merging source into
/// target.
/// Returns true if everything is OK, false if there is a warning (overlap!).
/// In the event of an overlap, the pre-existing entry wins.
pub fn merge_uid_or_gid_tables(
    target: &mut BTreeMap<u16, Option<String>>,
    source: &BTreeMap<u16, Option<String>>,
) -> bool {
    let mut no_conflicts = true;
    for (id, name) in source.iter() {
        match target.entry(*id) {
            Entry::Vacant(slot) => {
                slot.insert(name.clone());
            }
            Entry::Occupied(slot) => {
                // Existing entry wins; just note whether it disagrees.
                no_conflicts &= slot.get() == name;
            }
        }
    }
    no_conflicts
}
#[cfg(test)]
mod tests {
    use crate::definitions::{
        FilesystemOwnership, FilesystemPermissions, RecursiveChunkRef, TreeNode,
    };
    use crate::tree::{differentiate_node_in_place, integrate_node_in_place};
    use std::collections::BTreeMap;
    // NOTE(review): literals like 1337/1338 and (below) 0660/0550 are decimal
    // in Rust (octal would be 0o660); harmless here as opaque fixture values.
    //
    // Differentiating two identical empty directories leaves `new` unchanged.
    #[test]
    pub fn test_differentiate_in_place_primitive() {
        let mut new = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 42, gid: 47 },
            permissions: FilesystemPermissions { mode: 1337 },
            children: BTreeMap::new(),
        };
        let old = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 42, gid: 47 },
            permissions: FilesystemPermissions { mode: 1337 },
            children: BTreeMap::new(),
        };
        assert!(differentiate_node_in_place(&mut new, &old).is_ok());
        assert_eq!(
            new,
            TreeNode::Directory {
                ownership: FilesystemOwnership { uid: 42, gid: 47 },
                permissions: FilesystemPermissions { mode: 1337 },
                children: BTreeMap::new()
            }
        );
    }
    /// Tests the creation of a file in the tree.
    /// `alice` is unchanged so it is dropped from the diff; only the new
    /// `bob` symlink remains in the differentiated children.
    #[test]
    pub fn test_differentiate_in_place_create_only() {
        let alice = TreeNode::NormalFile {
            mtime: 98347523,
            ownership: FilesystemOwnership { uid: 43, gid: 48 },
            permissions: FilesystemPermissions { mode: 1338 },
            content: RecursiveChunkRef {
                chunk_id: [36; 32],
                depth: 22,
            },
        };
        let bob_new = TreeNode::SymbolicLink {
            ownership: FilesystemOwnership { uid: 43, gid: 48 },
            target: "alice".to_string(),
        };
        let mut children_new = BTreeMap::new();
        children_new.insert("bob".to_owned(), bob_new.clone());
        children_new.insert("alice".to_owned(), alice.clone());
        let mut children_old = BTreeMap::new();
        children_old.insert("alice".to_owned(), alice.clone());
        let mut new = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 42, gid: 47 },
            permissions: FilesystemPermissions { mode: 1337 },
            children: children_new.clone(),
        };
        let old = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 41, gid: 46 },
            permissions: FilesystemPermissions { mode: 1336 },
            children: children_old.clone(),
        };
        let mut children_result = BTreeMap::new();
        children_result.insert("bob".to_owned(), bob_new);
        assert!(differentiate_node_in_place(&mut new, &old).is_ok());
        assert_eq!(
            new,
            TreeNode::Directory {
                ownership: FilesystemOwnership { uid: 42, gid: 47 },
                permissions: FilesystemPermissions { mode: 1337 },
                children: children_result
            }
        );
    }
    /// Tests only a change in metadata in the tree.
    /// Identical children are dropped; only the directory's own metadata
    /// difference survives.
    #[test]
    pub fn test_differentiate_in_place_meta_only() {
        let alice = TreeNode::NormalFile {
            mtime: 98347523,
            ownership: FilesystemOwnership { uid: 43, gid: 48 },
            permissions: FilesystemPermissions { mode: 1338 },
            content: RecursiveChunkRef {
                chunk_id: [36; 32],
                depth: 22,
            },
        };
        let mut children = BTreeMap::new();
        children.insert("alice".to_owned(), alice);
        let mut new = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 42, gid: 47 },
            permissions: FilesystemPermissions { mode: 1337 },
            children: children.clone(),
        };
        let old = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 41, gid: 46 },
            permissions: FilesystemPermissions { mode: 1336 },
            children: children.clone(),
        };
        assert!(differentiate_node_in_place(&mut new, &old).is_ok());
        assert_eq!(
            new,
            TreeNode::Directory {
                ownership: FilesystemOwnership { uid: 42, gid: 47 },
                permissions: FilesystemPermissions { mode: 1337 },
                children: BTreeMap::new()
            }
        );
    }
    /// Tests that nodes that vanish lead to creation of a Deleted node.
    #[test]
    pub fn test_differences_in_place_deleted() {
        let alice = TreeNode::NormalFile {
            mtime: 98347523,
            ownership: FilesystemOwnership { uid: 43, gid: 48 },
            permissions: FilesystemPermissions { mode: 1338 },
            content: RecursiveChunkRef {
                chunk_id: [36; 32],
                depth: 22,
            },
        };
        let bob_old = TreeNode::SymbolicLink {
            ownership: FilesystemOwnership { uid: 43, gid: 48 },
            target: "alice".to_string(),
        };
        let mut children_old = BTreeMap::new();
        children_old.insert("bob".to_owned(), bob_old.clone());
        children_old.insert("alice".to_owned(), alice.clone());
        let mut children_new = BTreeMap::new();
        children_new.insert("alice".to_owned(), alice.clone());
        let old = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 42, gid: 47 },
            permissions: FilesystemPermissions { mode: 1337 },
            children: children_old.clone(),
        };
        let mut new = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 41, gid: 46 },
            permissions: FilesystemPermissions { mode: 1336 },
            children: children_new.clone(),
        };
        let mut children_result = BTreeMap::new();
        children_result.insert("bob".to_owned(), TreeNode::Deleted);
        assert!(differentiate_node_in_place(&mut new, &old).is_ok());
        assert_eq!(
            new,
            TreeNode::Directory {
                ownership: FilesystemOwnership { uid: 41, gid: 46 },
                permissions: FilesystemPermissions { mode: 1336 },
                children: children_result
            }
        );
    }
    // Diff of a nested tree: file1 unchanged (dropped), file2 removed
    // (Deleted marker), file42 added (kept verbatim).
    #[test]
    pub fn test_differentiate_node_in_place_mega_example() {
        // TODO extend this example
        let parent = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 47, gid: 49 },
            permissions: FilesystemPermissions { mode: 0660 },
            children: vec![(
                "dir1".to_string(),
                TreeNode::Directory {
                    ownership: FilesystemOwnership { uid: 46, gid: 50 },
                    permissions: FilesystemPermissions { mode: 0550 },
                    children: vec![
                        (
                            "file1".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1996,
                                ownership: FilesystemOwnership { uid: 54, gid: 59 },
                                permissions: FilesystemPermissions { mode: 0311 },
                                content: RecursiveChunkRef {
                                    chunk_id: [37; 32],
                                    depth: 2,
                                },
                            },
                        ),
                        (
                            "file2".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1970,
                                ownership: FilesystemOwnership { uid: 55, gid: 60 },
                                permissions: FilesystemPermissions { mode: 0321 },
                                content: RecursiveChunkRef {
                                    chunk_id: [42; 32],
                                    depth: 29,
                                },
                            },
                        ),
                    ]
                    .into_iter()
                    .collect(),
                },
            )]
            .into_iter()
            .collect(),
        };
        let child_full = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 47, gid: 49 },
            permissions: FilesystemPermissions { mode: 0660 },
            children: vec![(
                "dir1".to_string(),
                TreeNode::Directory {
                    ownership: FilesystemOwnership { uid: 46, gid: 50 },
                    permissions: FilesystemPermissions { mode: 0560 },
                    children: vec![
                        (
                            "file1".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1996,
                                ownership: FilesystemOwnership { uid: 54, gid: 59 },
                                permissions: FilesystemPermissions { mode: 0311 },
                                content: RecursiveChunkRef {
                                    chunk_id: [37; 32],
                                    depth: 2,
                                },
                            },
                        ),
                        (
                            "file42".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1970,
                                ownership: FilesystemOwnership { uid: 55, gid: 60 },
                                permissions: FilesystemPermissions { mode: 0321 },
                                content: RecursiveChunkRef {
                                    chunk_id: [42; 32],
                                    depth: 29,
                                },
                            },
                        ),
                    ]
                    .into_iter()
                    .collect(),
                },
            )]
            .into_iter()
            .collect(),
        };
        let mut child_diff = child_full.clone();
        differentiate_node_in_place(&mut child_diff, &parent).unwrap();
        let expected_child_diff = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 47, gid: 49 },
            permissions: FilesystemPermissions { mode: 0660 },
            children: vec![(
                "dir1".to_string(),
                TreeNode::Directory {
                    ownership: FilesystemOwnership { uid: 46, gid: 50 },
                    permissions: FilesystemPermissions { mode: 0560 },
                    children: vec![
                        ("file2".to_string(), TreeNode::Deleted),
                        (
                            "file42".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1970,
                                ownership: FilesystemOwnership { uid: 55, gid: 60 },
                                permissions: FilesystemPermissions { mode: 0321 },
                                content: RecursiveChunkRef {
                                    chunk_id: [42; 32],
                                    depth: 29,
                                },
                            },
                        ),
                    ]
                    .into_iter()
                    .collect(),
                },
            )]
            .into_iter()
            .collect(),
        };
        assert_eq!(child_diff, expected_child_diff);
    }
    // Inverse of the mega example above: applying the diff to the parent
    // restores the full child tree (Deleted marker consumes file2; file1
    // reappears from the parent).
    #[test]
    pub fn test_integrate_node_in_place_mega_example() {
        // TODO extend this example
        let parent = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 47, gid: 49 },
            permissions: FilesystemPermissions { mode: 0660 },
            children: vec![(
                "dir1".to_string(),
                TreeNode::Directory {
                    ownership: FilesystemOwnership { uid: 46, gid: 50 },
                    permissions: FilesystemPermissions { mode: 0550 },
                    children: vec![
                        (
                            "file1".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1996,
                                ownership: FilesystemOwnership { uid: 54, gid: 59 },
                                permissions: FilesystemPermissions { mode: 0311 },
                                content: RecursiveChunkRef {
                                    chunk_id: [37; 32],
                                    depth: 2,
                                },
                            },
                        ),
                        (
                            "file2".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1970,
                                ownership: FilesystemOwnership { uid: 55, gid: 60 },
                                permissions: FilesystemPermissions { mode: 0321 },
                                content: RecursiveChunkRef {
                                    chunk_id: [42; 32],
                                    depth: 29,
                                },
                            },
                        ),
                    ]
                    .into_iter()
                    .collect(),
                },
            )]
            .into_iter()
            .collect(),
        };
        let child_diff = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 47, gid: 49 },
            permissions: FilesystemPermissions { mode: 0660 },
            children: vec![(
                "dir1".to_string(),
                TreeNode::Directory {
                    ownership: FilesystemOwnership { uid: 46, gid: 50 },
                    permissions: FilesystemPermissions { mode: 0560 },
                    children: vec![
                        ("file2".to_string(), TreeNode::Deleted),
                        (
                            "file42".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1970,
                                ownership: FilesystemOwnership { uid: 55, gid: 60 },
                                permissions: FilesystemPermissions { mode: 0321 },
                                content: RecursiveChunkRef {
                                    chunk_id: [42; 32],
                                    depth: 29,
                                },
                            },
                        ),
                    ]
                    .into_iter()
                    .collect(),
                },
            )]
            .into_iter()
            .collect(),
        };
        let mut child_full = child_diff.clone();
        integrate_node_in_place(&mut child_full, &parent).unwrap();
        let expected_child_full = TreeNode::Directory {
            ownership: FilesystemOwnership { uid: 47, gid: 49 },
            permissions: FilesystemPermissions { mode: 0660 },
            children: vec![(
                "dir1".to_string(),
                TreeNode::Directory {
                    ownership: FilesystemOwnership { uid: 46, gid: 50 },
                    permissions: FilesystemPermissions { mode: 0560 },
                    children: vec![
                        (
                            "file1".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1996,
                                ownership: FilesystemOwnership { uid: 54, gid: 59 },
                                permissions: FilesystemPermissions { mode: 0311 },
                                content: RecursiveChunkRef {
                                    chunk_id: [37; 32],
                                    depth: 2,
                                },
                            },
                        ),
                        (
                            "file42".to_string(),
                            TreeNode::NormalFile {
                                mtime: 1970,
                                ownership: FilesystemOwnership { uid: 55, gid: 60 },
                                permissions: FilesystemPermissions { mode: 0321 },
                                content: RecursiveChunkRef {
                                    chunk_id: [42; 32],
                                    depth: 29,
                                },
                            },
                        ),
                    ]
                    .into_iter()
                    .collect(),
                },
            )]
            .into_iter()
            .collect(),
        };
        assert_eq!(child_full, expected_child_full);
    }
}

View File

@ -1,140 +0,0 @@
/*
This file is part of Yama.
Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
/// Render a byte slice (e.g. a chunk ID) as a lowercase hexadecimal string.
pub fn bytes_to_hexstring(chunkid: &[u8]) -> String {
    // Two hex digits per byte; reserve up front to avoid reallocation.
    let mut out = String::with_capacity(chunkid.len() * 2);
    chunkid.iter().for_each(|byte| {
        write!(&mut out, "{:02x}", byte).expect("Unable to write");
    });
    out
}
/// Determine how many worker threads to use.
///
/// Checks `first_try_env_name`, then the `YAMA_WORKERS` environment variable;
/// if neither is set, falls back to the CPU count (saturating at 255).
///
/// Panics if an environment variable is set but does not parse as a `u8`.
pub fn get_number_of_workers(first_try_env_name: &str) -> u8 {
    let configured = std::env::var(first_try_env_name)
        .ok()
        .or_else(|| std::env::var("YAMA_WORKERS").ok());
    match configured {
        Some(text) => text
            .parse()
            .expect("Number of workers should be an integer from 1 to 255."),
        None => {
            // Clamp the CPU count into the u8 range.
            let cpus = num_cpus::get();
            cpus.min(u8::MAX.into()) as u8
        }
    }
}
#[derive(Clone, Debug)]
/// A bounded map with least-recently-used eviction.
///
/// Recency is tracked with a monotonically increasing logical clock
/// (`counter`); `last_access` orders live entries by their last-use instant so
/// the oldest entry can be found cheaply.
pub struct LruMap<K, V> {
    capacity: usize,
    last_access: BTreeSet<(u64, K)>,
    items: BTreeMap<K, (V, u64)>,
    counter: u64,
}
impl<K: Ord + Clone, V> LruMap<K, V> {
    /// Creates an empty map that holds at most `capacity` entries.
    pub fn new(capacity: usize) -> LruMap<K, V> {
        LruMap {
            capacity,
            last_access: BTreeSet::new(),
            items: BTreeMap::new(),
            counter: 0,
        }
    }
    /// Gets an item from the LRU map, freshening its recency if present.
    pub fn get(&mut self, key: &K) -> Option<&V> {
        let (value, last_used_instant) = self.items.get_mut(key)?;
        // Re-key the recency entry to "now".
        assert!(
            self.last_access.remove(&(*last_used_instant, key.clone())),
            "Corrupt LRU map: freshen not correct."
        );
        let now = self.counter;
        self.counter += 1;
        self.last_access.insert((now, key.clone()));
        *last_used_instant = now;
        Some(value)
    }
    /// Inserts an entry, returning the displaced value for `key` if any.
    /// Inserting a brand-new key may evict the least-recently-used entry.
    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
        let now = self.counter;
        self.counter += 1;
        let previous = match self.items.insert(key.clone(), (value, now)) {
            Some((old_value, old_instant)) => {
                // Drop the stale recency entry for the replaced value.
                assert!(
                    self.last_access.remove(&(old_instant, key.clone())),
                    "Corrupt LRU map: insert not correct."
                );
                Some(old_value)
            }
            None => None,
        };
        self.last_access.insert((now, key));
        // A fresh key grew the map by one: evict if we are over capacity.
        if previous.is_none() && self.items.len() > self.capacity {
            self.evict();
        }
        previous
    }
    /// Removes and returns the least-recently-used entry, if any.
    pub fn evict(&mut self) -> Option<(K, V)> {
        let oldest = self.last_access.iter().next().cloned()?;
        self.last_access.remove(&oldest);
        let (_, key) = oldest;
        let (value, _) = self
            .items
            .remove(&key)
            .expect("Corrupt LRU map: last access and items out of sync");
        Some((key, value))
    }
}
#[cfg(test)]
mod test {
    use crate::utils::LruMap;
    /// Capacity-3 map: freshening key 1 via `get` makes key 2 the
    /// least-recently-used entry, so inserting a fourth key evicts 2, not 1.
    #[test]
    fn test_lru_map() {
        let mut lmap = LruMap::new(3);
        lmap.insert(1, 1);
        lmap.insert(2, 1);
        lmap.insert(3, 1);
        assert_eq!(lmap.get(&1), Some(&1));
        lmap.insert(4, 1);
        assert_eq!(lmap.get(&2), None);
    }
}