CHECKPOINT overhaul

parent 8d5c373abc
commit 5cd2700396

.gitignore (vendored): 6 changes

@@ -17,3 +17,9 @@ __pycache__
 /datman-helper-mysql/datman_helper_mysql.egg-info
 /result
 
+
+.direnv
+yama7demo
+yamaSFTPdemo
+
+yama_localcache/testdb.sqlite

Cargo.lock (generated): 2326 changes (file diff suppressed because it is too large)

Cargo.toml: 16 changes

@@ -6,6 +6,9 @@ members = [
     "yama_wormfile_fs",
     "yama_wormfile_sftp",
     "yama_wormfile_s3",
+    "yama_midlevel_crypto",
+    "yama_pile",
+    "yama_localcache",
 ]
 
 [profile.release]
@@ -13,3 +16,16 @@ members = [
 debug = 2
 # When this feature stabilises, it will be possible to split the debug information into a file alongside the binary
 #split-debuginfo = "packed"
+
+
+
+# A few packages benefit from optimisations in the dev profile, otherwise Yama operations are needlessly slowed down.
+[profile.dev.package.fastcdc]
+opt-level = 2
+
+[profile.dev.package.blake3]
+opt-level = 2
+
+# not so obvious with sqlx:
+#[profile.dev.package.sqlx]
+#opt-level = 2

GLOSSARY.md (new file): 5 lines

## Internals

* **Chunk**: piece of a file that is obtained using a Content-Defined Chunking scheme
* **Chunk ID**: Blake3 hash of the contents of a chunk
* **Metachunk**: a chunk that itself contains (part of) a list of chunks.
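
A minimal sketch of these terms, not Yama's actual pipeline: the helper and the chunk-size parameters below are made up for illustration, assuming the v1 fastcdc API pinned in yama.old/Cargo.toml and the blake3 crate referenced in the dev-profile section above.

    // Split a buffer with content-defined chunking and hash each chunk with
    // Blake3 to obtain its chunk ID. A metachunk would simply be a chunk whose
    // contents are (part of) such a list of chunk IDs.
    use fastcdc::FastCDC;

    fn chunk_ids(data: &[u8]) -> Vec<(String, usize, usize)> {
        // Chunk boundaries depend on the content itself, so repeated data
        // yields identical chunks (and therefore identical chunk IDs).
        FastCDC::new(data, 16 * 1024, 64 * 1024, 256 * 1024)
            .map(|chunk| {
                let piece = &data[chunk.offset..chunk.offset + chunk.length];
                let id = blake3::hash(piece).to_hex().to_string();
                (id, chunk.offset, chunk.length)
            })
            .collect()
    }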

datman.old/Cargo.toml (new file): 38 lines

[package]
name = "datman"
version = "0.7.0-alpha.1"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
edition = "2021"
repository = "https://bics.ga/reivilibre/yama"
license = "GPL-3.0-or-later"

description = "A chunked and deduplicated backup system using Yama"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "3.1.18", features = ["derive"] }
crossbeam-channel = "0.5.1"
anyhow = "1.0"
thiserror = "1.0"
serde = { version = "1.0.104", features = ["derive"] }
serde_json = "1.0.64"
toml = "0.5.5"
log = "0.4"
env_logger = "0.7.1"
indicatif = "0.14.0"
arc-interner = "0.5.1"
zstd = "0.11.2" # 0.11.2+zstd.1.5.2
byteorder = "1"
termion = "1.5.6"
glob = "0.3.0"
humansize = "1.1.1"
chrono = "0.4.19"
itertools = "0.10.1"
hostname = "0.3.1"
yama = { path = "../yama", version = "0.7.0-alpha.1" }
metrics = "0.17.1"
bare-metrics-recorder = { version = "0.1.0" }
comfy-table = "6.0.0-rc.1"
libc = "0.2.126"
io-streams = "0.11.0"

datman.old/README.md (new file): 13 lines

# datman: DATa MANager

Datman is a tool to make it easier to use Yama for backups.

Features:

* Chunk-based deduplication
* (optional) Compression using Zstd and a specifiable dictionary
* (optional) Encryption
* Ability to back up to remote machines over SSH
* Labelling of files in a backup source; different destinations can choose to backup either all or a subset of the labels.

See the documentation for more information.

datman.old/src/bin/datman.rs (new file): 468 lines

/*
This file is part of Yama.

Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/

use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};

use clap::Parser;
use env_logger::Env;

use anyhow::{bail, Context};
use bare_metrics_recorder::recording::BareMetricsRecorderCore;
use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination};
use datman::commands::ilabel::interactive_labelling_session;
use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy};
use datman::commands::{init_descriptor, pushpull};
use datman::descriptor::{load_descriptor, SourceDescriptor};
use datman::get_hostname;
use datman::remote::backup_source_requester::backup_remote_source_to_destination;
use datman::remote::backup_source_responder;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use itertools::Itertools;
use log::info;
use std::str::FromStr;
use yama::commands::load_pile_descriptor;
use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel};

pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠️ \x1b[25m\x1b[22m";
pub const BOLD: &str = "\x1b[1m";
pub const BOLD_OFF: &str = "\x1b[22m";
pub const WHITE: &str = "\x1b[37m";
pub const RED: &str = "\x1b[31m";
pub const GREEN: &str = "\x1b[32m";

#[derive(Parser)]
pub enum DatmanCommand {
    /// Initialise a datman descriptor in this directory.
    Init {},

    ///
    Status {},

    #[clap(name = "ilabel")]
    InteractiveLabelling {
        /// Name of the source to label.
        source_name: String,
    },

    #[clap(name = "ibrowse")]
    InteractiveBrowsing {
        /// Name of the source to browse.
        source_name: String,
    },

    /// Back up a source locally or over the network.
    BackupOne {
        /// Name of the source to back up.
        source_name: String,

        /// Name of the destination to back up to.
        destination_name: String,
    },

    BackupAll {
        /// Name of the remote to back up.
        /// Special value 'self' means 'this host only'.
        /// Special value 'all' means 'all hosts'.
        remote_name: String,

        /// Name of the destination to back up to.
        destination_name: String,
    },

    Extract {
        /// Name of the 'source' to extract
        /// Omit for 'all'.
        #[clap(short)]
        source_name: Option<String>,

        /// If specified, will get the first backup after this date.
        #[clap(long)]
        after: Option<HumanDateTime>,

        /// If specified, will get the last backup before this date. The default behaviour is to get the latest.
        #[clap(long)]
        before: Option<HumanDateTime>,

        /// If not specified, time-restricted extractions that don't have a pointer for every source
        /// will instead lead to an error.
        #[clap(long)]
        accept_partial: bool, // TODO unimplemented.

        /// Name of the pile to extract from
        pile_name: String,

        /// Place to extract to.
        destination: PathBuf,

        /// Skip applying metadata. Might be needed to extract without superuser privileges.
        #[clap(long)]
        skip_metadata: bool,
    },

    Report {
        /// Name of the pile to report on.
        pile_name: String,

        /// Don't summarise months.
        #[clap(long)]
        individual: bool,
    },

    #[clap(name = "_backup_source_responder")]
    InternalBackupSourceResponder,

    /// Pulls all pointers from a remote pile to a local pile.
    /// Does not yet support label filtering, but will do in the future.
    Pull {
        /// e.g. 'myserver:main'
        remote_and_remote_pile: String,

        pile_name: String,
    },

    /// Applies a retention policy by removing unnecessary backups.
    /// Does not reclaim space by itself: use
    /// `yama check --apply-gc --shallow`
    /// & `yama compact`
    /// to do that.
    Prune { pile_name: String },

    #[clap(name = "_pull_responder_offerer")]
    InternalPullResponderOfferer {
        datman_path: PathBuf,
        pile_name: String,
    },
}

pub struct HumanDateTime(pub DateTime<Local>);

impl FromStr for HumanDateTime {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if let Ok(date_only) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
            let local_date = chrono::offset::Local.from_local_date(&date_only).unwrap();
            let local_datetime = local_date.and_hms(0, 0, 0);
            Ok(HumanDateTime(local_datetime))
        } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") {
            let local_datetime = chrono::offset::Local
                .from_local_datetime(&date_and_time)
                .unwrap();
            Ok(HumanDateTime(local_datetime))
        } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
            let local_datetime = chrono::offset::Local
                .from_local_datetime(&date_and_time)
                .unwrap();
            Ok(HumanDateTime(local_datetime))
        } else {
            bail!("Couldn't parse using either format. Use one of: 2021-05-16 OR 2021-05-16T17:42:14 OR 2021-05-16 17:42:14");
        }
    }
}

fn with_obvious_successfail_message<R>(result: anyhow::Result<R>) -> anyhow::Result<R> {
    match &result {
        Ok(_) => {
            eprintln!("Operation {}successful{}.", GREEN, WHITE);
        }
        Err(error) => {
            eprintln!("{:?}", error);
            eprintln!(
                "{}{}Operation {}{}FAILED{}!{}",
                FAILURE_SYMBOL_OBNOXIOUS_FLASHING, WHITE, RED, BOLD, WHITE, BOLD_OFF
            );
        }
    };
    result
}

fn with_exitcode<R>(result: anyhow::Result<R>) {
    match &result {
        Ok(_) => {
            std::process::exit(0);
        }
        Err(_) => {
            std::process::exit(5);
        }
    };
}

fn main() -> anyhow::Result<()> {
    env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();

    let now = Utc::now();

    let (shard, _stopper) = BareMetricsRecorderCore::new(File::create(format!(
        "/tmp/datman_{}.baremetrics",
        now.format("%F_%H%M%S")
    ))?)
    .start("datman".to_string())?;
    shard.install_as_metrics_recorder()?;

    let opts: DatmanCommand = DatmanCommand::parse();

    match opts {
        DatmanCommand::Init {} => {
            init_descriptor(Path::new(".")).unwrap();
        }
        DatmanCommand::Status { .. } => {
            unimplemented!();
        }
        DatmanCommand::InteractiveLabelling { source_name } => {
            interactive_labelling_session(Path::new("."), source_name)?;
        }
        DatmanCommand::InteractiveBrowsing { source_name } => {
            datman::commands::ibrowse::session(Path::new("."), source_name)?;
        }
        DatmanCommand::BackupOne {
            source_name,
            destination_name,
        } => {
            let my_hostname = get_hostname();
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let source = &descriptor.sources[&source_name];
            let destination = &descriptor.piles[&destination_name];

            let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("storing");

            let is_remote = if let SourceDescriptor::DirectorySource { hostname, .. } = source {
                hostname != &my_hostname
            } else {
                false
            };

            let result = if is_remote {
                backup_remote_source_to_destination(
                    source,
                    destination,
                    &descriptor,
                    Path::new("."),
                    &source_name,
                    &destination_name,
                    yama::utils::get_number_of_workers("YAMA_CHUNKERS"),
                    pbar,
                )
            } else {
                backup_source_to_destination(
                    source,
                    destination,
                    &descriptor,
                    Path::new("."),
                    &source_name,
                    &destination_name,
                    yama::utils::get_number_of_workers("YAMA_CHUNKERS"),
                    &mut pbar,
                )
            };
            with_exitcode(with_obvious_successfail_message(result))
        }
        DatmanCommand::BackupAll {
            remote_name,
            destination_name,
        } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let destination = &descriptor.piles[&destination_name];

            let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("storing");

            backup_all_sources_to_destination(
                destination,
                &descriptor,
                Path::new("."),
                &destination_name,
                yama::utils::get_number_of_workers("YAMA_CHUNKERS"),
                &mut pbar,
                remote_name,
            )
            .unwrap();
        }
        DatmanCommand::Extract {
            source_name,
            after,
            before,
            accept_partial,
            pile_name,
            destination,
            skip_metadata,
        } => {
            if !accept_partial {
                bail!("Specify --accept-partial until running without it is supported.");
            }

            if after.is_some() && before.is_some() {
                bail!("Can't specify both before and after!");
            }

            let before = before.map(|dt| dt.0.with_timezone(&Utc));
            let after = after.map(|dt| dt.0.with_timezone(&Utc));

            datman::commands::extract::extract(
                &destination,
                Path::new("."),
                source_name.as_ref().map(|x| x.as_ref()),
                &pile_name,
                before.into(),
                after.into(),
                !skip_metadata,
                !skip_metadata,
                !skip_metadata,
                yama::utils::get_number_of_workers("YAMA_EXTRACTORS"),
            )?;
        }

        DatmanCommand::InternalBackupSourceResponder => {
            info!("Datman responder at {:?}", std::env::current_exe()?);
            backup_source_responder::handler_stdio()?;
        }

        DatmanCommand::Report {
            pile_name,
            individual,
        } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let destination = &descriptor.piles[&pile_name];
            let report =
                datman::commands::report::generate_report(destination, &descriptor, !individual)?;

            datman::commands::report::print_filesystem_space(&destination.path)?;
            datman::commands::report::print_report(&report)?;
        }
        DatmanCommand::Pull {
            remote_and_remote_pile,
            pile_name,
        } => {
            let (hostname, remote_datman_path, remote_pile_name) = remote_and_remote_pile
                .split(':')
                .collect_tuple()
                .context("You must pull from a remote pile specified as remote:path:pile.")?;

            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let source = &descriptor.piles[&pile_name];

            let pile_desc = load_pile_descriptor(&source.path)?;
            let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
                &source.path,
                &pile_desc,
                BypassLevel::CompressionBypass,
            )?;

            let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
            pbar.set_style(
                ProgressStyle::default_bar().template(
                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
                ),
            );
            pbar.set_message("pulling");

            let remote_host_descriptor = descriptor
                .remote_hosts
                .get(hostname)
                .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?;

            let mut connection = Command::new("ssh")
                .arg(&remote_host_descriptor.user_at_host)
                .arg("--")
                .arg(
                    &remote_host_descriptor
                        .path_to_datman
                        .as_ref()
                        .map(|x| x.as_str())
                        .unwrap_or("datman"),
                )
                .arg("_pull_responder_offerer")
                .arg(remote_datman_path)
                .arg(remote_pile_name)
                .stdin(Stdio::piped())
                .stdout(Stdio::piped())
                .stderr(Stdio::inherit())
                .spawn()?;

            let mut reader = BufReader::new(connection.stdout.take().unwrap());
            let mut writer = BufWriter::new(connection.stdin.take().unwrap());

            pushpull::accepting_side(
                &pile,
                &bypass_raw_pile,
                &mut reader,
                &mut writer,
                Box::new(pbar),
            )?;
        }

        DatmanCommand::Prune { pile_name } => {
            let descriptor = load_descriptor(Path::new(".")).unwrap();
            let retention_policy = descriptor
                .retention
                .context("No retention policy set in descriptor")?;
            let dest_desc = &descriptor.piles[&pile_name];

            let pile_desc = load_pile_descriptor(&dest_desc.path)?;

            prune_with_retention_policy(
                &dest_desc.path,
                &pile_desc,
                &RetentionPolicy::from_config(retention_policy),
                true,
            )?;
        }

        DatmanCommand::InternalPullResponderOfferer {
            datman_path,
            pile_name,
        } => {
            let descriptor = load_descriptor(&datman_path).unwrap();
            let source = &descriptor.piles[&pile_name];

            let pile_desc = load_pile_descriptor(&source.path)?;
            let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
                &source.path,
                &pile_desc,
                BypassLevel::CompressionBypass,
            )?;

            let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?);
            let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?);

            pushpull::offering_side(
                &pile,
                &bypass_raw_pile,
                &mut stdin,
                &mut stdout,
                Box::new(()),
            )?;

            stdout.flush()?;
        }
    }
    Ok(())
}

datman.old/src/lib.rs (new file): 12 lines

pub mod commands;
pub mod descriptor;
pub mod labelling;
pub mod remote;
pub mod tree;

pub fn get_hostname() -> String {
    hostname::get()
        .expect("No hostname")
        .into_string()
        .expect("Hostname string must be sensible.")
}

@@ -11,28 +11,4 @@ description = "A chunked and deduplicated backup system using Yama"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-clap = { version = "3.1.18", features = ["derive"] }
-crossbeam-channel = "0.5.1"
-anyhow = "1.0"
-thiserror = "1.0"
-serde = { version = "1.0.104", features = ["derive"] }
-serde_json = "1.0.64"
-toml = "0.5.5"
-log = "0.4"
-env_logger = "0.7.1"
-indicatif = "0.14.0"
-arc-interner = "0.5.1"
-zstd = "0.11.2" # 0.11.2+zstd.1.5.2
-byteorder = "1"
-termion = "1.5.6"
-glob = "0.3.0"
-humansize = "1.1.1"
-chrono = "0.4.19"
-itertools = "0.10.1"
-hostname = "0.3.1"
-yama = { path = "../yama", version = "0.7.0-alpha.1" }
-metrics = "0.17.1"
-bare-metrics-recorder = { version = "0.1.0" }
-comfy-table = "6.0.0-rc.1"
-libc = "0.2.126"
-io-streams = "0.11.0"
+eyre = "0.6.8"

@@ -15,454 +15,6 @@ You should have received a copy of the GNU General Public License
 along with Yama. If not, see <https://www.gnu.org/licenses/>.
 */
 
-use std::fs::File;
-use std::io::{BufReader, BufWriter, Write};
-[... the rest of the removed lines are the old file body, identical to the datman.old/src/bin/datman.rs listing above ...]
+pub fn main() -> eyre::Result<()> {
     Ok(())
 }

@@ -1,12 +1 @@
-pub mod commands;
-pub mod descriptor;
-pub mod labelling;
-pub mod remote;
-pub mod tree;
-
-pub fn get_hostname() -> String {
-    hostname::get()
-        .expect("No hostname")
-        .into_string()
-        .expect("Hostname string must be sensible.")
-}
+

flake.lock (generated): 50 changes

@@ -1,5 +1,26 @@
 {
   "nodes": {
+    "fenix": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "rust-analyzer-src": "rust-analyzer-src"
+      },
+      "locked": {
+        "lastModified": 1682230876,
+        "narHash": "sha256-vCnd1pZRQKCdNvivQBD7WzaOlU1GcN91OCAz1rnoe5M=",
+        "owner": "nix-community",
+        "repo": "fenix",
+        "rev": "378f052d9f1cd90060ec4329f81782fee80490a4",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-community",
+        "repo": "fenix",
+        "type": "github"
+      }
+    },
     "naersk": {
       "inputs": {
         "nixpkgs": "nixpkgs"
@@ -32,23 +53,44 @@
     },
     "nixpkgs_2": {
       "locked": {
-        "lastModified": 0,
-        "narHash": "sha256-50235YW76Jnx4okogoJv/sMz+WNnqC+0DqtkV3jm2XM=",
-        "path": "/nix/store/aw7bxjysi3wd3xia5qh7qqwsbqmyqbya-source",
-        "type": "path"
+        "lastModified": 1682173319,
+        "narHash": "sha256-tPhOpJJ+wrWIusvGgIB2+x6ILfDkEgQMX0BTtM5vd/4=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "ee7ec1c71adc47d2e3c2d5eb0d6b8fbbd42a8d1c",
+        "type": "github"
       },
       "original": {
         "id": "nixpkgs",
+        "ref": "nixos-22.11",
         "type": "indirect"
       }
     },
     "root": {
       "inputs": {
+        "fenix": "fenix",
         "naersk": "naersk",
         "nixpkgs": "nixpkgs_2",
         "utils": "utils"
       }
     },
+    "rust-analyzer-src": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1682163822,
+        "narHash": "sha256-u7vaRlI6rYiutytoTk8lyOtNKO/rz5Q63Z6S6QzYCtU=",
+        "owner": "rust-lang",
+        "repo": "rust-analyzer",
+        "rev": "2feabc4dc462644287372922928110eea4c60ca7",
+        "type": "github"
+      },
+      "original": {
+        "owner": "rust-lang",
+        "ref": "nightly",
+        "repo": "rust-analyzer",
+        "type": "github"
+      }
+    },
     "utils": {
       "locked": {
         "lastModified": 1659877975,

flake.nix: 71 changes

@@ -4,11 +4,40 @@
   inputs = {
     utils.url = "github:numtide/flake-utils";
     naersk.url = "github:nix-community/naersk";
+    # Current Rust in nixpkgs is too old unfortunately — let's use the Fenix overlay's packages...
+    fenix = {
+      url = "github:nix-community/fenix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+    nixpkgs.url = "nixpkgs/nixos-22.11";
   };
 
-  outputs = { self, nixpkgs, utils, naersk }:
+  outputs = { self, nixpkgs, utils, naersk, fenix }:
     utils.lib.eachDefaultSystem (system: let
       pkgs = nixpkgs.legacyPackages."${system}";
+      #fenixRustToolchain = fenix.packages."${system}".minimal.toolchain
+      # fenixRustToolchain =
+      #   fenix."${system}".complete.withComponents [
+      #     "cargo"
+      #     "clippy"
+      #     "rust-src"
+      #     "rustc"
+      #     "rustfmt"
+      #   ];
+      # fenixRustToolchain = fenix.packages."${system}".stable.toolchain;
+      fenixRustToolchain =
+        fenix.packages."${system}".stable.withComponents [
+          "cargo"
+          "clippy"
+          "rust-src"
+          "rustc"
+          "rustfmt"
+        ];
+      # rust-toolchain = pkgs.symlinkJoin {
+      #   name = "rust-toolchain";
+      #   paths = [fenixRustToolchain.rustc fenixRustToolchain.cargo fenixRustToolchain.clippy fenixRustToolchain.rustfmt fenixRustToolchain.rustPlatform.rustcSrc];
+      # };
+
       naersk-lib = naersk.lib."${system}";
 
       rustComponents = naersk-lib.buildPackage {
@@ -77,7 +106,45 @@
 
       # `nix develop`
       devShell = pkgs.mkShell {
-        nativeBuildInputs = with pkgs; [ rustc cargo ];
+        buildInputs = [
+          fenixRustToolchain
+          #rust-toolchain
+
+          pkgs.pkg-config
+
+          pkgs.alsa-lib
+          pkgs.sqlite
+          pkgs.sqlx-cli
+          #pkgs.libclang # ??
+        ];
+
+        nativeBuildInputs = [
+          pkgs.openssl
+          pkgs.python3
+        ];
+
+        # Needed for bindgen when binding to avahi
+        LIBCLANG_PATH="${pkgs.llvmPackages_latest.libclang.lib}/lib";
+
+        # Don't know if this var does anything by itself, but you need to feed this value in to IntelliJ IDEA and it's probably easier to pull out of an env var than look it up each time.
+        RUST_SRC_PATH = "${fenixRustToolchain}/lib/rustlib/src/rust/library";
+
+        # Cargo culted:
+        # Add to rustc search path
+        RUSTFLAGS = (builtins.map (a: ''-L ${a}/lib'') [
+        ]);
+        # Add to bindgen search path
+        BINDGEN_EXTRA_CLANG_ARGS =
+          # Includes with normal include path
+          (builtins.map (a: ''-I"${a}/include"'') [
+          ])
+          # Includes with special directory paths
+          ++ [
+            ''-I"${pkgs.llvmPackages_latest.libclang.lib}/lib/clang/${pkgs.llvmPackages_latest.libclang.version}/include"''
+            #''-I"${pkgs.glib.dev}/include/glib-2.0"''
+            #''-I${pkgs.glib.out}/lib/glib-2.0/include/''
+          ];
+        #nativeBuildInputs = with pkgs; [ rustc cargo ];
       };
     });
 }

@@ -19,6 +19,7 @@ pkgs.mkShell {
 
     pkgs.alsa-lib
     pkgs.sqlite
+    pkgs.sqlx-cli
     #pkgs.libclang # ??
   ];
 

yama.old/Cargo.toml (new file): 44 lines

[package]
name = "yama"
version = "0.7.0-alpha.1"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
edition = "2018"
description = "Deduplicated, compressed and encrypted content pile manager"

repository = "https://bics.ga/reivilibre/yama"
license = "GPL-3.0-or-later"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
fastcdc = "1.0.6"
zstd = "0.11.2" # 0.11.2+zstd.1.5.2
clap = { version = "3.1.18", features = ["derive"] }
blake = "2.0.2"
twox-hash = "1.5.0"
serde = { version = "1.0.104", features = ["derive"] }
serde_bare = "0.3.0"
users = "0.9.1"
crossbeam-channel = "0.5.1"
crossbeam-utils = "0.8.5"
toml = "0.5.5"
glob = "0.3.0"
nix = "0.17.0"
log = "0.4"
env_logger = "0.7.1"
indicatif = "0.14.0"
num_cpus = "1"
anyhow = "1.0"
thiserror = "1.0"

byteorder = "1"
itertools = "0.9.0"
rayon = "1.5.0"
chrono = "0.4.19"
rustyline = "7.1.0"
derivative = "2.2.0"
metrics = "0.17.1"


[dev-dependencies]
temp-dir = "0.1.11"

yama.old/README.md (new file): 25 lines

# 山 (yama): deduplicated heap repository

Yama is a system for storing files and directory trees in 'piles'. The data stored is deduplicated (by using content-defined chunking) and can be compressed and encrypted, too.

NOT YET ~~Yama also permits storing to piles on remote computers, using SSH.~~

Yama is intended for use as a storage mechanism for backups. Datman is a tool to make it easier to use Yama for backups.

The documentation is currently the best source of information about Yama, see the `docs` directory.

Yama can be used as a library for your own programs; further information about this is yet to be provided but the API documentation (Rustdocs) may be useful.

## Other, unpolished, notes

### Training a Zstd Dictionary

`zstd --train FILEs -o zstd.dict`

* Candidate size: `find ~/Programming -size -4k -size +64c -type f -exec grep -Iq . {} \; -printf "%s\n" | jq -s 'add'`
* Want to sample:
    * `find ~/Programming -size -4k -size +64c -type f -exec grep -Iq . {} \; -exec cp {} -t /tmp/d/ \;`
    * `du -sh`
    * `find > file.list`
    * `wc -l < file.list` → gives a № lines
    * `shuf -n 4242 file.list | xargs -x zstd --train -o zstd.dict` for 4242 files. Chokes if it receives a filename with a space, just re-run until you get a working set.
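
A dictionary trained this way can be handed to the zstd crate pinned in the manifest above. A minimal sketch, not Yama's own code; the file paths and compression level are placeholders:

    // Compress and decompress one small file with a trained dictionary,
    // using the bulk API of the `zstd` crate.
    use std::fs;

    fn main() -> std::io::Result<()> {
        let dict = fs::read("zstd.dict")?;
        let data = fs::read("example-small-file")?;

        // Dictionaries help most on many small, similar inputs.
        let mut compressor = zstd::bulk::Compressor::with_dictionary(3, &dict)?;
        let compressed = compressor.compress(&data)?;

        // Decompression needs the same dictionary and an upper bound on the size.
        let mut decompressor = zstd::bulk::Decompressor::with_dictionary(&dict)?;
        let restored = decompressor.decompress(&compressed, data.len())?;
        assert_eq!(restored, data);
        Ok(())
    }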

yama.old/src/bin/yama.rs (new file): 311 lines

/*
This file is part of Yama.

Yama is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Yama is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/

use std::path::{Path, PathBuf};

use anyhow::{bail, Context};
use log::info;

use clap::Parser;
use env_logger::Env;
use std::sync::Arc;
use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile};
use yama::debug::{debug_command, DebugCommand};
use yama::operations::checking::VacuumMode;
use yama::operations::legacy_pushpull::{
    determine_bypass_level, open_pile_with_work_bypass, push_to,
};
use yama::operations::{checking, cleanup, extracting};
use yama::pile::local_sqlitebloblogs::CompactionThresholds;
use yama::pile::{Pile, PileDescriptor, RawPile};
use yama::{commands, debug};

#[derive(Parser)]
#[clap(version = env!("CARGO_PKG_VERSION"), author = env!("CARGO_PKG_AUTHORS"), about = env!("CARGO_PKG_DESCRIPTION"))]
struct Opts {
    /// Chooses a different pile to be the working pile.
    /// If specified, must be the name of a remote in yama.toml.
    // TODO OBS? #[clap(short, long)]
    // with: Option<String>,

    #[clap(subcommand)]
    command: PileCommand,
}

#[derive(Parser)]
enum PileCommand {
    /// Initialise a yama pile in this directory.
    Init {},

    /// Retrieve a pointer from the yama pile, using a named pointer name.
    Retrieve {
        /// Name of the pointer to retrieve.
        pointer_name: String,

        /// Limited expression(s) of files to retrieve.
        /// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE.
        #[clap(short, long)]
        subset: Option<String>,

        destination: PathBuf,

        /// Number of extraction workers to use. Ideal value varies, but probably not much more than
        /// the number of CPU threads.
        #[clap(long)]
        num_workers: Option<u8>,
    },

    /// Check this yama pile for corruption.
    Check {
        #[clap(long)]
        apply_gc: bool,

        #[clap(long)]
        dry_run_gc: bool,

        #[clap(long)]
        deep: bool,

        #[clap(long)]
        shallow: bool,
    },

    Compact {
        /// Don't actually perform any compaction; just plan it out.
        #[clap(long)]
        dry_run: bool,

        /// Allocated size under which a bloblog is considered small.
        #[clap(long = "small")]
        small_thresh: Option<u64>,

        /// Minimum amount of space to reclaim in order to run compaction for reclaim.
        #[clap(long = "reclaim")]
        min_reclaim: Option<u64>,

        /// Maximum amount of space that can be deallocated in a bloblog before we consider it
        /// worthwhile to replace.
        #[clap(long = "max-dealloc")]
        max_deallocated: Option<u64>,

        /// Minimum number of mergeable small bloblogs in order to run compaction for merge.
        #[clap(long)]
        mergeable: Option<u32>,
    },

    /// Enter a debug prompt for manually operating on the yama pile.
    Debug { supplied_command: Vec<String> },

    /// Pushes a pointer from this pile to another pile.
    Push {
        /// The name of the pointer to push.
        pointer_name: String,

        /// The path to the other pile to push the pointer to.
        other_pile_path: PathBuf,
    },
}

fn main() -> anyhow::Result<()> {
    std::process::exit(wrapped_main()?);
}

/// Wrapped main, returning the exit code.
/// DO NOT USE exit() in this function, because it can interfere with destructors.
/// (Destructors are needed to ensure some piles are flushed, for example.)
fn wrapped_main() -> anyhow::Result<i32> {
    env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();

    let opts: Opts = Opts::parse();

    let open_pile = || -> anyhow::Result<(PileDescriptor, Pile<Box<dyn RawPile>>)> {
        let this_dir = Path::new(".");
        let descriptor =
            load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
        let pile = open_pile(this_dir, &descriptor).context("Failed to open pile")?;
        Ok((descriptor, pile))
    };

    match &opts.command {
        PileCommand::Retrieve {
            pointer_name,
            subset,
            destination,
            num_workers: workers,
        } => {
            let (_pdesc, pile) = open_pile()?;
            let mut pointer = pile
                .read_pointer(pointer_name)?
                .expect("No pointer by that name!"); // todo be nice

            if destination.exists() {
                bail!("The destination already exists. Overwriting not allowed (yet).");
            }

            let mut root_tree_node =
                commands::retrieve_tree_node(&pile, pointer.chunk_ref.clone())?;

            fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?;

            let mut node_to_extract = &mut root_tree_node.node;

            if let Some(subset) = subset {
                for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) {
                    match node_to_extract.child(path_to_descend) {
                        Ok(new_node) => {
                            node_to_extract = new_node;
                        }
                        Err(msg) => {
                            bail!("Can't descend into {path_to_descend:?}: {msg}");
                        }
                    }
                }
            }

            // todo allow disabling apply metadata
            extracting::extract(
                destination,
                node_to_extract,
                &pile,
                true,
                workers.unwrap_or(2),
                true,
                true,
                true,
            )?;
        }
        PileCommand::Check {
            apply_gc,
            dry_run_gc,
            deep,
            shallow,
        } => {
            let vacuum_mode = if *dry_run_gc {
                VacuumMode::DryRunVacuum
            } else if *apply_gc {
                VacuumMode::Vacuum
            } else {
                VacuumMode::NoVacuum
            };
            let (_pdesc, pile) = open_pile()?;
            let error_count = if *deep {
                checking::check_deep(pile, vacuum_mode, true)?
            } else if *shallow {
                checking::check_shallow(pile, vacuum_mode, true, true)?
            } else {
                bail!("You need to specify either --shallow or --deep.");
            };

            if error_count > 0 {
                eprintln!("THERE ARE {} ERRORS.", error_count);
                return Ok(1);
            }
        }
        PileCommand::Compact {
            dry_run,
            small_thresh,
            min_reclaim,
            max_deallocated,
            mergeable,
        } => {
            let this_dir = Path::new(".");
            let descriptor =
                load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
            cleanup::compact(
                this_dir,
                &descriptor,
                !*dry_run,
                true,
                CompactionThresholds {
                    minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024),
                    minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64),
                    cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024),
                    cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024),
                },
            )?;
        }
        PileCommand::Init {} => {
            commands::init(".".as_ref())?;
        }

        PileCommand::Debug { supplied_command } => {
            let (pdesc, pile) = open_pile()?;
            if supplied_command.is_empty() {
                debug::debug_prompt(pdesc, pile)?;
            } else {
                let mut prefixed_command = vec![String::from("yama-debug")];
                prefixed_command.extend(supplied_command.iter().cloned());
                match DebugCommand::try_parse_from(prefixed_command) {
                    Ok(command) => {
                        if let Err(e) = debug_command(&pdesc, &pile, command) {
                            eprintln!("Failed {:?}", e);
                            pile.flush()?;
                            return Ok(2);
                        } else {
                            pile.flush()?;
                            return Ok(0);
                        }
                    }
                    Err(err) => {
                        eprintln!("Invalid {:?}", err);
                        return Ok(3);
                    }
                }
            }
        }

        PileCommand::Push {
            pointer_name,
            other_pile_path,
        } => {
            let this_pile_path = PathBuf::from(".");

            let descriptor_this = load_pile_descriptor(".".as_ref())
                .context("Failed to load this pile descriptor")?;

            let descriptor_other = load_pile_descriptor(other_pile_path)
                .context("Failed to load foreign pile descriptor.")?;

            let bypass_level = determine_bypass_level(
                &descriptor_this,
                &this_pile_path,
                &descriptor_other,
                &other_pile_path,
            )?;

            info!("Using bypass level: {:?}", bypass_level);

            let (this_pile, this_rp_bypass) =
|
open_pile_with_work_bypass(&this_pile_path, &descriptor_this, bypass_level)?;
|
||||||
|
let (other_pile, other_rp_bypass) =
|
||||||
|
open_pile_with_work_bypass(&other_pile_path, &descriptor_other, bypass_level)?;
|
||||||
|
|
||||||
|
// TODO flush the pile after here yet
|
||||||
|
push_to(
|
||||||
|
Arc::new(this_pile),
|
||||||
|
this_rp_bypass,
|
||||||
|
Arc::new(other_pile),
|
||||||
|
other_rp_bypass,
|
||||||
|
vec![pointer_name.clone()],
|
||||||
|
true,
|
||||||
|
32,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(0)
|
||||||
|
}
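
The `Compact` arm above turns the optional flags into `CompactionThresholds`, falling back to byte-denominated defaults (2 GiB minimum reclaim, 64 mergeable small bloblogs, 256 MiB deallocated, 64 MiB "small"). A minimal sketch of that mapping, assuming only what is shown above; the helper name `thresholds_from_flags` is hypothetical and purely illustrative:

use yama::pile::local_sqlitebloblogs::CompactionThresholds;

// Hypothetical helper mirroring the defaults wired up in the Compact arm above.
fn thresholds_from_flags(
    small_thresh: Option<u64>,
    min_reclaim: Option<u64>,
    max_deallocated: Option<u64>,
    mergeable: Option<u32>,
) -> CompactionThresholds {
    CompactionThresholds {
        // Only run reclaim compaction if at least 2 GiB would be reclaimed.
        minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024),
        // Only run merge compaction once at least 64 small bloblogs are mergeable.
        minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64),
        // A bloblog becomes worth replacing once more than 256 MiB of it is deallocated,
        cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024),
        // and counts as "small" while it has less than 64 MiB allocated.
        cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024),
    }
}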
|
10
yama.old/src/lib.rs
Normal file
@ -0,0 +1,10 @@
pub mod chunking;
pub mod commands;
pub mod debug;
pub mod definitions;
pub mod operations;
pub mod pile;
pub mod progress;
pub mod remote;
pub mod tree;
pub mod utils;
@ -28,7 +28,6 @@ use std::sync::{Arc, Condvar, Mutex};
 pub mod access_guard;
 pub mod compression;
-pub mod encryption;
 pub mod integrity;
 pub mod local_sqlitebloblogs;
@ -11,35 +11,42 @@ license = "GPL-3.0-or-later"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-fastcdc = "1.0.6"
-zstd = "0.11.2" # 0.11.2+zstd.1.5.2
-clap = { version = "3.1.18", features = ["derive"] }
-blake = "2.0.2"
-twox-hash = "1.5.0"
-serde = { version = "1.0.104", features = ["derive"] }
-serde_bare = "0.3.0"
-users = "0.9.1"
-crossbeam-channel = "0.5.1"
-crossbeam-utils = "0.8.5"
-toml = "0.5.5"
-glob = "0.3.0"
-nix = "0.17.0"
-log = "0.4"
-env_logger = "0.7.1"
-indicatif = "0.14.0"
-num_cpus = "1"
-anyhow = "1.0"
-thiserror = "1.0"
-sodiumoxide = "0.2.6"
-byteorder = "1"
-itertools = "0.9.0"
-rayon = "1.5.0"
-rusqlite = "0.24.2"
-chrono = "0.4.19"
-rustyline = "7.1.0"
-derivative = "2.2.0"
-metrics = "0.17.1"
-[dev-dependencies]
-temp-dir = "0.1.11"
+eyre = "0.6.8"
+tracing = "0.1.37"
+ignore = "0.4.20"
+patricia_tree = "0.5.7"
+
+users = "0.11.0"
+
+serde = { version = "1.0.160", features = ["derive"] }
+
+yama_pile = { path = "../yama_pile" }
+yama_localcache = { path = "../yama_localcache" }
+yama_wormfile = { path = "../yama_wormfile" }
+yama_wormfile_fs = { path = "../yama_wormfile_fs" }
+yama_wormfile_s3 = { path = "../yama_wormfile_s3" }
+yama_wormfile_sftp = { path = "../yama_wormfile_sftp" }
+yama_midlevel_crypto = { path = "../yama_midlevel_crypto" }
+
+clap = { version = "4.2.2", features = ["derive"] }
+
+tokio = { version = "1.27.0", features = ["io-std"] }
+appdirs = "0.2.0"
+twox-hash = "1.6.3"
+hostname = "0.3.1"
+
+tracing-subscriber = { version = "0.3.16", features = ["tracing-log", "env-filter"] }
+tracing-indicatif = "0.3.0"
+indicatif = "0.17.3"
+
+dashmap = "5.4.0"
+fastcdc = "3.0.3"
+zstd = "0.12.3"
+memmap2 = "0.5.10"
+flume = "0.10.14"
+
+async-recursion = "1.0.4"
+toml = "0.7.3"
+
+dust_style_filetree_display = "0.8.5"
@ -15,297 +15,599 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
use clap::{Parser, Subcommand};
|
||||||
|
use eyre::{bail, ensure, eyre, Context, ContextCompat};
|
||||||
|
use patricia_tree::PatriciaMap;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::iter::Iterator;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::str::FromStr;
|
||||||
use anyhow::{bail, Context};
|
|
||||||
use log::info;
|
|
||||||
|
|
||||||
use clap::Parser;
|
|
||||||
use env_logger::Env;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile};
|
use indicatif::ProgressStyle;
|
||||||
use yama::debug::{debug_command, DebugCommand};
|
use tokio::io::{stdin, AsyncBufReadExt, BufReader};
|
||||||
use yama::operations::checking::VacuumMode;
|
use tracing::{info, info_span, warn, Span, Instrument};
|
||||||
use yama::operations::legacy_pushpull::{
|
use tracing_indicatif::IndicatifLayer;
|
||||||
determine_bypass_level, open_pile_with_work_bypass, push_to,
|
use tracing_indicatif::span_ext::IndicatifSpanExt;
|
||||||
|
use tracing_subscriber::filter::filter_fn;
|
||||||
|
use tracing_subscriber::Layer;
|
||||||
|
use tracing_subscriber::layer::SubscriberExt;
|
||||||
|
use tracing_subscriber::util::SubscriberInitExt;
|
||||||
|
use yama::extract::flatten_treenode;
|
||||||
|
use yama::init::{generate_master_keyring, pack_keyring};
|
||||||
|
use yama::open::{open_keyring_interactive, open_pile, pre_open_keyring, update_cache};
|
||||||
|
use yama::pile_connector::PileConnectionScheme;
|
||||||
|
use yama::scan::create_uidgid_lookup_tables;
|
||||||
|
use yama::storing::{assemble_and_write_indices, StoragePipeline};
|
||||||
|
use yama::{extract, get_hostname, init, PROGRESS_BAR_STYLE, scan};
|
||||||
|
use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde};
|
||||||
|
use yama_midlevel_crypto::chunk_id::ChunkIdKey;
|
||||||
|
use yama_pile::definitions::{
|
||||||
|
PackedPileConfig, PileConfig, RecursiveChunkRef, SUPPORTED_YAMA_PILE_VERSION,
|
||||||
};
|
};
|
||||||
use yama::operations::{checking, cleanup, extracting};
|
use yama_pile::locks::LockKind;
|
||||||
use yama::pile::local_sqlitebloblogs::CompactionThresholds;
|
use yama_pile::pointers::Pointer;
|
||||||
use yama::pile::{Pile, PileDescriptor, RawPile};
|
use yama_pile::tree::unpopulated::ScanEntry;
|
||||||
use yama::{commands, debug};
|
use yama_pile::tree::{
|
||||||
|
assemble_tree_from_scan_entries, differentiate_node_in_place, RootTreeNode, TreeNode,
|
||||||
|
};
|
||||||
|
use yama_pile::FILE_YAMA_CONNECTOR;
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Clone, Debug)]
|
||||||
#[clap(version = env!("CARGO_PKG_VERSION"), author = env!("CARGO_PKG_AUTHORS"), about = env!("CARGO_PKG_DESCRIPTION"))]
|
pub struct PileAndPointer {
|
||||||
struct Opts {
|
pub pile_path: Option<PathBuf>,
|
||||||
/// Chooses a different pile to be the working pile.
|
pub pointer: PointerName,
|
||||||
/// If specified, must be the name of a remote in yama.toml.
|
|
||||||
// TODO OBS? #[clap(short, long)]
|
|
||||||
// with: Option<String>,
|
|
||||||
|
|
||||||
#[clap(subcommand)]
|
|
||||||
command: PileCommand,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Clone, Debug)]
|
||||||
enum PileCommand {
|
#[repr(transparent)]
|
||||||
/// Initialise a yama pile in this directory.
|
pub struct PointerName(String);
|
||||||
Init {},
|
|
||||||
|
|
||||||
/// Retrieve a pointer from the yama pile, using a named pointer name.
|
impl FromStr for PointerName {
|
||||||
Retrieve {
|
type Err = eyre::Error;
|
||||||
/// Name of the pointer to retrieve.
|
|
||||||
pointer_name: String,
|
|
||||||
|
|
||||||
/// Limited expression(s) of files to retrieve.
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
/// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE.
|
if !s
|
||||||
#[clap(short, long)]
|
.chars()
|
||||||
subset: Option<String>,
|
.all(|c| c.is_alphanumeric() || ['_', '+', '-'].contains(&c))
|
||||||
|
{
|
||||||
destination: PathBuf,
|
bail!("Bad pointer name: {s:?}");
|
||||||
|
}
|
||||||
/// Number of extraction workers to use. Ideal value varies, but probably not much more than
|
Ok(PointerName(s.to_owned()))
|
||||||
/// the number of CPU threads.
|
}
|
||||||
#[clap(long)]
|
|
||||||
num_workers: Option<u8>,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Check this yama pile for corruption.
|
|
||||||
Check {
|
|
||||||
#[clap(long)]
|
|
||||||
apply_gc: bool,
|
|
||||||
|
|
||||||
#[clap(long)]
|
|
||||||
dry_run_gc: bool,
|
|
||||||
|
|
||||||
#[clap(long)]
|
|
||||||
deep: bool,
|
|
||||||
|
|
||||||
#[clap(long)]
|
|
||||||
shallow: bool,
|
|
||||||
},
|
|
||||||
|
|
||||||
Compact {
|
|
||||||
/// Don't actually perform any compaction; just plan it out.
|
|
||||||
#[clap(long)]
|
|
||||||
dry_run: bool,
|
|
||||||
|
|
||||||
/// Allocated size under which a bloblog is considered small.
|
|
||||||
#[clap(long = "small")]
|
|
||||||
small_thresh: Option<u64>,
|
|
||||||
|
|
||||||
/// Minimum amount of space to reclaim in order to run compaction for reclaim.
|
|
||||||
#[clap(long = "reclaim")]
|
|
||||||
min_reclaim: Option<u64>,
|
|
||||||
|
|
||||||
/// Maximum amount of space that can be deallocated in a bloblog before we consider it
|
|
||||||
/// worthwhile to replace.
|
|
||||||
#[clap(long = "max-dealloc")]
|
|
||||||
max_deallocated: Option<u64>,
|
|
||||||
|
|
||||||
/// Minimum number of mergeable small bloblogs in order to run compaction for merge.
|
|
||||||
#[clap(long)]
|
|
||||||
mergeable: Option<u32>,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Enter a debug prompt for manually operating on the yama pile.
|
|
||||||
Debug { supplied_command: Vec<String> },
|
|
||||||
|
|
||||||
/// Pushes a pointer from this pile to another pile.
|
|
||||||
Push {
|
|
||||||
/// The name of the pointer to push.
|
|
||||||
pointer_name: String,
|
|
||||||
|
|
||||||
/// The path to the other pile to push the pointer to.
|
|
||||||
other_pile_path: PathBuf,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
impl FromStr for PileAndPointer {
|
||||||
std::process::exit(wrapped_main()?);
|
type Err = eyre::Error;
|
||||||
}
|
|
||||||
|
|
||||||
/// Wrapped main, returning the exit code.
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
/// DO NOT USE exit() in this function, because it can interfere with destructors.
|
match s.split_once(":") {
|
||||||
/// (Destructors are needed to ensure some piles are flushed, for example.)
|
None => Ok(PileAndPointer {
|
||||||
fn wrapped_main() -> anyhow::Result<i32> {
|
pile_path: None,
|
||||||
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
|
pointer: PointerName::from_str(s)?,
|
||||||
|
}),
|
||||||
let opts: Opts = Opts::parse();
|
Some((pile_path, pointer)) => Ok(PileAndPointer {
|
||||||
|
pile_path: Some(PathBuf::from(pile_path)),
|
||||||
let open_pile = || -> anyhow::Result<(PileDescriptor, Pile<Box<dyn RawPile>>)> {
|
pointer: PointerName::from_str(pointer)?,
|
||||||
let this_dir = Path::new(".");
|
}),
|
||||||
let descriptor =
|
}
|
||||||
load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
|
}
|
||||||
let pile = open_pile(this_dir, &descriptor).context("Failed to open pile")?;
|
}
|
||||||
Ok((descriptor, pile))
|
|
||||||
};
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct PileAndPointerWithSubTree {
|
||||||
match &opts.command {
|
pub pile_path: Option<PathBuf>,
|
||||||
PileCommand::Retrieve {
|
pub pointer: PointerName,
|
||||||
pointer_name,
|
// TODO how to represent...
|
||||||
subset,
|
pub sub_tree: String,
|
||||||
destination,
|
}
|
||||||
num_workers: workers,
|
|
||||||
} => {
|
impl FromStr for PileAndPointerWithSubTree {
|
||||||
let (_pdesc, pile) = open_pile()?;
|
type Err = eyre::Error;
|
||||||
let mut pointer = pile
|
|
||||||
.read_pointer(pointer_name)?
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
.expect("No pointer by that name!"); // todo be nice
|
let (pile_path, pointer_and_subtree) = match s.split_once(":") {
|
||||||
|
None => (None, s),
|
||||||
if destination.exists() {
|
Some((pile_path, pointer)) => (Some(PathBuf::from(pile_path)), pointer),
|
||||||
bail!("The destination already exists. Overwriting not allowed (yet).");
|
};
|
||||||
}
|
|
||||||
|
if let Some(slash) = pointer_and_subtree.find('/') {
|
||||||
let mut root_tree_node =
|
Ok(PileAndPointerWithSubTree {
|
||||||
commands::retrieve_tree_node(&pile, pointer.chunk_ref.clone())?;
|
pile_path,
|
||||||
|
pointer: PointerName::from_str(&pointer_and_subtree[0..slash])?,
|
||||||
fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?;
|
sub_tree: pointer_and_subtree[slash + 1..].to_owned(),
|
||||||
|
})
|
||||||
let mut node_to_extract = &mut root_tree_node.node;
|
} else {
|
||||||
|
Ok(PileAndPointerWithSubTree {
|
||||||
if let Some(subset) = subset {
|
pile_path,
|
||||||
for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) {
|
pointer: PointerName::from_str(&pointer_and_subtree)?,
|
||||||
match node_to_extract.child(path_to_descend) {
|
sub_tree: String::new(),
|
||||||
Ok(new_node) => {
|
})
|
||||||
node_to_extract = new_node;
|
}
|
||||||
}
|
}
|
||||||
Err(msg) => {
|
}
|
||||||
bail!("Can't descend into {path_to_descend:?}: {msg}");
|
|
||||||
}
|
#[derive(Parser, Clone, Debug)]
|
||||||
}
|
pub enum YamaCommand {
|
||||||
}
|
/// Create a new Yama pile.
|
||||||
}
|
Init {
|
||||||
|
#[arg(long)]
|
||||||
// todo allow disabling apply metadata
|
sftp: bool,
|
||||||
extracting::extract(
|
#[arg(long)]
|
||||||
destination,
|
s3: bool,
|
||||||
node_to_extract,
|
#[arg(long)]
|
||||||
&pile,
|
no_password: bool,
|
||||||
true,
|
|
||||||
workers.unwrap_or(2),
|
#[arg(long)]
|
||||||
true,
|
zstd_dict: Option<PathBuf>,
|
||||||
true,
|
#[arg(long)]
|
||||||
true,
|
no_zstd_dict: bool,
|
||||||
)?;
|
},
|
||||||
}
|
|
||||||
PileCommand::Check {
|
/// Keyring management commands; see `yama keyring --help`.
|
||||||
apply_gc,
|
#[command(subcommand)]
|
||||||
dry_run_gc,
|
Keyring(KeyringCommand),
|
||||||
deep,
|
|
||||||
shallow,
|
/// Store a file, directory or input stream to a Yama pile.
|
||||||
} => {
|
Store {
|
||||||
let vacuum_mode = if *dry_run_gc {
|
source: PathBuf,
|
||||||
VacuumMode::DryRunVacuum
|
destination: PileAndPointer,
|
||||||
} else if *apply_gc {
|
|
||||||
VacuumMode::Vacuum
|
#[arg(long)]
|
||||||
} else {
|
stdin: bool,
|
||||||
VacuumMode::NoVacuum
|
|
||||||
};
|
#[arg(long)]
|
||||||
let (_pdesc, pile) = open_pile()?;
|
overwrite: bool,
|
||||||
let error_count = if *deep {
|
|
||||||
checking::check_deep(pile, vacuum_mode, true)?
|
/// Name of a parent pointer to use, if storing an incremental update.
|
||||||
} else if *shallow {
|
/// Although optional, this can make storing time much faster as it can prevent unchanged
|
||||||
checking::check_shallow(pile, vacuum_mode, true, true)?
|
/// files from being rescanned.
|
||||||
} else {
|
#[arg(short = 'p', long)]
|
||||||
bail!("You need to specify either --shallow or --deep.");
|
parent: Option<PointerName>,
|
||||||
};
|
},
|
||||||
|
|
||||||
if error_count > 0 {
|
/// Extract a file, directory or output stream from a Yama pile.
|
||||||
eprintln!("THERE ARE {} ERRORS.", error_count);
|
Extract {
|
||||||
return Ok(1);
|
source: PileAndPointerWithSubTree,
|
||||||
}
|
destination: PathBuf,
|
||||||
}
|
|
||||||
PileCommand::Compact {
|
#[arg(long)]
|
||||||
dry_run,
|
stdout: bool,
|
||||||
small_thresh,
|
|
||||||
min_reclaim,
|
#[arg(long)]
|
||||||
max_deallocated,
|
overwrite: bool,
|
||||||
mergeable,
|
},
|
||||||
} => {
|
|
||||||
let this_dir = Path::new(".");
|
// TODO Mount { ... },
|
||||||
let descriptor =
|
Check {
|
||||||
load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
|
#[arg(long, short = '1')]
|
||||||
cleanup::compact(
|
pointers: bool,
|
||||||
this_dir,
|
|
||||||
&descriptor,
|
#[arg(long, short = '2')]
|
||||||
!*dry_run,
|
shallow: bool,
|
||||||
true,
|
|
||||||
CompactionThresholds {
|
#[arg(long, short = '9')]
|
||||||
minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024),
|
intensive: bool,
|
||||||
minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64),
|
},
|
||||||
cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024),
|
// TODO lsp, rmp
|
||||||
cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024),
|
|
||||||
},
|
// TODO vacuum
|
||||||
)?;
|
|
||||||
}
|
// TODO `locks` to inspect locks
|
||||||
PileCommand::Init {} => {
|
}
|
||||||
commands::init(".".as_ref())?;
|
|
||||||
}
|
#[derive(Subcommand, Clone, Debug)]
|
||||||
|
pub enum KeyringCommand {
|
||||||
PileCommand::Debug { supplied_command } => {
|
Inspect {
|
||||||
let (pdesc, pile) = open_pile()?;
|
file: PathBuf,
|
||||||
if supplied_command.is_empty() {
|
},
|
||||||
debug::debug_prompt(pdesc, pile)?;
|
Create {
|
||||||
} else {
|
/// Where to put the new keyring.
|
||||||
let mut prefixed_command = vec![String::from("yama-debug")];
|
new: PathBuf,
|
||||||
prefixed_command.extend(supplied_command.iter().cloned());
|
|
||||||
match DebugCommand::try_parse_from(prefixed_command) {
|
/// What keyring to use to copy keys from. Default is `master.yamakeyring`.
|
||||||
Ok(command) => {
|
#[arg(long)]
|
||||||
if let Err(e) = debug_command(&pdesc, &pile, command) {
|
from: Option<PathBuf>,
|
||||||
eprintln!("Failed {:?}", e);
|
|
||||||
pile.flush()?;
|
/// Which keys to put into the new keyring.
|
||||||
return Ok(2);
|
#[arg(long)]
|
||||||
} else {
|
with: String,
|
||||||
pile.flush()?;
|
|
||||||
return Ok(0);
|
/// Don't password-protect the new keyring.
|
||||||
}
|
/// It goes without saying that you should never disclose an unprotected keyring to anyone.
|
||||||
}
|
#[arg(long)]
|
||||||
Err(err) => {
|
no_password: bool,
|
||||||
eprintln!("Invalid {:?}", err);
|
|
||||||
return Ok(3);
|
/// Use weaker, faster password protection for the new keyring.
|
||||||
}
|
/// You should use different passwords for weak and strong keyrings!
|
||||||
}
|
/// This may be suitable if the keyring is kept in a moderately safe environment, but
|
||||||
}
|
/// prefer a strongly-protected keyring if the file might be reasonably accessible by
|
||||||
}
|
/// enemies.
|
||||||
|
#[arg(long)]
|
||||||
PileCommand::Push {
|
weak: bool,
|
||||||
pointer_name,
|
}, // TODO ChangePassword
|
||||||
other_pile_path,
|
}
|
||||||
} => {
|
|
||||||
let this_pile_path = PathBuf::from(".");
|
const PROGRESS_SPANS: &'static [&'static str] = &["store_file", "storing", "unpack_files", "expand_chunkrefs", "extract_files"];
|
||||||
|
|
||||||
let descriptor_this = load_pile_descriptor(".".as_ref())
|
#[tokio::main]
|
||||||
.context("Failed to load this pile descriptor")?;
|
async fn main() -> eyre::Result<()> {
|
||||||
|
let indicatif_layer = IndicatifLayer::new();
|
||||||
let descriptor_other = load_pile_descriptor(other_pile_path)
|
let stderr_writer = indicatif_layer.get_stderr_writer();
|
||||||
.context("Failed to load foreign pile descriptor.")?;
|
let indicatif_layer = indicatif_layer
|
||||||
|
.with_filter(filter_fn(|span_metadata| {
|
||||||
let bypass_level = determine_bypass_level(
|
span_metadata.target().starts_with("yama") && PROGRESS_SPANS.contains(&span_metadata.name())
|
||||||
&descriptor_this,
|
}));
|
||||||
&this_pile_path,
|
|
||||||
&descriptor_other,
|
tracing_subscriber::registry()
|
||||||
&other_pile_path,
|
.with(
|
||||||
)?;
|
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| "sqlx=warn,yama=debug,info".into()),
|
||||||
info!("Using bypass level: {:?}", bypass_level);
|
)
|
||||||
|
.with(tracing_subscriber::fmt::layer().with_writer(stderr_writer))
|
||||||
let (this_pile, this_rp_bypass) =
|
.with(indicatif_layer)
|
||||||
open_pile_with_work_bypass(&this_pile_path, &descriptor_this, bypass_level)?;
|
.init();
|
||||||
let (other_pile, other_rp_bypass) =
|
|
||||||
open_pile_with_work_bypass(&other_pile_path, &descriptor_other, bypass_level)?;
|
match dbg!(YamaCommand::parse()) {
|
||||||
|
YamaCommand::Init {
|
||||||
// TODO flush the pile after here yet
|
sftp,
|
||||||
push_to(
|
s3,
|
||||||
Arc::new(this_pile),
|
no_password,
|
||||||
this_rp_bypass,
|
zstd_dict,
|
||||||
Arc::new(other_pile),
|
no_zstd_dict,
|
||||||
other_rp_bypass,
|
} => {
|
||||||
vec![pointer_name.clone()],
|
if zstd_dict.is_some() && no_zstd_dict {
|
||||||
true,
|
bail!("Can only choose one of --zstd-dict or --no-zstd-dict!");
|
||||||
32,
|
}
|
||||||
)?;
|
if zstd_dict.is_none() && !no_zstd_dict {
|
||||||
|
bail!("Must choose one of --zstd-dict or --no-zstd-dict!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Begin. Create a pile in the current directory.
|
||||||
|
let pile_path = PathBuf::from(".");
|
||||||
|
|
||||||
|
// Check we are happy to make a pile or connector here.
|
||||||
|
init::pre_init_check(&pile_path).await?;
|
||||||
|
|
||||||
|
let connection_scheme = set_up_connection(sftp, s3, &pile_path).await?;
|
||||||
|
let connection = connection_scheme
|
||||||
|
.connect_to_wormfileprovider()
|
||||||
|
.await
|
||||||
|
.context("Failed to open pile destination")?;
|
||||||
|
|
||||||
|
info!("Pile destination seems OK; can proceed to set up pile.");
|
||||||
|
|
||||||
|
let master_password = if no_password {
|
||||||
|
warn!("Not setting a master password. The master keyring will be unprotected.");
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
println!("enter master password:");
|
||||||
|
let stdin = stdin();
|
||||||
|
let mut stdin_br = BufReader::new(stdin);
|
||||||
|
let mut line = String::new();
|
||||||
|
stdin_br.read_line(&mut line).await?;
|
||||||
|
Some(line.trim().to_owned())
|
||||||
|
};
|
||||||
|
|
||||||
|
let master_keyring = generate_master_keyring();
|
||||||
|
let master_key_packed = pack_keyring(
|
||||||
|
master_keyring.clone(),
|
||||||
|
master_password.as_ref().map(|s| s.as_ref()),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let zstd_dict = if no_zstd_dict {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let zstd_dict_path = zstd_dict.unwrap();
|
||||||
|
Some(Arc::new(tokio::fs::read(&zstd_dict_path)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("failed to read Zstd dict at {zstd_dict_path:?}"))?))
|
||||||
|
};
|
||||||
|
|
||||||
|
let pile_config = PileConfig {
|
||||||
|
yama_pile_version: SUPPORTED_YAMA_PILE_VERSION.to_owned(),
|
||||||
|
chunk_id_key: ChunkIdKey::new_rand(),
|
||||||
|
zstd_dict,
|
||||||
|
};
|
||||||
|
let packed_pile_config: PackedPileConfig = master_keyring
|
||||||
|
.w_config
|
||||||
|
.unwrap()
|
||||||
|
.make_locked_asymbox(CborSerde::serialise(&pile_config).unwrap());
|
||||||
|
|
||||||
|
let make_pileside_copy_of_master_key = !(sftp || s3); // TODO ask for these
|
||||||
|
|
||||||
|
init::init_pile(
|
||||||
|
&connection,
|
||||||
|
packed_pile_config,
|
||||||
|
if make_pileside_copy_of_master_key {
|
||||||
|
Some(master_key_packed.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if sftp || s3 {
|
||||||
|
// For remote piles: save a master keyring copy locally
|
||||||
|
tokio::fs::write(
|
||||||
|
pile_path.join("master.yamakeyring"),
|
||||||
|
&master_key_packed.into_byte_vec(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("failed to make local copy of master keyring")?;
|
||||||
|
tokio::fs::write(
|
||||||
|
pile_path.join(FILE_YAMA_CONNECTOR),
|
||||||
|
toml::to_string_pretty(&connection_scheme)
|
||||||
|
.context("failed to serialise connector")?,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("failed to write connector")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
YamaCommand::Store {
|
||||||
|
source,
|
||||||
|
destination,
|
||||||
|
stdin,
|
||||||
|
overwrite,
|
||||||
|
parent,
|
||||||
|
} => {
|
||||||
|
ensure!(!stdin, "stdin not supported yet");
|
||||||
|
let pile_connector_path = destination.pile_path.unwrap_or(PathBuf::from("."));
|
||||||
|
let keyring = pre_open_keyring(&pile_connector_path).await?;
|
||||||
|
let keyring = open_keyring_interactive(keyring).await?;
|
||||||
|
|
||||||
|
let pwc = open_pile(
|
||||||
|
&pile_connector_path,
|
||||||
|
keyring,
|
||||||
|
LockKind::Shared,
|
||||||
|
format!("{} store {:?}", get_hostname(), destination.pointer),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
update_cache(&pwc).await?;
|
||||||
|
|
||||||
|
let parent_pointer = if let Some(ref parent) = parent {
|
||||||
|
let pointer = pwc
|
||||||
|
.read_pointer_fully_integrated(parent.0.as_str())
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("whilst reading parent pointer: {parent:?}"))?
|
||||||
|
.with_context(|| {
|
||||||
|
format!("it appears that the pointer {parent:?} does not exist")
|
||||||
|
})?;
|
||||||
|
assert!(pointer.parent.is_none());
|
||||||
|
Some(pointer)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let source2 = source.clone();
|
||||||
|
let scan_entry_map = tokio::task::spawn_blocking(move || {
|
||||||
|
scan::scan(&source2, &Vec::new()).context("Failed to scan")
|
||||||
|
})
|
||||||
|
.await??;
|
||||||
|
|
||||||
|
let pwc = Arc::new(pwc);
|
||||||
|
|
||||||
|
let (mut chunk_file_map, pruned_scan_entry_map) =
|
||||||
|
if let Some(ref parent_node) = parent_pointer {
|
||||||
|
let (cfm, pruned) =
|
||||||
|
scan::prepopulate_unmodified(&parent_node.root.node, &scan_entry_map);
|
||||||
|
|
||||||
|
(cfm, Cow::Owned(pruned))
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
PatriciaMap::<(RecursiveChunkRef, u64)>::new(),
|
||||||
|
Cow::Borrowed(&scan_entry_map),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
let store_span = info_span!("storing");
|
||||||
|
// store_span.pb_set_style(&ProgressStyle::default_bar());
|
||||||
|
store_span.pb_set_style(&ProgressStyle::default_bar().template(
|
||||||
|
PROGRESS_BAR_STYLE,
|
||||||
|
).unwrap());
|
||||||
|
store_span.pb_set_message("storing files");
|
||||||
|
store_span.pb_set_length(pruned_scan_entry_map.values()
|
||||||
|
.filter(|v| matches!(v, ScanEntry::NormalFile { .. })).count() as u64);
|
||||||
|
let store_span_entered = store_span.enter();
|
||||||
|
|
||||||
|
let (pipeline, pipeline_job_tx) = StoragePipeline::launch_new(4, pwc.clone()).await?;
|
||||||
|
|
||||||
|
let source2 = source.clone();
|
||||||
|
let (submitter_task, receiver_task) = tokio::join!(
|
||||||
|
async move {
|
||||||
|
let pipeline_job_tx = pipeline_job_tx;
|
||||||
|
for (name_bytes, scan_entry) in pruned_scan_entry_map.iter() {
|
||||||
|
if let ScanEntry::NormalFile { .. } = scan_entry {
|
||||||
|
let name = std::str::from_utf8(name_bytes.as_slice())
|
||||||
|
.context("name is not str")?;
|
||||||
|
pipeline_job_tx
|
||||||
|
.send_async((name.to_owned(), source2.join(name)))
|
||||||
|
.await
|
||||||
|
.map_err(|_| eyre!("unable to send to pipeline."))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drop(pipeline_job_tx);
|
||||||
|
Ok::<_, eyre::Report>(())
|
||||||
|
},
|
||||||
|
async {
|
||||||
|
while let Ok((job_id, rec_chunk_ref, real_size)) = pipeline.next_result().await
|
||||||
|
{
|
||||||
|
chunk_file_map.insert_str(&job_id, (rec_chunk_ref, real_size));
|
||||||
|
Span::current().pb_inc(1);
|
||||||
|
}
|
||||||
|
// eprintln!("fin rec");
|
||||||
|
Ok::<_, eyre::Report>(())
|
||||||
|
}
|
||||||
|
);
|
||||||
|
submitter_task?;
|
||||||
|
receiver_task?;
|
||||||
|
|
||||||
|
drop(store_span_entered);
|
||||||
|
drop(store_span);
|
||||||
|
|
||||||
|
info!("All files stored, writing indices...");
|
||||||
|
|
||||||
|
// Write indices for the new bloblogs we have created. This is a prerequisite for creating a pointer.
|
||||||
|
let chunkmaps = pipeline
|
||||||
|
.finish_into_chunkmaps()
|
||||||
|
.await
|
||||||
|
.context("failed to finish into chunkmaps")?;
|
||||||
|
assemble_and_write_indices(&pwc, chunkmaps)
|
||||||
|
.await
|
||||||
|
.context("failed to assemble and write indices")?;
|
||||||
|
|
||||||
|
info!("All indices stored, writing pointer...");
|
||||||
|
|
||||||
|
// Assemble and write a pointer
|
||||||
|
let mut tree = assemble_tree_from_scan_entries(scan_entry_map, chunk_file_map)
|
||||||
|
.context("failed to assemble tree")?;
|
||||||
|
let (uids, gids) =
|
||||||
|
create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?;
|
||||||
|
|
||||||
|
if let Some(ref parent_node) = parent_pointer {
|
||||||
|
differentiate_node_in_place(&mut tree, &parent_node.root.node)
|
||||||
|
.context("failed to differentiate?")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
pwc.pile
|
||||||
|
.write_pointer(
|
||||||
|
destination.pointer.0.as_str(),
|
||||||
|
overwrite,
|
||||||
|
&Pointer {
|
||||||
|
parent: parent.as_ref().map(|p| p.0.clone()),
|
||||||
|
root: RootTreeNode {
|
||||||
|
name: source
|
||||||
|
.file_name()
|
||||||
|
.map(|oss| oss.to_str())
|
||||||
|
.flatten()
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_owned(),
|
||||||
|
node: tree,
|
||||||
|
},
|
||||||
|
uids,
|
||||||
|
gids,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("failed to write pointer")?;
|
||||||
|
|
||||||
|
Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?;
|
||||||
|
}
|
||||||
|
YamaCommand::Extract {
|
||||||
|
source,
|
||||||
|
destination,
|
||||||
|
stdout,
|
||||||
|
overwrite,
|
||||||
|
} => {
|
||||||
|
ensure!(!stdout, "stdout not supported yet");
|
||||||
|
let pile_connector_path = source
|
||||||
|
.pile_path
|
||||||
|
.as_ref()
|
||||||
|
.map(|p| p.as_ref())
|
||||||
|
.unwrap_or(Path::new("."));
|
||||||
|
let keyring = pre_open_keyring(&pile_connector_path).await?;
|
||||||
|
let keyring = open_keyring_interactive(keyring).await?;
|
||||||
|
|
||||||
|
let pwc = Arc::new(open_pile(
|
||||||
|
&pile_connector_path,
|
||||||
|
keyring,
|
||||||
|
LockKind::Shared,
|
||||||
|
format!("{} store {:?}", get_hostname(), source.pointer),
|
||||||
|
)
|
||||||
|
.await?);
|
||||||
|
update_cache(&pwc).await?;
|
||||||
|
|
||||||
|
let pointer = pwc
|
||||||
|
.read_pointer_fully_integrated(source.pointer.0.as_str())
|
||||||
|
.await
|
||||||
|
.context("failed to read pointer")?
|
||||||
|
.with_context(|| {
|
||||||
|
format!(
|
||||||
|
"it appears that the pointer {:?} does not exist",
|
||||||
|
source.pointer
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
assert!(pointer.parent.is_none());
|
||||||
|
|
||||||
|
let node = if source.sub_tree.is_empty() {
|
||||||
|
&pointer.root.node
|
||||||
|
} else {
|
||||||
|
let mut current = &pointer.root.node;
|
||||||
|
for subpath in source.sub_tree.split('/') {
|
||||||
|
if let TreeNode::Directory { children, .. } = current {
|
||||||
|
current = children.get(subpath).with_context(|| {
|
||||||
|
format!("can't descend into {subpath}: doesn't exist in directory.")
|
||||||
|
})?;
|
||||||
|
} else {
|
||||||
|
bail!("can't descend into {subpath}; parent isn't a directory...");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current
|
||||||
|
};
|
||||||
|
|
||||||
|
let flat = flatten_treenode(&node)?;
|
||||||
|
|
||||||
|
extract::unpack_nonfiles(&destination, &flat.nonfiles, false, true).await?;
|
||||||
|
|
||||||
|
let extract_span = info_span!("extract_files");
|
||||||
|
extract::unpack_files(&pwc, &destination, &flat.files, false, true)
|
||||||
|
.instrument(extract_span)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?;
|
||||||
|
}
|
||||||
|
other => todo!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn set_up_connection(
|
||||||
|
sftp: bool,
|
||||||
|
s3: bool,
|
||||||
|
local_dir: &Path,
|
||||||
|
) -> eyre::Result<PileConnectionScheme> {
|
||||||
|
let stdin = stdin();
|
||||||
|
let mut stdin_br = BufReader::new(stdin);
|
||||||
|
let mut line = String::new();
|
||||||
|
|
||||||
|
match (sftp, s3) {
|
||||||
|
(true, true) => {
|
||||||
|
bail!("Can only choose one of --sftp or --s3 (or local)!");
|
||||||
|
}
|
||||||
|
(true, false) => {
|
||||||
|
// SFTP
|
||||||
|
println!("Enter user@host for SFTP:");
|
||||||
|
stdin_br.read_line(&mut line).await?;
|
||||||
|
let user_at_host = line.trim().to_owned();
|
||||||
|
line.clear();
|
||||||
|
|
||||||
|
println!("Enter remote path (can be relative to user home):");
|
||||||
|
stdin_br.read_line(&mut line).await?;
|
||||||
|
let remote_path = line.trim().to_owned();
|
||||||
|
line.clear();
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Enter password, or blank if not required (due to use of key authentication):"
|
||||||
|
);
|
||||||
|
stdin_br.read_line(&mut line).await?;
|
||||||
|
let password = line.trim().to_owned();
|
||||||
|
line.clear();
|
||||||
|
|
||||||
|
Ok(PileConnectionScheme::Sftp {
|
||||||
|
user_at_host,
|
||||||
|
password,
|
||||||
|
directory: remote_path,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
(false, true) => {
|
||||||
|
// S3
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
(false, false) => {
|
||||||
|
// Local filesystem
|
||||||
|
Ok(PileConnectionScheme::Local {
|
||||||
|
directory: local_dir.to_owned(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(0)
|
|
||||||
}
|
}
|
||||||
|
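
In the new CLI shown above, a snapshot source or destination is written as `[pile_path:]pointer[/sub/tree]`, parsed by the `FromStr` impls for `PileAndPointer` and `PileAndPointerWithSubTree`. A self-contained sketch of the same splitting logic, with the pointer-name validation omitted and plain string slices in place of `PointerName`/`PathBuf` (illustrative only, not the crate's API):

// Illustrative sketch: mirrors the split logic of the FromStr impls above.
fn split_pile_pointer_subtree(s: &str) -> (Option<&str>, &str, &str) {
    // Optional "pile_path:" prefix.
    let (pile_path, rest) = match s.split_once(':') {
        Some((pile, rest)) => (Some(pile), rest),
        None => (None, s),
    };
    // Optional "/sub/tree" suffix after the pointer name.
    match rest.find('/') {
        Some(slash) => (pile_path, &rest[..slash], &rest[slash + 1..]),
        None => (pile_path, rest, ""),
    }
}

fn main() {
    assert_eq!(
        split_pile_pointer_subtree("../pile:nightly/home/docs"),
        (Some("../pile"), "nightly", "home/docs")
    );
    assert_eq!(split_pile_pointer_subtree("nightly"), (None, "nightly", ""));
}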
234
yama/src/bin/yamascan.rs
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
/*
|
||||||
|
This file is part of Yama.
|
||||||
|
|
||||||
|
Yama is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Yama is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::io::SeekFrom;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
use dust_style_filetree_display::display::{draw_it, InitialDisplayData};
|
||||||
|
|
||||||
|
use dust_style_filetree_display::filter::AggregateData;
|
||||||
|
use dust_style_filetree_display::node::Node;
|
||||||
|
use dust_style_filetree_display::{get_height_of_terminal, get_width_of_terminal, init_color};
|
||||||
|
use eyre::{bail, Context, ContextCompat};
|
||||||
|
use patricia_tree::PatriciaMap;
|
||||||
|
use tokio::fs::OpenOptions;
|
||||||
|
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
|
||||||
|
|
||||||
|
use yama::scan;
|
||||||
|
|
||||||
|
use tracing_subscriber::layer::SubscriberExt;
|
||||||
|
use tracing_subscriber::util::SubscriberInitExt;
|
||||||
|
|
||||||
|
use yama_pile::tree::unpopulated::ScanEntry;
|
||||||
|
|
||||||
|
#[derive(Parser, Clone, Debug)]
|
||||||
|
pub enum YamaScanCommand {
|
||||||
|
/// Add an entry to an ignore file
|
||||||
|
#[command(alias = "i")]
|
||||||
|
Ignore {
|
||||||
|
/// What to ignore
|
||||||
|
path: String,
|
||||||
|
|
||||||
|
/// Don't anchor the match to this directory.
|
||||||
|
#[arg(short = 'a')]
|
||||||
|
unanchored: bool,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Show dust-style usage graph of the current directory, excluding excluded files.
|
||||||
|
#[command(alias = "du")]
|
||||||
|
Usage {},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> eyre::Result<()> {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(
|
||||||
|
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| "sqlx=warn,yama=debug,info".into()),
|
||||||
|
)
|
||||||
|
.with(tracing_subscriber::fmt::layer())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
match YamaScanCommand::parse() {
|
||||||
|
YamaScanCommand::Usage {} => {
|
||||||
|
let idd = InitialDisplayData {
|
||||||
|
short_paths: true,
|
||||||
|
is_reversed: false,
|
||||||
|
colors_on: !init_color(false),
|
||||||
|
by_filecount: false,
|
||||||
|
is_screen_reader: false,
|
||||||
|
iso: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let scan = scan::scan(Path::new("."), &Vec::new()).context("Couldn't scan")?;
|
||||||
|
let top_nodes = assemble_display_tree_from_scan_entries(scan)?.children;
|
||||||
|
|
||||||
|
let root_display_node = dust_style_filetree_display::filter::get_biggest(
|
||||||
|
top_nodes,
|
||||||
|
AggregateData {
|
||||||
|
min_size: None,
|
||||||
|
only_dir: false,
|
||||||
|
only_file: false,
|
||||||
|
number_of_lines: get_height_of_terminal(),
|
||||||
|
depth: usize::MAX,
|
||||||
|
using_a_filter: false,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.expect("no root?");
|
||||||
|
|
||||||
|
draw_it(
|
||||||
|
idd,
|
||||||
|
false,
|
||||||
|
get_width_of_terminal(),
|
||||||
|
&root_display_node,
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
},
|
||||||
|
YamaScanCommand::Ignore {
|
||||||
|
path, unanchored
|
||||||
|
} => {
|
||||||
|
let mut oo = OpenOptions::new()
|
||||||
|
.read(true)
|
||||||
|
.write(true)
|
||||||
|
.create(true)
|
||||||
|
.truncate(false)
|
||||||
|
.open(".yamaignore").await
|
||||||
|
.context("failed to open .yamaignore for r/w")?;
|
||||||
|
let pos = oo.seek(SeekFrom::End(0)).await?;
|
||||||
|
if pos > 1 {
|
||||||
|
oo.seek(SeekFrom::End(-1)).await?;
|
||||||
|
let last_byte = oo.read_u8().await?;
|
||||||
|
if last_byte != b'\n' {
|
||||||
|
oo.write_u8(b'\n').await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if unanchored {
|
||||||
|
oo.write_all(format!("{}\n", path).as_bytes()).await?;
|
||||||
|
} else {
|
||||||
|
oo.write_all(format!("/{}\n", path).as_bytes()).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
oo.flush().await?;
|
||||||
|
drop(oo);
|
||||||
|
},
|
||||||
|
_other => todo!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn assemble_display_tree_from_scan_entries(scan: PatriciaMap<ScanEntry>) -> eyre::Result<Node> {
|
||||||
|
let mut dirs: BTreeMap<String, BTreeMap<String, Node>> = BTreeMap::new();
|
||||||
|
// special-case the root ("")
|
||||||
|
dirs.insert(String::new(), BTreeMap::new());
|
||||||
|
|
||||||
|
for (key, entry) in scan.into_iter() {
|
||||||
|
let key_string = String::from_utf8(key).context("bad UTF-8 in PMap")?;
|
||||||
|
let (parent_dir_name, child_name) =
|
||||||
|
key_string.rsplit_once('/').unwrap_or(("", &key_string));
|
||||||
|
match entry {
|
||||||
|
ScanEntry::NormalFile { size, .. } => {
|
||||||
|
// note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'.
|
||||||
|
// That's fine. We'll patch this up later.
|
||||||
|
dirs.get_mut(parent_dir_name)
|
||||||
|
.context("bad PMap: parent not seen first")?
|
||||||
|
.insert(
|
||||||
|
child_name.to_owned(),
|
||||||
|
Node {
|
||||||
|
name: PathBuf::from(&key_string),
|
||||||
|
size,
|
||||||
|
children: vec![],
|
||||||
|
inode_device: None,
|
||||||
|
depth: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ScanEntry::Directory {
|
||||||
|
ownership: _,
|
||||||
|
permissions: _,
|
||||||
|
} => {
|
||||||
|
dirs.insert(key_string.clone(), BTreeMap::new());
|
||||||
|
// note: for the root, this inserts the root directory entry as a child called "" within the root.
|
||||||
|
// That's fine. We'll patch this up later.
|
||||||
|
dirs.get_mut(parent_dir_name)
|
||||||
|
.context("bad PMap: parent not seen first")?
|
||||||
|
.insert(
|
||||||
|
child_name.to_owned(),
|
||||||
|
Node {
|
||||||
|
name: PathBuf::from(&key_string),
|
||||||
|
size: 4096,
|
||||||
|
children: vec![],
|
||||||
|
inode_device: None,
|
||||||
|
depth: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ScanEntry::SymbolicLink {
|
||||||
|
ownership: _,
|
||||||
|
target: _,
|
||||||
|
} => {
|
||||||
|
// note: for the root, this inserts the root symlink entry as a child called "" within a fake root 'directory'.
|
||||||
|
// That's fine. We'll patch this up later.
|
||||||
|
dirs.get_mut(parent_dir_name)
|
||||||
|
.context("bad PMap: parent not seen first")?
|
||||||
|
.insert(
|
||||||
|
child_name.to_owned(),
|
||||||
|
Node {
|
||||||
|
name: PathBuf::from(&key_string),
|
||||||
|
size: 4096,
|
||||||
|
children: vec![],
|
||||||
|
inode_device: None,
|
||||||
|
depth: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now roll up the directories. In Rustc v1.66 it'd be nice to use pop_last()...
|
||||||
|
while let Some(last_key) = dirs.keys().last().cloned() {
|
||||||
|
let mut last_children = dirs.remove(&last_key).unwrap();
|
||||||
|
if last_key.is_empty() {
|
||||||
|
assert!(
|
||||||
|
dirs.is_empty(),
|
||||||
|
"when pulling out root pseudo-dir, dirs must be empty for roll-up."
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut real_root = last_children.remove("").unwrap();
|
||||||
|
real_root.children = last_children.into_values().collect();
|
||||||
|
real_root.size += real_root.children.iter().map(|c| c.size).sum::<u64>();
|
||||||
|
return Ok(real_root);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want to roll up the directory last/key -> {child -> ...}
|
||||||
|
// so last -> {key -> {child -> ...}}
|
||||||
|
let (parent_dir, child_name) = last_key.rsplit_once('/').unwrap_or(("", &last_key));
|
||||||
|
let parent = dirs
|
||||||
|
.get_mut(parent_dir)
|
||||||
|
.context("bad PMap? no parent in rollup")?;
|
||||||
|
let child_in_parent = parent
|
||||||
|
.get_mut(child_name)
|
||||||
|
.context("dir child not populated")?;
|
||||||
|
child_in_parent.children = last_children.into_values().collect();
|
||||||
|
child_in_parent.size += child_in_parent.children.iter().map(|c| c.size).sum::<u64>();
|
||||||
|
}
|
||||||
|
|
||||||
|
bail!("no root found; bad PMap or bad roll-up???");
|
||||||
|
}
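
The roll-up loop above relies on `BTreeMap` key ordering: repeatedly taking `keys().last()` visits the deepest paths first, so a directory's children are attached before that directory is itself folded into its parent (and the root pseudo-directory `""` is handled last). A standalone illustration of that ordering, not the code above itself:

use std::collections::BTreeMap;

fn main() {
    let mut dirs: BTreeMap<&str, u32> = BTreeMap::new();
    dirs.insert("", 0);
    dirs.insert("a", 0);
    dirs.insert("a/b", 0);
    dirs.insert("a/b/c", 0);

    let mut order = Vec::new();
    while let Some(last_key) = dirs.keys().last().cloned() {
        dirs.remove(last_key);
        order.push(last_key);
    }
    // Deepest paths come out first, so children roll up before their parents.
    assert_eq!(order, vec!["a/b/c", "a/b", "a", ""]);
}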
|
416
yama/src/extract.rs
Normal file
@ -0,0 +1,416 @@
|
|||||||
|
use crate::pile_with_cache::PileWithCache;
|
||||||
|
use crate::retriever::decompressor::PipelineDecompressor;
|
||||||
|
use crate::retriever::{create_fixed_retriever, FileId, JobChunkReq, JobId, RetrieverResp};
|
||||||
|
use eyre::{bail, ensure, Context, ContextCompat, eyre};
|
||||||
|
use flume::Receiver;
|
||||||
|
use patricia_tree::PatriciaMap;
|
||||||
|
use std::cmp::Reverse;
|
||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::fs::Permissions;
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use indicatif::ProgressStyle;
|
||||||
|
use tokio::fs::OpenOptions;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use tokio::task::JoinSet;
|
||||||
|
use tracing::{info_span, Instrument, Span};
|
||||||
|
use tracing_indicatif::span_ext::IndicatifSpanExt;
|
||||||
|
use yama_midlevel_crypto::chunk_id::ChunkId;
|
||||||
|
use yama_pile::definitions::{BloblogId, RecursiveChunkRef};
|
||||||
|
use yama_pile::tree::unpopulated::ScanEntry;
|
||||||
|
use yama_pile::tree::{FilesystemPermissions, TreeNode};
|
||||||
|
use yama_wormfile::boxed::BoxedWormFileProvider;
|
||||||
|
use crate::PROGRESS_BAR_STYLE;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct FlattenedTree {
|
||||||
|
pub files: PatriciaMap<(ScanEntry, RecursiveChunkRef)>,
|
||||||
|
pub nonfiles: PatriciaMap<ScanEntry>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn flatten_treenode(root_node: &TreeNode) -> eyre::Result<FlattenedTree> {
|
||||||
|
let mut flat = FlattenedTree::default();
|
||||||
|
|
||||||
|
root_node.visit(
|
||||||
|
&mut |node, path| {
|
||||||
|
match node {
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime,
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size,
|
||||||
|
content,
|
||||||
|
} => {
|
||||||
|
flat.files.insert(
|
||||||
|
path,
|
||||||
|
(
|
||||||
|
ScanEntry::NormalFile {
|
||||||
|
mtime: *mtime,
|
||||||
|
ownership: *ownership,
|
||||||
|
permissions: *permissions,
|
||||||
|
size: *size,
|
||||||
|
},
|
||||||
|
*content,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
children: _,
|
||||||
|
} => {
|
||||||
|
flat.nonfiles.insert(
|
||||||
|
path,
|
||||||
|
ScanEntry::Directory {
|
||||||
|
ownership: *ownership,
|
||||||
|
permissions: *permissions,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
TreeNode::SymbolicLink { ownership, target } => {
|
||||||
|
flat.nonfiles.insert(
|
||||||
|
path,
|
||||||
|
ScanEntry::SymbolicLink {
|
||||||
|
ownership: *ownership,
|
||||||
|
target: target.clone(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
TreeNode::Deleted => {
|
||||||
|
bail!("unexpected TreeNode::Deleted in flatten_treenode");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
String::new(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(flat)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create directories and symbolic links.
|
||||||
|
pub async fn unpack_nonfiles(
|
||||||
|
root: &PathBuf,
|
||||||
|
nonfiles: &PatriciaMap<ScanEntry>,
|
||||||
|
restore_ownership: bool,
|
||||||
|
restore_permissions: bool,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
if restore_ownership {
|
||||||
|
bail!("restoring ownership is not yet supported...");
|
||||||
|
}
|
||||||
|
for (rel_path, scan_entry) in nonfiles.iter() {
|
||||||
|
let path = root
|
||||||
|
.join(String::from_utf8(rel_path).context("nonfiles map contains non-string keys?")?);
|
||||||
|
|
||||||
|
match scan_entry {
|
||||||
|
ScanEntry::NormalFile { .. } => {
|
||||||
|
bail!("found NormalFile in unpack_nonfiles()");
|
||||||
|
}
|
||||||
|
ScanEntry::Directory {
|
||||||
|
ownership: _,
|
||||||
|
permissions,
|
||||||
|
} => {
|
||||||
|
tokio::fs::create_dir(&path).await?;
|
||||||
|
if restore_permissions {
|
||||||
|
tokio::fs::set_permissions(&path, Permissions::from_mode(permissions.mode))
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ScanEntry::SymbolicLink {
|
||||||
|
ownership: _,
|
||||||
|
target,
|
||||||
|
} => {
|
||||||
|
tokio::fs::symlink(target, &path).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(perf): move out file writes into separate tasks...
|
||||||
|
pub async fn unpack_files(
|
||||||
|
pwc: &Arc<PileWithCache<BoxedWormFileProvider>>,
|
||||||
|
root: &PathBuf,
|
||||||
|
files: &PatriciaMap<(ScanEntry, RecursiveChunkRef)>,
|
||||||
|
restore_ownership: bool,
|
||||||
|
restore_permissions: bool,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
if restore_ownership {
|
||||||
|
bail!("restoring ownership is not yet supported...");
|
||||||
|
}
|
||||||
|
let expanded_chunkrefs = expand_chunkrefs(
|
||||||
|
pwc,
|
||||||
|
files
|
||||||
|
.iter()
|
||||||
|
.map(|(path_bytes, (scan_entry, rcr))| ((path_bytes, scan_entry), *rcr)),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let total_chunks = expanded_chunkrefs.iter().map(|(_, cs)| cs.len() as u64).sum::<u64>();
|
||||||
|
let unpack_span = info_span!("unpack_files");
|
||||||
|
|
||||||
|
async move {
|
||||||
|
let unpack_span = Span::current();
|
||||||
|
unpack_span.pb_set_style(&ProgressStyle::default_bar().template(
|
||||||
|
PROGRESS_BAR_STYLE,
|
||||||
|
).unwrap());
|
||||||
|
unpack_span.pb_set_message("unpack");
|
||||||
|
unpack_span.pb_set_length(total_chunks);
|
||||||
|
|
||||||
|
let mut join_set = JoinSet::new();
|
||||||
|
|
||||||
|
let (file_part_retriever, mut jobs) =
|
||||||
|
lookup_chunkrefs_and_create_retriever(pwc, expanded_chunkrefs).await?;
|
||||||
|
let mut open_files = BTreeMap::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
Ok(next_part) = file_part_retriever.recv_async() => {
|
||||||
|
match next_part {
|
||||||
|
RetrieverResp::Blob { job, subjob, blob } => {
|
||||||
|
if subjob == 0 {
|
||||||
|
// eprintln!("subjob 0 for job {job:?}");
|
||||||
|
let (path_bytes, scan_entry) = jobs
|
||||||
|
.remove(&job)
|
||||||
|
.with_context(|| format!("bad job {job:?} to extract"))?;
|
||||||
|
|
||||||
|
let (permissions, _ownership) = if let ScanEntry::NormalFile {
|
||||||
|
permissions,
|
||||||
|
ownership,
|
||||||
|
..
|
||||||
|
} = scan_entry
|
||||||
|
{
|
||||||
|
(permissions, ownership)
|
||||||
|
} else {
|
||||||
|
bail!("not a Normal File in unpack_files()");
|
||||||
|
};
|
||||||
|
|
||||||
|
let path = root.join(String::from_utf8(path_bytes).context("bad utf-8 in PM")?);
|
||||||
|
|
||||||
|
let (tx, rx) = flume::bounded(16);
|
||||||
|
|
||||||
|
join_set.spawn(file_unpacker_writer(path, *permissions, restore_permissions, rx));
|
||||||
|
open_files.insert(job, tx);
|
||||||
|
}
|
||||||
|
open_files
|
||||||
|
.get_mut(&job)
|
||||||
|
.context("bad job to write file")?
|
||||||
|
.send_async(Some(blob))
|
||||||
|
.await
|
||||||
|
.map_err(|_| eyre!("file tx shutdown"))?;
|
||||||
|
|
||||||
|
unpack_span.pb_inc(1);
|
||||||
|
}
|
||||||
|
RetrieverResp::JobComplete(job) => {
|
||||||
|
open_files
|
||||||
|
.remove(&job)
|
||||||
|
.context("bad job to finish file")?
|
||||||
|
.send_async(None)
|
||||||
|
.await
|
||||||
|
.map_err(|_| eyre!("file tx shutdown"))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Some(join_result) = join_set.join_next() => {
|
||||||
|
join_result
|
||||||
|
.context("failed file unpacker writer (a)")?
|
||||||
|
.context("failed file unpacker writer (b)")?;
|
||||||
|
},
|
||||||
|
else => {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we should have already drained the join set, but check...
|
||||||
|
assert!(join_set.join_next().await.is_none());
|
||||||
|
|
||||||
|
if !open_files.is_empty() || !jobs.is_empty() {
|
||||||
|
bail!("There were errors extracting.");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}.instrument(unpack_span).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn file_unpacker_writer(path: PathBuf, permissions: FilesystemPermissions, restore_permissions: bool, rx: Receiver<Option<Vec<u8>>>) -> eyre::Result<()> {
|
||||||
|
let mut oo = OpenOptions::new();
|
||||||
|
oo.write(true).create_new(true);
|
||||||
|
if restore_permissions {
|
||||||
|
oo.mode(permissions.mode);
|
||||||
|
};
|
||||||
|
let mut file = oo
|
||||||
|
.open(&path)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("can't create {path:?}"))?;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match rx.recv_async().await {
|
||||||
|
Ok(Some(next_block)) => {
|
||||||
|
file.write_all(&next_block)
|
||||||
|
.await?;
|
||||||
|
},
|
||||||
|
Ok(None) => {
|
||||||
|
file.flush()
|
||||||
|
.await
|
||||||
|
.context("failed to flush")?;
|
||||||
|
return Ok(());
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
bail!("rx for file unpacking into {path:?} disconnected unexpectedly");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn expand_chunkrefs<T>(
|
||||||
|
pwc: &Arc<PileWithCache<BoxedWormFileProvider>>,
|
||||||
|
chunkrefs: impl Iterator<Item = (T, RecursiveChunkRef)>,
|
||||||
|
) -> eyre::Result<Vec<(T, Vec<ChunkId>)>> {
|
||||||
|
let mut by_depth = BTreeMap::<Reverse<u32>, Vec<(T, Vec<ChunkId>)>>::new();
|
||||||
|
for (t, rec) in chunkrefs {
|
||||||
|
by_depth
|
||||||
|
.entry(Reverse(rec.depth))
|
||||||
|
.or_default()
|
||||||
|
.push((t, vec![rec.chunk_id]));
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(Reverse(next_depth)) = by_depth.keys().next().cloned() {
|
||||||
|
let ts_and_chunks = by_depth.remove(&Reverse(next_depth)).unwrap();
|
||||||
|
|
||||||
|
if next_depth == 0 {
|
||||||
|
return Ok(ts_and_chunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
let ec_span = info_span!("expand_chunkrefs");
|
||||||
|
ec_span.pb_set_style(&ProgressStyle::default_bar().template(
|
||||||
|
PROGRESS_BAR_STYLE,
|
||||||
|
).unwrap());
|
||||||
|
ec_span.pb_set_length(ts_and_chunks.iter().map(|(_, cs)| cs.len() as u64).sum::<u64>());
|
||||||
|
ec_span.pb_set_message(&format!("resolve (d={next_depth})"));
|
||||||
|
let expanded_ts_and_chunks = expand_chunkrefs_one_layer(pwc, ts_and_chunks)
|
||||||
|
.instrument(ec_span).await?;
|
||||||
|
by_depth
|
||||||
|
.entry(Reverse(next_depth - 1))
|
||||||
|
.or_default()
|
||||||
|
.extend(expanded_ts_and_chunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn lookup_chunkrefs_and_create_retriever<T>(
    pwc: &Arc<PileWithCache<BoxedWormFileProvider>>,
    input: Vec<(T, Vec<ChunkId>)>,
) -> eyre::Result<(Receiver<RetrieverResp>, BTreeMap<JobId, T>)> {
    let mut next_job_id = JobId(0);

    let chunks_to_lookup: BTreeSet<ChunkId> = input
        .iter()
        .flat_map(|(_t, chunkids)| chunkids)
        .copied()
        .collect();

    let looked_up_chunks = pwc
        .localcache
        .read()
        .await?
        .locate_chunks(&chunks_to_lookup)
        .await?;
    ensure!(
        chunks_to_lookup.len() == looked_up_chunks.len(),
        "chunks are missing"
    );

    let bloblog_ids: BTreeSet<BloblogId> = looked_up_chunks.values().map(|(bi, _)| *bi).collect();
    let num_bloblogs = bloblog_ids.len();
    let bloblog_to_file_ids: BTreeMap<BloblogId, FileId> = bloblog_ids
        .into_iter()
        .zip((0..num_bloblogs as u32).map(FileId))
        .collect();
    let files: BTreeMap<FileId, BloblogId> =
        bloblog_to_file_ids.iter().map(|(&k, &v)| (v, k)).collect();

    let mut out_by_job = BTreeMap::<JobId, T>::new();
    let mut jobs = BTreeMap::<JobId, Vec<JobChunkReq>>::new();
    for (t, chunks) in input {
        let job_id = next_job_id;
        next_job_id.0 += 1;
        out_by_job.insert(job_id, t);
        jobs.insert(
            job_id,
            chunks
                .into_iter()
                .map(|c| {
                    let (bloblog_id, blob_locator) = &looked_up_chunks[&c];
                    JobChunkReq {
                        file: bloblog_to_file_ids[bloblog_id],
                        offset: blob_locator.offset,
                        length: blob_locator.length,
                    }
                })
                .collect(),
        );
    }

    let retriever = create_fixed_retriever(pwc.clone(), jobs, files, 8)?;
    let retriever =
        PipelineDecompressor::start(pwc.pile.pile_config.zstd_dict.clone(), 2, retriever)?;
    Ok((retriever, out_by_job))
}
async fn expand_chunkrefs_one_layer<T>(
    pwc: &Arc<PileWithCache<BoxedWormFileProvider>>,
    input: Vec<(T, Vec<ChunkId>)>,
) -> eyre::Result<Vec<(T, Vec<ChunkId>)>> {
    let (retriever, jobs_to_ts) = lookup_chunkrefs_and_create_retriever(pwc, input).await?;

    let mut out_by_job: BTreeMap<JobId, (T, Vec<u8>)> = jobs_to_ts
        .into_iter()
        .map(|(ji, t)| (ji, (t, Vec::new())))
        .collect();

    let mut num_jobs_left = out_by_job.len();

    while let Ok(result) = retriever.recv_async().await {
        match result {
            RetrieverResp::Blob {
                job,
                subjob: _,
                blob,
            } => {
                out_by_job
                    .get_mut(&job)
                    .context("bad job gm")?
                    .1
                    .extend_from_slice(&blob);
                Span::current().pb_inc(1);
            }
            RetrieverResp::JobComplete(_) => {
                num_jobs_left -= 1;
            }
        }
    }

    ensure!(num_jobs_left == 0, "jobs left over, recovery not complete");

    out_by_job
        .into_values()
        .map(|(t, bytes)| {
            let chunk_ids = bytes
                .chunks(32)
                .map(|b| {
                    if b.len() != 32 {
                        bail!("wrong number of bytes for chunk refs");
                    }
                    let mut b32 = [0u8; 32];
                    b32.copy_from_slice(b);
                    Ok(ChunkId::from(b32))
                })
                .collect::<eyre::Result<_>>()?;
            Ok((t, chunk_ids))
        })
        .collect()
}
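To make the depth handling above concrete, a small sketch (not part of this commit) of flattening one chunkref to its leaf chunk list before extraction. A chunkref of depth d names a chunk whose bytes are a packed list of 32-byte chunk IDs; each call to `expand_chunkrefs_one_layer` peels off one such layer until depth 0 is reached.

// Illustrative sketch only: expand a single RecursiveChunkRef to its depth-0 chunk IDs.
async fn leaf_chunks_sketch(
    pwc: &Arc<PileWithCache<BoxedWormFileProvider>>,
    content: RecursiveChunkRef,
) -> eyre::Result<Vec<ChunkId>> {
    let mut expanded = expand_chunkrefs(pwc, std::iter::once(((), content))).await?;
    let ((), chunk_ids) = expanded
        .pop()
        .ok_or_else(|| eyre::eyre!("expansion returned nothing"))?;
    Ok(chunk_ids)
}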
yama/src/init.rs (new file, 110 lines)
@@ -0,0 +1,110 @@
use eyre::{bail, Context, ContextCompat};
use std::path::Path;
use tokio::io::AsyncWriteExt;
use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde};
use yama_midlevel_crypto::key_derivation::KeyDerivationParameters;
use yama_midlevel_crypto::sym_box::SymBox;
use yama_pile::definitions::{PackedKeyring, PackedPileConfig, UnlockedOrLockedKeyring};
use yama_pile::keyring::{generate_r_w_keys, Keyring};
use yama_pile::{DIR_BLOBLOGS, DIR_INDICES, DIR_LOCKS, FILE_MASTER_KEYRING, FILE_YAMA_CONFIG};
use yama_wormfile::paths::WormPath;
use yama_wormfile::{WormFileProvider, WormFileWriter};

/// Perform checks before we init a pile in the given directory.
pub async fn pre_init_check(path: &Path) -> eyre::Result<()> {
    if path.exists() && !path.is_dir() {
        bail!("{path:?} is not a directory; cannot create pile or connector here.");
    }

    for important_path in [
        "yama.toml",
        DIR_BLOBLOGS,
        DIR_LOCKS,
        FILE_YAMA_CONFIG,
        DIR_INDICES,
    ] {
        let important_path = path.join(important_path);
        if important_path.exists() {
            bail!("{important_path:?} already exists: can't create pile or connector here.");
        }
    }

    Ok(())
}

/// Perform checks before we init a pile in the given WormFileProvider.
pub async fn pre_init_check_wfp(wfp: &impl WormFileProvider) -> eyre::Result<()> {
    for important_path in ["yama.toml", FILE_YAMA_CONFIG] {
        let important_path = WormPath::new(important_path).unwrap();
        if wfp.is_regular_file(&important_path).await? {
            bail!("{important_path:?} already exists: can't create pile.");
        }
    }
    Ok(())
}

/// Initialise a pile.
/// Should be run after `pre_init_check_wfp`.
pub async fn init_pile(
    wfp: &impl WormFileProvider,
    pile_config: PackedPileConfig,
    master_keyring_copy: Option<PackedKeyring>,
) -> eyre::Result<()> {
    let mut writer = wfp.write().await?;
    writer.write_all(&pile_config.into_byte_vec()).await?;
    writer.flush().await?;
    writer
        .finalise(WormPath::new(FILE_YAMA_CONFIG).unwrap(), false)
        .await?;

    if let Some(master_keyring_copy) = master_keyring_copy {
        let mut writer = wfp.write().await?;
        writer
            .write_all(&master_keyring_copy.into_byte_vec())
            .await?;
        writer.flush().await?;
        writer
            .finalise(WormPath::new(FILE_MASTER_KEYRING).unwrap(), false)
            .await?;
    }

    Ok(())
}

pub fn generate_master_keyring() -> Keyring {
    let (r_config, w_config) = generate_r_w_keys();
    let (r_bloblog_footer, w_bloblog_footer) = generate_r_w_keys();
    let (r_bloblog_contents, w_bloblog_contents) = generate_r_w_keys();
    let (r_locks, w_locks) = generate_r_w_keys();
    let (r_pointer, w_pointer) = generate_r_w_keys();
    Keyring {
        r_config: Some(r_config),
        w_config: Some(w_config),
        r_bloblog_footer: Some(r_bloblog_footer),
        w_bloblog_footer: Some(w_bloblog_footer),
        r_bloblog_contents: Some(r_bloblog_contents),
        w_bloblog_contents: Some(w_bloblog_contents),
        r_locks: Some(r_locks),
        w_locks: Some(w_locks),
        r_pointer: Some(r_pointer),
        w_pointer: Some(w_pointer),
    }
}

// todo move this
pub fn pack_keyring(unpacked: Keyring, password: Option<&str>) -> eyre::Result<PackedKeyring> {
    let packed = if let Some(password) = password {
        let deriver = KeyDerivationParameters::new_recommended();
        let key = deriver
            .derive(password)
            .context("Failed to derive key from password")?;
        let symkey = key.into_symkey();

        let lockbox = SymBox::new(CborSerde::serialise(&unpacked).unwrap(), &symkey)
            .context("Failed to encrypt keyring")?;
        UnlockedOrLockedKeyring::Locked { deriver, lockbox }
    } else {
        UnlockedOrLockedKeyring::Unlocked(unpacked)
    };
    Ok(PackedKeyring::serialise(&packed).unwrap())
}
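A rough sketch (not part of this commit) of how these pieces might be wired together when creating a new pile. The `pile_config` value is assumed to have been built elsewhere; the exact construction of `PackedPileConfig` is not shown in this diff.

// Illustrative sketch only: order of operations when creating a fresh pile.
async fn init_flow_sketch(
    wfp: &impl WormFileProvider,
    pile_config: PackedPileConfig, // assumed to be prepared by the caller
    password: Option<&str>,
) -> eyre::Result<()> {
    // Refuse to clobber an existing pile.
    pre_init_check_wfp(wfp).await?;

    // Generate a full keyring and (optionally password-protect and) pack a copy of it.
    let master = generate_master_keyring();
    let packed_master = pack_keyring(master, password)?;

    // Write the pile config and the master keyring copy into the new pile.
    init_pile(wfp, pile_config, Some(packed_master)).await?;
    Ok(())
}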
@@ -1,10 +1,21 @@
-pub mod chunking;
-pub mod commands;
-pub mod debug;
-pub mod definitions;
-pub mod operations;
-pub mod pile;
-pub mod progress;
-pub mod remote;
-pub mod tree;
-pub mod utils;
+pub mod init;
+pub mod open;
+
+pub mod extract;
+pub mod scan;
+pub mod storing;
+pub mod vacuum;
+
+pub mod pile_connector;
+pub mod pile_with_cache;
+
+pub mod retriever;
+
+pub const PROGRESS_BAR_STYLE: &'static str = "[{elapsed_precise}]/[{eta}] {wide_bar:.cyan/blue} {pos:>7}/{len:7} {msg}";
+
+pub fn get_hostname() -> String {
+    hostname::get()
+        .expect("No hostname")
+        .into_string()
+        .expect("Hostname string must be sensible.")
+}
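For context, a minimal sketch (not part of this commit) of how `PROGRESS_BAR_STYLE` is typically attached to a span, mirroring the `pb_set_*` calls in extract.rs above. It assumes the tracing-indicatif span extension trait is what provides those methods; treat the import path as an assumption.

// Illustrative sketch only: attach the shared progress-bar style to a tracing span.
use indicatif::ProgressStyle;
use tracing::info_span;
use tracing_indicatif::span_ext::IndicatifSpanExt; // assumed extension trait

fn attach_progress_bar(total_items: u64) -> tracing::Span {
    let span = info_span!("example_work");
    span.pb_set_style(&ProgressStyle::default_bar().template(PROGRESS_BAR_STYLE).unwrap());
    span.pb_set_length(total_items);
    span.pb_set_message("working");
    span
}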
yama/src/open.rs (new file, 167 lines)
@@ -0,0 +1,167 @@
use crate::pile_connector::PileConnectionScheme;
use crate::pile_with_cache::PileWithCache;
use eyre::{bail, Context, ContextCompat};
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::sync::Arc;
use tokio::io::{AsyncBufReadExt, BufReader};
use tracing::debug;
use twox_hash::XxHash64;
use yama_midlevel_crypto::byte_layer::ByteLayer;
use yama_pile::definitions::{IndexId, PackedKeyring, UnlockedOrLockedKeyring};
use yama_pile::keyring::Keyring;
use yama_pile::locks::LockKind;
use yama_pile::{Pile, FILE_YAMA_CONFIG, FILE_YAMA_CONNECTOR};
use yama_wormfile::boxed::BoxedWormFileProvider;

pub const KEYRING_LOOKUP_SEQ: [&'static str; 2] = ["access.yamakeyring", "master.yamakeyring"];

pub async fn pre_open_keyring(connector_in_dir: &Path) -> eyre::Result<UnlockedOrLockedKeyring> {
    for lookup in KEYRING_LOOKUP_SEQ {
        let keyring_path = connector_in_dir.join(lookup);
        if keyring_path.exists() {
            let packed_keyring_bytes = tokio::fs::read(&keyring_path)
                .await
                .with_context(|| format!("failed to read keyring file at {:?}", keyring_path))?;
            let packed_keyring = PackedKeyring::from_byte_vec(packed_keyring_bytes)
                .deserialise()
                .with_context(|| {
                    format!("failed to deserialise keyring file at {:?}", keyring_path)
                })?;
            return Ok(packed_keyring);
        }
    }

    bail!(
        "No keyring found in {:?}. Expected to see one at one of: {:?}",
        connector_in_dir,
        KEYRING_LOOKUP_SEQ
    );
}

pub async fn open_keyring_interactive(input: UnlockedOrLockedKeyring) -> eyre::Result<Keyring> {
    match input {
        UnlockedOrLockedKeyring::Locked { deriver, lockbox } => {
            println!("enter keyring password:");
            let stdin = tokio::io::stdin();
            let mut stdin_br = BufReader::new(stdin);
            let mut line = String::new();
            stdin_br.read_line(&mut line).await?;

            let derived = deriver
                .derive(line.trim())
                .context("failed to derive key from password")?;
            let keyring = lockbox
                .unlock(&derived.into_symkey())
                .context("failed to decrypt keyring")?
                .deserialise()
                .context("failed to deserialise keyring")?;

            Ok(keyring)
        }
        UnlockedOrLockedKeyring::Unlocked(keyring) => Ok(keyring),
    }
}

pub async fn open_pile(
    connector_in_dir: &Path,
    keyring: Keyring,
    lock_kind: LockKind,
    lock_holder: String,
) -> eyre::Result<PileWithCache<BoxedWormFileProvider>> {
    let connection_scheme = if connector_in_dir.join(FILE_YAMA_CONFIG).exists() {
        PileConnectionScheme::Local {
            directory: connector_in_dir
                .canonicalize()
                .context("can't canonicalise local pile path")?
                .to_owned(),
        }
    } else if connector_in_dir.join(FILE_YAMA_CONNECTOR).exists() {
        let connector_toml = tokio::fs::read_to_string(&connector_in_dir.join(FILE_YAMA_CONNECTOR))
            .await
            .context("failed to read connector")?;
        let connector: PileConnectionScheme =
            toml::from_str(&connector_toml).context("failed to deserialise connector")?;
        connector
    } else {
        bail!("Neither yama.cfg nor yama.toml exists; doesn't look like a Yama pile or pile connector.");
    };

    let wormfileprovider = Arc::new(connection_scheme.connect_to_wormfileprovider().await?);
    let pile = Pile::open_manual(wormfileprovider, lock_kind, lock_holder, keyring).await?;

    let cache_dir = appdirs::user_cache_dir(Some("yama"), None).expect("can't obtain cache dir!");

    let mut hasher = XxHash64::default();
    connection_scheme.hash(&mut hasher);
    let u64_hash = hasher.finish();

    let base_name = connector_in_dir
        .file_name()
        .map(|f| f.to_string_lossy())
        .unwrap_or(Cow::Borrowed("_"));
    let cache_key = format!("{}-{:016x}.sqlite3", base_name, u64_hash);

    tokio::fs::create_dir_all(&cache_dir).await?;
    let cache_file = cache_dir.join(&cache_key);
    let localcache = yama_localcache::Store::new(&cache_file)
        .await
        .with_context(|| format!("failed to open local cache"))?;

    Ok(PileWithCache { pile, localcache })
}

pub async fn update_cache(pwc: &PileWithCache<BoxedWormFileProvider>) -> eyre::Result<()> {
    debug!("updating cache");
    let available_indices = pwc
        .pile
        .list_indices()
        .await
        .context("can't list available indices")?;
    let present_indices = pwc
        .localcache
        .read()
        .await?
        .list_indices()
        .await
        .context("can't list cached indices")?;

    let missing_indices: BTreeSet<IndexId> = available_indices
        .difference(&present_indices)
        .cloned()
        .collect();
    let deleted_indices: BTreeSet<IndexId> = present_indices
        .difference(&available_indices)
        .cloned()
        .collect();

    let mut downloaded_indices = Vec::new();

    debug!(
        "{} new indices to cache, {} deleted indices to back out",
        missing_indices.len(),
        deleted_indices.len()
    );
    for missing_index in missing_indices {
        debug!("downloading index {missing_index:?}");
        downloaded_indices.push((missing_index, pwc.pile.read_index(missing_index).await?));
    }

    let mut txn = pwc.localcache.write().await?;

    for deleted_index in deleted_indices {
        debug!("backing out index {deleted_index:?}");
        txn.delete_index(deleted_index).await?;
    }

    for (index_id, index) in downloaded_indices {
        debug!("applying index {index_id:?}");
        txn.apply_index(index_id, Arc::new(index)).await?;
    }

    debug!("finished updating cache");

    Ok(())
}
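A sketch (not part of this commit) of the typical call sequence for opening an existing pile from a connector directory. `LockKind::Shared` is an assumed example variant name; the actual lock kinds live in yama_pile::locks and are not shown in this diff.

// Illustrative sketch only: locate the keyring, unlock it, open the pile, refresh the cache.
async fn open_flow_sketch(dir: &Path) -> eyre::Result<PileWithCache<BoxedWormFileProvider>> {
    // Find access.yamakeyring or master.yamakeyring next to the pile/connector.
    let packed = pre_open_keyring(dir).await?;
    // Prompt for a password only if the keyring is locked.
    let keyring = open_keyring_interactive(packed).await?;
    // Open the pile under a lock (LockKind::Shared is a placeholder) and identify ourselves.
    let pwc = open_pile(dir, keyring, LockKind::Shared, crate::get_hostname()).await?;
    // Pull down any indices the local sqlite cache has not seen yet.
    update_cache(&pwc).await?;
    Ok(pwc)
}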
@@ -1,138 +0,0 @@
|
|||||||
/*
|
|
||||||
This file is part of Yama.
|
|
||||||
|
|
||||||
Yama is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
Yama is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
use anyhow::anyhow;
|
|
||||||
use log::warn;
|
|
||||||
use sodiumoxide::crypto::secretbox;
|
|
||||||
use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES};
|
|
||||||
|
|
||||||
use crate::definitions::ChunkId;
|
|
||||||
use crate::pile::{
|
|
||||||
ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
use crossbeam_channel::Sender;
|
|
||||||
|
|
||||||
/// A RawPile that provides encryption of chunk contents.
|
|
||||||
/// Please note that keys are not currently encrypted, so this scheme is not CPA-secure.
|
|
||||||
/// It seems easily possible to test the pile for inclusion of a known file (by first chunking it and
|
|
||||||
/// looking for matching chunk IDs).
|
|
||||||
/// Use of compression a custom Zstd dictionary may make that harder but in general it seems dubious
|
|
||||||
/// to rely on that.
|
|
||||||
/// This feature will be revisited soon...
|
|
||||||
/// Notably, keys should be passed through a secure permutation first.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct RawPileEncryptor<R: RawPile> {
|
|
||||||
underlying: R,
|
|
||||||
secret_key: Key,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: RawPile> RawPileEncryptor<R> {
|
|
||||||
pub fn new(underlying: R, key: Key) -> Self {
|
|
||||||
warn!(
|
|
||||||
"WARNING! Encrypted RawPiles are not CPA secure. Do not rely on them for security yet!"
|
|
||||||
);
|
|
||||||
RawPileEncryptor {
|
|
||||||
underlying,
|
|
||||||
secret_key: key,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decrypt(&self, kind: Keyspace, key: &[u8], data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
|
||||||
Ok(if kind == Keyspace::Chunk {
|
|
||||||
let mut nonce = [0u8; NONCEBYTES];
|
|
||||||
nonce[0..key.len()].copy_from_slice(key);
|
|
||||||
secretbox::open(data, &Nonce(nonce), &self.secret_key)
|
|
||||||
.or(Err(anyhow!("Failed to decrypt")))?
|
|
||||||
} else {
|
|
||||||
let mut nonce = [0u8; NONCEBYTES];
|
|
||||||
nonce.copy_from_slice(&data[0..NONCEBYTES]);
|
|
||||||
secretbox::open(&data[NONCEBYTES..], &Nonce(nonce), &self.secret_key)
|
|
||||||
.or(Err(anyhow!("Failed to decrypt")))?
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn encrypt(&self, kind: Keyspace, key: &[u8], data: &[u8]) -> Vec<u8> {
|
|
||||||
if kind == Keyspace::Chunk {
|
|
||||||
let mut nonce = [0u8; NONCEBYTES];
|
|
||||||
nonce[0..key.len()].copy_from_slice(key);
|
|
||||||
secretbox::seal(data, &Nonce(nonce), &self.secret_key)
|
|
||||||
} else {
|
|
||||||
let nonce = secretbox::gen_nonce();
|
|
||||||
let mut out = Vec::new();
|
|
||||||
out.extend_from_slice(&nonce.0);
|
|
||||||
out.extend_from_slice(&secretbox::seal(data, &nonce, &self.secret_key));
|
|
||||||
out
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: RawPile> RawPile for RawPileEncryptor<R> {
|
|
||||||
fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
|
|
||||||
self.underlying.exists(kind, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
|
|
||||||
if let Some(data) = self.underlying.read(kind, key)? {
|
|
||||||
Ok(Some(self.decrypt(kind, key, &data)?))
|
|
||||||
} else {
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
|
|
||||||
let encrypted = self.encrypt(kind, key, value);
|
|
||||||
self.underlying.write(kind, key, &encrypted)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
|
||||||
self.underlying.delete(kind, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.underlying.delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
|
||||||
&self,
|
|
||||||
kind: Keyspace,
|
|
||||||
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
|
|
||||||
self.underlying.list_keys(kind)
|
|
||||||
}
|
|
||||||
fn flush(&self) -> anyhow::Result<()> {
|
|
||||||
self.underlying.flush()
|
|
||||||
}
|
|
||||||
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
|
||||||
self.underlying.check_lowlevel()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_storage_pipeline(
|
|
||||||
&self,
|
|
||||||
_settings: StoragePipelineSettings,
|
|
||||||
_controller_send: Sender<ControllerMessage>,
|
|
||||||
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
let mut underlying = self.underlying.describe_pipeline()?;
|
|
||||||
underlying.push(PipelineDescription::Encryption);
|
|
||||||
Ok(underlying)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
|
yama/src/pile_connector.rs (new file, 76 lines)
@@ -0,0 +1,76 @@
use eyre::{bail, Context};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use yama_wormfile::boxed::BoxedWormFileProvider;
use yama_wormfile_fs::LocalWormFilesystem;
use yama_wormfile_sftp::SftpWormFilesystem;

#[derive(Clone, Serialize, Deserialize, Hash)]
#[serde(tag = "scheme")]
pub enum PileConnectionScheme {
    #[serde(rename = "local")]
    Local { directory: PathBuf },
    #[serde(rename = "sftp")]
    Sftp {
        user_at_host: String,
        // TODO Should probably not serialise the password
        password: String,
        directory: String,
    },
    #[serde(rename = "s3")]
    S3 {},
}

impl PileConnectionScheme {
    pub async fn connect_to_wormfileprovider(&self) -> eyre::Result<BoxedWormFileProvider> {
        match self {
            PileConnectionScheme::Local { directory } => {
                if directory.exists() {
                    if !directory.is_dir() {
                        bail!("Can't connect to local pile {directory:?}: not a directory.");
                    }
                } else {
                    tokio::fs::create_dir(directory)
                        .await
                        .context("Can't connect to local pile; can't create directory.")?;
                }
                Ok(BoxedWormFileProvider::new(LocalWormFilesystem::new(
                    directory,
                )?))
            }
            PileConnectionScheme::Sftp {
                user_at_host,
                password,
                directory,
            } => {
                if !password.is_empty() {
                    bail!("SFTP passwords not supported at the moment.");
                }
                Ok(BoxedWormFileProvider::new(
                    SftpWormFilesystem::new(user_at_host, directory)
                        .await
                        .context("Failed SFTP connection")?,
                ))
            }
            PileConnectionScheme::S3 { .. } => {
                //S3WormFilesystem::new()
                todo!()
            }
        }
    }
}

#[derive(Clone, Serialize, Deserialize)]
pub struct PileConnectionDetails {
    #[serde(flatten)]
    pub scheme: PileConnectionScheme,

    pub keyring: PathBuf,
}

impl PileConnectionDetails {
    pub async fn connect(self) -> eyre::Result<()> {
        // TODO
        Ok(())
    }
}
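A small sketch (not part of this commit) of what the internally-tagged serde layout of `PileConnectionScheme` looks like when written out as a connector TOML file. The host and remote directory are made-up examples.

// Illustrative sketch only: serialise an SFTP connector and inspect its TOML shape.
fn connector_toml_sketch() -> eyre::Result<()> {
    let scheme = PileConnectionScheme::Sftp {
        user_at_host: "backup@example.org".to_owned(), // hypothetical host
        password: String::new(),
        directory: "/srv/yama-pile".to_owned(), // hypothetical remote path
    };
    let text = toml::to_string(&scheme)?;
    // Produces something along the lines of:
    //   scheme = "sftp"
    //   user_at_host = "backup@example.org"
    //   password = ""
    //   directory = "/srv/yama-pile"
    println!("{text}");
    Ok(())
}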
yama/src/pile_with_cache.rs (new file, 56 lines)
@@ -0,0 +1,56 @@
use yama_localcache::Store;
use yama_pile::pointers::Pointer;
use yama_pile::Pile;
use yama_wormfile::WormFileProvider;

use crate::scan::integrate_uid_or_gid_map;
use async_recursion::async_recursion;
use eyre::{Context, ContextCompat};
use yama_pile::tree::integrate_node_in_place;

pub struct PileWithCache<WFP: WormFileProvider> {
    pub pile: Pile<WFP>,
    pub localcache: Store,
}

impl<WFP: WormFileProvider + 'static> PileWithCache<WFP> {
    pub async fn fully_integrate_pointer_in_place(
        &self,
        pointer: &mut Pointer,
    ) -> eyre::Result<()> {
        if let Some(parent_pointer_name) = pointer.parent.as_ref() {
            let parent_pointer = self
                .read_pointer_fully_integrated(parent_pointer_name)
                .await
                .with_context(|| {
                    format!("failed to read pointer {parent_pointer_name} whilst integrating")
                })?
                .with_context(|| {
                    format!("whilst integrating, expected pointer {parent_pointer_name} to exist")
                })?;

            integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node);
            integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids);
            integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids);
            pointer.parent = None;
        }
        Ok(())
    }

    #[async_recursion]
    pub async fn read_pointer_fully_integrated(&self, name: &str) -> eyre::Result<Option<Pointer>> {
        match self.pile.read_pointer(name).await? {
            Some(mut pointer) => {
                self.fully_integrate_pointer_in_place(&mut pointer).await?;
                Ok(Some(pointer))
            }
            None => Ok(None),
        }
    }

    /// Gracefully close this pile + local cache.
    pub async fn close(self) -> eyre::Result<()> {
        self.pile.close().await?;
        Ok(())
    }
}
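For orientation, a tiny sketch (not part of this commit): a pointer may reference a parent pointer and only record what changed relative to it, so loading a snapshot "fully integrated" merges the whole parent chain before use.

// Illustrative sketch only: load one snapshot with its parent chain already merged in.
async fn load_snapshot_sketch(
    pwc: &PileWithCache<BoxedWormFileProvider>,
    name: &str, // pointer name, e.g. a backup label
) -> eyre::Result<Pointer> {
    pwc.read_pointer_fully_integrated(name)
        .await?
        .ok_or_else(|| eyre::eyre!("pointer {name} not found"))
}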
yama/src/retriever.rs (new file, 396 lines)
@@ -0,0 +1,396 @@
|
|||||||
|
// TODO The retriever should possibly live somewhere else
|
||||||
|
|
||||||
|
use crate::pile_with_cache::PileWithCache;
|
||||||
|
use eyre::{bail, ensure, eyre, ContextCompat};
|
||||||
|
use flume::{Receiver, Sender};
|
||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::pin::Pin;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tracing::error;
|
||||||
|
use yama_pile::bloblogs::BloblogReader;
|
||||||
|
use yama_pile::definitions::BloblogId;
|
||||||
|
use yama_wormfile::boxed::BoxedWormFileProvider;
|
||||||
|
use yama_wormfile::WormFileReader;
|
||||||
|
|
||||||
|
pub mod decompressor;
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct JobId(pub u32);
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct FileId(pub u32);
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
|
||||||
|
pub struct JobChunkReq {
|
||||||
|
pub file: FileId,
|
||||||
|
pub offset: u64,
|
||||||
|
pub length: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum RetrieverResp {
|
||||||
|
Blob {
|
||||||
|
job: JobId,
|
||||||
|
subjob: u32,
|
||||||
|
blob: Vec<u8>,
|
||||||
|
},
|
||||||
|
JobComplete(JobId),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
|
||||||
|
struct FileRegionMarker {
|
||||||
|
pub file: FileId,
|
||||||
|
pub offset: u64,
|
||||||
|
pub length: u64,
|
||||||
|
pub job: JobId,
|
||||||
|
pub subjob: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct OpenFileState {
|
||||||
|
pub req_tx: Sender<OpenFileReq>,
|
||||||
|
pub offset: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct OpenFileReq {
|
||||||
|
pub offset: u64,
|
||||||
|
pub length: u64,
|
||||||
|
pub job: JobId,
|
||||||
|
pub subjob: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ActiveJobState {
|
||||||
|
pub subjobs: Vec<JobChunkReq>,
|
||||||
|
pub next_subjob: u32,
|
||||||
|
pub inflight: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Retriever {
|
||||||
|
job_tx: Sender<(JobId, Vec<JobChunkReq>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RetrieverInternals {
|
||||||
|
pwc: Arc<PileWithCache<BoxedWormFileProvider>>,
|
||||||
|
jobs_queue: BTreeMap<JobId, Vec<JobChunkReq>>,
|
||||||
|
file_regions: BTreeSet<FileRegionMarker>,
|
||||||
|
files: BTreeMap<FileId, BloblogId>,
|
||||||
|
open_files: BTreeMap<FileId, OpenFileState>,
|
||||||
|
results_tx: Sender<RetrieverResp>,
|
||||||
|
active_jobs: BTreeMap<JobId, ActiveJobState>,
|
||||||
|
ack_rx: Receiver<JobId>,
|
||||||
|
|
||||||
|
self_ack_tx: Sender<JobId>,
|
||||||
|
|
||||||
|
rec_active_jobs: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_fixed_retriever(
|
||||||
|
pwc: Arc<PileWithCache<BoxedWormFileProvider>>,
|
||||||
|
jobs: BTreeMap<JobId, Vec<JobChunkReq>>,
|
||||||
|
files: BTreeMap<FileId, BloblogId>,
|
||||||
|
rec_active_jobs: u16,
|
||||||
|
) -> eyre::Result<Receiver<RetrieverResp>> {
|
||||||
|
let (results_tx, results_rx) = flume::bounded(4);
|
||||||
|
let (self_ack_tx, ack_rx) = flume::bounded(4);
|
||||||
|
let mut rint = RetrieverInternals {
|
||||||
|
pwc,
|
||||||
|
jobs_queue: Default::default(),
|
||||||
|
file_regions: Default::default(),
|
||||||
|
files,
|
||||||
|
open_files: Default::default(),
|
||||||
|
results_tx,
|
||||||
|
active_jobs: Default::default(),
|
||||||
|
ack_rx,
|
||||||
|
self_ack_tx,
|
||||||
|
rec_active_jobs,
|
||||||
|
};
|
||||||
|
for (job_id, job) in jobs {
|
||||||
|
rint.set_up_job(job_id, job);
|
||||||
|
}
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = rint.retrieval_task().await {
|
||||||
|
error!("retriever failed: {e:?}");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(results_rx)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RetrieverInternals {
|
||||||
|
fn set_up_job(&mut self, job_id: JobId, job: Vec<JobChunkReq>) {
|
||||||
|
for (subjob, chunk) in job.iter().enumerate() {
|
||||||
|
self.file_regions.insert(FileRegionMarker {
|
||||||
|
file: chunk.file,
|
||||||
|
offset: chunk.offset,
|
||||||
|
length: chunk.length,
|
||||||
|
job: job_id,
|
||||||
|
subjob: subjob as u32,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
self.jobs_queue.insert(job_id, job);
|
||||||
|
// eprintln!("new job {job_id:?}");
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn file_request(
|
||||||
|
open_file: &mut OpenFileState,
|
||||||
|
job: JobId,
|
||||||
|
subjob: u32,
|
||||||
|
offset: u64,
|
||||||
|
length: u64,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
open_file
|
||||||
|
.req_tx
|
||||||
|
.send_async(OpenFileReq {
|
||||||
|
offset,
|
||||||
|
length,
|
||||||
|
job,
|
||||||
|
subjob,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|_| eyre!("open file shut down :/"))?;
|
||||||
|
open_file.offset = offset + length;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn open_file(&mut self, file_id: FileId) -> eyre::Result<()> {
|
||||||
|
assert!(!self.open_files.contains_key(&file_id));
|
||||||
|
|
||||||
|
let &bloblog_id = self.files.get(&file_id).context("no file by that ID")?;
|
||||||
|
let bloblog_reader = self.pwc.pile.read_bloblog(bloblog_id).await?;
|
||||||
|
|
||||||
|
let completion_tx = self.results_tx.clone();
|
||||||
|
let ack_tx = self.self_ack_tx.clone();
|
||||||
|
|
||||||
|
let (subjob_tx, subjob_rx) = flume::unbounded();
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) =
|
||||||
|
Self::reader_task(bloblog_reader, subjob_rx, ack_tx, completion_tx).await
|
||||||
|
{
|
||||||
|
error!("error in reader for {bloblog_id:?}: {e:?}");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
self.open_files.insert(
|
||||||
|
file_id,
|
||||||
|
OpenFileState {
|
||||||
|
req_tx: subjob_tx,
|
||||||
|
offset: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn reader_task(
|
||||||
|
mut bloblog_reader: BloblogReader<Pin<Box<dyn WormFileReader>>>,
|
||||||
|
subjob_rx: Receiver<OpenFileReq>,
|
||||||
|
ack_tx: Sender<JobId>,
|
||||||
|
completion_tx: Sender<RetrieverResp>,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
while let Ok(next_job) = subjob_rx.recv_async().await {
|
||||||
|
let mut blob = Vec::with_capacity(next_job.length as usize);
|
||||||
|
bloblog_reader
|
||||||
|
.read_to_buf(&mut blob, next_job.offset, next_job.length)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
completion_tx
|
||||||
|
.send_async(RetrieverResp::Blob {
|
||||||
|
job: next_job.job,
|
||||||
|
subjob: next_job.subjob,
|
||||||
|
blob,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("completions shut");
|
||||||
|
// eprintln!("completion of{next_job:?}");
|
||||||
|
ack_tx.send_async(next_job.job).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn retrieval_task(&mut self) -> eyre::Result<()> {
|
||||||
|
loop {
|
||||||
|
// 0. Try to progress open jobs if they are staring right at the bytes they need...
|
||||||
|
let mut to_remove = Vec::new();
|
||||||
|
for (active_job_id, active_job) in &mut self.active_jobs {
|
||||||
|
if active_job.inflight > 0 {
|
||||||
|
// skip if it's busy, we don't want to send blobs out of order...
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if active_job.next_subjob as usize >= active_job.subjobs.len() {
|
||||||
|
// this job is to be finished!
|
||||||
|
to_remove.push(*active_job_id);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
'single_job_staring: loop {
|
||||||
|
let desired_blob = &active_job.subjobs[active_job.next_subjob as usize];
|
||||||
|
if let Some(open_file) = self.open_files.get_mut(&desired_blob.file) {
|
||||||
|
if open_file.offset == desired_blob.offset {
|
||||||
|
Self::file_request(
|
||||||
|
open_file,
|
||||||
|
*active_job_id,
|
||||||
|
active_job.next_subjob,
|
||||||
|
desired_blob.offset,
|
||||||
|
desired_blob.length,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
ensure!(
|
||||||
|
self.file_regions.remove(&FileRegionMarker {
|
||||||
|
file: desired_blob.file,
|
||||||
|
offset: desired_blob.offset,
|
||||||
|
length: desired_blob.length,
|
||||||
|
job: *active_job_id,
|
||||||
|
subjob: active_job.next_subjob,
|
||||||
|
}),
|
||||||
|
"no FRM to remove (0)"
|
||||||
|
);
|
||||||
|
active_job.next_subjob += 1;
|
||||||
|
active_job.inflight += 1;
|
||||||
|
|
||||||
|
if active_job.next_subjob as usize >= active_job.subjobs.len() {
|
||||||
|
// this job is to be finished!
|
||||||
|
break 'single_job_staring;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break 'single_job_staring;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for remove in to_remove {
|
||||||
|
self.active_jobs.remove(&remove);
|
||||||
|
// eprintln!("job complete {remove:?}");
|
||||||
|
self.results_tx
|
||||||
|
.send_async(RetrieverResp::JobComplete(remove))
|
||||||
|
.await
|
||||||
|
.map_err(|_| eyre!("results_tx shutdown"))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. Try to make the most of open files by opening new jobs in convenient locations.
|
||||||
|
// Basically: if we have slots for new active jobs, then look to see if we have any
|
||||||
|
// jobs that begin at the offset in question...
|
||||||
|
if self.active_jobs.len() < self.rec_active_jobs as usize {
|
||||||
|
let mut allowed = self.rec_active_jobs as usize - self.active_jobs.len();
|
||||||
|
let mut progress = false;
|
||||||
|
for (open_file_id, open_file_state) in &self.open_files {
|
||||||
|
for region in self.file_regions.range(
|
||||||
|
FileRegionMarker {
|
||||||
|
file: *open_file_id,
|
||||||
|
offset: open_file_state.offset,
|
||||||
|
length: 0,
|
||||||
|
job: JobId(0),
|
||||||
|
subjob: 0,
|
||||||
|
}..FileRegionMarker {
|
||||||
|
file: *open_file_id,
|
||||||
|
offset: open_file_state.offset + 1,
|
||||||
|
length: 0,
|
||||||
|
job: JobId(0),
|
||||||
|
subjob: 0,
|
||||||
|
},
|
||||||
|
) {
|
||||||
|
if region.subjob != 0 {
|
||||||
|
// only accept this region if it's the start of a job
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Some(subjobs) = self.jobs_queue.remove(®ion.job) {
|
||||||
|
self.active_jobs.insert(
|
||||||
|
region.job,
|
||||||
|
ActiveJobState {
|
||||||
|
subjobs,
|
||||||
|
next_subjob: 0,
|
||||||
|
inflight: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
allowed -= 1;
|
||||||
|
progress = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if allowed == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if progress {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Try to progress active jobs, even if we have to open new files or seek.
|
||||||
|
let mut files_to_open = BTreeSet::new();
|
||||||
|
for (active_job_id, active_job) in &mut self.active_jobs {
|
||||||
|
if active_job.inflight > 0 {
|
||||||
|
// skip if it's busy, we don't want to send blobs out of order...
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let desired_blob = &active_job.subjobs[active_job.next_subjob as usize];
|
||||||
|
if let Some(open_file) = self.open_files.get_mut(&desired_blob.file) {
|
||||||
|
Self::file_request(
|
||||||
|
open_file,
|
||||||
|
*active_job_id,
|
||||||
|
active_job.next_subjob,
|
||||||
|
desired_blob.offset,
|
||||||
|
desired_blob.length,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
ensure!(
|
||||||
|
self.file_regions.remove(&FileRegionMarker {
|
||||||
|
file: desired_blob.file,
|
||||||
|
offset: desired_blob.offset,
|
||||||
|
length: desired_blob.length,
|
||||||
|
job: *active_job_id,
|
||||||
|
subjob: active_job.next_subjob,
|
||||||
|
}),
|
||||||
|
"no FRM to remove (0)"
|
||||||
|
);
|
||||||
|
active_job.next_subjob += 1;
|
||||||
|
active_job.inflight += 1;
|
||||||
|
} else {
|
||||||
|
// can't open immediately here due to mut borrow.
|
||||||
|
files_to_open.insert(desired_blob.file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !files_to_open.is_empty() {
|
||||||
|
for file in files_to_open {
|
||||||
|
self.open_file(file).await?;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Start new jobs
|
||||||
|
if self.active_jobs.len() < self.rec_active_jobs as usize {
|
||||||
|
// spawn a new job...
|
||||||
|
if let Some(activate_job_id) = self.jobs_queue.keys().next().cloned() {
|
||||||
|
let new_job = self.jobs_queue.remove(&activate_job_id).unwrap();
|
||||||
|
self.active_jobs.insert(
|
||||||
|
activate_job_id,
|
||||||
|
ActiveJobState {
|
||||||
|
subjobs: new_job,
|
||||||
|
next_subjob: 0,
|
||||||
|
inflight: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Block for acks, unless there are no jobs in which case we should just finish!
|
||||||
|
if self.active_jobs.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if let Ok(ack) = self.ack_rx.recv_async().await {
|
||||||
|
if let Some(job) = self.active_jobs.get_mut(&ack) {
|
||||||
|
ensure!(job.inflight > 0, "recv'd ack for job that has 0 inflight");
|
||||||
|
job.inflight -= 1;
|
||||||
|
} else {
|
||||||
|
bail!("recv'd ack for bad job {ack:?}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
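A brief sketch (not part of this commit) of the consumer side of the retriever: `create_fixed_retriever` hands back a channel on which each job's blobs arrive in subjob order, terminated by a `JobComplete` marker.

// Illustrative sketch only: drain the retriever's output channel.
async fn drain_retriever_sketch(rx: Receiver<RetrieverResp>) -> eyre::Result<()> {
    while let Ok(resp) = rx.recv_async().await {
        match resp {
            RetrieverResp::Blob { job, subjob, blob } => {
                // hand the raw (still-compressed) blob to the next pipeline stage
                println!("job {job:?} subjob {subjob}: {} bytes", blob.len());
            }
            RetrieverResp::JobComplete(job) => {
                println!("job {job:?} finished");
            }
        }
    }
    Ok(())
}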
yama/src/retriever/decompressor.rs (new file, 211 lines)
@@ -0,0 +1,211 @@
|
|||||||
|
use crate::retriever::{JobId, RetrieverResp};
|
||||||
|
use eyre::{bail, ensure, eyre, Context, ContextCompat};
|
||||||
|
use flume::{Receiver, Sender};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tracing::error;
|
||||||
|
use zstd::bulk::Decompressor;
|
||||||
|
|
||||||
|
pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
|
||||||
|
|
||||||
|
pub struct PipelineDecompressor {
|
||||||
|
rx: Receiver<RetrieverResp>,
|
||||||
|
tx: Sender<RetrieverResp>,
|
||||||
|
job_pool_tx: Sender<(JobId, u32, Vec<u8>)>,
|
||||||
|
complete_rx: Receiver<(JobId, u32, Vec<u8>)>,
|
||||||
|
|
||||||
|
processing: BTreeMap<JobId, JobState>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct JobState {
|
||||||
|
pub next_submit_subjob: u32,
|
||||||
|
pub next_enqueue_subjob: u32,
|
||||||
|
pub queued: BTreeMap<u32, Vec<u8>>,
|
||||||
|
pub complete: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PipelineDecompressor {
|
||||||
|
pub fn start(
|
||||||
|
decom_dict: Option<Arc<Vec<u8>>>,
|
||||||
|
num_decom: u8,
|
||||||
|
rx: Receiver<RetrieverResp>,
|
||||||
|
) -> eyre::Result<Receiver<RetrieverResp>> {
|
||||||
|
let (out_tx, out_rx) = flume::bounded(4);
|
||||||
|
|
||||||
|
let (job_pool_tx, job_pool_rx) = flume::bounded(0);
|
||||||
|
let (complete_tx, complete_rx) = flume::unbounded();
|
||||||
|
|
||||||
|
for num in 0..num_decom {
|
||||||
|
let decom_dict = decom_dict.clone();
|
||||||
|
let job_pool_rx = job_pool_rx.clone();
|
||||||
|
let complete_tx = complete_tx.clone();
|
||||||
|
std::thread::Builder::new()
|
||||||
|
.name(format!("decomp {num}"))
|
||||||
|
.spawn(move || {
|
||||||
|
if let Err(err) =
|
||||||
|
Self::decompressor_worker(decom_dict, job_pool_rx, complete_tx)
|
||||||
|
{
|
||||||
|
error!("error in decompressor worker: {err:?}");
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut pd = PipelineDecompressor {
|
||||||
|
rx,
|
||||||
|
tx: out_tx,
|
||||||
|
job_pool_tx,
|
||||||
|
complete_rx,
|
||||||
|
processing: Default::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = pd.decompressor_manager().await {
|
||||||
|
eprintln!("pipeline decompressor error: {e:?}");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(out_rx)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decompressor_worker(
|
||||||
|
decom_dict: Option<Arc<Vec<u8>>>,
|
||||||
|
job_pool_rx: Receiver<(JobId, u32, Vec<u8>)>,
|
||||||
|
complete_tx: Sender<(JobId, u32, Vec<u8>)>,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
let mut decompressor = match decom_dict {
|
||||||
|
Some(dict) => Decompressor::with_dictionary(&dict)?,
|
||||||
|
None => Decompressor::new()?,
|
||||||
|
};
|
||||||
|
while let Ok((job_id, subjob, compressed_bytes)) = job_pool_rx.recv() {
|
||||||
|
let decompressed_bytes = decompressor
|
||||||
|
.decompress(&compressed_bytes, DECOMPRESS_CAPACITY)
|
||||||
|
.context("failed to decompress")?;
|
||||||
|
complete_tx
|
||||||
|
.send((job_id, subjob, decompressed_bytes))
|
||||||
|
.map_err(|_| eyre!("complete_tx shutdown"))?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn decompressor_manager(&mut self) -> eyre::Result<()> {
|
||||||
|
let mut incoming_open = true;
|
||||||
|
loop {
|
||||||
|
// Always process completed jobs as top priority
|
||||||
|
while let Ok(completion) = self.complete_rx.try_recv() {
|
||||||
|
self.handle_completion(completion).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then it doesn't matter so much what we process after that
|
||||||
|
tokio::select! {
|
||||||
|
Ok(completion) = self.complete_rx.recv_async(), if !self.processing.is_empty() => {
|
||||||
|
self.handle_completion(completion).await?;
|
||||||
|
},
|
||||||
|
incoming_res = self.rx.recv_async(), if incoming_open => {
|
||||||
|
if let Ok(incoming) = incoming_res {
|
||||||
|
self.handle_incoming(incoming).await?;
|
||||||
|
} else {
|
||||||
|
incoming_open = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else => {
|
||||||
|
if !self.processing.is_empty() {
|
||||||
|
bail!("decompressor still procesing but shutting down?");
|
||||||
|
}
|
||||||
|
// eprintln!("D shutdown");
|
||||||
|
break Ok(());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_completion(
|
||||||
|
&mut self,
|
||||||
|
(job_id, subjob, decompressed): (JobId, u32, Vec<u8>),
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
let state = self
|
||||||
|
.processing
|
||||||
|
.get_mut(&job_id)
|
||||||
|
.context("bad job when recv complete decomp")?;
|
||||||
|
ensure!(
|
||||||
|
state.queued.insert(subjob, decompressed).is_none(),
|
||||||
|
"overwrote decompressed block??"
|
||||||
|
);
|
||||||
|
while let Some(send_off) = state.queued.remove(&state.next_submit_subjob) {
|
||||||
|
// eprintln!("D send off {job_id:?} {subjob}");
|
||||||
|
self.tx
|
||||||
|
.send(RetrieverResp::Blob {
|
||||||
|
job: job_id,
|
||||||
|
subjob: state.next_submit_subjob,
|
||||||
|
blob: send_off,
|
||||||
|
})
|
||||||
|
.map_err(|_| eyre!("tx shutdown"))?;
|
||||||
|
state.next_submit_subjob += 1;
|
||||||
|
}
|
||||||
|
if state.queued.is_empty()
|
||||||
|
&& state.complete
|
||||||
|
&& state.next_submit_subjob == state.next_enqueue_subjob
|
||||||
|
{
|
||||||
|
// This job is done now
|
||||||
|
// eprintln!("D jc {job_id:?}");
|
||||||
|
self.tx
|
||||||
|
.send(RetrieverResp::JobComplete(job_id))
|
||||||
|
.map_err(|_| eyre!("tx shutdown"))?;
|
||||||
|
self.processing.remove(&job_id);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_incoming(&mut self, incoming: RetrieverResp) -> eyre::Result<()> {
|
||||||
|
match incoming {
|
||||||
|
RetrieverResp::Blob { job, subjob, blob } => {
|
||||||
|
if subjob == 0 {
|
||||||
|
ensure!(
|
||||||
|
self.processing
|
||||||
|
.insert(
|
||||||
|
job,
|
||||||
|
JobState {
|
||||||
|
next_submit_subjob: 0,
|
||||||
|
next_enqueue_subjob: 0,
|
||||||
|
queued: Default::default(),
|
||||||
|
complete: false,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
.is_none(),
|
||||||
|
"job was overwritten"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let state = self
|
||||||
|
.processing
|
||||||
|
.get_mut(&job)
|
||||||
|
.context("bad job/not starting at 0 for job")?;
|
||||||
|
ensure!(
|
||||||
|
state.next_enqueue_subjob == subjob,
|
||||||
|
"out of order Blob commands"
|
||||||
|
);
|
||||||
|
state.next_enqueue_subjob += 1;
|
||||||
|
self.job_pool_tx
|
||||||
|
.send_async((job, subjob, blob))
|
||||||
|
.await
|
||||||
|
.map_err(|_| eyre!("job_pool_tx shutdown"))?;
|
||||||
|
}
|
||||||
|
RetrieverResp::JobComplete(job) => {
|
||||||
|
let state = self
|
||||||
|
.processing
|
||||||
|
.get_mut(&job)
|
||||||
|
.context("bad job to complete")?;
|
||||||
|
state.complete = true;
|
||||||
|
|
||||||
|
let can_remove = state.next_submit_subjob == state.next_enqueue_subjob;
|
||||||
|
|
||||||
|
if can_remove {
|
||||||
|
self.tx
|
||||||
|
.send(RetrieverResp::JobComplete(job))
|
||||||
|
.map_err(|_| eyre!("tx shutdown"))?;
|
||||||
|
self.processing.remove(&job);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
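A short sketch (not part of this commit) of how the decompressor slots into the pipeline, as extract.rs already does above: it wraps the raw retriever channel, decompresses blobs on worker threads, and re-emits the same `RetrieverResp` protocol with (job, subjob) ordering preserved.

// Illustrative sketch only: wrap a raw retriever channel with the pipeline decompressor.
fn wrap_with_decompressor_sketch(
    zstd_dict: Option<Arc<Vec<u8>>>,
    raw: Receiver<RetrieverResp>,
) -> eyre::Result<Receiver<RetrieverResp>> {
    // Two decompression threads; the returned channel speaks the same RetrieverResp protocol.
    PipelineDecompressor::start(zstd_dict, 2, raw)
}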
yama/src/scan.rs (new file, 263 lines)
@@ -0,0 +1,263 @@
|
|||||||
|
use eyre::{bail, eyre, Context};
|
||||||
|
use ignore::WalkBuilder;
|
||||||
|
use patricia_tree::PatriciaMap;
|
||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::fs::{read_link, Metadata};
|
||||||
|
use std::io::ErrorKind;
|
||||||
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
use std::path::{Component, Path};
|
||||||
|
use tracing::warn;
|
||||||
|
use yama_pile::definitions::RecursiveChunkRef;
|
||||||
|
use yama_pile::tree::unpopulated::ScanEntry;
|
||||||
|
use yama_pile::tree::{mtime_msec, FilesystemOwnership, FilesystemPermissions, TreeNode};
|
||||||
|
|
||||||
|
/// Given a node, recursively constructs a UID and GID lookup table based on THIS system's
|
||||||
|
/// users and groups.
|
||||||
|
///
|
||||||
|
/// Returns UIDs then GIDs.
|
||||||
|
pub fn create_uidgid_lookup_tables(
|
||||||
|
node: &TreeNode,
|
||||||
|
) -> eyre::Result<(BTreeMap<u16, String>, BTreeMap<u16, String>)> {
|
||||||
|
let mut uids = BTreeMap::<u16, String>::new();
|
||||||
|
let mut gids = BTreeMap::<u16, String>::new();
|
||||||
|
let mut used_uids = BTreeSet::new();
|
||||||
|
let mut used_gids = BTreeSet::new();
|
||||||
|
|
||||||
|
find_used_uidsgids(&node, &mut used_uids, &mut used_gids);
|
||||||
|
|
||||||
|
for uid in used_uids {
|
||||||
|
if let Some(user) = users::get_user_by_uid(uid.into()) {
|
||||||
|
uids.insert(
|
||||||
|
uid,
|
||||||
|
user.name()
|
||||||
|
.to_str()
|
||||||
|
.ok_or(eyre!("uid leads to non-String name"))?
|
||||||
|
.to_owned(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for gid in used_gids {
|
||||||
|
if let Some(group) = users::get_group_by_gid(gid.into()) {
|
||||||
|
gids.insert(
|
||||||
|
gid,
|
||||||
|
group
|
||||||
|
.name()
|
||||||
|
.to_str()
|
||||||
|
.ok_or(eyre!("gid leads to non-String name"))?
|
||||||
|
.to_owned(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((uids, gids))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_used_uidsgids(node: &TreeNode, uids: &mut BTreeSet<u16>, gids: &mut BTreeSet<u16>) {
|
||||||
|
match &node {
|
||||||
|
TreeNode::NormalFile { ownership, .. }
|
||||||
|
| TreeNode::Directory { ownership, .. }
|
||||||
|
| TreeNode::SymbolicLink { ownership, .. } => {
|
||||||
|
uids.insert(ownership.uid);
|
||||||
|
gids.insert(ownership.gid);
|
||||||
|
}
|
||||||
|
TreeNode::Deleted => { /* nop */ }
|
||||||
|
};
|
||||||
|
|
||||||
|
if let TreeNode::Directory { children, .. } = &node {
|
||||||
|
for (_name, child) in children {
|
||||||
|
find_used_uidsgids(child, uids, gids);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the relative path.
|
||||||
|
///
|
||||||
|
/// Returns empty string if the paths are the same, otherwise it's a /-separated string.
|
||||||
|
/// The returned string is not allowed to contain any . or .. components.
|
||||||
|
pub fn relative_path(base: &Path, leaf: &Path) -> Option<String> {
|
||||||
|
assert_eq!(std::path::MAIN_SEPARATOR, '/');
|
||||||
|
|
||||||
|
let relative = leaf.strip_prefix(base).ok()?;
|
||||||
|
|
||||||
|
if relative
|
||||||
|
.components()
|
||||||
|
.any(|c| c == Component::CurDir || c == Component::ParentDir || c == Component::RootDir)
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
relative.to_str().map(|s| s.to_owned())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scans a directory tree.
|
||||||
|
///
|
||||||
|
/// Aborts if any errors (permission, bad .yamaignore files, etc) are encountered.
|
||||||
|
/// In the future, we possibly want to consider allowing
|
||||||
|
pub fn scan(root: &Path, ignores: &Vec<String>) -> eyre::Result<PatriciaMap<ScanEntry>> {
|
||||||
|
let mut walker = WalkBuilder::new(root);
|
||||||
|
walker
|
||||||
|
.standard_filters(false)
|
||||||
|
.add_custom_ignore_filename(".yamaignore")
|
||||||
|
.parents(false)
|
||||||
|
.follow_links(false)
|
||||||
|
.same_file_system(true);
|
||||||
|
|
||||||
|
for ign in ignores {
|
||||||
|
walker.add_ignore(ign);
|
||||||
|
}
|
||||||
|
let walker = walker.build();
|
||||||
|
|
||||||
|
let mut entries: PatriciaMap<ScanEntry> = PatriciaMap::new();
|
||||||
|
|
||||||
|
for entry in walker {
|
||||||
|
let entry = entry?;
|
||||||
|
|
||||||
|
if !entry.path().starts_with(root) {
|
||||||
|
bail!(
|
||||||
|
"Scanned entry {:?} does not start with search path {:?}",
|
||||||
|
entry.path(),
|
||||||
|
root
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let rel_path = if let Some(rel_path) = relative_path(root, entry.path()) {
|
||||||
|
rel_path
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
if !rel_path.is_empty() {
|
||||||
|
let parent_relpath = rel_path
|
||||||
|
.rsplit_once('/')
|
||||||
|
.map(|(parent, _child)| parent)
|
||||||
|
.unwrap_or("");
|
||||||
|
assert!(
|
||||||
|
entries.contains_key(parent_relpath),
|
||||||
|
"have not scanned parent for {}",
|
||||||
|
rel_path
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(single_scan) = scan_one_no_recurse(
|
||||||
|
entry.path(),
|
||||||
|
entry
|
||||||
|
.metadata()
|
||||||
|
.with_context(|| format!("Failed to read metadata for {:?}", rel_path))?,
|
||||||
|
)
|
||||||
|
.with_context(|| format!("Failed to scan {:?}", rel_path))?
|
||||||
|
{
|
||||||
|
entries.insert(rel_path, single_scan);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scan_one_no_recurse(path: &Path, metadata: Metadata) -> eyre::Result<Option<ScanEntry>> {
|
||||||
|
let filetype = metadata.file_type();
|
||||||
|
|
||||||
|
let ownership = FilesystemOwnership {
|
||||||
|
uid: metadata.uid() as u16,
|
||||||
|
gid: metadata.gid() as u16,
|
||||||
|
};
|
||||||
|
|
||||||
|
let permissions = FilesystemPermissions {
|
||||||
|
mode: metadata.mode(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if filetype.is_file() {
|
||||||
|
// Leave an unpopulated file node. It's not my responsibility to chunk it right now.
|
||||||
|
Ok(Some(ScanEntry::NormalFile {
|
||||||
|
mtime: mtime_msec(&metadata),
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size: metadata.size(),
|
||||||
|
}))
|
||||||
|
} else if filetype.is_dir() {
|
||||||
|
let dir_read = path.read_dir();
|
||||||
|
|
||||||
|
if let Err(e) = &dir_read {
|
||||||
|
match e.kind() {
|
||||||
|
ErrorKind::NotFound => {
|
||||||
|
warn!("vanished/: {:?}", path);
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
ErrorKind::PermissionDenied => {
|
||||||
|
warn!("permission denied/: {:?}", path);
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
_ => { /* nop */ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(ScanEntry::Directory {
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
}))
|
||||||
|
} else if filetype.is_symlink() {
|
||||||
|
let target = read_link(path)?
|
||||||
|
.to_str()
|
||||||
|
.ok_or(eyre!("target path cannot be to_str()d"))?
|
||||||
|
.to_owned();
|
||||||
|
|
||||||
|
Ok(Some(ScanEntry::SymbolicLink { ownership, target }))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Given the parent pointer's root TreeNode and a scan entry map of the current pointer,
|
||||||
|
/// return a chunkings map prepopulated with the reusable entries.
|
||||||
|
/// Also returns a pruned copy of the scan entry map.
|
||||||
|
pub fn prepopulate_unmodified(
|
||||||
|
parent_tree: &TreeNode,
|
||||||
|
scan_entry_map: &PatriciaMap<ScanEntry>,
|
||||||
|
) -> (
|
||||||
|
PatriciaMap<(RecursiveChunkRef, u64)>,
|
||||||
|
PatriciaMap<ScanEntry>,
|
||||||
|
) {
|
||||||
|
let mut reusable_chunkings = PatriciaMap::new();
|
||||||
|
let mut pruned_scan_entry_map = scan_entry_map.clone();
|
||||||
|
parent_tree
|
||||||
|
.visit(
|
||||||
|
&mut |tree_node, path| {
|
||||||
|
if let TreeNode::NormalFile {
|
||||||
|
mtime: prev_mtime,
|
||||||
|
ownership: prev_ownership,
|
||||||
|
permissions: prev_permissions,
|
||||||
|
size: prev_size,
|
||||||
|
content: prev_content,
|
||||||
|
} = tree_node
|
||||||
|
{
|
||||||
|
if let Some(ScanEntry::NormalFile {
|
||||||
|
mtime,
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size,
|
||||||
|
}) = scan_entry_map.get(path)
|
||||||
|
{
|
||||||
|
if mtime == prev_mtime
|
||||||
|
&& size == prev_size
|
||||||
|
&& ownership == prev_ownership
|
||||||
|
&& prev_permissions == permissions
|
||||||
|
{
|
||||||
|
// Nothing seems to have changed about this file, let's just reuse the `content` from last time.
|
||||||
|
reusable_chunkings.insert(path, (*prev_content, *size));
|
||||||
|
pruned_scan_entry_map.remove(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
String::new(),
|
||||||
|
)
|
||||||
|
.expect("no reason to fail");
|
||||||
|
(reusable_chunkings, pruned_scan_entry_map)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn integrate_uid_or_gid_map(new: &mut BTreeMap<u16, String>, old: &BTreeMap<u16, String>) {
|
||||||
|
for (old_uid, old_user) in old {
|
||||||
|
new.entry(*old_uid).or_insert_with(|| old_user.clone());
|
||||||
|
}
|
||||||
|
}
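A minimal sketch (not part of this commit) of calling `scan` on a source directory: the result is a flat map keyed by relative path, which the storing stage later chunks and uploads. The path and ignore pattern are placeholders.

// Illustrative sketch only: scan a directory tree into the flat ScanEntry map.
fn scan_sketch() -> eyre::Result<()> {
    let ignores = vec!["*.tmp".to_owned()]; // placeholder ignore pattern
    let entries = scan(Path::new("/home/user/data"), &ignores)?; // placeholder path
    // Keys are /-separated relative paths; "" is the scan root itself.
    println!("scanned {} entries", entries.len());
    Ok(())
}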
yama/src/storing.rs (new file, 391 lines)
@@ -0,0 +1,391 @@
|
|||||||
|
use crate::pile_with_cache::PileWithCache;
|
||||||
|
use dashmap::DashSet;
|
||||||
|
use eyre::{bail, Context};
|
||||||
|
use fastcdc::v2020::FastCDC;
|
||||||
|
use flume::{Receiver, RecvError, SendError, Sender};
|
||||||
|
use std::cmp::Reverse;
|
||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::pin::Pin;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::fs::File;
|
||||||
|
use tokio::runtime::Handle;
|
||||||
|
use tokio::task;
|
||||||
|
use tokio::task::JoinSet;
|
||||||
|
use tracing::{debug, error, info_span, Instrument};
|
||||||
|
use yama_localcache::StoreConnection;
|
||||||
|
use yama_midlevel_crypto::chunk_id::{ChunkId, ChunkIdKey};
|
||||||
|
use yama_pile::bloblogs::BloblogWriter;
|
||||||
|
use yama_pile::definitions::{BlobLocator, BloblogId, Index, IndexBloblogEntry, RecursiveChunkRef};
|
||||||
|
use yama_wormfile::boxed::BoxedWormFileProvider;
|
||||||
|
use yama_wormfile::WormFileWriter;
|
||||||
|
use zstd::bulk::Compressor;
|
||||||
|
|
||||||
|
pub const DESIRED_INDEX_SIZE_ENTRIES: usize = 32768;
|
||||||
|
|
||||||
|
// 256 kiB
|
||||||
|
pub const FASTCDC_MIN: u32 = 256 * 1024;
|
||||||
|
// 1 MiB
|
||||||
|
pub const FASTCDC_AVG: u32 = 1024 * 1024;
|
||||||
|
// 8 MiB
|
||||||
|
pub const FASTCDC_MAX: u32 = 8 * 1024 * 1024;
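// These bounds drive FastCDC's content-defined chunking: chunk boundaries are
// chosen from the content itself, but a chunk is never smaller than
// FASTCDC_MIN, never larger than FASTCDC_MAX, and roughly FASTCDC_AVG on average.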
|
||||||
|
|
||||||
|
pub struct StoringState {
|
||||||
|
/// A connection to the local cache, used to check whether chunks are already known.
|
||||||
|
pub cache_conn: StoreConnection<false>,
|
||||||
|
/// Set of unflushed chunks, not present in any index, which we can assume have been created in this session.
|
||||||
|
pub new_unflushed_chunks: Arc<DashSet<ChunkId>>,
|
||||||
|
/// New bloblogs that we have created but not yet written out indices for.
|
||||||
|
pub new_bloblogs: Vec<(BloblogId, BTreeMap<ChunkId, BlobLocator>)>,
|
||||||
|
|
||||||
|
pub pwc: Arc<PileWithCache<BoxedWormFileProvider>>,
|
||||||
|
|
||||||
|
pub chunk_id_key: ChunkIdKey,
|
||||||
|
|
||||||
|
pub compressor: zstd::bulk::Compressor<'static>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct StoringIntermediate {
|
||||||
|
/// New bloblogs that we have created but not yet written out indices for.
|
||||||
|
pub new_bloblogs: Vec<(BloblogId, BTreeMap<ChunkId, BlobLocator>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<StoringState> for StoringIntermediate {
|
||||||
|
fn from(ss: StoringState) -> Self {
|
||||||
|
StoringIntermediate {
|
||||||
|
new_bloblogs: ss.new_bloblogs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct StoringBloblogWriters {
|
||||||
|
/// Bloblog writer for actual file contents (we try to keep file contents sequential in the
|
||||||
|
/// common case)
|
||||||
|
pub file_contents: Option<BloblogWriter<Pin<Box<dyn WormFileWriter>>>>,
|
||||||
|
/// Bloblog writer for chunks of chunks
|
||||||
|
pub metachunks: Option<BloblogWriter<Pin<Box<dyn WormFileWriter>>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StoringBloblogWriters {
|
||||||
|
async fn finish_bloblogs(&mut self, ss: &mut StoringState) -> eyre::Result<()> {
|
||||||
|
if let Some(writer_to_finish) = self.file_contents.take() {
|
||||||
|
let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?;
|
||||||
|
ss.new_bloblogs.push((bloblog_id, chunkmap));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(writer_to_finish) = self.metachunks.take() {
|
||||||
|
let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?;
|
||||||
|
ss.new_bloblogs.push((bloblog_id, chunkmap));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StoringState {
|
||||||
|
/// Acquire a bloblog writer handle, reusing the existing one in the slot if suitable.
|
||||||
|
async fn obtain_bloblog_writer<'a>(
|
||||||
|
&mut self,
|
||||||
|
slot: &'a mut Option<BloblogWriter<Pin<Box<dyn WormFileWriter>>>>,
|
||||||
|
) -> eyre::Result<&'a mut BloblogWriter<Pin<Box<dyn WormFileWriter>>>> {
|
||||||
|
// if let Some(ref mut writer) = slot {
|
||||||
|
// if !writer.should_finish() {
|
||||||
|
// return Ok(writer);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// awkward avoidance of strange borrow issues that I don't fully grok
|
||||||
|
if slot.as_ref().map(|w| w.should_finish()) == Some(false) {
|
||||||
|
return Ok(slot.as_mut().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(writer_to_finish) = slot.take() {
|
||||||
|
let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?;
|
||||||
|
self.new_bloblogs.push((bloblog_id, chunkmap));
|
||||||
|
}
|
||||||
|
|
||||||
|
*slot = Some(self.pwc.pile.create_bloblog().await?);
|
||||||
|
Ok(slot.as_mut().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn store_full_slice_returning_chunks(
|
||||||
|
&mut self,
|
||||||
|
store_slice: &[u8],
|
||||||
|
slot: &mut Option<BloblogWriter<Pin<Box<dyn WormFileWriter>>>>,
|
||||||
|
) -> eyre::Result<Vec<ChunkId>> {
|
||||||
|
task::block_in_place(|| {
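// This closure runs CPU-heavy work (content-defined chunking and compression)
// directly on the current Tokio worker thread; `block_in_place` tells the
// runtime that is intentional, and `Handle::block_on` below is used for the
// occasional async cache/bloblog call.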
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for chunk in FastCDC::new(store_slice, FASTCDC_MIN, FASTCDC_AVG, FASTCDC_MAX) {
|
||||||
|
let chunk_bytes = &store_slice[chunk.offset..chunk.offset + chunk.length];
|
||||||
|
let chunk_id = ChunkId::compute(chunk_bytes, &self.chunk_id_key);
|
||||||
|
result.push(chunk_id);
|
||||||
|
let is_new = Handle::current().block_on(async {
|
||||||
|
Ok::<bool, eyre::Report>(
|
||||||
|
self.cache_conn.is_chunk_new(chunk_id).await?
|
||||||
|
&& self.new_unflushed_chunks.insert(chunk_id),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if is_new {
|
||||||
|
let compressed_bytes = self.compressor.compress(&chunk_bytes)?;
|
||||||
|
|
||||||
|
Handle::current().block_on(async {
|
||||||
|
let writer = self.obtain_bloblog_writer(slot).await?;
|
||||||
|
writer.write_chunk(chunk_id, &compressed_bytes).await?;
|
||||||
|
Ok::<(), eyre::Report>(())
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn store_full_slice(
|
||||||
|
&mut self,
|
||||||
|
store_slice: &[u8],
|
||||||
|
sbw: &mut StoringBloblogWriters,
|
||||||
|
) -> eyre::Result<RecursiveChunkRef> {
|
||||||
|
// First calculate all the chunk IDs needed to be written here.
|
||||||
|
let mut chunk_ids =
|
||||||
|
self.store_full_slice_returning_chunks(store_slice, &mut sbw.file_contents)?;
|
||||||
|
let mut depth = 0;
|
||||||
|
|
||||||
|
// While there is more than one chunk ID, chunk the list of chunk IDs itself
// (producing metachunks), increasing the depth each time, until a single root chunk remains.
|
||||||
|
while chunk_ids.len() != 1 {
|
||||||
|
let mut metachunks_list_bytes: Vec<u8> = Vec::with_capacity(chunk_ids.len() * 32);
|
||||||
|
for chunk_id in chunk_ids {
|
||||||
|
metachunks_list_bytes.extend_from_slice(&chunk_id.to_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO It might be nice to store these in opposite order, so a read is a true sequential
|
||||||
|
// scan.
|
||||||
|
// i.e. (depth=3) (depth=2) (depth=1) (depth=0) ...
|
||||||
|
chunk_ids = self
|
||||||
|
.store_full_slice_returning_chunks(&metachunks_list_bytes, &mut sbw.metachunks)?;
|
||||||
|
depth += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(RecursiveChunkRef {
|
||||||
|
chunk_id: chunk_ids[0],
|
||||||
|
depth,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
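// Illustrative sketch (not part of this commit): a rough estimate of how many
// metachunk levels `store_full_slice` produces for a given number of leaf
// chunks. Each metachunk level packs 32-byte chunk IDs into a byte buffer
// that is itself chunked, so the fan-out per level is about FASTCDC_AVG / 32.
// The function name and the estimate are only for illustration.
#[allow(dead_code)]
fn approx_depth_for_leaf_chunks(mut chunk_count: u64) -> u32 {
    let approx_ids_per_metachunk = (FASTCDC_AVG / 32) as u64; // ~32768 IDs per ~1 MiB metachunk
    let mut depth = 0;
    while chunk_count > 1 {
        chunk_count = (chunk_count + approx_ids_per_metachunk - 1) / approx_ids_per_metachunk;
        depth += 1;
    }
    depth
}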
|
||||||
|
|
||||||
|
async fn store_file(
|
||||||
|
file_path: &Path,
|
||||||
|
storing_state: &mut StoringState,
|
||||||
|
sbw: &mut StoringBloblogWriters,
|
||||||
|
) -> eyre::Result<(RecursiveChunkRef, u64)> {
|
||||||
|
let file = File::open(file_path).await?.into_std().await;
|
||||||
|
let mapped = unsafe { memmap2::Mmap::map(&file) }?;
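// Memory-mapping lets the chunker see the whole file without reading it all
// into RAM up front. `unsafe` because the mapping's validity relies on the
// file not being truncated or modified while it is mapped.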
|
||||||
|
let size_of_file = mapped.as_ref().len();
|
||||||
|
let chunkref = storing_state.store_full_slice(mapped.as_ref(), sbw)?;
|
||||||
|
Ok((chunkref, size_of_file as u64))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct StoragePipeline {
|
||||||
|
result_rx: Receiver<(String, RecursiveChunkRef, u64)>,
|
||||||
|
join_set: JoinSet<eyre::Result<StoringIntermediate>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn storage_pipeline_worker(
|
||||||
|
job_rx: Receiver<(String, PathBuf)>,
|
||||||
|
result_tx: Sender<(String, RecursiveChunkRef, u64)>,
|
||||||
|
mut storing_state: StoringState,
|
||||||
|
) -> eyre::Result<StoringIntermediate> {
|
||||||
|
let mut bloblog_writers = StoringBloblogWriters::default();
|
||||||
|
|
||||||
|
debug!("SPW startup");
|
||||||
|
|
||||||
|
while let Ok((job_id, file_path)) = job_rx.recv_async().await {
|
||||||
|
let span = info_span!("store_file", file=?file_path);
|
||||||
|
let span_enter = span.enter();
|
||||||
|
// debug!("SPW job {job_id:?}");
|
||||||
|
let (rec_chunk_ref, file_length) =
|
||||||
|
store_file(&file_path, &mut storing_state, &mut bloblog_writers)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("failed to store {file_path:?}"))?;
|
||||||
|
// debug!("SPW good {job_id:?}");
|
||||||
|
if let Err(SendError(to_be_sent)) = result_tx
|
||||||
|
.send_async((job_id, rec_chunk_ref, file_length))
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
bail!("Can't return result for {to_be_sent:?} — result_tx shut down.");
|
||||||
|
}
|
||||||
|
|
||||||
|
drop(span_enter);
|
||||||
|
drop(span);
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!("SPW shutdown");
|
||||||
|
|
||||||
|
bloblog_writers.finish_bloblogs(&mut storing_state).await?;
|
||||||
|
|
||||||
|
Ok(StoringIntermediate::from(storing_state))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_zstd_level() -> i32 {
|
||||||
|
// TODO Read from env?
|
||||||
|
12
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StoragePipeline {
|
||||||
|
pub async fn launch_new(
|
||||||
|
workers: u32,
|
||||||
|
pwc: Arc<PileWithCache<BoxedWormFileProvider>>,
|
||||||
|
) -> eyre::Result<(StoragePipeline, Sender<(String, PathBuf)>)> {
|
||||||
|
let (job_tx, job_rx) = flume::bounded(16);
|
||||||
|
let (result_tx, result_rx) = flume::bounded(4);
|
||||||
|
|
||||||
|
let mut join_set = JoinSet::new();
|
||||||
|
for spw_num in 0..workers {
|
||||||
|
let job_rx = job_rx.clone();
|
||||||
|
let result_tx = result_tx.clone();
|
||||||
|
let pwc = pwc.clone();
|
||||||
|
|
||||||
|
let compressor = match pwc.pile.pile_config.zstd_dict.as_ref() {
|
||||||
|
None => {
|
||||||
|
Compressor::new(get_zstd_level()).context("can't create dictless compressor")?
|
||||||
|
}
|
||||||
|
Some(dict_bytes) => Compressor::with_dictionary(get_zstd_level(), dict_bytes)
|
||||||
|
.context("can't create dictful compressor")?,
|
||||||
|
};
|
||||||
|
|
||||||
|
let chunk_id_key = pwc.pile.pile_config.chunk_id_key;
|
||||||
|
let storing_state = StoringState {
|
||||||
|
cache_conn: pwc.localcache.read().await?,
|
||||||
|
new_unflushed_chunks: Arc::new(Default::default()),
|
||||||
|
new_bloblogs: vec![],
|
||||||
|
pwc,
|
||||||
|
chunk_id_key,
|
||||||
|
compressor,
|
||||||
|
};
|
||||||
|
// make a logging span for the Storage Pipeline Workers
|
||||||
|
let spw_span = info_span!("spw", n = spw_num);
|
||||||
|
join_set.spawn(
|
||||||
|
async move {
|
||||||
|
let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await;
|
||||||
|
if let Err(ref err) = result {
|
||||||
|
error!("Error in SPW {err:?}");
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
.instrument(spw_span),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
StoragePipeline {
|
||||||
|
result_rx,
|
||||||
|
join_set,
|
||||||
|
},
|
||||||
|
job_tx,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub async fn next_result(&self) -> Result<(String, RecursiveChunkRef, u64), RecvError> {
|
||||||
|
self.result_rx.recv_async().await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Must be sure that all results have been collected first.
|
||||||
|
pub async fn finish_into_chunkmaps(
|
||||||
|
mut self,
|
||||||
|
) -> eyre::Result<Vec<(BloblogId, BTreeMap<ChunkId, BlobLocator>)>> {
|
||||||
|
if let Ok(msg) = self.result_rx.recv_async().await {
|
||||||
|
bail!("Haven't processed all results yet! {msg:?}");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut chunkmap = Vec::new();
|
||||||
|
|
||||||
|
while let Some(join_resres) = self.join_set.join_next().await {
|
||||||
|
chunkmap.extend(join_resres??.new_bloblogs);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(chunkmap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assemble_indices(chunkmap: Vec<(BloblogId, BTreeMap<ChunkId, BlobLocator>)>) -> Vec<Index> {
|
||||||
|
let mut sorted_map = BTreeMap::new();
|
||||||
|
for (idx, chunkmap) in chunkmap.into_iter().enumerate() {
|
||||||
|
let size_of_chunkmap = chunkmap.1.len() + 1;
|
||||||
|
sorted_map.insert(Reverse((size_of_chunkmap, idx)), chunkmap);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut indices = Vec::new();
|
||||||
|
|
||||||
|
while let Some(k) = sorted_map.keys().cloned().next() {
|
||||||
|
let (Reverse((size, _)), (bloblog_id, bloblog_chunks)) =
|
||||||
|
sorted_map.remove_entry(&k).unwrap();
|
||||||
|
let mut new_index_contents = BTreeMap::new();
|
||||||
|
new_index_contents.insert(
|
||||||
|
bloblog_id,
|
||||||
|
IndexBloblogEntry {
|
||||||
|
chunks: bloblog_chunks,
|
||||||
|
forgotten_bytes: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
let mut new_index_size_so_far = size;
|
||||||
|
|
||||||
|
while new_index_size_so_far < DESIRED_INDEX_SIZE_ENTRIES && !sorted_map.is_empty() {
|
||||||
|
if let Some((k, _)) = sorted_map
|
||||||
|
.range(
|
||||||
|
Reverse((
|
||||||
|
DESIRED_INDEX_SIZE_ENTRIES - new_index_size_so_far,
|
||||||
|
usize::MAX,
|
||||||
|
))..,
|
||||||
|
)
|
||||||
|
.next()
|
||||||
|
{
|
||||||
|
let k = k.clone();
|
||||||
|
let (Reverse((add_size, _)), (bloblog_id, bloblog_chunks)) =
|
||||||
|
sorted_map.remove_entry(&k).unwrap();
|
||||||
|
new_index_size_so_far += add_size;
|
||||||
|
new_index_contents.insert(
|
||||||
|
bloblog_id,
|
||||||
|
IndexBloblogEntry {
|
||||||
|
chunks: bloblog_chunks,
|
||||||
|
forgotten_bytes: 0,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
indices.push(Index {
|
||||||
|
supersedes: BTreeSet::new(),
|
||||||
|
bloblogs: new_index_contents,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
indices
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_indices(
|
||||||
|
pwc: &PileWithCache<BoxedWormFileProvider>,
|
||||||
|
indices: Vec<Index>,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
for index in indices {
|
||||||
|
let index_id = pwc.pile.create_index(&index).await?;
|
||||||
|
if !pwc
|
||||||
|
.localcache
|
||||||
|
.write()
|
||||||
|
.await?
|
||||||
|
.apply_index(index_id, Arc::new(index))
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
error!("freshly-created index wasn't new. This is suspicious.");
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn assemble_and_write_indices(
|
||||||
|
pwc: &PileWithCache<BoxedWormFileProvider>,
|
||||||
|
chunkmap: Vec<(BloblogId, BTreeMap<ChunkId, BlobLocator>)>,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
let indices = assemble_indices(chunkmap);
|
||||||
|
write_indices(pwc, indices).await
|
||||||
|
}
|
yama/src/vacuum.rs (new file, 1 line)
@@ -0,0 +1 @@
|
|||||||
|
|
yama_cli_readme.txt (new file, 91 lines)
@@ -0,0 +1,91 @@

`yama init` → initialise a Yama pile in the current directory
creates:
* config
* directory structure
* master keyring (prompts for password)

`--zstd-dict <dict> | --no-zstd-dict`: choose a Zstd dictionary (or lack thereof)

OR

`yama init --sftp` → interactively create an SFTP pile
`yama init --s3` → interactively create an S3 pile
creates:
* config (remote)
* directory structure (remote)
* master keyring (local + optionally remote too, prompts for password)
* connection information file (local)


`yama keyring`
`inspect <file>.yamakeyring` → prints the contents of a keyring, asking for the password if needed

`new|create <new> [--from <src>] [--with <keylist>] [--no-password]`
Creates a new keyring based on another one. If `--from` is not specified, it defaults to the master keyring in this directory (`master.yamakeyring`).

If `--no-password` is specified, the new keyring will be unprotected.

If `--with` is specified, it is either a list of key names to include (e.g. `r_bloblog_contents`, etc.)
or a list of opinionated roles (ALL, config, backup, restore, ...?)

e.g. you might give your server a keyring with:
`yama keyring new myserver.yamakeyring --from master.yamakeyring --with backup --no-password` to allow it to create backups but not read from them.


`yama store <source file/dir> [<dest pile/pileconnector dir>:]<pointer name>`
Stores a file or directory into Yama under the given pointer name.

If `--stdin` is passed, the contents to store are read from stdin instead, and the provided filename is a fake filename for informational purposes only.
This would be suitable for e.g. `pg_dump | yama store --stdin mydbname.sql`.

If `--force` is passed, an existing pointer of the same name can be overwritten.

I expect we will also have `--exclude` and `--exclude-list` options.
I expect we will also have a `--dry-run` option.

`yama extract [<dest pile/pileconnector dir>:]<pointer name>[/path/to/subtree] (--stdout | <target file/dir>)`
Extracts a file or directory from Yama, from the given pointer.

If `--stdout` is passed, output is written to stdout; in that case the input must be a single file.

I expect we will also have `--exclude` and `--exclude-list` options.
I expect we will also have a `--dry-run` option.

`yama mount [<dest pile/pileconnector dir>:]<pointer name>[/path/to/subtree] <target file/dir>`
Mounts a pointer as a read-only FUSE filesystem.

`yama check`
Checks the consistency of the pile. One of the levels must be specified:
`--pointers`|`-1`: checks that all pointers are valid
`--shallow`|`-2`: checks that all pointers' tree nodes point to chunks that exist

`--intensive`|`-9`: checks that all chunks have the correct hash, that all indices correctly represent the bloblogs, that all pointers ultimately point to valid files, and so on: as much as possible


`yama lsp [[<dest pile/pileconnector dir>:]<glob>]`
(the glob defaults to `.:*`)
Lists pointers in the pile.

If `--deleted` is specified, deleted pointers that have yet to be vacuumed are included.


`yama rmp [<dest pile/pileconnector dir>:]<pointer>`
Deletes pointers, or marks them as deleted.

If `--glob` is specified, then `<pointer>` is a glob.

If `--now` is specified, an exclusive lock is required and the pointer is actually deleted.
If `--now` is *not* specified, the pointer is merely marked as deleted; this only requires a shared lock.

`yama vacuum`
Vacuums the pile, reclaiming disk space. Holds an exclusive lock over the pile.
Does things like:
- (`--pointers`) cleans up deleted pointers that still need to be actually deleted
- (`--sweep`) scans all pointers to discover the chunks that are present in bloblogs but unused, then removes them from the indices (possibly slow, but necessary to make bloblog repacking possible)
- (`--indices`) writes new indices to replace existing ones, if the existing indices are not space-efficient
- (`--bloblogs`) repacks bloblogs that aren't space-efficient, removing unindexed blobs in the process

`--all` for everything.
yama_localcache/Cargo.toml (new file, 15 lines)
@@ -0,0 +1,15 @@
|
|||||||
|
[package]
|
||||||
|
name = "yama_localcache"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
sqlx = { version = "0.6.3", features = ["sqlite", "runtime-tokio-rustls"] }
|
||||||
|
tracing = "0.1.37"
|
||||||
|
eyre = "0.6.8"
|
||||||
|
tokio = "1.27.0"
|
||||||
|
yama_pile = { path = "../yama_pile" }
|
||||||
|
yama_midlevel_crypto = { path = "../yama_midlevel_crypto" }
|
||||||
|
itertools = "0.10.5"
|
yama_localcache/dev_db.sh (new executable file, 7 lines)
@@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -eu
|
||||||
|
dbpath="$(dirname "$0")/testdb.sqlite"
|
||||||
|
#echo $dbpath
|
||||||
|
sqlx db create --database-url sqlite:"$dbpath"
|
||||||
|
sqlx migrate run --database-url sqlite:"$dbpath"
|
||||||
|
|
@@ -0,0 +1,30 @@
|
|||||||
|
-- Create a local cache of indices.
|
||||||
|
|
||||||
|
CREATE TABLE indices (
|
||||||
|
index_short_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||||
|
index_sha256 TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE UNIQUE INDEX indices_index_sha256 ON indices(index_sha256);
|
||||||
|
|
||||||
|
CREATE TABLE blobs (
|
||||||
|
chunk_id TEXT NOT NULL,
|
||||||
|
bloblog_short_id INTEGER NOT NULL REFERENCES bloblogs(bloblog_short_id),
|
||||||
|
index_short_id INTEGER NOT NULL REFERENCES indices(index_short_id),
|
||||||
|
offset INTEGER NOT NULL,
|
||||||
|
size INTEGER NOT NULL,
|
||||||
|
PRIMARY KEY (chunk_id, bloblog_short_id, index_short_id)
|
||||||
|
);
|
||||||
|
CREATE INDEX blobs_bloblog_short_id ON blobs(bloblog_short_id);
|
||||||
|
CREATE INDEX blobs_index_short_id ON blobs(index_short_id);
|
||||||
|
|
||||||
|
CREATE TABLE bloblogs (
|
||||||
|
bloblog_short_id INTEGER PRIMARY KEY NOT NULL,
|
||||||
|
bloblog_sha256 TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE UNIQUE INDEX bloblogs_bloblog_sha256 ON bloblogs(bloblog_sha256);
|
||||||
|
|
||||||
|
CREATE TABLE indices_supersede (
|
||||||
|
superseded_sha256 TEXT NOT NULL,
|
||||||
|
successor_sha256 TEXT NOT NULL REFERENCES indices(index_sha256),
|
||||||
|
PRIMARY KEY (superseded_sha256, successor_sha256)
|
||||||
|
);
|
yama_localcache/src/lib.rs (new file, 335 lines)
@@ -0,0 +1,335 @@
|
|||||||
|
use eyre::Context;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use sqlx::pool::PoolConnection;
|
||||||
|
use sqlx::sqlite::{
|
||||||
|
SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteRow, SqliteSynchronous,
|
||||||
|
};
|
||||||
|
use sqlx::{query, Connection, Row, Sqlite, SqlitePool};
|
||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::path::Path;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
|
||||||
|
use yama_midlevel_crypto::chunk_id::ChunkId;
|
||||||
|
use yama_pile::definitions::{BlobLocator, BloblogId, Index, IndexId};
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Store {
|
||||||
|
pool: Arc<SqlitePool>,
|
||||||
|
writer_semaphore: Arc<Semaphore>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct StoreConnection<const RW: bool> {
|
||||||
|
/// The underlying 'connection'.
|
||||||
|
conn: PoolConnection<Sqlite>,
|
||||||
|
|
||||||
|
/// Permit to write. Only here so that it is dropped at the correct time.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
writer_permit: Option<OwnedSemaphorePermit>,
|
||||||
|
}
|
||||||
|
|
||||||
|
const MAX_SQLITE_CONNECTIONS: u32 = 16;
|
||||||
|
|
||||||
|
impl Store {
|
||||||
|
pub async fn new(path: &Path) -> eyre::Result<Store> {
|
||||||
|
let pool = SqlitePoolOptions::new()
|
||||||
|
.max_connections(MAX_SQLITE_CONNECTIONS)
|
||||||
|
.connect_with(
|
||||||
|
SqliteConnectOptions::new()
|
||||||
|
.create_if_missing(true)
|
||||||
|
.journal_mode(SqliteJournalMode::Wal)
|
||||||
|
.synchronous(SqliteSynchronous::Normal)
|
||||||
|
.foreign_keys(true)
|
||||||
|
.filename(path),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let store = Store {
|
||||||
|
pool: Arc::new(pool),
|
||||||
|
writer_semaphore: Arc::new(Semaphore::new(1)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut conn = store.pool.acquire().await?;
|
||||||
|
|
||||||
|
// This will run the necessary migrations.
|
||||||
|
sqlx::migrate!("./migrations").run(&mut conn).await?;
|
||||||
|
|
||||||
|
Ok(store)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn txn<const RW: bool>(&self) -> eyre::Result<StoreConnection<RW>> {
|
||||||
|
let writer_permit = if RW {
|
||||||
|
Some(self.writer_semaphore.clone().acquire_owned().await?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let conn = self.pool.acquire().await?;
|
||||||
|
|
||||||
|
Ok(StoreConnection {
|
||||||
|
conn,
|
||||||
|
writer_permit,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read(&self) -> eyre::Result<StoreConnection<false>> {
|
||||||
|
self.txn().await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn write(&self) -> eyre::Result<StoreConnection<true>> {
|
||||||
|
self.txn().await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StoreConnection<true> {
|
||||||
|
/// Store an index into the local index cache.
|
||||||
|
/// If the index supersedes any others, then those supersedings are stored and the blob entries
|
||||||
|
/// for the superseded indices are removed.
|
||||||
|
///
|
||||||
|
/// Returns true iff the index was new.
|
||||||
|
pub async fn apply_index(
|
||||||
|
&mut self,
|
||||||
|
index_id: IndexId,
|
||||||
|
index: Arc<Index>,
|
||||||
|
) -> eyre::Result<bool> {
|
||||||
|
let index_id_txt = index_id.to_string();
|
||||||
|
self.conn.transaction(|txn| Box::pin(async move {
|
||||||
|
let needs_index = query!("
|
||||||
|
SELECT 1 AS one FROM indices WHERE index_sha256 = ?
|
||||||
|
", index_id_txt).fetch_optional(&mut *txn).await?.is_none();
|
||||||
|
|
||||||
|
if !needs_index {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
let index_short_id = query!("
|
||||||
|
INSERT INTO indices (index_sha256)
|
||||||
|
VALUES (?)
|
||||||
|
RETURNING index_short_id
|
||||||
|
", index_id_txt).fetch_one(&mut *txn).await?.index_short_id;
|
||||||
|
|
||||||
|
for supersede in &index.supersedes {
|
||||||
|
let supersede_txt = supersede.to_string();
|
||||||
|
query!("
|
||||||
|
INSERT INTO indices_supersede (superseded_sha256, successor_sha256)
|
||||||
|
VALUES (?, ?)
|
||||||
|
", supersede_txt, index_id_txt).execute(&mut *txn).await?;
|
||||||
|
|
||||||
|
if let Some(row) = query!("
|
||||||
|
SELECT index_short_id FROM indices WHERE index_sha256 = ?
|
||||||
|
", supersede_txt).fetch_optional(&mut *txn).await? {
|
||||||
|
// Clear out any chunk entries for the superseded indices.
|
||||||
|
// This ensures we don't rely on them in the future and also clears up space.
|
||||||
|
query!("
|
||||||
|
DELETE FROM blobs WHERE index_short_id = ?
|
||||||
|
", row.index_short_id).execute(&mut *txn).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the index hasn't already been superseded, before adding blobs
|
||||||
|
let is_superseded = query!("
|
||||||
|
SELECT 1 as _yes FROM indices_supersede WHERE superseded_sha256 = ?",
|
||||||
|
index_id_txt
|
||||||
|
).fetch_optional(&mut *txn).await?.is_some();
|
||||||
|
|
||||||
|
if !is_superseded {
|
||||||
|
for (bloblog_sha256, index_bloblog_entry) in &index.bloblogs {
|
||||||
|
let bloblog_sha256_txt = bloblog_sha256.to_string();
|
||||||
|
let bloblog_short_id_opt = query!("
|
||||||
|
SELECT bloblog_short_id FROM bloblogs WHERE bloblog_sha256 = ?
|
||||||
|
", bloblog_sha256_txt).fetch_optional(&mut *txn).await?;
|
||||||
|
|
||||||
|
let bloblog_short_id = match bloblog_short_id_opt {
|
||||||
|
None => {
|
||||||
|
query!("
|
||||||
|
INSERT INTO bloblogs (bloblog_sha256)
|
||||||
|
VALUES (?)
|
||||||
|
RETURNING bloblog_short_id
|
||||||
|
", bloblog_sha256_txt).fetch_one(&mut *txn).await?.bloblog_short_id
|
||||||
|
},
|
||||||
|
Some(row) => row.bloblog_short_id,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
for (chunk_id, chunk_locator) in index_bloblog_entry.chunks.iter() {
|
||||||
|
let chunk_id_txt = chunk_id.to_string();
|
||||||
|
let coffset = chunk_locator.offset as i64;
|
||||||
|
let clen = chunk_locator.length as i64;
|
||||||
|
query!("
|
||||||
|
INSERT INTO blobs (index_short_id, bloblog_short_id, chunk_id, offset, size)
|
||||||
|
VALUES (?, ?, ?, ?, ?)
|
||||||
|
", index_short_id, bloblog_short_id, chunk_id_txt, coffset, clen).execute(&mut *txn).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
|
})).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delete an index from the cache, if the index was deleted from the pile.
|
||||||
|
pub async fn delete_index(&mut self, index_id: IndexId) -> eyre::Result<()> {
|
||||||
|
self.conn
|
||||||
|
.transaction(|txn| {
|
||||||
|
Box::pin(async move {
|
||||||
|
let index_id_txt = index_id.to_string();
|
||||||
|
query!(
|
||||||
|
"
|
||||||
|
DELETE FROM indices_supersede WHERE successor_sha256 = ?
|
||||||
|
",
|
||||||
|
index_id_txt
|
||||||
|
)
|
||||||
|
.execute(&mut *txn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let index_short_id = query!(
|
||||||
|
"
|
||||||
|
SELECT index_short_id FROM indices WHERE index_sha256 = ?
|
||||||
|
",
|
||||||
|
index_id_txt
|
||||||
|
)
|
||||||
|
.fetch_one(&mut *txn)
|
||||||
|
.await?
|
||||||
|
.index_short_id;
|
||||||
|
|
||||||
|
query!(
|
||||||
|
"
|
||||||
|
DELETE FROM blobs WHERE index_short_id = ?
|
||||||
|
",
|
||||||
|
index_short_id
|
||||||
|
)
|
||||||
|
.execute(&mut *txn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
query!(
|
||||||
|
"
|
||||||
|
DELETE FROM indices WHERE index_short_id = ?
|
||||||
|
",
|
||||||
|
index_short_id
|
||||||
|
)
|
||||||
|
.execute(&mut *txn)
|
||||||
|
.await?;
|
||||||
|
Ok::<_, eyre::Report>(())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<const RW: bool> StoreConnection<RW> {
|
||||||
|
pub async fn locate_chunk(
|
||||||
|
&mut self,
|
||||||
|
chunk_id: ChunkId,
|
||||||
|
) -> eyre::Result<Option<(BloblogId, BlobLocator)>> {
|
||||||
|
let chunk_id_text = chunk_id.to_string();
|
||||||
|
let row_opt = query!(
|
||||||
|
"
|
||||||
|
SELECT bl.bloblog_sha256, b.offset, b.size
|
||||||
|
FROM blobs b
|
||||||
|
JOIN bloblogs bl USING (bloblog_short_id)
|
||||||
|
WHERE b.chunk_id = ?
|
||||||
|
LIMIT 1
|
||||||
|
",
|
||||||
|
chunk_id_text
|
||||||
|
)
|
||||||
|
.fetch_optional(&mut *self.conn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
match row_opt {
|
||||||
|
None => Ok(None),
|
||||||
|
Some(row) => {
|
||||||
|
let bloblog_id =
|
||||||
|
BloblogId::try_from(row.bloblog_sha256.as_str()).with_context(|| {
|
||||||
|
format!("failed to decode bloblog ID: {:?}", row.bloblog_sha256)
|
||||||
|
})?;
|
||||||
|
Ok(Some((
|
||||||
|
bloblog_id,
|
||||||
|
BlobLocator {
|
||||||
|
offset: row.offset.try_into().context("offset too big")?,
|
||||||
|
length: row.size.try_into().context("size too big")?,
|
||||||
|
},
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn locate_chunks(
|
||||||
|
&mut self,
|
||||||
|
chunk_ids: &BTreeSet<ChunkId>,
|
||||||
|
) -> eyre::Result<BTreeMap<ChunkId, (BloblogId, BlobLocator)>> {
|
||||||
|
let mut out = BTreeMap::new();
|
||||||
|
for batch in &chunk_ids.iter().chunks(64) {
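// Look the chunk IDs up in batches of at most 64, building a parameterised
// `(?, ?, ...)` placeholder list below; this keeps each statement's
// bind-parameter count small.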
|
||||||
|
let chunk_id_texts: Vec<String> = batch.map(|ci| ci.to_string()).collect();
|
||||||
|
let query_param_str = format!("({})", &",?".repeat(chunk_id_texts.len())[1..]);
|
||||||
|
let sql = format!(
|
||||||
|
"
|
||||||
|
SELECT b.chunk_id, bl.bloblog_sha256, b.offset, b.size
|
||||||
|
FROM blobs b
|
||||||
|
JOIN bloblogs bl USING (bloblog_short_id)
|
||||||
|
WHERE b.chunk_id IN {query_param_str}
|
||||||
|
"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut q = query(&sql);
|
||||||
|
for chunk_id in &chunk_id_texts {
|
||||||
|
q = q.bind(chunk_id);
|
||||||
|
}
|
||||||
|
let rows = q
|
||||||
|
.map(|row: SqliteRow| {
|
||||||
|
Ok::<_, eyre::Report>((
|
||||||
|
ChunkId::from_str(row.get::<&str, _>(0))?,
|
||||||
|
BloblogId::try_from(row.get::<&str, _>(1))?,
|
||||||
|
row.get::<i64, _>(2),
|
||||||
|
row.get::<i64, _>(3),
|
||||||
|
))
|
||||||
|
})
|
||||||
|
.fetch_all(&mut *self.conn)
|
||||||
|
.await?;
|
||||||
|
for row in rows {
|
||||||
|
let (chunk_id, bloblog_id, offset, size) = row?;
|
||||||
|
out.insert(
|
||||||
|
chunk_id,
|
||||||
|
(
|
||||||
|
bloblog_id,
|
||||||
|
BlobLocator {
|
||||||
|
offset: offset as u64,
|
||||||
|
length: size as u64,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn list_indices(&mut self) -> eyre::Result<BTreeSet<IndexId>> {
|
||||||
|
let row_results = query!(
|
||||||
|
"
|
||||||
|
SELECT index_sha256 FROM indices
|
||||||
|
"
|
||||||
|
)
|
||||||
|
.map(|row| {
|
||||||
|
IndexId::try_from(row.index_sha256.as_ref())
|
||||||
|
.context("failed to decode IndexId in local cache")
|
||||||
|
})
|
||||||
|
.fetch_all(&mut *self.conn)
|
||||||
|
.await?;
|
||||||
|
row_results.into_iter().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn is_chunk_new(&mut self, chunk_id: ChunkId) -> eyre::Result<bool> {
|
||||||
|
let chunk_id_text = chunk_id.to_string();
|
||||||
|
let is_new = query!(
|
||||||
|
"
|
||||||
|
SELECT 1 AS _yes FROM blobs WHERE chunk_id = ?
|
||||||
|
",
|
||||||
|
chunk_id_text
|
||||||
|
)
|
||||||
|
.fetch_optional(&mut *self.conn)
|
||||||
|
.await?
|
||||||
|
.is_none();
|
||||||
|
Ok(is_new)
|
||||||
|
}
|
||||||
|
}
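// Illustrative usage sketch (not part of this commit): open the local cache
// and ask whether a chunk is already known. `cache_path` and `chunk_id` are
// assumed to come from the caller.
#[allow(dead_code)]
async fn example_is_chunk_new(
    cache_path: &std::path::Path,
    chunk_id: ChunkId,
) -> eyre::Result<bool> {
    let store = Store::new(cache_path).await?;
    let mut conn = store.read().await?;
    conn.is_chunk_new(chunk_id).await
}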
|
yama_midlevel_crypto/Cargo.toml (new file, 35 lines)
@@ -0,0 +1,35 @@
|
|||||||
|
[package]
|
||||||
|
name = "yama_midlevel_crypto"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
serde = { version = "1.0.159", features = ["derive"] }
|
||||||
|
ciborium = "0.2.0"
|
||||||
|
blake3 = "1.3.3"
|
||||||
|
|
||||||
|
# Unauthenticated symmetric seekable stream constructions
|
||||||
|
chacha20 = "0.9.1"
|
||||||
|
|
||||||
|
x25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "static_secrets"] }
|
||||||
|
poly1305 = "0.8.0"
|
||||||
|
ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] }
|
||||||
|
|
||||||
|
# Hybrid quantum-resistant asymmetric 'key encapsulation' mechanisms
|
||||||
|
pqc_kyber = { version = "0.5.0", features = ["kyber1024"] }
|
||||||
|
#alkali = "0.3.0"
|
||||||
|
|
||||||
|
rand = "0.8.5"
|
||||||
|
|
||||||
|
|
||||||
|
eyre = "0.6.8"
|
||||||
|
|
||||||
|
# 0.12.3+zstd.1.5.2
|
||||||
|
zstd = "0.12.3"
|
||||||
|
|
||||||
|
hex = "0.4.3"
|
||||||
|
|
||||||
|
argon2 = { version = "0.4.1", default-features = false, features = ["alloc", "std"] }
|
||||||
|
|
yama_midlevel_crypto/src/asym_box.rs (new file, 92 lines)
@@ -0,0 +1,92 @@
|
|||||||
|
use ed25519_dalek::SIGNATURE_LENGTH;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use crate::asym_keyx::{AsymKeyExchange, DecryptingKey, EncryptingKey, KEY_EXCHANGE_LENGTH};
|
||||||
|
use crate::asym_signed::{SignedBytes, SigningKey, VerifyingKey};
|
||||||
|
use crate::byte_layer::ByteLayer;
|
||||||
|
use crate::sym_box::{SymBox, SymKey};
|
||||||
|
|
||||||
|
/// A locked box storing something using asymmetric cryptography.
|
||||||
|
///
|
||||||
|
/// For key encapsulation: x25519 and kyber (quantum-resistant)
|
||||||
|
/// For signing: ed25519 (not quantum-resistant)
|
||||||
|
///
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct AsymBox<T> {
|
||||||
|
payload: SignedBytes,
|
||||||
|
#[serde(skip, default)]
|
||||||
|
_phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A public key, needed to lock an AsymBox or verify the signature when unlocking an AsymBox.
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct AsymPublicKey {
|
||||||
|
verify: VerifyingKey,
|
||||||
|
encrypt: EncryptingKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A private key, needed to open an AsymBox or to sign an AsymBox that is being locked.
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct AsymPrivateKey {
|
||||||
|
sign: SigningKey,
|
||||||
|
decrypt: DecryptingKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> ByteLayer for AsymBox<T> {
|
||||||
|
fn from_byte_vec(bytes: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
payload: SignedBytes::from_bytes_vec_assumed(bytes),
|
||||||
|
_phantom: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_byte_vec(self) -> Vec<u8> {
|
||||||
|
self.payload.into_bytes_vec()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ByteLayer> AsymBox<T> {
|
||||||
|
// TODO error
|
||||||
|
pub fn unlock(
|
||||||
|
self,
|
||||||
|
receiver_decrypt_key: &DecryptingKey,
|
||||||
|
sender_verify_key: &VerifyingKey,
|
||||||
|
) -> Option<T> {
|
||||||
|
let verified_payload = self.payload.into_verified(sender_verify_key)?;
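// Layout of the verified payload (as produced by `new` below):
//   [ SymBox ciphertext ][ AsymKeyExchange bytes (KEY_EXCHANGE_LENGTH) ]
// (the trailing signature has already been stripped by `into_verified`).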
|
||||||
|
|
||||||
|
let cutoff = verified_payload.len() - KEY_EXCHANGE_LENGTH;
|
||||||
|
|
||||||
|
let key_exchanger_bytes = &verified_payload[cutoff..];
|
||||||
|
let key_exchanger = AsymKeyExchange::load_from_bytes(key_exchanger_bytes)
|
||||||
|
.expect("can't load AsymKeyExchange");
|
||||||
|
let exchanged = key_exchanger.unlock(receiver_decrypt_key)?;
|
||||||
|
let symkey = SymKey::from(exchanged);
|
||||||
|
|
||||||
|
let symbox: SymBox<T> = SymBox::new_from_raw(&verified_payload[0..cutoff]);
|
||||||
|
symbox.unlock(&symkey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ByteLayer> AsymBox<T> {
|
||||||
|
// TODO error
|
||||||
|
pub fn new(
|
||||||
|
contents: T,
|
||||||
|
sender_signing_key: &SigningKey,
|
||||||
|
receiver_encrypt_key: &EncryptingKey,
|
||||||
|
) -> Option<Self> {
|
||||||
|
let (key_exchanger, exchanged) = AsymKeyExchange::lock(receiver_encrypt_key);
|
||||||
|
let symkey = SymKey::from(exchanged);
|
||||||
|
|
||||||
|
let mut signed_bytes = SymBox::new(contents, &symkey)?.into_vec();
|
||||||
|
signed_bytes.reserve(KEY_EXCHANGE_LENGTH + SIGNATURE_LENGTH);
|
||||||
|
signed_bytes.extend_from_slice(key_exchanger.as_bytes());
|
||||||
|
|
||||||
|
let signed = SignedBytes::new(signed_bytes, sender_signing_key);
|
||||||
|
Some(Self {
|
||||||
|
payload: signed,
|
||||||
|
_phantom: Default::default(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
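// Illustrative round-trip sketch (not part of this commit), using the key
// generators from `asym_keyx` and `asym_signed`; the payload is a plain
// `Vec<u8>`, which implements `ByteLayer`.
#[allow(dead_code)]
fn asym_box_round_trip_example() {
    use crate::asym_keyx::generate_asym_keypair;
    use crate::asym_signed::asym_signing_keypair;

    let (encrypt_key, decrypt_key) = generate_asym_keypair();
    let (signing_key, verifying_key) = asym_signing_keypair();

    let boxed: AsymBox<Vec<u8>> =
        AsymBox::new(b"hello".to_vec(), &signing_key, &encrypt_key).expect("lock failed");
    let opened = boxed.unlock(&decrypt_key, &verifying_key).expect("unlock failed");
    assert_eq!(opened, b"hello".to_vec());
}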
|
yama_midlevel_crypto/src/asym_keyx.rs (new file, 149 lines)
@@ -0,0 +1,149 @@
|
|||||||
|
use crate::keys_kyber::{kyber_keypair, KyberPrivateKey, KyberPublicKey};
|
||||||
|
use crate::keys_x25519::{x25519_keypair, X25519PrivateKey, X25519PublicKey};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::io::Read;
|
||||||
|
|
||||||
|
// x25519 ephemeral pubkey (32) + kyber (1568)
|
||||||
|
pub const KEY_EXCHANGE_LENGTH: usize = 32 + 1568;
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct AsymKeyExchange<'bytes> {
|
||||||
|
inner: Cow<'bytes, [u8]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct EncryptingKey {
|
||||||
|
x25519: X25519PublicKey,
|
||||||
|
kyber: KyberPublicKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct DecryptingKey {
|
||||||
|
x25519: X25519PrivateKey,
|
||||||
|
kyber: KyberPrivateKey,
|
||||||
|
|
||||||
|
x25519_pub: X25519PublicKey,
|
||||||
|
kyber_pub: KyberPublicKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_asym_keypair() -> (EncryptingKey, DecryptingKey) {
|
||||||
|
let (x25519_pub, x25519_priv) = x25519_keypair();
|
||||||
|
let (kyber_pub, kyber_priv) = kyber_keypair();
|
||||||
|
|
||||||
|
(
|
||||||
|
EncryptingKey {
|
||||||
|
x25519: x25519_pub.clone(),
|
||||||
|
kyber: kyber_pub.clone(),
|
||||||
|
},
|
||||||
|
DecryptingKey {
|
||||||
|
x25519: x25519_priv,
|
||||||
|
kyber: kyber_priv,
|
||||||
|
x25519_pub,
|
||||||
|
kyber_pub,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Exchanged(pub(crate) [u8; 64]);
|
||||||
|
|
||||||
|
impl<'bytes> AsymKeyExchange<'bytes> {
|
||||||
|
pub fn load_from_bytes(bytes: &'bytes [u8]) -> Option<Self> {
|
||||||
|
if bytes.len() != KEY_EXCHANGE_LENGTH {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(Self {
|
||||||
|
inner: Cow::Borrowed(&bytes),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
self.inner.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn lock(ek: &EncryptingKey) -> (AsymKeyExchange, Exchanged) {
|
||||||
|
let mut public_bytes = Vec::with_capacity(KEY_EXCHANGE_LENGTH);
|
||||||
|
|
||||||
|
let mut rand = rand::thread_rng();
|
||||||
|
|
||||||
|
// X25519
|
||||||
|
let ephemeral_privkey = x25519_dalek::EphemeralSecret::random_from_rng(&mut rand);
|
||||||
|
let ephemeral_pubkey = x25519_dalek::PublicKey::from(&ephemeral_privkey);
|
||||||
|
public_bytes.extend_from_slice(ephemeral_pubkey.as_bytes());
|
||||||
|
let shared_secret_x25519 = ephemeral_privkey.diffie_hellman(&ek.x25519.inner);
|
||||||
|
|
||||||
|
// Kyber
|
||||||
|
let kyber = ek.kyber.encapsulate(&mut rand);
|
||||||
|
public_bytes.extend_from_slice(&kyber.public_bytes);
|
||||||
|
|
||||||
|
assert_eq!(public_bytes.len(), KEY_EXCHANGE_LENGTH);
|
||||||
|
|
||||||
|
let exchanged = Self::perform_exchange(
|
||||||
|
&public_bytes,
|
||||||
|
&ek.x25519,
|
||||||
|
&ek.kyber,
|
||||||
|
shared_secret_x25519.as_bytes(),
|
||||||
|
&kyber.shared_secret,
|
||||||
|
);
|
||||||
|
|
||||||
|
(
|
||||||
|
AsymKeyExchange {
|
||||||
|
inner: Cow::Owned(public_bytes),
|
||||||
|
},
|
||||||
|
exchanged,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn perform_exchange(
|
||||||
|
public_bytes: &[u8],
|
||||||
|
rx_x25519_pub: &X25519PublicKey,
|
||||||
|
rx_kyber_pub: &KyberPublicKey,
|
||||||
|
x25519_ss: &[u8; 32],
|
||||||
|
kyber_ss: &[u8; 32],
|
||||||
|
) -> Exchanged {
|
||||||
|
assert_eq!(public_bytes.len(), KEY_EXCHANGE_LENGTH);
|
||||||
|
|
||||||
|
let mut hasher = blake3::Hasher::new_derive_key("yama AsymKeyExchange");
|
||||||
|
// Include the pubkeys of the writer
|
||||||
|
hasher.update(public_bytes);
|
||||||
|
|
||||||
|
// Include the pubkeys of the receiver
|
||||||
|
hasher.update(rx_x25519_pub.as_bytes());
|
||||||
|
hasher.update(rx_kyber_pub.as_bytes());
|
||||||
|
|
||||||
|
// Include what was exchanged
|
||||||
|
hasher.update(x25519_ss);
|
||||||
|
hasher.update(kyber_ss);
|
||||||
|
|
||||||
|
let mut exchanged_bytes = [0u8; 64];
|
||||||
|
let mut out = hasher.finalize_xof();
|
||||||
|
out.read_exact(&mut exchanged_bytes)
|
||||||
|
.expect("failed to read 64b from blake3");
|
||||||
|
|
||||||
|
Exchanged(exchanged_bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unlock(&self, dk: &DecryptingKey) -> Option<Exchanged> {
|
||||||
|
if self.inner.len() != KEY_EXCHANGE_LENGTH {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// X25519
|
||||||
|
let ephemeral_x25519_pubkey_bytes: &[u8; 32] = &self.inner[0..32].try_into().unwrap();
|
||||||
|
let ephemeral_x25519_pubkey = x25519_dalek::PublicKey::from(*ephemeral_x25519_pubkey_bytes);
|
||||||
|
let shared_secret_x25519 = dk.x25519.inner.diffie_hellman(&ephemeral_x25519_pubkey);
|
||||||
|
|
||||||
|
// Kyber
|
||||||
|
let kyber_ciphertext_bytes: &[u8; 1568] = &self.inner[32..].try_into().unwrap();
|
||||||
|
let kyber = dk.kyber.decapsulate(&kyber_ciphertext_bytes);
|
||||||
|
|
||||||
|
Some(Self::perform_exchange(
|
||||||
|
&self.inner,
|
||||||
|
&dk.x25519_pub,
|
||||||
|
&dk.kyber_pub,
|
||||||
|
shared_secret_x25519.as_bytes(),
|
||||||
|
&kyber,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
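// Illustrative sketch (not part of this commit): both sides of the key
// exchange should derive the same `Exchanged` secret. `Exchanged`'s field is
// crate-private, so this comparison only works from within this crate.
#[allow(dead_code)]
fn key_exchange_round_trip_example() {
    let (encrypting_key, decrypting_key) = generate_asym_keypair();
    let (key_exchange, sender_secret) = AsymKeyExchange::lock(&encrypting_key);
    let receiver_secret = key_exchange
        .unlock(&decrypting_key)
        .expect("key exchange should unlock with the matching private key");
    assert_eq!(sender_secret.0, receiver_secret.0);
}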
|
yama_midlevel_crypto/src/asym_signed.rs (new file, 126 lines)
@@ -0,0 +1,126 @@
|
|||||||
|
use serde::de::Error as DeError;
|
||||||
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
|
||||||
|
use ed25519_dalek::{
|
||||||
|
Signature, Signer, SigningKey as Ed25519PrivateKey, Verifier, VerifyingKey as Ed25519PublicKey,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub use ed25519_dalek::SIGNATURE_LENGTH;
|
||||||
|
use rand::thread_rng;
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct SignedBytes {
|
||||||
|
inner: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SigningKey {
|
||||||
|
ed25519: Ed25519PrivateKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct VerifyingKey {
|
||||||
|
ed25519: Ed25519PublicKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn asym_signing_keypair() -> (SigningKey, VerifyingKey) {
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
let keypair = ed25519_dalek::SigningKey::generate(&mut rng);
|
||||||
|
|
||||||
|
(
|
||||||
|
SigningKey {
|
||||||
|
ed25519: keypair.clone(),
|
||||||
|
},
|
||||||
|
VerifyingKey {
|
||||||
|
ed25519: keypair.verifying_key(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for SigningKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
let ed25519 = self.ed25519.to_bytes();
|
||||||
|
<[u8]>::serialize(&ed25519, serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'d> Deserialize<'d> for SigningKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'d>,
|
||||||
|
{
|
||||||
|
let bytes = Vec::<u8>::deserialize(deserializer)?;
|
||||||
|
|
||||||
|
let mut ed25519 = [0u8; 32];
|
||||||
|
if bytes.len() != ed25519.len() {
|
||||||
|
return Err(D::Error::custom("wrong length of ed25519 key"));
|
||||||
|
}
|
||||||
|
ed25519.copy_from_slice(&bytes);
|
||||||
|
|
||||||
|
Ok(SigningKey {
|
||||||
|
ed25519: Ed25519PrivateKey::from_bytes(&ed25519),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for VerifyingKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
(self.ed25519.as_bytes() as &[u8]).serialize(serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'d> Deserialize<'d> for VerifyingKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'d>,
|
||||||
|
{
|
||||||
|
let bytes = Vec::<u8>::deserialize(deserializer)?;
|
||||||
|
|
||||||
|
let mut ed25519 = [0u8; 32];
|
||||||
|
if bytes.len() != ed25519.len() {
|
||||||
|
return Err(D::Error::custom("wrong length of ed25519 key"));
|
||||||
|
}
|
||||||
|
ed25519.copy_from_slice(&bytes);
|
||||||
|
|
||||||
|
Ok(VerifyingKey {
|
||||||
|
ed25519: Ed25519PublicKey::from_bytes(&ed25519).map_err(D::Error::custom)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SignedBytes {
|
||||||
|
pub fn new(mut bytes: Vec<u8>, sign_with: &SigningKey) -> SignedBytes {
|
||||||
|
let signature = sign_with.ed25519.sign(&bytes);
|
||||||
|
let sig = signature.to_bytes();
|
||||||
|
assert_eq!(sig.len(), SIGNATURE_LENGTH);
|
||||||
|
|
||||||
|
bytes.extend(sig);
|
||||||
|
SignedBytes { inner: bytes }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_verified(mut self, verify_with: &VerifyingKey) -> Option<Vec<u8>> {
|
||||||
|
if self.inner.len() < SIGNATURE_LENGTH {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let (payload, sig) = self.inner.split_at(self.inner.len() - SIGNATURE_LENGTH);
|
||||||
|
let sig = Signature::from_bytes(sig.try_into().expect("wrong split"));
|
||||||
|
verify_with.ed25519.verify(&payload, &sig).ok()?;
|
||||||
|
self.inner.drain(self.inner.len() - SIGNATURE_LENGTH..);
|
||||||
|
Some(self.inner)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_bytes_vec(self) -> Vec<u8> {
|
||||||
|
self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_bytes_vec_assumed(inner: Vec<u8>) -> Self {
|
||||||
|
Self { inner }
|
||||||
|
}
|
||||||
|
}
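// Illustrative sketch (not part of this commit): sign some bytes and verify
// them again, recovering the original payload.
#[allow(dead_code)]
fn signed_bytes_round_trip_example() {
    let (signing_key, verifying_key) = asym_signing_keypair();
    let signed = SignedBytes::new(b"payload".to_vec(), &signing_key);
    let verified = signed
        .into_verified(&verifying_key)
        .expect("signature should verify with the matching key");
    assert_eq!(verified, b"payload".to_vec());
}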
|
yama_midlevel_crypto/src/byte_layer.rs (new file, 55 lines)
@@ -0,0 +1,55 @@
|
|||||||
|
use serde::de::DeserializeOwned;
|
||||||
|
use serde::Serialize;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
/// Trait to help layering byte transformers together.
|
||||||
|
pub trait ByteLayer {
|
||||||
|
fn from_byte_vec(bytes: Vec<u8>) -> Self;
|
||||||
|
fn into_byte_vec(self) -> Vec<u8>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct CborSerde<T> {
|
||||||
|
bytes: Vec<u8>,
|
||||||
|
marker: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Serialize + DeserializeOwned> ByteLayer for CborSerde<T> {
|
||||||
|
fn from_byte_vec(bytes: Vec<u8>) -> Self {
|
||||||
|
CborSerde {
|
||||||
|
bytes,
|
||||||
|
marker: PhantomData::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_byte_vec(self) -> Vec<u8> {
|
||||||
|
self.bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Serialize> CborSerde<T> {
|
||||||
|
pub fn serialise(from: &T) -> eyre::Result<Self> {
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
ciborium::ser::into_writer(from, &mut bytes)?;
|
||||||
|
Ok(CborSerde {
|
||||||
|
bytes,
|
||||||
|
marker: Default::default(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: DeserializeOwned> CborSerde<T> {
|
||||||
|
pub fn deserialise(&self) -> eyre::Result<T> {
|
||||||
|
Ok(ciborium::de::from_reader(&self.bytes[..])?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ByteLayer for Vec<u8> {
|
||||||
|
fn from_byte_vec(bytes: Vec<u8>) -> Self {
|
||||||
|
bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_byte_vec(self) -> Vec<u8> {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
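// Illustrative sketch (not part of this commit): CBOR-encode a serde value and
// decode it again via the `ByteLayer` byte representation.
#[allow(dead_code)]
fn cbor_serde_round_trip_example() -> eyre::Result<()> {
    let value: Vec<String> = vec!["hello".to_owned(), "world".to_owned()];
    let encoded = CborSerde::serialise(&value)?;
    let bytes = encoded.into_byte_vec();
    let decoded: Vec<String> = CborSerde::<Vec<String>>::from_byte_vec(bytes).deserialise()?;
    assert_eq!(decoded, value);
    Ok(())
}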
|
yama_midlevel_crypto/src/chunk_id.rs (new file, 91 lines)
@@ -0,0 +1,91 @@
|
|||||||
|
use eyre::bail;
|
||||||
|
use rand::{thread_rng, Rng};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::fmt::{Debug, Formatter};
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
/// The ID of a chunk. It's a 256-bit BLAKE3 hash.
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct ChunkId {
|
||||||
|
blake3: [u8; 32],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for ChunkId {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
for &byte in self.blake3.iter() {
|
||||||
|
write!(f, "{:02x}", byte)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToString for ChunkId {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
hex::encode(&self.blake3)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<[u8; 32]> for ChunkId {
|
||||||
|
fn from(bytes: [u8; 32]) -> Self {
|
||||||
|
ChunkId { blake3: bytes }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for ChunkId {
|
||||||
|
type Err = eyre::Report;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
if s.len() != 64 {
|
||||||
|
bail!("chunk ID of wrong length");
|
||||||
|
}
|
||||||
|
let decoded = hex::decode(s)?;
|
||||||
|
let mut new = ChunkId {
|
||||||
|
blake3: Default::default(),
|
||||||
|
};
|
||||||
|
new.blake3.copy_from_slice(&decoded);
|
||||||
|
Ok(new)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChunkId {
|
||||||
|
pub fn to_bytes(self) -> [u8; 32] {
|
||||||
|
self.blake3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Key needed to create and verify chunk IDs. It's a 256-bit key for the BLAKE3 keyed hash function.
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ChunkIdKey {
|
||||||
|
key: [u8; 32],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChunkIdKey {
|
||||||
|
pub fn new_rand() -> ChunkIdKey {
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
ChunkIdKey { key: rng.gen() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for ChunkIdKey {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
// for privacy, don't leak the contents of the key
|
||||||
|
write!(f, "ChunkIdKey(...)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChunkId {
|
||||||
|
/// Computes a chunk ID given the input and key.
|
||||||
|
pub fn compute(input: &[u8], key: &ChunkIdKey) -> ChunkId {
|
||||||
|
ChunkId {
|
||||||
|
blake3: blake3::keyed_hash(&key.key, input).into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true iff this Chunk ID is correct for the given input and key.
|
||||||
|
pub fn verify(&self, input: &[u8], key: &ChunkIdKey) -> bool {
|
||||||
|
let comparison = Self::compute(input, key);
|
||||||
|
self == &comparison
|
||||||
|
}
|
||||||
|
}
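// Illustrative sketch (not part of this commit): compute and verify a chunk ID
// with a freshly generated key.
#[allow(dead_code)]
fn chunk_id_example() {
    let key = ChunkIdKey::new_rand();
    let chunk_id = ChunkId::compute(b"some chunk contents", &key);
    assert!(chunk_id.verify(b"some chunk contents", &key));
    // A different key yields a different (and thus non-verifying) ID.
    assert!(!chunk_id.verify(b"some chunk contents", &ChunkIdKey::new_rand()));
}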
|
yama_midlevel_crypto/src/key_derivation.rs (new file, 72 lines)
@@ -0,0 +1,72 @@
|
|||||||
|
use crate::sym_box::SymKey;
|
||||||
|
use argon2::Algorithm::Argon2id;
|
||||||
|
use argon2::{Argon2, Version};
|
||||||
|
use eyre::{bail, Context};
|
||||||
|
use rand::{thread_rng, Rng};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// 1 GiB. Intended to prevent maliciously large memory costs; not sure if that's a real risk.
|
||||||
|
pub const MAX_MEMORY_COST_KIBIBYTES: u32 = 1048576;
|
||||||
|
|
||||||
|
/// 512 MiB
|
||||||
|
pub const DEFAULT_MEMORY_COST_KIBIBYTES: u32 = 524288;
|
||||||
|
|
||||||
|
pub const DEFAULT_LANES: u32 = 1;
|
||||||
|
pub const DEFAULT_ITERATIONS: u32 = 256;
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct KeyDerivationParameters {
|
||||||
|
salt: [u8; 32],
|
||||||
|
iterations: u32,
|
||||||
|
memory_kibibytes: u32,
|
||||||
|
lanes: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KeyDerivationParameters {
|
||||||
|
pub fn new_recommended() -> KeyDerivationParameters {
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
KeyDerivationParameters {
|
||||||
|
salt: rng.gen(),
|
||||||
|
iterations: DEFAULT_ITERATIONS,
|
||||||
|
memory_kibibytes: DEFAULT_MEMORY_COST_KIBIBYTES,
|
||||||
|
lanes: DEFAULT_LANES,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn derive(&self, password: &str) -> eyre::Result<DerivedKey> {
|
||||||
|
if self.memory_kibibytes > MAX_MEMORY_COST_KIBIBYTES {
|
||||||
|
bail!(
|
||||||
|
"Too much memory needed for key derivation! {} > {}",
|
||||||
|
self.memory_kibibytes,
|
||||||
|
MAX_MEMORY_COST_KIBIBYTES
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut params = argon2::ParamsBuilder::new();
|
||||||
|
params
|
||||||
|
.m_cost(self.memory_kibibytes)
|
||||||
|
.unwrap()
|
||||||
|
.p_cost(self.lanes)
|
||||||
|
.unwrap()
|
||||||
|
.t_cost(self.iterations)
|
||||||
|
.unwrap()
|
||||||
|
.output_len(64)
|
||||||
|
.unwrap();
|
||||||
|
let params = params.params().unwrap();
|
||||||
|
let argon = Argon2::new(Argon2id, Version::V0x13, params.clone());
|
||||||
|
let mut derived_key = DerivedKey([0; 64]);
|
||||||
|
argon
|
||||||
|
.hash_password_into(password.as_bytes(), &self.salt, &mut derived_key.0)
|
||||||
|
.context("failed to hash password")?;
|
||||||
|
|
||||||
|
Ok(derived_key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DerivedKey(pub(crate) [u8; 64]);
|
||||||
|
|
||||||
|
impl DerivedKey {
|
||||||
|
pub fn into_symkey(self) -> SymKey {
|
||||||
|
SymKey::from(self)
|
||||||
|
}
|
||||||
|
}
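// Illustrative sketch (not part of this commit): derive a symmetric key from a
// password using the recommended parameters. This is deliberately slow and
// memory-hungry (Argon2id).
#[allow(dead_code)]
fn derive_symkey_example(password: &str) -> eyre::Result<SymKey> {
    let params = KeyDerivationParameters::new_recommended();
    // `params` (including the random salt) must be kept so that the same key
    // can be derived again later.
    Ok(params.derive(password)?.into_symkey())
}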
|
yama_midlevel_crypto/src/keys_kyber.rs (new file, 109 lines)
@@ -0,0 +1,109 @@
|
|||||||
|
use pqc_kyber::CryptoRng;
|
||||||
|
use rand::{thread_rng, RngCore};
|
||||||
|
use serde::de::Error;
|
||||||
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct KyberPublicKey {
|
||||||
|
inner: pqc_kyber::PublicKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct KyberPrivateKey {
|
||||||
|
inner: pqc_kyber::SecretKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn kyber_keypair() -> (KyberPublicKey, KyberPrivateKey) {
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
let kyber_pair = pqc_kyber::keypair(&mut rng);
|
||||||
|
|
||||||
|
(
|
||||||
|
KyberPublicKey {
|
||||||
|
inner: kyber_pair.public,
|
||||||
|
},
|
||||||
|
KyberPrivateKey {
|
||||||
|
inner: kyber_pair.secret,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for KyberPublicKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
<[u8]>::serialize(&self.inner, serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'d> Deserialize<'d> for KyberPublicKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'d>,
|
||||||
|
{
|
||||||
|
let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?;
|
||||||
|
|
||||||
|
let mut kyber: pqc_kyber::PublicKey = [0; 1568];
|
||||||
|
if kyber.len() != bytes.len() {
|
||||||
|
return Err(D::Error::custom("wrong length of kyber key"));
|
||||||
|
}
|
||||||
|
kyber.copy_from_slice(&bytes);
|
||||||
|
|
||||||
|
Ok(KyberPublicKey { inner: kyber })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KyberPublicKey {
|
||||||
|
pub fn as_bytes(&self) -> &[u8; 1568] {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for KyberPrivateKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
<[u8]>::serialize(&self.inner, serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'d> Deserialize<'d> for KyberPrivateKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'d>,
|
||||||
|
{
|
||||||
|
let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?;
|
||||||
|
|
||||||
|
let mut kyber: pqc_kyber::SecretKey = [0; 3168];
|
||||||
|
if kyber.len() != bytes.len() {
|
||||||
|
return Err(D::Error::custom("wrong length of kyber key"));
|
||||||
|
}
|
||||||
|
kyber.copy_from_slice(&bytes);
|
||||||
|
|
||||||
|
Ok(KyberPrivateKey { inner: kyber })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct KyberEncapsulation {
|
||||||
|
pub shared_secret: pqc_kyber::SharedSecret,
|
||||||
|
pub public_bytes: [u8; pqc_kyber::KYBER_CIPHERTEXTBYTES],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KyberPublicKey {
|
||||||
|
pub fn encapsulate<T: RngCore + CryptoRng>(&self, rng: &mut T) -> KyberEncapsulation {
|
||||||
|
let (public_bytes, shared_secret) =
|
||||||
|
pqc_kyber::encapsulate(&self.inner, rng).expect("bad kyber encapsulation");
|
||||||
|
KyberEncapsulation {
|
||||||
|
shared_secret,
|
||||||
|
public_bytes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KyberPrivateKey {
|
||||||
|
pub fn decapsulate(&self, ciphertext: &[u8; 1568]) -> pqc_kyber::SharedSecret {
|
||||||
|
pqc_kyber::decapsulate(ciphertext, &self.inner).expect("bad kyber decapsulation")
|
||||||
|
}
|
||||||
|
}
|
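A hedged sketch of the intended encapsulation round trip; it assumes the pqc_kyber crate is built against the Kyber1024 parameter set (matching the 1568-byte sizes above) and that thread_rng satisfies the crate's RNG bounds, as in kyber_keypair:

// Illustrative sketch only: Kyber key encapsulation round trip with the wrappers above.
fn kyber_roundtrip_example() {
    use rand::thread_rng;
    use yama_midlevel_crypto::keys_kyber::kyber_keypair;

    let (public_key, private_key) = kyber_keypair();
    let mut rng = thread_rng();

    // Sender: derive a shared secret and a ciphertext to transmit.
    let encapsulation = public_key.encapsulate(&mut rng);

    // Receiver: recover the same shared secret from the ciphertext.
    let recovered = private_key.decapsulate(&encapsulation.public_bytes);
    assert_eq!(recovered, encapsulation.shared_secret);
}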
76
yama_midlevel_crypto/src/keys_x25519.rs
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
use rand::thread_rng;
|
||||||
|
use serde::de::Error as _DeError;
|
||||||
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct X25519PrivateKey {
|
||||||
|
pub(crate) inner: x25519_dalek::StaticSecret,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct X25519PublicKey {
|
||||||
|
pub(crate) inner: x25519_dalek::PublicKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn x25519_keypair() -> (X25519PublicKey, X25519PrivateKey) {
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
let x25519_priv = x25519_dalek::StaticSecret::random_from_rng(&mut rng);
|
||||||
|
let x25519_pub = x25519_dalek::PublicKey::from(&x25519_priv);
|
||||||
|
(
|
||||||
|
X25519PublicKey { inner: x25519_pub },
|
||||||
|
X25519PrivateKey { inner: x25519_priv },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for X25519PrivateKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
<[u8]>::serialize(self.inner.as_bytes(), serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'d> Deserialize<'d> for X25519PrivateKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'d>,
|
||||||
|
{
|
||||||
|
let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?;
|
||||||
|
let counted_bytes: [u8; 32] = bytes.as_ref().try_into().map_err(D::Error::custom)?;
|
||||||
|
Ok(X25519PrivateKey {
|
||||||
|
inner: x25519_dalek::StaticSecret::from(counted_bytes),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl X25519PublicKey {
|
||||||
|
pub fn as_bytes(&self) -> &[u8; 32] {
|
||||||
|
self.inner.as_bytes()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for X25519PublicKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
<[u8]>::serialize(self.inner.as_bytes(), serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'d> Deserialize<'d> for X25519PublicKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'d>,
|
||||||
|
{
|
||||||
|
let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?;
|
||||||
|
let counted_bytes: [u8; 32] = bytes.as_ref().try_into().map_err(D::Error::custom)?;
|
||||||
|
Ok(X25519PublicKey {
|
||||||
|
inner: x25519_dalek::PublicKey::from(counted_bytes),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
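A small sketch (not from this commit) of generating an X25519 keypair with the helper above:

// Illustrative sketch only: generate an X25519 keypair and inspect the public half.
fn x25519_example() {
    use yama_midlevel_crypto::keys_x25519::x25519_keypair;

    let (public_key, _private_key) = x25519_keypair();
    // Public keys are plain 32-byte values, suitable for embedding in a keyring.
    assert_eq!(public_key.as_bytes().len(), 32);
}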
17
yama_midlevel_crypto/src/lib.rs
Normal file
@ -0,0 +1,17 @@
pub mod asym_box;
pub mod asym_keyx;
pub mod asym_signed;

pub mod keys_kyber;
pub mod keys_x25519;

pub mod sym_box;
pub mod sym_stream;

pub mod chunk_id;

pub mod zstd_box;

pub mod byte_layer;

pub mod key_derivation;
142
yama_midlevel_crypto/src/sym_box.rs
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
use crate::asym_keyx::Exchanged;
|
||||||
|
use crate::byte_layer::ByteLayer;
|
||||||
|
use crate::key_derivation::DerivedKey;
|
||||||
|
use chacha20::cipher::{KeyIvInit, StreamCipher};
|
||||||
|
use chacha20::XChaCha20;
|
||||||
|
use poly1305::universal_hash::KeyInit;
|
||||||
|
use poly1305::Poly1305;
|
||||||
|
use rand::Rng;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
pub const SYMBOX_NONCE_LENGTH: usize = 24;
|
||||||
|
pub const SYMBOX_MAC_LENGTH: usize = 16;
|
||||||
|
pub const SYMBOX_FOOTER_LENGTH: usize = SYMBOX_MAC_LENGTH + SYMBOX_NONCE_LENGTH;
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct SymBox<'bytes, T> {
|
||||||
|
// payload || nonce || mac
|
||||||
|
bytes: Cow<'bytes, [u8]>,
|
||||||
|
|
||||||
|
#[serde(skip)]
|
||||||
|
phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SymKey {
|
||||||
|
xchacha20: [u8; 32],
|
||||||
|
poly1305: poly1305::Key,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SymKey {
|
||||||
|
fn from_64_bytes(input: &[u8]) -> Self {
|
||||||
|
let (xchacha20_bytes, poly1305_bytes) = input.split_at(32);
|
||||||
|
|
||||||
|
SymKey {
|
||||||
|
xchacha20: xchacha20_bytes.try_into().unwrap(),
|
||||||
|
poly1305: *poly1305::Key::from_slice(poly1305_bytes),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Exchanged> for SymKey {
|
||||||
|
fn from(exchanged: Exchanged) -> Self {
|
||||||
|
SymKey::from_64_bytes(&exchanged.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<DerivedKey> for SymKey {
|
||||||
|
fn from(derived: DerivedKey) -> Self {
|
||||||
|
SymKey::from_64_bytes(&derived.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'bytes, T> SymBox<'bytes, T> {
|
||||||
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
|
self.bytes.as_ref()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_from_raw(bytes: &'bytes [u8]) -> Self {
|
||||||
|
Self {
|
||||||
|
bytes: Cow::Borrowed(bytes),
|
||||||
|
phantom: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'bytes, T> ByteLayer for SymBox<'bytes, T> {
|
||||||
|
fn from_byte_vec(bytes: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
bytes: Cow::Owned(bytes),
|
||||||
|
phantom: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_byte_vec(self) -> Vec<u8> {
|
||||||
|
self.bytes.into_owned()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> SymBox<'static, T> {
|
||||||
|
pub fn into_vec(self) -> Vec<u8> {
|
||||||
|
match self.bytes {
|
||||||
|
Cow::Borrowed(b) => b.to_vec(),
|
||||||
|
Cow::Owned(o) => o,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'bytes, T: ByteLayer> SymBox<'bytes, T> {
|
||||||
|
// TODO error
|
||||||
|
pub fn unlock(&self, symkey: &SymKey) -> Option<T> {
|
||||||
|
let blen = self.bytes.len();
|
||||||
|
|
||||||
|
if blen < SYMBOX_FOOTER_LENGTH {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (ciphertext_then_nonce, mac) = self.bytes.split_at(blen - SYMBOX_MAC_LENGTH);
|
||||||
|
let (ciphertext, nonce) = ciphertext_then_nonce.split_at(blen - SYMBOX_FOOTER_LENGTH);
|
||||||
|
|
||||||
|
let poly1305_mac = Poly1305::new(&symkey.poly1305).compute_unpadded(&ciphertext_then_nonce);
|
||||||
|
if poly1305_mac.as_slice() != mac {
|
||||||
|
// TODO Should this pedantically be a constant-time equality check?
|
||||||
|
// I don't think it matters in any exploitable way for Yama though...
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut out_buf = Vec::<u8>::with_capacity(ciphertext.len());
|
||||||
|
out_buf.extend_from_slice(&ciphertext);
|
||||||
|
let mut xchacha20: XChaCha20 = XChaCha20::new(&symkey.xchacha20.into(), nonce.into());
|
||||||
|
xchacha20.apply_keystream(&mut out_buf);
|
||||||
|
let decrypted: Vec<u8> = out_buf;
|
||||||
|
|
||||||
|
Some(T::from_byte_vec(decrypted))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'bytes, T: ByteLayer> SymBox<'bytes, T> {
|
||||||
|
// TODO error
|
||||||
|
pub fn new(contents: T, symkey: &SymKey) -> Option<Self> {
|
||||||
|
let unencrypted = contents.into_byte_vec();
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
let nonce = rng.gen::<[u8; SYMBOX_NONCE_LENGTH]>();
|
||||||
|
|
||||||
|
let mut out_buf = Vec::<u8>::with_capacity(unencrypted.len() + SYMBOX_FOOTER_LENGTH);
|
||||||
|
out_buf.extend_from_slice(&unencrypted);
|
||||||
|
let mut xchacha20: XChaCha20 = XChaCha20::new(&symkey.xchacha20.into(), &nonce.into());
|
||||||
|
xchacha20.apply_keystream(&mut out_buf);
|
||||||
|
|
||||||
|
out_buf.extend_from_slice(&nonce);
|
||||||
|
|
||||||
|
let poly1305_mac = Poly1305::new(&symkey.poly1305).compute_unpadded(&out_buf);
|
||||||
|
out_buf.extend_from_slice(poly1305_mac.as_slice());
|
||||||
|
|
||||||
|
Some(SymBox {
|
||||||
|
bytes: Cow::Owned(out_buf),
|
||||||
|
phantom: Default::default(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
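A hedged sketch of sealing and opening a SymBox. It leans on CborSerde from the sibling byte_layer module (assuming its serialise/deserialise behave as they are used elsewhere in this commit) and on the key derivation above; the passphrase is purely illustrative:

// Illustrative sketch only: encrypt-then-MAC round trip through SymBox.
fn symbox_roundtrip_example() -> eyre::Result<()> {
    use yama_midlevel_crypto::byte_layer::CborSerde;
    use yama_midlevel_crypto::key_derivation::KeyDerivationParameters;
    use yama_midlevel_crypto::sym_box::SymBox;

    let symkey = KeyDerivationParameters::new_recommended()
        .derive("example passphrase")?
        .into_symkey();

    let message = vec!["hello".to_string(), "world".to_string()];

    // Seal: payload is XChaCha20-encrypted, then nonce and Poly1305 tag are appended.
    let sealed: SymBox<'static, CborSerde<Vec<String>>> =
        SymBox::new(CborSerde::serialise(&message)?, &symkey).expect("sealing failed");

    // Open: the MAC is checked before the payload is decrypted and deserialised.
    let opened: Vec<String> = sealed
        .unlock(&symkey)
        .expect("wrong key or corrupted box")
        .deserialise()?;
    assert_eq!(opened, message);
    Ok(())
}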
81
yama_midlevel_crypto/src/sym_stream.rs
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
use chacha20::cipher::{KeyIvInit, StreamCipher, StreamCipherSeek};
|
||||||
|
use chacha20::XChaCha20;
|
||||||
|
use rand::{CryptoRng, Rng, RngCore};
|
||||||
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
|
use std::fmt::{Debug, Formatter};
|
||||||
|
|
||||||
|
/// A symmetric key needed to encrypt or decrypt a symmetric stream.
|
||||||
|
pub struct SymStreamKey {
|
||||||
|
/// 256-bit key
|
||||||
|
key: [u8; 32],
|
||||||
|
|
||||||
|
/// 192-bit nonce for XChaCha20
|
||||||
|
nonce: [u8; 24],
|
||||||
|
|
||||||
|
/// Cipher from chacha20 crate.
|
||||||
|
cipher: XChaCha20,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for SymStreamKey {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
SymStreamKey {
|
||||||
|
key: self.key,
|
||||||
|
nonce: self.nonce,
|
||||||
|
cipher: XChaCha20::new(&self.key.into(), &self.nonce.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct SymKeySerialisable {
|
||||||
|
key: [u8; 32],
|
||||||
|
nonce: [u8; 24],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for SymStreamKey {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: Serializer,
|
||||||
|
{
|
||||||
|
SymKeySerialisable {
|
||||||
|
key: self.key,
|
||||||
|
nonce: self.nonce,
|
||||||
|
}
|
||||||
|
.serialize(serializer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for SymStreamKey {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
let SymKeySerialisable { key, nonce } = SymKeySerialisable::deserialize(deserializer)?;
|
||||||
|
Ok(SymStreamKey {
|
||||||
|
key,
|
||||||
|
nonce,
|
||||||
|
cipher: XChaCha20::new(&key.into(), &nonce.into()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for SymStreamKey {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "SymKey(...)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SymStreamKey {
|
||||||
|
pub fn new_random<R: RngCore + CryptoRng>(rng: &mut R) -> Self {
|
||||||
|
let key: [u8; 32] = rng.gen();
|
||||||
|
let nonce: [u8; 24] = rng.gen();
|
||||||
|
let cipher = XChaCha20::new(&key.into(), &nonce.into());
|
||||||
|
Self { key, nonce, cipher }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn apply_xor(&mut self, offset: u64, content: &mut [u8]) {
|
||||||
|
self.cipher.seek(offset);
|
||||||
|
self.cipher.apply_keystream(content);
|
||||||
|
}
|
||||||
|
}
|
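A small sketch (not from this commit) of the seekable stream cipher in use; applying the keystream twice at the same offset restores the original bytes:

// Illustrative sketch only: XChaCha20 keystream XOR round trip at a fixed offset.
fn sym_stream_example() {
    use rand::thread_rng;
    use yama_midlevel_crypto::sym_stream::SymStreamKey;

    let mut key = SymStreamKey::new_random(&mut thread_rng());

    let mut data = *b"bloblog contents";
    key.apply_xor(4096, &mut data); // encrypt at offset 4096
    key.apply_xor(4096, &mut data); // decrypt by XORing the same keystream again
    assert_eq!(&data, b"bloblog contents");
}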
40
yama_midlevel_crypto/src/zstd_box.rs
Normal file
@ -0,0 +1,40 @@
use crate::byte_layer::ByteLayer;
use eyre::Context;
use std::marker::PhantomData;

pub struct Zstd<T> {
    bytes: Vec<u8>,
    marker: PhantomData<T>,
}

impl<T> ByteLayer for Zstd<T> {
    fn from_byte_vec(bytes: Vec<u8>) -> Self {
        Self {
            bytes,
            marker: Default::default(),
        }
    }

    fn into_byte_vec(self) -> Vec<u8> {
        self.bytes
    }
}

pub const OUR_DEFAULT_COMPRESSION_LEVEL: i32 = 10;

impl<T: ByteLayer> Zstd<T> {
    pub fn compress(src: T) -> Self {
        let bytes = zstd::encode_all(&src.into_byte_vec()[..], OUR_DEFAULT_COMPRESSION_LEVEL)
            .expect("zstd shouldn't fail to compress");
        Self {
            bytes,
            marker: Default::default(),
        }
    }

    pub fn decompress(self) -> eyre::Result<T> {
        let decompressed_bytes =
            zstd::decode_all(&self.bytes[..]).context("zstd decompression failure")?;
        Ok(T::from_byte_vec(decompressed_bytes))
    }
}
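A brief sketch (not from this commit) of the compression layer composed with the CBOR layer, mirroring how Zstd::compress(CborSerde::serialise(..)) is used later in this commit; the CborSerde method signatures are assumed from that usage:

// Illustrative sketch only: compress-then-decompress round trip through the Zstd layer.
fn zstd_roundtrip_example() -> eyre::Result<()> {
    use yama_midlevel_crypto::byte_layer::CborSerde;
    use yama_midlevel_crypto::zstd_box::Zstd;

    let value = vec![0u64; 1024]; // highly compressible input

    let compressed = Zstd::compress(CborSerde::serialise(&value)?);
    let restored: Vec<u64> = compressed.decompress()?.deserialise()?;

    assert_eq!(restored, value);
    Ok(())
}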
25
yama_pile/Cargo.toml
Normal file
@ -0,0 +1,25 @@
[package]
name = "yama_pile"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
yama_midlevel_crypto = { path = "../yama_midlevel_crypto" }
yama_wormfile = { path = "../yama_wormfile" }
tracing = "0.1.37"
eyre = "0.6.8"
sha2 = "0.10.6"
rand = "0.8.5"
patricia_tree = "0.5.7"

hex = "0.4.3"
tokio = { version = "1.27.0", features = ["io-util", "macros"] }
serde = { version = "1.0.159", features = ["derive", "rc"] }
chrono = { version = "0.4.24", features = ["serde"] }

uuid = { version = "1.3.0", features = ["fast-rng", "v4"] }
unix_mode = "0.1.3"

backtrace = "0.3.67"
211
yama_pile/src/bloblogs.rs
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
use crate::definitions::{BlobLocator, BloblogFooter, BloblogId, PackedBloblogFooter};
|
||||||
|
use crate::keyring::{Keyring, WriterKey};
|
||||||
|
use crate::locks::{LockHandle, LockKind};
|
||||||
|
use crate::utils::{HashedWormWriter, SymStreamReader, SymStreamWriter};
|
||||||
|
use eyre::{bail, Context, ContextCompat};
|
||||||
|
use rand::thread_rng;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::io::SeekFrom;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
|
||||||
|
use tracing::warn;
|
||||||
|
use yama_midlevel_crypto::asym_box::AsymBox;
|
||||||
|
use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde};
|
||||||
|
use yama_midlevel_crypto::chunk_id::ChunkId;
|
||||||
|
use yama_midlevel_crypto::sym_stream::SymStreamKey;
|
||||||
|
use yama_midlevel_crypto::zstd_box::Zstd;
|
||||||
|
use yama_wormfile::paths::WormPathBuf;
|
||||||
|
use yama_wormfile::{WormFileReader, WormFileWriter};
|
||||||
|
|
||||||
|
/// The recommended bloblog size is 2 GiB.
|
||||||
|
/// This is below the 4 GiB file size limit on FAT32.
|
||||||
|
pub const RECOMMENDED_BLOBLOG_SIZE: u64 = 2 * 1024 * 1024 * 1024;
|
||||||
|
|
||||||
|
/// A writer for a bloblog.
|
||||||
|
pub struct BloblogWriter<W: WormFileWriter + Unpin> {
|
||||||
|
writer: SymStreamWriter<HashedWormWriter<W>>,
|
||||||
|
footer_write_key: WriterKey,
|
||||||
|
footer: BloblogFooter,
|
||||||
|
lock: Arc<LockHandle>,
|
||||||
|
written: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<WFW: WormFileWriter + Unpin> BloblogWriter<WFW> {
|
||||||
|
/// Creates a bloblog writer.
|
||||||
|
pub async fn new(
|
||||||
|
worm_writer: WFW,
|
||||||
|
keyring: &Keyring,
|
||||||
|
lock: Arc<LockHandle>,
|
||||||
|
) -> eyre::Result<Self> {
|
||||||
|
let hashed_writer = HashedWormWriter::new(worm_writer);
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
let secret_content_stream_key = SymStreamKey::new_random(&mut rng);
|
||||||
|
|
||||||
|
let content_stream_key = keyring
|
||||||
|
.w_bloblog_contents
|
||||||
|
.as_ref()
|
||||||
|
.context("No `w_bloblog_contents` key on keyring")?
|
||||||
|
.make_locked_asymbox(CborSerde::serialise(&secret_content_stream_key)?);
|
||||||
|
|
||||||
|
let footer_write_key = keyring
|
||||||
|
.w_bloblog_footer
|
||||||
|
.clone()
|
||||||
|
.context("No `w_bloblog_footer` key on keyring")?;
|
||||||
|
|
||||||
|
let writer = SymStreamWriter::new(hashed_writer, secret_content_stream_key);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
writer,
|
||||||
|
footer_write_key,
|
||||||
|
footer: BloblogFooter {
|
||||||
|
content_stream_key,
|
||||||
|
chunks: Default::default(),
|
||||||
|
},
|
||||||
|
lock,
|
||||||
|
written: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a chunk to the bloblog.
|
||||||
|
pub async fn write_chunk(&mut self, chunk_id: ChunkId, chunk: &[u8]) -> eyre::Result<()> {
|
||||||
|
let locator = BlobLocator {
|
||||||
|
offset: self.writer.offset(),
|
||||||
|
length: chunk.len() as u64,
|
||||||
|
};
|
||||||
|
self.writer.write_all(&chunk).await?;
|
||||||
|
if self.footer.chunks.insert(chunk_id, locator).is_some() {
|
||||||
|
warn!("Duplicate chunk ID inserted into bloblog: {:?}", chunk_id);
|
||||||
|
}
|
||||||
|
self.written += chunk.len() as u64;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true iff this bloblog writer should be finished, so that it stays close to the recommended
|
||||||
|
/// maximum size.
|
||||||
|
pub fn should_finish(&self) -> bool {
|
||||||
|
self.written >= RECOMMENDED_BLOBLOG_SIZE
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finishes this bloblog.
|
||||||
|
///
|
||||||
|
/// This:
|
||||||
|
/// - writes the footer
|
||||||
|
/// - flushes
|
||||||
|
/// - finishes computing the hash of the file
|
||||||
|
/// - moves the bloblog to the correct place
|
||||||
|
pub async fn finish(
|
||||||
|
mut self,
|
||||||
|
) -> eyre::Result<(WormPathBuf, BloblogId, BTreeMap<ChunkId, BlobLocator>)> {
|
||||||
|
self.writer.flush().await?;
|
||||||
|
let mut hashed_writer = self.writer.finish();
|
||||||
|
|
||||||
|
// Write the footer, then the length of the footer.
|
||||||
|
let packed_footer: PackedBloblogFooter = self
|
||||||
|
.footer_write_key
|
||||||
|
.make_locked_asymbox(Zstd::compress(CborSerde::serialise(&self.footer)?));
|
||||||
|
let footer_encoded = packed_footer.into_byte_vec();
|
||||||
|
hashed_writer.write_all(&footer_encoded).await?;
|
||||||
|
hashed_writer.write_u32(footer_encoded.len() as u32).await?;
|
||||||
|
|
||||||
|
hashed_writer.flush().await?;
|
||||||
|
let (mut worm_writer, file_hash) = hashed_writer.finalise();
|
||||||
|
|
||||||
|
let target_path = WormPathBuf::new(format!(
|
||||||
|
"bloblogs/{}/{}",
|
||||||
|
hex::encode(&file_hash.0[0..1]),
|
||||||
|
file_hash.to_string()
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
if !self.lock.is_active_now(LockKind::Shared) {
|
||||||
|
bail!(
|
||||||
|
"Can't complete finish() on bloblog {:?} because lock expired",
|
||||||
|
target_path
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
worm_writer.finalise(target_path.as_ref(), false).await?;
|
||||||
|
|
||||||
|
Ok((target_path, BloblogId(file_hash), self.footer.chunks))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct BloblogReader<R: WormFileReader> {
|
||||||
|
reader: SymStreamReader<R>,
|
||||||
|
footer: BloblogFooter,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: WormFileReader + Unpin> BloblogReader<R> {
|
||||||
|
/// Read the bloblog footer from the file only.
|
||||||
|
/// This only requires the `r_bloblog_footer` key.
|
||||||
|
pub async fn read_footer_only(
|
||||||
|
worm_reader: &mut R,
|
||||||
|
keyring: &Keyring,
|
||||||
|
) -> eyre::Result<BloblogFooter> {
|
||||||
|
let r_bloblog_footer = keyring
|
||||||
|
.r_bloblog_footer
|
||||||
|
.as_ref()
|
||||||
|
.context("No key `r_bloblog_footer` on keyring")?;
|
||||||
|
worm_reader.seek(SeekFrom::End(-4)).await?;
|
||||||
|
let footer_length = worm_reader.read_u32().await?;
|
||||||
|
|
||||||
|
worm_reader
|
||||||
|
.seek(SeekFrom::End(-4 - footer_length as i64))
|
||||||
|
.await?;
|
||||||
|
let mut footer_enc = vec![0u8; footer_length as usize];
|
||||||
|
worm_reader.read_exact(&mut footer_enc).await?;
|
||||||
|
|
||||||
|
let footer_wrapped: PackedBloblogFooter = AsymBox::from_byte_vec(footer_enc);
|
||||||
|
let footer: BloblogFooter = r_bloblog_footer
|
||||||
|
.unlock_asymbox(footer_wrapped)
|
||||||
|
.context("failed to decrypt bloblog footer")?
|
||||||
|
.decompress()?
|
||||||
|
.deserialise()?;
|
||||||
|
Ok(footer)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn new(mut worm_reader: R, keyring: &Keyring) -> eyre::Result<Self> {
|
||||||
|
let r_bloblog_contents = keyring
|
||||||
|
.r_bloblog_contents
|
||||||
|
.clone()
|
||||||
|
.context("No key `r_bloblog_contents` on keyring")?;
|
||||||
|
|
||||||
|
let footer = Self::read_footer_only(&mut worm_reader, keyring)
|
||||||
|
.await
|
||||||
|
.context("failed to read footer")?;
|
||||||
|
|
||||||
|
let stream_key = r_bloblog_contents
|
||||||
|
.unlock_asymbox(footer.content_stream_key.clone())
|
||||||
|
.context("failed to decrypt stream key")?
|
||||||
|
.deserialise()?;
|
||||||
|
|
||||||
|
let reader = SymStreamReader::new(worm_reader, stream_key);
|
||||||
|
|
||||||
|
Ok(Self { reader, footer })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_to_buf(
|
||||||
|
&mut self,
|
||||||
|
buf: &mut Vec<u8>,
|
||||||
|
offset: u64,
|
||||||
|
read_length: u64,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
// eprintln!("RTB @ {offset} r{read_length}");
|
||||||
|
self.reader.seek(SeekFrom::Start(offset)).await?;
|
||||||
|
buf.resize(read_length as usize, 0);
|
||||||
|
self.reader.read_exact(buf).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_chunk(&mut self, chunk_id: ChunkId) -> eyre::Result<Option<Vec<u8>>> {
|
||||||
|
match self.footer.chunks.get(&chunk_id) {
|
||||||
|
Some(chunk_locator) => {
|
||||||
|
let mut buf = Vec::with_capacity(chunk_locator.length as usize);
|
||||||
|
self.read_to_buf(&mut buf, chunk_locator.offset, chunk_locator.length)
|
||||||
|
.await?;
|
||||||
|
Ok(Some(buf))
|
||||||
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
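A hedged sketch of how a bloblog might be filled and finished; `pile`, the chunk IDs and the chunk bytes are stand-ins for values produced elsewhere (chunking and the Pile type live in other files of this commit), and the extra Unpin bound is an assumption about the wormfile writer type:

// Illustrative sketch only: append chunks to a bloblog and finish it when it is large enough.
async fn fill_bloblog_example<WFP: yama_wormfile::WormFileProvider + 'static>(
    pile: &yama_pile::Pile<WFP>,
    chunks: Vec<(yama_midlevel_crypto::chunk_id::ChunkId, Vec<u8>)>,
) -> eyre::Result<()>
where
    WFP::WormFileWriter: Unpin,
{
    let mut writer = pile.create_bloblog().await?;
    for (chunk_id, chunk_bytes) in chunks {
        writer.write_chunk(chunk_id, &chunk_bytes).await?;
        if writer.should_finish() {
            break; // stay near RECOMMENDED_BLOBLOG_SIZE; remaining chunks go into a new bloblog
        }
    }
    // Writes the encrypted footer, finalises the hash and moves the file to bloblogs/<xx>/<hash>.
    let (_path, _bloblog_id, _chunk_map) = writer.finish().await?;
    Ok(())
}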
146
yama_pile/src/definitions.rs
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
use crate::keyring::Keyring;
|
||||||
|
use crate::utils::Sha256;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
use std::fmt::{Debug, Formatter};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use yama_midlevel_crypto::asym_box::AsymBox;
|
||||||
|
use yama_midlevel_crypto::byte_layer::CborSerde;
|
||||||
|
use yama_midlevel_crypto::chunk_id::{ChunkId, ChunkIdKey};
|
||||||
|
use yama_midlevel_crypto::key_derivation::KeyDerivationParameters;
|
||||||
|
use yama_midlevel_crypto::sym_box::SymBox;
|
||||||
|
use yama_midlevel_crypto::sym_stream::SymStreamKey;
|
||||||
|
use yama_midlevel_crypto::zstd_box::Zstd;
|
||||||
|
|
||||||
|
/// The footer at the end of a bloblog.
|
||||||
|
/// This footer should be encrypted and signed.
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct BloblogFooter {
|
||||||
|
/// The key needed to read the content stream.
|
||||||
|
pub content_stream_key: AsymBox<CborSerde<SymStreamKey>>,
|
||||||
|
|
||||||
|
/// IDs of chunks and whereabouts they are in the bloblog.
|
||||||
|
pub chunks: BTreeMap<ChunkId, BlobLocator>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type PackedBloblogFooter = AsymBox<Zstd<CborSerde<BloblogFooter>>>;
|
||||||
|
|
||||||
|
/// Locator for a blob within a bloblog.
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct BlobLocator {
|
||||||
|
pub offset: u64,
|
||||||
|
pub length: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct BloblogId(pub Sha256);
|
||||||
|
|
||||||
|
impl ToString for BloblogId {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
self.0.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for BloblogId {
|
||||||
|
type Error = eyre::Error;
|
||||||
|
|
||||||
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
|
Sha256::try_from(value).map(BloblogId)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for BloblogId {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "BloblogId({})", &self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct IndexId(pub Sha256);
|
||||||
|
|
||||||
|
impl ToString for IndexId {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
self.0.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&str> for IndexId {
|
||||||
|
type Error = eyre::Error;
|
||||||
|
|
||||||
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
|
Sha256::try_from(value).map(IndexId)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for IndexId {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "IndexId({})", &self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An index.
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct Index {
|
||||||
|
/// What indices this index supersedes.
|
||||||
|
/// In other words: applying this index means you can ignore the stated indices below.
|
||||||
|
pub supersedes: BTreeSet<IndexId>,
|
||||||
|
|
||||||
|
/// Map from each bloblog to the chunks it holds and their locations.
|
||||||
|
pub bloblogs: BTreeMap<BloblogId, IndexBloblogEntry>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One bloblog's entry within an index. Must contain an exhaustive list of chunks for that bloblog.
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct IndexBloblogEntry {
|
||||||
|
/// Chunk locators
|
||||||
|
pub chunks: BTreeMap<ChunkId, BlobLocator>,
|
||||||
|
|
||||||
|
/// How much space, in bytes, has been deallocated / forgotten about
|
||||||
|
/// (the cumulative size of chunks that have been removed from the chunk map)
|
||||||
|
pub forgotten_bytes: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type PackedIndex = AsymBox<Zstd<CborSerde<Index>>>;
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||||
|
pub struct RecursiveChunkRef {
|
||||||
|
/// The root Chunk ID.
|
||||||
|
pub chunk_id: ChunkId,
|
||||||
|
/// The depth of the data bytes.
|
||||||
|
/// 0 means that the chunk addressed by `chunk_id` contains data bytes.
|
||||||
|
/// 1 means that the chunk addressed by `chunk_id` contains references to chunks that contain
|
||||||
|
/// data bytes.
|
||||||
|
/// (and so on)
|
||||||
|
pub depth: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for RecursiveChunkRef {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{:?}<{}>", &self.chunk_id, self.depth)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(tag = "_kind")]
|
||||||
|
pub enum UnlockedOrLockedKeyring {
|
||||||
|
Locked {
|
||||||
|
deriver: KeyDerivationParameters,
|
||||||
|
lockbox: SymBox<'static, CborSerde<Keyring>>,
|
||||||
|
},
|
||||||
|
Unlocked(Keyring),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type PackedKeyring = CborSerde<UnlockedOrLockedKeyring>;
|
||||||
|
|
||||||
|
pub const SUPPORTED_YAMA_PILE_VERSION: &'static str = "yama v0.7.0 pile format";
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct PileConfig {
|
||||||
|
pub yama_pile_version: String,
|
||||||
|
pub chunk_id_key: ChunkIdKey,
|
||||||
|
pub zstd_dict: Option<Arc<Vec<u8>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type PackedPileConfig = AsymBox<CborSerde<PileConfig>>;
|
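An illustrative sketch (not code from this commit) of what the `supersedes` relation implies when deciding which indices still need to be read; the function name is made up:

// Illustrative sketch only: drop indices that are superseded by another index in the set.
use std::collections::{BTreeMap, BTreeSet};
use yama_pile::definitions::{Index, IndexId};

fn live_indices_example(all: &BTreeMap<IndexId, Index>) -> BTreeSet<IndexId> {
    let mut live: BTreeSet<IndexId> = all.keys().copied().collect();
    for index in all.values() {
        for superseded in &index.supersedes {
            // Applying `index` means the indices it supersedes can be ignored.
            live.remove(superseded);
        }
    }
    live
}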
53
yama_pile/src/keyring.rs
Normal file
@ -0,0 +1,53 @@
use serde::{Deserialize, Serialize};
use yama_midlevel_crypto::asym_box::AsymBox;
use yama_midlevel_crypto::asym_keyx::{generate_asym_keypair, DecryptingKey, EncryptingKey};
use yama_midlevel_crypto::asym_signed::{asym_signing_keypair, SigningKey, VerifyingKey};
use yama_midlevel_crypto::byte_layer::ByteLayer;

#[derive(Clone, Serialize, Deserialize)]
pub struct Keyring {
    pub r_config: Option<ReaderKey>,
    pub w_config: Option<WriterKey>,

    pub r_bloblog_footer: Option<ReaderKey>,
    pub w_bloblog_footer: Option<WriterKey>,

    pub r_bloblog_contents: Option<ReaderKey>,
    pub w_bloblog_contents: Option<WriterKey>,

    pub r_locks: Option<ReaderKey>,
    pub w_locks: Option<WriterKey>,

    pub r_pointer: Option<ReaderKey>,
    pub w_pointer: Option<WriterKey>,
}

pub fn generate_r_w_keys() -> (ReaderKey, WriterKey) {
    let (encrypt, decrypt) = generate_asym_keypair();
    let (sign, verify) = asym_signing_keypair();
    (ReaderKey { decrypt, verify }, WriterKey { encrypt, sign })
}

#[derive(Clone, Serialize, Deserialize)]
pub struct WriterKey {
    encrypt: EncryptingKey,
    sign: SigningKey,
}

impl WriterKey {
    pub fn make_locked_asymbox<T: ByteLayer>(&self, contents: T) -> AsymBox<T> {
        AsymBox::new(contents, &self.sign, &self.encrypt).unwrap()
    }
}

#[derive(Clone, Serialize, Deserialize)]
pub struct ReaderKey {
    decrypt: DecryptingKey,
    verify: VerifyingKey,
}

impl ReaderKey {
    pub fn unlock_asymbox<T: ByteLayer>(&self, asymbox: AsymBox<T>) -> Option<T> {
        asymbox.unlock(&self.decrypt, &self.verify)
    }
}
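A short sketch (not from this commit) of the writer/reader key split: a writer key seals a box that only the matching reader key can open. It assumes CborSerde behaves as it is used elsewhere in this commit:

// Illustrative sketch only: seal with a WriterKey, open with the matching ReaderKey.
fn keyring_roundtrip_example() -> eyre::Result<()> {
    use yama_midlevel_crypto::byte_layer::CborSerde;
    use yama_pile::keyring::generate_r_w_keys;

    let (reader_key, writer_key) = generate_r_w_keys();

    let secret = "bloblog footer goes here".to_string();
    let sealed = writer_key.make_locked_asymbox(CborSerde::serialise(&secret)?);

    let opened: String = reader_key
        .unlock_asymbox(sealed)
        .expect("wrong reader key or corrupted box")
        .deserialise()?;
    assert_eq!(opened, secret);
    Ok(())
}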
249
yama_pile/src/lib.rs
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
use crate::bloblogs::{BloblogReader, BloblogWriter};
|
||||||
|
use crate::definitions::{
|
||||||
|
BloblogId, Index, IndexId, PackedIndex, PackedPileConfig, PileConfig,
|
||||||
|
SUPPORTED_YAMA_PILE_VERSION,
|
||||||
|
};
|
||||||
|
use crate::keyring::Keyring;
|
||||||
|
use crate::locks::{LockHandle, LockKind};
|
||||||
|
use crate::pointers::{PackedPointer, Pointer};
|
||||||
|
use crate::utils::HashedWormWriter;
|
||||||
|
use eyre::{bail, Context, ContextCompat};
|
||||||
|
use std::collections::BTreeSet;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
|
use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde};
|
||||||
|
use yama_midlevel_crypto::zstd_box::Zstd;
|
||||||
|
use yama_wormfile::paths::{WormPath, WormPathBuf};
|
||||||
|
use yama_wormfile::{WormFileProvider, WormFileWriter};
|
||||||
|
|
||||||
|
pub mod definitions;
|
||||||
|
|
||||||
|
pub mod bloblogs;
|
||||||
|
pub mod keyring;
|
||||||
|
pub mod locks;
|
||||||
|
pub mod pointers;
|
||||||
|
pub mod tree;
|
||||||
|
pub mod utils;
|
||||||
|
|
||||||
|
pub const DIR_LOCKS: &'static str = "locks";
|
||||||
|
pub const DIR_BLOBLOGS: &'static str = "bloblogs";
|
||||||
|
pub const DIR_INDICES: &'static str = "indices";
|
||||||
|
pub const FILE_YAMA_CONFIG: &'static str = "yama.cfg";
|
||||||
|
pub const FILE_YAMA_CONNECTOR: &'static str = "yama.toml";
|
||||||
|
pub const FILE_MASTER_KEYRING: &'static str = "master.yamakeyring";
|
||||||
|
|
||||||
|
pub struct Pile<WFP: WormFileProvider> {
|
||||||
|
provider: Arc<WFP>,
|
||||||
|
lock: Arc<LockHandle>,
|
||||||
|
keyring: Arc<Keyring>,
|
||||||
|
pub pile_config: Arc<PileConfig>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<WFP: WormFileProvider + 'static> Pile<WFP> {
|
||||||
|
pub async fn open_manual(
|
||||||
|
provider: Arc<WFP>,
|
||||||
|
lock_kind: LockKind,
|
||||||
|
holder: String,
|
||||||
|
keyring: Keyring,
|
||||||
|
) -> eyre::Result<Self> {
|
||||||
|
let lock = Arc::new(LockHandle::new(provider.clone(), lock_kind, holder, &keyring).await?);
|
||||||
|
let keyring = Arc::new(keyring);
|
||||||
|
|
||||||
|
let r_config = keyring.r_config.as_ref().context("No r_config key")?;
|
||||||
|
|
||||||
|
let pile_config: PileConfig = {
|
||||||
|
let mut file = provider.read(WormPath::new("yama.cfg").unwrap()).await?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
file.read_to_end(&mut buf).await?;
|
||||||
|
let packed = PackedPileConfig::from_byte_vec(buf);
|
||||||
|
r_config
|
||||||
|
.unlock_asymbox(packed)
|
||||||
|
.context("Failed to decrypt pile config")?
|
||||||
|
.deserialise()
|
||||||
|
.context("Failed to deserialise pile config")?
|
||||||
|
};
|
||||||
|
|
||||||
|
if &pile_config.yama_pile_version != SUPPORTED_YAMA_PILE_VERSION {
|
||||||
|
bail!(
|
||||||
|
"Unsupported pile version: {:?} (expected {:?})",
|
||||||
|
pile_config.yama_pile_version,
|
||||||
|
SUPPORTED_YAMA_PILE_VERSION
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Pile {
|
||||||
|
provider,
|
||||||
|
lock,
|
||||||
|
keyring,
|
||||||
|
pile_config: Arc::new(pile_config),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new bloblog writer.
|
||||||
|
///
|
||||||
|
/// Requires key: w_bloblog_footer, w_bloblog_contents
|
||||||
|
pub async fn create_bloblog(&self) -> eyre::Result<BloblogWriter<WFP::WormFileWriter>> {
|
||||||
|
if !self.lock.is_active_now(LockKind::Shared) {
|
||||||
|
bail!("can't create bloblog: lock not active");
|
||||||
|
}
|
||||||
|
let writer = BloblogWriter::new(
|
||||||
|
self.provider.write().await?,
|
||||||
|
&self.keyring,
|
||||||
|
self.lock.clone(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(writer)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_bloblog(
|
||||||
|
&self,
|
||||||
|
bloblog_id: BloblogId,
|
||||||
|
) -> eyre::Result<BloblogReader<WFP::WormFileReader>> {
|
||||||
|
let bloblog_path = WormPathBuf::new(format!(
|
||||||
|
"bloblogs/{}/{}",
|
||||||
|
hex::encode(&bloblog_id.0 .0[0..1]),
|
||||||
|
bloblog_id.0.to_string()
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
let worm_reader = self.provider.read(&bloblog_path).await?;
|
||||||
|
Ok(BloblogReader::new(worm_reader, &self.keyring).await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new index, returning the index ID.
|
||||||
|
///
|
||||||
|
/// Requires key: w_bloblog_footer
|
||||||
|
pub async fn create_index(&self, index: &Index) -> eyre::Result<IndexId> {
|
||||||
|
let worm_writer = self.provider.write().await?;
|
||||||
|
let mut writer = HashedWormWriter::new(worm_writer);
|
||||||
|
let packed_index: PackedIndex = self
|
||||||
|
.keyring
|
||||||
|
.w_bloblog_footer
|
||||||
|
.as_ref()
|
||||||
|
.context("No w_bloblog_footer key")?
|
||||||
|
.make_locked_asymbox(Zstd::compress(CborSerde::serialise(index)?));
|
||||||
|
writer.write_all(&packed_index.into_byte_vec()).await?;
|
||||||
|
let (mut worm_writer, hash) = writer.finalise();
|
||||||
|
|
||||||
|
let target = WormPathBuf::new(format!("indices/{}", hash)).unwrap();
|
||||||
|
worm_writer.flush().await?;
|
||||||
|
worm_writer.finalise(target.as_ref(), false).await?;
|
||||||
|
|
||||||
|
Ok(IndexId(hash))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List all indices present in the pile.
|
||||||
|
pub async fn list_indices(&self) -> eyre::Result<BTreeSet<IndexId>> {
|
||||||
|
let files = self
|
||||||
|
.provider
|
||||||
|
.list(WormPath::new("indices").unwrap())
|
||||||
|
.await
|
||||||
|
.context("failed to list indices")?;
|
||||||
|
let mut result = BTreeSet::new();
|
||||||
|
for file in files {
|
||||||
|
let (_, filename) = file
|
||||||
|
.as_ref()
|
||||||
|
.as_str()
|
||||||
|
.rsplit_once('/')
|
||||||
|
.context("index listing entry should split at /")?;
|
||||||
|
let index_id = IndexId::try_from(filename)
|
||||||
|
.with_context(|| format!("not a valid index ID: {filename:?}"))?;
|
||||||
|
result.insert(index_id);
|
||||||
|
}
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read an index from the pile.
|
||||||
|
///
|
||||||
|
/// Requires key: r_bloblog_footer
|
||||||
|
pub async fn read_index(&self, index_id: IndexId) -> eyre::Result<Index> {
|
||||||
|
let r_bloblog_footer = self
|
||||||
|
.keyring
|
||||||
|
.r_bloblog_footer
|
||||||
|
.as_ref()
|
||||||
|
.context("No r_bloblog_footer key")?;
|
||||||
|
let target = WormPathBuf::new(format!("indices/{}", index_id.0)).unwrap();
|
||||||
|
let mut reader = self.provider.read(target.as_ref()).await?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
reader.read_to_end(&mut buf).await?;
|
||||||
|
let packed_index = PackedIndex::from_byte_vec(buf);
|
||||||
|
let index = r_bloblog_footer
|
||||||
|
.unlock_asymbox(packed_index)
|
||||||
|
.context("can't unlock packed index")?
|
||||||
|
.decompress()?
|
||||||
|
.deserialise()?;
|
||||||
|
Ok(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_pointer(&self, name: &str) -> eyre::Result<Option<Pointer>> {
|
||||||
|
let r_pointer = self
|
||||||
|
.keyring
|
||||||
|
.r_pointer
|
||||||
|
.as_ref()
|
||||||
|
.context("No r_pointer key")?;
|
||||||
|
|
||||||
|
let pointer_path = WormPath::new("pointers")
|
||||||
|
.unwrap()
|
||||||
|
.join(name)
|
||||||
|
.with_context(|| format!("bad pointer name {name:?}"))?;
|
||||||
|
|
||||||
|
if !self.provider.is_regular_file(pointer_path.as_ref()).await? {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut reader = self.provider.read(pointer_path.as_ref()).await?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
reader.read_to_end(&mut buf).await?;
|
||||||
|
|
||||||
|
let packed_pointer = PackedPointer::from_byte_vec(buf);
|
||||||
|
|
||||||
|
Ok(Some(
|
||||||
|
r_pointer
|
||||||
|
.unlock_asymbox(packed_pointer)
|
||||||
|
.context("failed to decrypt packed pointer")?
|
||||||
|
.decompress()
|
||||||
|
.context("failed to decompress packed pointer")?
|
||||||
|
.deserialise()
|
||||||
|
.context("failed to deserialise packed pointer")?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn write_pointer(
|
||||||
|
&self,
|
||||||
|
name: &str,
|
||||||
|
replace: bool,
|
||||||
|
data: &Pointer,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
let w_pointer = self
|
||||||
|
.keyring
|
||||||
|
.w_pointer
|
||||||
|
.as_ref()
|
||||||
|
.context("No w_pointer key")?;
|
||||||
|
|
||||||
|
let packed_pointer: PackedPointer = w_pointer.make_locked_asymbox(Zstd::compress(
|
||||||
|
CborSerde::serialise(data).context("can't serialise pointer")?,
|
||||||
|
));
|
||||||
|
|
||||||
|
let pointer_path = WormPath::new("pointers")
|
||||||
|
.unwrap()
|
||||||
|
.join(name)
|
||||||
|
.with_context(|| format!("bad pointer name {name:?}"))?;
|
||||||
|
|
||||||
|
let mut writer = self.provider.write().await?;
|
||||||
|
writer.write_all(&packed_pointer.into_byte_vec()).await?;
|
||||||
|
writer.finalise(pointer_path.as_ref(), replace).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn close(mut self) -> eyre::Result<()> {
|
||||||
|
match Arc::try_unwrap(self.lock) {
|
||||||
|
Ok(lock) => {
|
||||||
|
lock.close().await
|
||||||
|
.context("failed to release lock gracefully")?;
|
||||||
|
}
|
||||||
|
Err(arc) => {
|
||||||
|
bail!("could not close pile gracefully; lock arc has {} strong refs and {} weak refs", Arc::strong_count(&arc), Arc::weak_count(&arc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
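A hedged sketch of opening a pile and listing its indices; constructing the provider and loading the keyring are stand-ins for code in other crates of this workspace, and the sketch assumes the keyring carries the relevant reader keys:

// Illustrative sketch only: open a pile under a Shared lock, read its indices, then close it.
async fn open_pile_example<WFP: yama_wormfile::WormFileProvider + 'static>(
    provider: std::sync::Arc<WFP>,
    keyring: yama_pile::keyring::Keyring,
) -> eyre::Result<()> {
    use yama_pile::locks::LockKind;
    use yama_pile::Pile;

    let pile = Pile::open_manual(provider, LockKind::Shared, "example-host".to_string(), keyring)
        .await?;

    for index_id in pile.list_indices().await? {
        let index = pile.read_index(index_id).await?;
        println!("{index_id:?} covers {} bloblogs", index.bloblogs.len());
    }

    // Releases the Shared lock gracefully.
    pile.close().await?;
    Ok(())
}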
358
yama_pile/src/locks.rs
Normal file
@ -0,0 +1,358 @@
|
|||||||
|
use crate::keyring::{Keyring, ReaderKey, WriterKey};
|
||||||
|
use chrono::{DateTime, Duration, Utc};
|
||||||
|
use eyre::{bail, Context, ContextCompat, eyre};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::{Arc, RwLock};
|
||||||
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
|
use tokio::sync::oneshot;
|
||||||
|
use tokio::task::JoinHandle;
|
||||||
|
use tokio::time::Instant;
|
||||||
|
use tracing::{error, info, warn};
|
||||||
|
use uuid::Uuid;
|
||||||
|
use yama_midlevel_crypto::asym_box::AsymBox;
|
||||||
|
use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde};
|
||||||
|
use yama_wormfile::paths::{WormPath, WormPathBuf};
|
||||||
|
use yama_wormfile::{WormFileProvider, WormFileWriter};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct LockDesc {
|
||||||
|
/// Expiry time of the lock. Should be renewed occasionally.
|
||||||
|
/// Should not exceed more than 10 minutes in the future.
|
||||||
|
expires_at: DateTime<Utc>,
|
||||||
|
|
||||||
|
/// Human-friendly description of who is holding the lock.
|
||||||
|
holder: String,
|
||||||
|
|
||||||
|
/// What kind of lock this is.
|
||||||
|
kind: LockKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type PackedLock = AsymBox<CborSerde<LockDesc>>;
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub enum LockKind {
|
||||||
|
/// Lock can coexist with other Shared and PendingExclusive locks.
|
||||||
|
/// Lock may not be created when there are non-Shared locks.
|
||||||
|
Shared,
|
||||||
|
|
||||||
|
/// Lock can coexist with Shared locks.
|
||||||
|
PendingExclusive,
|
||||||
|
|
||||||
|
/// Lock can not coexist with other locks at all.
|
||||||
|
Exclusive,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LockKind {
|
||||||
|
pub fn conflicts_with_lock(self, newer_lock: LockKind) -> bool {
|
||||||
|
match (self, newer_lock) {
|
||||||
|
(LockKind::Shared, LockKind::Shared) => false,
|
||||||
|
// We can add PendingExclusives after a Shared, but not the other way around.
|
||||||
|
(LockKind::Shared, LockKind::PendingExclusive) => false,
|
||||||
|
_ => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct LockHandle {
|
||||||
|
/// The latest lock descriptor
|
||||||
|
lock: Arc<RwLock<LockDesc>>,
|
||||||
|
|
||||||
|
/// Our lock ID
|
||||||
|
lock_id: String,
|
||||||
|
|
||||||
|
/// Path to the lock
|
||||||
|
lock_path: WormPathBuf,
|
||||||
|
|
||||||
|
/// A signal for relinquishing the lock.
|
||||||
|
lock_release_tx: Option<oneshot::Sender<()>>,
|
||||||
|
|
||||||
|
/// Handle for waiting for a graceful shutdown of the lock.
|
||||||
|
lock_task_join_handle: Option<JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for LockHandle {
|
||||||
|
fn drop(&mut self) {
if let Some(lock_release_tx) = self.lock_release_tx.take() {
lock_release_tx.send(()).expect("can't drop lock");
}
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LockHandle {
|
||||||
|
pub async fn close(mut self) -> eyre::Result<()> {
|
||||||
|
self.lock_release_tx.take().unwrap().send(()).map_err(|_| eyre!("can't drop lock"))?;
|
||||||
|
self.lock_task_join_handle.take().unwrap().await
|
||||||
|
.context("lock task fail")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_active_at(&self, kind: LockKind, now: DateTime<Utc>) -> bool {
|
||||||
|
let lock = self.lock.read().unwrap();
|
||||||
|
(lock.kind == kind || kind == LockKind::Shared) && lock.expires_at > now
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true iff the lock is active now.
|
||||||
|
/// This actually looks 1 minute into the future, to allow some margin for clock drift.
|
||||||
|
pub fn is_active_now(&self, kind: LockKind) -> bool {
|
||||||
|
let now = Utc::now() + Duration::minutes(1);
|
||||||
|
self.is_active_at(kind, now)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn new(
|
||||||
|
provider: Arc<impl WormFileProvider + 'static>,
|
||||||
|
kind: LockKind,
|
||||||
|
holder: String,
|
||||||
|
keyring: &Keyring,
|
||||||
|
) -> eyre::Result<LockHandle> {
|
||||||
|
let duration = Duration::minutes(10);
|
||||||
|
let until = Utc::now() + duration;
|
||||||
|
Self::new_until(provider, kind, holder, keyring, until, duration).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn new_until(
|
||||||
|
provider: Arc<impl WormFileProvider + 'static>,
|
||||||
|
kind: LockKind,
|
||||||
|
holder: String,
|
||||||
|
keyring: &Keyring,
|
||||||
|
until: DateTime<Utc>,
|
||||||
|
duration: Duration,
|
||||||
|
) -> eyre::Result<LockHandle> {
|
||||||
|
let lock_desc = LockDesc {
|
||||||
|
expires_at: until,
|
||||||
|
holder,
|
||||||
|
kind,
|
||||||
|
};
|
||||||
|
let w_locks = keyring.w_locks.clone().context("No w_locks key")?;
|
||||||
|
let r_locks = keyring.r_locks.clone().context("No r_locks key")?;
|
||||||
|
|
||||||
|
'retry: loop {
|
||||||
|
let lock_id = Uuid::new_v4().to_string();
|
||||||
|
let lock_path = WormPathBuf::new(format!("locks/{lock_id}")).unwrap();
|
||||||
|
let (lock_release_tx, lock_release_rx) = oneshot::channel();
|
||||||
|
|
||||||
|
let now = Utc::now();
|
||||||
|
|
||||||
|
// Stage 1: create lock
|
||||||
|
let stage1_locks = scan_locks(provider.as_ref(), &r_locks, now).await?;
|
||||||
|
if let Some(blocker) = find_lock_blocker(&stage1_locks, &lock_id, kind) {
|
||||||
|
let lock = &stage1_locks[blocker];
|
||||||
|
warn!("{:?} lock {} held by {} currently expiring at {} is blocking our potential lock.", lock.kind, lock_id, lock.holder, lock.expires_at);
|
||||||
|
|
||||||
|
tokio::time::sleep(tokio::time::Duration::from_secs(
|
||||||
|
(lock.expires_at - now).num_seconds().max(0) as u64 + 10,
|
||||||
|
))
|
||||||
|
.await;
|
||||||
|
continue 'retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut writer = provider.write().await?;
|
||||||
|
let packed_lock: PackedLock =
|
||||||
|
w_locks.make_locked_asymbox(CborSerde::serialise(&lock_desc)?);
|
||||||
|
writer.write_all(&packed_lock.into_byte_vec()).await?;
|
||||||
|
writer.flush().await?;
|
||||||
|
writer.finalise(lock_path.as_ref(), false).await?;
|
||||||
|
|
||||||
|
// Stage 2: confirm lock
|
||||||
|
let stage2_locks = scan_locks(provider.as_ref(), &r_locks, now).await?;
|
||||||
|
if let Some(blocker) = find_lock_blocker(&stage2_locks, &lock_id, kind) {
|
||||||
|
let lock = &stage2_locks[blocker];
|
||||||
|
warn!("{:?} lock {} held by {} currently expiring at {} blocked our lock; backing out.", lock.kind, lock_id, lock.holder, lock.expires_at);
|
||||||
|
|
||||||
|
// Back out our lock.
|
||||||
|
provider.delete(lock_path.as_ref()).await?;
|
||||||
|
tokio::time::sleep(tokio::time::Duration::from_secs(10)).await;
|
||||||
|
continue 'retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Acqured {:?} lock {}", kind, lock_id);
|
||||||
|
|
||||||
|
let lock = Arc::new(RwLock::new(lock_desc));
|
||||||
|
let lock2 = lock.clone();
|
||||||
|
|
||||||
|
// Good. Now start a background task for refreshing it as necessary.
|
||||||
|
// TODO spawn this onto a joinset and then make sure we release locks at end of program...
|
||||||
|
let lock_path2 = lock_path.clone();
|
||||||
|
let lock_task_join_handle = Some(tokio::spawn(async move {
|
||||||
|
if let Err(err) = lock_renewal(
|
||||||
|
provider,
|
||||||
|
lock_path2,
|
||||||
|
lock2,
|
||||||
|
lock_release_rx,
|
||||||
|
w_locks,
|
||||||
|
duration,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
error!("Lock renewal task failed: {err:?}");
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
|
||||||
|
break Ok(LockHandle {
|
||||||
|
lock,
|
||||||
|
lock_path,
|
||||||
|
lock_id,
|
||||||
|
lock_release_tx: Some(lock_release_tx),
|
||||||
|
lock_task_join_handle
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn upgrade_pending_exclusive(
|
||||||
|
&self,
|
||||||
|
provider: Arc<impl WormFileProvider + 'static>,
|
||||||
|
r_locks: &ReaderKey,
|
||||||
|
w_locks: &WriterKey,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
'retry: loop {
|
||||||
|
if !self.is_active_now(LockKind::PendingExclusive) {
|
||||||
|
bail!("PendingExclusive not active: can't upgrade to Exclusive");
|
||||||
|
}
|
||||||
|
|
||||||
|
let now = Utc::now();
|
||||||
|
let locks = scan_locks(provider.as_ref(), r_locks, Utc::now()).await?;
|
||||||
|
if let Some((conflicting_lock_id, conflicting_lock)) =
|
||||||
|
locks.iter().find(|(lock_id, _)| lock_id != &&self.lock_id)
|
||||||
|
{
|
||||||
|
warn!("Conflicting {:?} lock {} held by {:?} expiring at {} is blocking us from upgrading to Exclusive", conflicting_lock.kind, conflicting_lock_id, conflicting_lock.holder, conflicting_lock.expires_at);
|
||||||
|
tokio::time::sleep(tokio::time::Duration::from_secs(
|
||||||
|
(conflicting_lock.expires_at - now).num_seconds().max(0) as u64 + 10,
|
||||||
|
))
|
||||||
|
.await;
|
||||||
|
continue 'retry;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut lock = self.lock.write().unwrap();
|
||||||
|
lock.kind = LockKind::Exclusive;
|
||||||
|
let mut writer = provider.write().await?;
|
||||||
|
let packed_lock: PackedLock =
|
||||||
|
w_locks.make_locked_asymbox(CborSerde::serialise(&*lock)?);
|
||||||
|
writer.write_all(&packed_lock.into_byte_vec()).await?;
|
||||||
|
writer.flush().await?;
|
||||||
|
writer.finalise(self.lock_path.as_ref(), true).await?;
|
||||||
|
|
||||||
|
break Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn lock_renewal(
|
||||||
|
provider: Arc<impl WormFileProvider>,
|
||||||
|
lock_path: WormPathBuf,
|
||||||
|
lock: Arc<RwLock<LockDesc>>,
|
||||||
|
mut lock_release_rx: oneshot::Receiver<()>,
|
||||||
|
w_locks: WriterKey,
|
||||||
|
duration: Duration,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
loop {
|
||||||
|
let next_renewal = {
|
||||||
|
let lock = lock.read().unwrap();
|
||||||
|
let secs_until_renewal = ((lock.expires_at - Utc::now()).num_seconds() - 60).max(0);
|
||||||
|
Instant::now() + tokio::time::Duration::from_secs(secs_until_renewal as u64)
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::select! {
|
||||||
|
_ = &mut lock_release_rx => {
|
||||||
|
provider.delete(lock_path.as_ref()).await
|
||||||
|
.context("failed to delete lock upon release")?;
|
||||||
|
break Ok(());
|
||||||
|
},
|
||||||
|
_ = tokio::time::sleep_until(next_renewal) => {
|
||||||
|
// nop.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let old_lock_desc: LockDesc = {
|
||||||
|
let lock = lock.read().unwrap();
|
||||||
|
lock.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let now = Utc::now();
|
||||||
|
if now > old_lock_desc.expires_at {
|
||||||
|
error!("Missed lock renewal; {now} > {}", old_lock_desc.expires_at);
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_lock_desc = LockDesc {
|
||||||
|
expires_at: now + duration,
|
||||||
|
..old_lock_desc.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut writer = provider
|
||||||
|
.write()
|
||||||
|
.await
|
||||||
|
.context("failed to acquire writing handle for write of packed lock")?;
|
||||||
|
let packed_lock: PackedLock = w_locks.make_locked_asymbox(
|
||||||
|
CborSerde::serialise(&new_lock_desc)
|
||||||
|
.context("failed to serialise new lock description")?,
|
||||||
|
);
|
||||||
|
writer
|
||||||
|
.write_all(&packed_lock.into_byte_vec())
|
||||||
|
.await
|
||||||
|
.context("failed to write packed lock")?;
|
||||||
|
writer
|
||||||
|
.flush()
|
||||||
|
.await
|
||||||
|
.context("failed to flush write of packed lock")?;
|
||||||
|
writer
|
||||||
|
.finalise(lock_path.as_ref(), true)
|
||||||
|
.await
|
||||||
|
.context("failed to finalise write of packed lock")?;
|
||||||
|
|
||||||
|
*(lock.write().unwrap()) = new_lock_desc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn scan_locks(
|
||||||
|
provider: &impl WormFileProvider,
|
||||||
|
r_locks: &ReaderKey,
|
||||||
|
now: DateTime<Utc>,
|
||||||
|
) -> eyre::Result<BTreeMap<String, LockDesc>> {
|
||||||
|
let mut result = BTreeMap::new();
|
||||||
|
|
||||||
|
let files = provider
|
||||||
|
.list(WormPath::new("locks").unwrap())
|
||||||
|
.await
|
||||||
|
.context("failed to list locks")?;
|
||||||
|
for file in files {
|
||||||
|
let (_, lock_id) = file
|
||||||
|
.as_ref()
|
||||||
|
.as_str()
|
||||||
|
.rsplit_once('/')
|
||||||
|
.context("bad lock split")?;
|
||||||
|
|
||||||
|
let mut reader = provider.read(&file).await?;
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
reader.read_to_end(&mut buf).await?;
|
||||||
|
let packed_lock = PackedLock::from_byte_vec(buf);
|
||||||
|
let lock_desc = r_locks
|
||||||
|
.unlock_asymbox(packed_lock)
|
||||||
|
.context("failed to decrypt lock")?
|
||||||
|
.deserialise()?;
|
||||||
|
|
||||||
|
if lock_desc.expires_at > now {
|
||||||
|
result.insert(lock_id.to_owned(), lock_desc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_lock_blocker<'a>(
|
||||||
|
locks: &'a BTreeMap<String, LockDesc>,
|
||||||
|
our_lock_id: &'a str,
|
||||||
|
our_lock_kind: LockKind,
|
||||||
|
) -> Option<&'a str> {
|
||||||
|
for (lock_id, lock_desc) in locks {
|
||||||
|
if lock_id == our_lock_id {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if lock_desc.kind.conflicts_with_lock(our_lock_kind) {
|
||||||
|
return Some(lock_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
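A tiny sketch (not from this commit) spelling out the compatibility rules encoded by conflicts_with_lock above:

// Illustrative sketch only: which lock kinds may be added alongside an existing lock.
fn lock_compatibility_example() {
    use yama_pile::locks::LockKind::{Exclusive, PendingExclusive, Shared};

    // Shared locks coexist, and a PendingExclusive may be queued behind Shared locks.
    assert!(!Shared.conflicts_with_lock(Shared));
    assert!(!Shared.conflicts_with_lock(PendingExclusive));

    // But nothing new may be added behind a PendingExclusive or Exclusive lock.
    assert!(PendingExclusive.conflicts_with_lock(Shared));
    assert!(Exclusive.conflicts_with_lock(Shared));
    assert!(Shared.conflicts_with_lock(Exclusive));
}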
19
yama_pile/src/pointers.rs
Normal file
@ -0,0 +1,19 @@
use crate::tree::RootTreeNode;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use yama_midlevel_crypto::asym_box::AsymBox;
use yama_midlevel_crypto::byte_layer::CborSerde;
use yama_midlevel_crypto::zstd_box::Zstd;

/// Pointer
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Pointer {
    pub parent: Option<String>,
    #[serde(flatten)]
    pub root: RootTreeNode,
    pub uids: BTreeMap<u16, String>,
    pub gids: BTreeMap<u16, String>,
}

/// Pointer file as stored on disk
pub type PackedPointer = AsymBox<Zstd<CborSerde<Pointer>>>;
978
yama_pile/src/tree.rs
Normal file
@ -0,0 +1,978 @@
|
|||||||
|
/*
|
||||||
|
This file is part of Yama.
|
||||||
|
|
||||||
|
Yama is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
Yama is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::btree_map::Entry;
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::fmt::{Debug, Formatter};
|
||||||
|
use std::fs::Metadata;
|
||||||
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
|
||||||
|
use crate::definitions::RecursiveChunkRef;
|
||||||
|
use crate::tree::unpopulated::ScanEntry;
|
||||||
|
use eyre::{bail, Context, ContextCompat};
|
||||||
|
use patricia_tree::PatriciaMap;
|
||||||
|
|
||||||
|
pub mod unpopulated;
|
||||||
|
|
||||||
|
/// Given a file's metadata, returns the mtime in milliseconds.
|
||||||
|
pub fn mtime_msec(metadata: &Metadata) -> u64 {
|
||||||
|
(metadata.mtime() * 1000 + metadata.mtime_nsec() / 1_000_000) as u64
|
||||||
|
}
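// Editor's note (not part of this commit): e.g. mtime = 1_700_000_000 s with
// mtime_nsec = 123_456_789 gives 1_700_000_000 * 1000 + 123 = 1_700_000_000_123 ms.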
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct RootTreeNode {
|
||||||
|
pub name: String,
|
||||||
|
pub node: TreeNode,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||||
|
pub enum TreeNode {
|
||||||
|
NormalFile {
|
||||||
|
/// modification time in ms
|
||||||
|
mtime: u64,
|
||||||
|
#[serde(flatten)]
|
||||||
|
ownership: FilesystemOwnership,
|
||||||
|
#[serde(flatten)]
|
||||||
|
permissions: FilesystemPermissions,
|
||||||
|
size: u64,
|
||||||
|
#[serde(flatten)]
|
||||||
|
content: RecursiveChunkRef,
|
||||||
|
},
|
||||||
|
Directory {
|
||||||
|
#[serde(flatten)]
|
||||||
|
ownership: FilesystemOwnership,
|
||||||
|
#[serde(flatten)]
|
||||||
|
permissions: FilesystemPermissions,
|
||||||
|
children: BTreeMap<String, TreeNode>,
|
||||||
|
},
|
||||||
|
SymbolicLink {
|
||||||
|
#[serde(flatten)]
|
||||||
|
ownership: FilesystemOwnership,
|
||||||
|
target: String,
|
||||||
|
},
|
||||||
|
// TODO is there any other kind of file we need to store?
|
||||||
|
Deleted,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TreeNode {
|
||||||
|
pub fn metadata_invalidates(&self, other: &TreeNode) -> eyre::Result<bool> {
|
||||||
|
Ok(match self {
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime,
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
if let TreeNode::NormalFile {
|
||||||
|
mtime: other_mtime,
|
||||||
|
ownership: other_ownership,
|
||||||
|
permissions: other_permissions,
|
||||||
|
size: other_size,
|
||||||
|
..
|
||||||
|
} = other
|
||||||
|
{
|
||||||
|
mtime != other_mtime
|
||||||
|
|| size != other_size
|
||||||
|
|| ownership != other_ownership
|
||||||
|
|| permissions != other_permissions
|
||||||
|
} else {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
children,
|
||||||
|
} => {
|
||||||
|
if let TreeNode::Directory {
|
||||||
|
ownership: other_ownership,
|
||||||
|
permissions: other_permissions,
|
||||||
|
children: other_children,
|
||||||
|
} = other
|
||||||
|
{
|
||||||
|
if ownership != other_ownership || permissions != other_permissions {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
if children.len() != other_children.len() {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
for ((left_name, left_node), (right_name, right_node)) in
|
||||||
|
children.iter().zip(other_children.iter())
|
||||||
|
{
|
||||||
|
if left_name != right_name || left_node.metadata_invalidates(right_node)? {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TreeNode::SymbolicLink { ownership, target } => {
|
||||||
|
if let TreeNode::SymbolicLink {
|
||||||
|
ownership: other_ownership,
|
||||||
|
target: other_target,
|
||||||
|
} = other
|
||||||
|
{
|
||||||
|
ownership != other_ownership || target != other_target
|
||||||
|
} else {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TreeNode::Deleted => {
|
||||||
|
// unreachable
|
||||||
|
bail!("Why is Deleted here?");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn count_normal_files(&self) -> u32 {
|
||||||
|
match self {
|
||||||
|
TreeNode::NormalFile { .. } => 1,
|
||||||
|
TreeNode::Directory { children, .. } => children
|
||||||
|
.values()
|
||||||
|
.map(|child| child.count_normal_files())
|
||||||
|
.sum(),
|
||||||
|
_ => 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn visit<F: FnMut(&TreeNode, &str) -> eyre::Result<()>>(
|
||||||
|
&self,
|
||||||
|
func: &mut F,
|
||||||
|
prefix: String,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
func(self, &prefix)?;
|
||||||
|
if let TreeNode::Directory { children, .. } = &self {
|
||||||
|
for (name, child) in children.iter() {
|
||||||
|
if prefix.is_empty() {
|
||||||
|
// don't want a slash prefix
|
||||||
|
child.visit(func, name.clone())?;
|
||||||
|
} else {
|
||||||
|
child.visit(func, format!("{}/{}", prefix, name))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn visit_mut<F: FnMut(&mut TreeNode, &str) -> eyre::Result<()>>(
|
||||||
|
&mut self,
|
||||||
|
func: &mut F,
|
||||||
|
prefix: String,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
func(self, &prefix)?;
|
||||||
|
if let TreeNode::Directory { children, .. } = self {
|
||||||
|
for (name, child) in children.iter_mut() {
|
||||||
|
if prefix.is_empty() {
|
||||||
|
// don't want a slash prefix
|
||||||
|
child.visit_mut(func, name.clone())?;
|
||||||
|
} else {
|
||||||
|
child.visit_mut(func, format!("{}/{}", prefix, name))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn exists(&self, pieces: &[&str]) -> bool {
|
||||||
|
match pieces.first() {
|
||||||
|
None => true,
|
||||||
|
Some(subpath) => {
|
||||||
|
if let TreeNode::Directory { children, .. } = self {
|
||||||
|
match children.get(*subpath) {
|
||||||
|
None => false,
|
||||||
|
Some(child) => child.exists(&pieces[1..]),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recurses into a child by name, or returns Err with a reason.
|
||||||
|
pub fn child(&mut self, name: &str) -> Result<&mut TreeNode, &'static str> {
|
||||||
|
match self {
|
||||||
|
TreeNode::NormalFile { .. } => Err("not a directory: normal file"),
|
||||||
|
TreeNode::Directory { children, .. } => match children.get_mut(name) {
|
||||||
|
None => Err("child not in directory"),
|
||||||
|
Some(node) => Ok(node),
|
||||||
|
},
|
||||||
|
TreeNode::SymbolicLink { .. } => Err("not a directory: symlink"),
|
||||||
|
TreeNode::Deleted => Err("not a directory: deleted"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flattens this treenode to a PatriciaMap of ScanEntries.
|
||||||
|
pub fn flatten(&self) -> eyre::Result<PatriciaMap<ScanEntry>> {
|
||||||
|
let mut out = PatriciaMap::new();
|
||||||
|
Self::flatten_impl(self, "", &mut out)?;
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flatten_impl(
|
||||||
|
tree_node: &TreeNode,
|
||||||
|
prefix: &str,
|
||||||
|
out: &mut PatriciaMap<ScanEntry>,
|
||||||
|
) -> eyre::Result<()> {
|
||||||
|
match tree_node {
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime,
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size,
|
||||||
|
content: _,
|
||||||
|
} => {
|
||||||
|
out.insert(
|
||||||
|
prefix,
|
||||||
|
ScanEntry::NormalFile {
|
||||||
|
mtime: *mtime,
|
||||||
|
ownership: ownership.clone(),
|
||||||
|
permissions: permissions.clone(),
|
||||||
|
size: *size,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
children,
|
||||||
|
} => {
|
||||||
|
out.insert(
|
||||||
|
prefix,
|
||||||
|
ScanEntry::Directory {
|
||||||
|
ownership: ownership.clone(),
|
||||||
|
permissions: permissions.clone(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
for (child_name, node) in children {
|
||||||
|
let new_prefix = if prefix.is_empty() {
|
||||||
|
child_name.clone()
|
||||||
|
} else {
|
||||||
|
format!("{}/{}", prefix, child_name)
|
||||||
|
};
|
||||||
|
Self::flatten_impl(node, &new_prefix, out)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TreeNode::SymbolicLink { ownership, target } => {
|
||||||
|
out.insert(
|
||||||
|
prefix,
|
||||||
|
ScanEntry::SymbolicLink {
|
||||||
|
ownership: ownership.clone(),
|
||||||
|
target: target.clone(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
TreeNode::Deleted => {
|
||||||
|
bail!("found Deleted at {prefix:?} when flattening");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct FilesystemOwnership {
|
||||||
|
pub uid: u16,
|
||||||
|
pub gid: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for FilesystemOwnership {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}:{}", self.uid, self.gid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct FilesystemPermissions {
|
||||||
|
pub mode: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for FilesystemPermissions {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", unix_mode::to_string(self.mode))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Differentiates a node in place.
|
||||||
|
/// This makes `old` the parent of `new` (though it is up to the caller to properly update the
|
||||||
|
/// `PointerData` to reflect this!).
|
||||||
|
/// Loosely speaking, `new` is modified to contain the differences that, when applied to `old`, will
|
||||||
|
/// result in the original value of `new`.
|
||||||
|
/// See `integrate_node_in_place` for the inverse of this operation.
|
||||||
|
pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::Result<()> {
|
||||||
|
if let TreeNode::Directory { children, .. } = new {
|
||||||
|
if let TreeNode::Directory {
|
||||||
|
children: old_children,
|
||||||
|
..
|
||||||
|
} = old
|
||||||
|
{
|
||||||
|
for (name, old_node) in old_children.iter() {
|
||||||
|
match children.entry(name.clone()) {
|
||||||
|
Entry::Vacant(vacant_entry) => {
|
||||||
|
vacant_entry.insert(TreeNode::Deleted);
|
||||||
|
}
|
||||||
|
Entry::Occupied(occupied_entry) => {
|
||||||
|
if !occupied_entry.get().metadata_invalidates(old_node)? {
|
||||||
|
// The entry is the same, so we don't need to store it!
|
||||||
|
occupied_entry.remove_entry();
|
||||||
|
} else {
|
||||||
|
differentiate_node_in_place(occupied_entry.into_mut(), old_node)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Integrates a node in place.
|
||||||
|
/// This makes `new` no longer have a parent (remember, the caller is responsible for updating
|
||||||
|
/// `PointerData` appropriately if needed to reflect this).
|
||||||
|
///
|
||||||
|
/// Loosely speaking, `new` is treated as a set of differences that are applied to `old`, though the
|
||||||
|
/// result is in-place.
|
||||||
|
///
|
||||||
|
/// Preconditions:
|
||||||
|
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
|
||||||
|
/// - `old` is the parent of `new`
|
||||||
|
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) {
|
||||||
|
if let TreeNode::Directory { children, .. } = new {
|
||||||
|
if let TreeNode::Directory {
|
||||||
|
children: old_children,
|
||||||
|
..
|
||||||
|
} = old
|
||||||
|
{
|
||||||
|
for (name, node) in old_children.iter() {
|
||||||
|
match children.entry(name.clone()) {
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
entry.insert(node.clone());
|
||||||
|
}
|
||||||
|
Entry::Occupied(entry) => {
|
||||||
|
if entry.get() == &TreeNode::Deleted {
|
||||||
|
// We don't insert the old node but we do remove the 'deleted' marker
|
||||||
|
// node!
|
||||||
|
entry.remove();
|
||||||
|
} else {
|
||||||
|
integrate_node_in_place(entry.into_mut(), node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// the node stays the same...
|
||||||
|
// intentional NOP!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn assemble_tree_from_scan_entries(
|
||||||
|
scan: PatriciaMap<ScanEntry>,
|
||||||
|
mut chunkings: PatriciaMap<(RecursiveChunkRef, u64)>,
|
||||||
|
) -> eyre::Result<TreeNode> {
|
||||||
|
let mut dirs: BTreeMap<String, BTreeMap<String, TreeNode>> = BTreeMap::new();
|
||||||
|
// special-case the root ("")
|
||||||
|
dirs.insert(String::new(), BTreeMap::new());
|
||||||
|
|
||||||
|
for (key, entry) in scan.into_iter() {
|
||||||
|
let key_string = String::from_utf8(key).context("bad UTF-8 in PMap")?;
|
||||||
|
let (parent_dir_name, child_name) =
|
||||||
|
key_string.rsplit_once('/').unwrap_or(("", &key_string));
|
||||||
|
match entry {
|
||||||
|
ScanEntry::NormalFile {
|
||||||
|
mtime,
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size: _unverified_size_ignore,
|
||||||
|
} => {
|
||||||
|
let (content, size) = chunkings
|
||||||
|
.remove(&key_string)
|
||||||
|
.context("bad chunkings PMap: missing entry")?;
|
||||||
|
|
||||||
|
// note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'.
|
||||||
|
// That's fine. We'll patch this up later.
|
||||||
|
dirs.get_mut(parent_dir_name)
|
||||||
|
.context("bad PMap: parent not seen first")?
|
||||||
|
.insert(
|
||||||
|
child_name.to_owned(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime,
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
size,
|
||||||
|
content,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ScanEntry::Directory {
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
} => {
|
||||||
|
dirs.insert(key_string.clone(), BTreeMap::new());
|
||||||
|
// note: for the root, this inserts the root directory entry as a child called "" within the root.
|
||||||
|
// That's fine. We'll patch this up later.
|
||||||
|
dirs.get_mut(parent_dir_name)
|
||||||
|
.context("bad PMap: parent not seen first")?
|
||||||
|
.insert(
|
||||||
|
child_name.to_owned(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership,
|
||||||
|
permissions,
|
||||||
|
children: BTreeMap::new(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ScanEntry::SymbolicLink { ownership, target } => {
|
||||||
|
// note: for the root, this inserts the root symlink entry as a child called "" within a fake root 'directory'.
|
||||||
|
// That's fine. We'll patch this up later.
|
||||||
|
dirs.get_mut(parent_dir_name)
|
||||||
|
.context("bad PMap: parent not seen first")?
|
||||||
|
.insert(
|
||||||
|
child_name.to_owned(),
|
||||||
|
TreeNode::SymbolicLink { ownership, target },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now roll up the directories. In Rustc v1.66 it'd be nice to use pop_last()...
|
||||||
|
while let Some(last_key) = dirs.keys().last().cloned() {
|
||||||
|
let mut last_children = dirs.remove(&last_key).unwrap();
|
||||||
|
if last_key.is_empty() {
|
||||||
|
assert!(
|
||||||
|
dirs.is_empty(),
|
||||||
|
"when pulling out root pseudo-dir, dirs must be empty for roll-up."
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut real_root = last_children.remove("").unwrap();
|
||||||
|
if let TreeNode::Directory { children, .. } = &mut real_root {
|
||||||
|
*children = last_children;
|
||||||
|
} else if !last_children.is_empty() {
|
||||||
|
bail!("root is not a directory but it contains children...");
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(real_root);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want to roll up the directory last/key -> {child -> ...}
|
||||||
|
// so last -> {key -> {child -> ...}}
|
||||||
|
let (parent_dir, child_name) = last_key.rsplit_once('/').unwrap_or(("", &last_key));
|
||||||
|
let parent = dirs
|
||||||
|
.get_mut(parent_dir)
|
||||||
|
.context("bad PMap? no parent in rollup")?;
|
||||||
|
let child_in_parent = parent
|
||||||
|
.get_mut(child_name)
|
||||||
|
.context("dir child not populated")?;
|
||||||
|
if let TreeNode::Directory { children, .. } = child_in_parent {
|
||||||
|
*children = last_children;
|
||||||
|
} else {
|
||||||
|
bail!("child in parent not a directory...");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bail!("no root found; bad PMap or bad roll-up???");
|
||||||
|
}
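// Editor's sketch (not part of this commit): `flatten` and
// `assemble_tree_from_scan_entries` are intended to be inverses, provided a
// (RecursiveChunkRef, size) entry is supplied for every NormalFile path. For
// a tree containing no normal files, no chunkings are needed at all:
//
//     let tree = TreeNode::Directory {
//         ownership: FilesystemOwnership { uid: 0, gid: 0 },
//         permissions: FilesystemPermissions { mode: 0o755 },
//         children: BTreeMap::new(),
//     };
//     let scan = tree.flatten()?;
//     let rebuilt = assemble_tree_from_scan_entries(scan, PatriciaMap::new())?;
//     assert_eq!(rebuilt, tree);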
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::definitions::RecursiveChunkRef;
|
||||||
|
use crate::tree::{
|
||||||
|
differentiate_node_in_place, integrate_node_in_place, FilesystemOwnership,
|
||||||
|
FilesystemPermissions, TreeNode,
|
||||||
|
};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use yama_midlevel_crypto::chunk_id::ChunkId;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_differentiate_in_place_primitive() {
|
||||||
|
let mut new = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: BTreeMap::new(),
|
||||||
|
};
|
||||||
|
let old = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: BTreeMap::new(),
|
||||||
|
};
|
||||||
|
assert!(differentiate_node_in_place(&mut new, &old).is_ok());
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
new,
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: BTreeMap::new()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tests the creation of a file in the tree.
|
||||||
|
#[test]
|
||||||
|
pub fn test_differentiate_in_place_create_only() {
|
||||||
|
let alice = TreeNode::NormalFile {
|
||||||
|
mtime: 98347523,
|
||||||
|
ownership: FilesystemOwnership { uid: 43, gid: 48 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1338 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([36; 32]),
|
||||||
|
depth: 22,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
let bob_new = TreeNode::SymbolicLink {
|
||||||
|
ownership: FilesystemOwnership { uid: 43, gid: 48 },
|
||||||
|
target: "alice".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut children_new = BTreeMap::new();
|
||||||
|
children_new.insert("bob".to_owned(), bob_new.clone());
|
||||||
|
children_new.insert("alice".to_owned(), alice.clone());
|
||||||
|
|
||||||
|
let mut children_old = BTreeMap::new();
|
||||||
|
children_old.insert("alice".to_owned(), alice.clone());
|
||||||
|
|
||||||
|
let mut new = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: children_new.clone(),
|
||||||
|
};
|
||||||
|
let old = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 41, gid: 46 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1336 },
|
||||||
|
children: children_old.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut children_result = BTreeMap::new();
|
||||||
|
children_result.insert("bob".to_owned(), bob_new);
|
||||||
|
|
||||||
|
assert!(differentiate_node_in_place(&mut new, &old).is_ok());
|
||||||
|
assert_eq!(
|
||||||
|
new,
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: children_result
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tests only a change in metadata in the tree.
|
||||||
|
#[test]
|
||||||
|
pub fn test_differentiate_in_place_meta_only() {
|
||||||
|
let alice = TreeNode::NormalFile {
|
||||||
|
mtime: 98347523,
|
||||||
|
ownership: FilesystemOwnership { uid: 43, gid: 48 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1338 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([36; 32]),
|
||||||
|
depth: 22,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut children = BTreeMap::new();
|
||||||
|
children.insert("alice".to_owned(), alice);
|
||||||
|
|
||||||
|
let mut new = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: children.clone(),
|
||||||
|
};
|
||||||
|
let old = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 41, gid: 46 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1336 },
|
||||||
|
children: children.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
assert!(differentiate_node_in_place(&mut new, &old).is_ok());
|
||||||
|
assert_eq!(
|
||||||
|
new,
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: BTreeMap::new()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tests that nodes that vanish lead to creation of a Deleted node.
|
||||||
|
#[test]
|
||||||
|
pub fn test_differences_in_place_deleted() {
|
||||||
|
let alice = TreeNode::NormalFile {
|
||||||
|
mtime: 98347523,
|
||||||
|
ownership: FilesystemOwnership { uid: 43, gid: 48 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1338 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([36; 32]),
|
||||||
|
depth: 22,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
let bob_old = TreeNode::SymbolicLink {
|
||||||
|
ownership: FilesystemOwnership { uid: 43, gid: 48 },
|
||||||
|
target: "alice".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut children_old = BTreeMap::new();
|
||||||
|
children_old.insert("bob".to_owned(), bob_old.clone());
|
||||||
|
children_old.insert("alice".to_owned(), alice.clone());
|
||||||
|
|
||||||
|
let mut children_new = BTreeMap::new();
|
||||||
|
children_new.insert("alice".to_owned(), alice.clone());
|
||||||
|
|
||||||
|
let old = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 42, gid: 47 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1337 },
|
||||||
|
children: children_old.clone(),
|
||||||
|
};
|
||||||
|
let mut new = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 41, gid: 46 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1336 },
|
||||||
|
children: children_new.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut children_result = BTreeMap::new();
|
||||||
|
children_result.insert("bob".to_owned(), TreeNode::Deleted);
|
||||||
|
|
||||||
|
assert!(differentiate_node_in_place(&mut new, &old).is_ok());
|
||||||
|
assert_eq!(
|
||||||
|
new,
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 41, gid: 46 },
|
||||||
|
permissions: FilesystemPermissions { mode: 1336 },
|
||||||
|
children: children_result
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_differentiate_node_in_place_mega_example() {
|
||||||
|
// TODO extend this example
|
||||||
|
let parent = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 47, gid: 49 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0660 },
|
||||||
|
children: vec![(
|
||||||
|
"dir1".to_string(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 46, gid: 50 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0550 },
|
||||||
|
children: vec![
|
||||||
|
(
|
||||||
|
"file1".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1996,
|
||||||
|
ownership: FilesystemOwnership { uid: 54, gid: 59 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0311 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([37; 32]),
|
||||||
|
depth: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file2".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1970,
|
||||||
|
ownership: FilesystemOwnership { uid: 55, gid: 60 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0321 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([42; 32]),
|
||||||
|
depth: 29,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let child_full = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 47, gid: 49 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0660 },
|
||||||
|
children: vec![(
|
||||||
|
"dir1".to_string(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 46, gid: 50 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0560 },
|
||||||
|
children: vec![
|
||||||
|
(
|
||||||
|
"file1".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1996,
|
||||||
|
ownership: FilesystemOwnership { uid: 54, gid: 59 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0311 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([37; 32]),
|
||||||
|
depth: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file42".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1970,
|
||||||
|
ownership: FilesystemOwnership { uid: 55, gid: 60 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0321 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([42; 32]),
|
||||||
|
depth: 29,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut child_diff = child_full.clone();
|
||||||
|
differentiate_node_in_place(&mut child_diff, &parent).unwrap();
|
||||||
|
|
||||||
|
let expected_child_diff = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 47, gid: 49 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0660 },
|
||||||
|
children: vec![(
|
||||||
|
"dir1".to_string(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 46, gid: 50 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0560 },
|
||||||
|
children: vec![
|
||||||
|
("file2".to_string(), TreeNode::Deleted),
|
||||||
|
(
|
||||||
|
"file42".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1970,
|
||||||
|
ownership: FilesystemOwnership { uid: 55, gid: 60 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0321 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([42; 32]),
|
||||||
|
depth: 29,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq!(child_diff, expected_child_diff);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_integrate_node_in_place_mega_example() {
|
||||||
|
// TODO extend this example
|
||||||
|
let parent = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 47, gid: 49 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0660 },
|
||||||
|
children: vec![(
|
||||||
|
"dir1".to_string(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 46, gid: 50 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0550 },
|
||||||
|
children: vec![
|
||||||
|
(
|
||||||
|
"file1".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1996,
|
||||||
|
ownership: FilesystemOwnership { uid: 54, gid: 59 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0311 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([37; 32]),
|
||||||
|
depth: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file2".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1970,
|
||||||
|
ownership: FilesystemOwnership { uid: 55, gid: 60 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0321 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([42; 32]),
|
||||||
|
depth: 29,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let child_diff = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 47, gid: 49 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0660 },
|
||||||
|
children: vec![(
|
||||||
|
"dir1".to_string(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 46, gid: 50 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0560 },
|
||||||
|
children: vec![
|
||||||
|
("file2".to_string(), TreeNode::Deleted),
|
||||||
|
(
|
||||||
|
"file42".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1970,
|
||||||
|
ownership: FilesystemOwnership { uid: 55, gid: 60 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0321 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([42; 32]),
|
||||||
|
depth: 29,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut child_full = child_diff.clone();
|
||||||
|
integrate_node_in_place(&mut child_full, &parent);
|
||||||
|
|
||||||
|
let expected_child_full = TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 47, gid: 49 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0660 },
|
||||||
|
children: vec![(
|
||||||
|
"dir1".to_string(),
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership { uid: 46, gid: 50 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0560 },
|
||||||
|
children: vec![
|
||||||
|
(
|
||||||
|
"file1".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1996,
|
||||||
|
ownership: FilesystemOwnership { uid: 54, gid: 59 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0311 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([37; 32]),
|
||||||
|
depth: 2,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file42".to_string(),
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 1970,
|
||||||
|
ownership: FilesystemOwnership { uid: 55, gid: 60 },
|
||||||
|
permissions: FilesystemPermissions { mode: 0321 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([42; 32]),
|
||||||
|
depth: 29,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
},
|
||||||
|
)]
|
||||||
|
.into_iter()
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq!(child_full, expected_child_full);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn example_file() -> TreeNode {
|
||||||
|
TreeNode::NormalFile {
|
||||||
|
mtime: 424242,
|
||||||
|
ownership: FilesystemOwnership {
|
||||||
|
uid: 1042,
|
||||||
|
gid: 1043,
|
||||||
|
},
|
||||||
|
permissions: FilesystemPermissions { mode: 0o760 },
|
||||||
|
size: 42,
|
||||||
|
content: RecursiveChunkRef {
|
||||||
|
chunk_id: ChunkId::from([0u8; 32]),
|
||||||
|
depth: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn example_dir(
|
||||||
|
file1: Option<(&str, TreeNode)>,
|
||||||
|
file2: Option<(&str, TreeNode)>,
|
||||||
|
) -> TreeNode {
|
||||||
|
let mut map = BTreeMap::new();
|
||||||
|
if let Some((name, file)) = file1 {
|
||||||
|
map.insert(name.to_owned(), file);
|
||||||
|
}
|
||||||
|
if let Some((name, file)) = file2 {
|
||||||
|
map.insert(name.to_owned(), file);
|
||||||
|
}
|
||||||
|
TreeNode::Directory {
|
||||||
|
ownership: FilesystemOwnership {
|
||||||
|
uid: 1042,
|
||||||
|
gid: 1043,
|
||||||
|
},
|
||||||
|
permissions: FilesystemPermissions { mode: 0o770 },
|
||||||
|
children: map,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_exists() {
|
||||||
|
let file = example_file();
|
||||||
|
assert!(file.exists(&[]));
|
||||||
|
assert!(!file.exists(&["anything"]));
|
||||||
|
|
||||||
|
let subdir = example_dir(Some(("fetchmailrc", example_file())), None);
|
||||||
|
let dir = example_dir(Some(("boot.img", example_file())), Some(("etc", subdir)));
|
||||||
|
assert!(dir.exists(&[]));
|
||||||
|
assert!(dir.exists(&["boot.img"]));
|
||||||
|
assert!(dir.exists(&["etc", "fetchmailrc"]));
|
||||||
|
assert!(!dir.exists(&["bin"]));
|
||||||
|
assert!(!dir.exists(&["etc", "resolv.conf"]));
|
||||||
|
assert!(!dir.exists(&["boot.img", "hehe"]));
|
||||||
|
}
|
||||||
|
}
27
yama_pile/src/tree/unpopulated.rs
Normal file
@ -0,0 +1,27 @@
use crate::tree::{FilesystemOwnership, FilesystemPermissions};
use serde::{Deserialize, Serialize};

/// A tree node, but unpopulated and not a tree.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum ScanEntry {
    NormalFile {
        /// modification time in ms
        mtime: u64,
        #[serde(flatten)]
        ownership: FilesystemOwnership,
        #[serde(flatten)]
        permissions: FilesystemPermissions,
        size: u64,
    },
    Directory {
        #[serde(flatten)]
        ownership: FilesystemOwnership,
        #[serde(flatten)]
        permissions: FilesystemPermissions,
    },
    SymbolicLink {
        #[serde(flatten)]
        ownership: FilesystemOwnership,
        target: String,
    },
}
226
yama_pile/src/utils.rs
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
use eyre::{bail, Context as EyreContext};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sha2::Digest;
|
||||||
|
use std::fmt::{Debug, Display, Formatter};
|
||||||
|
use std::io;
|
||||||
|
use std::io::SeekFrom;
|
||||||
|
use std::pin::Pin;
|
||||||
|
use std::task::{Context, Poll};
|
||||||
|
use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf};
|
||||||
|
use yama_midlevel_crypto::sym_stream::SymStreamKey;
|
||||||
|
use yama_wormfile::WormFileWriter;
|
||||||
|
|
||||||
|
pub struct HashedWormWriter<W: WormFileWriter> {
|
||||||
|
inner: W,
|
||||||
|
hasher: sha2::Sha256,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)]
|
||||||
|
#[serde(transparent)]
|
||||||
|
pub struct Sha256(pub [u8; 32]);
|
||||||
|
|
||||||
|
impl TryFrom<&str> for Sha256 {
|
||||||
|
type Error = eyre::Error;
|
||||||
|
|
||||||
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
||||||
|
if value.len() != 64 {
|
||||||
|
bail!("sha256 hexlength not 64 chars");
|
||||||
|
}
|
||||||
|
let decoded = hex::decode(value).context("failed to decode hex")?;
|
||||||
|
|
||||||
|
if decoded.len() != 32 {
|
||||||
|
bail!("wrong number of decoded bytes");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut bytes = [0u8; 32];
|
||||||
|
bytes.copy_from_slice(&decoded);
|
||||||
|
Ok(Sha256(bytes))
|
||||||
|
}
|
||||||
|
}
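// Editor's sketch (not part of this commit): parsing a 64-character lowercase
// hex digest (here, the SHA-256 of the empty string) and printing it back out
// via the Display impl below:
//
//     let hash = Sha256::try_from(
//         "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
//     )?;
//     assert_eq!(
//         hash.to_string(),
//         "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
//     );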
|
||||||
|
|
||||||
|
impl Debug for Sha256 {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Sha256({})", hex::encode(&self.0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for Sha256 {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", hex::encode(&self.0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<W: WormFileWriter> HashedWormWriter<W> {
|
||||||
|
pub fn new(writer: W) -> Self {
|
||||||
|
Self {
|
||||||
|
inner: writer,
|
||||||
|
hasher: sha2::Sha256::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<W: WormFileWriter + Unpin> AsyncWrite for HashedWormWriter<W> {
|
||||||
|
#[inline]
|
||||||
|
fn poll_write(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &[u8],
|
||||||
|
) -> Poll<Result<usize, io::Error>> {
|
||||||
|
let result = Pin::new(&mut self.inner).poll_write(cx, buf);
|
||||||
|
if let Poll::Ready(Ok(num_bytes_written)) = result {
|
||||||
|
// Once a write is complete, add the written bytes to the hasher.
|
||||||
|
self.hasher.update(&buf[0..num_bytes_written]);
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
|
||||||
|
Pin::new(&mut self.inner).poll_flush(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn poll_shutdown(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
) -> Poll<Result<(), io::Error>> {
|
||||||
|
Pin::new(&mut self.inner).poll_shutdown(cx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<W: WormFileWriter> HashedWormWriter<W> {
|
||||||
|
/// Finish hashing. Returns the hash and gives back the writer.
|
||||||
|
pub fn finalise(self) -> (W, Sha256) {
|
||||||
|
let mut output = [0; 32];
|
||||||
|
output.copy_from_slice(&self.hasher.finalize()[..]);
|
||||||
|
(self.inner, Sha256(output))
|
||||||
|
}
|
||||||
|
}
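// Editor's sketch (not part of this commit): typical use is to wrap a
// WormFileWriter, stream data through it, then recover the inner writer and
// the digest of everything that was written (tokio's AsyncWriteExt assumed
// to be in scope):
//
//     let mut hashed = HashedWormWriter::new(raw_writer);
//     hashed.write_all(&payload).await?;
//     hashed.flush().await?;
//     let (raw_writer, sha256) = hashed.finalise();
//     // `sha256` now covers exactly the bytes accepted by `raw_writer`.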
|
||||||
|
|
||||||
|
// TODO We should consider buffering writes so we don't waste encryptions. But that would make it
|
||||||
|
// a little more complex, so will save that for later...
|
||||||
|
pub struct SymStreamWriter<W: AsyncWrite> {
|
||||||
|
inner: W,
|
||||||
|
offset: u64,
|
||||||
|
sym_stream_key: SymStreamKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<W: AsyncWrite> SymStreamWriter<W> {
|
||||||
|
pub fn new(inner: W, sym_stream_key: SymStreamKey) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
offset: 0,
|
||||||
|
sym_stream_key,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finish(self) -> W {
|
||||||
|
self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn offset(&self) -> u64 {
|
||||||
|
self.offset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<W: AsyncWrite + Unpin> AsyncWrite for SymStreamWriter<W> {
|
||||||
|
fn poll_write(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &[u8],
|
||||||
|
) -> Poll<Result<usize, io::Error>> {
|
||||||
|
let mut enc_buf = buf.to_vec();
|
||||||
|
// Safety: Deny use of unencrypted `buf` from here on.
|
||||||
|
let buf = ();
|
||||||
|
let offset = self.offset;
|
||||||
|
self.sym_stream_key.apply_xor(offset, &mut enc_buf);
|
||||||
|
|
||||||
|
let result = Pin::new(&mut self.inner).poll_write(cx, &enc_buf);
|
||||||
|
|
||||||
|
if let Poll::Ready(Ok(num_bytes_written)) = result {
|
||||||
|
// Once a write is complete, add the offset to our internally tracked offset.
|
||||||
|
self.offset += num_bytes_written as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
|
||||||
|
Pin::new(&mut self.inner).poll_flush(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn poll_shutdown(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
) -> Poll<Result<(), io::Error>> {
|
||||||
|
Pin::new(&mut self.inner).poll_shutdown(cx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SymStreamReader<R: AsyncRead> {
|
||||||
|
inner: R,
|
||||||
|
offset: u64,
|
||||||
|
sym_stream_key: SymStreamKey,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: AsyncRead> SymStreamReader<R> {
|
||||||
|
pub fn new(inner: R, sym_stream_key: SymStreamKey) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
offset: 0,
|
||||||
|
sym_stream_key,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finish(self) -> R {
|
||||||
|
self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn offset(&self) -> u64 {
|
||||||
|
self.offset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: AsyncRead + Unpin> AsyncRead for SymStreamReader<R> {
|
||||||
|
fn poll_read(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &mut ReadBuf<'_>,
|
||||||
|
) -> Poll<std::io::Result<()>> {
|
||||||
|
let old_filled = buf.filled().len();
|
||||||
|
|
||||||
|
let result = Pin::new(&mut self.inner).poll_read(cx, buf);
|
||||||
|
|
||||||
|
if result.is_ready() {
|
||||||
|
let filled = buf.filled_mut();
|
||||||
|
let new_filled = filled.len();
|
||||||
|
let to_decrypt = &mut filled[old_filled..new_filled];
|
||||||
|
|
||||||
|
let offset = self.offset;
|
||||||
|
// eprintln!("read {} @ {offset}", to_decrypt.len());
|
||||||
|
self.sym_stream_key.apply_xor(offset, to_decrypt);
|
||||||
|
self.offset += to_decrypt.len() as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: AsyncRead + AsyncSeek + Unpin> AsyncSeek for SymStreamReader<R> {
|
||||||
|
fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> std::io::Result<()> {
|
||||||
|
// eprintln!("SS {position:?}");
|
||||||
|
Pin::new(&mut self.inner).start_seek(position)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<u64>> {
|
||||||
|
let result = Pin::new(&mut self.inner).poll_complete(cx);
|
||||||
|
if let Poll::Ready(Ok(new_offset)) = result {
|
||||||
|
// eprintln!("sought {new_offset}");
|
||||||
|
|
||||||
|
self.offset = new_offset;
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
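// Editor's sketch (not part of this commit): since the key stream is applied
// as a plain XOR at a byte offset, writing through SymStreamWriter and then
// reading the same bytes back through SymStreamReader with the same key
// (starting at offset 0) is a round trip. With tokio's AsyncWriteExt and
// AsyncReadExt in scope, and `key_w`/`key_r` being two handles to the same
// key material:
//
//     let mut w = SymStreamWriter::new(Vec::new(), key_w);
//     w.write_all(b"secret").await?;
//     let ciphertext = w.finish();
//     let mut r = SymStreamReader::new(std::io::Cursor::new(ciphertext), key_r);
//     let mut plaintext = Vec::new();
//     r.read_to_end(&mut plaintext).await?;
//     assert_eq!(plaintext, b"secret".to_vec());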
|
@ -8,3 +8,4 @@ edition = "2021"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
async-trait = "0.1.68"
|
async-trait = "0.1.68"
|
||||||
tokio = { version = "1.27.0", features = ["io-util"] }
|
tokio = { version = "1.27.0", features = ["io-util"] }
|
||||||
|
eyre = "0.6.8"
|
@ -3,6 +3,7 @@ use crate::{WormFileProvider, WormFileReader, WormFileWriter};
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::fmt::{Debug, Display, Formatter};
|
use std::fmt::{Debug, Display, Formatter};
|
||||||
|
use std::ops::DerefMut;
|
||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
|
|
||||||
pub struct BoxErr(Box<dyn Error + Send + Sync>);
|
pub struct BoxErr(Box<dyn Error + Send + Sync>);
|
||||||
@ -28,40 +29,43 @@ impl BoxErr {
|
|||||||
}
|
}
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
trait BoxableWormFileProvider: Debug + Send + Sync {
|
trait BoxableWormFileProvider: Debug + Send + Sync {
|
||||||
async fn is_dir_b(&self, path: &WormPath) -> Result<bool, BoxErr>;
|
async fn is_dir_b(&self, path: &WormPath) -> eyre::Result<bool>;
|
||||||
async fn is_regular_file_b(&self, path: &WormPath) -> Result<bool, BoxErr>;
|
async fn is_regular_file_b(&self, path: &WormPath) -> eyre::Result<bool>;
|
||||||
async fn list_b(&self, path: &WormPath) -> Result<Vec<WormPathBuf>, BoxErr>;
|
async fn list_b(&self, path: &WormPath) -> eyre::Result<Vec<WormPathBuf>>;
|
||||||
async fn read_b(&self, path: &WormPath) -> Result<Pin<Box<dyn WormFileReader>>, BoxErr>;
|
async fn read_b(&self, path: &WormPath) -> eyre::Result<Pin<Box<dyn WormFileReader>>>;
|
||||||
async fn write_b(&self) -> Result<Pin<Box<dyn WormFileWriter>>, BoxErr>;
|
async fn write_b(&self) -> eyre::Result<Pin<Box<dyn WormFileWriter>>>;
|
||||||
|
async fn delete_b(&self, path: &WormPath) -> eyre::Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<T: WormFileProvider> BoxableWormFileProvider for T {
|
impl<T: WormFileProvider> BoxableWormFileProvider for T {
|
||||||
async fn is_dir_b(&self, path: &WormPath) -> Result<bool, BoxErr> {
|
async fn is_dir_b(&self, path: &WormPath) -> eyre::Result<bool> {
|
||||||
self.is_dir(path).await.map_err(BoxErr::new)
|
self.is_dir(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn is_regular_file_b(&self, path: &WormPath) -> Result<bool, BoxErr> {
|
async fn is_regular_file_b(&self, path: &WormPath) -> eyre::Result<bool> {
|
||||||
self.is_regular_file(path).await.map_err(BoxErr::new)
|
self.is_regular_file(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn list_b(&self, path: &WormPath) -> Result<Vec<WormPathBuf>, BoxErr> {
|
async fn list_b(&self, path: &WormPath) -> eyre::Result<Vec<WormPathBuf>> {
|
||||||
self.list(path).await.map_err(BoxErr::new)
|
self.list(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn read_b(&self, path: &WormPath) -> Result<Pin<Box<dyn WormFileReader>>, BoxErr> {
|
async fn read_b(&self, path: &WormPath) -> eyre::Result<Pin<Box<dyn WormFileReader>>> {
|
||||||
self.read(path)
|
self.read(path)
|
||||||
.await
|
.await
|
||||||
.map_err(BoxErr::new)
|
|
||||||
.map(|wfr| Box::pin(wfr) as Pin<Box<dyn WormFileReader>>)
|
.map(|wfr| Box::pin(wfr) as Pin<Box<dyn WormFileReader>>)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn write_b(&self) -> Result<Pin<Box<dyn WormFileWriter>>, BoxErr> {
|
async fn write_b(&self) -> eyre::Result<Pin<Box<dyn WormFileWriter>>> {
|
||||||
self.write()
|
self.write()
|
||||||
.await
|
.await
|
||||||
.map_err(BoxErr::new)
|
|
||||||
.map(|wfw| Box::pin(wfw) as Pin<Box<dyn WormFileWriter>>)
|
.map(|wfw| Box::pin(wfw) as Pin<Box<dyn WormFileWriter>>)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn delete_b(&self, path: &WormPath) -> eyre::Result<()> {
|
||||||
|
self.delete(path).await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -69,44 +73,47 @@ pub struct BoxedWormFileProvider {
|
|||||||
inner: Box<dyn BoxableWormFileProvider>,
|
inner: Box<dyn BoxableWormFileProvider>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl BoxedWormFileProvider {
|
||||||
|
pub fn new(inner: impl WormFileProvider + 'static) -> BoxedWormFileProvider {
|
||||||
|
Self {
|
||||||
|
inner: Box::new(inner),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl WormFileProvider for BoxedWormFileProvider {
|
impl WormFileProvider for BoxedWormFileProvider {
|
||||||
type WormFileReader = Pin<Box<dyn WormFileReader>>;
|
type WormFileReader = Pin<Box<dyn WormFileReader>>;
|
||||||
type WormFileWriter = Pin<Box<dyn WormFileWriter>>;
|
type WormFileWriter = Pin<Box<dyn WormFileWriter>>;
|
||||||
type Error = BoxErr;
|
|
||||||
|
|
||||||
async fn is_dir(&self, path: impl AsRef<WormPath> + Send) -> Result<bool, Self::Error> {
|
async fn is_dir(&self, path: impl AsRef<WormPath> + Send) -> eyre::Result<bool> {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
self.inner.is_dir_b(path).await
|
self.inner.is_dir_b(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn is_regular_file(
|
async fn is_regular_file(&self, path: impl AsRef<WormPath> + Send) -> eyre::Result<bool> {
|
||||||
&self,
|
|
||||||
path: impl AsRef<WormPath> + Send,
|
|
||||||
) -> Result<bool, Self::Error> {
|
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
self.inner.is_regular_file_b(path).await
|
self.inner.is_regular_file_b(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn list(
|
async fn list(&self, path: impl AsRef<WormPath> + Send) -> eyre::Result<Vec<WormPathBuf>> {
|
||||||
&self,
|
|
||||||
path: impl AsRef<WormPath> + Send,
|
|
||||||
) -> Result<Vec<WormPathBuf>, Self::Error> {
|
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
self.inner.list_b(path).await
|
self.inner.list_b(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn read(
|
async fn read(&self, path: impl AsRef<WormPath> + Send) -> eyre::Result<Self::WormFileReader> {
|
||||||
&self,
|
|
||||||
path: impl AsRef<WormPath> + Send,
|
|
||||||
) -> Result<Self::WormFileReader, Self::Error> {
|
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
self.inner.read_b(path).await
|
self.inner.read_b(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn write(&self) -> Result<Self::WormFileWriter, Self::Error> {
|
async fn write(&self) -> eyre::Result<Self::WormFileWriter> {
|
||||||
self.inner.write_b().await
|
self.inner.write_b().await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn delete(&self, path: impl AsRef<WormPath> + Send) -> eyre::Result<()> {
|
||||||
|
let path = path.as_ref();
|
||||||
|
self.inner.delete_b(path).await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
@ -114,7 +121,16 @@ impl WormFileReader for Pin<Box<dyn WormFileReader>> {}
|
|||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl WormFileWriter for Pin<Box<dyn WormFileWriter>> {
|
impl WormFileWriter for Pin<Box<dyn WormFileWriter>> {
|
||||||
async fn finalise(self, target_path: &WormPath, replace: bool) -> std::io::Result<()> {
|
async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> std::io::Result<()> {
|
||||||
WormFileWriter::finalise(self, target_path, replace).await
|
self.deref_mut().finalise(target_path, replace).await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pub struct BoxedWormFileWriter(Pin<Box<dyn WormFileWriter>>)
|
||||||
|
//
|
||||||
|
// #[async_trait]
|
||||||
|
// impl WormFileWriter for BoxedWormFileWriter {
|
||||||
|
// async fn finalise(self, target_path: &WormPath, replace: bool) -> std::io::Result<()> {
|
||||||
|
// self.0.finalise(target_path, replace).await
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
Some files were not shown because too many files have changed in this diff.