From 7cd71695bc12b81cd1666dcbc8177be4fe9f5c64 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sun, 2 Apr 2023 20:46:47 +0100 Subject: [PATCH 01/51] Start of Yama & Datman v0.7.0-alpha.1 --- datman/Cargo.toml | 4 ++-- yama/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datman/Cargo.toml b/datman/Cargo.toml index ef308a7..58c910a 100644 --- a/datman/Cargo.toml +++ b/datman/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "datman" -version = "0.6.0-alpha.5" +version = "0.7.0-alpha.1" authors = ["Olivier 'reivilibre' "] edition = "2021" repository = "https://bics.ga/reivilibre/yama" @@ -30,7 +30,7 @@ humansize = "1.1.1" chrono = "0.4.19" itertools = "0.10.1" hostname = "0.3.1" -yama = { path = "../yama", version = "0.6.0-alpha.5" } +yama = { path = "../yama", version = "0.7.0-alpha.1" } metrics = "0.17.1" bare-metrics-recorder = { version = "0.1.0" } comfy-table = "6.0.0-rc.1" diff --git a/yama/Cargo.toml b/yama/Cargo.toml index 64ba592..1e1e50e 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yama" -version = "0.6.0-alpha.5" +version = "0.7.0-alpha.1" authors = ["Olivier 'reivilibre' "] edition = "2018" description = "Deduplicated, compressed and encrypted content pile manager" From 1fe4d9d2f3f88441981b82470ed31eb48dfdeaad Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sun, 2 Apr 2023 20:47:34 +0100 Subject: [PATCH 02/51] Add yama_wormfile crates These will be a useful component of the new on-disk storage format --- Cargo.lock | 178 ++++++++++++++++++++++++---- Cargo.toml | 4 + shell.nix | 2 +- yama_wormfile/Cargo.toml | 10 ++ yama_wormfile/src/boxed.rs | 120 +++++++++++++++++++ yama_wormfile/src/lib.rs | 64 ++++++++++ yama_wormfile/src/paths.rs | 96 +++++++++++++++ yama_wormfile_fs/Cargo.toml | 13 ++ yama_wormfile_fs/src/lib.rs | 216 ++++++++++++++++++++++++++++++++++ yama_wormfile_s3/Cargo.toml | 8 ++ yama_wormfile_s3/src/lib.rs | 0 yama_wormfile_sftp/Cargo.toml | 8 ++ yama_wormfile_sftp/src/lib.rs | 0 13 files changed, 698 insertions(+), 21 deletions(-) create mode 100644 yama_wormfile/Cargo.toml create mode 100644 yama_wormfile/src/boxed.rs create mode 100644 yama_wormfile/src/lib.rs create mode 100644 yama_wormfile/src/paths.rs create mode 100644 yama_wormfile_fs/Cargo.toml create mode 100644 yama_wormfile_fs/src/lib.rs create mode 100644 yama_wormfile_s3/Cargo.toml create mode 100644 yama_wormfile_s3/src/lib.rs create mode 100644 yama_wormfile_sftp/Cargo.toml create mode 100644 yama_wormfile_sftp/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index e35a146..d2e786e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,6 +67,17 @@ dependencies = [ "serde", ] +[[package]] +name = "async-trait" +version = "0.1.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + [[package]] name = "atty" version = "0.2.14" @@ -141,6 +152,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytes" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" + [[package]] name = "cap-fs-ext" version = "0.24.4" @@ -246,7 +263,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.96", ] [[package]] @@ -404,7 
+421,7 @@ dependencies = [ [[package]] name = "datman" -version = "0.6.0-alpha.5" +version = "0.7.0-alpha.1" dependencies = [ "anyhow", "arc-interner", @@ -441,7 +458,7 @@ checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.96", ] [[package]] @@ -883,7 +900,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn", + "syn 1.0.96", ] [[package]] @@ -910,7 +927,7 @@ dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1037,9 +1054,15 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.36.1", ] +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + [[package]] name = "pkg-config" version = "0.3.25" @@ -1061,7 +1084,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.96", "version_check", ] @@ -1084,9 +1107,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.39" +version = "1.0.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +checksum = "1d0dd4be24fcdcfeaa12a432d588dc59bbad6cad3510c67e74a2b6b2fc950564" dependencies = [ "unicode-ident", ] @@ -1099,9 +1122,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.18" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -1320,7 +1343,7 @@ checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.96", ] [[package]] @@ -1429,7 +1452,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.96", ] [[package]] @@ -1443,6 +1466,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "system-interface" version = "0.20.0" @@ -1520,7 +1554,7 @@ checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.96", ] [[package]] @@ -1543,6 +1577,18 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tokio" +version = "1.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" +dependencies = [ + "autocfg", + "bytes", + "pin-project-lite", + "windows-sys 0.45.0", +] + [[package]] name = "toml" version = "0.5.9" @@ -1683,43 +1729,109 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + 
"windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "winx" version = "0.31.0" @@ -1733,7 +1845,7 @@ dependencies = [ [[package]] name = "yama" -version = "0.6.0-alpha.5" +version = "0.7.0-alpha.1" dependencies = [ "anyhow", "blake", @@ -1766,6 +1878,32 @@ dependencies = [ "zstd", ] +[[package]] 
+name = "yama_wormfile" +version = "0.1.0" +dependencies = [ + "async-trait", + "tokio", +] + +[[package]] +name = "yama_wormfile_fs" +version = "0.1.0" +dependencies = [ + "async-trait", + "rand", + "tokio", + "yama_wormfile", +] + +[[package]] +name = "yama_wormfile_s3" +version = "0.1.0" + +[[package]] +name = "yama_wormfile_sftp" +version = "0.1.0" + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/Cargo.toml b/Cargo.toml index 5c733e5..abd6a2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,10 @@ members = [ "yama", "datman", + "yama_wormfile", + "yama_wormfile_fs", + "yama_wormfile_sftp", + "yama_wormfile_s3", ] [profile.release] diff --git a/shell.nix b/shell.nix index dfac4b9..7d956d4 100644 --- a/shell.nix +++ b/shell.nix @@ -6,7 +6,7 @@ let rust-toolchain = pkgs.symlinkJoin { name = "rust-toolchain"; - paths = [pkgs.rustc pkgs.cargo pkgs.rustfmt pkgs.rustPlatform.rustcSrc]; + paths = [pkgs.rustc pkgs.cargo pkgs.clippy pkgs.rustfmt pkgs.rustPlatform.rustcSrc]; }; in diff --git a/yama_wormfile/Cargo.toml b/yama_wormfile/Cargo.toml new file mode 100644 index 0000000..9430d59 --- /dev/null +++ b/yama_wormfile/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "yama_wormfile" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +async-trait = "0.1.68" +tokio = { version = "1.27.0", features = ["io-util"] } \ No newline at end of file diff --git a/yama_wormfile/src/boxed.rs b/yama_wormfile/src/boxed.rs new file mode 100644 index 0000000..996fdf8 --- /dev/null +++ b/yama_wormfile/src/boxed.rs @@ -0,0 +1,120 @@ +use crate::paths::{WormPath, WormPathBuf}; +use crate::{WormFileProvider, WormFileReader, WormFileWriter}; +use async_trait::async_trait; +use std::error::Error; +use std::fmt::{Debug, Display, Formatter}; +use std::pin::Pin; + +pub struct BoxErr(Box); + +impl Debug for BoxErr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Debug::fmt(&self.0, f) + } +} + +impl Display for BoxErr { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + Display::fmt(&self.0, f) + } +} + +impl Error for BoxErr {} + +impl BoxErr { + pub fn new(err: impl Error + Sync + Send + 'static) -> Self { + BoxErr(Box::new(err)) + } +} +#[async_trait] +trait BoxableWormFileProvider: Debug + Send + Sync { + async fn is_dir_b(&self, path: &WormPath) -> Result; + async fn is_regular_file_b(&self, path: &WormPath) -> Result; + async fn list_b(&self, path: &WormPath) -> Result, BoxErr>; + async fn read_b(&self, path: &WormPath) -> Result>, BoxErr>; + async fn write_b(&self) -> Result>, BoxErr>; +} + +#[async_trait] +impl BoxableWormFileProvider for T { + async fn is_dir_b(&self, path: &WormPath) -> Result { + self.is_dir(path).await.map_err(BoxErr::new) + } + + async fn is_regular_file_b(&self, path: &WormPath) -> Result { + self.is_regular_file(path).await.map_err(BoxErr::new) + } + + async fn list_b(&self, path: &WormPath) -> Result, BoxErr> { + self.list(path).await.map_err(BoxErr::new) + } + + async fn read_b(&self, path: &WormPath) -> Result>, BoxErr> { + self.read(path) + .await + .map_err(BoxErr::new) + .map(|wfr| Box::pin(wfr) as Pin>) + } + + async fn write_b(&self) -> Result>, BoxErr> { + self.write() + .await + .map_err(BoxErr::new) + .map(|wfw| Box::pin(wfw) as Pin>) + } +} + +#[derive(Debug)] +pub struct BoxedWormFileProvider { + inner: Box, +} + +#[async_trait] +impl WormFileProvider for BoxedWormFileProvider { + type WormFileReader = Pin>; + type 
WormFileWriter = Pin>; + type Error = BoxErr; + + async fn is_dir(&self, path: impl AsRef + Send) -> Result { + let path = path.as_ref(); + self.inner.is_dir_b(path).await + } + + async fn is_regular_file( + &self, + path: impl AsRef + Send, + ) -> Result { + let path = path.as_ref(); + self.inner.is_regular_file_b(path).await + } + + async fn list( + &self, + path: impl AsRef + Send, + ) -> Result, Self::Error> { + let path = path.as_ref(); + self.inner.list_b(path).await + } + + async fn read( + &self, + path: impl AsRef + Send, + ) -> Result { + let path = path.as_ref(); + self.inner.read_b(path).await + } + + async fn write(&self) -> Result { + self.inner.write_b().await + } +} + +#[async_trait] +impl WormFileReader for Pin> {} + +#[async_trait] +impl WormFileWriter for Pin> { + async fn finalise(self, target_path: &WormPath, replace: bool) -> std::io::Result<()> { + WormFileWriter::finalise(self, target_path, replace).await + } +} diff --git a/yama_wormfile/src/lib.rs b/yama_wormfile/src/lib.rs new file mode 100644 index 0000000..d1ad2c1 --- /dev/null +++ b/yama_wormfile/src/lib.rs @@ -0,0 +1,64 @@ +use crate::paths::{WormPath, WormPathBuf}; +use async_trait::async_trait; +use std::error::Error; +use std::fmt::Debug; +use std::io; +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite}; + +pub mod boxed; +pub mod paths; + +#[async_trait] +pub trait WormFileProvider: Debug + Send + Sync { + type WormFileReader: WormFileReader; + type WormFileWriter: WormFileWriter; + type Error: Error + Send + Sync + 'static; + + /// Tests whether the path is a directory. + /// Does not fail if the path does not exist, even if the parent path doesn't exist — returns + /// false in that case. + async fn is_dir(&self, path: impl AsRef + Send) -> Result; + + /// Tests whether the path is a regular file. + /// Does not fail if the path does not exist, even if the parent path doesn't exist — returns + /// false in that case. + async fn is_regular_file(&self, path: impl AsRef + Send) + -> Result; + + /// Lists all the files and directories in the specified path. + /// + /// If the path does not exist, gives an error. + /// TODO a streaming version of this might be beneficial. + async fn list( + &self, + path: impl AsRef + Send, + ) -> Result, Self::Error>; + + /// Reads a file. + /// + /// Fails if the file does not exist or is not a regular file. + async fn read( + &self, + path: impl AsRef + Send, + ) -> Result; + + /// Writes to a file. + /// + /// No path is supplied here because this interface provides atomic writing by first writing + /// to a temporary file and moving it in place once ready. + /// + /// When applicable, the file is first created in the `tmp` directory during writing and then + /// moved into place afterwards. + async fn write(&self) -> Result; +} + +pub trait WormFileReader: AsyncRead + AsyncSeek + Debug + Send + Sync + 'static {} + +#[async_trait] +pub trait WormFileWriter: AsyncWrite + Debug + Send + Sync + 'static { + /// Finish writing the file. + /// Moves the file atomically to `target_path`. + /// If `replace` is false, will not overwrite a file. (May be best-effort depending on backend; + /// intended as a sanity check rather than a flawless safeguard.) 
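+    ///
+    /// A minimal usage sketch (illustrative only; `provider` stands for any
+    /// `WormFileProvider` implementation, the destination path is hypothetical,
+    /// and `tokio::io::AsyncWriteExt` is assumed to be in scope for `write_all`):
+    ///
+    /// ```ignore
+    /// let mut writer = provider.write().await?;
+    /// writer.write_all(b"chunk bytes").await?;
+    /// writer.flush().await?;
+    /// let dest = WormPath::new("chunks/00/0123abcd").unwrap();
+    /// writer.finalise(dest, false).await?;
+    /// ```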
+ async fn finalise(self, target_path: &WormPath, replace: bool) -> io::Result<()>; +} diff --git a/yama_wormfile/src/paths.rs b/yama_wormfile/src/paths.rs new file mode 100644 index 0000000..cf9b8eb --- /dev/null +++ b/yama_wormfile/src/paths.rs @@ -0,0 +1,96 @@ +use std::borrow::Borrow; + +/// Simplified version of `Path` for use in WormFile situations. +/// The Path is guaranteed to remain within the root and does not contain any `.` or `..` elements. +#[repr(transparent)] +pub struct WormPath { + inner: str, +} + +impl WormPath { + pub fn new(path_str: &str) -> Option<&WormPath> { + if path_str + .split('/') + .any(|component| component.is_empty() || component == "." || component == "..") + { + None + } else { + Some(unsafe { Self::new_unchecked(path_str) }) + } + } + + unsafe fn new_unchecked(path_str: &str) -> &WormPath { + &*(path_str as *const str as *const WormPath) + } + + pub fn as_str(&self) -> &str { + &self.inner + } + + pub fn join(&self, extension: impl AsRef) -> Option { + let extension = extension.as_ref(); + + if extension.starts_with('/') + || extension + .split('/') + .any(|component| component.is_empty() || component == "." || component == "..") + { + return None; + } + + let mut owned = self.inner.to_owned(); + owned.reserve_exact(extension.len() + 1); + owned.push('/'); + owned.push_str(extension); + // TODO maybe convert to new_unchecked in the future. + Some(WormPathBuf::new(owned).expect("already checked upfront")) + } +} + +impl ToOwned for WormPath { + type Owned = WormPathBuf; + + fn to_owned(&self) -> Self::Owned { + WormPathBuf { + inner: self.inner.to_owned(), + } + } +} + +impl AsRef for WormPath { + fn as_ref(&self) -> &WormPath { + self + } +} + +/// Simplified version of `PathBuf` for use in WormFile situations. +/// Owned form of `WormPath`. +#[repr(transparent)] +pub struct WormPathBuf { + inner: String, +} + +impl WormPathBuf { + pub fn new(path_string: String) -> Option { + if path_string + .split('/') + .any(|component| component.is_empty() || component == "." 
|| component == "..") + { + None + } else { + Some(WormPathBuf { inner: path_string }) + } + } +} + +impl AsRef for WormPathBuf { + fn as_ref(&self) -> &WormPath { + unsafe { WormPath::new_unchecked(&self.inner) } + } +} + +impl Borrow for WormPathBuf { + fn borrow(&self) -> &WormPath { + unsafe { WormPath::new_unchecked(&self.inner) } + } +} diff --git a/yama_wormfile_fs/Cargo.toml b/yama_wormfile_fs/Cargo.toml new file mode 100644 index 0000000..e062022 --- /dev/null +++ b/yama_wormfile_fs/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "yama_wormfile_fs" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yama_wormfile = { version = "0.1.0", path = "../yama_wormfile" } + +async-trait = "0.1.68" +tokio = { version = "1.27.0", features = ["io-std", "fs"] } +rand = "0.8.5" \ No newline at end of file diff --git a/yama_wormfile_fs/src/lib.rs b/yama_wormfile_fs/src/lib.rs new file mode 100644 index 0000000..69468f2 --- /dev/null +++ b/yama_wormfile_fs/src/lib.rs @@ -0,0 +1,216 @@ +use async_trait::async_trait; +use std::fmt::{Debug, Formatter}; +use std::io; +use std::io::{ErrorKind, SeekFrom}; +use std::path::PathBuf; +use std::pin::Pin; +use std::task::{Context, Poll}; +use tokio::fs::{File, OpenOptions}; +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; +use yama_wormfile::paths::{WormPath, WormPathBuf}; +use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter}; + +/// WormFileProvider that uses the local filesystem, in a given root directory. +#[derive(Debug)] +pub struct LocalWormFilesystem { + /// The root directory. + root_dir: PathBuf, +} + +impl LocalWormFilesystem { + pub fn new(root_dir: impl Into) -> io::Result { + let root_dir = root_dir.into(); + if !root_dir.is_dir() { + return Err(io::Error::new( + ErrorKind::Other, + "root for LocalWormFilesystem is not a directory!", + )); + } + Ok(LocalWormFilesystem { root_dir }) + } + + pub fn resolve_real_path(&self, path: impl AsRef) -> PathBuf { + let relpath = path.as_ref(); + self.root_dir.join(relpath.as_str()) + } +} + +#[async_trait] +impl WormFileProvider for LocalWormFilesystem { + type WormFileReader = FileWormReader; + type WormFileWriter = FileWormWriter; + type Error = io::Error; + + async fn is_dir(&self, path: impl AsRef + Send) -> Result { + let path = self.resolve_real_path(path.as_ref()); + Ok(tokio::fs::metadata(path).await?.is_dir()) + } + + async fn is_regular_file( + &self, + path: impl AsRef + Send, + ) -> Result { + let path = self.resolve_real_path(path.as_ref()); + Ok(tokio::fs::metadata(path).await?.is_file()) + } + + async fn list( + &self, + path: impl AsRef + Send, + ) -> Result, Self::Error> { + let worm_path = path.as_ref(); + let real_path = self.resolve_real_path(worm_path); + let mut dir_reader = tokio::fs::read_dir(real_path).await?; + let mut out = Vec::new(); + while let Some(next_ent) = dir_reader.next_entry().await? { + if let Some(name_str) = next_ent.file_name().to_str() { + out.push(worm_path.join(name_str).unwrap()); + } + } + Ok(out) + } + + async fn read( + &self, + path: impl AsRef + Send, + ) -> Result { + let worm_path = path.as_ref(); + let real_path = self.resolve_real_path(worm_path); + let file = OpenOptions::new().read(true).open(&real_path).await?; + Ok(FileWormReader { + path: real_path, + file, + }) + } + + async fn write(&self) -> Result { + let tmp_dir = self.root_dir.join("tmp"); + if !tokio::fs::try_exists(&tmp_dir).await? 
{ + tokio::fs::create_dir(&tmp_dir).await?; + } + + let (tmp_path, file) = loop { + let rand_num: u32 = rand::random(); + let pid = std::process::id(); + + let try_fn = format!("pid{pid}-{rand_num:08X}.writing"); + let try_path = tmp_dir.join(try_fn); + match OpenOptions::new().create_new(true).open(&try_path).await { + Ok(file) => break (try_path, file), + Err(err) => { + if err.kind() == ErrorKind::AlreadyExists { + continue; + } else { + return Err(err); + } + } + } + }; + + Ok(FileWormWriter { + temp_path: tmp_path, + file, + root_dir: self.root_dir.clone(), + }) + } +} + +pub struct FileWormReader { + path: PathBuf, + file: File, +} + +impl Debug for FileWormReader { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "FileWormReader({:?})", self.path) + } +} + +impl AsyncRead for FileWormReader { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + Pin::new(&mut self.file).poll_read(cx, buf) + } +} + +impl AsyncSeek for FileWormReader { + fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { + Pin::new(&mut self.file).start_seek(position) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.file).poll_complete(cx) + } +} + +impl WormFileReader for FileWormReader {} + +pub struct FileWormWriter { + temp_path: PathBuf, + file: File, + root_dir: PathBuf, +} + +impl Debug for FileWormWriter { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "FileWormWriter({:?})", self.temp_path) + } +} + +impl AsyncWrite for FileWormWriter { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + Pin::new(&mut self.file).poll_write(cx, buf) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.file).poll_flush(cx) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.file).poll_shutdown(cx) + } +} + +#[async_trait] +impl WormFileWriter for FileWormWriter { + async fn finalise(mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { + self.flush().await?; + + let FileWormWriter { + root_dir, + temp_path, + .. + } = self; + + let worm_path = target_path; + let real_path = root_dir.join(worm_path.as_str()); + + // Directories will be created as needed. + let parent = real_path.parent().unwrap(); + tokio::fs::create_dir_all(parent).await?; + + // Avoid allowing a replacement if not intended. + // But this is currently not atomic, so it's just a sanity check rather than a foolproof + // safeguard! + if !replace && tokio::fs::try_exists(&real_path).await? { + return Err(io::Error::new( + ErrorKind::AlreadyExists, + "finalise()ing a writer: dest already exists and replace = false", + )); + } + + // Perform the move, atomically. 
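+        // (`tokio::fs::rename` maps to rename(2) on Unix: replacing the target is
+        // atomic, but only within a single filesystem; a cross-filesystem rename
+        // fails with EXDEV. The temporary file was created under `root_dir/tmp`,
+        // so this holds as long as no separate mount point sits inside `root_dir`.)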
+ tokio::fs::rename(&temp_path, &real_path).await?; + Ok(()) + } +} diff --git a/yama_wormfile_s3/Cargo.toml b/yama_wormfile_s3/Cargo.toml new file mode 100644 index 0000000..32780f5 --- /dev/null +++ b/yama_wormfile_s3/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "yama_wormfile_s3" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/yama_wormfile_s3/src/lib.rs b/yama_wormfile_s3/src/lib.rs new file mode 100644 index 0000000..e69de29 diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml new file mode 100644 index 0000000..cc1cc03 --- /dev/null +++ b/yama_wormfile_sftp/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "yama_wormfile_sftp" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs new file mode 100644 index 0000000..e69de29 From a5d6bf30850eea22148c9070d029c62e9323395d Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sun, 2 Apr 2023 22:54:18 +0100 Subject: [PATCH 03/51] Add an untested SFTP implementation of Yama Wormfiles --- Cargo.lock | 559 +++++++++++++++++++++++++++++++--- yama_wormfile/src/paths.rs | 13 + yama_wormfile_sftp/Cargo.toml | 9 + yama_wormfile_sftp/src/lib.rs | 463 ++++++++++++++++++++++++++++ 4 files changed, 998 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2e786e..1818e11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" + [[package]] name = "adler" version = "1.0.2" @@ -43,6 +49,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "ambient-authority" version = "0.0.1" @@ -67,6 +79,18 @@ dependencies = [ "serde", ] +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + [[package]] name = "async-trait" version = "0.1.68" @@ -84,7 +108,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -95,6 +119,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "awaitable" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70af449c9a763cb655c6a1e5338b42d99c67190824ff90658c1e30be844c0775" +dependencies = [ + "awaitable-error", + "cfg-if 1.0.0", +] + +[[package]] +name = "awaitable-error" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d5b3469636cdf8543cceab175efca534471f36eee12fb8374aba00eb5e7e7f8a" + [[package]] name = "bare-metrics-core" version = "0.1.0" @@ -136,6 +176,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1" + [[package]] name = "blake" version = "2.0.2" @@ -166,7 +212,7 @@ checksum = "e54b86398b5852ddd45784b1d9b196b98beb39171821bad4b8b44534a1e87927" dependencies = [ "cap-primitives", "cap-std", - "io-lifetimes", + "io-lifetimes 0.5.3", "winapi", ] @@ -177,13 +223,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb8fca3e81fae1d91a36e9784ca22a39ef623702b5f7904d89dc31f10184a178" dependencies = [ "ambient-authority", - "errno", + "errno 0.2.8", "fs-set-times", "io-extras", - "io-lifetimes", + "io-lifetimes 0.5.3", "ipnet", "maybe-owned", - "rustix", + "rustix 0.33.7", "winapi", "winapi-util", "winx", @@ -197,9 +243,9 @@ checksum = "2247568946095c7765ad2b441a56caffc08027734c634a6d5edda648f04e32eb" dependencies = [ "cap-primitives", "io-extras", - "io-lifetimes", + "io-lifetimes 0.5.3", "ipnet", - "rustix", + "rustix 0.33.7", ] [[package]] @@ -243,7 +289,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df386a2d0f35bdefc0642fd8bcb2cd28243959f028abfd22fbade6f7d30980e" dependencies = [ "atty", - "bitflags", + "bitflags 1.3.2", "clap_derive", "clap_lex", "indexmap", @@ -287,6 +333,19 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "concurrent_arena" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24bfeb060a299f86521bb3940344800fc861cc506356e44a273a42cb552afde5" +dependencies = [ + "arc-swap", + "array-init", + "const_fn_assert", + "parking_lot", + "triomphe", +] + [[package]] name = "console" version = "0.15.0" @@ -324,6 +383,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_fn_assert" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d614f23f34f7b5165a77dc1591f497e2518f9cec4b4f4b92bfc4dc6cf7a190" + [[package]] name = "crc32fast" version = "1.3.2" @@ -384,7 +449,7 @@ version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17" dependencies = [ - "bitflags", + "bitflags 1.3.2", "crossterm_winapi", "libc", "mio", @@ -461,6 +526,26 @@ dependencies = [ "syn 1.0.96", ] +[[package]] +name = "derive_destructure2" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35cb7e5875e1028a73e551747d6d0118f25c3d6dbba2dadf97cc0f4d0c53f2f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.96", +] + +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -471,6 +556,17 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +dependencies = [ + "libc", + "redox_users", + 
"winapi", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -533,6 +629,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "errno" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.45.0", +] + [[package]] name = "errno-dragonfly" version = "0.1.2" @@ -561,6 +668,15 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "861d7b3427fbf3e06300b4aca5c430a2e263b7a7b6821faff8b200d3dc4a61cb" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "flate2" version = "1.0.24" @@ -577,8 +693,8 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7df62ee66ee2d532ea8d567b5a3f0d03ecd64636b98bad5be1e93dcc918b92aa" dependencies = [ - "io-lifetimes", - "rustix", + "io-lifetimes 0.5.3", + "rustix 0.33.7", "winapi", ] @@ -592,6 +708,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + [[package]] name = "fxhash" version = "0.2.1" @@ -671,6 +799,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hostname" version = "0.3.1" @@ -719,13 +853,22 @@ dependencies = [ "regex", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "io-extras" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0c937cc9891c12eaa8c63ad347e4a288364b1328b924886970b47a14ab8f8f8" dependencies = [ - "io-lifetimes", + "io-lifetimes 0.5.3", "os_pipe", "winapi", ] @@ -739,6 +882,17 @@ dependencies = [ "os_pipe", ] +[[package]] +name = "io-lifetimes" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +dependencies = [ + "hermit-abi 0.3.1", + "libc", + "windows-sys 0.45.0", +] + [[package]] name = "io-streams" version = "0.11.0" @@ -747,11 +901,11 @@ checksum = "fba6685e8e5efa7bd0ce8c4e92ac113b3b059b2e18bd1bf51f7cfab0f61b4b19" dependencies = [ "duplex", "io-extras", - "io-lifetimes", + "io-lifetimes 0.5.3", "memchr", "os_pipe", "parking", - "rustix", + "rustix 0.33.7", "system-interface", ] @@ -802,9 +956,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = 
"99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "libsodium-sys" @@ -834,6 +988,12 @@ version = "0.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5284f00d480e1c39af34e72f8ad60b94f47007e3481cd3b731c1d67190ddc7b7" +[[package]] +name = "linux-raw-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" + [[package]] name = "lock_api" version = "0.4.7" @@ -920,14 +1080,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713d550d9b44d89174e066b7a6217ae06234c10cb47819a88290d2b353c31799" +checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.36.1", + "windows-sys 0.45.0", ] [[package]] @@ -936,7 +1096,7 @@ version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50e4785f2c3b7589a0d0c1dd60285e1188adac4006e8abd6dd578e1567027363" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cc", "cfg-if 0.1.10", "libc", @@ -949,7 +1109,7 @@ version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cc", "cfg-if 1.0.0", "libc", @@ -965,6 +1125,17 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.96", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -990,7 +1161,7 @@ version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", ] @@ -1012,6 +1183,95 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" +[[package]] +name = "openssh" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca6c277973fb549b36dd8980941b5ea3ecebea026f5b1f0060acde74d893c22" +dependencies = [ + "dirs", + "libc", + "once_cell", + "shell-escape", + "tempfile", + "thiserror", + "tokio", + "tokio-pipe", +] + +[[package]] +name = "openssh-sftp-client" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa8e5f26e549bd266d9bcd9e5b4fd344729985ef1a7f5ac3e51f3f96a4a620" +dependencies = [ + "bytes", + "derive_destructure2", + "once_cell", + "openssh-sftp-client-lowlevel", + "openssh-sftp-error", + "scopeguard", + "tokio", + "tokio-io-utility", + "tokio-util", +] + +[[package]] +name = "openssh-sftp-client-lowlevel" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "406bf41d8372365497d5645e802a8dfe22008b8183edbe6c79e4b75614431daa" +dependencies = [ + "awaitable", + "bytes", + "concurrent_arena", + "derive_destructure2", + "openssh-sftp-error", + "openssh-sftp-protocol", + "pin-project", + "tokio", + "tokio-io-utility", +] + +[[package]] +name = "openssh-sftp-error" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d836b428ead150165d1178ed0aa672791c13b3ae9616ea1e34d13730a2cb486" +dependencies = [ + "awaitable-error", + "openssh-sftp-protocol-error", + "ssh_format_error", + "thiserror", + "tokio", +] + +[[package]] +name = "openssh-sftp-protocol" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf38532d784978966f95d241226223823f351d5bb2a4bebcf6b20b9cb1e393e0" +dependencies = [ + "bitflags 2.0.2", + "num-derive", + "num-traits", + "openssh-sftp-protocol-error", + "serde", + "ssh_format", + "vec-strings", +] + +[[package]] +name = "openssh-sftp-protocol-error" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0719269eb3f037866ae07ec89cb44ed2c1d63b72b2390cef8e1aa3016a956ff8" +dependencies = [ + "serde", + "thiserror", + "vec-strings", +] + [[package]] name = "os_pipe" version = "1.0.1" @@ -1028,6 +1288,29 @@ version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" +[[package]] +name = "ouroboros" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1358bd1558bd2a083fed428ffeda486fbfb323e698cdda7794259d592ca72db" +dependencies = [ + "aliasable", + "ouroboros_macro", +] + +[[package]] +name = "ouroboros_macro" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f7d21ccd03305a674437ee1248f3ab5d4b1db095cf1caf49f1713ddf61956b7" +dependencies = [ + "Inflector", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.96", +] + [[package]] name = "parking" version = "2.0.0" @@ -1052,11 +1335,31 @@ checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" dependencies = [ "cfg-if 1.0.0", "libc", - "redox_syscall", + "redox_syscall 0.2.13", "smallvec", "windows-sys 0.36.1", ] +[[package]] +name = "pin-project" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.96", +] + [[package]] name = "pin-project-lite" version = "0.2.9" @@ -1189,7 +1492,16 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" dependencies = [ - "bitflags", + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", ] [[package]] @@ -1198,7 +1510,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f" dependencies = [ - "redox_syscall", + "redox_syscall 0.2.13", ] [[package]] @@ -1208,7 +1520,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ "getrandom", - "redox_syscall", + "redox_syscall 0.2.13", 
"thiserror", ] @@ -1235,7 +1547,7 @@ version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38ee71cbab2c827ec0ac24e76f82eca723cee92c509a65f67dee393c25112" dependencies = [ - "bitflags", + "bitflags 1.3.2", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -1250,16 +1562,30 @@ version = "0.33.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "938a344304321a9da4973b9ff4f9f8db9caf4597dfd9dda6a60b523340a0fff0" dependencies = [ - "bitflags", - "errno", - "io-lifetimes", + "bitflags 1.3.2", + "errno 0.2.8", + "io-lifetimes 0.5.3", "itoa", "libc", - "linux-raw-sys", + "linux-raw-sys 0.0.42", "once_cell", "winapi", ] +[[package]] +name = "rustix" +version = "0.37.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d097081ed288dfe45699b72f5b5d648e5f15d64d900c7080273baa20c16a6849" +dependencies = [ + "bitflags 1.3.2", + "errno 0.3.0", + "io-lifetimes 1.0.9", + "libc", + "linux-raw-sys 0.3.1", + "windows-sys 0.45.0", +] + [[package]] name = "rustversion" version = "1.0.6" @@ -1272,7 +1598,7 @@ version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8227301bfc717136f0ecbd3d064ba8199e44497a0bdd46bb01ede4387cfd2cec" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if 1.0.0", "dirs-next", "fs2", @@ -1357,6 +1683,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shell-escape" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f" + [[package]] name = "signal-hook" version = "0.3.14" @@ -1399,6 +1731,16 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "socket2" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "socketpair" version = "0.14.0" @@ -1406,8 +1748,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f0c0b3fc17356799222affc5a40345b7cc25b548c489c5a31eca0888ee2404c" dependencies = [ "io-extras", - "io-lifetimes", - "rustix", + "io-lifetimes 0.5.3", + "rustix 0.33.7", "uuid", "winapi", ] @@ -1424,6 +1766,32 @@ dependencies = [ "serde", ] +[[package]] +name = "ssh_format" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24ab31081d1c9097c327ec23550858cb5ffb4af6b866c1ef4d728455f01f3304" +dependencies = [ + "bytes", + "serde", + "ssh_format_error", +] + +[[package]] +name = "ssh_format_error" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be3c6519de7ca611f71ef7e8a56eb57aa1c818fecb5242d0a0f39c83776c210c" +dependencies = [ + "serde", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -1484,11 +1852,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e09bb3fb4e02ec4b87e182ea9718fadbc0fa3e50085b40a9af9690572b67f9e" dependencies = [ "atty", - "bitflags", + "bitflags 1.3.2", "cap-fs-ext", - "io-lifetimes", + "io-lifetimes 0.5.3", "os_pipe", - 
"rustix", + "rustix 0.33.7", "socketpair", "winapi", "winx", @@ -1500,6 +1868,19 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af547b166dd1ea4b472165569fc456cfb6818116f854690b0ff205e636523dab" +[[package]] +name = "tempfile" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +dependencies = [ + "cfg-if 1.0.0", + "fastrand", + "redox_syscall 0.3.5", + "rustix 0.37.6", + "windows-sys 0.45.0", +] + [[package]] name = "termcolor" version = "1.1.3" @@ -1527,7 +1908,7 @@ checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e" dependencies = [ "libc", "numtoa", - "redox_syscall", + "redox_syscall 0.2.13", "redox_termios", ] @@ -1538,23 +1919,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] -name = "thiserror" -version = "1.0.31" +name = "thin-vec" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +checksum = "aac81b6fd6beb5884b0cf3321b8117e6e5d47ecb6fc89f414cfdcca8b2fe2dd8" + +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.31" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 2.0.13", ] [[package]] @@ -1585,10 +1972,59 @@ checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", + "libc", + "mio", "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", "windows-sys 0.45.0", ] +[[package]] +name = "tokio-io-utility" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6948d15bb5da6f565846828025df4d7b503df3890f0fcdb9a667de1c81bc1976" +dependencies = [ + "bytes", + "tokio", +] + +[[package]] +name = "tokio-macros" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "tokio-pipe" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784" +dependencies = [ + "libc", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.5.9" @@ -1598,6 +2034,17 @@ dependencies = [ "serde", ] +[[package]] +name = "triomphe" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1ee9bd9239c339d714d657fac840c6d2a4f9c45f4f9ec7b0975113458be78db" +dependencies = [ + 
"arc-swap", + "serde", + "stable_deref_trait", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -1657,6 +2104,16 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vec-strings" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8509489e2a7ee219522238ad45fd370bec6808811ac15ac6b07453804e77659" +dependencies = [ + "serde", + "thin-vec", +] + [[package]] name = "version_check" version = "0.9.4" @@ -1838,8 +2295,8 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d5973cb8cd94a77d03ad7e23bbe14889cb29805da1cec0e4aff75e21aebded" dependencies = [ - "bitflags", - "io-lifetimes", + "bitflags 1.3.2", + "io-lifetimes 0.5.3", "winapi", ] @@ -1903,6 +2360,16 @@ version = "0.1.0" [[package]] name = "yama_wormfile_sftp" version = "0.1.0" +dependencies = [ + "async-trait", + "openssh", + "openssh-sftp-client", + "ouroboros", + "rand", + "thiserror", + "tokio", + "yama_wormfile", +] [[package]] name = "zstd" diff --git a/yama_wormfile/src/paths.rs b/yama_wormfile/src/paths.rs index cf9b8eb..860b554 100644 --- a/yama_wormfile/src/paths.rs +++ b/yama_wormfile/src/paths.rs @@ -1,4 +1,5 @@ use std::borrow::Borrow; +use std::fmt::{Debug, Formatter}; /// Simplified version of `Path` for use in WormFile situations. /// The Path is guaranteed to remain within the root and does not contain any `.` or `..` elements. @@ -7,6 +8,12 @@ pub struct WormPath { inner: str, } +impl Debug for WormPath { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "<{}>", &self.inner) + } +} + impl WormPath { pub fn new(path_str: &str) -> Option<&WormPath> { if path_str @@ -70,6 +77,12 @@ pub struct WormPathBuf { inner: String, } +impl Debug for WormPathBuf { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "<{}>", self.inner) + } +} + impl WormPathBuf { pub fn new(path_string: String) -> Option { if path_string diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml index cc1cc03..f53b32b 100644 --- a/yama_wormfile_sftp/Cargo.toml +++ b/yama_wormfile_sftp/Cargo.toml @@ -6,3 +6,12 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +yama_wormfile = { version = "0.1.0", path = "../yama_wormfile" } + +ouroboros = "0.15.6" +openssh = "0.9.9" +openssh-sftp-client = "0.12.2" +async-trait = "0.1.68" +tokio = { version = "1.27.0", features = ["io-std"] } +rand = "0.8.5" +thiserror = "1.0.40" \ No newline at end of file diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs index e69de29..3c2ebf8 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -0,0 +1,463 @@ +extern crate core; + +use async_trait::async_trait; +use openssh::{KnownHosts, RemoteChild, Session, Stdio}; +use openssh_sftp_client::error::SftpErrorKind; +use openssh_sftp_client::file::TokioCompatFile; +use openssh_sftp_client::fs::Fs; +use openssh_sftp_client::Error::SftpError; +use openssh_sftp_client::Sftp; +use ouroboros::self_referencing; +use std::fmt::{Debug, Formatter}; +use std::io; +use std::io::{ErrorKind, SeekFrom}; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use thiserror::Error; +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; +use 
yama_wormfile::paths::{WormPath, WormPathBuf};
+use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter};
+
+/// WormFileProvider that uses an SFTP connection, in a given root directory.
+#[derive(Debug)]
+pub struct SftpWormFilesystem {
+    conn: Arc<SftpConn>,
+
+    /// The root directory.
+    root_dir: PathBuf,
+}
+
+#[self_referencing]
+struct SftpConn {
+    /// The SSH session.
+    ssh: Session,
+
+    /// The SSH child process
+    #[borrows(ssh)]
+    #[covariant]
+    ssh_child: RemoteChild<'this>,
+
+    /// The SFTP client.
+    #[borrows(mut ssh_child)]
+    sftp: Sftp,
+
+    root_dir: PathBuf,
+    // #[borrows(sftp)]
+    // #[covariant]
+    // fs: Fs<'this>,
+}
+
+#[self_referencing]
+struct FileWithSftpConn {
+    conn: Arc<SftpConn>,
+    #[borrows(conn)]
+    #[covariant]
+    file: Option<TokioCompatFile<'this>>,
+}
+
+impl SftpConn {
+    pub async fn create(ssh_connect: &str, root_dir: impl Into<PathBuf>) -> YWSResult<Self> {
+        let root_dir = root_dir.into();
+        let session = Session::connect(ssh_connect, KnownHosts::Strict).await?;
+
+        let res = SftpConnAsyncTryBuilder {
+            ssh: session,
+            ssh_child_builder: |ssh| {
+                Box::pin(async move {
+                    ssh.subsystem("sftp")
+                        .stdin(Stdio::piped())
+                        .stdout(Stdio::piped())
+                        .spawn()
+                        .await
+                        .map_err(SftpWormFileError::from)
+                })
+            },
+            sftp_builder: |ssh_child| {
+                Box::pin(async move {
+                    Sftp::new(
+                        ssh_child.stdin().take().unwrap(),
+                        ssh_child.stdout().take().unwrap(),
+                        Default::default(),
+                    )
+                    .await
+                    .map_err(SftpWormFileError::from)
+                })
+            },
+            // fs_builder: |sftp| Box::pin(async move {
+            //     let mut fs = sftp.fs();
+            //     fs.set_cwd(&root_dir);
+            //     Ok(fs)
+            // })
+            root_dir,
+        }
+        .try_build()
+        .await?;
+        Ok(res)
+    }
+
+    pub fn get_fs(&self) -> Fs<'_> {
+        let mut fs = self.borrow_sftp().fs();
+        fs.set_cwd(&self.borrow_root_dir());
+        fs
+    }
+
+    async fn create_dir_all(&self, worm_path_as_pathbuf: PathBuf) -> YWSResult<()> {
+        let mut fs = self.get_fs();
+        let mut stack = vec![];
+
+        let mut at = Some(worm_path_as_pathbuf);
+        while let Some(at_path) = at {
+            match fs.metadata(&at_path).await {
+                Ok(_) => {
+                    break;
+                }
+                Err(SftpError(SftpErrorKind::NoSuchFile, _)) => {
+                    at = at_path.parent().map(Path::to_owned);
+                    stack.push(at_path);
+                }
+                Err(sftp_err) => {
+                    return Err(SftpWormFileError::SftpError(sftp_err));
+                }
+            }
+        }
+
+        while let Some(path) = stack.pop() {
+            fs.create_dir(path).await?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Debug for SftpConn {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "<SftpConn>")
+    }
+}
+
+#[derive(Debug, Error)]
+pub enum SftpWormFileError {
+    #[error("ssh error: {0:?}")]
+    SshError(#[from] openssh::Error),
+
+    #[error("sftp error: {0:?}")]
+    SftpError(#[from] openssh_sftp_client::Error),
+
+    #[error("error: {0}")]
+    Message(String),
+}
+
+type YWSResult<T> = Result<T, SftpWormFileError>;
+
+impl SftpWormFilesystem {
+    pub async fn new(
+        ssh_connect: &str,
+        root_dir: impl Into<PathBuf>,
+    ) -> YWSResult<Self> {
+        let root_dir = root_dir.into();
+
+        let conn = Arc::new(SftpConn::create(ssh_connect, &root_dir).await?);
+
+        if !conn
+            .get_fs()
+            .metadata(".")
+            .await?
+            .file_type()
+            .unwrap()
+            .is_dir()
+        {
+            return Err(SftpWormFileError::Message(format!(
+                "{root_dir:?} is not a dir on SFTP remote."
+            )));
+        }
+
+        Ok(SftpWormFilesystem { conn, root_dir })
+    }
+
+    fn get_fs(&self) -> Fs<'_> {
+        let mut fs = self.conn.borrow_sftp().fs();
+        fs.set_cwd(&self.root_dir);
+        fs
+    }
+}
+
+#[async_trait]
+impl WormFileProvider for SftpWormFilesystem {
+    type WormFileReader = SftpWormReader;
+    type WormFileWriter = SftpWormWriter;
+    type Error = SftpWormFileError;
+
+    async fn is_dir(&self, path: impl AsRef<WormPath> + Send) -> Result<bool, Self::Error> {
+        let path = path.as_ref().as_str();
+        let mut fs = self.get_fs();
+        match fs.metadata(path).await {
+            Ok(meta) => Ok(meta.file_type().unwrap().is_dir()),
+            Err(SftpError(SftpErrorKind::NoSuchFile, _)) => Ok(false),
+            Err(sftp_err) => Err(SftpWormFileError::SftpError(sftp_err)),
+        }
+    }
+
+    async fn is_regular_file(
+        &self,
+        path: impl AsRef<WormPath> + Send,
+    ) -> Result<bool, Self::Error> {
+        let path = path.as_ref().as_str();
+        let mut fs = self.get_fs();
+        match fs.metadata(path).await {
+            Ok(meta) => Ok(meta.file_type().unwrap().is_file()),
+            Err(SftpError(SftpErrorKind::NoSuchFile, _)) => Ok(false),
+            Err(sftp_err) => Err(SftpWormFileError::SftpError(sftp_err)),
+        }
+    }
+
+    async fn list(
+        &self,
+        path: impl AsRef<WormPath> + Send,
+    ) -> Result<Vec<WormPathBuf>, Self::Error> {
+        let worm_path = path.as_ref();
+        let path = worm_path.as_str();
+        let mut fs = self.get_fs();
+
+        let mut remote_dir = fs.open_dir(path).await?;
+        let dir_reader = remote_dir.read_dir().await?;
+
+        Ok(dir_reader
+            .iter()
+            .filter_map(|entry| {
+                if let Some(name_str) = entry.filename().as_os_str().to_str() {
+                    if name_str.is_empty() || name_str == "." || name_str == ".." {
+                        None
+                    } else {
+                        Some(worm_path.join(name_str).expect("pre-checked"))
+                    }
+                } else {
+                    None
+                }
+            })
+            .collect())
+    }
+
+    async fn read(
+        &self,
+        path: impl AsRef<WormPath> + Send,
+    ) -> Result<Self::WormFileReader, Self::Error> {
+        let real_path = self.root_dir.join(path.as_ref().as_str());
+
+        let real_path2 = real_path.clone();
+        // the `Send` in the below line is very important...
+        let file_with_conn: FileWithSftpConn = FileWithSftpConnAsyncSendTryBuilder {
+            conn: self.conn.clone(),
+            file_builder: |conn| {
+                Box::pin(async move {
+                    let file = conn
+                        .borrow_sftp()
+                        .open(real_path)
+                        .await
+                        .map_err(SftpWormFileError::from)?;
+                    Ok::<_, SftpWormFileError>(Some(TokioCompatFile::new(file)))
+                })
+            },
+        }
+        .try_build()
+        .await?;
+
+        Ok(SftpWormReader {
+            path: real_path2,
+            file_with_conn,
+        })
+    }
+
+    async fn write(&self) -> Result<Self::WormFileWriter, Self::Error> {
+        // let tmp_dir = self.root_dir.join("tmp");
+        // if !tokio::fs::try_exists(&tmp_dir).await? {
+        //     tokio::fs::create_dir(&tmp_dir).await?;
+        // }
+        //
+        // let (tmp_path, file) = loop {
+        //     let rand_num: u32 = rand::random();
+        //     let pid = std::process::id();
+        //
+        //     let try_fn = format!("pid{pid}-{rand_num:08X}.writing");
+        //     let try_path = tmp_dir.join(try_fn);
+        //     match OpenOptions::new().create_new(true).open(&try_path).await {
+        //         Ok(file) => break (try_path, file),
+        //         Err(err) => {
+        //             if err.kind() == ErrorKind::AlreadyExists {
+        //                 continue;
+        //             } else {
+        //                 return Err(err);
+        //             }
+        //         }
+        //     }
+        // };
+        //
+        // Ok(SftpWormWriter {
+        //     temp_path: tmp_path,
+        //     file,
+        //     root_dir: self.root_dir.clone(),
+        // })
+        todo!()
+    }
+}
+
+pub struct SftpWormReader {
+    path: PathBuf,
+    file_with_conn: FileWithSftpConn,
+}
+
+impl Debug for SftpWormReader {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "FileWormReader({:?})", self.path)
+    }
+}
+
+impl AsyncRead for SftpWormReader {
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        self.file_with_conn
+            .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_read(cx, buf))
+    }
+}
+
+impl AsyncSeek for SftpWormReader {
+    fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> {
+        self.file_with_conn
+            .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).start_seek(position))
+    }
+
+    fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<u64>> {
+        self.file_with_conn
+            .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_complete(cx))
+    }
+}
+
+impl WormFileReader for SftpWormReader {}
+
+pub struct SftpWormWriter {
+    temp_path: PathBuf,
+    file_with_conn: FileWithSftpConn,
+    root_dir: PathBuf,
+}
+
+impl Debug for SftpWormWriter {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "FileWormWriter({:?})", self.temp_path)
+    }
+}
+
+impl AsyncWrite for SftpWormWriter {
+    fn poll_write(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<io::Result<usize>> {
+        self.file_with_conn
+            .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_write(cx, buf))
+    }
+
+    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        self.file_with_conn
+            .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_flush(cx))
+    }
+
+    fn poll_shutdown(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<io::Result<()>> {
+        self.file_with_conn
+            .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_shutdown(cx))
+    }
+}
+
+#[async_trait]
+impl WormFileWriter for SftpWormWriter {
+    async fn finalise(mut self, target_path: &WormPath, replace: bool) -> io::Result<()> {
+        self.flush().await?;
+
+        let SftpWormWriter {
+            root_dir,
+            temp_path,
+            mut file_with_conn,
+            ..
+        } = self;
+
+        let file = file_with_conn.with_file_mut(|file| file.take().unwrap());
+        file.close()
+            .await
+            .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+
+        let conn: Arc<SftpConn> = file_with_conn.into_heads().conn;
+        let mut fs = conn.get_fs();
+
+        let worm_path = target_path;
+
+        // Directories will be created as needed.
+        if let Some(parent) = PathBuf::from(worm_path.as_str()).parent() {
+            conn.create_dir_all(parent.to_owned())
+                .await
+                .map_err(|e| io::Error::new(ErrorKind::Other, e))?;
+        }
+
+        // Avoid allowing a replacement if not intended.
+        // But this is currently not atomic, so it's just a sanity check rather than a foolproof
+        // safeguard!
+ if !replace { + match fs.metadata(worm_path.as_str()).await { + Ok(_) => { + return Err(io::Error::new( + ErrorKind::AlreadyExists, + "finalise()ing a writer: dest already exists and replace = false", + )); + } + Err(SftpError(SftpErrorKind::NoSuchFile, _)) => { + // ideal. nop. + } + Err(sftp_err) => { + return Err(SftpWormFileError::SftpError(sftp_err)) + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + } + } + } + + // Perform the move. + fs.rename(root_dir.join(&temp_path), root_dir.join(worm_path.as_str())) + .await + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + Ok(()) + } +} + +#[tokio::test] +async fn test_lol() { + let swf = SftpWormFilesystem::new("scone@sallie", "").await.unwrap(); + let _ = swf.is_dir(WormPath::new("maddy").unwrap()).await; + + match swf.is_dir(WormPath::new("maddyss").unwrap()).await { + Ok(x) => eprintln!("{x:?}"), + Err(SftpWormFileError::SftpError(openssh_sftp_client::Error::SftpError( + openssh_sftp_client::error::SftpErrorKind::NoSuchFile, + _, + ))) => { + eprintln!("NSF"); + } + Err(other) => { + eprintln!("other {other:?}"); + } + } + + let x = swf.list(WormPath::new("maddy").unwrap()).await.unwrap(); + eprintln!("{x:?}"); + + // if let Err(e) = swf.sftp.close().await { + // eprintln!("sftp {e:?}"); + // } + // + // if let Err(e) = swf.ssh.close().await { + // eprintln!("sftp {e:?}"); + // } +} From 8d5c373abcad72b380b071131fa30ce7d3d60d3d Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Mon, 3 Apr 2023 21:09:26 +0100 Subject: [PATCH 04/51] Add a rather dodgy untested S3 implementation --- Cargo.lock | 975 +++++++++++++++++++++++++++++++++++- yama_wormfile/src/lib.rs | 2 + yama_wormfile_s3/Cargo.toml | 13 + yama_wormfile_s3/src/lib.rs | 393 +++++++++++++++ 4 files changed, 1380 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1818e11..cab7847 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,20 @@ dependencies = [ "syn 2.0.13", ] +[[package]] +name = "attohttpc" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fcf00bc6d5abb29b5f97e3c61a90b6d3caa12f3faf897d4a3e3607c050a35a7" +dependencies = [ + "http", + "log", + "native-tls", + "serde", + "serde_json", + "url", +] + [[package]] name = "atty" version = "0.2.14" @@ -135,6 +149,32 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5b3469636cdf8543cceab175efca534471f36eee12fb8374aba00eb5e7e7f8a" +[[package]] +name = "aws-creds" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1be164cb32cc9dd28dc0f6a8576c164c74516137ddc62dfe9eef584c81ee9488" +dependencies = [ + "attohttpc", + "dirs", + "log", + "rust-ini", + "serde", + "serde-xml-rs", + "thiserror", + "time 0.3.20", + "url", +] + +[[package]] +name = "aws-region" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92a8af5850d0ea0916ca3e015ab86951ded0bf4b70fd27896e81ae1dfb0af37" +dependencies = [ + "thiserror", +] + [[package]] name = "bare-metrics-core" version = "0.1.0" @@ -170,6 +210,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "base64" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" + [[package]] name = "bitflags" version = "1.3.2" @@ -192,6 +238,21 @@ 
dependencies = [ "libc", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + [[package]] name = "byteorder" version = "1.4.3" @@ -278,7 +339,7 @@ dependencies = [ "libc", "num-integer", "num-traits", - "time", + "time 0.1.44", "winapi", ] @@ -389,6 +450,31 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27d614f23f34f7b5165a77dc1591f497e2518f9cec4b4f4b92bfc4dc6cf7a190" +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + +[[package]] +name = "cpufeatures" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -474,6 +560,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "dashmap" version = "4.0.2" @@ -537,6 +633,17 @@ dependencies = [ "syn 1.0.96", ] +[[package]] +name = "digest" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "dirs" version = "4.0.0" @@ -578,6 +685,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "dlv-list" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" + [[package]] name = "duplex" version = "0.11.0" @@ -605,6 +718,15 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "encoding_rs" +version = "0.8.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "env_logger" version = "0.7.1" @@ -687,6 +809,36 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +dependencies = [ + "percent-encoding", +] + [[package]] name = "fs-set-times" version = "0.15.0" @@ -708,18 +860,95 @@ dependencies = [ "winapi", ] +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", + "futures-sink", +] + [[package]] name = "futures-core" version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + [[package]] name = "futures-sink" version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "fxhash" version = "0.2.1" @@ -729,6 +958,16 @@ dependencies = [ "byteorder", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.7" @@ -746,6 +985,25 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "h2" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be7b54589b581f624f566bf5d8eb2bab1db736c51528720b6bd36b96b55924d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.9.1" @@ -761,6 +1019,15 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.6", +] + [[package]] name = "hashlink" version = "0.6.0" @@ -776,7 +1043,7 @@ version = "7.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31672b7011be2c4f7456c4ddbcb40e7e9a4a9fad8efe49a6ebaf5f307d0109c0" dependencies = [ - "base64", + "base64 0.13.0", "byteorder", "crossbeam-channel", "flate2", @@ -805,6 +1072,21 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + [[package]] name = "hostname" version = "0.3.1" @@ -816,6 +1098,40 @@ dependencies = [ "winapi", ] +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + [[package]] name = "humansize" version = "1.1.1" @@ -831,6 +1147,53 @@ dependencies = [ "quick-error", ] +[[package]] +name = "hyper" +version = "0.14.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "idna" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "1.8.2" @@ -948,6 +1311,15 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -1019,12 +1391,29 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" +[[package]] +name = "maybe-async" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f1b8c13cb1f814b634a96b2c725449fe7ed464a7b8781de8688be5ffbd3f305" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.96", +] + [[package]] name = "maybe-owned" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.5.0" @@ -1063,6 +1452,21 @@ dependencies = [ "syn 1.0.96", ] +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minidom" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e9ce45d459e358790a285e7609ff5ae4cfab88b75f237e8838e62029dda397b" +dependencies = [ + "rxml", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1090,6 +1494,24 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "nix" version = "0.17.0" @@ -1272,6 +1694,60 @@ dependencies = [ "vec-strings", ] +[[package]] +name = "openssl" +version = "0.10.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d2f106ab837a24e03672c59b1239669a0596406ff657c3c0835b6b7f0f35a33" +dependencies = [ + "bitflags 1.3.2", + "cfg-if 1.0.0", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a20eace9dc2d82904039cb76dcf50fb1a0bba071cfd1629720b5d6f1ddba0fa" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-multimap" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" +dependencies = [ + "dlv-list", + "hashbrown 0.12.3", +] + [[package]] name = "os_pipe" version = "1.0.1" @@ -1340,6 +1816,12 @@ dependencies = [ "windows-sys 0.36.1", ] +[[package]] +name = "percent-encoding" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" + [[package]] name = "pin-project" version = "1.0.12" @@ -1366,6 +1848,12 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.25" @@ -1423,6 +1911,16 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.26" @@ -1541,6 +2039,45 @@ version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +[[package]] +name = "reqwest" +version = "0.11.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" +dependencies = [ + "base64 0.21.0", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "winreg", +] + [[package]] name = "rusqlite" version = "0.24.2" @@ -1556,6 +2093,49 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rust-ini" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" +dependencies = [ + "cfg-if 1.0.0", + "ordered-multimap", +] + +[[package]] +name = "rust-s3" +version = "0.33.0-beta5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ceb8ffcf553348777f08c4c0f3fb6f62091ab32ee0b64b6b2fcedd6756c46dc" +dependencies = [ + "async-trait", + "aws-creds", + "aws-region", + "base64 0.13.0", + "bytes", + "cfg-if 1.0.0", + "futures", + "hex", + "hmac", + "http", + "log", + "maybe-async", + "md5", + "minidom", + "percent-encoding", + 
"quick-xml", + "reqwest", + "serde", + "serde_derive", + "sha2", + "thiserror", + "time 0.3.20", + "tokio", + "tokio-stream", + "url", +] + [[package]] name = "rustix" version = "0.33.7" @@ -1613,6 +2193,25 @@ dependencies = [ "winapi", ] +[[package]] +name = "rxml" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a071866b8c681dc2cfffa77184adc32b57b0caad4e620b6292609703bceb804" +dependencies = [ + "bytes", + "pin-project-lite", + "rxml_validation", + "smartstring", + "tokio", +] + +[[package]] +name = "rxml_validation" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53bc79743f9a66c2fb1f951cd83735f275d46bfe466259fbc5897bb60a0d00ee" + [[package]] name = "ryu" version = "1.0.10" @@ -1628,12 +2227,44 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" +dependencies = [ + "windows-sys 0.42.0", +] + [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "security-framework" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "serde" version = "1.0.137" @@ -1643,6 +2274,18 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-xml-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65162e9059be2f6a3421ebbb4fef3e74b7d9e7c60c50a0e292c6239f19f1edfa" +dependencies = [ + "log", + "serde", + "thiserror", + "xml-rs", +] + [[package]] name = "serde_bare" version = "0.3.0" @@ -1683,6 +2326,29 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "digest", +] + [[package]] name = "shell-escape" version = "0.1.5" @@ -1725,12 +2391,30 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f054c6c1a6e95179d6f23ed974060dcefb2d9388bb7256900badad682c499de4" +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +dependencies = [ + "autocfg", +] + [[package]] name = "smallvec" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 
+[[package]] +name = "smartstring" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e714dff2b33f2321fdcd475b71cec79781a692d846f37f415fb395a1d2bcd48e" +dependencies = [ + "static_assertions", +] + [[package]] name = "socket2" version = "0.4.9" @@ -1750,7 +2434,7 @@ dependencies = [ "io-extras", "io-lifetimes 0.5.3", "rustix 0.33.7", - "uuid", + "uuid 0.8.2", "winapi", ] @@ -1823,6 +2507,12 @@ dependencies = [ "syn 1.0.96", ] +[[package]] +name = "subtle" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" + [[package]] name = "syn" version = "1.0.96" @@ -1955,6 +2645,33 @@ dependencies = [ "winapi", ] +[[package]] +name = "time" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +dependencies = [ + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +dependencies = [ + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -1964,6 +2681,21 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.27.0" @@ -2002,6 +2734,16 @@ dependencies = [ "syn 2.0.13", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-pipe" version = "0.2.12" @@ -2012,6 +2754,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.7" @@ -2023,6 +2776,7 @@ dependencies = [ "futures-sink", "pin-project-lite", "tokio", + "tracing", ] [[package]] @@ -2034,6 +2788,32 @@ dependencies = [ "serde", ] +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a400e31aa60b9d44a52a8ee0343b5b18566b03a8321e0d321f695cf56e940160" +dependencies = [ + "cfg-if 1.0.0", + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.28" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7" +dependencies = [ + "once_cell", +] + [[package]] name = "triomphe" version = "0.1.8" @@ -2045,6 +2825,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + [[package]] name = "twox-hash" version = "1.6.3" @@ -2056,12 +2842,33 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-segmentation" version = "1.9.0" @@ -2074,6 +2881,17 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +[[package]] +name = "url" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "users" version = "0.9.1" @@ -2098,6 +2916,16 @@ dependencies = [ "getrandom", ] +[[package]] +name = "uuid" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +dependencies = [ + "getrandom", + "rand", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -2137,6 +2965,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -2149,6 +2987,95 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 1.0.96", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" 
+version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +dependencies = [ + "cfg-if 1.0.0", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.96", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "wasm-streams" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2193,6 +3120,21 @@ dependencies = [ "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-sys" version = "0.45.0" @@ -2289,6 +3231,15 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] + [[package]] name = "winx" version = "0.31.0" @@ -2300,6 +3251,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "xml-rs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" + [[package]] name = "yama" version = "0.7.0-alpha.1" @@ -2356,6 +3313,18 @@ dependencies = [ [[package]] name = "yama_wormfile_s3" version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "rand", + "rust-s3", + "thiserror", + "tokio", + "tokio-stream", + "tokio-util", + "uuid 1.3.0", + "yama_wormfile", +] [[package]] name = "yama_wormfile_sftp" diff --git a/yama_wormfile/src/lib.rs b/yama_wormfile/src/lib.rs index d1ad2c1..db53e6e 100644 --- a/yama_wormfile/src/lib.rs +++ b/yama_wormfile/src/lib.rs @@ -17,6 +17,8 @@ pub trait WormFileProvider: Debug + Send + Sync { /// Tests whether the path is a 
directory. /// Does not fail if the path does not exist, even if the parent path doesn't exist — returns /// false in that case. + /// Beware! Some backends do not support the concept of a directory and will happily report + /// `true` for any path. async fn is_dir(&self, path: impl AsRef + Send) -> Result; /// Tests whether the path is a regular file. diff --git a/yama_wormfile_s3/Cargo.toml b/yama_wormfile_s3/Cargo.toml index 32780f5..be0df27 100644 --- a/yama_wormfile_s3/Cargo.toml +++ b/yama_wormfile_s3/Cargo.toml @@ -6,3 +6,16 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +yama_wormfile = { version = "0.1.0", path = "../yama_wormfile" } + +#ouroboros = "0.15.6" +rust-s3 = { version = "0.33.0-beta5", features = ["with-tokio"] } +async-trait = "0.1.68" +tokio = { version = "1.27.0", features = ["io-std"] } +rand = "0.8.5" +thiserror = "1.0.40" + +tokio-stream = "0.1.12" +tokio-util = "0.7.7" +bytes = "1.4.0" +uuid = { version = "1.3.0", features = ["fast-rng", "v4"] } \ No newline at end of file diff --git a/yama_wormfile_s3/src/lib.rs b/yama_wormfile_s3/src/lib.rs index e69de29..a7e45ed 100644 --- a/yama_wormfile_s3/src/lib.rs +++ b/yama_wormfile_s3/src/lib.rs @@ -0,0 +1,393 @@ +use async_trait::async_trait; +use s3::error::S3Error; +use s3::serde_types::HeadObjectResult; +use s3::Bucket; +use std::fmt::{Debug, Formatter}; +use std::io; +use std::io::{ErrorKind, SeekFrom}; +use std::ops::Range; +use std::pin::Pin; +use std::task::{ready, Context, Poll}; +use tokio::io::{duplex, AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, DuplexStream, ReadBuf}; +use tokio::task::JoinHandle; +use uuid::Uuid; +use yama_wormfile::paths::{WormPath, WormPathBuf}; +use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter}; + +/// WormFileProvider that uses an S3 bucket, with a given path prefix. +#[derive(Debug)] +pub struct S3WormFilesystem { + /// The path prefix for all S3 objects. 
+    path_prefix: String,
+
+    bucket: Bucket,
+}
+
+impl S3WormFilesystem {
+    pub fn new(bucket: Bucket, path_prefix: String) -> io::Result<Self> {
+        Ok(S3WormFilesystem {
+            path_prefix,
+            bucket,
+        })
+    }
+
+    pub fn resolve_real_path(&self, path: impl AsRef<WormPath>) -> String {
+        format!("{}{}", self.path_prefix, path.as_ref().as_str())
+    }
+
+    async fn head_object(&self, full_path: &str) -> Result<Option<HeadObjectResult>, S3Error> {
+        let (head, status) = self.bucket.head_object(full_path).await?;
+        if status == 404 {
+            return Ok(None);
+        }
+        if status == 200 {
+            return Ok(Some(head));
+        }
+        Err(S3Error::Http(
+            status,
+            format!("bad status for HEAD {full_path}"),
+        ))
+    }
+}
+
+#[async_trait]
+impl WormFileProvider for S3WormFilesystem {
+    type WormFileReader = S3WormReader;
+    type WormFileWriter = S3WormWriter;
+    type Error = S3Error;
+
+    async fn is_dir(&self, _path: impl AsRef<WormPath> + Send) -> Result<bool, Self::Error> {
+        Ok(true)
+    }
+
+    async fn is_regular_file(
+        &self,
+        path: impl AsRef<WormPath> + Send,
+    ) -> Result<bool, Self::Error> {
+        let full_path = self.resolve_real_path(path.as_ref());
+        Ok(self.head_object(&full_path).await?.is_some())
+    }
+
+    async fn list(
+        &self,
+        path: impl AsRef<WormPath> + Send,
+    ) -> Result<Vec<WormPathBuf>, Self::Error> {
+        let path = path.as_ref();
+        let full_path = self.resolve_real_path(path);
+        let list = self
+            .bucket
+            .list(format!("{full_path}/"), Some("/".to_owned()))
+            .await?;
+        Ok(list
+            .into_iter()
+            .map(|lbr| lbr.contents)
+            .flatten()
+            .filter_map(|obj| {
+                // Strip the path prefix from items, plus convert to `WormPathBuf`s
+                obj.key
+                    .strip_prefix(&self.path_prefix)
+                    .map(|s| WormPathBuf::new(s.to_owned()))
+                    .flatten()
+            })
+            .collect())
+    }
+
+    async fn read(
+        &self,
+        path: impl AsRef<WormPath> + Send,
+    ) -> Result<Self::WormFileReader, Self::Error> {
+        let path = path.as_ref();
+        let full_path = self.resolve_real_path(path);
+        let head = self.head_object(&full_path).await?.ok_or_else(|| {
+            S3Error::Http(404, format!("can't read {path:?} as it does not exist."))
+        })?;
+
+        let length = head
+            .content_length
+            .ok_or_else(|| S3Error::Http(999, format!("No content-length from HEAD {path:?}")))?;
+
+        Ok(S3WormReader {
+            bucket: self.bucket.clone(),
+            path: full_path,
+            reader: None,
+            offset: 0,
+            length: length as u64,
+            next_read_size_hint: None,
+        })
+    }
+
+    async fn write(&self) -> Result<Self::WormFileWriter, Self::Error> {
+        let (tx, mut rx) = duplex(8192);
+        // The rx half won't be doing any writing.
+        rx.shutdown().await?;
+
+        // let (tx, rx) = tokio::sync::mpsc::channel::<io::Result<Bytes>>(4);
+        // let mut reader = StreamReader::new(ReceiverStream::new(rx));
+
+        let uuid = Uuid::new_v4();
+        let temp_path = format!("tmp/{}.writing", uuid);
+        let upload_to = self.resolve_real_path(WormPath::new(&temp_path).unwrap());
+
+        let bucket = self.bucket.clone();
+        let join_handle: JoinHandle<Result<u16, S3Error>> = tokio::spawn(async move {
+            let resp_code = bucket.put_object_stream(&mut rx, upload_to).await?;
+            if resp_code != 200 {
+                eprintln!("non-200 resp code for put!")
+            }
+            Ok(resp_code)
+        });
+
+        Ok(S3WormWriter {
+            tx,
+            join_handle,
+            temp_path,
+            bucket: self.bucket.clone(),
+            path_prefix: self.path_prefix.clone(),
+        })
+    }
+}
+
+pub struct S3WormReader {
+    /// S3 bucket
+    bucket: Bucket,
+    /// S3-side path (absolute)
+    path: String,
+
+    /// A stream for reading bytes from S3, if open.
+    reader: Option<(DuplexStream, Range<u64>)>,
+    /// Our current stream offset
+    offset: u64,
+    /// The length of the file
+    length: u64,
+    /// Optionally, a hint for the expected size of the next read operation.
+    /// Can be used to request the correct range from S3.
+    next_read_size_hint: Option<u64>,
+}
+
+impl S3WormReader {
+    fn spawn_reader_for_range(&self, range: Range<u64>) -> DuplexStream {
+        let (mut tx, rx) = duplex(65536);
+
+        let bucket = self.bucket.clone();
+        let path = self.path.clone();
+
+        tokio::spawn(async move {
+            // TODO it says this is 'inclusive', but asserts start < end...
+            // TODO it'd be nice to have a `get_object_range_stream`: PR one to rust-s3?
+            //   alternatively rebuild on top of reqwest and serde_xml_rs, since rust-s3 is
+            //   a little bit dodgy around the corners...
+            if let Err(err) = bucket
+                .get_object_range_to_writer(path, range.start, Some(range.end), &mut tx)
+                .await
+            {
+                eprintln!("TODO error reading from S3 {err:?}");
+            }
+        });
+
+        rx
+    }
+}
+
+impl Debug for S3WormReader {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "S3WormReader({:?})", self.path)
+    }
+}
+
+/// Default size of read request from S3.
+const DEFAULT_READ_WINDOW_SIZE: u64 = 64 * 1024 * 1024;
+
+impl AsyncRead for S3WormReader {
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        if self.reader.is_none() {
+            // Need to spawn a reader
+            if self.offset > self.length {
+                panic!("offset {} > length {}", self.offset, self.length);
+            }
+
+            if self.offset == self.length {
+                // EOF
+                return Poll::Ready(Ok(()));
+            }
+
+            let read_window_size = self
+                .next_read_size_hint
+                .take()
+                .unwrap_or(DEFAULT_READ_WINDOW_SIZE)
+                .min(self.length - self.offset);
+
+            let read_range = self.offset..(self.offset + read_window_size);
+
+            let rx = self.spawn_reader_for_range(read_range.clone());
+            self.reader = Some((rx, read_range));
+        }
+
+        let (reader, read_range) = self.reader.as_mut().unwrap();
+        let orig_remaining = buf.remaining();
+        let read = ready!(Pin::new(reader).poll_read(cx, buf));
+        // `remaining()` shrinks as the buffer is filled, so the number of bytes read is the
+        // decrease in remaining capacity.
+        let bytes_read = (orig_remaining - buf.remaining()) as u64;
+
+        if bytes_read == 0 && read_range.start != read_range.end {
+            // Unexpected EOF
+            return Poll::Ready(Err(io::Error::new(
+                ErrorKind::UnexpectedEof,
+                "early EOF reading from S3",
+            )));
+        }
+
+        read_range.start += bytes_read;
+        if read_range.start >= read_range.end {
+            self.reader = None;
+        }
+        self.offset += bytes_read;
+
+        Poll::Ready(read)
+    }
+}
+
+impl AsyncSeek for S3WormReader {
+    fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> {
+        let desired_offset = match position {
+            SeekFrom::Start(offset) => offset,
+            SeekFrom::End(end_offset) => {
+                let wanted = self.length as i64 + end_offset;
+                if wanted < 0 {
+                    return Err(io::Error::new(
+                        ErrorKind::InvalidInput,
+                        "can't seek below zero (using relative end seek)",
+                    ));
+                }
+                wanted as u64
+            }
+            SeekFrom::Current(delta) => {
+                let wanted = self.offset as i64 + delta;
+                if wanted < 0 {
+                    return Err(io::Error::new(
+                        ErrorKind::InvalidInput,
+                        "can't seek below zero (using relative current seek)",
+                    ));
+                }
+                wanted as u64
+            }
+        };
+
+        // Don't seek beyond the end. (That makes no sense for reading, but is technically allowed.)
+        let desired_offset = desired_offset.min(self.length);
+        self.offset = desired_offset;
+
+        // Discard any readers
+        self.reader = None;
+        self.next_read_size_hint = None;
+
+        // Return OK right away, the actual work will get done when we read.
+        Ok(())
+    }
+
+    fn poll_complete(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<u64>> {
+        // Return OK right away, the actual work gets done when we read.
+ Poll::Ready(Ok(self.offset)) + } +} + +impl WormFileReader for S3WormReader {} + +pub struct S3WormWriter { + tx: DuplexStream, + temp_path: String, + join_handle: JoinHandle>, + bucket: Bucket, + path_prefix: String, +} + +impl Debug for S3WormWriter { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "FileWormWriter({:?})", self.temp_path) + } +} + +impl AsyncWrite for S3WormWriter { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + Pin::new(&mut self.tx).poll_write(cx, buf) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.tx).poll_flush(cx) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.tx).poll_shutdown(cx) + } +} + +#[async_trait] +impl WormFileWriter for S3WormWriter { + async fn finalise(mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { + self.tx.shutdown().await?; + + let resp_code = self + .join_handle + .await? + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + if resp_code != 200 { + return Err(io::Error::new( + ErrorKind::Other, + "non-200 resp code for PUT", + )); + } + + let full_target_path = format!("{}{}", self.path_prefix, target_path.as_str()); + + if !replace { + let (_head, head_code) = self + .bucket + .head_object(&full_target_path) + .await + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + if head_code != 404 { + return Err(io::Error::new( + ErrorKind::Other, + "won't replace file; HEAD of target path not 404", + )); + } + } + + // S3 moves are done as a copy + delete + + let response_code = self + .bucket + .copy_object_internal(&self.temp_path, &full_target_path) + .await + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + if response_code != 200 { + return Err(io::Error::new( + ErrorKind::Other, + "non-200 response for copy object", + )); + } + + let response_code = self + .bucket + .delete_object(&self.temp_path) + .await + .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + if response_code.status_code() != 200 { + return Err(io::Error::new( + ErrorKind::Other, + "non-200 response for delete object", + )); + } + + Ok(()) + } +} From 5cd2700396071962ee8af900771710e86c41afc6 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Wed, 3 May 2023 22:35:41 +0100 Subject: [PATCH 05/51] CHECKPOINT overhaul --- .env | 1 + .envrc | 3 +- .gitignore | 6 + Cargo.lock | 2326 ++++++++++------- Cargo.toml | 16 + GLOSSARY.md | 5 + datman.old/Cargo.toml | 38 + datman.old/README.md | 13 + datman.old/src/bin/datman.rs | 468 ++++ {datman => datman.old}/src/commands.rs | 0 {datman => datman.old}/src/commands/backup.rs | 0 .../src/commands/extract.rs | 0 .../src/commands/ibrowse.rs | 0 {datman => datman.old}/src/commands/ilabel.rs | 0 {datman => datman.old}/src/commands/prune.rs | 0 .../src/commands/pushpull.rs | 0 {datman => datman.old}/src/commands/report.rs | 0 {datman => datman.old}/src/descriptor.rs | 0 {datman => datman.old}/src/labelling.rs | 0 datman.old/src/lib.rs | 12 + {datman => datman.old}/src/remote.rs | 0 .../src/remote/backup_source_requester.rs | 0 .../src/remote/backup_source_responder.rs | 0 {datman => datman.old}/src/tree.rs | 0 datman/Cargo.toml | 26 +- datman/src/bin/datman.rs | 450 +--- datman/src/lib.rs | 11 - flake.lock | 50 +- flake.nix | 71 +- shell.nix | 1 + yama.old/Cargo.toml | 44 + yama.old/README.md | 25 + yama.old/src/bin/yama.rs | 311 +++ {yama => yama.old}/src/chunking.rs | 0 {yama => yama.old}/src/commands.rs | 0 {yama 
=> yama.old}/src/debug.rs | 0 {yama => yama.old}/src/definitions.rs | 0 yama.old/src/lib.rs | 10 + {yama => yama.old}/src/operations.rs | 0 {yama => yama.old}/src/operations/checking.rs | 0 {yama => yama.old}/src/operations/cleanup.rs | 0 .../src/operations/extracting.rs | 0 .../src/operations/legacy_pushpull.rs | 0 {yama => yama.old}/src/operations/storing.rs | 0 {yama => yama.old}/src/pile.rs | 1 - {yama => yama.old}/src/pile/access_guard.rs | 0 {yama => yama.old}/src/pile/compression.rs | 0 {yama => yama.old}/src/pile/integrity.rs | 0 .../src/pile/local_sqlitebloblogs.rs | 0 {yama => yama.old}/src/progress.rs | 0 {yama => yama.old}/src/remote.rs | 0 {yama => yama.old}/src/remote/requester.rs | 0 {yama => yama.old}/src/remote/responder.rs | 0 {yama => yama.old}/src/tree.rs | 0 {yama => yama.old}/src/utils.rs | 0 yama/Cargo.toml | 67 +- yama/src/bin/yama.rs | 862 ++++-- yama/src/bin/yamascan.rs | 234 ++ yama/src/extract.rs | 416 +++ yama/src/init.rs | 110 + yama/src/lib.rs | 31 +- yama/src/open.rs | 167 ++ yama/src/pile/encryption.rs | 138 - yama/src/pile_connector.rs | 76 + yama/src/pile_with_cache.rs | 56 + yama/src/retriever.rs | 396 +++ yama/src/retriever/decompressor.rs | 211 ++ yama/src/scan.rs | 263 ++ yama/src/storing.rs | 391 +++ yama/src/vacuum.rs | 1 + yama_cli_readme.txt | 91 + yama_localcache/Cargo.toml | 15 + yama_localcache/dev_db.sh | 7 + .../20230413133342_local_index_cache.sql | 30 + yama_localcache/src/lib.rs | 335 +++ yama_midlevel_crypto/Cargo.toml | 35 + yama_midlevel_crypto/src/asym_box.rs | 92 + yama_midlevel_crypto/src/asym_keyx.rs | 149 ++ yama_midlevel_crypto/src/asym_signed.rs | 126 + yama_midlevel_crypto/src/byte_layer.rs | 55 + yama_midlevel_crypto/src/chunk_id.rs | 91 + yama_midlevel_crypto/src/key_derivation.rs | 72 + yama_midlevel_crypto/src/keys_kyber.rs | 109 + yama_midlevel_crypto/src/keys_x25519.rs | 76 + yama_midlevel_crypto/src/lib.rs | 17 + yama_midlevel_crypto/src/sym_box.rs | 142 + yama_midlevel_crypto/src/sym_stream.rs | 81 + yama_midlevel_crypto/src/zstd_box.rs | 40 + yama_pile/Cargo.toml | 25 + yama_pile/src/bloblogs.rs | 211 ++ yama_pile/src/definitions.rs | 146 ++ yama_pile/src/keyring.rs | 53 + yama_pile/src/lib.rs | 249 ++ yama_pile/src/locks.rs | 358 +++ yama_pile/src/pointers.rs | 19 + yama_pile/src/tree.rs | 978 +++++++ yama_pile/src/tree/unpopulated.rs | 27 + yama_pile/src/utils.rs | 226 ++ yama_wormfile/Cargo.toml | 3 +- yama_wormfile/src/boxed.rs | 80 +- yama_wormfile/src/lib.rs | 32 +- yama_wormfile/src/paths.rs | 1 + yama_wormfile_fs/Cargo.toml | 3 +- yama_wormfile_fs/src/lib.rs | 58 +- yama_wormfile_s3/Cargo.toml | 7 +- yama_wormfile_s3/examples/s3demo.rs | 43 + yama_wormfile_s3/src/lib.rs | 78 +- yama_wormfile_sftp/Cargo.toml | 3 +- yama_wormfile_sftp/src/lib.rs | 247 +- 109 files changed, 9623 insertions(+), 2094 deletions(-) create mode 100644 .env create mode 100644 GLOSSARY.md create mode 100644 datman.old/Cargo.toml create mode 100644 datman.old/README.md create mode 100644 datman.old/src/bin/datman.rs rename {datman => datman.old}/src/commands.rs (100%) rename {datman => datman.old}/src/commands/backup.rs (100%) rename {datman => datman.old}/src/commands/extract.rs (100%) rename {datman => datman.old}/src/commands/ibrowse.rs (100%) rename {datman => datman.old}/src/commands/ilabel.rs (100%) rename {datman => datman.old}/src/commands/prune.rs (100%) rename {datman => datman.old}/src/commands/pushpull.rs (100%) rename {datman => datman.old}/src/commands/report.rs (100%) rename {datman => datman.old}/src/descriptor.rs (100%) 
rename {datman => datman.old}/src/labelling.rs (100%) create mode 100644 datman.old/src/lib.rs rename {datman => datman.old}/src/remote.rs (100%) rename {datman => datman.old}/src/remote/backup_source_requester.rs (100%) rename {datman => datman.old}/src/remote/backup_source_responder.rs (100%) rename {datman => datman.old}/src/tree.rs (100%) create mode 100644 yama.old/Cargo.toml create mode 100644 yama.old/README.md create mode 100644 yama.old/src/bin/yama.rs rename {yama => yama.old}/src/chunking.rs (100%) rename {yama => yama.old}/src/commands.rs (100%) rename {yama => yama.old}/src/debug.rs (100%) rename {yama => yama.old}/src/definitions.rs (100%) create mode 100644 yama.old/src/lib.rs rename {yama => yama.old}/src/operations.rs (100%) rename {yama => yama.old}/src/operations/checking.rs (100%) rename {yama => yama.old}/src/operations/cleanup.rs (100%) rename {yama => yama.old}/src/operations/extracting.rs (100%) rename {yama => yama.old}/src/operations/legacy_pushpull.rs (100%) rename {yama => yama.old}/src/operations/storing.rs (100%) rename {yama => yama.old}/src/pile.rs (99%) rename {yama => yama.old}/src/pile/access_guard.rs (100%) rename {yama => yama.old}/src/pile/compression.rs (100%) rename {yama => yama.old}/src/pile/integrity.rs (100%) rename {yama => yama.old}/src/pile/local_sqlitebloblogs.rs (100%) rename {yama => yama.old}/src/progress.rs (100%) rename {yama => yama.old}/src/remote.rs (100%) rename {yama => yama.old}/src/remote/requester.rs (100%) rename {yama => yama.old}/src/remote/responder.rs (100%) rename {yama => yama.old}/src/tree.rs (100%) rename {yama => yama.old}/src/utils.rs (100%) create mode 100644 yama/src/bin/yamascan.rs create mode 100644 yama/src/extract.rs create mode 100644 yama/src/init.rs create mode 100644 yama/src/open.rs delete mode 100644 yama/src/pile/encryption.rs create mode 100644 yama/src/pile_connector.rs create mode 100644 yama/src/pile_with_cache.rs create mode 100644 yama/src/retriever.rs create mode 100644 yama/src/retriever/decompressor.rs create mode 100644 yama/src/scan.rs create mode 100644 yama/src/storing.rs create mode 100644 yama/src/vacuum.rs create mode 100644 yama_cli_readme.txt create mode 100644 yama_localcache/Cargo.toml create mode 100755 yama_localcache/dev_db.sh create mode 100644 yama_localcache/migrations/20230413133342_local_index_cache.sql create mode 100644 yama_localcache/src/lib.rs create mode 100644 yama_midlevel_crypto/Cargo.toml create mode 100644 yama_midlevel_crypto/src/asym_box.rs create mode 100644 yama_midlevel_crypto/src/asym_keyx.rs create mode 100644 yama_midlevel_crypto/src/asym_signed.rs create mode 100644 yama_midlevel_crypto/src/byte_layer.rs create mode 100644 yama_midlevel_crypto/src/chunk_id.rs create mode 100644 yama_midlevel_crypto/src/key_derivation.rs create mode 100644 yama_midlevel_crypto/src/keys_kyber.rs create mode 100644 yama_midlevel_crypto/src/keys_x25519.rs create mode 100644 yama_midlevel_crypto/src/lib.rs create mode 100644 yama_midlevel_crypto/src/sym_box.rs create mode 100644 yama_midlevel_crypto/src/sym_stream.rs create mode 100644 yama_midlevel_crypto/src/zstd_box.rs create mode 100644 yama_pile/Cargo.toml create mode 100644 yama_pile/src/bloblogs.rs create mode 100644 yama_pile/src/definitions.rs create mode 100644 yama_pile/src/keyring.rs create mode 100644 yama_pile/src/lib.rs create mode 100644 yama_pile/src/locks.rs create mode 100644 yama_pile/src/pointers.rs create mode 100644 yama_pile/src/tree.rs create mode 100644 yama_pile/src/tree/unpopulated.rs create mode 100644 
yama_pile/src/utils.rs create mode 100644 yama_wormfile_s3/examples/s3demo.rs diff --git a/.env b/.env new file mode 100644 index 0000000..8025357 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +DATABASE_URL=sqlite:yama_localcache/testdb.sqlite diff --git a/.envrc b/.envrc index c9293c0..5d9f395 100644 --- a/.envrc +++ b/.envrc @@ -1,2 +1,3 @@ -use nix +#use nix +use flake . diff --git a/.gitignore b/.gitignore index 3a7f2da..0761fab 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,9 @@ __pycache__ /datman-helper-mysql/datman_helper_mysql.egg-info /result + +.direnv +yama7demo +yamaSFTPdemo + +yama_localcache/testdb.sqlite \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index cab7847..180fe3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,27 +8,21 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +[[package]] +name = "addr2line" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" +dependencies = [ + "gimli", +] + [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" -[[package]] -name = "ahash" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" -dependencies = [ - "const-random", -] - -[[package]] -name = "ahash" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" - [[package]] name = "ahash" version = "0.7.6" @@ -42,9 +36,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.18" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] @@ -56,27 +50,81 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" [[package]] -name = "ambient-authority" -version = "0.0.1" +name = "android_system_properties" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec8ad6edb4840b78c5c3d88de606b22252d552b55f3a4699fbb10fc070ec3049" - -[[package]] -name = "anyhow" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" - -[[package]] -name = "arc-interner" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03d96e5bb1b3f9313145dfc9c15d22036fa900533d1a21744f684c642919dd09" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "ahash 0.3.8", - "dashmap", - "once_cell", - "serde", + "libc", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "anstream" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" + +[[package]] +name = "anstyle-parse" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + +[[package]] +name = "appdirs" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d512b3e61196d27562dcc71446a58ba8a93d3bed2a03a87f96101b9a17f1d378" +dependencies = [ + "ole32-sys", + "shell32-sys", + "winapi 0.2.8", ] [[package]] @@ -85,12 +133,46 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" +[[package]] +name = "argon2" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4ce4441f99dbd377ca8a8f57b698c44d0d6e712d8329b5040da5a64aa1ce73" +dependencies = [ + "base64ct", + "blake2", + "password-hash", +] + [[package]] name = "array-init" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + +[[package]] +name = "async-recursion" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + [[package]] name = "async-trait" version = "0.1.68" @@ -102,6 +184,15 @@ dependencies = [ "syn 2.0.13", ] +[[package]] +name = "atoi" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c57d12312ff59c811c0643f4d80830505833c9ffaebd193d819392b265be8e" +dependencies = [ + "num-traits", +] + [[package]] name = "attohttpc" version = "0.22.0" @@ -124,7 +215,7 @@ checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi 0.1.19", "libc", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -140,7 +231,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70af449c9a763cb655c6a1e5338b42d99c67190824ff90658c1e30be844c0775" dependencies = [ "awaitable-error", - "cfg-if 1.0.0", + "cfg-if", ] [[package]] 
@@ -176,32 +267,18 @@ dependencies = [ ] [[package]] -name = "bare-metrics-core" -version = "0.1.0" +name = "backtrace" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff7b4664025c0967087f03e9f1a9be00b3ce61cc4e99448afa9d4daa757e9954" +checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" dependencies = [ - "hdrhistogram", - "serde", - "serde_bare 0.5.0", -] - -[[package]] -name = "bare-metrics-recorder" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f81335882068ff256d4545eee4c8721e75331d4420e954adf5c88dbceb2dc74" -dependencies = [ - "anyhow", - "bare-metrics-core", - "crossbeam-channel", - "dashmap", - "fxhash", - "hdrhistogram", - "log", - "metrics", - "serde_bare 0.5.0", - "thiserror", + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", ] [[package]] @@ -216,6 +293,12 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + [[package]] name = "bitflags" version = "1.3.2" @@ -229,13 +312,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1" [[package]] -name = "blake" -version = "2.0.2" +name = "blake2" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee55e9ca33be1f257d8356cfb29b10b1c8f86dc38cf1344ca01525464356cd0c" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef" +dependencies = [ + "arrayref", + "arrayvec", "cc", - "libc", + "cfg-if", + "constant_time_eq", + "digest", ] [[package]] @@ -247,6 +343,16 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "bumpalo" version = "3.12.0" @@ -265,50 +371,6 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" -[[package]] -name = "cap-fs-ext" -version = "0.24.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54b86398b5852ddd45784b1d9b196b98beb39171821bad4b8b44534a1e87927" -dependencies = [ - "cap-primitives", - "cap-std", - "io-lifetimes 0.5.3", - "winapi", -] - -[[package]] -name = "cap-primitives" -version = "0.24.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb8fca3e81fae1d91a36e9784ca22a39ef623702b5f7904d89dc31f10184a178" -dependencies = [ - "ambient-authority", - "errno 0.2.8", - "fs-set-times", - "io-extras", - "io-lifetimes 0.5.3", - "ipnet", - "maybe-owned", - "rustix 0.33.7", - "winapi", - "winapi-util", - "winx", -] - -[[package]] -name = "cap-std" -version = "0.24.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2247568946095c7765ad2b441a56caffc08027734c634a6d5edda648f04e32eb" -dependencies = [ - "cap-primitives", - "io-extras", - "io-lifetimes 0.5.3", - "ipnet", - "rustix 0.33.7", -] - [[package]] name = "cc" version = "1.0.73" @@ -318,12 +380,6 @@ dependencies = [ "jobserver", ] -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -331,68 +387,126 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "chrono" -version = "0.4.19" +name = "chacha20" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" dependencies = [ - "libc", + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chrono" +version = "0.4.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" +dependencies = [ + "iana-time-zone", + "js-sys", "num-integer", "num-traits", - "time 0.1.44", - "winapi", + "serde", + "time 0.1.45", + "wasm-bindgen", + "winapi 0.3.9", +] + +[[package]] +name = "ciborium" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" + +[[package]] +name = "ciborium-ll" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", ] [[package]] name = "clap" -version = "3.2.3" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df386a2d0f35bdefc0642fd8bcb2cd28243959f028abfd22fbade6f7d30980e" +checksum = "9b802d85aaf3a1cdb02b224ba472ebdea62014fccfcb269b95a4d76443b5ee5a" dependencies = [ - "atty", - "bitflags 1.3.2", + "clap_builder", "clap_derive", - "clap_lex", - "indexmap", "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14a1a858f532119338887a4b8e1af9c60de8249cd7bafd68036a489e261e37b6" +dependencies = [ + "anstream", + "anstyle", + "bitflags 1.3.2", + "clap_lex", "strsim", - "termcolor", - "textwrap", ] [[package]] name = "clap_derive" -version = "3.2.3" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b740354ad9fcf20e27b46d921be4bb3712f5b3c2c7a89ba68a72a8e51d3a47f" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.96", + "syn 2.0.13", ] [[package]] name = 
"clap_lex" -version = "0.2.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5538cd660450ebeb4234cfecf8f2284b844ffc4c50531e66d584ad5b91293613" +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" dependencies = [ - "os_str_bytes", + "termcolor", + "unicode-width", ] [[package]] -name = "comfy-table" -version = "6.0.0" +name = "colorchoice" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121d8a5b0346092c18a4b2fd6f620d7a06f0eb7ac0a45860939a0884bc579c56" -dependencies = [ - "crossterm", - "strum", - "strum_macros", - "unicode-width", -] +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "concurrent_arena" @@ -403,53 +517,41 @@ dependencies = [ "arc-swap", "array-init", "const_fn_assert", - "parking_lot", + "parking_lot 0.12.1", "triomphe", ] [[package]] name = "console" -version = "0.15.0" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" +checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" dependencies = [ "encode_unicode", - "libc", - "once_cell", - "regex", - "terminal_size", - "unicode-width", - "winapi", -] - -[[package]] -name = "const-random" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" -dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" -dependencies = [ - "getrandom", "lazy_static", - "proc-macro-hack", - "tiny-keccak", + "libc", + "unicode-width", + "windows-sys 0.42.0", ] +[[package]] +name = "const-oid" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520fbf3c07483f94e3e3ca9d0cfd913d7718ef2483d2cfd91c0d9e91474ab913" + [[package]] name = "const_fn_assert" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27d614f23f34f7b5165a77dc1591f497e2518f9cec4b4f4b92bfc4dc6cf7a190" +[[package]] +name = "constant_time_eq" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" + [[package]] name = "core-foundation" version = "0.9.3" @@ -476,90 +578,40 @@ dependencies = [ ] [[package]] -name = "crc32fast" -version = "1.3.2" +name = "crc" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" dependencies = [ - "cfg-if 1.0.0", + "crc-catalog", ] [[package]] -name = "crossbeam-channel" -version = "0.5.4" +name = "crc-catalog" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] +checksum = 
"9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484" [[package]] -name = "crossbeam-deque" -version = "0.8.1" +name = "crossbeam-queue" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" dependencies = [ - "cfg-if 1.0.0", - "crossbeam-epoch", + "cfg-if", "crossbeam-utils", ] -[[package]] -name = "crossbeam-epoch" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" -dependencies = [ - "autocfg", - "cfg-if 1.0.0", - "crossbeam-utils", - "lazy_static", - "memoffset", - "scopeguard", -] - [[package]] name = "crossbeam-utils" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "lazy_static", ] -[[package]] -name = "crossterm" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17" -dependencies = [ - "bitflags 1.3.2", - "crossterm_winapi", - "libc", - "mio", - "parking_lot", - "signal-hook", - "signal-hook-mio", - "winapi", -] - -[[package]] -name = "crossterm_winapi" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" -dependencies = [ - "winapi", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - [[package]] name = "crypto-common" version = "0.1.6" @@ -571,55 +623,93 @@ dependencies = [ ] [[package]] -name = "dashmap" -version = "4.0.2" +name = "curve25519-dalek" +version = "4.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" +checksum = "03d928d978dbec61a1167414f5ec534f24bea0d7a0d24dd9b6233d3d8223e585" dependencies = [ - "cfg-if 1.0.0", - "num_cpus", + "cfg-if", + "digest", + "fiat-crypto", + "packed_simd_2", + "platforms", + "serde", + "subtle", + "zeroize", +] + +[[package]] +name = "cxx" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn 2.0.13", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "dashmap" 
+version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core 0.9.3", ] [[package]] name = "datman" version = "0.7.0-alpha.1" dependencies = [ - "anyhow", - "arc-interner", - "bare-metrics-recorder", - "byteorder", - "chrono", - "clap", - "comfy-table", - "crossbeam-channel", - "env_logger", - "glob", - "hostname", - "humansize", - "indicatif", - "io-streams", - "itertools 0.10.3", - "libc", - "log", - "metrics", - "serde", - "serde_json", - "termion", - "thiserror", - "toml", - "yama", - "zstd", + "eyre", ] [[package]] -name = "derivative" -version = "2.2.0" +name = "der" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +checksum = "82b10af9f9f9f2134a42d3f8aa74658660f2e0234b0eb81bd171df8aa32779ed" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.96", + "const-oid", + "zeroize", ] [[package]] @@ -630,7 +720,7 @@ checksum = "35cb7e5875e1028a73e551747d6d0118f25c3d6dbba2dadf97cc0f4d0c53f2f5" dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", ] [[package]] @@ -653,16 +743,6 @@ dependencies = [ "dirs-sys", ] -[[package]] -name = "dirs-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" -dependencies = [ - "cfg-if 1.0.0", - "dirs-sys-next", -] - [[package]] name = "dirs-sys" version = "0.3.7" @@ -671,18 +751,7 @@ checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" dependencies = [ "libc", "redox_users", - "winapi", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -692,20 +761,53 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" [[package]] -name = "duplex" -version = "0.11.0" +name = "dotenvy" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1178dea852a9bad9cb4b8e9111fbf0379a4c288e5cf4e14c36c87b85fe0cbbfa" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "dust_style_filetree_display" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cfb3415646f374442f16f937fc46558c9406c2759cf0c246b00ea4707bc1ade" +dependencies = [ + "ansi_term", + "atty", + "lscolors", + "regex", + "stfu8", + "terminal_size", + "thousands", + "unicode-width", + "winapi-util", +] [[package]] name = "ed25519" -version = "1.5.2" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9c280362032ea4203659fc489832d0204ef09f247a0506f170dafcac08c369" +checksum = "5fb04eee5d9d907f29e80ee6b0e78f7e2c82342c63e3580d8c4f69d9d5aad963" dependencies = [ + "pkcs8", + "serde", "signature", ] +[[package]] +name = "ed25519-dalek" +version = "2.0.0-rc.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "798f704d128510932661a3489b08e3f4c934a01d61c5def59ae7b8e48f19665a" +dependencies = [ + "curve25519-dalek", + 
"ed25519", + "rand_core", + "serde", + "sha2", + "zeroize", +] + [[package]] name = "either" version = "1.6.1" @@ -724,31 +826,7 @@ version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "env_logger" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "errno" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", + "cfg-if", ] [[package]] @@ -773,22 +851,26 @@ dependencies = [ ] [[package]] -name = "fallible-iterator" -version = "0.2.0" +name = "event-listener" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" [[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" +name = "eyre" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +checksum = "4c2b6b5a29c02cdc822728b7d7b8ae1bab3e3b05d44522770ddd49722eeac7eb" +dependencies = [ + "indenter", + "once_cell", +] [[package]] name = "fastcdc" -version = "1.0.6" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "861d7b3427fbf3e06300b4aca5c430a2e263b7a7b6821faff8b200d3dc4a61cb" +checksum = "10010f9b2e601acfda445cb35385cf4241fce85c6e1ea702b157c39f79f8787a" [[package]] name = "fastrand" @@ -800,13 +882,22 @@ dependencies = [ ] [[package]] -name = "flate2" -version = "1.0.24" +name = "fiat-crypto" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +checksum = "e825f6987101665dea6ec934c09ec6d721de7bc1bf92248e1d5810c8cd636b77" + +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" dependencies = [ - "crc32fast", - "miniz_oxide", + "futures-core", + "futures-sink", + "nanorand", + "pin-project", + "spin 0.9.8", ] [[package]] @@ -839,27 +930,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs-set-times" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df62ee66ee2d532ea8d567b5a3f0d03ecd64636b98bad5be1e93dcc918b92aa" -dependencies = [ - "io-lifetimes 0.5.3", - "rustix 0.33.7", - "winapi", -] - -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "futures" version = "0.3.28" @@ -902,6 +972,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a604f7a68fbf8103337523b1fadc8ade7361ee3f112f7c680ad179651616aed5" +dependencies 
= [ + "futures-core", + "lock_api", + "parking_lot 0.11.2", +] + [[package]] name = "futures-io" version = "0.3.28" @@ -949,15 +1030,6 @@ dependencies = [ "slab", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -974,16 +1046,31 @@ version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", + "js-sys", "libc", "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] -name = "glob" -version = "0.3.0" +name = "gimli" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" + +[[package]] +name = "globset" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc" +dependencies = [ + "aho-corasick", + "bstr", + "fnv", + "log", + "regex", +] [[package]] name = "h2" @@ -1005,19 +1092,10 @@ dependencies = [ ] [[package]] -name = "hashbrown" -version = "0.9.1" +name = "half" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" -dependencies = [ - "ahash 0.4.7", -] - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "hashbrown" @@ -1025,30 +1103,16 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash 0.7.6", + "ahash", ] [[package]] name = "hashlink" -version = "0.6.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d99cf782f0dc4372d26846bec3de7804ceb5df083c2d4462c0b8d2330e894fa8" +checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" dependencies = [ - "hashbrown 0.9.1", -] - -[[package]] -name = "hdrhistogram" -version = "7.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31672b7011be2c4f7456c4ddbcb40e7e9a4a9fad8efe49a6ebaf5f307d0109c0" -dependencies = [ - "base64 0.13.0", - "byteorder", - "crossbeam-channel", - "flate2", - "nom", - "num-traits", + "hashbrown", ] [[package]] @@ -1056,6 +1120,9 @@ name = "heck" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "hermit-abi" @@ -1095,7 +1162,7 @@ checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" dependencies = [ "libc", "match_cfg", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -1132,21 +1199,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" -[[package]] -name 
= "humansize" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" - -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] - [[package]] name = "hyper" version = "0.14.25" @@ -1184,6 +1236,30 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "iana-time-zone" +version = "0.1.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + [[package]] name = "idna" version = "0.3.0" @@ -1195,25 +1271,58 @@ dependencies = [ ] [[package]] -name = "indexmap" -version = "1.8.2" +name = "ignore" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" +checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492" +dependencies = [ + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", +] + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown 0.11.2", + "hashbrown", ] [[package]] name = "indicatif" -version = "0.14.0" +version = "0.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49a68371cf417889c9d7f98235b7102ea7c54fc59bcbd22f3dea785be9d27e40" +checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" dependencies = [ "console", - "lazy_static", "number_prefix", - "regex", + "portable-atomic", + "unicode-width", + "vt100", +] + +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", ] [[package]] @@ -1222,27 +1331,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ - "cfg-if 1.0.0", -] - -[[package]] -name = "io-extras" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c937cc9891c12eaa8c63ad347e4a288364b1328b924886970b47a14ab8f8f8" -dependencies = [ - "io-lifetimes 0.5.3", - "os_pipe", - "winapi", -] - -[[package]] -name = "io-lifetimes" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec58677acfea8a15352d42fc87d11d63596ade9239e0a7c9352914417515dbe6" -dependencies = [ - "os_pipe", + "cfg-if", ] 
[[package]] @@ -1256,22 +1345,6 @@ dependencies = [ "windows-sys 0.45.0", ] -[[package]] -name = "io-streams" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fba6685e8e5efa7bd0ce8c4e92ac113b3b059b2e18bd1bf51f7cfab0f61b4b19" -dependencies = [ - "duplex", - "io-extras", - "io-lifetimes 0.5.3", - "memchr", - "os_pipe", - "parking", - "rustix 0.33.7", - "system-interface", -] - [[package]] name = "ipnet" version = "2.5.0" @@ -1279,28 +1352,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" [[package]] -name = "itertools" -version = "0.9.0" +name = "is-terminal" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "either", + "hermit-abi 0.3.1", + "io-lifetimes", + "rustix", + "windows-sys 0.48.0", ] [[package]] name = "itertools" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.2" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jobserver" @@ -1333,32 +1409,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] -name = "libsodium-sys" -version = "0.2.7" +name = "libm" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b779387cd56adfbc02ea4a668e704f729be8d6a6abd2c27ca5ee537849a92fd" -dependencies = [ - "cc", - "libc", - "pkg-config", - "walkdir", -] +checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a" [[package]] name = "libsqlite3-sys" -version = "0.20.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d31059f22935e6c31830db5249ba2b7ecd54fd73a9909286f0a67aa55c2fbd" +checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14" dependencies = [ + "cc", "pkg-config", "vcpkg", ] [[package]] -name = "linux-raw-sys" -version = "0.0.42" +name = "link-cplusplus" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5284f00d480e1c39af34e72f8ad60b94f47007e3481cd3b731c1d67190ddc7b7" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] [[package]] name = "linux-raw-sys" @@ -1368,9 +1442,9 @@ checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" [[package]] name = "lock_api" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" dependencies = [ "autocfg", "scopeguard", @@ -1382,7 +1456,17 @@ version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", +] + +[[package]] +name = "lscolors" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dedc85d67baf5327114fad78ab9418f8893b1121c17d5538dd11005ad1ddf2" +dependencies = [ + "ansi_term", + "nu-ansi-term", ] [[package]] @@ -1391,6 +1475,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata", +] + [[package]] name = "maybe-async" version = "0.2.7" @@ -1399,15 +1492,9 @@ checksum = "0f1b8c13cb1f814b634a96b2c725449fe7ed464a7b8781de8688be5ffbd3f305" dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", ] -[[package]] -name = "maybe-owned" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" - [[package]] name = "md5" version = "0.7.0" @@ -1421,35 +1508,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] -name = "memoffset" -version = "0.6.5" +name = "memmap2" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" dependencies = [ - "autocfg", -] - -[[package]] -name = "metrics" -version = "0.17.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55586aa936c35f34ba8aa5d97356d554311206e1ce1f9e68fe7b07288e5ad827" -dependencies = [ - "ahash 0.7.6", - "metrics-macros", -] - -[[package]] -name = "metrics-macros" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0daa0ab3a0ae956d0e2c1f42511422850e577d36a255357d1a7d08d45ee3a2f1" -dependencies = [ - "lazy_static", - "proc-macro2", - "quote", - "regex", - "syn 1.0.96", + "libc", ] [[package]] @@ -1475,9 +1539,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.5.3" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] @@ -1494,6 +1558,15 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +dependencies = [ + "getrandom", +] + [[package]] name = "native-tls" version = "0.2.11" @@ -1512,31 +1585,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "nix" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50e4785f2c3b7589a0d0c1dd60285e1188adac4006e8abd6dd578e1567027363" -dependencies = [ - "bitflags 1.3.2", - "cc", - "cfg-if 0.1.10", - "libc", - "void", -] - -[[package]] -name = "nix" -version = "0.19.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" -dependencies = [ - "bitflags 1.3.2", - "cc", - "cfg-if 1.0.0", - "libc", -] - [[package]] name = "nom" version = "7.1.1" @@ -1547,6 +1595,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi 0.3.9", +] + [[package]] name = "num-derive" version = "0.3.3" @@ -1555,7 +1613,7 @@ checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", ] [[package]] @@ -1589,21 +1647,40 @@ dependencies = [ [[package]] name = "number_prefix" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] -name = "numtoa" -version = "0.1.0" +name = "object" +version = "0.30.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" +dependencies = [ + "memchr", +] + +[[package]] +name = "ole32-sys" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d2c49021782e5233cd243168edfa8037574afed4eba4bbaf538b3d8d1789d8c" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] [[package]] name = "once_cell" -version = "1.12.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openssh" @@ -1701,7 +1778,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d2f106ab837a24e03672c59b1239669a0596406ff657c3c0835b6b7f0f35a33" dependencies = [ "bitflags 1.3.2", - "cfg-if 1.0.0", + "cfg-if", "foreign-types", "libc", "once_cell", @@ -1745,25 +1822,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" dependencies = [ "dlv-list", - "hashbrown 0.12.3", + "hashbrown", ] -[[package]] -name = "os_pipe" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c92f2b54f081d635c77e7120862d48db8e91f7f21cef23ab1b4fe9971c59f55" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "os_str_bytes" -version = "6.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" - [[package]] name = "ouroboros" version = "0.15.6" @@ -1784,14 +1845,35 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", ] [[package]] -name = "parking" -version = "2.0.0" +name = "overload" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "packed_simd_2" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1914cd452d8fccd6f9db48147b29fd4ae05bea9dc5d9ad578509f72415de282" +dependencies = [ + "cfg-if", + "libm", +] + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] [[package]] name = "parking_lot" @@ -1800,7 +1882,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core", + "parking_lot_core 0.9.3", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall 0.2.13", + "smallvec", + "winapi 0.3.9", ] [[package]] @@ -1809,13 +1905,39 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall 0.2.13", "smallvec", "windows-sys 0.36.1", ] +[[package]] +name = "password-hash" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" +dependencies = [ + "base64ct", + "rand_core", + "subtle", +] + +[[package]] +name = "paste" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" + +[[package]] +name = "patricia_tree" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3bc1f0380d1594aff660cf73ed9a38cfb9947ae2824cddf49d9aa0b8e6b8c40" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "percent-encoding" version = "2.2.0" @@ -1839,7 +1961,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", ] [[package]] @@ -1854,18 +1976,60 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +[[package]] +name = "platforms" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d7ddaed09e0eb771a79ab0fd64609ba0afb0a8366421957936ad14cbd13630" + +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies 
= [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "portable-atomic" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" + [[package]] name = "ppv-lite86" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +[[package]] +name = "pqc_kyber" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e365a00e3d714892ca43db85b6f3030a326ed21d8ecc64854f7eb8e15f7dd28f" +dependencies = [ + "rand_core", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -1875,7 +2039,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", "version_check", ] @@ -1890,12 +2054,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - [[package]] name = "proc-macro2" version = "1.0.55" @@ -1905,12 +2063,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quick-xml" version = "0.26.0" @@ -1953,37 +2105,13 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] -[[package]] -name = "rayon" -version = "1.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "num_cpus", -] - [[package]] name = "redox_syscall" version = "0.2.13" @@ -2002,15 +2130,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_termios" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f" -dependencies = [ - "redox_syscall 0.2.13", -] - [[package]] name = "redox_users" version = "0.4.3" @@ -2024,9 +2143,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.6" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -2034,10 +2153,19 @@ dependencies = [ ] [[package]] -name = "regex-syntax" -version = "0.6.26" +name = "regex-automata" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +checksum = 
"6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "reqwest" @@ -2079,18 +2207,18 @@ dependencies = [ ] [[package]] -name = "rusqlite" -version = "0.24.2" +name = "ring" +version = "0.16.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38ee71cbab2c827ec0ac24e76f82eca723cee92c509a65f67dee393c25112" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" dependencies = [ - "bitflags 1.3.2", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "memchr", - "smallvec", + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted", + "web-sys", + "winapi 0.3.9", ] [[package]] @@ -2099,7 +2227,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "ordered-multimap", ] @@ -2114,7 +2242,7 @@ dependencies = [ "aws-region", "base64 0.13.0", "bytes", - "cfg-if 1.0.0", + "cfg-if", "futures", "hex", "hmac", @@ -2137,20 +2265,10 @@ dependencies = [ ] [[package]] -name = "rustix" -version = "0.33.7" +name = "rustc-demangle" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938a344304321a9da4973b9ff4f9f8db9caf4597dfd9dda6a60b523340a0fff0" -dependencies = [ - "bitflags 1.3.2", - "errno 0.2.8", - "io-lifetimes 0.5.3", - "itoa", - "libc", - "linux-raw-sys 0.0.42", - "once_cell", - "winapi", -] +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustix" @@ -2159,38 +2277,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d097081ed288dfe45699b72f5b5d648e5f15d64d900c7080273baa20c16a6849" dependencies = [ "bitflags 1.3.2", - "errno 0.3.0", - "io-lifetimes 1.0.9", + "errno", + "io-lifetimes", "libc", - "linux-raw-sys 0.3.1", + "linux-raw-sys", "windows-sys 0.45.0", ] [[package]] -name = "rustversion" -version = "1.0.6" +name = "rustls" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" +checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" +dependencies = [ + "log", + "ring", + "sct", + "webpki", +] [[package]] -name = "rustyline" -version = "7.1.0" +name = "rustls-pemfile" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8227301bfc717136f0ecbd3d064ba8199e44497a0bdd46bb01ede4387cfd2cec" +checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "bitflags 1.3.2", - "cfg-if 1.0.0", - "dirs-next", - "fs2", - "libc", - "log", - "memchr", - "nix 0.19.1", - "scopeguard", - "unicode-segmentation", - "unicode-width", - "utf8parse", - "winapi", + "base64 0.21.0", ] [[package]] @@ -2242,6 +2354,22 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scratch" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" + 
+[[package]] +name = "sct" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "security-framework" version = "2.8.2" @@ -2267,9 +2395,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.137" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] @@ -2286,46 +2414,37 @@ dependencies = [ "xml-rs", ] -[[package]] -name = "serde_bare" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01db2255aa98fb93ad74272d8b2e6fd4851860e733e944b9439cf148127164b2" -dependencies = [ - "serde", -] - -[[package]] -name = "serde_bare" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51c55386eed0f1ae957b091dc2ca8122f287b60c79c774cbe3d5f2b69fded660" -dependencies = [ - "serde", -] - [[package]] name = "serde_derive" -version = "1.0.137" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 2.0.13", ] [[package]] name = "serde_json" -version = "1.0.81" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "itoa", "ryu", "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2344,11 +2463,20 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + [[package]] name = "shell-escape" version = "0.1.5" @@ -2356,24 +2484,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f" [[package]] -name = "signal-hook" -version = "0.3.14" +name = "shell32-sys" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +checksum = "9ee04b46101f57121c9da2b151988283b6beb79b34f5bb29a58ee48cb695122c" dependencies = [ - "libc", - "signal-hook-registry", -] - -[[package]] -name = "signal-hook-mio" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" -dependencies = [ - "libc", - "mio", - "signal-hook", + "winapi 0.2.8", + 
"winapi-build", ] [[package]] @@ -2387,9 +2504,9 @@ dependencies = [ [[package]] name = "signature" -version = "1.5.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f054c6c1a6e95179d6f23ed974060dcefb2d9388bb7256900badad682c499de4" +checksum = "5e1788eed21689f9cf370582dfc467ef36ed9c707f073528ddafa8d83e3b8500" [[package]] name = "slab" @@ -2402,9 +2519,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "smartstring" @@ -2422,32 +2539,129 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", - "winapi", + "winapi 0.3.9", ] [[package]] -name = "socketpair" -version = "0.14.0" +name = "spin" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0c0b3fc17356799222affc5a40345b7cc25b548c489c5a31eca0888ee2404c" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ - "io-extras", - "io-lifetimes 0.5.3", - "rustix 0.33.7", - "uuid 0.8.2", - "winapi", + "lock_api", ] [[package]] -name = "sodiumoxide" -version = "0.2.7" +name = "spki" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26be3acb6c2d9a7aac28482586a7856436af4cfe7100031d219de2d2ecb0028" +checksum = "37a5be806ab6f127c3da44b7378837ebf01dadca8510a0e572460216b228bd0e" dependencies = [ - "ed25519", + "base64ct", + "der", +] + +[[package]] +name = "sqlformat" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8de3b03a925878ed54a954f621e64bf55a3c1bd29652d0d1a17830405350188" +dependencies = [ + "sqlx-core", + "sqlx-macros", +] + +[[package]] +name = "sqlx-core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa8241483a83a3f33aa5fff7e7d9def398ff9990b2752b6c6112b83c6d246029" +dependencies = [ + "ahash", + "atoi", + "bitflags 1.3.2", + "byteorder", + "bytes", + "crc", + "crossbeam-queue", + "dotenvy", + "either", + "event-listener", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "hashlink", + "hex", + "indexmap", + "itoa", "libc", - "libsodium-sys", - "serde", + "libsqlite3-sys", + "log", + "memchr", + "once_cell", + "paste", + "percent-encoding", + "rustls", + "rustls-pemfile", + "sha2", + "smallvec", + "sqlformat", + "sqlx-rt", + "stringprep", + "thiserror", + "tokio-stream", + "url", + "webpki-roots", +] + +[[package]] +name = "sqlx-macros" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9966e64ae989e7e575b19d7265cb79d7fc3cbbdf179835cb0d716f294c2049c9" +dependencies = [ + "dotenvy", + "either", + "heck", + 
"once_cell", + "proc-macro2", + "quote", + "sha2", + "sqlx-core", + "sqlx-rt", + "syn 1.0.109", + "url", +] + +[[package]] +name = "sqlx-rt" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804d3f245f894e61b1e6263c84b23ca675d96753b5abfd5cc8597d86806e8024" +dependencies = [ + "once_cell", + "tokio", + "tokio-rustls", ] [[package]] @@ -2482,31 +2696,32 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stfu8" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1310970b29733b601839578f8ba24991a97057dbedc4ac0decea835474054ee7" +dependencies = [ + "lazy_static", + "regex", +] + +[[package]] +name = "stringprep" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - -[[package]] -name = "strum_macros" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9550962e7cf70d9980392878dfaf1dcc3ece024f4cf3bf3c46b978d0bad61d6c" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.96", -] - [[package]] name = "subtle" version = "2.4.1" @@ -2515,9 +2730,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.96" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", @@ -2535,79 +2750,38 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "system-interface" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e09bb3fb4e02ec4b87e182ea9718fadbc0fa3e50085b40a9af9690572b67f9e" -dependencies = [ - "atty", - "bitflags 1.3.2", - "cap-fs-ext", - "io-lifetimes 0.5.3", - "os_pipe", - "rustix 0.33.7", - "socketpair", - "winapi", - "winx", -] - -[[package]] -name = "temp-dir" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af547b166dd1ea4b472165569fc456cfb6818116f854690b0ff205e636523dab" - [[package]] name = "tempfile" version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix 0.37.6", + "rustix", "windows-sys 0.45.0", ] [[package]] name = "termcolor" -version = "1.1.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" dependencies = [ "winapi-util", ] [[package]] name = "terminal_size" 
-version = "0.1.17" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" dependencies = [ - "libc", - "winapi", + "rustix", + "windows-sys 0.48.0", ] -[[package]] -name = "termion" -version = "1.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e" -dependencies = [ - "libc", - "numtoa", - "redox_syscall 0.2.13", - "redox_termios", -] - -[[package]] -name = "textwrap" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" - [[package]] name = "thin-vec" version = "0.2.12" @@ -2635,14 +2809,30 @@ dependencies = [ ] [[package]] -name = "time" -version = "0.1.44" +name = "thousands" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" dependencies = [ "libc", "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -2672,15 +2862,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinyvec" version = "1.6.0" @@ -2706,6 +2887,8 @@ dependencies = [ "bytes", "libc", "mio", + "num_cpus", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", @@ -2754,6 +2937,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls", + "tokio", + "webpki", +] + [[package]] name = "tokio-stream" version = "0.1.12" @@ -2781,11 +2975,36 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.9" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" dependencies = [ "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.19.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + 
"toml_datetime", + "winnow", ] [[package]] @@ -2796,22 +3015,76 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.35" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a400e31aa60b9d44a52a8ee0343b5b18566b03a8321e0d321f695cf56e940160" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "pin-project-lite", + "tracing-attributes", "tracing-core", ] [[package]] -name = "tracing-core" -version = "0.1.28" +name = "tracing-attributes" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7" +checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "tracing-core" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-indicatif" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4a5c90a233548de33c35a082c05b6fc91600d4f49765c31faf42b48f029ae8" +dependencies = [ + "indicatif", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -2837,7 +3110,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "rand", "static_assertions", ] @@ -2877,9 +3150,37 @@ checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" [[package]] name = "unicode-width" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "universal-hash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d3160b73c9a19f7e2939a2fdad446c57c1bbbbf4d919d3213ff1267a580d8b5" +dependencies = [ + "crypto-common", + "subtle", +] + +[[package]] +name = "unix_mode" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35abed4630bb800f02451a7428205d1f37b8e125001471bfab259beee6a587ed" + +[[package]] +name = "untrusted" +version = "0.7.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" [[package]] name = "url" @@ -2894,27 +3195,19 @@ dependencies = [ [[package]] name = "users" -version = "0.9.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c72f4267aea0c3ec6d07eaabea6ead7c5ddacfafc5e22bcf8d186706851fb4cf" +checksum = "24cc0f6d6f267b73e5a2cadf007ba8f9bc39c6a6f9666f8cf25ea809a153b032" dependencies = [ "libc", + "log", ] [[package]] name = "utf8parse" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" - -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom", -] +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" @@ -2926,6 +3219,12 @@ dependencies = [ "rand", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -2949,19 +3248,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "void" -version = "1.0.2" +name = "vt100" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de" +dependencies = [ + "itoa", + "log", + "unicode-width", + "vte", +] + +[[package]] +name = "vte" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aae21c12ad2ec2d168c236f369c38ff332bc1134f7246350dca641437365045" +dependencies = [ + "arrayvec", + "utf8parse", + "vte_generate_state_changes", +] + +[[package]] +name = "vte_generate_state_changes" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff" +dependencies = [ + "proc-macro2", + "quote", +] [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -2993,7 +3318,7 @@ version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "wasm-bindgen-macro", ] @@ -3008,7 +3333,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -3018,7 +3343,7 @@ version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "wasm-bindgen", "web-sys", @@ -3042,7 +3367,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" 
dependencies = [ "proc-macro2", "quote", - "syn 1.0.96", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3076,6 +3401,31 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" +dependencies = [ + "webpki", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + [[package]] name = "winapi" version = "0.3.9" @@ -3086,6 +3436,12 @@ dependencies = [ "winapi-x86_64-pc-windows-gnu", ] +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" @@ -3098,7 +3454,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ - "winapi", + "winapi 0.3.9", ] [[package]] @@ -3107,6 +3463,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-sys" version = "0.36.1" @@ -3126,12 +3491,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.2", "windows_aarch64_msvc 0.42.2", "windows_i686_gnu 0.42.2", "windows_i686_msvc 0.42.2", "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.2", "windows_x86_64_msvc 0.42.2", ] @@ -3141,7 +3506,16 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", ] [[package]] @@ -3150,21 +3524,42 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.2", "windows_aarch64_msvc 0.42.2", "windows_i686_gnu 0.42.2", "windows_i686_msvc 0.42.2", "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.2", "windows_x86_64_msvc 0.42.2", ] +[[package]] +name = "windows-targets" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" @@ -3177,6 +3572,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.36.1" @@ -3189,6 +3590,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.36.1" @@ -3201,6 +3608,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" @@ -3213,12 +3626,24 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" @@ -3231,24 +3656,40 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winnow" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" dependencies = [ - "winapi", + "winapi 0.3.9", ] [[package]] -name = "winx" -version = "0.31.0" +name = "x25519-dalek" +version = "2.0.0-rc.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d5973cb8cd94a77d03ad7e23bbe14889cb29805da1cec0e4aff75e21aebded" +checksum = "fabd6e16dd08033932fc3265ad4510cc2eab24656058a6dcb107ffe274abcc95" dependencies = [ - "bitflags 1.3.2", - "io-lifetimes 0.5.3", - "winapi", + "curve25519-dalek", + "rand_core", + "serde", + "zeroize", ] [[package]] @@ -3261,42 +3702,95 @@ checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" name = "yama" version = "0.7.0-alpha.1" dependencies = [ - "anyhow", - "blake", - "byteorder", - "chrono", + "appdirs", + "async-recursion", "clap", - "crossbeam-channel", - "crossbeam-utils", - "derivative", - "env_logger", + "dashmap", + "dust_style_filetree_display", + "eyre", "fastcdc", - "glob", + "flume", + "hostname", + "ignore", "indicatif", - "itertools 0.9.0", - "log", - "metrics", - "nix 0.17.0", - "num_cpus", - "rayon", - "rusqlite", - "rustyline", + "memmap2", + "patricia_tree", "serde", - "serde_bare 0.3.0", - "sodiumoxide", - "temp-dir", - "thiserror", + "tokio", "toml", + "tracing", + "tracing-indicatif", + "tracing-subscriber", "twox-hash", "users", + "yama_localcache", + "yama_midlevel_crypto", + "yama_pile", + "yama_wormfile", + "yama_wormfile_fs", + "yama_wormfile_s3", + "yama_wormfile_sftp", "zstd", ] +[[package]] +name = "yama_localcache" +version = "0.1.0" +dependencies = [ + "eyre", + "itertools", + "sqlx", + "tokio", + "tracing", + "yama_midlevel_crypto", + "yama_pile", +] + +[[package]] +name = "yama_midlevel_crypto" +version = "0.1.0" +dependencies = [ + "argon2", + "blake3", + "chacha20", + "ciborium", + "ed25519-dalek", + "eyre", + "hex", + "poly1305", + "pqc_kyber", + "rand", + "serde", + "x25519-dalek", + "zstd", +] + +[[package]] +name = "yama_pile" +version = "0.1.0" +dependencies = [ + "backtrace", + "chrono", + "eyre", + "hex", + "patricia_tree", + "rand", + "serde", + "sha2", + "tokio", + "tracing", + "unix_mode", + "uuid", + "yama_midlevel_crypto", + "yama_wormfile", +] + [[package]] name = "yama_wormfile" version = "0.1.0" dependencies = [ "async-trait", + "eyre", "tokio", ] @@ -3305,6 +3799,7 @@ name = "yama_wormfile_fs" version = "0.1.0" dependencies = [ "async-trait", + "eyre", "rand", "tokio", "yama_wormfile", @@ -3316,13 +3811,14 @@ version = "0.1.0" dependencies = [ "async-trait", "bytes", + "eyre", "rand", "rust-s3", "thiserror", "tokio", "tokio-stream", "tokio-util", - "uuid 1.3.0", + "uuid", "yama_wormfile", ] @@ -3331,6 +3827,7 @@ name = "yama_wormfile_sftp" version = "0.1.0" dependencies = [ "async-trait", + "eyre", "openssh", "openssh-sftp-client", "ouroboros", @@ -3341,19 +3838,39 @@ dependencies = [ ] [[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" +name = "zeroize" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = 
"zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "zstd" +version = "0.12.3+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" +version = "6.0.5+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" dependencies = [ "libc", "zstd-sys", @@ -3361,10 +3878,11 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.1+zstd.1.5.2" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", + "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index abd6a2c..cb8a7e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,9 @@ members = [ "yama_wormfile_fs", "yama_wormfile_sftp", "yama_wormfile_s3", + "yama_midlevel_crypto", + "yama_pile", + "yama_localcache", ] [profile.release] @@ -13,3 +16,16 @@ members = [ debug = 2 # When this feature stabilises, it will be possible to split the debug information into a file alongside the binary #split-debuginfo = "packed" + + + +# A few packages benefit from optimisations in the dev profile, otherwise Yama operations are needlessly slowed down. +[profile.dev.package.fastcdc] +opt-level = 2 + +[profile.dev.package.blake3] +opt-level = 2 + +# not so obvious with sqlx: +#[profile.dev.package.sqlx] +#opt-level = 2 \ No newline at end of file diff --git a/GLOSSARY.md b/GLOSSARY.md new file mode 100644 index 0000000..a4d0656 --- /dev/null +++ b/GLOSSARY.md @@ -0,0 +1,5 @@ +## Internals + +* **Chunk**: piece of a file that is obtained using a Content-Defined Chunking scheme +* **Chunk ID**: Blake3 hash of the contents of a chunk +* **Metachunk**: a chunk that itself contains (part of) a list of chunks. 
diff --git a/datman.old/Cargo.toml b/datman.old/Cargo.toml new file mode 100644 index 0000000..58c910a --- /dev/null +++ b/datman.old/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "datman" +version = "0.7.0-alpha.1" +authors = ["Olivier 'reivilibre' "] +edition = "2021" +repository = "https://bics.ga/reivilibre/yama" +license = "GPL-3.0-or-later" + +description = "A chunked and deduplicated backup system using Yama" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "3.1.18", features = ["derive"] } +crossbeam-channel = "0.5.1" +anyhow = "1.0" +thiserror = "1.0" +serde = { version = "1.0.104", features = ["derive"] } +serde_json = "1.0.64" +toml = "0.5.5" +log = "0.4" +env_logger = "0.7.1" +indicatif = "0.14.0" +arc-interner = "0.5.1" +zstd = "0.11.2" # 0.11.2+zstd.1.5.2 +byteorder = "1" +termion = "1.5.6" +glob = "0.3.0" +humansize = "1.1.1" +chrono = "0.4.19" +itertools = "0.10.1" +hostname = "0.3.1" +yama = { path = "../yama", version = "0.7.0-alpha.1" } +metrics = "0.17.1" +bare-metrics-recorder = { version = "0.1.0" } +comfy-table = "6.0.0-rc.1" +libc = "0.2.126" +io-streams = "0.11.0" \ No newline at end of file diff --git a/datman.old/README.md b/datman.old/README.md new file mode 100644 index 0000000..934b50e --- /dev/null +++ b/datman.old/README.md @@ -0,0 +1,13 @@ +# datman: DATa MANager + +Datman is a tool to make it easier to use Yama for backups. + +Features: + +* Chunk-based deduplication +* (optional) Compression using Zstd and a specifiable dictionary +* (optional) Encryption +* Ability to back up to remote machines over SSH +* Labelling of files in a backup source; different destinations can choose to backup either all or a subset of the labels. + +See the documentation for more information. diff --git a/datman.old/src/bin/datman.rs b/datman.old/src/bin/datman.rs new file mode 100644 index 0000000..01f03ea --- /dev/null +++ b/datman.old/src/bin/datman.rs @@ -0,0 +1,468 @@ +/* +This file is part of Yama. + +Yama is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Yama is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Yama. If not, see . 
+*/ + +use std::fs::File; +use std::io::{BufReader, BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +use clap::Parser; +use env_logger::Env; + +use anyhow::{bail, Context}; +use bare_metrics_recorder::recording::BareMetricsRecorderCore; +use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc}; +use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination}; +use datman::commands::ilabel::interactive_labelling_session; +use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy}; +use datman::commands::{init_descriptor, pushpull}; +use datman::descriptor::{load_descriptor, SourceDescriptor}; +use datman::get_hostname; +use datman::remote::backup_source_requester::backup_remote_source_to_destination; +use datman::remote::backup_source_responder; +use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; +use itertools::Itertools; +use log::info; +use std::str::FromStr; +use yama::commands::load_pile_descriptor; +use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel}; + +pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠️ \x1b[25m\x1b[22m"; +pub const BOLD: &str = "\x1b[1m"; +pub const BOLD_OFF: &str = "\x1b[22m"; +pub const WHITE: &str = "\x1b[37m"; +pub const RED: &str = "\x1b[31m"; +pub const GREEN: &str = "\x1b[32m"; + +#[derive(Parser)] +pub enum DatmanCommand { + /// Initialise a datman descriptor in this directory. + Init {}, + + /// + Status {}, + + #[clap(name = "ilabel")] + InteractiveLabelling { + /// Name of the source to label. + source_name: String, + }, + + #[clap(name = "ibrowse")] + InteractiveBrowsing { + /// Name of the source to browse. + source_name: String, + }, + + /// Back up a source locally or over the network. + BackupOne { + /// Name of the source to back up. + source_name: String, + + /// Name of the destination to back up to. + destination_name: String, + }, + + BackupAll { + /// Name of the remote to back up. + /// Special value 'self' means 'this host only'. + /// Special value 'all' means 'all hosts'. + remote_name: String, + + /// Name of the destination to back up to. + destination_name: String, + }, + + Extract { + /// Name of the 'source' to extract + /// Omit for 'all'. + #[clap(short)] + source_name: Option, + + /// If specified, will get the first backup after this date. + #[clap(long)] + after: Option, + + /// If specified, will get the last backup before this date. The default behaviour is to get the latest. + #[clap(long)] + before: Option, + + /// If not specified, time-restricted extractions that don't have a pointer for every source + /// will instead lead to an error. + #[clap(long)] + accept_partial: bool, // TODO unimplemented. + + /// Name of the pile to extract from + pile_name: String, + + /// Place to extract to. + destination: PathBuf, + + /// Skip applying metadata. Might be needed to extract without superuser privileges. + #[clap(long)] + skip_metadata: bool, + }, + + Report { + /// Name of the pile to report on. + pile_name: String, + + /// Don't summarise months. + #[clap(long)] + individual: bool, + }, + + #[clap(name = "_backup_source_responder")] + InternalBackupSourceResponder, + + /// Pulls all pointers from a remote pile to a local pile. + /// Does not yet support label filtering, but will do in the future. + Pull { + /// e.g. 'myserver:main' + remote_and_remote_pile: String, + + pile_name: String, + }, + + /// Applies a retention policy by removing unnecessary backups. 
+ /// Does not reclaim space by itself: use + /// `yama check --apply-gc --shallow` + /// & `yama compact` + /// to do that. + Prune { pile_name: String }, + + #[clap(name = "_pull_responder_offerer")] + InternalPullResponderOfferer { + datman_path: PathBuf, + pile_name: String, + }, +} + +pub struct HumanDateTime(pub DateTime); + +impl FromStr for HumanDateTime { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + if let Ok(date_only) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { + let local_date = chrono::offset::Local.from_local_date(&date_only).unwrap(); + let local_datetime = local_date.and_hms(0, 0, 0); + Ok(HumanDateTime(local_datetime)) + } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { + let local_datetime = chrono::offset::Local + .from_local_datetime(&date_and_time) + .unwrap(); + Ok(HumanDateTime(local_datetime)) + } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { + let local_datetime = chrono::offset::Local + .from_local_datetime(&date_and_time) + .unwrap(); + Ok(HumanDateTime(local_datetime)) + } else { + bail!("Couldn't parse using either format. Use one of: 2021-05-16 OR 2021-05-16T17:42:14 OR 2021-05-16 17:42:14"); + } + } +} + +fn with_obvious_successfail_message(result: anyhow::Result) -> anyhow::Result { + match &result { + Ok(_) => { + eprintln!("Operation {}successful{}.", GREEN, WHITE); + } + Err(error) => { + eprintln!("{:?}", error); + eprintln!( + "{}{}Operation {}{}FAILED{}!{}", + FAILURE_SYMBOL_OBNOXIOUS_FLASHING, WHITE, RED, BOLD, WHITE, BOLD_OFF + ); + } + }; + result +} + +fn with_exitcode(result: anyhow::Result) { + match &result { + Ok(_) => { + std::process::exit(0); + } + Err(_) => { + std::process::exit(5); + } + }; +} + +fn main() -> anyhow::Result<()> { + env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); + + let now = Utc::now(); + + let (shard, _stopper) = BareMetricsRecorderCore::new(File::create(format!( + "/tmp/datman_{}.baremetrics", + now.format("%F_%H%M%S") + ))?) + .start("datman".to_string())?; + shard.install_as_metrics_recorder()?; + + let opts: DatmanCommand = DatmanCommand::parse(); + + match opts { + DatmanCommand::Init {} => { + init_descriptor(Path::new(".")).unwrap(); + } + DatmanCommand::Status { .. } => { + unimplemented!(); + } + DatmanCommand::InteractiveLabelling { source_name } => { + interactive_labelling_session(Path::new("."), source_name)?; + } + DatmanCommand::InteractiveBrowsing { source_name } => { + datman::commands::ibrowse::session(Path::new("."), source_name)?; + } + DatmanCommand::BackupOne { + source_name, + destination_name, + } => { + let my_hostname = get_hostname(); + let descriptor = load_descriptor(Path::new(".")).unwrap(); + let source = &descriptor.sources[&source_name]; + let destination = &descriptor.piles[&destination_name]; + + let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); + pbar.set_style( + ProgressStyle::default_bar().template( + "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", + ), + ); + pbar.set_message("storing"); + + let is_remote = if let SourceDescriptor::DirectorySource { hostname, .. 
} = source { + hostname != &my_hostname + } else { + false + }; + + let result = if is_remote { + backup_remote_source_to_destination( + source, + destination, + &descriptor, + Path::new("."), + &source_name, + &destination_name, + yama::utils::get_number_of_workers("YAMA_CHUNKERS"), + pbar, + ) + } else { + backup_source_to_destination( + source, + destination, + &descriptor, + Path::new("."), + &source_name, + &destination_name, + yama::utils::get_number_of_workers("YAMA_CHUNKERS"), + &mut pbar, + ) + }; + with_exitcode(with_obvious_successfail_message(result)) + } + DatmanCommand::BackupAll { + remote_name, + destination_name, + } => { + let descriptor = load_descriptor(Path::new(".")).unwrap(); + let destination = &descriptor.piles[&destination_name]; + + let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); + pbar.set_style( + ProgressStyle::default_bar().template( + "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", + ), + ); + pbar.set_message("storing"); + + backup_all_sources_to_destination( + destination, + &descriptor, + Path::new("."), + &destination_name, + yama::utils::get_number_of_workers("YAMA_CHUNKERS"), + &mut pbar, + remote_name, + ) + .unwrap(); + } + DatmanCommand::Extract { + source_name, + after, + before, + accept_partial, + pile_name, + destination, + skip_metadata, + } => { + if !accept_partial { + bail!("Specify --accept-partial until running without it is supported."); + } + + if after.is_some() && before.is_some() { + bail!("Can't specify both before and after!"); + } + + let before = before.map(|dt| dt.0.with_timezone(&Utc)); + let after = after.map(|dt| dt.0.with_timezone(&Utc)); + + datman::commands::extract::extract( + &destination, + Path::new("."), + source_name.as_ref().map(|x| x.as_ref()), + &pile_name, + before.into(), + after.into(), + !skip_metadata, + !skip_metadata, + !skip_metadata, + yama::utils::get_number_of_workers("YAMA_EXTRACTORS"), + )?; + } + + DatmanCommand::InternalBackupSourceResponder => { + info!("Datman responder at {:?}", std::env::current_exe()?); + backup_source_responder::handler_stdio()?; + } + + DatmanCommand::Report { + pile_name, + individual, + } => { + let descriptor = load_descriptor(Path::new(".")).unwrap(); + let destination = &descriptor.piles[&pile_name]; + let report = + datman::commands::report::generate_report(destination, &descriptor, !individual)?; + + datman::commands::report::print_filesystem_space(&destination.path)?; + datman::commands::report::print_report(&report)?; + } + DatmanCommand::Pull { + remote_and_remote_pile, + pile_name, + } => { + let (hostname, remote_datman_path, remote_pile_name) = remote_and_remote_pile + .split(':') + .collect_tuple() + .context("You must pull from a remote pile specified as remote:path:pile.")?; + + let descriptor = load_descriptor(Path::new(".")).unwrap(); + let source = &descriptor.piles[&pile_name]; + + let pile_desc = load_pile_descriptor(&source.path)?; + let (pile, bypass_raw_pile) = open_pile_with_work_bypass( + &source.path, + &pile_desc, + BypassLevel::CompressionBypass, + )?; + + let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); + pbar.set_style( + ProgressStyle::default_bar().template( + "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", + ), + ); + pbar.set_message("pulling"); + + let remote_host_descriptor = descriptor + .remote_hosts + .get(hostname) + .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?; + + let mut 
connection = Command::new("ssh") + .arg(&remote_host_descriptor.user_at_host) + .arg("--") + .arg( + &remote_host_descriptor + .path_to_datman + .as_ref() + .map(|x| x.as_str()) + .unwrap_or("datman"), + ) + .arg("_pull_responder_offerer") + .arg(remote_datman_path) + .arg(remote_pile_name) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn()?; + + let mut reader = BufReader::new(connection.stdout.take().unwrap()); + let mut writer = BufWriter::new(connection.stdin.take().unwrap()); + + pushpull::accepting_side( + &pile, + &bypass_raw_pile, + &mut reader, + &mut writer, + Box::new(pbar), + )?; + } + + DatmanCommand::Prune { pile_name } => { + let descriptor = load_descriptor(Path::new(".")).unwrap(); + let retention_policy = descriptor + .retention + .context("No retention policy set in descriptor")?; + let dest_desc = &descriptor.piles[&pile_name]; + + let pile_desc = load_pile_descriptor(&dest_desc.path)?; + + prune_with_retention_policy( + &dest_desc.path, + &pile_desc, + &RetentionPolicy::from_config(retention_policy), + true, + )?; + } + + DatmanCommand::InternalPullResponderOfferer { + datman_path, + pile_name, + } => { + let descriptor = load_descriptor(&datman_path).unwrap(); + let source = &descriptor.piles[&pile_name]; + + let pile_desc = load_pile_descriptor(&source.path)?; + let (pile, bypass_raw_pile) = open_pile_with_work_bypass( + &source.path, + &pile_desc, + BypassLevel::CompressionBypass, + )?; + + let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?); + let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?); + + pushpull::offering_side( + &pile, + &bypass_raw_pile, + &mut stdin, + &mut stdout, + Box::new(()), + )?; + + stdout.flush()?; + } + } + Ok(()) +} diff --git a/datman/src/commands.rs b/datman.old/src/commands.rs similarity index 100% rename from datman/src/commands.rs rename to datman.old/src/commands.rs diff --git a/datman/src/commands/backup.rs b/datman.old/src/commands/backup.rs similarity index 100% rename from datman/src/commands/backup.rs rename to datman.old/src/commands/backup.rs diff --git a/datman/src/commands/extract.rs b/datman.old/src/commands/extract.rs similarity index 100% rename from datman/src/commands/extract.rs rename to datman.old/src/commands/extract.rs diff --git a/datman/src/commands/ibrowse.rs b/datman.old/src/commands/ibrowse.rs similarity index 100% rename from datman/src/commands/ibrowse.rs rename to datman.old/src/commands/ibrowse.rs diff --git a/datman/src/commands/ilabel.rs b/datman.old/src/commands/ilabel.rs similarity index 100% rename from datman/src/commands/ilabel.rs rename to datman.old/src/commands/ilabel.rs diff --git a/datman/src/commands/prune.rs b/datman.old/src/commands/prune.rs similarity index 100% rename from datman/src/commands/prune.rs rename to datman.old/src/commands/prune.rs diff --git a/datman/src/commands/pushpull.rs b/datman.old/src/commands/pushpull.rs similarity index 100% rename from datman/src/commands/pushpull.rs rename to datman.old/src/commands/pushpull.rs diff --git a/datman/src/commands/report.rs b/datman.old/src/commands/report.rs similarity index 100% rename from datman/src/commands/report.rs rename to datman.old/src/commands/report.rs diff --git a/datman/src/descriptor.rs b/datman.old/src/descriptor.rs similarity index 100% rename from datman/src/descriptor.rs rename to datman.old/src/descriptor.rs diff --git a/datman/src/labelling.rs b/datman.old/src/labelling.rs similarity index 100% rename from datman/src/labelling.rs rename to 
datman.old/src/labelling.rs diff --git a/datman.old/src/lib.rs b/datman.old/src/lib.rs new file mode 100644 index 0000000..91475d3 --- /dev/null +++ b/datman.old/src/lib.rs @@ -0,0 +1,12 @@ +pub mod commands; +pub mod descriptor; +pub mod labelling; +pub mod remote; +pub mod tree; + +pub fn get_hostname() -> String { + hostname::get() + .expect("No hostname") + .into_string() + .expect("Hostname string must be sensible.") +} diff --git a/datman/src/remote.rs b/datman.old/src/remote.rs similarity index 100% rename from datman/src/remote.rs rename to datman.old/src/remote.rs diff --git a/datman/src/remote/backup_source_requester.rs b/datman.old/src/remote/backup_source_requester.rs similarity index 100% rename from datman/src/remote/backup_source_requester.rs rename to datman.old/src/remote/backup_source_requester.rs diff --git a/datman/src/remote/backup_source_responder.rs b/datman.old/src/remote/backup_source_responder.rs similarity index 100% rename from datman/src/remote/backup_source_responder.rs rename to datman.old/src/remote/backup_source_responder.rs diff --git a/datman/src/tree.rs b/datman.old/src/tree.rs similarity index 100% rename from datman/src/tree.rs rename to datman.old/src/tree.rs diff --git a/datman/Cargo.toml b/datman/Cargo.toml index 58c910a..fa5e7c8 100644 --- a/datman/Cargo.toml +++ b/datman/Cargo.toml @@ -11,28 +11,4 @@ description = "A chunked and deduplicated backup system using Yama" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "3.1.18", features = ["derive"] } -crossbeam-channel = "0.5.1" -anyhow = "1.0" -thiserror = "1.0" -serde = { version = "1.0.104", features = ["derive"] } -serde_json = "1.0.64" -toml = "0.5.5" -log = "0.4" -env_logger = "0.7.1" -indicatif = "0.14.0" -arc-interner = "0.5.1" -zstd = "0.11.2" # 0.11.2+zstd.1.5.2 -byteorder = "1" -termion = "1.5.6" -glob = "0.3.0" -humansize = "1.1.1" -chrono = "0.4.19" -itertools = "0.10.1" -hostname = "0.3.1" -yama = { path = "../yama", version = "0.7.0-alpha.1" } -metrics = "0.17.1" -bare-metrics-recorder = { version = "0.1.0" } -comfy-table = "6.0.0-rc.1" -libc = "0.2.126" -io-streams = "0.11.0" \ No newline at end of file +eyre = "0.6.8" diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 01f03ea..20b202f 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -15,454 +15,6 @@ You should have received a copy of the GNU General Public License along with Yama. If not, see . 
*/ -use std::fs::File; -use std::io::{BufReader, BufWriter, Write}; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; - -use clap::Parser; -use env_logger::Env; - -use anyhow::{bail, Context}; -use bare_metrics_recorder::recording::BareMetricsRecorderCore; -use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc}; -use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination}; -use datman::commands::ilabel::interactive_labelling_session; -use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy}; -use datman::commands::{init_descriptor, pushpull}; -use datman::descriptor::{load_descriptor, SourceDescriptor}; -use datman::get_hostname; -use datman::remote::backup_source_requester::backup_remote_source_to_destination; -use datman::remote::backup_source_responder; -use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; -use itertools::Itertools; -use log::info; -use std::str::FromStr; -use yama::commands::load_pile_descriptor; -use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel}; - -pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠️ \x1b[25m\x1b[22m"; -pub const BOLD: &str = "\x1b[1m"; -pub const BOLD_OFF: &str = "\x1b[22m"; -pub const WHITE: &str = "\x1b[37m"; -pub const RED: &str = "\x1b[31m"; -pub const GREEN: &str = "\x1b[32m"; - -#[derive(Parser)] -pub enum DatmanCommand { - /// Initialise a datman descriptor in this directory. - Init {}, - - /// - Status {}, - - #[clap(name = "ilabel")] - InteractiveLabelling { - /// Name of the source to label. - source_name: String, - }, - - #[clap(name = "ibrowse")] - InteractiveBrowsing { - /// Name of the source to browse. - source_name: String, - }, - - /// Back up a source locally or over the network. - BackupOne { - /// Name of the source to back up. - source_name: String, - - /// Name of the destination to back up to. - destination_name: String, - }, - - BackupAll { - /// Name of the remote to back up. - /// Special value 'self' means 'this host only'. - /// Special value 'all' means 'all hosts'. - remote_name: String, - - /// Name of the destination to back up to. - destination_name: String, - }, - - Extract { - /// Name of the 'source' to extract - /// Omit for 'all'. - #[clap(short)] - source_name: Option, - - /// If specified, will get the first backup after this date. - #[clap(long)] - after: Option, - - /// If specified, will get the last backup before this date. The default behaviour is to get the latest. - #[clap(long)] - before: Option, - - /// If not specified, time-restricted extractions that don't have a pointer for every source - /// will instead lead to an error. - #[clap(long)] - accept_partial: bool, // TODO unimplemented. - - /// Name of the pile to extract from - pile_name: String, - - /// Place to extract to. - destination: PathBuf, - - /// Skip applying metadata. Might be needed to extract without superuser privileges. - #[clap(long)] - skip_metadata: bool, - }, - - Report { - /// Name of the pile to report on. - pile_name: String, - - /// Don't summarise months. - #[clap(long)] - individual: bool, - }, - - #[clap(name = "_backup_source_responder")] - InternalBackupSourceResponder, - - /// Pulls all pointers from a remote pile to a local pile. - /// Does not yet support label filtering, but will do in the future. - Pull { - /// e.g. 'myserver:main' - remote_and_remote_pile: String, - - pile_name: String, - }, - - /// Applies a retention policy by removing unnecessary backups. 
- /// Does not reclaim space by itself: use - /// `yama check --apply-gc --shallow` - /// & `yama compact` - /// to do that. - Prune { pile_name: String }, - - #[clap(name = "_pull_responder_offerer")] - InternalPullResponderOfferer { - datman_path: PathBuf, - pile_name: String, - }, -} - -pub struct HumanDateTime(pub DateTime); - -impl FromStr for HumanDateTime { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - if let Ok(date_only) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { - let local_date = chrono::offset::Local.from_local_date(&date_only).unwrap(); - let local_datetime = local_date.and_hms(0, 0, 0); - Ok(HumanDateTime(local_datetime)) - } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { - let local_datetime = chrono::offset::Local - .from_local_datetime(&date_and_time) - .unwrap(); - Ok(HumanDateTime(local_datetime)) - } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { - let local_datetime = chrono::offset::Local - .from_local_datetime(&date_and_time) - .unwrap(); - Ok(HumanDateTime(local_datetime)) - } else { - bail!("Couldn't parse using either format. Use one of: 2021-05-16 OR 2021-05-16T17:42:14 OR 2021-05-16 17:42:14"); - } - } -} - -fn with_obvious_successfail_message(result: anyhow::Result) -> anyhow::Result { - match &result { - Ok(_) => { - eprintln!("Operation {}successful{}.", GREEN, WHITE); - } - Err(error) => { - eprintln!("{:?}", error); - eprintln!( - "{}{}Operation {}{}FAILED{}!{}", - FAILURE_SYMBOL_OBNOXIOUS_FLASHING, WHITE, RED, BOLD, WHITE, BOLD_OFF - ); - } - }; - result -} - -fn with_exitcode(result: anyhow::Result) { - match &result { - Ok(_) => { - std::process::exit(0); - } - Err(_) => { - std::process::exit(5); - } - }; -} - -fn main() -> anyhow::Result<()> { - env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); - - let now = Utc::now(); - - let (shard, _stopper) = BareMetricsRecorderCore::new(File::create(format!( - "/tmp/datman_{}.baremetrics", - now.format("%F_%H%M%S") - ))?) - .start("datman".to_string())?; - shard.install_as_metrics_recorder()?; - - let opts: DatmanCommand = DatmanCommand::parse(); - - match opts { - DatmanCommand::Init {} => { - init_descriptor(Path::new(".")).unwrap(); - } - DatmanCommand::Status { .. } => { - unimplemented!(); - } - DatmanCommand::InteractiveLabelling { source_name } => { - interactive_labelling_session(Path::new("."), source_name)?; - } - DatmanCommand::InteractiveBrowsing { source_name } => { - datman::commands::ibrowse::session(Path::new("."), source_name)?; - } - DatmanCommand::BackupOne { - source_name, - destination_name, - } => { - let my_hostname = get_hostname(); - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let source = &descriptor.sources[&source_name]; - let destination = &descriptor.piles[&destination_name]; - - let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); - pbar.set_style( - ProgressStyle::default_bar().template( - "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", - ), - ); - pbar.set_message("storing"); - - let is_remote = if let SourceDescriptor::DirectorySource { hostname, .. 
} = source { - hostname != &my_hostname - } else { - false - }; - - let result = if is_remote { - backup_remote_source_to_destination( - source, - destination, - &descriptor, - Path::new("."), - &source_name, - &destination_name, - yama::utils::get_number_of_workers("YAMA_CHUNKERS"), - pbar, - ) - } else { - backup_source_to_destination( - source, - destination, - &descriptor, - Path::new("."), - &source_name, - &destination_name, - yama::utils::get_number_of_workers("YAMA_CHUNKERS"), - &mut pbar, - ) - }; - with_exitcode(with_obvious_successfail_message(result)) - } - DatmanCommand::BackupAll { - remote_name, - destination_name, - } => { - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let destination = &descriptor.piles[&destination_name]; - - let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); - pbar.set_style( - ProgressStyle::default_bar().template( - "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", - ), - ); - pbar.set_message("storing"); - - backup_all_sources_to_destination( - destination, - &descriptor, - Path::new("."), - &destination_name, - yama::utils::get_number_of_workers("YAMA_CHUNKERS"), - &mut pbar, - remote_name, - ) - .unwrap(); - } - DatmanCommand::Extract { - source_name, - after, - before, - accept_partial, - pile_name, - destination, - skip_metadata, - } => { - if !accept_partial { - bail!("Specify --accept-partial until running without it is supported."); - } - - if after.is_some() && before.is_some() { - bail!("Can't specify both before and after!"); - } - - let before = before.map(|dt| dt.0.with_timezone(&Utc)); - let after = after.map(|dt| dt.0.with_timezone(&Utc)); - - datman::commands::extract::extract( - &destination, - Path::new("."), - source_name.as_ref().map(|x| x.as_ref()), - &pile_name, - before.into(), - after.into(), - !skip_metadata, - !skip_metadata, - !skip_metadata, - yama::utils::get_number_of_workers("YAMA_EXTRACTORS"), - )?; - } - - DatmanCommand::InternalBackupSourceResponder => { - info!("Datman responder at {:?}", std::env::current_exe()?); - backup_source_responder::handler_stdio()?; - } - - DatmanCommand::Report { - pile_name, - individual, - } => { - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let destination = &descriptor.piles[&pile_name]; - let report = - datman::commands::report::generate_report(destination, &descriptor, !individual)?; - - datman::commands::report::print_filesystem_space(&destination.path)?; - datman::commands::report::print_report(&report)?; - } - DatmanCommand::Pull { - remote_and_remote_pile, - pile_name, - } => { - let (hostname, remote_datman_path, remote_pile_name) = remote_and_remote_pile - .split(':') - .collect_tuple() - .context("You must pull from a remote pile specified as remote:path:pile.")?; - - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let source = &descriptor.piles[&pile_name]; - - let pile_desc = load_pile_descriptor(&source.path)?; - let (pile, bypass_raw_pile) = open_pile_with_work_bypass( - &source.path, - &pile_desc, - BypassLevel::CompressionBypass, - )?; - - let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); - pbar.set_style( - ProgressStyle::default_bar().template( - "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", - ), - ); - pbar.set_message("pulling"); - - let remote_host_descriptor = descriptor - .remote_hosts - .get(hostname) - .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?; - - let mut 
connection = Command::new("ssh") - .arg(&remote_host_descriptor.user_at_host) - .arg("--") - .arg( - &remote_host_descriptor - .path_to_datman - .as_ref() - .map(|x| x.as_str()) - .unwrap_or("datman"), - ) - .arg("_pull_responder_offerer") - .arg(remote_datman_path) - .arg(remote_pile_name) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::inherit()) - .spawn()?; - - let mut reader = BufReader::new(connection.stdout.take().unwrap()); - let mut writer = BufWriter::new(connection.stdin.take().unwrap()); - - pushpull::accepting_side( - &pile, - &bypass_raw_pile, - &mut reader, - &mut writer, - Box::new(pbar), - )?; - } - - DatmanCommand::Prune { pile_name } => { - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let retention_policy = descriptor - .retention - .context("No retention policy set in descriptor")?; - let dest_desc = &descriptor.piles[&pile_name]; - - let pile_desc = load_pile_descriptor(&dest_desc.path)?; - - prune_with_retention_policy( - &dest_desc.path, - &pile_desc, - &RetentionPolicy::from_config(retention_policy), - true, - )?; - } - - DatmanCommand::InternalPullResponderOfferer { - datman_path, - pile_name, - } => { - let descriptor = load_descriptor(&datman_path).unwrap(); - let source = &descriptor.piles[&pile_name]; - - let pile_desc = load_pile_descriptor(&source.path)?; - let (pile, bypass_raw_pile) = open_pile_with_work_bypass( - &source.path, - &pile_desc, - BypassLevel::CompressionBypass, - )?; - - let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?); - let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?); - - pushpull::offering_side( - &pile, - &bypass_raw_pile, - &mut stdin, - &mut stdout, - Box::new(()), - )?; - - stdout.flush()?; - } - } +pub fn main() -> eyre::Result<()> { Ok(()) } diff --git a/datman/src/lib.rs b/datman/src/lib.rs index 91475d3..8b13789 100644 --- a/datman/src/lib.rs +++ b/datman/src/lib.rs @@ -1,12 +1 @@ -pub mod commands; -pub mod descriptor; -pub mod labelling; -pub mod remote; -pub mod tree; -pub fn get_hostname() -> String { - hostname::get() - .expect("No hostname") - .into_string() - .expect("Hostname string must be sensible.") -} diff --git a/flake.lock b/flake.lock index 09ab414..b952c9a 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,26 @@ { "nodes": { + "fenix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ], + "rust-analyzer-src": "rust-analyzer-src" + }, + "locked": { + "lastModified": 1682230876, + "narHash": "sha256-vCnd1pZRQKCdNvivQBD7WzaOlU1GcN91OCAz1rnoe5M=", + "owner": "nix-community", + "repo": "fenix", + "rev": "378f052d9f1cd90060ec4329f81782fee80490a4", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "fenix", + "type": "github" + } + }, "naersk": { "inputs": { "nixpkgs": "nixpkgs" @@ -32,23 +53,44 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 0, - "narHash": "sha256-50235YW76Jnx4okogoJv/sMz+WNnqC+0DqtkV3jm2XM=", - "path": "/nix/store/aw7bxjysi3wd3xia5qh7qqwsbqmyqbya-source", - "type": "path" + "lastModified": 1682173319, + "narHash": "sha256-tPhOpJJ+wrWIusvGgIB2+x6ILfDkEgQMX0BTtM5vd/4=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "ee7ec1c71adc47d2e3c2d5eb0d6b8fbbd42a8d1c", + "type": "github" }, "original": { "id": "nixpkgs", + "ref": "nixos-22.11", "type": "indirect" } }, "root": { "inputs": { + "fenix": "fenix", "naersk": "naersk", "nixpkgs": "nixpkgs_2", "utils": "utils" } }, + "rust-analyzer-src": { + "flake": false, + "locked": { + "lastModified": 1682163822, + "narHash": 
"sha256-u7vaRlI6rYiutytoTk8lyOtNKO/rz5Q63Z6S6QzYCtU=", + "owner": "rust-lang", + "repo": "rust-analyzer", + "rev": "2feabc4dc462644287372922928110eea4c60ca7", + "type": "github" + }, + "original": { + "owner": "rust-lang", + "ref": "nightly", + "repo": "rust-analyzer", + "type": "github" + } + }, "utils": { "locked": { "lastModified": 1659877975, diff --git a/flake.nix b/flake.nix index fe10a2f..c7b9927 100644 --- a/flake.nix +++ b/flake.nix @@ -4,11 +4,40 @@ inputs = { utils.url = "github:numtide/flake-utils"; naersk.url = "github:nix-community/naersk"; + # Current Rust in nixpkgs is too old unfortunately — let's use the Fenix overlay's packages... + fenix = { + url = "github:nix-community/fenix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + nixpkgs.url = "nixpkgs/nixos-22.11"; }; - outputs = { self, nixpkgs, utils, naersk }: + outputs = { self, nixpkgs, utils, naersk, fenix }: utils.lib.eachDefaultSystem (system: let pkgs = nixpkgs.legacyPackages."${system}"; + #fenixRustToolchain = fenix.packages."${system}".minimal.toolchain +# fenixRustToolchain = +# fenix."${system}".complete.withComponents [ +# "cargo" +# "clippy" +# "rust-src" +# "rustc" +# "rustfmt" +# ]; +# fenixRustToolchain = fenix.packages."${system}".stable.toolchain; + fenixRustToolchain = + fenix.packages."${system}".stable.withComponents [ + "cargo" + "clippy" + "rust-src" + "rustc" + "rustfmt" + ]; +# rust-toolchain = pkgs.symlinkJoin { +# name = "rust-toolchain"; +# paths = [fenixRustToolchain.rustc fenixRustToolchain.cargo fenixRustToolchain.clippy fenixRustToolchain.rustfmt fenixRustToolchain.rustPlatform.rustcSrc]; +# }; + naersk-lib = naersk.lib."${system}"; rustComponents = naersk-lib.buildPackage { @@ -77,7 +106,45 @@ # `nix develop` devShell = pkgs.mkShell { - nativeBuildInputs = with pkgs; [ rustc cargo ]; + buildInputs = [ + fenixRustToolchain + #rust-toolchain + + pkgs.pkg-config + + pkgs.alsa-lib + pkgs.sqlite + pkgs.sqlx-cli + #pkgs.libclang # ?? + ]; + + nativeBuildInputs = [ + pkgs.openssl + pkgs.python3 + ]; + + # Needed for bindgen when binding to avahi + LIBCLANG_PATH="${pkgs.llvmPackages_latest.libclang.lib}/lib"; + + # Don't know if this var does anything by itself, but you need to feed this value in to IntelliJ IDEA and it's probably easier to pull out of an env var than look it up each time. + RUST_SRC_PATH = "${fenixRustToolchain}/lib/rustlib/src/rust/library"; + + # Cargo culted: + # Add to rustc search path + RUSTFLAGS = (builtins.map (a: ''-L ${a}/lib'') [ + ]); + # Add to bindgen search path + BINDGEN_EXTRA_CLANG_ARGS = + # Includes with normal include path + (builtins.map (a: ''-I"${a}/include"'') [ + ]) + # Includes with special directory paths + ++ [ + ''-I"${pkgs.llvmPackages_latest.libclang.lib}/lib/clang/${pkgs.llvmPackages_latest.libclang.version}/include"'' + #''-I"${pkgs.glib.dev}/include/glib-2.0"'' + #''-I${pkgs.glib.out}/lib/glib-2.0/include/'' + ]; + #nativeBuildInputs = with pkgs; [ rustc cargo ]; }; }); } diff --git a/shell.nix b/shell.nix index 7d956d4..1b9948c 100644 --- a/shell.nix +++ b/shell.nix @@ -19,6 +19,7 @@ pkgs.mkShell { pkgs.alsa-lib pkgs.sqlite + pkgs.sqlx-cli #pkgs.libclang # ?? 
]; diff --git a/yama.old/Cargo.toml b/yama.old/Cargo.toml new file mode 100644 index 0000000..9187721 --- /dev/null +++ b/yama.old/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "yama" +version = "0.7.0-alpha.1" +authors = ["Olivier 'reivilibre' "] +edition = "2018" +description = "Deduplicated, compressed and encrypted content pile manager" + +repository = "https://bics.ga/reivilibre/yama" +license = "GPL-3.0-or-later" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +fastcdc = "1.0.6" +zstd = "0.11.2" # 0.11.2+zstd.1.5.2 +clap = { version = "3.1.18", features = ["derive"] } +blake = "2.0.2" +twox-hash = "1.5.0" +serde = { version = "1.0.104", features = ["derive"] } +serde_bare = "0.3.0" +users = "0.9.1" +crossbeam-channel = "0.5.1" +crossbeam-utils = "0.8.5" +toml = "0.5.5" +glob = "0.3.0" +nix = "0.17.0" +log = "0.4" +env_logger = "0.7.1" +indicatif = "0.14.0" +num_cpus = "1" +anyhow = "1.0" +thiserror = "1.0" + +byteorder = "1" +itertools = "0.9.0" +rayon = "1.5.0" +chrono = "0.4.19" +rustyline = "7.1.0" +derivative = "2.2.0" +metrics = "0.17.1" + + +[dev-dependencies] +temp-dir = "0.1.11" diff --git a/yama.old/README.md b/yama.old/README.md new file mode 100644 index 0000000..c8499d5 --- /dev/null +++ b/yama.old/README.md @@ -0,0 +1,25 @@ +# 山 (yama): deduplicated heap repository + +Yama is a system for storing files and directory trees in 'piles'. The data stored is deduplicated (by using content-defined chunking) and can be compressed and encrypted, too. + +NOT YET ~~Yama also permits storing to piles on remote computers, using SSH.~~ + +Yama is intended for use as a storage mechanism for backups. Datman is a tool to make it easier to use Yama for backups. + +The documentation is currently the best source of information about Yama, see the `docs` directory. + +Yama can be used as a library for your own programs; further information about this is yet to be provided but the API documentation (Rustdocs) may be useful. + +## Other, unpolished, notes + +### Training a Zstd Dictionary + +`zstd --train FILEs -o zstd.dict` + +* Candidate size: `find ~/Programming -size -4k -size +64c -type f -exec grep -Iq . {} \; -printf "%s\n" | jq -s 'add'` +* Want to sample: + * `find ~/Programming -size -4k -size +64c -type f -exec grep -Iq . {} \; -exec cp {} -t /tmp/d/ \;` + * `du -sh` + * `find > file.list` + * `wc -l < file.list` → gives a № lines + * `shuf -n 4242 file.list | xargs -x zstd --train -o zstd.dict` for 4242 files. Chokes if it receives a filename with a space, just re-run until you get a working set. diff --git a/yama.old/src/bin/yama.rs b/yama.old/src/bin/yama.rs new file mode 100644 index 0000000..7ab8d0b --- /dev/null +++ b/yama.old/src/bin/yama.rs @@ -0,0 +1,311 @@ +/* +This file is part of Yama. + +Yama is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Yama is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Yama. If not, see . 
+*/ + +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Context}; +use log::info; + +use clap::Parser; +use env_logger::Env; +use std::sync::Arc; +use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile}; +use yama::debug::{debug_command, DebugCommand}; +use yama::operations::checking::VacuumMode; +use yama::operations::legacy_pushpull::{ + determine_bypass_level, open_pile_with_work_bypass, push_to, +}; +use yama::operations::{checking, cleanup, extracting}; +use yama::pile::local_sqlitebloblogs::CompactionThresholds; +use yama::pile::{Pile, PileDescriptor, RawPile}; +use yama::{commands, debug}; + +#[derive(Parser)] +#[clap(version = env!("CARGO_PKG_VERSION"), author = env!("CARGO_PKG_AUTHORS"), about = env!("CARGO_PKG_DESCRIPTION"))] +struct Opts { + /// Chooses a different pile to be the working pile. + /// If specified, must be the name of a remote in yama.toml. + // TODO OBS? #[clap(short, long)] + // with: Option, + + #[clap(subcommand)] + command: PileCommand, +} + +#[derive(Parser)] +enum PileCommand { + /// Initialise a yama pile in this directory. + Init {}, + + /// Retrieve a pointer from the yama pile, using a named pointer name. + Retrieve { + /// Name of the pointer to retrieve. + pointer_name: String, + + /// Limited expression(s) of files to retrieve. + /// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE. + #[clap(short, long)] + subset: Option, + + destination: PathBuf, + + /// Number of extraction workers to use. Ideal value varies, but probably not much more than + /// the number of CPU threads. + #[clap(long)] + num_workers: Option, + }, + + /// Check this yama pile for corruption. + Check { + #[clap(long)] + apply_gc: bool, + + #[clap(long)] + dry_run_gc: bool, + + #[clap(long)] + deep: bool, + + #[clap(long)] + shallow: bool, + }, + + Compact { + /// Don't actually perform any compaction; just plan it out. + #[clap(long)] + dry_run: bool, + + /// Allocated size under which a bloblog is considered small. + #[clap(long = "small")] + small_thresh: Option, + + /// Minimum amount of space to reclaim in order to run compaction for reclaim. + #[clap(long = "reclaim")] + min_reclaim: Option, + + /// Maximum amount of space that can be deallocated in a bloblog before we consider it + /// worthwhile to replace. + #[clap(long = "max-dealloc")] + max_deallocated: Option, + + /// Minimum number of mergeable small bloblogs in order to run compaction for merge. + #[clap(long)] + mergeable: Option, + }, + + /// Enter a debug prompt for manually operating on the yama pile. + Debug { supplied_command: Vec }, + + /// Pushes a pointer from this pile to another pile. + Push { + /// The name of the pointer to push. + pointer_name: String, + + /// The path to the other pile to push the pointer to. + other_pile_path: PathBuf, + }, +} + +fn main() -> anyhow::Result<()> { + std::process::exit(wrapped_main()?); +} + +/// Wrapped main, returning the exit code. +/// DO NOT USE exit() in this function, because it can interfere with destructors. +/// (Destructors are needed to ensure some piles are flushed, for example.) 
+fn wrapped_main() -> anyhow::Result { + env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); + + let opts: Opts = Opts::parse(); + + let open_pile = || -> anyhow::Result<(PileDescriptor, Pile>)> { + let this_dir = Path::new("."); + let descriptor = + load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?; + let pile = open_pile(this_dir, &descriptor).context("Failed to open pile")?; + Ok((descriptor, pile)) + }; + + match &opts.command { + PileCommand::Retrieve { + pointer_name, + subset, + destination, + num_workers: workers, + } => { + let (_pdesc, pile) = open_pile()?; + let mut pointer = pile + .read_pointer(pointer_name)? + .expect("No pointer by that name!"); // todo be nice + + if destination.exists() { + bail!("The destination already exists. Overwriting not allowed (yet)."); + } + + let mut root_tree_node = + commands::retrieve_tree_node(&pile, pointer.chunk_ref.clone())?; + + fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?; + + let mut node_to_extract = &mut root_tree_node.node; + + if let Some(subset) = subset { + for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) { + match node_to_extract.child(path_to_descend) { + Ok(new_node) => { + node_to_extract = new_node; + } + Err(msg) => { + bail!("Can't descend into {path_to_descend:?}: {msg}"); + } + } + } + } + + // todo allow disabling apply metadata + extracting::extract( + destination, + node_to_extract, + &pile, + true, + workers.unwrap_or(2), + true, + true, + true, + )?; + } + PileCommand::Check { + apply_gc, + dry_run_gc, + deep, + shallow, + } => { + let vacuum_mode = if *dry_run_gc { + VacuumMode::DryRunVacuum + } else if *apply_gc { + VacuumMode::Vacuum + } else { + VacuumMode::NoVacuum + }; + let (_pdesc, pile) = open_pile()?; + let error_count = if *deep { + checking::check_deep(pile, vacuum_mode, true)? + } else if *shallow { + checking::check_shallow(pile, vacuum_mode, true, true)? 
+ } else { + bail!("You need to specify either --shallow or --deep."); + }; + + if error_count > 0 { + eprintln!("THERE ARE {} ERRORS.", error_count); + return Ok(1); + } + } + PileCommand::Compact { + dry_run, + small_thresh, + min_reclaim, + max_deallocated, + mergeable, + } => { + let this_dir = Path::new("."); + let descriptor = + load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?; + cleanup::compact( + this_dir, + &descriptor, + !*dry_run, + true, + CompactionThresholds { + minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024), + minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64), + cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024), + cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024), + }, + )?; + } + PileCommand::Init {} => { + commands::init(".".as_ref())?; + } + + PileCommand::Debug { supplied_command } => { + let (pdesc, pile) = open_pile()?; + if supplied_command.is_empty() { + debug::debug_prompt(pdesc, pile)?; + } else { + let mut prefixed_command = vec![String::from("yama-debug")]; + prefixed_command.extend(supplied_command.iter().cloned()); + match DebugCommand::try_parse_from(prefixed_command) { + Ok(command) => { + if let Err(e) = debug_command(&pdesc, &pile, command) { + eprintln!("Failed {:?}", e); + pile.flush()?; + return Ok(2); + } else { + pile.flush()?; + return Ok(0); + } + } + Err(err) => { + eprintln!("Invalid {:?}", err); + return Ok(3); + } + } + } + } + + PileCommand::Push { + pointer_name, + other_pile_path, + } => { + let this_pile_path = PathBuf::from("."); + + let descriptor_this = load_pile_descriptor(".".as_ref()) + .context("Failed to load this pile descriptor")?; + + let descriptor_other = load_pile_descriptor(other_pile_path) + .context("Failed to load foreign pile descriptor.")?; + + let bypass_level = determine_bypass_level( + &descriptor_this, + &this_pile_path, + &descriptor_other, + &other_pile_path, + )?; + + info!("Using bypass level: {:?}", bypass_level); + + let (this_pile, this_rp_bypass) = + open_pile_with_work_bypass(&this_pile_path, &descriptor_this, bypass_level)?; + let (other_pile, other_rp_bypass) = + open_pile_with_work_bypass(&other_pile_path, &descriptor_other, bypass_level)?; + + // TODO flush the pile after here yet + push_to( + Arc::new(this_pile), + this_rp_bypass, + Arc::new(other_pile), + other_rp_bypass, + vec![pointer_name.clone()], + true, + 32, + )?; + } + } + + Ok(0) +} diff --git a/yama/src/chunking.rs b/yama.old/src/chunking.rs similarity index 100% rename from yama/src/chunking.rs rename to yama.old/src/chunking.rs diff --git a/yama/src/commands.rs b/yama.old/src/commands.rs similarity index 100% rename from yama/src/commands.rs rename to yama.old/src/commands.rs diff --git a/yama/src/debug.rs b/yama.old/src/debug.rs similarity index 100% rename from yama/src/debug.rs rename to yama.old/src/debug.rs diff --git a/yama/src/definitions.rs b/yama.old/src/definitions.rs similarity index 100% rename from yama/src/definitions.rs rename to yama.old/src/definitions.rs diff --git a/yama.old/src/lib.rs b/yama.old/src/lib.rs new file mode 100644 index 0000000..79c4f2f --- /dev/null +++ b/yama.old/src/lib.rs @@ -0,0 +1,10 @@ +pub mod chunking; +pub mod commands; +pub mod debug; +pub mod definitions; +pub mod operations; +pub mod pile; +pub mod progress; +pub mod remote; +pub mod tree; +pub mod utils; diff --git a/yama/src/operations.rs b/yama.old/src/operations.rs similarity index 100% rename from yama/src/operations.rs rename to 
yama.old/src/operations.rs diff --git a/yama/src/operations/checking.rs b/yama.old/src/operations/checking.rs similarity index 100% rename from yama/src/operations/checking.rs rename to yama.old/src/operations/checking.rs diff --git a/yama/src/operations/cleanup.rs b/yama.old/src/operations/cleanup.rs similarity index 100% rename from yama/src/operations/cleanup.rs rename to yama.old/src/operations/cleanup.rs diff --git a/yama/src/operations/extracting.rs b/yama.old/src/operations/extracting.rs similarity index 100% rename from yama/src/operations/extracting.rs rename to yama.old/src/operations/extracting.rs diff --git a/yama/src/operations/legacy_pushpull.rs b/yama.old/src/operations/legacy_pushpull.rs similarity index 100% rename from yama/src/operations/legacy_pushpull.rs rename to yama.old/src/operations/legacy_pushpull.rs diff --git a/yama/src/operations/storing.rs b/yama.old/src/operations/storing.rs similarity index 100% rename from yama/src/operations/storing.rs rename to yama.old/src/operations/storing.rs diff --git a/yama/src/pile.rs b/yama.old/src/pile.rs similarity index 99% rename from yama/src/pile.rs rename to yama.old/src/pile.rs index 78926f9..bb0ca8c 100644 --- a/yama/src/pile.rs +++ b/yama.old/src/pile.rs @@ -28,7 +28,6 @@ use std::sync::{Arc, Condvar, Mutex}; pub mod access_guard; pub mod compression; -pub mod encryption; pub mod integrity; pub mod local_sqlitebloblogs; diff --git a/yama/src/pile/access_guard.rs b/yama.old/src/pile/access_guard.rs similarity index 100% rename from yama/src/pile/access_guard.rs rename to yama.old/src/pile/access_guard.rs diff --git a/yama/src/pile/compression.rs b/yama.old/src/pile/compression.rs similarity index 100% rename from yama/src/pile/compression.rs rename to yama.old/src/pile/compression.rs diff --git a/yama/src/pile/integrity.rs b/yama.old/src/pile/integrity.rs similarity index 100% rename from yama/src/pile/integrity.rs rename to yama.old/src/pile/integrity.rs diff --git a/yama/src/pile/local_sqlitebloblogs.rs b/yama.old/src/pile/local_sqlitebloblogs.rs similarity index 100% rename from yama/src/pile/local_sqlitebloblogs.rs rename to yama.old/src/pile/local_sqlitebloblogs.rs diff --git a/yama/src/progress.rs b/yama.old/src/progress.rs similarity index 100% rename from yama/src/progress.rs rename to yama.old/src/progress.rs diff --git a/yama/src/remote.rs b/yama.old/src/remote.rs similarity index 100% rename from yama/src/remote.rs rename to yama.old/src/remote.rs diff --git a/yama/src/remote/requester.rs b/yama.old/src/remote/requester.rs similarity index 100% rename from yama/src/remote/requester.rs rename to yama.old/src/remote/requester.rs diff --git a/yama/src/remote/responder.rs b/yama.old/src/remote/responder.rs similarity index 100% rename from yama/src/remote/responder.rs rename to yama.old/src/remote/responder.rs diff --git a/yama/src/tree.rs b/yama.old/src/tree.rs similarity index 100% rename from yama/src/tree.rs rename to yama.old/src/tree.rs diff --git a/yama/src/utils.rs b/yama.old/src/utils.rs similarity index 100% rename from yama/src/utils.rs rename to yama.old/src/utils.rs diff --git a/yama/Cargo.toml b/yama/Cargo.toml index 1e1e50e..28680e4 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -11,35 +11,42 @@ license = "GPL-3.0-or-later" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -fastcdc = "1.0.6" -zstd = "0.11.2" # 0.11.2+zstd.1.5.2 -clap = { version = "3.1.18", features = ["derive"] } -blake = "2.0.2" -twox-hash = "1.5.0" -serde = { 
version = "1.0.104", features = ["derive"] } -serde_bare = "0.3.0" -users = "0.9.1" -crossbeam-channel = "0.5.1" -crossbeam-utils = "0.8.5" -toml = "0.5.5" -glob = "0.3.0" -nix = "0.17.0" -log = "0.4" -env_logger = "0.7.1" -indicatif = "0.14.0" -num_cpus = "1" -anyhow = "1.0" -thiserror = "1.0" -sodiumoxide = "0.2.6" -byteorder = "1" -itertools = "0.9.0" -rayon = "1.5.0" -rusqlite = "0.24.2" -chrono = "0.4.19" -rustyline = "7.1.0" -derivative = "2.2.0" -metrics = "0.17.1" +eyre = "0.6.8" +tracing = "0.1.37" +ignore = "0.4.20" +patricia_tree = "0.5.7" + +users = "0.11.0" + +serde = { version = "1.0.160", features = ["derive"] } + +yama_pile = { path = "../yama_pile" } +yama_localcache = { path = "../yama_localcache" } +yama_wormfile = { path = "../yama_wormfile" } +yama_wormfile_fs = { path = "../yama_wormfile_fs" } +yama_wormfile_s3 = { path = "../yama_wormfile_s3" } +yama_wormfile_sftp = { path = "../yama_wormfile_sftp" } +yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } + +clap = { version = "4.2.2", features = ["derive"] } + +tokio = { version = "1.27.0", features = ["io-std"] } +appdirs = "0.2.0" +twox-hash = "1.6.3" +hostname = "0.3.1" + +tracing-subscriber = { version = "0.3.16", features = ["tracing-log", "env-filter"] } +tracing-indicatif = "0.3.0" +indicatif = "0.17.3" + +dashmap = "5.4.0" +fastcdc = "3.0.3" +zstd = "0.12.3" +memmap2 = "0.5.10" +flume = "0.10.14" + +async-recursion = "1.0.4" +toml = "0.7.3" -[dev-dependencies] -temp-dir = "0.1.11" +dust_style_filetree_display = "0.8.5" diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index 7ab8d0b..cb19ab1 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -15,297 +15,599 @@ You should have received a copy of the GNU General Public License along with Yama. If not, see . 
*/ +use clap::{Parser, Subcommand}; +use eyre::{bail, ensure, eyre, Context, ContextCompat}; +use patricia_tree::PatriciaMap; +use std::borrow::Cow; +use std::iter::Iterator; use std::path::{Path, PathBuf}; - -use anyhow::{bail, Context}; -use log::info; - -use clap::Parser; -use env_logger::Env; +use std::str::FromStr; use std::sync::Arc; -use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile}; -use yama::debug::{debug_command, DebugCommand}; -use yama::operations::checking::VacuumMode; -use yama::operations::legacy_pushpull::{ - determine_bypass_level, open_pile_with_work_bypass, push_to, +use indicatif::ProgressStyle; +use tokio::io::{stdin, AsyncBufReadExt, BufReader}; +use tracing::{info, info_span, warn, Span, Instrument}; +use tracing_indicatif::IndicatifLayer; +use tracing_indicatif::span_ext::IndicatifSpanExt; +use tracing_subscriber::filter::filter_fn; +use tracing_subscriber::Layer; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use yama::extract::flatten_treenode; +use yama::init::{generate_master_keyring, pack_keyring}; +use yama::open::{open_keyring_interactive, open_pile, pre_open_keyring, update_cache}; +use yama::pile_connector::PileConnectionScheme; +use yama::scan::create_uidgid_lookup_tables; +use yama::storing::{assemble_and_write_indices, StoragePipeline}; +use yama::{extract, get_hostname, init, PROGRESS_BAR_STYLE, scan}; +use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; +use yama_midlevel_crypto::chunk_id::ChunkIdKey; +use yama_pile::definitions::{ + PackedPileConfig, PileConfig, RecursiveChunkRef, SUPPORTED_YAMA_PILE_VERSION, }; -use yama::operations::{checking, cleanup, extracting}; -use yama::pile::local_sqlitebloblogs::CompactionThresholds; -use yama::pile::{Pile, PileDescriptor, RawPile}; -use yama::{commands, debug}; +use yama_pile::locks::LockKind; +use yama_pile::pointers::Pointer; +use yama_pile::tree::unpopulated::ScanEntry; +use yama_pile::tree::{ + assemble_tree_from_scan_entries, differentiate_node_in_place, RootTreeNode, TreeNode, +}; +use yama_pile::FILE_YAMA_CONNECTOR; -#[derive(Parser)] -#[clap(version = env!("CARGO_PKG_VERSION"), author = env!("CARGO_PKG_AUTHORS"), about = env!("CARGO_PKG_DESCRIPTION"))] -struct Opts { - /// Chooses a different pile to be the working pile. - /// If specified, must be the name of a remote in yama.toml. - // TODO OBS? #[clap(short, long)] - // with: Option, - - #[clap(subcommand)] - command: PileCommand, +#[derive(Clone, Debug)] +pub struct PileAndPointer { + pub pile_path: Option, + pub pointer: PointerName, } -#[derive(Parser)] -enum PileCommand { - /// Initialise a yama pile in this directory. - Init {}, +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct PointerName(String); - /// Retrieve a pointer from the yama pile, using a named pointer name. - Retrieve { - /// Name of the pointer to retrieve. - pointer_name: String, +impl FromStr for PointerName { + type Err = eyre::Error; - /// Limited expression(s) of files to retrieve. - /// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE. - #[clap(short, long)] - subset: Option, - - destination: PathBuf, - - /// Number of extraction workers to use. Ideal value varies, but probably not much more than - /// the number of CPU threads. - #[clap(long)] - num_workers: Option, - }, - - /// Check this yama pile for corruption. 
- Check { - #[clap(long)] - apply_gc: bool, - - #[clap(long)] - dry_run_gc: bool, - - #[clap(long)] - deep: bool, - - #[clap(long)] - shallow: bool, - }, - - Compact { - /// Don't actually perform any compaction; just plan it out. - #[clap(long)] - dry_run: bool, - - /// Allocated size under which a bloblog is considered small. - #[clap(long = "small")] - small_thresh: Option, - - /// Minimum amount of space to reclaim in order to run compaction for reclaim. - #[clap(long = "reclaim")] - min_reclaim: Option, - - /// Maximum amount of space that can be deallocated in a bloblog before we consider it - /// worthwhile to replace. - #[clap(long = "max-dealloc")] - max_deallocated: Option, - - /// Minimum number of mergeable small bloblogs in order to run compaction for merge. - #[clap(long)] - mergeable: Option, - }, - - /// Enter a debug prompt for manually operating on the yama pile. - Debug { supplied_command: Vec }, - - /// Pushes a pointer from this pile to another pile. - Push { - /// The name of the pointer to push. - pointer_name: String, - - /// The path to the other pile to push the pointer to. - other_pile_path: PathBuf, - }, + fn from_str(s: &str) -> Result { + if !s + .chars() + .all(|c| c.is_alphanumeric() || ['_', '+', '-'].contains(&c)) + { + bail!("Bad pointer name: {s:?}"); + } + Ok(PointerName(s.to_owned())) + } } -fn main() -> anyhow::Result<()> { - std::process::exit(wrapped_main()?); -} +impl FromStr for PileAndPointer { + type Err = eyre::Error; -/// Wrapped main, returning the exit code. -/// DO NOT USE exit() in this function, because it can interfere with destructors. -/// (Destructors are needed to ensure some piles are flushed, for example.) -fn wrapped_main() -> anyhow::Result { - env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); - - let opts: Opts = Opts::parse(); - - let open_pile = || -> anyhow::Result<(PileDescriptor, Pile>)> { - let this_dir = Path::new("."); - let descriptor = - load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?; - let pile = open_pile(this_dir, &descriptor).context("Failed to open pile")?; - Ok((descriptor, pile)) - }; - - match &opts.command { - PileCommand::Retrieve { - pointer_name, - subset, - destination, - num_workers: workers, - } => { - let (_pdesc, pile) = open_pile()?; - let mut pointer = pile - .read_pointer(pointer_name)? - .expect("No pointer by that name!"); // todo be nice - - if destination.exists() { - bail!("The destination already exists. 
Overwriting not allowed (yet)."); - } - - let mut root_tree_node = - commands::retrieve_tree_node(&pile, pointer.chunk_ref.clone())?; - - fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?; - - let mut node_to_extract = &mut root_tree_node.node; - - if let Some(subset) = subset { - for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) { - match node_to_extract.child(path_to_descend) { - Ok(new_node) => { - node_to_extract = new_node; - } - Err(msg) => { - bail!("Can't descend into {path_to_descend:?}: {msg}"); - } - } - } - } - - // todo allow disabling apply metadata - extracting::extract( - destination, - node_to_extract, - &pile, - true, - workers.unwrap_or(2), - true, - true, - true, - )?; - } - PileCommand::Check { - apply_gc, - dry_run_gc, - deep, - shallow, - } => { - let vacuum_mode = if *dry_run_gc { - VacuumMode::DryRunVacuum - } else if *apply_gc { - VacuumMode::Vacuum - } else { - VacuumMode::NoVacuum - }; - let (_pdesc, pile) = open_pile()?; - let error_count = if *deep { - checking::check_deep(pile, vacuum_mode, true)? - } else if *shallow { - checking::check_shallow(pile, vacuum_mode, true, true)? - } else { - bail!("You need to specify either --shallow or --deep."); - }; - - if error_count > 0 { - eprintln!("THERE ARE {} ERRORS.", error_count); - return Ok(1); - } - } - PileCommand::Compact { - dry_run, - small_thresh, - min_reclaim, - max_deallocated, - mergeable, - } => { - let this_dir = Path::new("."); - let descriptor = - load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?; - cleanup::compact( - this_dir, - &descriptor, - !*dry_run, - true, - CompactionThresholds { - minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024), - minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64), - cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024), - cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024), - }, - )?; - } - PileCommand::Init {} => { - commands::init(".".as_ref())?; - } - - PileCommand::Debug { supplied_command } => { - let (pdesc, pile) = open_pile()?; - if supplied_command.is_empty() { - debug::debug_prompt(pdesc, pile)?; - } else { - let mut prefixed_command = vec![String::from("yama-debug")]; - prefixed_command.extend(supplied_command.iter().cloned()); - match DebugCommand::try_parse_from(prefixed_command) { - Ok(command) => { - if let Err(e) = debug_command(&pdesc, &pile, command) { - eprintln!("Failed {:?}", e); - pile.flush()?; - return Ok(2); - } else { - pile.flush()?; - return Ok(0); - } - } - Err(err) => { - eprintln!("Invalid {:?}", err); - return Ok(3); - } - } - } - } - - PileCommand::Push { - pointer_name, - other_pile_path, - } => { - let this_pile_path = PathBuf::from("."); - - let descriptor_this = load_pile_descriptor(".".as_ref()) - .context("Failed to load this pile descriptor")?; - - let descriptor_other = load_pile_descriptor(other_pile_path) - .context("Failed to load foreign pile descriptor.")?; - - let bypass_level = determine_bypass_level( - &descriptor_this, - &this_pile_path, - &descriptor_other, - &other_pile_path, - )?; - - info!("Using bypass level: {:?}", bypass_level); - - let (this_pile, this_rp_bypass) = - open_pile_with_work_bypass(&this_pile_path, &descriptor_this, bypass_level)?; - let (other_pile, other_rp_bypass) = - open_pile_with_work_bypass(&other_pile_path, &descriptor_other, bypass_level)?; - - // TODO flush the pile after here yet - push_to( - Arc::new(this_pile), - this_rp_bypass, - Arc::new(other_pile), - 
other_rp_bypass, - vec![pointer_name.clone()], - true, - 32, - )?; + fn from_str(s: &str) -> Result { + match s.split_once(":") { + None => Ok(PileAndPointer { + pile_path: None, + pointer: PointerName::from_str(s)?, + }), + Some((pile_path, pointer)) => Ok(PileAndPointer { + pile_path: Some(PathBuf::from(pile_path)), + pointer: PointerName::from_str(pointer)?, + }), + } + } +} + +#[derive(Clone, Debug)] +pub struct PileAndPointerWithSubTree { + pub pile_path: Option, + pub pointer: PointerName, + // TODO how to represent... + pub sub_tree: String, +} + +impl FromStr for PileAndPointerWithSubTree { + type Err = eyre::Error; + + fn from_str(s: &str) -> Result { + let (pile_path, pointer_and_subtree) = match s.split_once(":") { + None => (None, s), + Some((pile_path, pointer)) => (Some(PathBuf::from(pile_path)), pointer), + }; + + if let Some(slash) = pointer_and_subtree.find('/') { + Ok(PileAndPointerWithSubTree { + pile_path, + pointer: PointerName::from_str(&pointer_and_subtree[0..slash])?, + sub_tree: pointer_and_subtree[slash + 1..].to_owned(), + }) + } else { + Ok(PileAndPointerWithSubTree { + pile_path, + pointer: PointerName::from_str(&pointer_and_subtree)?, + sub_tree: String::new(), + }) + } + } +} + +#[derive(Parser, Clone, Debug)] +pub enum YamaCommand { + /// Create a new Yama pile. + Init { + #[arg(long)] + sftp: bool, + #[arg(long)] + s3: bool, + #[arg(long)] + no_password: bool, + + #[arg(long)] + zstd_dict: Option, + #[arg(long)] + no_zstd_dict: bool, + }, + + /// Keyring management commands; see `yama keyring --help`. + #[command(subcommand)] + Keyring(KeyringCommand), + + /// Store a file, directory or input stream to a Yama pile. + Store { + source: PathBuf, + destination: PileAndPointer, + + #[arg(long)] + stdin: bool, + + #[arg(long)] + overwrite: bool, + + /// Name of a parent pointer to use, if storing an incremental update. + /// Although optional, this can make storing time much faster as it can prevent unchanged + /// files from being rescanned. + #[arg(short = 'p', long)] + parent: Option, + }, + + /// Extract a file, directory or output stream from a Yama pile. + Extract { + source: PileAndPointerWithSubTree, + destination: PathBuf, + + #[arg(long)] + stdout: bool, + + #[arg(long)] + overwrite: bool, + }, + + // TODO Mount { ... }, + Check { + #[arg(long, short = '1')] + pointers: bool, + + #[arg(long, short = '2')] + shallow: bool, + + #[arg(long, short = '9')] + intensive: bool, + }, + // TODO lsp, rmp + + // TODO vacuum + + // TODO `locks` to inspect locks +} + +#[derive(Subcommand, Clone, Debug)] +pub enum KeyringCommand { + Inspect { + file: PathBuf, + }, + Create { + /// Where to put the new keyring. + new: PathBuf, + + /// What keyring to use to copy keys from. Default is `master.yamakeyring`. + #[arg(long)] + from: Option, + + /// Which keys to put into the new keyring. + #[arg(long)] + with: String, + + /// Don't password-protect the new keyring. + /// It goes without saying that you should never disclose an unprotected keyring to anyone. + #[arg(long)] + no_password: bool, + + /// Use weaker, faster password protection for the new keyring. + /// You should use different passwords for weak and strong keyrings! + /// This may be suitable if the keyring is kept in a moderately safe environment, but + /// prefer a strongly-protected keyring if the file might be reasonably accessible by + /// enemies. 
+ #[arg(long)] + weak: bool, + }, // TODO ChangePassword +} + +const PROGRESS_SPANS: &'static [&'static str] = &["store_file", "storing", "unpack_files", "expand_chunkrefs", "extract_files"]; + +#[tokio::main] +async fn main() -> eyre::Result<()> { + let indicatif_layer = IndicatifLayer::new(); + let stderr_writer = indicatif_layer.get_stderr_writer(); + let indicatif_layer = indicatif_layer + .with_filter(filter_fn(|span_metadata| { + span_metadata.target().starts_with("yama") && PROGRESS_SPANS.contains(&span_metadata.name()) + })); + + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "sqlx=warn,yama=debug,info".into()), + ) + .with(tracing_subscriber::fmt::layer().with_writer(stderr_writer)) + .with(indicatif_layer) + .init(); + + match dbg!(YamaCommand::parse()) { + YamaCommand::Init { + sftp, + s3, + no_password, + zstd_dict, + no_zstd_dict, + } => { + if zstd_dict.is_some() && no_zstd_dict { + bail!("Can only choose one of --zstd-dict or --no-zstd-dict!"); + } + if zstd_dict.is_none() && !no_zstd_dict { + bail!("Must choose one of --zstd-dict or --no-zstd-dict!"); + } + + // Begin. Create a pile in the current directory. + let pile_path = PathBuf::from("."); + + // Check we are happy to make a pile or connector here. + init::pre_init_check(&pile_path).await?; + + let connection_scheme = set_up_connection(sftp, s3, &pile_path).await?; + let connection = connection_scheme + .connect_to_wormfileprovider() + .await + .context("Failed to open pile destination")?; + + info!("Pile destination seems OK; can proceed to set up pile."); + + let master_password = if no_password { + warn!("Not setting a master password. The master keyring will be unprotected."); + None + } else { + println!("enter master password:"); + let stdin = stdin(); + let mut stdin_br = BufReader::new(stdin); + let mut line = String::new(); + stdin_br.read_line(&mut line).await?; + Some(line.trim().to_owned()) + }; + + let master_keyring = generate_master_keyring(); + let master_key_packed = pack_keyring( + master_keyring.clone(), + master_password.as_ref().map(|s| s.as_ref()), + )?; + + let zstd_dict = if no_zstd_dict { + None + } else { + let zstd_dict_path = zstd_dict.unwrap(); + Some(Arc::new(tokio::fs::read(&zstd_dict_path) + .await + .with_context(|| format!("failed to read Zstd dict at {zstd_dict_path:?}"))?)) + }; + + let pile_config = PileConfig { + yama_pile_version: SUPPORTED_YAMA_PILE_VERSION.to_owned(), + chunk_id_key: ChunkIdKey::new_rand(), + zstd_dict, + }; + let packed_pile_config: PackedPileConfig = master_keyring + .w_config + .unwrap() + .make_locked_asymbox(CborSerde::serialise(&pile_config).unwrap()); + + let make_pileside_copy_of_master_key = !(sftp || s3); // TODO ask for these + + init::init_pile( + &connection, + packed_pile_config, + if make_pileside_copy_of_master_key { + Some(master_key_packed.clone()) + } else { + None + }, + ) + .await?; + + if sftp || s3 { + // For remote piles: save a master keyring copy locally + tokio::fs::write( + pile_path.join("master.yamakeyring"), + &master_key_packed.into_byte_vec(), + ) + .await + .context("failed to make local copy of master keyring")?; + tokio::fs::write( + pile_path.join(FILE_YAMA_CONNECTOR), + toml::to_string_pretty(&connection_scheme) + .context("failed to serialise connector")?, + ) + .await + .context("failed to write connector")?; + } + } + YamaCommand::Store { + source, + destination, + stdin, + overwrite, + parent, + } => { + ensure!(!stdin, "stdin not supported yet"); + 
let pile_connector_path = destination.pile_path.unwrap_or(PathBuf::from(".")); + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + let pwc = open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} store {:?}", get_hostname(), destination.pointer), + ) + .await?; + update_cache(&pwc).await?; + + let parent_pointer = if let Some(ref parent) = parent { + let pointer = pwc + .read_pointer_fully_integrated(parent.0.as_str()) + .await + .with_context(|| format!("whilst reading parent pointer: {parent:?}"))? + .with_context(|| { + format!("it appears that the pointer {parent:?} does not exist") + })?; + assert!(pointer.parent.is_none()); + Some(pointer) + } else { + None + }; + + let source2 = source.clone(); + let scan_entry_map = tokio::task::spawn_blocking(move || { + scan::scan(&source2, &Vec::new()).context("Failed to scan") + }) + .await??; + + let pwc = Arc::new(pwc); + + let (mut chunk_file_map, pruned_scan_entry_map) = + if let Some(ref parent_node) = parent_pointer { + let (cfm, pruned) = + scan::prepopulate_unmodified(&parent_node.root.node, &scan_entry_map); + + (cfm, Cow::Owned(pruned)) + } else { + ( + PatriciaMap::<(RecursiveChunkRef, u64)>::new(), + Cow::Borrowed(&scan_entry_map), + ) + }; + + let store_span = info_span!("storing"); + // store_span.pb_set_style(&ProgressStyle::default_bar()); + store_span.pb_set_style(&ProgressStyle::default_bar().template( + PROGRESS_BAR_STYLE, + ).unwrap()); + store_span.pb_set_message("storing files"); + store_span.pb_set_length(pruned_scan_entry_map.values() + .filter(|v| matches!(v, ScanEntry::NormalFile { .. })).count() as u64); + let store_span_entered = store_span.enter(); + + let (pipeline, pipeline_job_tx) = StoragePipeline::launch_new(4, pwc.clone()).await?; + + let source2 = source.clone(); + let (submitter_task, receiver_task) = tokio::join!( + async move { + let pipeline_job_tx = pipeline_job_tx; + for (name_bytes, scan_entry) in pruned_scan_entry_map.iter() { + if let ScanEntry::NormalFile { .. } = scan_entry { + let name = std::str::from_utf8(name_bytes.as_slice()) + .context("name is not str")?; + pipeline_job_tx + .send_async((name.to_owned(), source2.join(name))) + .await + .map_err(|_| eyre!("unable to send to pipeline."))?; + } + } + drop(pipeline_job_tx); + Ok::<_, eyre::Report>(()) + }, + async { + while let Ok((job_id, rec_chunk_ref, real_size)) = pipeline.next_result().await + { + chunk_file_map.insert_str(&job_id, (rec_chunk_ref, real_size)); + Span::current().pb_inc(1); + } + // eprintln!("fin rec"); + Ok::<_, eyre::Report>(()) + } + ); + submitter_task?; + receiver_task?; + + drop(store_span_entered); + drop(store_span); + + info!("All files stored, writing indices..."); + + // Write indices for the new bloblogs we have created. This is a prerequisite for creating a pointer. 
+ let chunkmaps = pipeline + .finish_into_chunkmaps() + .await + .context("failed to finish into chunkmaps")?; + assemble_and_write_indices(&pwc, chunkmaps) + .await + .context("failed to assemble and write indices")?; + + info!("All indices stored, writing pointer..."); + + // Assemble and write a pointer + let mut tree = assemble_tree_from_scan_entries(scan_entry_map, chunk_file_map) + .context("failed to assemble tree")?; + let (uids, gids) = + create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?; + + if let Some(ref parent_node) = parent_pointer { + differentiate_node_in_place(&mut tree, &parent_node.root.node) + .context("failed to differentiate?")?; + } + + pwc.pile + .write_pointer( + destination.pointer.0.as_str(), + overwrite, + &Pointer { + parent: parent.as_ref().map(|p| p.0.clone()), + root: RootTreeNode { + name: source + .file_name() + .map(|oss| oss.to_str()) + .flatten() + .unwrap_or("") + .to_owned(), + node: tree, + }, + uids, + gids, + }, + ) + .await + .context("failed to write pointer")?; + + Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; + } + YamaCommand::Extract { + source, + destination, + stdout, + overwrite, + } => { + ensure!(!stdout, "stdout not supported yet"); + let pile_connector_path = source + .pile_path + .as_ref() + .map(|p| p.as_ref()) + .unwrap_or(Path::new(".")); + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + let pwc = Arc::new(open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} store {:?}", get_hostname(), source.pointer), + ) + .await?); + update_cache(&pwc).await?; + + let pointer = pwc + .read_pointer_fully_integrated(source.pointer.0.as_str()) + .await + .context("failed to read pointer")? + .with_context(|| { + format!( + "it appears that the pointer {:?} does not exist", + source.pointer + ) + })?; + assert!(pointer.parent.is_none()); + + let node = if source.sub_tree.is_empty() { + &pointer.root.node + } else { + let mut current = &pointer.root.node; + for subpath in source.sub_tree.split('/') { + if let TreeNode::Directory { children, .. 
} = current { + current = children.get(subpath).with_context(|| { + format!("can't descend into {subpath}: doesn't exist in directory.") + })?; + } else { + bail!("can't descend into {subpath}; parent isn't a directory..."); + } + } + current + }; + + let flat = flatten_treenode(&node)?; + + extract::unpack_nonfiles(&destination, &flat.nonfiles, false, true).await?; + + let extract_span = info_span!("extract_files"); + extract::unpack_files(&pwc, &destination, &flat.files, false, true) + .instrument(extract_span) + .await?; + + Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; + } + other => todo!(), + } + + Ok(()) +} + +async fn set_up_connection( + sftp: bool, + s3: bool, + local_dir: &Path, +) -> eyre::Result { + let stdin = stdin(); + let mut stdin_br = BufReader::new(stdin); + let mut line = String::new(); + + match (sftp, s3) { + (true, true) => { + bail!("Can only choose one of --sftp or --s3 (or local)!"); + } + (true, false) => { + // SFTP + println!("Enter user@host for SFTP:"); + stdin_br.read_line(&mut line).await?; + let user_at_host = line.trim().to_owned(); + line.clear(); + + println!("Enter remote path (can be relative to user home):"); + stdin_br.read_line(&mut line).await?; + let remote_path = line.trim().to_owned(); + line.clear(); + + println!( + "Enter password, or blank if not required (due to use of key authentication):" + ); + stdin_br.read_line(&mut line).await?; + let password = line.trim().to_owned(); + line.clear(); + + Ok(PileConnectionScheme::Sftp { + user_at_host, + password, + directory: remote_path, + }) + } + (false, true) => { + // S3 + todo!() + } + (false, false) => { + // Local filesystem + Ok(PileConnectionScheme::Local { + directory: local_dir.to_owned(), + }) } } - - Ok(0) } diff --git a/yama/src/bin/yamascan.rs b/yama/src/bin/yamascan.rs new file mode 100644 index 0000000..6b88a87 --- /dev/null +++ b/yama/src/bin/yamascan.rs @@ -0,0 +1,234 @@ +/* +This file is part of Yama. + +Yama is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Yama is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Yama. If not, see . +*/ + +use std::collections::BTreeMap; +use std::io::SeekFrom; +use std::path::{Path, PathBuf}; + +use clap::Parser; +use dust_style_filetree_display::display::{draw_it, InitialDisplayData}; + +use dust_style_filetree_display::filter::AggregateData; +use dust_style_filetree_display::node::Node; +use dust_style_filetree_display::{get_height_of_terminal, get_width_of_terminal, init_color}; +use eyre::{bail, Context, ContextCompat}; +use patricia_tree::PatriciaMap; +use tokio::fs::OpenOptions; +use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; + +use yama::scan; + +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; + +use yama_pile::tree::unpopulated::ScanEntry; + +#[derive(Parser, Clone, Debug)] +pub enum YamaScanCommand { + /// Add an entry to an ignore file + #[command(alias = "i")] + Ignore { + /// What to ignore + path: String, + + /// Don't anchor the match to this directory. 
+ #[arg(short = 'a')] + unanchored: bool, + }, + + /// Show dust-style usage graph of the current directory, excluding excluded files. + #[command(alias = "du")] + Usage {}, +} + +#[tokio::main] +async fn main() -> eyre::Result<()> { + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "sqlx=warn,yama=debug,info".into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + + match YamaScanCommand::parse() { + YamaScanCommand::Usage {} => { + let idd = InitialDisplayData { + short_paths: true, + is_reversed: false, + colors_on: !init_color(false), + by_filecount: false, + is_screen_reader: false, + iso: false, + }; + + let scan = scan::scan(Path::new("."), &Vec::new()).context("Couldn't scan")?; + let top_nodes = assemble_display_tree_from_scan_entries(scan)?.children; + + let root_display_node = dust_style_filetree_display::filter::get_biggest( + top_nodes, + AggregateData { + min_size: None, + only_dir: false, + only_file: false, + number_of_lines: get_height_of_terminal(), + depth: usize::MAX, + using_a_filter: false, + }, + ) + .expect("no root?"); + + draw_it( + idd, + false, + get_width_of_terminal(), + &root_display_node, + false, + ) + }, + YamaScanCommand::Ignore { + path, unanchored + } => { + let mut oo = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(".yamaignore").await + .context("failed to open .yamaignore for r/w")?; + let pos = oo.seek(SeekFrom::End(0)).await?; + if pos > 1 { + oo.seek(SeekFrom::End(-1)).await?; + let last_byte = oo.read_u8().await?; + if last_byte != b'\n' { + oo.write_u8(b'\n').await?; + } + } + + if unanchored { + oo.write_all(format!("{}\n", path).as_bytes()).await?; + } else { + oo.write_all(format!("/{}\n", path).as_bytes()).await?; + } + + oo.flush().await?; + drop(oo); + }, + _other => todo!(), + } + + Ok(()) +} + +pub fn assemble_display_tree_from_scan_entries(scan: PatriciaMap) -> eyre::Result { + let mut dirs: BTreeMap> = BTreeMap::new(); + // special-case the root ("") + dirs.insert(String::new(), BTreeMap::new()); + + for (key, entry) in scan.into_iter() { + let key_string = String::from_utf8(key).context("bad UTF-8 in PMap")?; + let (parent_dir_name, child_name) = + key_string.rsplit_once('/').unwrap_or(("", &key_string)); + match entry { + ScanEntry::NormalFile { size, .. } => { + // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. + // That's fine. We'll patch this up later. + dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? + .insert( + child_name.to_owned(), + Node { + name: PathBuf::from(&key_string), + size, + children: vec![], + inode_device: None, + depth: 0, + }, + ); + } + ScanEntry::Directory { + ownership: _, + permissions: _, + } => { + dirs.insert(key_string.clone(), BTreeMap::new()); + // note: for the root, this inserts the root directory entry as a child called "" within the root. + // That's fine. We'll patch this up later. + dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? + .insert( + child_name.to_owned(), + Node { + name: PathBuf::from(&key_string), + size: 4096, + children: vec![], + inode_device: None, + depth: 0, + }, + ); + } + ScanEntry::SymbolicLink { + ownership: _, + target: _, + } => { + // note: for the root, this inserts the root symlink entry as a child called "" within a fake root 'directory'. + // That's fine. We'll patch this up later. 
+ dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? + .insert( + child_name.to_owned(), + Node { + name: PathBuf::from(&key_string), + size: 4096, + children: vec![], + inode_device: None, + depth: 0, + }, + ); + } + } + } + + // Now roll up the directories. In Rustc v1.66 it'd be nice to use pop_last()... + while let Some(last_key) = dirs.keys().last().cloned() { + let mut last_children = dirs.remove(&last_key).unwrap(); + if last_key.is_empty() { + assert!( + dirs.is_empty(), + "when pulling out root pseudo-dir, dirs must be empty for roll-up." + ); + + let mut real_root = last_children.remove("").unwrap(); + real_root.children = last_children.into_values().collect(); + real_root.size += real_root.children.iter().map(|c| c.size).sum::(); + return Ok(real_root); + } + + // We want to roll up the directory last/key -> {child -> ...} + // so last -> {key -> {child -> ...}} + let (parent_dir, child_name) = last_key.rsplit_once('/').unwrap_or(("", &last_key)); + let parent = dirs + .get_mut(parent_dir) + .context("bad PMap? no parent in rollup")?; + let child_in_parent = parent + .get_mut(child_name) + .context("dir child not populated")?; + child_in_parent.children = last_children.into_values().collect(); + child_in_parent.size += child_in_parent.children.iter().map(|c| c.size).sum::(); + } + + bail!("no root found; bad PMap or bad roll-up???"); +} diff --git a/yama/src/extract.rs b/yama/src/extract.rs new file mode 100644 index 0000000..9549c85 --- /dev/null +++ b/yama/src/extract.rs @@ -0,0 +1,416 @@ +use crate::pile_with_cache::PileWithCache; +use crate::retriever::decompressor::PipelineDecompressor; +use crate::retriever::{create_fixed_retriever, FileId, JobChunkReq, JobId, RetrieverResp}; +use eyre::{bail, ensure, Context, ContextCompat, eyre}; +use flume::Receiver; +use patricia_tree::PatriciaMap; +use std::cmp::Reverse; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs::Permissions; +use std::os::unix::fs::PermissionsExt; +use std::path::PathBuf; +use std::sync::Arc; +use indicatif::ProgressStyle; +use tokio::fs::OpenOptions; +use tokio::io::AsyncWriteExt; +use tokio::task::JoinSet; +use tracing::{info_span, Instrument, Span}; +use tracing_indicatif::span_ext::IndicatifSpanExt; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_pile::definitions::{BloblogId, RecursiveChunkRef}; +use yama_pile::tree::unpopulated::ScanEntry; +use yama_pile::tree::{FilesystemPermissions, TreeNode}; +use yama_wormfile::boxed::BoxedWormFileProvider; +use crate::PROGRESS_BAR_STYLE; + +#[derive(Clone, Debug, Default)] +pub struct FlattenedTree { + pub files: PatriciaMap<(ScanEntry, RecursiveChunkRef)>, + pub nonfiles: PatriciaMap, +} + +pub fn flatten_treenode(root_node: &TreeNode) -> eyre::Result { + let mut flat = FlattenedTree::default(); + + root_node.visit( + &mut |node, path| { + match node { + TreeNode::NormalFile { + mtime, + ownership, + permissions, + size, + content, + } => { + flat.files.insert( + path, + ( + ScanEntry::NormalFile { + mtime: *mtime, + ownership: *ownership, + permissions: *permissions, + size: *size, + }, + *content, + ), + ); + } + TreeNode::Directory { + ownership, + permissions, + children: _, + } => { + flat.nonfiles.insert( + path, + ScanEntry::Directory { + ownership: *ownership, + permissions: *permissions, + }, + ); + } + TreeNode::SymbolicLink { ownership, target } => { + flat.nonfiles.insert( + path, + ScanEntry::SymbolicLink { + ownership: *ownership, + target: target.clone(), + }, + ); + } + TreeNode::Deleted => { + 
bail!("unexpected TreeNode::Deleted in flatten_treenode"); + } + } + + Ok(()) + }, + String::new(), + )?; + + Ok(flat) +} + +/// Create directories and symbolic links. +pub async fn unpack_nonfiles( + root: &PathBuf, + nonfiles: &PatriciaMap, + restore_ownership: bool, + restore_permissions: bool, +) -> eyre::Result<()> { + if restore_ownership { + bail!("restoring ownership is not yet supported..."); + } + for (rel_path, scan_entry) in nonfiles.iter() { + let path = root + .join(String::from_utf8(rel_path).context("nonfiles map contains non-string keys?")?); + + match scan_entry { + ScanEntry::NormalFile { .. } => { + bail!("found NormalFile in unpack_nonfiles()"); + } + ScanEntry::Directory { + ownership: _, + permissions, + } => { + tokio::fs::create_dir(&path).await?; + if restore_permissions { + tokio::fs::set_permissions(&path, Permissions::from_mode(permissions.mode)) + .await?; + } + } + ScanEntry::SymbolicLink { + ownership: _, + target, + } => { + tokio::fs::symlink(target, &path).await?; + } + } + } + Ok(()) +} + +// TODO(perf): move out file writes into separate tasks... +pub async fn unpack_files( + pwc: &Arc>, + root: &PathBuf, + files: &PatriciaMap<(ScanEntry, RecursiveChunkRef)>, + restore_ownership: bool, + restore_permissions: bool, +) -> eyre::Result<()> { + if restore_ownership { + bail!("restoring ownership is not yet supported..."); + } + let expanded_chunkrefs = expand_chunkrefs( + pwc, + files + .iter() + .map(|(path_bytes, (scan_entry, rcr))| ((path_bytes, scan_entry), *rcr)), + ) + .await?; + + let total_chunks = expanded_chunkrefs.iter().map(|(_, cs)| cs.len() as u64).sum::(); + let unpack_span = info_span!("unpack_files"); + + async move { + let unpack_span = Span::current(); + unpack_span.pb_set_style(&ProgressStyle::default_bar().template( + PROGRESS_BAR_STYLE, + ).unwrap()); + unpack_span.pb_set_message("unpack"); + unpack_span.pb_set_length(total_chunks); + + let mut join_set = JoinSet::new(); + + let (file_part_retriever, mut jobs) = + lookup_chunkrefs_and_create_retriever(pwc, expanded_chunkrefs).await?; + let mut open_files = BTreeMap::new(); + + loop { + tokio::select! { + Ok(next_part) = file_part_retriever.recv_async() => { + match next_part { + RetrieverResp::Blob { job, subjob, blob } => { + if subjob == 0 { + // eprintln!("subjob 0 for job {job:?}"); + let (path_bytes, scan_entry) = jobs + .remove(&job) + .with_context(|| format!("bad job {job:?} to extract"))?; + + let (permissions, _ownership) = if let ScanEntry::NormalFile { + permissions, + ownership, + .. + } = scan_entry + { + (permissions, ownership) + } else { + bail!("not a Normal File in unpack_files()"); + }; + + let path = root.join(String::from_utf8(path_bytes).context("bad utf-8 in PM")?); + + let (tx, rx) = flume::bounded(16); + + join_set.spawn(file_unpacker_writer(path, *permissions, restore_permissions, rx)); + open_files.insert(job, tx); + } + open_files + .get_mut(&job) + .context("bad job to write file")? + .send_async(Some(blob)) + .await + .map_err(|_| eyre!("file tx shutdown"))?; + + unpack_span.pb_inc(1); + } + RetrieverResp::JobComplete(job) => { + open_files + .remove(&job) + .context("bad job to finish file")? + .send_async(None) + .await + .map_err(|_| eyre!("file tx shutdown"))?; + } + } + }, + Some(join_result) = join_set.join_next() => { + join_result + .context("failed file unpacker writer (a)")? + .context("failed file unpacker writer (b)")?; + }, + else => { + break; + } + } + } + + // we should have already drained the join set, but check... 
+ assert!(join_set.join_next().await.is_none()); + + if !open_files.is_empty() || !jobs.is_empty() { + bail!("There were errors extracting."); + } + + Ok(()) + }.instrument(unpack_span).await +} + +async fn file_unpacker_writer(path: PathBuf, permissions: FilesystemPermissions, restore_permissions: bool, rx: Receiver>>) -> eyre::Result<()> { + let mut oo = OpenOptions::new(); + oo.write(true).create_new(true); + if restore_permissions { + oo.mode(permissions.mode); + }; + let mut file = oo + .open(&path) + .await + .with_context(|| format!("can't create {path:?}"))?; + + loop { + match rx.recv_async().await { + Ok(Some(next_block)) => { + file.write_all(&next_block) + .await?; + }, + Ok(None) => { + file.flush() + .await + .context("failed to flush")?; + return Ok(()); + }, + Err(_) => { + bail!("rx for file unpacking into {path:?} disconnected unexpectedly"); + } + } + } +} + +async fn expand_chunkrefs( + pwc: &Arc>, + chunkrefs: impl Iterator, +) -> eyre::Result)>> { + let mut by_depth = BTreeMap::, Vec<(T, Vec)>>::new(); + for (t, rec) in chunkrefs { + by_depth + .entry(Reverse(rec.depth)) + .or_default() + .push((t, vec![rec.chunk_id])); + } + + while let Some(Reverse(next_depth)) = by_depth.keys().next().cloned() { + let ts_and_chunks = by_depth.remove(&Reverse(next_depth)).unwrap(); + + if next_depth == 0 { + return Ok(ts_and_chunks); + } + + let ec_span = info_span!("expand_chunkrefs"); + ec_span.pb_set_style(&ProgressStyle::default_bar().template( + PROGRESS_BAR_STYLE, + ).unwrap()); + ec_span.pb_set_length(ts_and_chunks.iter().map(|(_, cs)| cs.len() as u64).sum::()); + ec_span.pb_set_message(&format!("resolve (d={next_depth})")); + let expanded_ts_and_chunks = expand_chunkrefs_one_layer(pwc, ts_and_chunks) + .instrument(ec_span).await?; + by_depth + .entry(Reverse(next_depth - 1)) + .or_default() + .extend(expanded_ts_and_chunks); + } + + Ok(Vec::new()) +} + +async fn lookup_chunkrefs_and_create_retriever( + pwc: &Arc>, + input: Vec<(T, Vec)>, +) -> eyre::Result<(Receiver, BTreeMap)> { + let mut next_job_id = JobId(0); + + let chunks_to_lookup: BTreeSet = input + .iter() + .flat_map(|(_t, chunkids)| chunkids) + .copied() + .collect(); + + let looked_up_chunks = pwc + .localcache + .read() + .await? 
+ .locate_chunks(&chunks_to_lookup) + .await?; + ensure!( + chunks_to_lookup.len() == looked_up_chunks.len(), + "chunks are missing" + ); + + let bloblog_ids: BTreeSet = looked_up_chunks.values().map(|(bi, _)| *bi).collect(); + let num_bloblogs = bloblog_ids.len(); + let bloblog_to_file_ids: BTreeMap = bloblog_ids + .into_iter() + .zip((0..num_bloblogs as u32).map(FileId)) + .collect(); + let files: BTreeMap = + bloblog_to_file_ids.iter().map(|(&k, &v)| (v, k)).collect(); + + let mut out_by_job = BTreeMap::::new(); + let mut jobs = BTreeMap::>::new(); + for (t, chunks) in input { + let job_id = next_job_id; + next_job_id.0 += 1; + out_by_job.insert(job_id, t); + jobs.insert( + job_id, + chunks + .into_iter() + .map(|c| { + let (bloblog_id, blob_locator) = &looked_up_chunks[&c]; + JobChunkReq { + file: bloblog_to_file_ids[bloblog_id], + offset: blob_locator.offset, + length: blob_locator.length, + } + }) + .collect(), + ); + } + + let retriever = create_fixed_retriever(pwc.clone(), jobs, files, 8)?; + let retriever = + PipelineDecompressor::start(pwc.pile.pile_config.zstd_dict.clone(), 2, retriever)?; + Ok((retriever, out_by_job)) +} + +async fn expand_chunkrefs_one_layer( + pwc: &Arc>, + input: Vec<(T, Vec)>, +) -> eyre::Result)>> { + let (retriever, jobs_to_ts) = lookup_chunkrefs_and_create_retriever(pwc, input).await?; + + let mut out_by_job: BTreeMap)> = jobs_to_ts + .into_iter() + .map(|(ji, t)| (ji, (t, Vec::new()))) + .collect(); + + let mut num_jobs_left = out_by_job.len(); + + while let Ok(result) = retriever.recv_async().await { + match result { + RetrieverResp::Blob { + job, + subjob: _, + blob, + } => { + out_by_job + .get_mut(&job) + .context("bad job gm")? + .1 + .extend_from_slice(&blob); + Span::current().pb_inc(1); + } + RetrieverResp::JobComplete(_) => { + num_jobs_left -= 1; + } + } + } + + ensure!(num_jobs_left == 0, "jobs left over, recovery not complete"); + + out_by_job + .into_values() + .map(|(t, bytes)| { + let chunk_ids = bytes + .chunks(32) + .map(|b| { + if b.len() != 32 { + bail!("wrong number of bytes for chunk refs"); + } + let mut b32 = [0u8; 32]; + b32.copy_from_slice(b); + Ok(ChunkId::from(b32)) + }) + .collect::>()?; + Ok((t, chunk_ids)) + }) + .collect() +} diff --git a/yama/src/init.rs b/yama/src/init.rs new file mode 100644 index 0000000..533c54c --- /dev/null +++ b/yama/src/init.rs @@ -0,0 +1,110 @@ +use eyre::{bail, Context, ContextCompat}; +use std::path::Path; +use tokio::io::AsyncWriteExt; +use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; +use yama_midlevel_crypto::key_derivation::KeyDerivationParameters; +use yama_midlevel_crypto::sym_box::SymBox; +use yama_pile::definitions::{PackedKeyring, PackedPileConfig, UnlockedOrLockedKeyring}; +use yama_pile::keyring::{generate_r_w_keys, Keyring}; +use yama_pile::{DIR_BLOBLOGS, DIR_INDICES, DIR_LOCKS, FILE_MASTER_KEYRING, FILE_YAMA_CONFIG}; +use yama_wormfile::paths::WormPath; +use yama_wormfile::{WormFileProvider, WormFileWriter}; + +/// Perform checks before we init a pile in the given directory. 
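+/// Fails if the path exists but is not a directory, or if it already contains pile files (yama.toml, config, bloblogs, indices or locks).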
+pub async fn pre_init_check(path: &Path) -> eyre::Result<()> { + if path.exists() && !path.is_dir() { + bail!("{path:?} is not a directory; cannot create pile or connector here."); + } + + for important_path in [ + "yama.toml", + DIR_BLOBLOGS, + DIR_LOCKS, + FILE_YAMA_CONFIG, + DIR_INDICES, + ] { + let important_path = path.join(important_path); + if important_path.exists() { + bail!("{important_path:?} already exists: can't create pile or connector here."); + } + } + + Ok(()) +} + +/// Perform checks before we init a pile in the given WormFileProvider. +pub async fn pre_init_check_wfp(wfp: &impl WormFileProvider) -> eyre::Result<()> { + for important_path in ["yama.toml", FILE_YAMA_CONFIG] { + let important_path = WormPath::new(important_path).unwrap(); + if wfp.is_regular_file(&important_path).await? { + bail!("{important_path:?} already exists: can't create pile."); + } + } + Ok(()) +} + +/// Initialise a pile. +/// Should be run after `pre_init_check_wfp`. +pub async fn init_pile( + wfp: &impl WormFileProvider, + pile_config: PackedPileConfig, + master_keyring_copy: Option, +) -> eyre::Result<()> { + let mut writer = wfp.write().await?; + writer.write_all(&pile_config.into_byte_vec()).await?; + writer.flush().await?; + writer + .finalise(WormPath::new(FILE_YAMA_CONFIG).unwrap(), false) + .await?; + + if let Some(master_keyring_copy) = master_keyring_copy { + let mut writer = wfp.write().await?; + writer + .write_all(&master_keyring_copy.into_byte_vec()) + .await?; + writer.flush().await?; + writer + .finalise(WormPath::new(FILE_MASTER_KEYRING).unwrap(), false) + .await?; + } + + Ok(()) +} + +pub fn generate_master_keyring() -> Keyring { + let (r_config, w_config) = generate_r_w_keys(); + let (r_bloblog_footer, w_bloblog_footer) = generate_r_w_keys(); + let (r_bloblog_contents, w_bloblog_contents) = generate_r_w_keys(); + let (r_locks, w_locks) = generate_r_w_keys(); + let (r_pointer, w_pointer) = generate_r_w_keys(); + Keyring { + r_config: Some(r_config), + w_config: Some(w_config), + r_bloblog_footer: Some(r_bloblog_footer), + w_bloblog_footer: Some(w_bloblog_footer), + r_bloblog_contents: Some(r_bloblog_contents), + w_bloblog_contents: Some(w_bloblog_contents), + r_locks: Some(r_locks), + w_locks: Some(w_locks), + r_pointer: Some(r_pointer), + w_pointer: Some(w_pointer), + } +} + +// todo move this +pub fn pack_keyring(unpacked: Keyring, password: Option<&str>) -> eyre::Result { + let packed = if let Some(password) = password { + let deriver = KeyDerivationParameters::new_recommended(); + let key = deriver + .derive(password) + .context("Failed to derive key from password")?; + let symkey = key.into_symkey(); + + let lockbox = SymBox::new(CborSerde::serialise(&unpacked).unwrap(), &symkey) + .context("Failed to encrypt keyring")?; + UnlockedOrLockedKeyring::Locked { deriver, lockbox } + } else { + UnlockedOrLockedKeyring::Unlocked(unpacked) + }; + Ok(PackedKeyring::serialise(&packed).unwrap()) +} diff --git a/yama/src/lib.rs b/yama/src/lib.rs index 79c4f2f..d391b1c 100644 --- a/yama/src/lib.rs +++ b/yama/src/lib.rs @@ -1,10 +1,21 @@ -pub mod chunking; -pub mod commands; -pub mod debug; -pub mod definitions; -pub mod operations; -pub mod pile; -pub mod progress; -pub mod remote; -pub mod tree; -pub mod utils; +pub mod init; +pub mod open; + +pub mod extract; +pub mod scan; +pub mod storing; +pub mod vacuum; + +pub mod pile_connector; +pub mod pile_with_cache; + +pub mod retriever; + +pub const PROGRESS_BAR_STYLE: &'static str = "[{elapsed_precise}]/[{eta}] {wide_bar:.cyan/blue} 
{pos:>7}/{len:7} {msg}"; + +pub fn get_hostname() -> String { + hostname::get() + .expect("No hostname") + .into_string() + .expect("Hostname string must be sensible.") +} diff --git a/yama/src/open.rs b/yama/src/open.rs new file mode 100644 index 0000000..3ccc6aa --- /dev/null +++ b/yama/src/open.rs @@ -0,0 +1,167 @@ +use crate::pile_connector::PileConnectionScheme; +use crate::pile_with_cache::PileWithCache; +use eyre::{bail, Context, ContextCompat}; +use std::borrow::Cow; +use std::collections::BTreeSet; +use std::hash::{Hash, Hasher}; +use std::path::Path; +use std::sync::Arc; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tracing::debug; +use twox_hash::XxHash64; +use yama_midlevel_crypto::byte_layer::ByteLayer; +use yama_pile::definitions::{IndexId, PackedKeyring, UnlockedOrLockedKeyring}; +use yama_pile::keyring::Keyring; +use yama_pile::locks::LockKind; +use yama_pile::{Pile, FILE_YAMA_CONFIG, FILE_YAMA_CONNECTOR}; +use yama_wormfile::boxed::BoxedWormFileProvider; + +pub const KEYRING_LOOKUP_SEQ: [&'static str; 2] = ["access.yamakeyring", "master.yamakeyring"]; + +pub async fn pre_open_keyring(connector_in_dir: &Path) -> eyre::Result { + for lookup in KEYRING_LOOKUP_SEQ { + let keyring_path = connector_in_dir.join(lookup); + if keyring_path.exists() { + let packed_keyring_bytes = tokio::fs::read(&keyring_path) + .await + .with_context(|| format!("failed to read keyring file at {:?}", keyring_path))?; + let packed_keyring = PackedKeyring::from_byte_vec(packed_keyring_bytes) + .deserialise() + .with_context(|| { + format!("failed to deserialise keyring file at {:?}", keyring_path) + })?; + return Ok(packed_keyring); + } + } + + bail!( + "No keyring found in {:?}. Expected to see one at one of: {:?}", + connector_in_dir, + KEYRING_LOOKUP_SEQ + ); +} + +pub async fn open_keyring_interactive(input: UnlockedOrLockedKeyring) -> eyre::Result { + match input { + UnlockedOrLockedKeyring::Locked { deriver, lockbox } => { + println!("enter keyring password:"); + let stdin = tokio::io::stdin(); + let mut stdin_br = BufReader::new(stdin); + let mut line = String::new(); + stdin_br.read_line(&mut line).await?; + + let derived = deriver + .derive(line.trim()) + .context("failed to derive key from password")?; + let keyring = lockbox + .unlock(&derived.into_symkey()) + .context("failed to decrypt keyring")? + .deserialise() + .context("failed to deserialise keyring")?; + + Ok(keyring) + } + UnlockedOrLockedKeyring::Unlocked(keyring) => Ok(keyring), + } +} + +pub async fn open_pile( + connector_in_dir: &Path, + keyring: Keyring, + lock_kind: LockKind, + lock_holder: String, +) -> eyre::Result> { + let connection_scheme = if connector_in_dir.join(FILE_YAMA_CONFIG).exists() { + PileConnectionScheme::Local { + directory: connector_in_dir + .canonicalize() + .context("can't canonicalise local pile path")? 
+ .to_owned(), + } + } else if connector_in_dir.join(FILE_YAMA_CONNECTOR).exists() { + let connector_toml = tokio::fs::read_to_string(&connector_in_dir.join(FILE_YAMA_CONNECTOR)) + .await + .context("failed to read connector")?; + let connector: PileConnectionScheme = + toml::from_str(&connector_toml).context("failed to deserialise connector")?; + connector + } else { + bail!("Neither yama.cfg nor yama.toml exists; doesn't look like a Yama pile or pile connector."); + }; + + let wormfileprovider = Arc::new(connection_scheme.connect_to_wormfileprovider().await?); + let pile = Pile::open_manual(wormfileprovider, lock_kind, lock_holder, keyring).await?; + + let cache_dir = appdirs::user_cache_dir(Some("yama"), None).expect("can't obtain cache dir!"); + + let mut hasher = XxHash64::default(); + connection_scheme.hash(&mut hasher); + let u64_hash = hasher.finish(); + + let base_name = connector_in_dir + .file_name() + .map(|f| f.to_string_lossy()) + .unwrap_or(Cow::Borrowed("_")); + let cache_key = format!("{}-{:016x}.sqlite3", base_name, u64_hash); + + tokio::fs::create_dir_all(&cache_dir).await?; + let cache_file = cache_dir.join(&cache_key); + let localcache = yama_localcache::Store::new(&cache_file) + .await + .with_context(|| format!("failed to open local cache"))?; + + Ok(PileWithCache { pile, localcache }) +} + +pub async fn update_cache(pwc: &PileWithCache) -> eyre::Result<()> { + debug!("updating cache"); + let available_indices = pwc + .pile + .list_indices() + .await + .context("can't list available indices")?; + let present_indices = pwc + .localcache + .read() + .await? + .list_indices() + .await + .context("can't list cached indices")?; + + let missing_indices: BTreeSet = available_indices + .difference(&present_indices) + .cloned() + .collect(); + let deleted_indices: BTreeSet = present_indices + .difference(&available_indices) + .cloned() + .collect(); + + let mut downloaded_indices = Vec::new(); + + debug!( + "{} new indices to cache, {} deleted indices to back out", + missing_indices.len(), + deleted_indices.len() + ); + for missing_index in missing_indices { + debug!("downloading index {missing_index:?}"); + downloaded_indices.push((missing_index, pwc.pile.read_index(missing_index).await?)); + } + + let mut txn = pwc.localcache.write().await?; + + for deleted_index in deleted_indices { + debug!("backing out index {deleted_index:?}"); + txn.delete_index(deleted_index).await?; + } + + for (index_id, index) in downloaded_indices { + debug!("applying index {index_id:?}"); + txn.apply_index(index_id, Arc::new(index)).await?; + } + + debug!("finished updating cache"); + + Ok(()) +} diff --git a/yama/src/pile/encryption.rs b/yama/src/pile/encryption.rs deleted file mode 100644 index 53584cf..0000000 --- a/yama/src/pile/encryption.rs +++ /dev/null @@ -1,138 +0,0 @@ -/* -This file is part of Yama. - -Yama is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -Yama is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Yama. If not, see . 
-*/ - -use anyhow::anyhow; -use log::warn; -use sodiumoxide::crypto::secretbox; -use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES}; - -use crate::definitions::ChunkId; -use crate::pile::{ - ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings, -}; -use crossbeam_channel::Sender; - -/// A RawPile that provides encryption of chunk contents. -/// Please note that keys are not currently encrypted, so this scheme is not CPA-secure. -/// It seems easily possible to test the pile for inclusion of a known file (by first chunking it and -/// looking for matching chunk IDs). -/// Use of compression a custom Zstd dictionary may make that harder but in general it seems dubious -/// to rely on that. -/// This feature will be revisited soon... -/// Notably, keys should be passed through a secure permutation first. -#[derive(Debug)] -pub struct RawPileEncryptor { - underlying: R, - secret_key: Key, -} - -impl RawPileEncryptor { - pub fn new(underlying: R, key: Key) -> Self { - warn!( - "WARNING! Encrypted RawPiles are not CPA secure. Do not rely on them for security yet!" - ); - RawPileEncryptor { - underlying, - secret_key: key, - } - } - - fn decrypt(&self, kind: Keyspace, key: &[u8], data: &[u8]) -> anyhow::Result> { - Ok(if kind == Keyspace::Chunk { - let mut nonce = [0u8; NONCEBYTES]; - nonce[0..key.len()].copy_from_slice(key); - secretbox::open(data, &Nonce(nonce), &self.secret_key) - .or(Err(anyhow!("Failed to decrypt")))? - } else { - let mut nonce = [0u8; NONCEBYTES]; - nonce.copy_from_slice(&data[0..NONCEBYTES]); - secretbox::open(&data[NONCEBYTES..], &Nonce(nonce), &self.secret_key) - .or(Err(anyhow!("Failed to decrypt")))? - }) - } - - fn encrypt(&self, kind: Keyspace, key: &[u8], data: &[u8]) -> Vec { - if kind == Keyspace::Chunk { - let mut nonce = [0u8; NONCEBYTES]; - nonce[0..key.len()].copy_from_slice(key); - secretbox::seal(data, &Nonce(nonce), &self.secret_key) - } else { - let nonce = secretbox::gen_nonce(); - let mut out = Vec::new(); - out.extend_from_slice(&nonce.0); - out.extend_from_slice(&secretbox::seal(data, &nonce, &self.secret_key)); - out - } - } -} - -impl RawPile for RawPileEncryptor { - fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result { - self.underlying.exists(kind, key) - } - - fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result>> { - if let Some(data) = self.underlying.read(kind, key)? 
{ - Ok(Some(self.decrypt(kind, key, &data)?)) - } else { - Ok(None) - } - } - fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> { - let encrypted = self.encrypt(kind, key, value); - self.underlying.write(kind, key, &encrypted) - } - - fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> { - self.underlying.delete(kind, key) - } - - fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> { - self.underlying.delete_many(kind, keys) - } - - fn list_keys( - &self, - kind: Keyspace, - ) -> anyhow::Result>>>> { - self.underlying.list_keys(kind) - } - fn flush(&self) -> anyhow::Result<()> { - self.underlying.flush() - } - fn check_lowlevel(&self) -> anyhow::Result { - self.underlying.check_lowlevel() - } - - fn build_storage_pipeline( - &self, - _settings: StoragePipelineSettings, - _controller_send: Sender, - ) -> anyhow::Result)>> { - todo!() - } - - fn describe_pipeline(&self) -> anyhow::Result> { - let mut underlying = self.underlying.describe_pipeline()?; - underlying.push(PipelineDescription::Encryption); - Ok(underlying) - } - - fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result { - self.underlying.chunk_id_transfer_ordering_hint(chunk_id) - } -} diff --git a/yama/src/pile_connector.rs b/yama/src/pile_connector.rs new file mode 100644 index 0000000..a229bfd --- /dev/null +++ b/yama/src/pile_connector.rs @@ -0,0 +1,76 @@ +use eyre::{bail, Context}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use yama_wormfile::boxed::BoxedWormFileProvider; +use yama_wormfile_fs::LocalWormFilesystem; +use yama_wormfile_sftp::SftpWormFilesystem; + +#[derive(Clone, Serialize, Deserialize, Hash)] +#[serde(tag = "scheme")] +pub enum PileConnectionScheme { + #[serde(rename = "local")] + Local { directory: PathBuf }, + #[serde(rename = "sftp")] + Sftp { + user_at_host: String, + // TODO Should probably not serialise the password + password: String, + directory: String, + }, + #[serde(rename = "s3")] + S3 {}, +} + +impl PileConnectionScheme { + pub async fn connect_to_wormfileprovider(&self) -> eyre::Result { + match self { + PileConnectionScheme::Local { directory } => { + if directory.exists() { + if !directory.is_dir() { + bail!("Can't connect to local pile {directory:?}: not a directory."); + } + } else { + tokio::fs::create_dir(directory) + .await + .context("Can't connect to local pile; can't create directory.")?; + } + Ok(BoxedWormFileProvider::new(LocalWormFilesystem::new( + directory, + )?)) + } + PileConnectionScheme::Sftp { + user_at_host, + password, + directory, + } => { + if !password.is_empty() { + bail!("SFTP passwords not supported at the moment."); + } + Ok(BoxedWormFileProvider::new( + SftpWormFilesystem::new(user_at_host, directory) + .await + .context("Failed SFTP connection")?, + )) + } + PileConnectionScheme::S3 { .. 
} => { + //S3WormFilesystem::new() + todo!() + } + } + } +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct PileConnectionDetails { + #[serde(flatten)] + pub scheme: PileConnectionScheme, + + pub keyring: PathBuf, +} + +impl PileConnectionDetails { + pub async fn connect(self) -> eyre::Result<()> { + // TODO + Ok(()) + } +} diff --git a/yama/src/pile_with_cache.rs b/yama/src/pile_with_cache.rs new file mode 100644 index 0000000..2123b36 --- /dev/null +++ b/yama/src/pile_with_cache.rs @@ -0,0 +1,56 @@ +use yama_localcache::Store; +use yama_pile::pointers::Pointer; +use yama_pile::Pile; +use yama_wormfile::WormFileProvider; + +use crate::scan::integrate_uid_or_gid_map; +use async_recursion::async_recursion; +use eyre::{Context, ContextCompat}; +use yama_pile::tree::integrate_node_in_place; + +pub struct PileWithCache { + pub pile: Pile, + pub localcache: Store, +} + +impl PileWithCache { + pub async fn fully_integrate_pointer_in_place( + &self, + pointer: &mut Pointer, + ) -> eyre::Result<()> { + if let Some(parent_pointer_name) = pointer.parent.as_ref() { + let parent_pointer = self + .read_pointer_fully_integrated(parent_pointer_name) + .await + .with_context(|| { + format!("failed to read pointer {parent_pointer_name} whilst integrating") + })? + .with_context(|| { + format!("whilst integrating, expected pointer {parent_pointer_name} to exist") + })?; + + integrate_node_in_place(&mut pointer.root.node, &parent_pointer.root.node); + integrate_uid_or_gid_map(&mut pointer.uids, &parent_pointer.uids); + integrate_uid_or_gid_map(&mut pointer.gids, &parent_pointer.gids); + pointer.parent = None; + } + Ok(()) + } + + #[async_recursion] + pub async fn read_pointer_fully_integrated(&self, name: &str) -> eyre::Result> { + match self.pile.read_pointer(name).await? { + Some(mut pointer) => { + self.fully_integrate_pointer_in_place(&mut pointer).await?; + Ok(Some(pointer)) + } + None => Ok(None), + } + } + + /// Gracefully close this pile + local cache. 
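+ /// At present this only has to close the underlying pile; the local cache store is simply dropped with `self`.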
+ pub async fn close(self) -> eyre::Result<()> { + self.pile.close().await?; + Ok(()) + } +} diff --git a/yama/src/retriever.rs b/yama/src/retriever.rs new file mode 100644 index 0000000..2f7cfa2 --- /dev/null +++ b/yama/src/retriever.rs @@ -0,0 +1,396 @@ +// TODO The retriever should possibly live somewhere else + +use crate::pile_with_cache::PileWithCache; +use eyre::{bail, ensure, eyre, ContextCompat}; +use flume::{Receiver, Sender}; +use std::collections::{BTreeMap, BTreeSet}; +use std::pin::Pin; +use std::sync::Arc; +use tracing::error; +use yama_pile::bloblogs::BloblogReader; +use yama_pile::definitions::BloblogId; +use yama_wormfile::boxed::BoxedWormFileProvider; +use yama_wormfile::WormFileReader; + +pub mod decompressor; + +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] +#[repr(transparent)] +pub struct JobId(pub u32); + +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] +#[repr(transparent)] +pub struct FileId(pub u32); + +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] +pub struct JobChunkReq { + pub file: FileId, + pub offset: u64, + pub length: u64, +} + +#[derive(Clone, Debug)] +pub enum RetrieverResp { + Blob { + job: JobId, + subjob: u32, + blob: Vec, + }, + JobComplete(JobId), +} + +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] +struct FileRegionMarker { + pub file: FileId, + pub offset: u64, + pub length: u64, + pub job: JobId, + pub subjob: u32, +} + +struct OpenFileState { + pub req_tx: Sender, + pub offset: u64, +} + +#[derive(Debug)] +struct OpenFileReq { + pub offset: u64, + pub length: u64, + pub job: JobId, + pub subjob: u32, +} + +struct ActiveJobState { + pub subjobs: Vec, + pub next_subjob: u32, + pub inflight: u32, +} + +pub struct Retriever { + job_tx: Sender<(JobId, Vec)>, +} + +struct RetrieverInternals { + pwc: Arc>, + jobs_queue: BTreeMap>, + file_regions: BTreeSet, + files: BTreeMap, + open_files: BTreeMap, + results_tx: Sender, + active_jobs: BTreeMap, + ack_rx: Receiver, + + self_ack_tx: Sender, + + rec_active_jobs: u16, +} + +pub fn create_fixed_retriever( + pwc: Arc>, + jobs: BTreeMap>, + files: BTreeMap, + rec_active_jobs: u16, +) -> eyre::Result> { + let (results_tx, results_rx) = flume::bounded(4); + let (self_ack_tx, ack_rx) = flume::bounded(4); + let mut rint = RetrieverInternals { + pwc, + jobs_queue: Default::default(), + file_regions: Default::default(), + files, + open_files: Default::default(), + results_tx, + active_jobs: Default::default(), + ack_rx, + self_ack_tx, + rec_active_jobs, + }; + for (job_id, job) in jobs { + rint.set_up_job(job_id, job); + } + + tokio::spawn(async move { + if let Err(e) = rint.retrieval_task().await { + error!("retriever failed: {e:?}"); + } + }); + + Ok(results_rx) +} + +impl RetrieverInternals { + fn set_up_job(&mut self, job_id: JobId, job: Vec) { + for (subjob, chunk) in job.iter().enumerate() { + self.file_regions.insert(FileRegionMarker { + file: chunk.file, + offset: chunk.offset, + length: chunk.length, + job: job_id, + subjob: subjob as u32, + }); + } + self.jobs_queue.insert(job_id, job); + // eprintln!("new job {job_id:?}"); + } + + async fn file_request( + open_file: &mut OpenFileState, + job: JobId, + subjob: u32, + offset: u64, + length: u64, + ) -> eyre::Result<()> { + open_file + .req_tx + .send_async(OpenFileReq { + offset, + length, + job, + subjob, + }) + .await + .map_err(|_| eyre!("open file shut down :/"))?; + open_file.offset = offset + length; + Ok(()) + } + + async fn open_file(&mut self, file_id: FileId) -> eyre::Result<()> { + 
assert!(!self.open_files.contains_key(&file_id)); + + let &bloblog_id = self.files.get(&file_id).context("no file by that ID")?; + let bloblog_reader = self.pwc.pile.read_bloblog(bloblog_id).await?; + + let completion_tx = self.results_tx.clone(); + let ack_tx = self.self_ack_tx.clone(); + + let (subjob_tx, subjob_rx) = flume::unbounded(); + + tokio::spawn(async move { + if let Err(e) = + Self::reader_task(bloblog_reader, subjob_rx, ack_tx, completion_tx).await + { + error!("error in reader for {bloblog_id:?}: {e:?}"); + } + }); + + self.open_files.insert( + file_id, + OpenFileState { + req_tx: subjob_tx, + offset: 0, + }, + ); + + Ok(()) + } + + async fn reader_task( + mut bloblog_reader: BloblogReader>>, + subjob_rx: Receiver, + ack_tx: Sender, + completion_tx: Sender, + ) -> eyre::Result<()> { + while let Ok(next_job) = subjob_rx.recv_async().await { + let mut blob = Vec::with_capacity(next_job.length as usize); + bloblog_reader + .read_to_buf(&mut blob, next_job.offset, next_job.length) + .await?; + + completion_tx + .send_async(RetrieverResp::Blob { + job: next_job.job, + subjob: next_job.subjob, + blob, + }) + .await + .expect("completions shut"); + // eprintln!("completion of{next_job:?}"); + ack_tx.send_async(next_job.job).await?; + } + + Ok(()) + } + + async fn retrieval_task(&mut self) -> eyre::Result<()> { + loop { + // 0. Try to progress open jobs if they are staring right at the bytes they need... + let mut to_remove = Vec::new(); + for (active_job_id, active_job) in &mut self.active_jobs { + if active_job.inflight > 0 { + // skip if it's busy, we don't want to send blobs out of order... + continue; + } + if active_job.next_subjob as usize >= active_job.subjobs.len() { + // this job is to be finished! + to_remove.push(*active_job_id); + continue; + } + 'single_job_staring: loop { + let desired_blob = &active_job.subjobs[active_job.next_subjob as usize]; + if let Some(open_file) = self.open_files.get_mut(&desired_blob.file) { + if open_file.offset == desired_blob.offset { + Self::file_request( + open_file, + *active_job_id, + active_job.next_subjob, + desired_blob.offset, + desired_blob.length, + ) + .await?; + ensure!( + self.file_regions.remove(&FileRegionMarker { + file: desired_blob.file, + offset: desired_blob.offset, + length: desired_blob.length, + job: *active_job_id, + subjob: active_job.next_subjob, + }), + "no FRM to remove (0)" + ); + active_job.next_subjob += 1; + active_job.inflight += 1; + + if active_job.next_subjob as usize >= active_job.subjobs.len() { + // this job is to be finished! + break 'single_job_staring; + } + } + } else { + break 'single_job_staring; + } + } + } + for remove in to_remove { + self.active_jobs.remove(&remove); + // eprintln!("job complete {remove:?}"); + self.results_tx + .send_async(RetrieverResp::JobComplete(remove)) + .await + .map_err(|_| eyre!("results_tx shutdown"))?; + } + + // 1. Try to make the most of open files by opening new jobs in convenient locations. + // Basically: if we have slots for new active jobs, then look to see if we have any + // jobs that begin at the offset in question... 
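+ // (Only a region with subjob == 0 can start a new job, and matching the file's current offset means no seek is needed.)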
+ if self.active_jobs.len() < self.rec_active_jobs as usize { + let mut allowed = self.rec_active_jobs as usize - self.active_jobs.len(); + let mut progress = false; + for (open_file_id, open_file_state) in &self.open_files { + for region in self.file_regions.range( + FileRegionMarker { + file: *open_file_id, + offset: open_file_state.offset, + length: 0, + job: JobId(0), + subjob: 0, + }..FileRegionMarker { + file: *open_file_id, + offset: open_file_state.offset + 1, + length: 0, + job: JobId(0), + subjob: 0, + }, + ) { + if region.subjob != 0 { + // only accept this region if it's the start of a job + continue; + } + if let Some(subjobs) = self.jobs_queue.remove(®ion.job) { + self.active_jobs.insert( + region.job, + ActiveJobState { + subjobs, + next_subjob: 0, + inflight: 0, + }, + ); + allowed -= 1; + progress = true; + break; + } + } + if allowed == 0 { + break; + } + } + if progress { + continue; + } + } + + // 2. Try to progress active jobs, even if we have to open new files or seek. + let mut files_to_open = BTreeSet::new(); + for (active_job_id, active_job) in &mut self.active_jobs { + if active_job.inflight > 0 { + // skip if it's busy, we don't want to send blobs out of order... + continue; + } + + let desired_blob = &active_job.subjobs[active_job.next_subjob as usize]; + if let Some(open_file) = self.open_files.get_mut(&desired_blob.file) { + Self::file_request( + open_file, + *active_job_id, + active_job.next_subjob, + desired_blob.offset, + desired_blob.length, + ) + .await?; + ensure!( + self.file_regions.remove(&FileRegionMarker { + file: desired_blob.file, + offset: desired_blob.offset, + length: desired_blob.length, + job: *active_job_id, + subjob: active_job.next_subjob, + }), + "no FRM to remove (0)" + ); + active_job.next_subjob += 1; + active_job.inflight += 1; + } else { + // can't open immediately here due to mut borrow. + files_to_open.insert(desired_blob.file); + } + } + if !files_to_open.is_empty() { + for file in files_to_open { + self.open_file(file).await?; + } + continue; + } + + // 3. Start new jobs + if self.active_jobs.len() < self.rec_active_jobs as usize { + // spawn a new job... + if let Some(activate_job_id) = self.jobs_queue.keys().next().cloned() { + let new_job = self.jobs_queue.remove(&activate_job_id).unwrap(); + self.active_jobs.insert( + activate_job_id, + ActiveJobState { + subjobs: new_job, + next_subjob: 0, + inflight: 0, + }, + ); + continue; + } + } + + // 4. Block for acks, unless there are no jobs in which case we should just finish! 
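+ // Each ack decrements that job's inflight count; step 0 only sends a job's next blob once inflight drops back to zero, keeping blobs in order.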
+ if self.active_jobs.is_empty() { + break; + } + if let Ok(ack) = self.ack_rx.recv_async().await { + if let Some(job) = self.active_jobs.get_mut(&ack) { + ensure!(job.inflight > 0, "recv'd ack for job that has 0 inflight"); + job.inflight -= 1; + } else { + bail!("recv'd ack for bad job {ack:?}"); + } + } + } + Ok(()) + } +} diff --git a/yama/src/retriever/decompressor.rs b/yama/src/retriever/decompressor.rs new file mode 100644 index 0000000..5bdbc5b --- /dev/null +++ b/yama/src/retriever/decompressor.rs @@ -0,0 +1,211 @@ +use crate::retriever::{JobId, RetrieverResp}; +use eyre::{bail, ensure, eyre, Context, ContextCompat}; +use flume::{Receiver, Sender}; +use std::collections::BTreeMap; +use std::sync::Arc; +use tracing::error; +use zstd::bulk::Decompressor; + +pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024; + +pub struct PipelineDecompressor { + rx: Receiver, + tx: Sender, + job_pool_tx: Sender<(JobId, u32, Vec)>, + complete_rx: Receiver<(JobId, u32, Vec)>, + + processing: BTreeMap, +} + +struct JobState { + pub next_submit_subjob: u32, + pub next_enqueue_subjob: u32, + pub queued: BTreeMap>, + pub complete: bool, +} + +impl PipelineDecompressor { + pub fn start( + decom_dict: Option>>, + num_decom: u8, + rx: Receiver, + ) -> eyre::Result> { + let (out_tx, out_rx) = flume::bounded(4); + + let (job_pool_tx, job_pool_rx) = flume::bounded(0); + let (complete_tx, complete_rx) = flume::unbounded(); + + for num in 0..num_decom { + let decom_dict = decom_dict.clone(); + let job_pool_rx = job_pool_rx.clone(); + let complete_tx = complete_tx.clone(); + std::thread::Builder::new() + .name(format!("decomp {num}")) + .spawn(move || { + if let Err(err) = + Self::decompressor_worker(decom_dict, job_pool_rx, complete_tx) + { + error!("error in decompressor worker: {err:?}"); + } + })?; + } + + let mut pd = PipelineDecompressor { + rx, + tx: out_tx, + job_pool_tx, + complete_rx, + processing: Default::default(), + }; + + tokio::spawn(async move { + if let Err(e) = pd.decompressor_manager().await { + eprintln!("pipeline decompressor error: {e:?}"); + } + }); + + Ok(out_rx) + } + + fn decompressor_worker( + decom_dict: Option>>, + job_pool_rx: Receiver<(JobId, u32, Vec)>, + complete_tx: Sender<(JobId, u32, Vec)>, + ) -> eyre::Result<()> { + let mut decompressor = match decom_dict { + Some(dict) => Decompressor::with_dictionary(&dict)?, + None => Decompressor::new()?, + }; + while let Ok((job_id, subjob, compressed_bytes)) = job_pool_rx.recv() { + let decompressed_bytes = decompressor + .decompress(&compressed_bytes, DECOMPRESS_CAPACITY) + .context("failed to decompress")?; + complete_tx + .send((job_id, subjob, decompressed_bytes)) + .map_err(|_| eyre!("complete_tx shutdown"))?; + } + Ok(()) + } + + async fn decompressor_manager(&mut self) -> eyre::Result<()> { + let mut incoming_open = true; + loop { + // Always process completed jobs as top priority + while let Ok(completion) = self.complete_rx.try_recv() { + self.handle_completion(completion).await?; + } + + // Then it doesn't matter so much what we process after that + tokio::select! 
{ + Ok(completion) = self.complete_rx.recv_async(), if !self.processing.is_empty() => { + self.handle_completion(completion).await?; + }, + incoming_res = self.rx.recv_async(), if incoming_open => { + if let Ok(incoming) = incoming_res { + self.handle_incoming(incoming).await?; + } else { + incoming_open = false; + } + } + else => { + if !self.processing.is_empty() { + bail!("decompressor still procesing but shutting down?"); + } + // eprintln!("D shutdown"); + break Ok(()); + } + }; + } + } + + async fn handle_completion( + &mut self, + (job_id, subjob, decompressed): (JobId, u32, Vec), + ) -> eyre::Result<()> { + let state = self + .processing + .get_mut(&job_id) + .context("bad job when recv complete decomp")?; + ensure!( + state.queued.insert(subjob, decompressed).is_none(), + "overwrote decompressed block??" + ); + while let Some(send_off) = state.queued.remove(&state.next_submit_subjob) { + // eprintln!("D send off {job_id:?} {subjob}"); + self.tx + .send(RetrieverResp::Blob { + job: job_id, + subjob: state.next_submit_subjob, + blob: send_off, + }) + .map_err(|_| eyre!("tx shutdown"))?; + state.next_submit_subjob += 1; + } + if state.queued.is_empty() + && state.complete + && state.next_submit_subjob == state.next_enqueue_subjob + { + // This job is done now + // eprintln!("D jc {job_id:?}"); + self.tx + .send(RetrieverResp::JobComplete(job_id)) + .map_err(|_| eyre!("tx shutdown"))?; + self.processing.remove(&job_id); + } + Ok(()) + } + + async fn handle_incoming(&mut self, incoming: RetrieverResp) -> eyre::Result<()> { + match incoming { + RetrieverResp::Blob { job, subjob, blob } => { + if subjob == 0 { + ensure!( + self.processing + .insert( + job, + JobState { + next_submit_subjob: 0, + next_enqueue_subjob: 0, + queued: Default::default(), + complete: false, + } + ) + .is_none(), + "job was overwritten" + ); + } + + let state = self + .processing + .get_mut(&job) + .context("bad job/not starting at 0 for job")?; + ensure!( + state.next_enqueue_subjob == subjob, + "out of order Blob commands" + ); + state.next_enqueue_subjob += 1; + self.job_pool_tx + .send_async((job, subjob, blob)) + .await + .map_err(|_| eyre!("job_pool_tx shutdown"))?; + } + RetrieverResp::JobComplete(job) => { + let state = self + .processing + .get_mut(&job) + .context("bad job to complete")?; + state.complete = true; + + let can_remove = state.next_submit_subjob == state.next_enqueue_subjob; + + if can_remove { + self.tx + .send(RetrieverResp::JobComplete(job)) + .map_err(|_| eyre!("tx shutdown"))?; + self.processing.remove(&job); + } + } + } + Ok(()) + } +} diff --git a/yama/src/scan.rs b/yama/src/scan.rs new file mode 100644 index 0000000..2f6631a --- /dev/null +++ b/yama/src/scan.rs @@ -0,0 +1,263 @@ +use eyre::{bail, eyre, Context}; +use ignore::WalkBuilder; +use patricia_tree::PatriciaMap; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs::{read_link, Metadata}; +use std::io::ErrorKind; +use std::os::unix::fs::MetadataExt; +use std::path::{Component, Path}; +use tracing::warn; +use yama_pile::definitions::RecursiveChunkRef; +use yama_pile::tree::unpopulated::ScanEntry; +use yama_pile::tree::{mtime_msec, FilesystemOwnership, FilesystemPermissions, TreeNode}; + +/// Given a node, recursively constructs a UID and GID lookup table based on THIS system's +/// users and groups. +/// +/// Returns UIDs then GIDs. 
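+/// UIDs and GIDs that cannot be resolved against this system's user/group database are simply left out of the returned maps.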
+pub fn create_uidgid_lookup_tables( + node: &TreeNode, +) -> eyre::Result<(BTreeMap, BTreeMap)> { + let mut uids = BTreeMap::::new(); + let mut gids = BTreeMap::::new(); + let mut used_uids = BTreeSet::new(); + let mut used_gids = BTreeSet::new(); + + find_used_uidsgids(&node, &mut used_uids, &mut used_gids); + + for uid in used_uids { + if let Some(user) = users::get_user_by_uid(uid.into()) { + uids.insert( + uid, + user.name() + .to_str() + .ok_or(eyre!("uid leads to non-String name"))? + .to_owned(), + ); + } + } + + for gid in used_gids { + if let Some(group) = users::get_group_by_gid(gid.into()) { + gids.insert( + gid, + group + .name() + .to_str() + .ok_or(eyre!("gid leads to non-String name"))? + .to_owned(), + ); + } + } + + Ok((uids, gids)) +} + +fn find_used_uidsgids(node: &TreeNode, uids: &mut BTreeSet, gids: &mut BTreeSet) { + match &node { + TreeNode::NormalFile { ownership, .. } + | TreeNode::Directory { ownership, .. } + | TreeNode::SymbolicLink { ownership, .. } => { + uids.insert(ownership.uid); + gids.insert(ownership.gid); + } + TreeNode::Deleted => { /* nop */ } + }; + + if let TreeNode::Directory { children, .. } = &node { + for (_name, child) in children { + find_used_uidsgids(child, uids, gids); + } + } +} + +/// Calculates the relative path. +/// +/// Returns empty string if the paths are the same, otherwise it's a /-separated string. +/// The returned string is not allowed to contain any . or .. components. +pub fn relative_path(base: &Path, leaf: &Path) -> Option { + assert_eq!(std::path::MAIN_SEPARATOR, '/'); + + let relative = leaf.strip_prefix(base).ok()?; + + if relative + .components() + .any(|c| c == Component::CurDir || c == Component::ParentDir || c == Component::RootDir) + { + return None; + } + + relative.to_str().map(|s| s.to_owned()) +} + +/// Scans a directory tree. +/// +/// Aborts if any errors (permission, bad .yamaignore files, etc) are encountered. +/// In the future, we possibly want to consider allowing +pub fn scan(root: &Path, ignores: &Vec) -> eyre::Result> { + let mut walker = WalkBuilder::new(root); + walker + .standard_filters(false) + .add_custom_ignore_filename(".yamaignore") + .parents(false) + .follow_links(false) + .same_file_system(true); + + for ign in ignores { + walker.add_ignore(ign); + } + let walker = walker.build(); + + let mut entries: PatriciaMap = PatriciaMap::new(); + + for entry in walker { + let entry = entry?; + + if !entry.path().starts_with(root) { + bail!( + "Scanned entry {:?} does not start with search path {:?}", + entry.path(), + root + ); + } + let rel_path = if let Some(rel_path) = relative_path(root, entry.path()) { + rel_path + } else { + continue; + }; + + if !rel_path.is_empty() { + let parent_relpath = rel_path + .rsplit_once('/') + .map(|(parent, _child)| parent) + .unwrap_or(""); + assert!( + entries.contains_key(parent_relpath), + "have not scanned parent for {}", + rel_path + ); + } + + if let Some(single_scan) = scan_one_no_recurse( + entry.path(), + entry + .metadata() + .with_context(|| format!("Failed to read metadata for {:?}", rel_path))?, + ) + .with_context(|| format!("Failed to scan {:?}", rel_path))? 
+ { + entries.insert(rel_path, single_scan); + } + } + + Ok(entries) +} + +fn scan_one_no_recurse(path: &Path, metadata: Metadata) -> eyre::Result> { + let filetype = metadata.file_type(); + + let ownership = FilesystemOwnership { + uid: metadata.uid() as u16, + gid: metadata.gid() as u16, + }; + + let permissions = FilesystemPermissions { + mode: metadata.mode(), + }; + + if filetype.is_file() { + // Leave an unpopulated file node. It's not my responsibility to chunk it right now. + Ok(Some(ScanEntry::NormalFile { + mtime: mtime_msec(&metadata), + ownership, + permissions, + size: metadata.size(), + })) + } else if filetype.is_dir() { + let dir_read = path.read_dir(); + + if let Err(e) = &dir_read { + match e.kind() { + ErrorKind::NotFound => { + warn!("vanished/: {:?}", path); + return Ok(None); + } + ErrorKind::PermissionDenied => { + warn!("permission denied/: {:?}", path); + return Ok(None); + } + _ => { /* nop */ } + } + } + + Ok(Some(ScanEntry::Directory { + ownership, + permissions, + })) + } else if filetype.is_symlink() { + let target = read_link(path)? + .to_str() + .ok_or(eyre!("target path cannot be to_str()d"))? + .to_owned(); + + Ok(Some(ScanEntry::SymbolicLink { ownership, target })) + } else { + Ok(None) + } +} + +/// Given the parent pointer's root TreeNode and a scan entry map of the current pointer, +/// return a chunkings map prepopulated with the reusable entries. +/// Also returns a pruned copy of the scan entry map. +pub fn prepopulate_unmodified( + parent_tree: &TreeNode, + scan_entry_map: &PatriciaMap, +) -> ( + PatriciaMap<(RecursiveChunkRef, u64)>, + PatriciaMap, +) { + let mut reusable_chunkings = PatriciaMap::new(); + let mut pruned_scan_entry_map = scan_entry_map.clone(); + parent_tree + .visit( + &mut |tree_node, path| { + if let TreeNode::NormalFile { + mtime: prev_mtime, + ownership: prev_ownership, + permissions: prev_permissions, + size: prev_size, + content: prev_content, + } = tree_node + { + if let Some(ScanEntry::NormalFile { + mtime, + ownership, + permissions, + size, + }) = scan_entry_map.get(path) + { + if mtime == prev_mtime + && size == prev_size + && ownership == prev_ownership + && prev_permissions == permissions + { + // Nothing seems to have changed about this file, let's just reuse the `content` from last time. 
+ reusable_chunkings.insert(path, (*prev_content, *size)); + pruned_scan_entry_map.remove(path); + } + } + } + + Ok(()) + }, + String::new(), + ) + .expect("no reason to fail"); + (reusable_chunkings, pruned_scan_entry_map) +} + +pub fn integrate_uid_or_gid_map(new: &mut BTreeMap, old: &BTreeMap) { + for (old_uid, old_user) in old { + new.entry(*old_uid).or_insert_with(|| old_user.clone()); + } +} diff --git a/yama/src/storing.rs b/yama/src/storing.rs new file mode 100644 index 0000000..fe454b6 --- /dev/null +++ b/yama/src/storing.rs @@ -0,0 +1,391 @@ +use crate::pile_with_cache::PileWithCache; +use dashmap::DashSet; +use eyre::{bail, Context}; +use fastcdc::v2020::FastCDC; +use flume::{Receiver, RecvError, SendError, Sender}; +use std::cmp::Reverse; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::sync::Arc; +use tokio::fs::File; +use tokio::runtime::Handle; +use tokio::task; +use tokio::task::JoinSet; +use tracing::{debug, error, info_span, Instrument}; +use yama_localcache::StoreConnection; +use yama_midlevel_crypto::chunk_id::{ChunkId, ChunkIdKey}; +use yama_pile::bloblogs::BloblogWriter; +use yama_pile::definitions::{BlobLocator, BloblogId, Index, IndexBloblogEntry, RecursiveChunkRef}; +use yama_wormfile::boxed::BoxedWormFileProvider; +use yama_wormfile::WormFileWriter; +use zstd::bulk::Compressor; + +pub const DESIRED_INDEX_SIZE_ENTRIES: usize = 32768; + +// 256 kiB +pub const FASTCDC_MIN: u32 = 256 * 1024; +// 1 MiB +pub const FASTCDC_AVG: u32 = 1024 * 1024; +// 8 MiB +pub const FASTCDC_MAX: u32 = 8 * 1024 * 1024; + +pub struct StoringState { + /// A connection to the local cache for checking whether + pub cache_conn: StoreConnection, + /// Set of unflushed chunks, not present in any index, which we can assume have been created in this session. + pub new_unflushed_chunks: Arc>, + /// New bloblogs that we have created but not yet written out indices for. + pub new_bloblogs: Vec<(BloblogId, BTreeMap)>, + + pub pwc: Arc>, + + pub chunk_id_key: ChunkIdKey, + + pub compressor: zstd::bulk::Compressor<'static>, +} + +struct StoringIntermediate { + /// New bloblogs that we have created but not yet written out indices for. + pub new_bloblogs: Vec<(BloblogId, BTreeMap)>, +} + +impl From for StoringIntermediate { + fn from(ss: StoringState) -> Self { + StoringIntermediate { + new_bloblogs: ss.new_bloblogs, + } + } +} + +#[derive(Default)] +pub struct StoringBloblogWriters { + /// Bloblog writer for actual file contents (we try to keep file contents sequential in the + /// common case) + pub file_contents: Option>>>, + /// Bloblog writer for chunks of chunks + pub metachunks: Option>>>, +} + +impl StoringBloblogWriters { + async fn finish_bloblogs(&mut self, ss: &mut StoringState) -> eyre::Result<()> { + if let Some(writer_to_finish) = self.file_contents.take() { + let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?; + ss.new_bloblogs.push((bloblog_id, chunkmap)); + } + + if let Some(writer_to_finish) = self.metachunks.take() { + let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?; + ss.new_bloblogs.push((bloblog_id, chunkmap)); + } + Ok(()) + } +} + +impl StoringState { + /// Acquire a bloblog writer handle, reusing the existing one in the slot if suitable. 
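+ /// If the writer currently in the slot is due to finish, it is finalised first and its chunk map queued for the next index, then a fresh writer is created.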
+ async fn obtain_bloblog_writer<'a>( + &mut self, + slot: &'a mut Option>>>, + ) -> eyre::Result<&'a mut BloblogWriter>>> { + // if let Some(ref mut writer) = slot { + // if !writer.should_finish() { + // return Ok(writer); + // } + // } + // awkward avoidance of strange borrow issues that I don't fully grok + if slot.as_ref().map(|w| w.should_finish()) == Some(false) { + return Ok(slot.as_mut().unwrap()); + } + + if let Some(writer_to_finish) = slot.take() { + let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?; + self.new_bloblogs.push((bloblog_id, chunkmap)); + } + + *slot = Some(self.pwc.pile.create_bloblog().await?); + Ok(slot.as_mut().unwrap()) + } + + fn store_full_slice_returning_chunks( + &mut self, + store_slice: &[u8], + slot: &mut Option>>>, + ) -> eyre::Result> { + task::block_in_place(|| { + let mut result = Vec::new(); + for chunk in FastCDC::new(store_slice, FASTCDC_MIN, FASTCDC_AVG, FASTCDC_MAX) { + let chunk_bytes = &store_slice[chunk.offset..chunk.offset + chunk.length]; + let chunk_id = ChunkId::compute(chunk_bytes, &self.chunk_id_key); + result.push(chunk_id); + let is_new = Handle::current().block_on(async { + Ok::( + self.cache_conn.is_chunk_new(chunk_id).await? + && self.new_unflushed_chunks.insert(chunk_id), + ) + })?; + + if is_new { + let compressed_bytes = self.compressor.compress(&chunk_bytes)?; + + Handle::current().block_on(async { + let writer = self.obtain_bloblog_writer(slot).await?; + writer.write_chunk(chunk_id, &compressed_bytes).await?; + Ok::<(), eyre::Report>(()) + })?; + } + } + + Ok(result) + }) + } + + pub fn store_full_slice( + &mut self, + store_slice: &[u8], + sbw: &mut StoringBloblogWriters, + ) -> eyre::Result { + // First calculate all the chunk IDs needed to be written here. + let mut chunk_ids = + self.store_full_slice_returning_chunks(store_slice, &mut sbw.file_contents)?; + let mut depth = 0; + + // If we have the wrong number of chunks, we should chunk the chunk list... + while chunk_ids.len() != 1 { + let mut metachunks_list_bytes: Vec = Vec::with_capacity(chunk_ids.len() * 32); + for chunk_id in chunk_ids { + metachunks_list_bytes.extend_from_slice(&chunk_id.to_bytes()); + } + + // TODO It might be nice to store these in opposite order, so a read is a true sequential + // scan. + // i.e. (depth=3) (depth=2) (depth=1) (depth=0) ... 
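+ // Chunk the concatenated chunk IDs themselves; every pass adds one level of indirection until only a single root chunk remains.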
+ chunk_ids = self + .store_full_slice_returning_chunks(&metachunks_list_bytes, &mut sbw.metachunks)?; + depth += 1; + } + + Ok(RecursiveChunkRef { + chunk_id: chunk_ids[0], + depth, + }) + } +} + +async fn store_file( + file_path: &Path, + storing_state: &mut StoringState, + sbw: &mut StoringBloblogWriters, +) -> eyre::Result<(RecursiveChunkRef, u64)> { + let file = File::open(file_path).await?.into_std().await; + let mapped = unsafe { memmap2::Mmap::map(&file) }?; + let size_of_file = mapped.as_ref().len(); + let chunkref = storing_state.store_full_slice(mapped.as_ref(), sbw)?; + Ok((chunkref, size_of_file as u64)) +} + +pub struct StoragePipeline { + result_rx: Receiver<(String, RecursiveChunkRef, u64)>, + join_set: JoinSet>, +} + +async fn storage_pipeline_worker( + job_rx: Receiver<(String, PathBuf)>, + result_tx: Sender<(String, RecursiveChunkRef, u64)>, + mut storing_state: StoringState, +) -> eyre::Result { + let mut bloblog_writers = StoringBloblogWriters::default(); + + debug!("SPW startup"); + + while let Ok((job_id, file_path)) = job_rx.recv_async().await { + let span = info_span!("store_file", file=?file_path); + let span_enter = span.enter(); + // debug!("SPW job {job_id:?}"); + let (rec_chunk_ref, file_length) = + store_file(&file_path, &mut storing_state, &mut bloblog_writers) + .await + .with_context(|| format!("failed to store {file_path:?}"))?; + // debug!("SPW good {job_id:?}"); + if let Err(SendError(to_be_sent)) = result_tx + .send_async((job_id, rec_chunk_ref, file_length)) + .await + { + bail!("Can't return result for {to_be_sent:?} — result_tx shut down."); + } + + drop(span_enter); + drop(span); + } + + debug!("SPW shutdown"); + + bloblog_writers.finish_bloblogs(&mut storing_state).await?; + + Ok(StoringIntermediate::from(storing_state)) +} + +fn get_zstd_level() -> i32 { + // TODO Read from env? + return 12; +} + +impl StoragePipeline { + pub async fn launch_new( + workers: u32, + pwc: Arc>, + ) -> eyre::Result<(StoragePipeline, Sender<(String, PathBuf)>)> { + let (job_tx, job_rx) = flume::bounded(16); + let (result_tx, result_rx) = flume::bounded(4); + + let mut join_set = JoinSet::new(); + for spw_num in 0..workers { + let job_rx = job_rx.clone(); + let result_tx = result_tx.clone(); + let pwc = pwc.clone(); + + let compressor = match pwc.pile.pile_config.zstd_dict.as_ref() { + None => { + Compressor::new(get_zstd_level()).context("can't create dictless compressor")? + } + Some(dict_bytes) => Compressor::with_dictionary(get_zstd_level(), dict_bytes) + .context("can't create dictful compressor")?, + }; + + let chunk_id_key = pwc.pile.pile_config.chunk_id_key; + let storing_state = StoringState { + cache_conn: pwc.localcache.read().await?, + new_unflushed_chunks: Arc::new(Default::default()), + new_bloblogs: vec![], + pwc, + chunk_id_key, + compressor, + }; + // make a logging span for the Storage Pipeline Workers + let spw_span = info_span!("spw", n = spw_num); + join_set.spawn( + async move { + let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await; + if let Err(ref err) = result { + error!("Error in SPW {err:?}"); + } + result + } + .instrument(spw_span), + ); + } + + Ok(( + StoragePipeline { + result_rx, + join_set, + }, + job_tx, + )) + } + + #[inline] + pub async fn next_result(&self) -> Result<(String, RecursiveChunkRef, u64), RecvError> { + self.result_rx.recv_async().await + } + + /// Must be sure that all results have been collected first. 
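+ /// Joins every storage pipeline worker and merges their not-yet-indexed bloblog chunk maps into a single list.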
+ pub async fn finish_into_chunkmaps( + mut self, + ) -> eyre::Result)>> { + if let Ok(msg) = self.result_rx.recv_async().await { + bail!("Haven't processed all results yet! {msg:?}"); + } + + let mut chunkmap = Vec::new(); + + while let Some(join_resres) = self.join_set.join_next().await { + chunkmap.extend(join_resres??.new_bloblogs); + } + + Ok(chunkmap) + } +} + +fn assemble_indices(chunkmap: Vec<(BloblogId, BTreeMap)>) -> Vec { + let mut sorted_map = BTreeMap::new(); + for (idx, chunkmap) in chunkmap.into_iter().enumerate() { + let size_of_chunkmap = chunkmap.1.len() + 1; + sorted_map.insert(Reverse((size_of_chunkmap, idx)), chunkmap); + } + + let mut indices = Vec::new(); + + while let Some(k) = sorted_map.keys().cloned().next() { + let (Reverse((size, _)), (bloblog_id, bloblog_chunks)) = + sorted_map.remove_entry(&k).unwrap(); + let mut new_index_contents = BTreeMap::new(); + new_index_contents.insert( + bloblog_id, + IndexBloblogEntry { + chunks: bloblog_chunks, + forgotten_bytes: 0, + }, + ); + let mut new_index_size_so_far = size; + + while new_index_size_so_far < DESIRED_INDEX_SIZE_ENTRIES && !sorted_map.is_empty() { + if let Some((k, _)) = sorted_map + .range( + Reverse(( + DESIRED_INDEX_SIZE_ENTRIES - new_index_size_so_far, + usize::MAX, + )).., + ) + .next() + { + let k = k.clone(); + let (Reverse((add_size, _)), (bloblog_id, bloblog_chunks)) = + sorted_map.remove_entry(&k).unwrap(); + new_index_size_so_far += add_size; + new_index_contents.insert( + bloblog_id, + IndexBloblogEntry { + chunks: bloblog_chunks, + forgotten_bytes: 0, + }, + ); + } + } + + indices.push(Index { + supersedes: BTreeSet::new(), + bloblogs: new_index_contents, + }); + } + + indices +} + +async fn write_indices( + pwc: &PileWithCache, + indices: Vec, +) -> eyre::Result<()> { + for index in indices { + let index_id = pwc.pile.create_index(&index).await?; + if !pwc + .localcache + .write() + .await? + .apply_index(index_id, Arc::new(index)) + .await? + { + error!("freshly-created index wasn't new. This is suspicious."); + }; + } + + Ok(()) +} + +pub async fn assemble_and_write_indices( + pwc: &PileWithCache, + chunkmap: Vec<(BloblogId, BTreeMap)>, +) -> eyre::Result<()> { + let indices = assemble_indices(chunkmap); + write_indices(pwc, indices).await +} diff --git a/yama/src/vacuum.rs b/yama/src/vacuum.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yama/src/vacuum.rs @@ -0,0 +1 @@ + diff --git a/yama_cli_readme.txt b/yama_cli_readme.txt new file mode 100644 index 0000000..311fa13 --- /dev/null +++ b/yama_cli_readme.txt @@ -0,0 +1,91 @@ + + +`yama init` → init a yama pile right here, right now + creates: + * config + * directory structure + * master keyring (prompts for password) + + + `--zstd-dict | --no-zstd-dict`: choose a Zstd dictionary (or lack thereof) + +OR + +`yama init --sftp` → interactively create SFTP pile +`yama init --s3` → interactively create S3 pile + creates: + * config (remote) + * directory structure (remote) + * master keyring (local + optionally remote too, prompts for password) + * connection information file (local) + + +`yama keyring` + `inspect .yamakeyring` → print contents of keyring, ask for password if needed + + `new|create [--from ] [--with ] [--no-password]` + create a new keyring based on another one. If `--from` not specified, then defaults to the master key in this directory (`master.yamakeyring`). 
+ + if `--no-password` is specified, then the new keyring will be unprotected + + if `--with` is specified, then it's either a list of keynames to include (e.g. `r_bloblog_contents`, etc) + or a list of opinionated roles (ALL, config, backup, restore, ...?) + + e.g. you might give your server a keyring with: + `yama keyring new myserver.yamakeyring --from master.yamakeyring --with backup --no-password` to allow it to create backups but not read from them + + +`yama store [:]` + Stores a file/directory into Yama, with the given pointer. + + If `--stdin` is passed, then the contents to store are actually read from stdin instead and the provided filename is a fake filename for informational purposes only. + Would be suitable for `pg_dump | yama store --stdin mydbname.sql + + If `--force` is passed, this can overwrite a pointer name. + + I expect we will also have `--exclude` and `--exclude-list` options. + I expect we will also have a `--dry-run` option. + +`yama extract [:][/path/to/subtree] (--stdout | )` + Extracts a file/directory from Yama, from the given pointer. + + If `--stdout` is passed, writes to stdout, in which case the input must be just one file. + + I expect we will also have `--exclude` and `--exclude-list` options. + I expect we will also have a `--dry-run` option. + +`yama mount [:][/path/to/subtree] ` + Mount a pointer as a read-only FUSE filesystem. + +`yama check` + Checks consistency of the pile. One of the levels must be specified: + `--pointers`|`-1`: checks that all pointers are valid + `--shallow`|`-2`: checks that all pointers' tree nodes point to chunks that exist. + + `--intensive`|`-9`: checks that all chunks have the correct hash, that all indices correctly represent the bloblogs, that all pointers point to valid files in the end, ... as much as possible + + +`yama lsp [[:]]` + (glob defaults to `.:*`) + Lists pointers in the pile. + + If `--deleted` is specified, includes deleted pointers that have yet to be vacuumed. + + +`yama rmp [:]` + Deletes pointers, or marks them as deleted. + + If `--glob` specified, then `` is a glob. + + If `--now` is specified, an exclusive lock is required to actually delete the pointer. + If `--now` is *not* specified, then the pointer is merely marked as deleted and this only requires a shared lock. + +`yama vacuum` + Vacuums the pile, reclaiming disk space. Holds an exclusive lock over the pile. + Does things like: + - (--pointers) clean up deleted pointers that need to be actually deleted + - (--sweep) scans all pointers to discover all the chunks that are present in bloblogs but not used, then removes them from the indices (possibly slow, but necessary to actually make bloblog repacking possible) + - (--indices) writes new indices to replace existing indices, if the existing indices are not space-efficient + - (--bloblogs) repacks bloblogs that aren't space-efficient, removing unindexed blobs in the process + + `--all` for everything. 
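+
+Example session (a sketch only — pointer names, paths and keyring file names below are
+made up, and the exact argument order for `yama store` is illustrative, not normative):
+
+    cd /srv/backups/mypile
+    yama init                                   # creates config, directories, master keyring; prompts for password
+    yama keyring new backup-only.yamakeyring --from master.yamakeyring --with backup --no-password
+    yama store ./photos photos-2023-04          # store a directory under a pointer
+    yama lsp                                    # list pointers in the pile
+    yama extract photos-2023-04 /tmp/restore    # restore it elsewhere
+    yama check --shallow                        # cheap consistency check
+    yama rmp photos-2023-04                     # mark the pointer as deleted
+    yama vacuum --all                           # reclaim space (needs an exclusive lock)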
diff --git a/yama_localcache/Cargo.toml b/yama_localcache/Cargo.toml new file mode 100644 index 0000000..3ef059f --- /dev/null +++ b/yama_localcache/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "yama_localcache" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +sqlx = { version = "0.6.3", features = ["sqlite", "runtime-tokio-rustls"] } +tracing = "0.1.37" +eyre = "0.6.8" +tokio = "1.27.0" +yama_pile = { path = "../yama_pile" } +yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } +itertools = "0.10.5" \ No newline at end of file diff --git a/yama_localcache/dev_db.sh b/yama_localcache/dev_db.sh new file mode 100755 index 0000000..51f18e5 --- /dev/null +++ b/yama_localcache/dev_db.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -eu +dbpath="$(dirname "$0")/testdb.sqlite" +#echo $dbpath +sqlx db create --database-url sqlite:"$dbpath" +sqlx migrate run --database-url sqlite:"$dbpath" + diff --git a/yama_localcache/migrations/20230413133342_local_index_cache.sql b/yama_localcache/migrations/20230413133342_local_index_cache.sql new file mode 100644 index 0000000..1fb4975 --- /dev/null +++ b/yama_localcache/migrations/20230413133342_local_index_cache.sql @@ -0,0 +1,30 @@ +-- Create a local cache of indices. + +CREATE TABLE indices ( + index_short_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + index_sha256 TEXT NOT NULL +); +CREATE UNIQUE INDEX indices_index_sha256 ON indices(index_sha256); + +CREATE TABLE blobs ( + chunk_id TEXT NOT NULL, + bloblog_short_id INTEGER NOT NULL REFERENCES bloblogs(bloblog_short_id), + index_short_id INTEGER NOT NULL REFERENCES indices(index_short_id), + offset INTEGER NOT NULL, + size INTEGER NOT NULL, + PRIMARY KEY (chunk_id, bloblog_short_id, index_short_id) +); +CREATE INDEX blobs_bloblog_short_id ON blobs(bloblog_short_id); +CREATE INDEX blobs_index_short_id ON blobs(index_short_id); + +CREATE TABLE bloblogs ( + bloblog_short_id INTEGER PRIMARY KEY NOT NULL, + bloblog_sha256 TEXT NOT NULL +); +CREATE UNIQUE INDEX bloblogs_bloblog_sha256 ON bloblogs(bloblog_sha256); + +CREATE TABLE indices_supersede ( + superseded_sha256 TEXT NOT NULL, + successor_sha256 TEXT NOT NULL REFERENCES indices(index_sha256), + PRIMARY KEY (superseded_sha256, successor_sha256) +); \ No newline at end of file diff --git a/yama_localcache/src/lib.rs b/yama_localcache/src/lib.rs new file mode 100644 index 0000000..50c8c14 --- /dev/null +++ b/yama_localcache/src/lib.rs @@ -0,0 +1,335 @@ +use eyre::Context; +use itertools::Itertools; +use sqlx::pool::PoolConnection; +use sqlx::sqlite::{ + SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteRow, SqliteSynchronous, +}; +use sqlx::{query, Connection, Row, Sqlite, SqlitePool}; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_pile::definitions::{BlobLocator, BloblogId, Index, IndexId}; + +#[derive(Clone)] +pub struct Store { + pool: Arc, + writer_semaphore: Arc, +} + +pub struct StoreConnection { + /// The underlying 'connection'. + conn: PoolConnection, + + /// Permit to write. Only here so that it is dropped at the correct time. 
+ #[allow(dead_code)] + writer_permit: Option, +} + +const MAX_SQLITE_CONNECTIONS: u32 = 16; + +impl Store { + pub async fn new(path: &Path) -> eyre::Result { + let pool = SqlitePoolOptions::new() + .max_connections(MAX_SQLITE_CONNECTIONS) + .connect_with( + SqliteConnectOptions::new() + .create_if_missing(true) + .journal_mode(SqliteJournalMode::Wal) + .synchronous(SqliteSynchronous::Normal) + .foreign_keys(true) + .filename(path), + ) + .await?; + + let store = Store { + pool: Arc::new(pool), + writer_semaphore: Arc::new(Semaphore::new(1)), + }; + + let mut conn = store.pool.acquire().await?; + + // This will run the necessary migrations. + sqlx::migrate!("./migrations").run(&mut conn).await?; + + Ok(store) + } + + async fn txn(&self) -> eyre::Result> { + let writer_permit = if RW { + Some(self.writer_semaphore.clone().acquire_owned().await?) + } else { + None + }; + + let conn = self.pool.acquire().await?; + + Ok(StoreConnection { + conn, + writer_permit, + }) + } + + pub async fn read(&self) -> eyre::Result> { + self.txn().await + } + + pub async fn write(&self) -> eyre::Result> { + self.txn().await + } +} + +impl StoreConnection { + /// Store an index into the local index cache. + /// If the index supersedes any others, then those supersedings are stored and the blob entries + /// for the superseded indices are removed. + /// + /// Returns true iff the index was new. + pub async fn apply_index( + &mut self, + index_id: IndexId, + index: Arc, + ) -> eyre::Result { + let index_id_txt = index_id.to_string(); + self.conn.transaction(|txn| Box::pin(async move { + let needs_index = query!(" + SELECT 1 AS one FROM indices WHERE index_sha256 = ? + ", index_id_txt).fetch_optional(&mut *txn).await?.is_none(); + + if !needs_index { + return Ok(false); + } + + let index_short_id = query!(" + INSERT INTO indices (index_sha256) + VALUES (?) + RETURNING index_short_id + ", index_id_txt).fetch_one(&mut *txn).await?.index_short_id; + + for supersede in &index.supersedes { + let supersede_txt = supersede.to_string(); + query!(" + INSERT INTO indices_supersede (superseded_sha256, successor_sha256) + VALUES (?, ?) + ", supersede_txt, index_id_txt).execute(&mut *txn).await?; + + if let Some(row) = query!(" + SELECT index_short_id FROM indices WHERE index_sha256 = ? + ", supersede_txt).fetch_optional(&mut *txn).await? { + // Clear out any chunk entries for the superseded indices. + // This ensures we don't rely on them in the future and also clears up space. + query!(" + DELETE FROM blobs WHERE index_short_id = ? + ", row.index_short_id).execute(&mut *txn).await?; + } + } + + // Check that the index hasn't already been superseded, before adding blobs + let is_superseded = query!(" + SELECT 1 as _yes FROM indices_supersede WHERE superseded_sha256 = ?", + index_id_txt + ).fetch_optional(&mut *txn).await?.is_some(); + + if !is_superseded { + for (bloblog_sha256, index_bloblog_entry) in &index.bloblogs { + let bloblog_sha256_txt = bloblog_sha256.to_string(); + let bloblog_short_id_opt = query!(" + SELECT bloblog_short_id FROM bloblogs WHERE bloblog_sha256 = ? + ", bloblog_sha256_txt).fetch_optional(&mut *txn).await?; + + let bloblog_short_id = match bloblog_short_id_opt { + None => { + query!(" + INSERT INTO bloblogs (bloblog_sha256) + VALUES (?) 
+ RETURNING bloblog_short_id + ", bloblog_sha256_txt).fetch_one(&mut *txn).await?.bloblog_short_id + }, + Some(row) => row.bloblog_short_id, + }; + + + for (chunk_id, chunk_locator) in index_bloblog_entry.chunks.iter() { + let chunk_id_txt = chunk_id.to_string(); + let coffset = chunk_locator.offset as i64; + let clen = chunk_locator.length as i64; + query!(" + INSERT INTO blobs (index_short_id, bloblog_short_id, chunk_id, offset, size) + VALUES (?, ?, ?, ?, ?) + ", index_short_id, bloblog_short_id, chunk_id_txt, coffset, clen).execute(&mut *txn).await?; + } + } + } + + Ok(true) + })).await + } + + /// Delete an index from the cache, if the cache was deleted from the pile. + pub async fn delete_index(&mut self, index_id: IndexId) -> eyre::Result<()> { + self.conn + .transaction(|txn| { + Box::pin(async move { + let index_id_txt = index_id.to_string(); + query!( + " + DELETE FROM indices_supersede WHERE successor_sha256 = ? + ", + index_id_txt + ) + .execute(&mut *txn) + .await?; + + let index_short_id = query!( + " + SELECT index_short_id FROM indices WHERE index_sha256 = ? + ", + index_id_txt + ) + .fetch_one(&mut *txn) + .await? + .index_short_id; + + query!( + " + DELETE FROM blobs WHERE index_short_id = ? + ", + index_short_id + ) + .execute(&mut *txn) + .await?; + + query!( + " + DELETE FROM indices WHERE index_short_id = ? + ", + index_short_id + ) + .execute(&mut *txn) + .await?; + Ok::<_, eyre::Report>(()) + }) + }) + .await?; + + Ok(()) + } +} + +impl StoreConnection { + pub async fn locate_chunk( + &mut self, + chunk_id: ChunkId, + ) -> eyre::Result> { + let chunk_id_text = chunk_id.to_string(); + let row_opt = query!( + " + SELECT bl.bloblog_sha256, b.offset, b.size + FROM blobs b + JOIN bloblogs bl USING (bloblog_short_id) + WHERE b.chunk_id = ? 
+ LIMIT 1 + ", + chunk_id_text + ) + .fetch_optional(&mut *self.conn) + .await?; + + match row_opt { + None => Ok(None), + Some(row) => { + let bloblog_id = + BloblogId::try_from(row.bloblog_sha256.as_str()).with_context(|| { + format!("failed to decode bloblog ID: {:?}", row.bloblog_sha256) + })?; + Ok(Some(( + bloblog_id, + BlobLocator { + offset: row.offset.try_into().context("offset too big")?, + length: row.size.try_into().context("size too big")?, + }, + ))) + } + } + } + + pub async fn locate_chunks( + &mut self, + chunk_ids: &BTreeSet, + ) -> eyre::Result> { + let mut out = BTreeMap::new(); + for batch in &chunk_ids.iter().chunks(64) { + let chunk_id_texts: Vec = batch.map(|ci| ci.to_string()).collect(); + let query_param_str = format!("({})", &",?".repeat(chunk_id_texts.len())[1..]); + let sql = format!( + " + SELECT b.chunk_id, bl.bloblog_sha256, b.offset, b.size + FROM blobs b + JOIN bloblogs bl USING (bloblog_short_id) + WHERE b.chunk_id IN {query_param_str} + " + ); + + let mut q = query(&sql); + for chunk_id in &chunk_id_texts { + q = q.bind(chunk_id); + } + let rows = q + .map(|row: SqliteRow| { + Ok::<_, eyre::Report>(( + ChunkId::from_str(row.get::<&str, _>(0))?, + BloblogId::try_from(row.get::<&str, _>(1))?, + row.get::(2), + row.get::(3), + )) + }) + .fetch_all(&mut *self.conn) + .await?; + for row in rows { + let (chunk_id, bloblog_id, offset, size) = row?; + out.insert( + chunk_id, + ( + bloblog_id, + BlobLocator { + offset: offset as u64, + length: size as u64, + }, + ), + ); + } + } + Ok(out) + } + + pub async fn list_indices(&mut self) -> eyre::Result> { + let row_results = query!( + " + SELECT index_sha256 FROM indices + " + ) + .map(|row| { + IndexId::try_from(row.index_sha256.as_ref()) + .context("failed to decode IndexId in local cache") + }) + .fetch_all(&mut *self.conn) + .await?; + row_results.into_iter().collect() + } + + pub async fn is_chunk_new(&mut self, chunk_id: ChunkId) -> eyre::Result { + let chunk_id_text = chunk_id.to_string(); + let is_new = query!( + " + SELECT 1 AS _yes FROM blobs WHERE chunk_id = ? + ", + chunk_id_text + ) + .fetch_optional(&mut *self.conn) + .await? 
+ .is_none(); + Ok(is_new) + } +} diff --git a/yama_midlevel_crypto/Cargo.toml b/yama_midlevel_crypto/Cargo.toml new file mode 100644 index 0000000..02c3983 --- /dev/null +++ b/yama_midlevel_crypto/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "yama_midlevel_crypto" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde = { version = "1.0.159", features = ["derive"] } +ciborium = "0.2.0" +blake3 = "1.3.3" + +# Unauthenticated symmetric seekable stream constructions +chacha20 = "0.9.1" + +x25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "static_secrets"] } +poly1305 = "0.8.0" +ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] } + +# Hybrid quantum-resistant asymmetric 'key encapsulation' mechanisms +pqc_kyber = { version = "0.5.0", features = ["kyber1024"] } +#alkali = "0.3.0" + +rand = "0.8.5" + + +eyre = "0.6.8" + +# 0.12.3+zstd.1.5.2 +zstd = "0.12.3" + +hex = "0.4.3" + +argon2 = { version = "0.4.1", default-features = false, features = ["alloc", "std"] } + diff --git a/yama_midlevel_crypto/src/asym_box.rs b/yama_midlevel_crypto/src/asym_box.rs new file mode 100644 index 0000000..3dce5fe --- /dev/null +++ b/yama_midlevel_crypto/src/asym_box.rs @@ -0,0 +1,92 @@ +use ed25519_dalek::SIGNATURE_LENGTH; +use serde::{Deserialize, Serialize}; +use std::marker::PhantomData; + +use crate::asym_keyx::{AsymKeyExchange, DecryptingKey, EncryptingKey, KEY_EXCHANGE_LENGTH}; +use crate::asym_signed::{SignedBytes, SigningKey, VerifyingKey}; +use crate::byte_layer::ByteLayer; +use crate::sym_box::{SymBox, SymKey}; + +/// A locked box storing something using asymmetric cryptography. +/// +/// For key encapsulation: x25519 and kyber (quantum-resistant) +/// For signing: ed25519 (not quantum-resistant) +/// +#[derive(Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct AsymBox { + payload: SignedBytes, + #[serde(skip, default)] + _phantom: PhantomData, +} + +/// A public key, needed to lock an AsymBox or verify the signature when unlocking an AsymBox. +#[derive(Clone, Serialize, Deserialize)] +pub struct AsymPublicKey { + verify: VerifyingKey, + encrypt: EncryptingKey, +} + +/// A private key, needed to open an AsymBox or to sign an AsymBox that is being locked. 
+#[derive(Clone, Serialize, Deserialize)] +pub struct AsymPrivateKey { + sign: SigningKey, + decrypt: DecryptingKey, +} + +impl ByteLayer for AsymBox { + fn from_byte_vec(bytes: Vec) -> Self { + Self { + payload: SignedBytes::from_bytes_vec_assumed(bytes), + _phantom: Default::default(), + } + } + + fn into_byte_vec(self) -> Vec { + self.payload.into_bytes_vec() + } +} + +impl AsymBox { + // TODO error + pub fn unlock( + self, + receiver_decrypt_key: &DecryptingKey, + sender_verify_key: &VerifyingKey, + ) -> Option { + let verified_payload = self.payload.into_verified(sender_verify_key)?; + + let cutoff = verified_payload.len() - KEY_EXCHANGE_LENGTH; + + let key_exchanger_bytes = &verified_payload[cutoff..]; + let key_exchanger = AsymKeyExchange::load_from_bytes(key_exchanger_bytes) + .expect("can't load AsymKeyExchange"); + let exchanged = key_exchanger.unlock(receiver_decrypt_key)?; + let symkey = SymKey::from(exchanged); + + let symbox: SymBox = SymBox::new_from_raw(&verified_payload[0..cutoff]); + symbox.unlock(&symkey) + } +} + +impl AsymBox { + // TODO error + pub fn new( + contents: T, + sender_signing_key: &SigningKey, + receiver_encrypt_key: &EncryptingKey, + ) -> Option { + let (key_exchanger, exchanged) = AsymKeyExchange::lock(receiver_encrypt_key); + let symkey = SymKey::from(exchanged); + + let mut signed_bytes = SymBox::new(contents, &symkey)?.into_vec(); + signed_bytes.reserve(KEY_EXCHANGE_LENGTH + SIGNATURE_LENGTH); + signed_bytes.extend_from_slice(key_exchanger.as_bytes()); + + let signed = SignedBytes::new(signed_bytes, sender_signing_key); + Some(Self { + payload: signed, + _phantom: Default::default(), + }) + } +} diff --git a/yama_midlevel_crypto/src/asym_keyx.rs b/yama_midlevel_crypto/src/asym_keyx.rs new file mode 100644 index 0000000..9fdbbbc --- /dev/null +++ b/yama_midlevel_crypto/src/asym_keyx.rs @@ -0,0 +1,149 @@ +use crate::keys_kyber::{kyber_keypair, KyberPrivateKey, KyberPublicKey}; +use crate::keys_x25519::{x25519_keypair, X25519PrivateKey, X25519PublicKey}; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::io::Read; + +// x25519 ephemeral pubkey (32) + kyber (1568) +pub const KEY_EXCHANGE_LENGTH: usize = 32 + 1568; + +#[derive(Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct AsymKeyExchange<'bytes> { + inner: Cow<'bytes, [u8]>, +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct EncryptingKey { + x25519: X25519PublicKey, + kyber: KyberPublicKey, +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct DecryptingKey { + x25519: X25519PrivateKey, + kyber: KyberPrivateKey, + + x25519_pub: X25519PublicKey, + kyber_pub: KyberPublicKey, +} + +pub fn generate_asym_keypair() -> (EncryptingKey, DecryptingKey) { + let (x25519_pub, x25519_priv) = x25519_keypair(); + let (kyber_pub, kyber_priv) = kyber_keypair(); + + ( + EncryptingKey { + x25519: x25519_pub.clone(), + kyber: kyber_pub.clone(), + }, + DecryptingKey { + x25519: x25519_priv, + kyber: kyber_priv, + x25519_pub, + kyber_pub, + }, + ) +} + +pub struct Exchanged(pub(crate) [u8; 64]); + +impl<'bytes> AsymKeyExchange<'bytes> { + pub fn load_from_bytes(bytes: &'bytes [u8]) -> Option { + if bytes.len() != KEY_EXCHANGE_LENGTH { + return None; + } + Some(Self { + inner: Cow::Borrowed(&bytes), + }) + } + + pub fn as_bytes(&self) -> &[u8] { + self.inner.as_ref() + } + + pub fn lock(ek: &EncryptingKey) -> (AsymKeyExchange, Exchanged) { + let mut public_bytes = Vec::with_capacity(KEY_EXCHANGE_LENGTH); + + let mut rand = rand::thread_rng(); + + // X25519 + let 
ephemeral_privkey = x25519_dalek::EphemeralSecret::random_from_rng(&mut rand); + let ephemeral_pubkey = x25519_dalek::PublicKey::from(&ephemeral_privkey); + public_bytes.extend_from_slice(ephemeral_pubkey.as_bytes()); + let shared_secret_x25519 = ephemeral_privkey.diffie_hellman(&ek.x25519.inner); + + // Kyber + let kyber = ek.kyber.encapsulate(&mut rand); + public_bytes.extend_from_slice(&kyber.public_bytes); + + assert_eq!(public_bytes.len(), KEY_EXCHANGE_LENGTH); + + let exchanged = Self::perform_exchange( + &public_bytes, + &ek.x25519, + &ek.kyber, + shared_secret_x25519.as_bytes(), + &kyber.shared_secret, + ); + + ( + AsymKeyExchange { + inner: Cow::Owned(public_bytes), + }, + exchanged, + ) + } + + fn perform_exchange( + public_bytes: &[u8], + rx_x25519_pub: &X25519PublicKey, + rx_kyber_pub: &KyberPublicKey, + x25519_ss: &[u8; 32], + kyber_ss: &[u8; 32], + ) -> Exchanged { + assert_eq!(public_bytes.len(), KEY_EXCHANGE_LENGTH); + + let mut hasher = blake3::Hasher::new_derive_key("yama AsymKeyExchange"); + // Includes the pubkeys of the writer + hasher.update(public_bytes); + + // Include the pubkeys of the receiver + hasher.update(rx_x25519_pub.as_bytes()); + hasher.update(rx_kyber_pub.as_bytes()); + + // Include what was exchanged + hasher.update(x25519_ss); + hasher.update(kyber_ss); + + let mut exchanged_bytes = [0u8; 64]; + let mut out = hasher.finalize_xof(); + out.read_exact(&mut exchanged_bytes) + .expect("failed to read 64b from blake3"); + + Exchanged(exchanged_bytes) + } + + pub fn unlock(&self, dk: &DecryptingKey) -> Option { + if self.inner.len() != KEY_EXCHANGE_LENGTH { + return None; + } + + // X25519 + let ephemeral_x25519_pubkey_bytes: &[u8; 32] = &self.inner[0..32].try_into().unwrap(); + let ephemeral_x25519_pubkey = x25519_dalek::PublicKey::from(*ephemeral_x25519_pubkey_bytes); + let shared_secret_x25519 = dk.x25519.inner.diffie_hellman(&ephemeral_x25519_pubkey); + + // Kyber + let kyber_ciphertext_bytes: &[u8; 1568] = &self.inner[32..].try_into().unwrap(); + let kyber = dk.kyber.decapsulate(&kyber_ciphertext_bytes); + + Some(Self::perform_exchange( + &self.inner, + &dk.x25519_pub, + &dk.kyber_pub, + shared_secret_x25519.as_bytes(), + &kyber, + )) + } +} diff --git a/yama_midlevel_crypto/src/asym_signed.rs b/yama_midlevel_crypto/src/asym_signed.rs new file mode 100644 index 0000000..9b6c7f8 --- /dev/null +++ b/yama_midlevel_crypto/src/asym_signed.rs @@ -0,0 +1,126 @@ +use serde::de::Error as DeError; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +use ed25519_dalek::{ + Signature, Signer, SigningKey as Ed25519PrivateKey, Verifier, VerifyingKey as Ed25519PublicKey, +}; + +pub use ed25519_dalek::SIGNATURE_LENGTH; +use rand::thread_rng; + +#[derive(Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct SignedBytes { + inner: Vec, +} + +#[derive(Clone)] +pub struct SigningKey { + ed25519: Ed25519PrivateKey, +} + +#[derive(Clone)] +pub struct VerifyingKey { + ed25519: Ed25519PublicKey, +} + +pub fn asym_signing_keypair() -> (SigningKey, VerifyingKey) { + let mut rng = thread_rng(); + let keypair = ed25519_dalek::SigningKey::generate(&mut rng); + + ( + SigningKey { + ed25519: keypair.clone(), + }, + VerifyingKey { + ed25519: keypair.verifying_key(), + }, + ) +} + +impl Serialize for SigningKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let ed25519 = self.ed25519.to_bytes(); + <[u8]>::serialize(&ed25519, serializer) + } +} + +impl<'d> Deserialize<'d> for SigningKey { + fn deserialize(deserializer: D) -> 
Result + where + D: Deserializer<'d>, + { + let bytes = Vec::::deserialize(deserializer)?; + + let mut ed25519 = [0u8; 32]; + if bytes.len() != ed25519.len() { + return Err(D::Error::custom("wrong length of ed25519 key")); + } + ed25519.copy_from_slice(&bytes); + + Ok(SigningKey { + ed25519: Ed25519PrivateKey::from_bytes(&ed25519), + }) + } +} + +impl Serialize for VerifyingKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + (self.ed25519.as_bytes() as &[u8]).serialize(serializer) + } +} + +impl<'d> Deserialize<'d> for VerifyingKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'d>, + { + let bytes = Vec::::deserialize(deserializer)?; + + let mut ed25519 = [0u8; 32]; + if bytes.len() != ed25519.len() { + return Err(D::Error::custom("wrong length of ed25519 key")); + } + ed25519.copy_from_slice(&bytes); + + Ok(VerifyingKey { + ed25519: Ed25519PublicKey::from_bytes(&ed25519).map_err(D::Error::custom)?, + }) + } +} + +impl SignedBytes { + pub fn new(mut bytes: Vec, sign_with: &SigningKey) -> SignedBytes { + let signature = sign_with.ed25519.sign(&bytes); + let sig = signature.to_bytes(); + assert_eq!(sig.len(), SIGNATURE_LENGTH); + + bytes.extend(sig); + SignedBytes { inner: bytes } + } + + pub fn into_verified(mut self, verify_with: &VerifyingKey) -> Option> { + if self.inner.len() < SIGNATURE_LENGTH { + return None; + } + let (payload, sig) = self.inner.split_at(self.inner.len() - SIGNATURE_LENGTH); + let sig = Signature::from_bytes(sig.try_into().expect("wrong split")); + verify_with.ed25519.verify(&payload, &sig).ok()?; + self.inner.drain(self.inner.len() - SIGNATURE_LENGTH..); + Some(self.inner) + } + + pub fn into_bytes_vec(self) -> Vec { + self.inner + } + + pub fn from_bytes_vec_assumed(inner: Vec) -> Self { + Self { inner } + } +} diff --git a/yama_midlevel_crypto/src/byte_layer.rs b/yama_midlevel_crypto/src/byte_layer.rs new file mode 100644 index 0000000..45bd651 --- /dev/null +++ b/yama_midlevel_crypto/src/byte_layer.rs @@ -0,0 +1,55 @@ +use serde::de::DeserializeOwned; +use serde::Serialize; +use std::marker::PhantomData; + +/// Trait to help layering byte transformers together. +pub trait ByteLayer { + fn from_byte_vec(bytes: Vec) -> Self; + fn into_byte_vec(self) -> Vec; +} + +#[derive(Clone)] +pub struct CborSerde { + bytes: Vec, + marker: PhantomData, +} + +impl ByteLayer for CborSerde { + fn from_byte_vec(bytes: Vec) -> Self { + CborSerde { + bytes, + marker: PhantomData::default(), + } + } + + fn into_byte_vec(self) -> Vec { + self.bytes + } +} + +impl CborSerde { + pub fn serialise(from: &T) -> eyre::Result { + let mut bytes = Vec::new(); + ciborium::ser::into_writer(from, &mut bytes)?; + Ok(CborSerde { + bytes, + marker: Default::default(), + }) + } +} + +impl CborSerde { + pub fn deserialise(&self) -> eyre::Result { + Ok(ciborium::de::from_reader(&self.bytes[..])?) + } +} + +impl ByteLayer for Vec { + fn from_byte_vec(bytes: Vec) -> Self { + bytes + } + + fn into_byte_vec(self) -> Vec { + self + } +} diff --git a/yama_midlevel_crypto/src/chunk_id.rs b/yama_midlevel_crypto/src/chunk_id.rs new file mode 100644 index 0000000..26de31d --- /dev/null +++ b/yama_midlevel_crypto/src/chunk_id.rs @@ -0,0 +1,91 @@ +use eyre::bail; +use rand::{thread_rng, Rng}; +use serde::{Deserialize, Serialize}; +use std::fmt::{Debug, Formatter}; +use std::str::FromStr; + +/// The ID of a chunk. It's a 256-bit BLAKE3 hash. 
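+// (More precisely, a 256-bit *keyed* BLAKE3 hash: see ChunkId::compute and ChunkIdKey below,
+// which use blake3::keyed_hash, so chunk IDs depend on the pile's chunk-ID key as well as the content.)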
+#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)] +#[repr(transparent)] +#[serde(transparent)] +pub struct ChunkId { + blake3: [u8; 32], +} + +impl Debug for ChunkId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for &byte in self.blake3.iter() { + write!(f, "{:02x}", byte)?; + } + Ok(()) + } +} + +impl ToString for ChunkId { + fn to_string(&self) -> String { + hex::encode(&self.blake3) + } +} + +impl From<[u8; 32]> for ChunkId { + fn from(bytes: [u8; 32]) -> Self { + ChunkId { blake3: bytes } + } +} + +impl FromStr for ChunkId { + type Err = eyre::Report; + + fn from_str(s: &str) -> Result { + if s.len() != 64 { + bail!("chunk ID of wrong length"); + } + let decoded = hex::decode(s)?; + let mut new = ChunkId { + blake3: Default::default(), + }; + new.blake3.copy_from_slice(&decoded); + Ok(new) + } +} + +impl ChunkId { + pub fn to_bytes(self) -> [u8; 32] { + self.blake3 + } +} + +/// Key needed to create and verify chunk IDs. It's a 256-bit key for the BLAKE3 keyed hash function. +#[derive(Copy, Clone, Serialize, Deserialize)] +pub struct ChunkIdKey { + key: [u8; 32], +} + +impl ChunkIdKey { + pub fn new_rand() -> ChunkIdKey { + let mut rng = thread_rng(); + ChunkIdKey { key: rng.gen() } + } +} + +impl Debug for ChunkIdKey { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // for privacy, don't leak the contents of the key + write!(f, "ChunkIdKey(...)") + } +} + +impl ChunkId { + /// Computes a chunk ID given the input and key. + pub fn compute(input: &[u8], key: &ChunkIdKey) -> ChunkId { + ChunkId { + blake3: blake3::keyed_hash(&key.key, input).into(), + } + } + + /// Returns true iff this Chunk ID is correct for the given input and key. + pub fn verify(&self, input: &[u8], key: &ChunkIdKey) -> bool { + let comparison = Self::compute(input, key); + self == &comparison + } +} diff --git a/yama_midlevel_crypto/src/key_derivation.rs b/yama_midlevel_crypto/src/key_derivation.rs new file mode 100644 index 0000000..c8dbf1f --- /dev/null +++ b/yama_midlevel_crypto/src/key_derivation.rs @@ -0,0 +1,72 @@ +use crate::sym_box::SymKey; +use argon2::Algorithm::Argon2id; +use argon2::{Argon2, Version}; +use eyre::{bail, Context}; +use rand::{thread_rng, Rng}; +use serde::{Deserialize, Serialize}; + +/// 1 GiB. Intended to prevent maliciously large memory costs; not sure if that's a real risk. +pub const MAX_MEMORY_COST_KIBIBYTES: u32 = 1048576; + +/// 512 MiB +pub const DEFAULT_MEMORY_COST_KIBIBYTES: u32 = 524288; + +pub const DEFAULT_LANES: u32 = 1; +pub const DEFAULT_ITERATIONS: u32 = 256; + +#[derive(Clone, Serialize, Deserialize)] +pub struct KeyDerivationParameters { + salt: [u8; 32], + iterations: u32, + memory_kibibytes: u32, + lanes: u32, +} + +impl KeyDerivationParameters { + pub fn new_recommended() -> KeyDerivationParameters { + let mut rng = thread_rng(); + KeyDerivationParameters { + salt: rng.gen(), + iterations: DEFAULT_ITERATIONS, + memory_kibibytes: DEFAULT_MEMORY_COST_KIBIBYTES, + lanes: DEFAULT_LANES, + } + } + + pub fn derive(&self, password: &str) -> eyre::Result { + if self.memory_kibibytes > MAX_MEMORY_COST_KIBIBYTES { + bail!( + "Too much memory needed for key derivation! 
{} > {}", + self.memory_kibibytes, + MAX_MEMORY_COST_KIBIBYTES + ); + } + + let mut params = argon2::ParamsBuilder::new(); + params + .m_cost(self.memory_kibibytes) + .unwrap() + .p_cost(self.lanes) + .unwrap() + .t_cost(self.iterations) + .unwrap() + .output_len(64) + .unwrap(); + let params = params.params().unwrap(); + let argon = Argon2::new(Argon2id, Version::V0x13, params.clone()); + let mut derived_key = DerivedKey([0; 64]); + argon + .hash_password_into(password.as_bytes(), &self.salt, &mut derived_key.0) + .context("failed to hash password")?; + + Ok(derived_key) + } +} + +pub struct DerivedKey(pub(crate) [u8; 64]); + +impl DerivedKey { + pub fn into_symkey(self) -> SymKey { + SymKey::from(self) + } +} diff --git a/yama_midlevel_crypto/src/keys_kyber.rs b/yama_midlevel_crypto/src/keys_kyber.rs new file mode 100644 index 0000000..d238a33 --- /dev/null +++ b/yama_midlevel_crypto/src/keys_kyber.rs @@ -0,0 +1,109 @@ +use pqc_kyber::CryptoRng; +use rand::{thread_rng, RngCore}; +use serde::de::Error; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Cow; + +#[derive(Clone)] +pub struct KyberPublicKey { + inner: pqc_kyber::PublicKey, +} + +#[derive(Clone)] +pub struct KyberPrivateKey { + inner: pqc_kyber::SecretKey, +} + +pub fn kyber_keypair() -> (KyberPublicKey, KyberPrivateKey) { + let mut rng = thread_rng(); + let kyber_pair = pqc_kyber::keypair(&mut rng); + + ( + KyberPublicKey { + inner: kyber_pair.public, + }, + KyberPrivateKey { + inner: kyber_pair.secret, + }, + ) +} + +impl Serialize for KyberPublicKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + <[u8]>::serialize(&self.inner, serializer) + } +} + +impl<'d> Deserialize<'d> for KyberPublicKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'d>, + { + let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?; + + let mut kyber: pqc_kyber::PublicKey = [0; 1568]; + if kyber.len() != bytes.len() { + return Err(D::Error::custom("wrong length of kyber key")); + } + kyber.copy_from_slice(&bytes); + + Ok(KyberPublicKey { inner: kyber }) + } +} + +impl KyberPublicKey { + pub fn as_bytes(&self) -> &[u8; 1568] { + &self.inner + } +} + +impl Serialize for KyberPrivateKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + <[u8]>::serialize(&self.inner, serializer) + } +} + +impl<'d> Deserialize<'d> for KyberPrivateKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'d>, + { + let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?; + + let mut kyber: pqc_kyber::SecretKey = [0; 3168]; + if kyber.len() != bytes.len() { + return Err(D::Error::custom("wrong length of kyber key")); + } + kyber.copy_from_slice(&bytes); + + Ok(KyberPrivateKey { inner: kyber }) + } +} + +pub struct KyberEncapsulation { + pub shared_secret: pqc_kyber::SharedSecret, + pub public_bytes: [u8; pqc_kyber::KYBER_CIPHERTEXTBYTES], +} + +impl KyberPublicKey { + pub fn encapsulate(&self, rng: &mut T) -> KyberEncapsulation { + let (public_bytes, shared_secret) = + pqc_kyber::encapsulate(&self.inner, rng).expect("bad kyber encapsulation"); + KyberEncapsulation { + shared_secret, + public_bytes, + } + } +} + +impl KyberPrivateKey { + pub fn decapsulate(&self, ciphertext: &[u8; 1568]) -> pqc_kyber::SharedSecret { + pqc_kyber::decapsulate(ciphertext, &self.inner).expect("bad kyber decapsulation") + } +} diff --git a/yama_midlevel_crypto/src/keys_x25519.rs b/yama_midlevel_crypto/src/keys_x25519.rs new file mode 100644 index 0000000..cdb98c5 
--- /dev/null +++ b/yama_midlevel_crypto/src/keys_x25519.rs @@ -0,0 +1,76 @@ +use rand::thread_rng; +use serde::de::Error as _DeError; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::borrow::Cow; + +#[derive(Clone)] +#[repr(transparent)] +pub struct X25519PrivateKey { + pub(crate) inner: x25519_dalek::StaticSecret, +} + +#[derive(Clone)] +#[repr(transparent)] +pub struct X25519PublicKey { + pub(crate) inner: x25519_dalek::PublicKey, +} + +pub fn x25519_keypair() -> (X25519PublicKey, X25519PrivateKey) { + let mut rng = thread_rng(); + let x25519_priv = x25519_dalek::StaticSecret::random_from_rng(&mut rng); + let x25519_pub = x25519_dalek::PublicKey::from(&x25519_priv); + ( + X25519PublicKey { inner: x25519_pub }, + X25519PrivateKey { inner: x25519_priv }, + ) +} + +impl Serialize for X25519PrivateKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + <[u8]>::serialize(self.inner.as_bytes(), serializer) + } +} + +impl<'d> Deserialize<'d> for X25519PrivateKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'d>, + { + let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?; + let counted_bytes: [u8; 32] = bytes.as_ref().try_into().map_err(D::Error::custom)?; + Ok(X25519PrivateKey { + inner: x25519_dalek::StaticSecret::from(counted_bytes), + }) + } +} + +impl X25519PublicKey { + pub fn as_bytes(&self) -> &[u8; 32] { + self.inner.as_bytes() + } +} + +impl Serialize for X25519PublicKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + <[u8]>::serialize(self.inner.as_bytes(), serializer) + } +} + +impl<'d> Deserialize<'d> for X25519PublicKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'d>, + { + let bytes = Cow::<'d, [u8]>::deserialize(deserializer)?; + let counted_bytes: [u8; 32] = bytes.as_ref().try_into().map_err(D::Error::custom)?; + Ok(X25519PublicKey { + inner: x25519_dalek::PublicKey::from(counted_bytes), + }) + } +} diff --git a/yama_midlevel_crypto/src/lib.rs b/yama_midlevel_crypto/src/lib.rs new file mode 100644 index 0000000..5ef502a --- /dev/null +++ b/yama_midlevel_crypto/src/lib.rs @@ -0,0 +1,17 @@ +pub mod asym_box; +pub mod asym_keyx; +pub mod asym_signed; + +pub mod keys_kyber; +pub mod keys_x25519; + +pub mod sym_box; +pub mod sym_stream; + +pub mod chunk_id; + +pub mod zstd_box; + +pub mod byte_layer; + +pub mod key_derivation; diff --git a/yama_midlevel_crypto/src/sym_box.rs b/yama_midlevel_crypto/src/sym_box.rs new file mode 100644 index 0000000..cacca56 --- /dev/null +++ b/yama_midlevel_crypto/src/sym_box.rs @@ -0,0 +1,142 @@ +use crate::asym_keyx::Exchanged; +use crate::byte_layer::ByteLayer; +use crate::key_derivation::DerivedKey; +use chacha20::cipher::{KeyIvInit, StreamCipher}; +use chacha20::XChaCha20; +use poly1305::universal_hash::KeyInit; +use poly1305::Poly1305; +use rand::Rng; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::marker::PhantomData; + +pub const SYMBOX_NONCE_LENGTH: usize = 24; +pub const SYMBOX_MAC_LENGTH: usize = 16; +pub const SYMBOX_FOOTER_LENGTH: usize = SYMBOX_MAC_LENGTH + SYMBOX_NONCE_LENGTH; + +#[derive(Clone, Serialize, Deserialize)] +#[serde(transparent)] +pub struct SymBox<'bytes, T> { + // payload || nonce || mac + bytes: Cow<'bytes, [u8]>, + + #[serde(skip)] + phantom: PhantomData, +} + +#[derive(Clone)] +pub struct SymKey { + xchacha20: [u8; 32], + poly1305: poly1305::Key, +} + +impl SymKey { + fn from_64_bytes(input: &[u8]) -> Self { + let (xchacha20_bytes, poly1305_bytes) = 
input.split_at(32); + + SymKey { + xchacha20: xchacha20_bytes.try_into().unwrap(), + poly1305: *poly1305::Key::from_slice(poly1305_bytes), + } + } +} + +impl From for SymKey { + fn from(exchanged: Exchanged) -> Self { + SymKey::from_64_bytes(&exchanged.0) + } +} + +impl From for SymKey { + fn from(derived: DerivedKey) -> Self { + SymKey::from_64_bytes(&derived.0) + } +} + +impl<'bytes, T> SymBox<'bytes, T> { + pub fn as_bytes(&self) -> &[u8] { + self.bytes.as_ref() + } + + pub fn new_from_raw(bytes: &'bytes [u8]) -> Self { + Self { + bytes: Cow::Borrowed(bytes), + phantom: Default::default(), + } + } +} + +impl<'bytes, T> ByteLayer for SymBox<'bytes, T> { + fn from_byte_vec(bytes: Vec) -> Self { + Self { + bytes: Cow::Owned(bytes), + phantom: Default::default(), + } + } + + fn into_byte_vec(self) -> Vec { + self.bytes.into_owned() + } +} + +impl SymBox<'static, T> { + pub fn into_vec(self) -> Vec { + match self.bytes { + Cow::Borrowed(b) => b.to_vec(), + Cow::Owned(o) => o, + } + } +} + +impl<'bytes, T: ByteLayer> SymBox<'bytes, T> { + // TODO error + pub fn unlock(&self, symkey: &SymKey) -> Option { + let blen = self.bytes.len(); + + if blen < SYMBOX_FOOTER_LENGTH { + return None; + } + + let (ciphertext_then_nonce, mac) = self.bytes.split_at(blen - SYMBOX_MAC_LENGTH); + let (ciphertext, nonce) = ciphertext_then_nonce.split_at(blen - SYMBOX_FOOTER_LENGTH); + + let poly1305_mac = Poly1305::new(&symkey.poly1305).compute_unpadded(&ciphertext_then_nonce); + if poly1305_mac.as_slice() != mac { + // TODO Should this pedantically be a constant-time equality check? + // I don't think it matters in any exploitable way for Yama though... + return None; + } + + let mut out_buf = Vec::::with_capacity(ciphertext.len()); + out_buf.extend_from_slice(&ciphertext); + let mut xchacha20: XChaCha20 = XChaCha20::new(&symkey.xchacha20.into(), nonce.into()); + xchacha20.apply_keystream(&mut out_buf); + let decrypted: Vec = out_buf; + + Some(T::from_byte_vec(decrypted)) + } +} + +impl<'bytes, T: ByteLayer> SymBox<'bytes, T> { + // TODO error + pub fn new(contents: T, symkey: &SymKey) -> Option { + let unencrypted = contents.into_byte_vec(); + let mut rng = rand::thread_rng(); + let nonce = rng.gen::<[u8; SYMBOX_NONCE_LENGTH]>(); + + let mut out_buf = Vec::::with_capacity(unencrypted.len() + SYMBOX_FOOTER_LENGTH); + out_buf.extend_from_slice(&unencrypted); + let mut xchacha20: XChaCha20 = XChaCha20::new(&symkey.xchacha20.into(), &nonce.into()); + xchacha20.apply_keystream(&mut out_buf); + + out_buf.extend_from_slice(&nonce); + + let poly1305_mac = Poly1305::new(&symkey.poly1305).compute_unpadded(&out_buf); + out_buf.extend_from_slice(poly1305_mac.as_slice()); + + Some(SymBox { + bytes: Cow::Owned(out_buf), + phantom: Default::default(), + }) + } +} diff --git a/yama_midlevel_crypto/src/sym_stream.rs b/yama_midlevel_crypto/src/sym_stream.rs new file mode 100644 index 0000000..cf4a613 --- /dev/null +++ b/yama_midlevel_crypto/src/sym_stream.rs @@ -0,0 +1,81 @@ +use chacha20::cipher::{KeyIvInit, StreamCipher, StreamCipherSeek}; +use chacha20::XChaCha20; +use rand::{CryptoRng, Rng, RngCore}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::fmt::{Debug, Formatter}; + +/// A symmetric key needed to encrypt or decrypt a symmetric stream. +pub struct SymStreamKey { + /// 256-bit key + key: [u8; 32], + + /// 192-bit nonce for XChaCha20 + nonce: [u8; 24], + + /// Cipher from chacha20 crate. 
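+    // (Kept pre-initialised so that apply_xor() below can seek to an arbitrary keystream offset
+    // and XOR in place; Clone and Deserialize rebuild it from `key` and `nonce` rather than
+    // copying cipher state.)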
+ cipher: XChaCha20, +} + +impl Clone for SymStreamKey { + fn clone(&self) -> Self { + SymStreamKey { + key: self.key, + nonce: self.nonce, + cipher: XChaCha20::new(&self.key.into(), &self.nonce.into()), + } + } +} + +#[derive(Serialize, Deserialize)] +struct SymKeySerialisable { + key: [u8; 32], + nonce: [u8; 24], +} + +impl Serialize for SymStreamKey { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + SymKeySerialisable { + key: self.key, + nonce: self.nonce, + } + .serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for SymStreamKey { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let SymKeySerialisable { key, nonce } = SymKeySerialisable::deserialize(deserializer)?; + Ok(SymStreamKey { + key, + nonce, + cipher: XChaCha20::new(&key.into(), &nonce.into()), + }) + } +} + +impl Debug for SymStreamKey { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "SymKey(...)") + } +} + +impl SymStreamKey { + pub fn new_random(rng: &mut R) -> Self { + let key: [u8; 32] = rng.gen(); + let nonce: [u8; 24] = rng.gen(); + let cipher = XChaCha20::new(&key.into(), &nonce.into()); + Self { key, nonce, cipher } + } + + #[inline] + pub fn apply_xor(&mut self, offset: u64, content: &mut [u8]) { + self.cipher.seek(offset); + self.cipher.apply_keystream(content); + } +} diff --git a/yama_midlevel_crypto/src/zstd_box.rs b/yama_midlevel_crypto/src/zstd_box.rs new file mode 100644 index 0000000..a8eb91c --- /dev/null +++ b/yama_midlevel_crypto/src/zstd_box.rs @@ -0,0 +1,40 @@ +use crate::byte_layer::ByteLayer; +use eyre::Context; +use std::marker::PhantomData; + +pub struct Zstd { + bytes: Vec, + marker: PhantomData, +} + +impl ByteLayer for Zstd { + fn from_byte_vec(bytes: Vec) -> Self { + Self { + bytes, + marker: Default::default(), + } + } + + fn into_byte_vec(self) -> Vec { + self.bytes + } +} + +pub const OUR_DEFAULT_COMPRESSION_LEVEL: i32 = 10; + +impl Zstd { + pub fn compress(src: T) -> Self { + let bytes = zstd::encode_all(&src.into_byte_vec()[..], OUR_DEFAULT_COMPRESSION_LEVEL) + .expect("zstd shouldn't fail to compress"); + Self { + bytes, + marker: Default::default(), + } + } + + pub fn decompress(self) -> eyre::Result { + let decompressed_bytes = + zstd::decode_all(&self.bytes[..]).context("zstd decompression failure")?; + Ok(T::from_byte_vec(decompressed_bytes)) + } +} diff --git a/yama_pile/Cargo.toml b/yama_pile/Cargo.toml new file mode 100644 index 0000000..f0af66e --- /dev/null +++ b/yama_pile/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "yama_pile" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } +yama_wormfile = { path = "../yama_wormfile" } +tracing = "0.1.37" +eyre = "0.6.8" +sha2 = "0.10.6" +rand = "0.8.5" +patricia_tree = "0.5.7" + +hex = "0.4.3" +tokio = { version = "1.27.0", features = ["io-util", "macros"] } +serde = { version = "1.0.159", features = ["derive", "rc"] } +chrono = { version = "0.4.24", features = ["serde"] } + +uuid = { version = "1.3.0", features = ["fast-rng", "v4"] } +unix_mode = "0.1.3" + +backtrace = "0.3.67" \ No newline at end of file diff --git a/yama_pile/src/bloblogs.rs b/yama_pile/src/bloblogs.rs new file mode 100644 index 0000000..5d0405f --- /dev/null +++ b/yama_pile/src/bloblogs.rs @@ -0,0 +1,211 @@ +use crate::definitions::{BlobLocator, BloblogFooter, BloblogId, PackedBloblogFooter}; +use 
crate::keyring::{Keyring, WriterKey}; +use crate::locks::{LockHandle, LockKind}; +use crate::utils::{HashedWormWriter, SymStreamReader, SymStreamWriter}; +use eyre::{bail, Context, ContextCompat}; +use rand::thread_rng; +use std::collections::BTreeMap; +use std::io::SeekFrom; +use std::sync::Arc; +use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; +use tracing::warn; +use yama_midlevel_crypto::asym_box::AsymBox; +use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_midlevel_crypto::sym_stream::SymStreamKey; +use yama_midlevel_crypto::zstd_box::Zstd; +use yama_wormfile::paths::WormPathBuf; +use yama_wormfile::{WormFileReader, WormFileWriter}; + +/// The recommended bloblog size is 2 GiB. +/// This is below the 4 GiB file size limit on FAT32. +pub const RECOMMENDED_BLOBLOG_SIZE: u64 = 2 * 1024 * 1024 * 1024; + +/// A writer for a bloblog. +pub struct BloblogWriter { + writer: SymStreamWriter>, + footer_write_key: WriterKey, + footer: BloblogFooter, + lock: Arc, + written: u64, +} + +impl BloblogWriter { + /// Creates a bloblog writer. + pub async fn new( + worm_writer: WFW, + keyring: &Keyring, + lock: Arc, + ) -> eyre::Result { + let hashed_writer = HashedWormWriter::new(worm_writer); + let mut rng = thread_rng(); + let secret_content_stream_key = SymStreamKey::new_random(&mut rng); + + let content_stream_key = keyring + .w_bloblog_contents + .as_ref() + .context("No `w_bloblog_contents` key on keyring")? + .make_locked_asymbox(CborSerde::serialise(&secret_content_stream_key)?); + + let footer_write_key = keyring + .w_bloblog_footer + .clone() + .context("No `w_bloblog_footer` key on keyring")?; + + let writer = SymStreamWriter::new(hashed_writer, secret_content_stream_key); + + Ok(Self { + writer, + footer_write_key, + footer: BloblogFooter { + content_stream_key, + chunks: Default::default(), + }, + lock, + written: 0, + }) + } + + /// Adds a chunk to the bloblog. + pub async fn write_chunk(&mut self, chunk_id: ChunkId, chunk: &[u8]) -> eyre::Result<()> { + let locator = BlobLocator { + offset: self.writer.offset(), + length: chunk.len() as u64, + }; + self.writer.write_all(&chunk).await?; + if self.footer.chunks.insert(chunk_id, locator).is_some() { + warn!("Duplicate chunk ID inserted into bloblog: {:?}", chunk_id); + } + self.written += chunk.len() as u64; + Ok(()) + } + + /// Returns true iff this bloblog writer should be finished to be close to the recommended + /// maximum size. + pub fn should_finish(&self) -> bool { + self.written >= RECOMMENDED_BLOBLOG_SIZE + } + + /// Finishes this bloblog. + /// + /// This: + /// - writes the header + /// - flushes + /// - finishes computing the hash of the file + /// - moves the bloblog to the correct place + pub async fn finish( + mut self, + ) -> eyre::Result<(WormPathBuf, BloblogId, BTreeMap)> { + self.writer.flush().await?; + let mut hashed_writer = self.writer.finish(); + + // Write the footer, then the length of the footer. 
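+        // Resulting bloblog layout, mirrored by BloblogReader::read_footer_only():
+        //   [encrypted chunk stream][packed footer (CBOR -> zstd -> encrypted+signed)][footer length: u32, big-endian]
+        // A reader locates the footer by first reading the trailing 4 bytes.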
+ let packed_footer: PackedBloblogFooter = self + .footer_write_key + .make_locked_asymbox(Zstd::compress(CborSerde::serialise(&self.footer)?)); + let footer_encoded = packed_footer.into_byte_vec(); + hashed_writer.write_all(&footer_encoded).await?; + hashed_writer.write_u32(footer_encoded.len() as u32).await?; + + hashed_writer.flush().await?; + let (mut worm_writer, file_hash) = hashed_writer.finalise(); + + let target_path = WormPathBuf::new(format!( + "bloblogs/{}/{}", + hex::encode(&file_hash.0[0..1]), + file_hash.to_string() + )) + .unwrap(); + + if !self.lock.is_active_now(LockKind::Shared) { + bail!( + "Can't complete finish() on bloblog {:?} because lock expired", + target_path + ); + } + + worm_writer.finalise(target_path.as_ref(), false).await?; + + Ok((target_path, BloblogId(file_hash), self.footer.chunks)) + } +} + +pub struct BloblogReader { + reader: SymStreamReader, + footer: BloblogFooter, +} + +impl BloblogReader { + /// Read the bloblog footer from the file only. + /// This only requires the `r_bloblog_footer` key. + pub async fn read_footer_only( + worm_reader: &mut R, + keyring: &Keyring, + ) -> eyre::Result { + let r_bloblog_footer = keyring + .r_bloblog_footer + .as_ref() + .context("No key `r_bloblog_footer` on keyring")?; + worm_reader.seek(SeekFrom::End(-4)).await?; + let footer_length = worm_reader.read_u32().await?; + + worm_reader + .seek(SeekFrom::End(-4 - footer_length as i64)) + .await?; + let mut footer_enc = vec![0u8; footer_length as usize]; + worm_reader.read_exact(&mut footer_enc).await?; + + let footer_wrapped: PackedBloblogFooter = AsymBox::from_byte_vec(footer_enc); + let footer: BloblogFooter = r_bloblog_footer + .unlock_asymbox(footer_wrapped) + .context("failed to decrypt bloblog footer")? + .decompress()? + .deserialise()?; + Ok(footer) + } + + pub async fn new(mut worm_reader: R, keyring: &Keyring) -> eyre::Result { + let r_bloblog_contents = keyring + .r_bloblog_contents + .clone() + .context("No key `r_bloblog_contents` on keyring")?; + + let footer = Self::read_footer_only(&mut worm_reader, keyring) + .await + .context("failed to read footer")?; + + let stream_key = r_bloblog_contents + .unlock_asymbox(footer.content_stream_key.clone()) + .context("failed to decrypt stream key")? 
+ .deserialise()?; + + let reader = SymStreamReader::new(worm_reader, stream_key); + + Ok(Self { reader, footer }) + } + + pub async fn read_to_buf( + &mut self, + buf: &mut Vec, + offset: u64, + read_length: u64, + ) -> eyre::Result<()> { + // eprintln!("RTB @ {offset} r{read_length}"); + self.reader.seek(SeekFrom::Start(offset)).await?; + buf.resize(read_length as usize, 0); + self.reader.read_exact(buf).await?; + Ok(()) + } + + pub async fn read_chunk(&mut self, chunk_id: ChunkId) -> eyre::Result>> { + match self.footer.chunks.get(&chunk_id) { + Some(chunk_locator) => { + let mut buf = Vec::with_capacity(chunk_locator.length as usize); + self.read_to_buf(&mut buf, chunk_locator.offset, chunk_locator.length) + .await?; + Ok(Some(buf)) + } + None => Ok(None), + } + } +} diff --git a/yama_pile/src/definitions.rs b/yama_pile/src/definitions.rs new file mode 100644 index 0000000..2db35c7 --- /dev/null +++ b/yama_pile/src/definitions.rs @@ -0,0 +1,146 @@ +use crate::keyring::Keyring; +use crate::utils::Sha256; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; +use yama_midlevel_crypto::asym_box::AsymBox; +use yama_midlevel_crypto::byte_layer::CborSerde; +use yama_midlevel_crypto::chunk_id::{ChunkId, ChunkIdKey}; +use yama_midlevel_crypto::key_derivation::KeyDerivationParameters; +use yama_midlevel_crypto::sym_box::SymBox; +use yama_midlevel_crypto::sym_stream::SymStreamKey; +use yama_midlevel_crypto::zstd_box::Zstd; + +/// The footer at the end of a bloblog. +/// This footer should be encrypted and signed. +#[derive(Serialize, Deserialize)] +pub struct BloblogFooter { + /// The key needed to read the content stream. + pub content_stream_key: AsymBox>, + + /// IDs of chunks and whereabouts they are in the bloblog. + pub chunks: BTreeMap, +} + +pub type PackedBloblogFooter = AsymBox>>; + +/// Locator for a blob within a bloblog. +#[derive(Serialize, Deserialize)] +pub struct BlobLocator { + pub offset: u64, + pub length: u64, +} + +#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)] +#[serde(transparent)] +pub struct BloblogId(pub Sha256); + +impl ToString for BloblogId { + fn to_string(&self) -> String { + self.0.to_string() + } +} + +impl TryFrom<&str> for BloblogId { + type Error = eyre::Error; + + fn try_from(value: &str) -> Result { + Sha256::try_from(value).map(BloblogId) + } +} + +impl Debug for BloblogId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "BloblogId({})", &self.0) + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)] +#[serde(transparent)] +pub struct IndexId(pub Sha256); + +impl ToString for IndexId { + fn to_string(&self) -> String { + self.0.to_string() + } +} + +impl TryFrom<&str> for IndexId { + type Error = eyre::Error; + + fn try_from(value: &str) -> Result { + Sha256::try_from(value).map(IndexId) + } +} + +impl Debug for IndexId { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "IndexId({})", &self.0) + } +} + +/// An index. +#[derive(Serialize, Deserialize)] +pub struct Index { + /// What indices this index supersedes. + /// In other words: applying this index means you can ignore the stated indices below. + pub supersedes: BTreeSet, + + /// Index + pub bloblogs: BTreeMap, +} + +/// One bloblog's entry within an index. Must contain an exhaustive list of chunks for that bloblog. 
+#[derive(Serialize, Deserialize)] +pub struct IndexBloblogEntry { + /// Chunk locators + pub chunks: BTreeMap, + + /// How much space, in bytes, has been deallocated / forgotten about + /// (the cumulative size of chunks that have been removed from the chunk map) + pub forgotten_bytes: u64, +} + +pub type PackedIndex = AsymBox>>; + +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct RecursiveChunkRef { + /// The root Chunk ID. + pub chunk_id: ChunkId, + /// The depth of the data bytes. + /// 0 means that the chunk addressed by `chunk_id` contains data bytes. + /// 1 means that the chunk addressed by `chunk_id` contains references to chunk that contain + /// data bytes. + /// (and so on) + pub depth: u32, +} + +impl Debug for RecursiveChunkRef { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}<{}>", &self.chunk_id, self.depth) + } +} + +#[derive(Clone, Serialize, Deserialize)] +#[serde(tag = "_kind")] +pub enum UnlockedOrLockedKeyring { + Locked { + deriver: KeyDerivationParameters, + lockbox: SymBox<'static, CborSerde>, + }, + Unlocked(Keyring), +} + +pub type PackedKeyring = CborSerde; + +pub const SUPPORTED_YAMA_PILE_VERSION: &'static str = "yama v0.7.0 pile format"; + +#[derive(Clone, Serialize, Deserialize)] +pub struct PileConfig { + pub yama_pile_version: String, + pub chunk_id_key: ChunkIdKey, + pub zstd_dict: Option>>, +} + +pub type PackedPileConfig = AsymBox>; diff --git a/yama_pile/src/keyring.rs b/yama_pile/src/keyring.rs new file mode 100644 index 0000000..0427840 --- /dev/null +++ b/yama_pile/src/keyring.rs @@ -0,0 +1,53 @@ +use serde::{Deserialize, Serialize}; +use yama_midlevel_crypto::asym_box::AsymBox; +use yama_midlevel_crypto::asym_keyx::{generate_asym_keypair, DecryptingKey, EncryptingKey}; +use yama_midlevel_crypto::asym_signed::{asym_signing_keypair, SigningKey, VerifyingKey}; +use yama_midlevel_crypto::byte_layer::ByteLayer; + +#[derive(Clone, Serialize, Deserialize)] +pub struct Keyring { + pub r_config: Option, + pub w_config: Option, + + pub r_bloblog_footer: Option, + pub w_bloblog_footer: Option, + + pub r_bloblog_contents: Option, + pub w_bloblog_contents: Option, + + pub r_locks: Option, + pub w_locks: Option, + + pub r_pointer: Option, + pub w_pointer: Option, +} + +pub fn generate_r_w_keys() -> (ReaderKey, WriterKey) { + let (encrypt, decrypt) = generate_asym_keypair(); + let (sign, verify) = asym_signing_keypair(); + (ReaderKey { decrypt, verify }, WriterKey { encrypt, sign }) +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct WriterKey { + encrypt: EncryptingKey, + sign: SigningKey, +} + +impl WriterKey { + pub fn make_locked_asymbox(&self, contents: T) -> AsymBox { + AsymBox::new(contents, &self.sign, &self.encrypt).unwrap() + } +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct ReaderKey { + decrypt: DecryptingKey, + verify: VerifyingKey, +} + +impl ReaderKey { + pub fn unlock_asymbox(&self, asymbox: AsymBox) -> Option { + asymbox.unlock(&self.decrypt, &self.verify) + } +} diff --git a/yama_pile/src/lib.rs b/yama_pile/src/lib.rs new file mode 100644 index 0000000..e3b3939 --- /dev/null +++ b/yama_pile/src/lib.rs @@ -0,0 +1,249 @@ +use crate::bloblogs::{BloblogReader, BloblogWriter}; +use crate::definitions::{ + BloblogId, Index, IndexId, PackedIndex, PackedPileConfig, PileConfig, + SUPPORTED_YAMA_PILE_VERSION, +}; +use crate::keyring::Keyring; +use crate::locks::{LockHandle, LockKind}; +use crate::pointers::{PackedPointer, Pointer}; +use crate::utils::HashedWormWriter; +use 
eyre::{bail, Context, ContextCompat}; +use std::collections::BTreeSet; +use std::sync::Arc; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; +use yama_midlevel_crypto::zstd_box::Zstd; +use yama_wormfile::paths::{WormPath, WormPathBuf}; +use yama_wormfile::{WormFileProvider, WormFileWriter}; + +pub mod definitions; + +pub mod bloblogs; +pub mod keyring; +pub mod locks; +pub mod pointers; +pub mod tree; +pub mod utils; + +pub const DIR_LOCKS: &'static str = "locks"; +pub const DIR_BLOBLOGS: &'static str = "bloblogs"; +pub const DIR_INDICES: &'static str = "indices"; +pub const FILE_YAMA_CONFIG: &'static str = "yama.cfg"; +pub const FILE_YAMA_CONNECTOR: &'static str = "yama.toml"; +pub const FILE_MASTER_KEYRING: &'static str = "master.yamakeyring"; + +pub struct Pile { + provider: Arc, + lock: Arc, + keyring: Arc, + pub pile_config: Arc, +} + +impl Pile { + pub async fn open_manual( + provider: Arc, + lock_kind: LockKind, + holder: String, + keyring: Keyring, + ) -> eyre::Result { + let lock = Arc::new(LockHandle::new(provider.clone(), lock_kind, holder, &keyring).await?); + let keyring = Arc::new(keyring); + + let r_config = keyring.r_config.as_ref().context("No r_config key")?; + + let pile_config: PileConfig = { + let mut file = provider.read(WormPath::new("yama.cfg").unwrap()).await?; + let mut buf = Vec::new(); + file.read_to_end(&mut buf).await?; + let packed = PackedPileConfig::from_byte_vec(buf); + r_config + .unlock_asymbox(packed) + .context("Failed to decrypt pile config")? + .deserialise() + .context("Failed to deserialise pile config")? + }; + + if &pile_config.yama_pile_version != SUPPORTED_YAMA_PILE_VERSION { + bail!( + "Unsupported pile version: {:?} (expected {:?})", + pile_config.yama_pile_version, + SUPPORTED_YAMA_PILE_VERSION + ); + } + + Ok(Pile { + provider, + lock, + keyring, + pile_config: Arc::new(pile_config), + }) + } + + /// Creates a new bloblog writer. + /// + /// Requires key: w_bloblog_footer, w_bloblog_contents + pub async fn create_bloblog(&self) -> eyre::Result> { + if !self.lock.is_active_now(LockKind::Shared) { + bail!("can't create bloblog: lock not active"); + } + let writer = BloblogWriter::new( + self.provider.write().await?, + &self.keyring, + self.lock.clone(), + ) + .await?; + Ok(writer) + } + + pub async fn read_bloblog( + &self, + bloblog_id: BloblogId, + ) -> eyre::Result> { + let bloblog_path = WormPathBuf::new(format!( + "bloblogs/{}/{}", + hex::encode(&bloblog_id.0 .0[0..1]), + bloblog_id.0.to_string() + )) + .unwrap(); + let worm_reader = self.provider.read(&bloblog_path).await?; + Ok(BloblogReader::new(worm_reader, &self.keyring).await?) + } + + /// Create a new index, returning the index ID. + /// + /// Requires key: w_bloblog_footer + pub async fn create_index(&self, index: &Index) -> eyre::Result { + let worm_writer = self.provider.write().await?; + let mut writer = HashedWormWriter::new(worm_writer); + let packed_index: PackedIndex = self + .keyring + .w_bloblog_footer + .as_ref() + .context("No w_bloblog_footer key")? + .make_locked_asymbox(Zstd::compress(CborSerde::serialise(index)?)); + writer.write_all(&packed_index.into_byte_vec()).await?; + let (mut worm_writer, hash) = writer.finalise(); + + let target = WormPathBuf::new(format!("indices/{}", hash)).unwrap(); + worm_writer.flush().await?; + worm_writer.finalise(target.as_ref(), false).await?; + + Ok(IndexId(hash)) + } + + /// List all indices present in the pile. 
+ pub async fn list_indices(&self) -> eyre::Result> { + let files = self + .provider + .list(WormPath::new("indices").unwrap()) + .await + .context("failed to list indices")?; + let mut result = BTreeSet::new(); + for file in files { + let (_, filename) = file + .as_ref() + .as_str() + .rsplit_once('/') + .context("index listing entry should split at /")?; + let index_id = IndexId::try_from(filename) + .with_context(|| format!("not a valid index ID: {filename:?}"))?; + result.insert(index_id); + } + Ok(result) + } + + /// Read an index from the pile. + /// + /// Requires key: r_bloblog_footer + pub async fn read_index(&self, index_id: IndexId) -> eyre::Result { + let r_bloblog_footer = self + .keyring + .r_bloblog_footer + .as_ref() + .context("No r_bloblog_footer key")?; + let target = WormPathBuf::new(format!("indices/{}", index_id.0)).unwrap(); + let mut reader = self.provider.read(target.as_ref()).await?; + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).await?; + let packed_index = PackedIndex::from_byte_vec(buf); + let index = r_bloblog_footer + .unlock_asymbox(packed_index) + .context("can't unlock packed index")? + .decompress()? + .deserialise()?; + Ok(index) + } + + pub async fn read_pointer(&self, name: &str) -> eyre::Result> { + let r_pointer = self + .keyring + .r_pointer + .as_ref() + .context("No r_pointer key")?; + + let pointer_path = WormPath::new("pointers") + .unwrap() + .join(name) + .with_context(|| format!("bad pointer name {name:?}"))?; + + if !self.provider.is_regular_file(pointer_path.as_ref()).await? { + return Ok(None); + } + + let mut reader = self.provider.read(pointer_path.as_ref()).await?; + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).await?; + + let packed_pointer = PackedPointer::from_byte_vec(buf); + + Ok(Some( + r_pointer + .unlock_asymbox(packed_pointer) + .context("failed to decrypt packed pointer")? + .decompress() + .context("failed to decompress packed pointer")? 
+ .deserialise() + .context("failed to deserialise packed pointer")?, + )) + } + + pub async fn write_pointer( + &self, + name: &str, + replace: bool, + data: &Pointer, + ) -> eyre::Result<()> { + let w_pointer = self + .keyring + .w_pointer + .as_ref() + .context("No w_pointer key")?; + + let packed_pointer: PackedPointer = w_pointer.make_locked_asymbox(Zstd::compress( + CborSerde::serialise(data).context("can't serialise pointer")?, + )); + + let pointer_path = WormPath::new("pointers") + .unwrap() + .join(name) + .with_context(|| format!("bad pointer name {name:?}"))?; + + let mut writer = self.provider.write().await?; + writer.write_all(&packed_pointer.into_byte_vec()).await?; + writer.finalise(pointer_path.as_ref(), replace).await?; + Ok(()) + } + + pub async fn close(mut self) -> eyre::Result<()> { + match Arc::try_unwrap(self.lock) { + Ok(lock) => { + lock.close().await + .context("failed to release lock gracefully")?; + } + Err(arc) => { + bail!("could not close pile gracefully; lock arc has {} strong refs and {} weak refs", Arc::strong_count(&arc), Arc::weak_count(&arc)); + } + } + Ok(()) + } +} diff --git a/yama_pile/src/locks.rs b/yama_pile/src/locks.rs new file mode 100644 index 0000000..0d54f82 --- /dev/null +++ b/yama_pile/src/locks.rs @@ -0,0 +1,358 @@ +use crate::keyring::{Keyring, ReaderKey, WriterKey}; +use chrono::{DateTime, Duration, Utc}; +use eyre::{bail, Context, ContextCompat, eyre}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::sync::{Arc, RwLock}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::sync::oneshot; +use tokio::task::JoinHandle; +use tokio::time::Instant; +use tracing::{error, info, warn}; +use uuid::Uuid; +use yama_midlevel_crypto::asym_box::AsymBox; +use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; +use yama_wormfile::paths::{WormPath, WormPathBuf}; +use yama_wormfile::{WormFileProvider, WormFileWriter}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LockDesc { + /// Expiry time of the lock. Should be renewed occasionally. + /// Should not exceed more than 10 minutes in the future. + expires_at: DateTime, + + /// Human-friendly description of who is holding the lock. + holder: String, + + /// What kind of lock this is. + kind: LockKind, +} + +pub type PackedLock = AsymBox>; + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum LockKind { + /// Lock can coexist with other Shared and PendingExclusive locks. + /// Lock may not be created when there are non-Shared locks. + Shared, + + /// Lock can coexist with Shared locks. + PendingExclusive, + + /// Lock can not coexist with other locks at all. + Exclusive, +} + +impl LockKind { + pub fn conflicts_with_lock(self, newer_lock: LockKind) -> bool { + match (self, newer_lock) { + (LockKind::Shared, LockKind::Shared) => false, + // We can add PendingExclusives after a Shared, but not the other way around. + (LockKind::Shared, LockKind::PendingExclusive) => false, + _ => true, + } + } +} + +#[derive(Debug)] +pub struct LockHandle { + /// The latest lock descriptor + lock: Arc>, + + /// Our lock ID + lock_id: String, + + /// Path to the lock + lock_path: WormPathBuf, + + /// A signal for relinquishing the lock. + lock_release_tx: Option>, + + /// Handle for waiting for a graceful shutdown of the lock. 
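+ /// Taken (set to `None`) once the renewal task has been awaited in `close`.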
+ lock_task_join_handle: Option>, +} + +impl Drop for LockHandle { + fn drop(&mut self) { + if let Some(lock_release_tx) = self.lock_release_tx + .take() { + lock_release_tx + .send(()) + .expect("can't drop lock"); + } + } +} + +impl LockHandle { + pub async fn close(mut self) -> eyre::Result<()> { + self.lock_release_tx.take().unwrap().send(()).map_err(|_| eyre!("can't drop lock"))?; + self.lock_task_join_handle.take().unwrap().await + .context("lock task fail")?; + Ok(()) + } + + pub fn is_active_at(&self, kind: LockKind, now: DateTime) -> bool { + let lock = self.lock.read().unwrap(); + lock.kind == kind || kind == LockKind::Shared && lock.expires_at > now + } + + /// Returns true iff the lock is active now. + /// This actually looks 1 minute into the future to afford some divergence for clock drift. + pub fn is_active_now(&self, kind: LockKind) -> bool { + let now = Utc::now() + Duration::minutes(1); + self.is_active_at(kind, now) + } + + pub async fn new( + provider: Arc, + kind: LockKind, + holder: String, + keyring: &Keyring, + ) -> eyre::Result { + let duration = Duration::minutes(10); + let until = Utc::now() + duration; + Self::new_until(provider, kind, holder, keyring, until, duration).await + } + + pub async fn new_until( + provider: Arc, + kind: LockKind, + holder: String, + keyring: &Keyring, + until: DateTime, + duration: Duration, + ) -> eyre::Result { + let lock_desc = LockDesc { + expires_at: until, + holder, + kind, + }; + let w_locks = keyring.w_locks.clone().context("No w_locks key")?; + let r_locks = keyring.r_locks.clone().context("No r_locks key")?; + + 'retry: loop { + let lock_id = Uuid::new_v4().to_string(); + let lock_path = WormPathBuf::new(format!("locks/{lock_id}")).unwrap(); + let (lock_release_tx, lock_release_rx) = oneshot::channel(); + + let now = Utc::now(); + + // Stage 1: create lock + let stage1_locks = scan_locks(provider.as_ref(), &r_locks, now).await?; + if let Some(blocker) = find_lock_blocker(&stage1_locks, &lock_id, kind) { + let lock = &stage1_locks[blocker]; + warn!("{:?} lock {} held by {} currently expiring at {} is blocking our potential lock.", lock.kind, lock_id, lock.holder, lock.expires_at); + + tokio::time::sleep(tokio::time::Duration::from_secs( + (lock.expires_at - now).num_seconds().max(0) as u64 + 10, + )) + .await; + continue 'retry; + } + + let mut writer = provider.write().await?; + let packed_lock: PackedLock = + w_locks.make_locked_asymbox(CborSerde::serialise(&lock_desc)?); + writer.write_all(&packed_lock.into_byte_vec()).await?; + writer.flush().await?; + writer.finalise(lock_path.as_ref(), false).await?; + + // Stage 2: confirm lock + let stage2_locks = scan_locks(provider.as_ref(), &r_locks, now).await?; + if let Some(blocker) = find_lock_blocker(&stage2_locks, &lock_id, kind) { + let lock = &stage2_locks[blocker]; + warn!("{:?} lock {} held by {} currently expiring at {} blocked our lock; backing out.", lock.kind, lock_id, lock.holder, lock.expires_at); + + // Back out our lock. + provider.delete(lock_path.as_ref()).await?; + tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; + continue 'retry; + } + + info!("Acqured {:?} lock {}", kind, lock_id); + + let lock = Arc::new(RwLock::new(lock_desc)); + let lock2 = lock.clone(); + + // Good. Now start a background task for refreshing it as necessary. + // TODO spawn this onto a joinset and then make sure we release locks at end of program... 
+ let lock_path2 = lock_path.clone(); + let lock_task_join_handle = Some(tokio::spawn(async move { + if let Err(err) = lock_renewal( + provider, + lock_path2, + lock2, + lock_release_rx, + w_locks, + duration, + ) + .await + { + error!("Lock renewal task failed: {err:?}"); + } + })); + + break Ok(LockHandle { + lock, + lock_path, + lock_id, + lock_release_tx: Some(lock_release_tx), + lock_task_join_handle + }); + } + } + + pub async fn upgrade_pending_exclusive( + &self, + provider: Arc, + r_locks: &ReaderKey, + w_locks: &WriterKey, + ) -> eyre::Result<()> { + 'retry: loop { + if !self.is_active_now(LockKind::PendingExclusive) { + bail!("PendingExclusive not active: can't upgrade to Exclusive"); + } + + let now = Utc::now(); + let locks = scan_locks(provider.as_ref(), r_locks, Utc::now()).await?; + if let Some((conflicting_lock_id, conflicting_lock)) = + locks.iter().find(|(lock_id, _)| lock_id != &&self.lock_id) + { + warn!("Conflicting {:?} lock {} held by {:?} expiring at {} is blocking us from upgrading to Exclusive", conflicting_lock.kind, conflicting_lock_id, conflicting_lock.holder, conflicting_lock.expires_at); + tokio::time::sleep(tokio::time::Duration::from_secs( + (conflicting_lock.expires_at - now).num_seconds().max(0) as u64 + 10, + )) + .await; + continue 'retry; + } + + let mut lock = self.lock.write().unwrap(); + lock.kind = LockKind::Exclusive; + let mut writer = provider.write().await?; + let packed_lock: PackedLock = + w_locks.make_locked_asymbox(CborSerde::serialise(&*lock)?); + writer.write_all(&packed_lock.into_byte_vec()).await?; + writer.flush().await?; + writer.finalise(self.lock_path.as_ref(), true).await?; + + break Ok(()); + } + } +} + +async fn lock_renewal( + provider: Arc, + lock_path: WormPathBuf, + lock: Arc>, + mut lock_release_rx: oneshot::Receiver<()>, + w_locks: WriterKey, + duration: Duration, +) -> eyre::Result<()> { + loop { + let next_renewal = { + let lock = lock.read().unwrap(); + let secs_until_renewal = ((lock.expires_at - Utc::now()).num_seconds() - 60).max(0); + Instant::now() + tokio::time::Duration::from_secs(secs_until_renewal as u64) + }; + + tokio::select! { + _ = &mut lock_release_rx => { + provider.delete(lock_path.as_ref()).await + .context("failed to delete lock upon release")?; + break Ok(()); + }, + _ = tokio::time::sleep_until(next_renewal) => { + // nop. 
+ } + } + + let old_lock_desc: LockDesc = { + let lock = lock.read().unwrap(); + lock.clone() + }; + + let now = Utc::now(); + if now > old_lock_desc.expires_at { + error!("Missed lock renewal; {now} > {}", old_lock_desc.expires_at); + } + + let new_lock_desc = LockDesc { + expires_at: now + duration, + ..old_lock_desc.clone() + }; + + let mut writer = provider + .write() + .await + .context("failed to acquire writing handle for write of packed lock")?; + let packed_lock: PackedLock = w_locks.make_locked_asymbox( + CborSerde::serialise(&new_lock_desc) + .context("failed to serialise new lock description")?, + ); + writer + .write_all(&packed_lock.into_byte_vec()) + .await + .context("failed to write packed lock")?; + writer + .flush() + .await + .context("failed to flush write of packed lock")?; + writer + .finalise(lock_path.as_ref(), true) + .await + .context("failed to finalise write of packed lock")?; + + *(lock.write().unwrap()) = new_lock_desc; + } +} + +async fn scan_locks( + provider: &impl WormFileProvider, + r_locks: &ReaderKey, + now: DateTime, +) -> eyre::Result> { + let mut result = BTreeMap::new(); + + let files = provider + .list(WormPath::new("locks").unwrap()) + .await + .context("failed to list locks")?; + for file in files { + let (_, lock_id) = file + .as_ref() + .as_str() + .rsplit_once('/') + .context("bad lock split")?; + + let mut reader = provider.read(&file).await?; + let mut buf = Vec::new(); + reader.read_to_end(&mut buf).await?; + let packed_lock = PackedLock::from_byte_vec(buf); + let lock_desc = r_locks + .unlock_asymbox(packed_lock) + .context("failed to decrypt lock")? + .deserialise()?; + + if lock_desc.expires_at > now { + result.insert(lock_id.to_owned(), lock_desc); + } + } + + Ok(result) +} + +fn find_lock_blocker<'a>( + locks: &'a BTreeMap, + our_lock_id: &'a str, + our_lock_kind: LockKind, +) -> Option<&'a str> { + for (lock_id, lock_desc) in locks { + if lock_id == our_lock_id { + continue; + } + if lock_desc.kind.conflicts_with_lock(our_lock_kind) { + return Some(lock_id); + } + } + None +} diff --git a/yama_pile/src/pointers.rs b/yama_pile/src/pointers.rs new file mode 100644 index 0000000..aed0a2c --- /dev/null +++ b/yama_pile/src/pointers.rs @@ -0,0 +1,19 @@ +use crate::tree::RootTreeNode; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use yama_midlevel_crypto::asym_box::AsymBox; +use yama_midlevel_crypto::byte_layer::CborSerde; +use yama_midlevel_crypto::zstd_box::Zstd; + +/// Pointer +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Pointer { + pub parent: Option, + #[serde(flatten)] + pub root: RootTreeNode, + pub uids: BTreeMap, + pub gids: BTreeMap, +} + +/// Pointer file as stored on disk +pub type PackedPointer = AsymBox>>; diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs new file mode 100644 index 0000000..3802953 --- /dev/null +++ b/yama_pile/src/tree.rs @@ -0,0 +1,978 @@ +/* +This file is part of Yama. + +Yama is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Yama is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Yama. If not, see . 
+*/ + +use serde::{Deserialize, Serialize}; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::fmt::{Debug, Formatter}; +use std::fs::Metadata; +use std::os::unix::fs::MetadataExt; + +use crate::definitions::RecursiveChunkRef; +use crate::tree::unpopulated::ScanEntry; +use eyre::{bail, Context, ContextCompat}; +use patricia_tree::PatriciaMap; + +pub mod unpopulated; + +/// Given a file's metadata, returns the mtime in milliseconds. +pub fn mtime_msec(metadata: &Metadata) -> u64 { + (metadata.mtime() * 1000 + metadata.mtime_nsec() / 1_000_000) as u64 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RootTreeNode { + pub name: String, + pub node: TreeNode, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +pub enum TreeNode { + NormalFile { + /// modification time in ms + mtime: u64, + #[serde(flatten)] + ownership: FilesystemOwnership, + #[serde(flatten)] + permissions: FilesystemPermissions, + size: u64, + #[serde(flatten)] + content: RecursiveChunkRef, + }, + Directory { + #[serde(flatten)] + ownership: FilesystemOwnership, + #[serde(flatten)] + permissions: FilesystemPermissions, + children: BTreeMap, + }, + SymbolicLink { + #[serde(flatten)] + ownership: FilesystemOwnership, + target: String, + }, + // TODO is there any other kind of file we need to store? + Deleted, +} + +impl TreeNode { + pub fn metadata_invalidates(&self, other: &TreeNode) -> eyre::Result { + Ok(match self { + TreeNode::NormalFile { + mtime, + ownership, + permissions, + size, + .. + } => { + if let TreeNode::NormalFile { + mtime: other_mtime, + ownership: other_ownership, + permissions: other_permissions, + size: other_size, + .. + } = other + { + mtime != other_mtime + || size != other_size + || ownership != other_ownership + || permissions != other_permissions + } else { + true + } + } + TreeNode::Directory { + ownership, + permissions, + children, + } => { + if let TreeNode::Directory { + ownership: other_ownership, + permissions: other_permissions, + children: other_children, + } = other + { + if ownership != other_ownership || permissions != other_permissions { + return Ok(true); + } + if children.len() != other_children.len() { + return Ok(true); + } + for ((left_name, left_node), (right_name, right_node)) in + children.iter().zip(other_children.iter()) + { + if left_name != right_name || left_node.metadata_invalidates(right_node)? { + return Ok(true); + } + } + false + } else { + true + } + } + TreeNode::SymbolicLink { ownership, target } => { + if let TreeNode::SymbolicLink { + ownership: other_ownership, + target: other_target, + } = other + { + ownership != other_ownership || target != other_target + } else { + true + } + } + TreeNode::Deleted => { + // unreachable + bail!("Why is Deleted here?"); + } + }) + } + + pub fn count_normal_files(&self) -> u32 { + match self { + TreeNode::NormalFile { .. } => 1, + TreeNode::Directory { children, .. } => children + .values() + .map(|child| child.count_normal_files()) + .sum(), + _ => 0, + } + } + + pub fn visit eyre::Result<()>>( + &self, + func: &mut F, + prefix: String, + ) -> eyre::Result<()> { + func(self, &prefix)?; + if let TreeNode::Directory { children, .. 
} = &self { + for (name, child) in children.iter() { + if prefix.is_empty() { + // don't want a slash prefix + child.visit(func, name.clone())?; + } else { + child.visit(func, format!("{}/{}", prefix, name))?; + } + } + } + Ok(()) + } + + pub fn visit_mut eyre::Result<()>>( + &mut self, + func: &mut F, + prefix: String, + ) -> eyre::Result<()> { + func(self, &prefix)?; + if let TreeNode::Directory { children, .. } = self { + for (name, child) in children.iter_mut() { + if prefix.is_empty() { + // don't want a slash prefix + child.visit_mut(func, name.clone())?; + } else { + child.visit_mut(func, format!("{}/{}", prefix, name))?; + } + } + } + Ok(()) + } + + pub fn exists(&self, pieces: &[&str]) -> bool { + match pieces.first() { + None => true, + Some(subpath) => { + if let TreeNode::Directory { children, .. } = self { + match children.get(*subpath) { + None => false, + Some(child) => child.exists(&pieces[1..]), + } + } else { + false + } + } + } + } + + /// Recurses into a child by name, or returns Err with a reason. + pub fn child(&mut self, name: &str) -> Result<&mut TreeNode, &'static str> { + match self { + TreeNode::NormalFile { .. } => Err("not a directory: normal file"), + TreeNode::Directory { children, .. } => match children.get_mut(name) { + None => Err("child not in directory"), + Some(node) => Ok(node), + }, + TreeNode::SymbolicLink { .. } => Err("not a directory: symlink"), + TreeNode::Deleted => Err("not a directory: deleted"), + } + } + + /// Flattens this treenode to a PatriciaMap of ScanEntries. + pub fn flatten(&self) -> eyre::Result> { + let mut out = PatriciaMap::new(); + Self::flatten_impl(self, "", &mut out)?; + Ok(out) + } + + fn flatten_impl( + tree_node: &TreeNode, + prefix: &str, + out: &mut PatriciaMap, + ) -> eyre::Result<()> { + match tree_node { + TreeNode::NormalFile { + mtime, + ownership, + permissions, + size, + content: _, + } => { + out.insert( + prefix, + ScanEntry::NormalFile { + mtime: *mtime, + ownership: ownership.clone(), + permissions: permissions.clone(), + size: *size, + }, + ); + } + TreeNode::Directory { + ownership, + permissions, + children, + } => { + out.insert( + prefix, + ScanEntry::Directory { + ownership: ownership.clone(), + permissions: permissions.clone(), + }, + ); + + for (child_name, node) in children { + let new_prefix = if prefix.is_empty() { + child_name.clone() + } else { + format!("{}/{}", prefix, child_name) + }; + Self::flatten_impl(node, &new_prefix, out)?; + } + } + TreeNode::SymbolicLink { ownership, target } => { + out.insert( + prefix, + ScanEntry::SymbolicLink { + ownership: ownership.clone(), + target: target.clone(), + }, + ); + } + TreeNode::Deleted => { + bail!("found Deleted at {prefix:?} when flattening"); + } + } + Ok(()) + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct FilesystemOwnership { + pub uid: u16, + pub gid: u16, +} + +impl Debug for FilesystemOwnership { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.uid, self.gid) + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct FilesystemPermissions { + pub mode: u32, +} + +impl Debug for FilesystemPermissions { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", unix_mode::to_string(self.mode)) + } +} + +/// Differentiates a node in place. +/// This makes `old` the parent of `new` (though it is up to the caller to properly update the +/// `PointerData` to reflect this!). 
+/// Loosely speaking, `new` is modified to contain the differences that, when applied to `old`, will +/// result in the original value of `new`. +/// See `integrate_node_in_place` for the inverse of this operation. +pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> eyre::Result<()> { + if let TreeNode::Directory { children, .. } = new { + if let TreeNode::Directory { + children: old_children, + .. + } = old + { + for (name, old_node) in old_children.iter() { + match children.entry(name.clone()) { + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(TreeNode::Deleted); + } + Entry::Occupied(occupied_entry) => { + if !occupied_entry.get().metadata_invalidates(old_node)? { + // The entry is the same, so we don't need to store it! + occupied_entry.remove_entry(); + } else { + differentiate_node_in_place(occupied_entry.into_mut(), old_node)?; + } + } + } + } + } + } + Ok(()) +} + +/// Integrates a node in place. +/// This makes `new` no longer have a parent (remember, the caller is responsible for updating +/// `PointerData` appropriately if needed to reflect this). +/// +/// Loosely speaking, `new` is treated as a set of differences that are applied to `old`, though the +/// result is in-place. +/// +/// Preconditions: +/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.) +/// - `old` is the parent of `new` +pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) { + if let TreeNode::Directory { children, .. } = new { + if let TreeNode::Directory { + children: old_children, + .. + } = old + { + for (name, node) in old_children.iter() { + match children.entry(name.clone()) { + Entry::Vacant(entry) => { + entry.insert(node.clone()); + } + Entry::Occupied(entry) => { + if entry.get() == &TreeNode::Deleted { + // We don't insert the old node but we do remove the 'deleted' marker + // node! + entry.remove(); + } else { + integrate_node_in_place(entry.into_mut(), node); + } + } + } + } + } + } else { + // the node stays the same... + // intentional NOP! + } +} + +pub fn assemble_tree_from_scan_entries( + scan: PatriciaMap, + mut chunkings: PatriciaMap<(RecursiveChunkRef, u64)>, +) -> eyre::Result { + let mut dirs: BTreeMap> = BTreeMap::new(); + // special-case the root ("") + dirs.insert(String::new(), BTreeMap::new()); + + for (key, entry) in scan.into_iter() { + let key_string = String::from_utf8(key).context("bad UTF-8 in PMap")?; + let (parent_dir_name, child_name) = + key_string.rsplit_once('/').unwrap_or(("", &key_string)); + match entry { + ScanEntry::NormalFile { + mtime, + ownership, + permissions, + size: _unverified_size_ignore, + } => { + let (content, size) = chunkings + .remove(&key_string) + .context("bad chunkings PMap: missing entry")?; + + // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. + // That's fine. We'll patch this up later. + dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? + .insert( + child_name.to_owned(), + TreeNode::NormalFile { + mtime, + ownership, + permissions, + size, + content, + }, + ); + } + ScanEntry::Directory { + ownership, + permissions, + } => { + dirs.insert(key_string.clone(), BTreeMap::new()); + // note: for the root, this inserts the root directory entry as a child called "" within the root. + // That's fine. We'll patch this up later. + dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? 
+ .insert( + child_name.to_owned(), + TreeNode::Directory { + ownership, + permissions, + children: BTreeMap::new(), + }, + ); + } + ScanEntry::SymbolicLink { ownership, target } => { + // note: for the root, this inserts the root symlink entry as a child called "" within a fake root 'directory'. + // That's fine. We'll patch this up later. + dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? + .insert( + child_name.to_owned(), + TreeNode::SymbolicLink { ownership, target }, + ); + } + } + } + + // Now roll up the directories. In Rustc v1.66 it'd be nice to use pop_last()... + while let Some(last_key) = dirs.keys().last().cloned() { + let mut last_children = dirs.remove(&last_key).unwrap(); + if last_key.is_empty() { + assert!( + dirs.is_empty(), + "when pulling out root pseudo-dir, dirs must be empty for roll-up." + ); + + let mut real_root = last_children.remove("").unwrap(); + if let TreeNode::Directory { children, .. } = &mut real_root { + *children = last_children; + } else if !last_children.is_empty() { + bail!("root is not a directory but it contains children..."); + } + + return Ok(real_root); + } + + // We want to roll up the directory last/key -> {child -> ...} + // so last -> {key -> {child -> ...}} + let (parent_dir, child_name) = last_key.rsplit_once('/').unwrap_or(("", &last_key)); + let parent = dirs + .get_mut(parent_dir) + .context("bad PMap? no parent in rollup")?; + let child_in_parent = parent + .get_mut(child_name) + .context("dir child not populated")?; + if let TreeNode::Directory { children, .. } = child_in_parent { + *children = last_children; + } else { + bail!("child in parent not a directory..."); + } + } + + bail!("no root found; bad PMap or bad roll-up???"); +} + +#[cfg(test)] +mod tests { + use crate::definitions::RecursiveChunkRef; + use crate::tree::{ + differentiate_node_in_place, integrate_node_in_place, FilesystemOwnership, + FilesystemPermissions, TreeNode, + }; + use std::collections::BTreeMap; + use yama_midlevel_crypto::chunk_id::ChunkId; + + #[test] + pub fn test_differentiate_in_place_primitive() { + let mut new = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: BTreeMap::new(), + }; + let old = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: BTreeMap::new(), + }; + assert!(differentiate_node_in_place(&mut new, &old).is_ok()); + + assert_eq!( + new, + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: BTreeMap::new() + } + ); + } + + /// Tests the creation of a file in the tree. 
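+ /// `bob` is present only in the new tree, so the resulting diff should contain just `bob`.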
+ #[test] + pub fn test_differentiate_in_place_create_only() { + let alice = TreeNode::NormalFile { + mtime: 98347523, + ownership: FilesystemOwnership { uid: 43, gid: 48 }, + permissions: FilesystemPermissions { mode: 1338 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([36; 32]), + depth: 22, + }, + }; + let bob_new = TreeNode::SymbolicLink { + ownership: FilesystemOwnership { uid: 43, gid: 48 }, + target: "alice".to_string(), + }; + + let mut children_new = BTreeMap::new(); + children_new.insert("bob".to_owned(), bob_new.clone()); + children_new.insert("alice".to_owned(), alice.clone()); + + let mut children_old = BTreeMap::new(); + children_old.insert("alice".to_owned(), alice.clone()); + + let mut new = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: children_new.clone(), + }; + let old = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 41, gid: 46 }, + permissions: FilesystemPermissions { mode: 1336 }, + children: children_old.clone(), + }; + + let mut children_result = BTreeMap::new(); + children_result.insert("bob".to_owned(), bob_new); + + assert!(differentiate_node_in_place(&mut new, &old).is_ok()); + assert_eq!( + new, + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: children_result + } + ); + } + + /// Tests only a change in metadata in the tree. + #[test] + pub fn test_differentiate_in_place_meta_only() { + let alice = TreeNode::NormalFile { + mtime: 98347523, + ownership: FilesystemOwnership { uid: 43, gid: 48 }, + permissions: FilesystemPermissions { mode: 1338 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([36; 32]), + depth: 22, + }, + }; + + let mut children = BTreeMap::new(); + children.insert("alice".to_owned(), alice); + + let mut new = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: children.clone(), + }; + let old = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 41, gid: 46 }, + permissions: FilesystemPermissions { mode: 1336 }, + children: children.clone(), + }; + + assert!(differentiate_node_in_place(&mut new, &old).is_ok()); + assert_eq!( + new, + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: BTreeMap::new() + } + ); + } + + /// Tests that nodes that vanish lead to creation of a Deleted node. 
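+ /// `bob` exists in the old tree but not the new one, so the diff should record it as `Deleted`.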
+ #[test] + pub fn test_differences_in_place_deleted() { + let alice = TreeNode::NormalFile { + mtime: 98347523, + ownership: FilesystemOwnership { uid: 43, gid: 48 }, + permissions: FilesystemPermissions { mode: 1338 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([36; 32]), + depth: 22, + }, + }; + let bob_old = TreeNode::SymbolicLink { + ownership: FilesystemOwnership { uid: 43, gid: 48 }, + target: "alice".to_string(), + }; + + let mut children_old = BTreeMap::new(); + children_old.insert("bob".to_owned(), bob_old.clone()); + children_old.insert("alice".to_owned(), alice.clone()); + + let mut children_new = BTreeMap::new(); + children_new.insert("alice".to_owned(), alice.clone()); + + let old = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 42, gid: 47 }, + permissions: FilesystemPermissions { mode: 1337 }, + children: children_old.clone(), + }; + let mut new = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 41, gid: 46 }, + permissions: FilesystemPermissions { mode: 1336 }, + children: children_new.clone(), + }; + + let mut children_result = BTreeMap::new(); + children_result.insert("bob".to_owned(), TreeNode::Deleted); + + assert!(differentiate_node_in_place(&mut new, &old).is_ok()); + assert_eq!( + new, + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 41, gid: 46 }, + permissions: FilesystemPermissions { mode: 1336 }, + children: children_result + } + ); + } + + #[test] + pub fn test_differentiate_node_in_place_mega_example() { + // TODO extend this example + let parent = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 47, gid: 49 }, + permissions: FilesystemPermissions { mode: 0660 }, + children: vec![( + "dir1".to_string(), + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 46, gid: 50 }, + permissions: FilesystemPermissions { mode: 0550 }, + children: vec![ + ( + "file1".to_string(), + TreeNode::NormalFile { + mtime: 1996, + ownership: FilesystemOwnership { uid: 54, gid: 59 }, + permissions: FilesystemPermissions { mode: 0311 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([37; 32]), + depth: 2, + }, + }, + ), + ( + "file2".to_string(), + TreeNode::NormalFile { + mtime: 1970, + ownership: FilesystemOwnership { uid: 55, gid: 60 }, + permissions: FilesystemPermissions { mode: 0321 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([42; 32]), + depth: 29, + }, + }, + ), + ] + .into_iter() + .collect(), + }, + )] + .into_iter() + .collect(), + }; + + let child_full = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 47, gid: 49 }, + permissions: FilesystemPermissions { mode: 0660 }, + children: vec![( + "dir1".to_string(), + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 46, gid: 50 }, + permissions: FilesystemPermissions { mode: 0560 }, + children: vec![ + ( + "file1".to_string(), + TreeNode::NormalFile { + mtime: 1996, + ownership: FilesystemOwnership { uid: 54, gid: 59 }, + permissions: FilesystemPermissions { mode: 0311 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([37; 32]), + depth: 2, + }, + }, + ), + ( + "file42".to_string(), + TreeNode::NormalFile { + mtime: 1970, + ownership: FilesystemOwnership { uid: 55, gid: 60 }, + permissions: FilesystemPermissions { mode: 0321 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([42; 32]), + depth: 29, + }, + }, + ), + ] + .into_iter() + .collect(), + }, + )] + .into_iter() + .collect(), + }; + + let mut child_diff = 
child_full.clone(); + differentiate_node_in_place(&mut child_diff, &parent).unwrap(); + + let expected_child_diff = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 47, gid: 49 }, + permissions: FilesystemPermissions { mode: 0660 }, + children: vec![( + "dir1".to_string(), + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 46, gid: 50 }, + permissions: FilesystemPermissions { mode: 0560 }, + children: vec![ + ("file2".to_string(), TreeNode::Deleted), + ( + "file42".to_string(), + TreeNode::NormalFile { + mtime: 1970, + ownership: FilesystemOwnership { uid: 55, gid: 60 }, + permissions: FilesystemPermissions { mode: 0321 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([42; 32]), + depth: 29, + }, + }, + ), + ] + .into_iter() + .collect(), + }, + )] + .into_iter() + .collect(), + }; + + assert_eq!(child_diff, expected_child_diff); + } + + #[test] + pub fn test_integrate_node_in_place_mega_example() { + // TODO extend this example + let parent = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 47, gid: 49 }, + permissions: FilesystemPermissions { mode: 0660 }, + children: vec![( + "dir1".to_string(), + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 46, gid: 50 }, + permissions: FilesystemPermissions { mode: 0550 }, + children: vec![ + ( + "file1".to_string(), + TreeNode::NormalFile { + mtime: 1996, + ownership: FilesystemOwnership { uid: 54, gid: 59 }, + permissions: FilesystemPermissions { mode: 0311 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([37; 32]), + depth: 2, + }, + }, + ), + ( + "file2".to_string(), + TreeNode::NormalFile { + mtime: 1970, + ownership: FilesystemOwnership { uid: 55, gid: 60 }, + permissions: FilesystemPermissions { mode: 0321 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([42; 32]), + depth: 29, + }, + }, + ), + ] + .into_iter() + .collect(), + }, + )] + .into_iter() + .collect(), + }; + + let child_diff = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 47, gid: 49 }, + permissions: FilesystemPermissions { mode: 0660 }, + children: vec![( + "dir1".to_string(), + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 46, gid: 50 }, + permissions: FilesystemPermissions { mode: 0560 }, + children: vec![ + ("file2".to_string(), TreeNode::Deleted), + ( + "file42".to_string(), + TreeNode::NormalFile { + mtime: 1970, + ownership: FilesystemOwnership { uid: 55, gid: 60 }, + permissions: FilesystemPermissions { mode: 0321 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([42; 32]), + depth: 29, + }, + }, + ), + ] + .into_iter() + .collect(), + }, + )] + .into_iter() + .collect(), + }; + + let mut child_full = child_diff.clone(); + integrate_node_in_place(&mut child_full, &parent).unwrap(); + + let expected_child_full = TreeNode::Directory { + ownership: FilesystemOwnership { uid: 47, gid: 49 }, + permissions: FilesystemPermissions { mode: 0660 }, + children: vec![( + "dir1".to_string(), + TreeNode::Directory { + ownership: FilesystemOwnership { uid: 46, gid: 50 }, + permissions: FilesystemPermissions { mode: 0560 }, + children: vec![ + ( + "file1".to_string(), + TreeNode::NormalFile { + mtime: 1996, + ownership: FilesystemOwnership { uid: 54, gid: 59 }, + permissions: FilesystemPermissions { mode: 0311 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([37; 32]), + depth: 2, + }, + }, + ), + ( + "file42".to_string(), + TreeNode::NormalFile { + mtime: 1970, + ownership: 
FilesystemOwnership { uid: 55, gid: 60 }, + permissions: FilesystemPermissions { mode: 0321 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([42; 32]), + depth: 29, + }, + }, + ), + ] + .into_iter() + .collect(), + }, + )] + .into_iter() + .collect(), + }; + + assert_eq!(child_full, expected_child_full); + } + + pub fn example_file() -> TreeNode { + TreeNode::NormalFile { + mtime: 424242, + ownership: FilesystemOwnership { + uid: 1042, + gid: 1043, + }, + permissions: FilesystemPermissions { mode: 0o760 }, + size: 42, + content: RecursiveChunkRef { + chunk_id: ChunkId::from([0u8; 32]), + depth: 0, + }, + } + } + + pub fn example_dir( + file1: Option<(&str, TreeNode)>, + file2: Option<(&str, TreeNode)>, + ) -> TreeNode { + let mut map = BTreeMap::new(); + if let Some((name, file)) = file1 { + map.insert(name.to_owned(), file); + } + if let Some((name, file)) = file2 { + map.insert(name.to_owned(), file); + } + TreeNode::Directory { + ownership: FilesystemOwnership { + uid: 1042, + gid: 1043, + }, + permissions: FilesystemPermissions { mode: 0o770 }, + children: map, + } + } + + #[test] + pub fn test_exists() { + let file = example_file(); + assert!(file.exists(&[])); + assert!(!file.exists(&["anything"])); + + let subdir = example_dir(Some(("fetchmailrc", example_file())), None); + let dir = example_dir(Some(("boot.img", example_file())), Some(("etc", subdir))); + assert!(dir.exists(&[])); + assert!(dir.exists(&["boot.img"])); + assert!(dir.exists(&["etc", "fetchmailrc"])); + assert!(!dir.exists(&["bin"])); + assert!(!dir.exists(&["etc", "resolv.conf"])); + assert!(!dir.exists(&["boot.img", "hehe"])); + } +} diff --git a/yama_pile/src/tree/unpopulated.rs b/yama_pile/src/tree/unpopulated.rs new file mode 100644 index 0000000..148eeca --- /dev/null +++ b/yama_pile/src/tree/unpopulated.rs @@ -0,0 +1,27 @@ +use crate::tree::{FilesystemOwnership, FilesystemPermissions}; +use serde::{Deserialize, Serialize}; + +/// A tree node, but unpopulated and not a tree. 
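+/// Unlike `TreeNode`, a `ScanEntry` carries only metadata (no children, no chunk reference); +/// a flat `PatriciaMap` of these is combined with chunk refs by +/// `assemble_tree_from_scan_entries` in the parent module to rebuild a full tree.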
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +pub enum ScanEntry { + NormalFile { + /// modification time in ms + mtime: u64, + #[serde(flatten)] + ownership: FilesystemOwnership, + #[serde(flatten)] + permissions: FilesystemPermissions, + size: u64, + }, + Directory { + #[serde(flatten)] + ownership: FilesystemOwnership, + #[serde(flatten)] + permissions: FilesystemPermissions, + }, + SymbolicLink { + #[serde(flatten)] + ownership: FilesystemOwnership, + target: String, + }, +} diff --git a/yama_pile/src/utils.rs b/yama_pile/src/utils.rs new file mode 100644 index 0000000..1a7e50e --- /dev/null +++ b/yama_pile/src/utils.rs @@ -0,0 +1,226 @@ +use eyre::{bail, Context as EyreContext}; +use serde::{Deserialize, Serialize}; +use sha2::Digest; +use std::fmt::{Debug, Display, Formatter}; +use std::io; +use std::io::SeekFrom; +use std::pin::Pin; +use std::task::{Context, Poll}; +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; +use yama_midlevel_crypto::sym_stream::SymStreamKey; +use yama_wormfile::WormFileWriter; + +pub struct HashedWormWriter { + inner: W, + hasher: sha2::Sha256, +} + +#[derive(Copy, Clone, Serialize, Deserialize, Ord, PartialOrd, Eq, PartialEq, Hash)] +#[serde(transparent)] +pub struct Sha256(pub [u8; 32]); + +impl TryFrom<&str> for Sha256 { + type Error = eyre::Error; + + fn try_from(value: &str) -> Result { + if value.len() != 64 { + bail!("sha256 hexlength not 64 chars"); + } + let decoded = hex::decode(value).context("failed to decode hex")?; + + if decoded.len() != 32 { + bail!("wrong number of decoded bytes"); + } + + let mut bytes = [0u8; 32]; + bytes.copy_from_slice(&decoded); + Ok(Sha256(bytes)) + } +} + +impl Debug for Sha256 { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "Sha256({})", hex::encode(&self.0)) + } +} + +impl Display for Sha256 { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", hex::encode(&self.0)) + } +} + +impl HashedWormWriter { + pub fn new(writer: W) -> Self { + Self { + inner: writer, + hasher: sha2::Sha256::new(), + } + } +} + +impl AsyncWrite for HashedWormWriter { + #[inline] + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let result = Pin::new(&mut self.inner).poll_write(cx, buf); + if let Poll::Ready(Ok(num_bytes_written)) = result { + // Once a write is complete, add the written bytes to the hasher. + self.hasher.update(&buf[0..num_bytes_written]); + } + result + } + + #[inline] + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.inner).poll_flush(cx) + } + + #[inline] + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.inner).poll_shutdown(cx) + } +} + +impl HashedWormWriter { + /// Finish hashing. Returns the hash and gives back the writer. + pub fn finalise(self) -> (W, Sha256) { + let mut output = [0; 32]; + output.copy_from_slice(&self.hasher.finalize()[..]); + (self.inner, Sha256(output)) + } +} + +// TODO We should consider buffering writes so we don't waste encryptions. But that would make it +// a little more complex, so will save that for later... 
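+ +// `SymStreamWriter` XORs outgoing bytes with key material derived from the absolute byte +// offset (`apply_xor`), so writes are strictly sequential; `SymStreamReader` applies the same +// transformation on reads and tracks seeks so decryption can resume at any offset.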
+pub struct SymStreamWriter { + inner: W, + offset: u64, + sym_stream_key: SymStreamKey, +} + +impl SymStreamWriter { + pub fn new(inner: W, sym_stream_key: SymStreamKey) -> Self { + Self { + inner, + offset: 0, + sym_stream_key, + } + } + + pub fn finish(self) -> W { + self.inner + } + + pub fn offset(&self) -> u64 { + self.offset + } +} + +impl AsyncWrite for SymStreamWriter { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let mut enc_buf = buf.to_vec(); + // Safety: Deny use of unencrypted `buf` from here on. + let buf = (); + let offset = self.offset; + self.sym_stream_key.apply_xor(offset, &mut enc_buf); + + let result = Pin::new(&mut self.inner).poll_write(cx, &enc_buf); + + if let Poll::Ready(Ok(num_bytes_written)) = result { + // Once a write is complete, add the offset to our internally tracked offset. + self.offset += num_bytes_written as u64; + } + + result + } + + #[inline] + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.inner).poll_flush(cx) + } + + #[inline] + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + Pin::new(&mut self.inner).poll_shutdown(cx) + } +} + +pub struct SymStreamReader { + inner: R, + offset: u64, + sym_stream_key: SymStreamKey, +} + +impl SymStreamReader { + pub fn new(inner: R, sym_stream_key: SymStreamKey) -> Self { + Self { + inner, + offset: 0, + sym_stream_key, + } + } + + pub fn finish(self) -> R { + self.inner + } + + pub fn offset(&self) -> u64 { + self.offset + } +} + +impl AsyncRead for SymStreamReader { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let old_filled = buf.filled().len(); + + let result = Pin::new(&mut self.inner).poll_read(cx, buf); + + if result.is_ready() { + let filled = buf.filled_mut(); + let new_filled = filled.len(); + let to_decrypt = &mut filled[old_filled..new_filled]; + + let offset = self.offset; + // eprintln!("read {} @ {offset}", to_decrypt.len()); + self.sym_stream_key.apply_xor(offset, to_decrypt); + self.offset += to_decrypt.len() as u64; + } + + result + } +} + +impl AsyncSeek for SymStreamReader { + fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> std::io::Result<()> { + // eprintln!("SS {position:?}"); + Pin::new(&mut self.inner).start_seek(position) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let result = Pin::new(&mut self.inner).poll_complete(cx); + if let Poll::Ready(Ok(new_offset)) = result { + // eprintln!("sought {new_offset}"); + + self.offset = new_offset; + } + result + } +} diff --git a/yama_wormfile/Cargo.toml b/yama_wormfile/Cargo.toml index 9430d59..e7680ad 100644 --- a/yama_wormfile/Cargo.toml +++ b/yama_wormfile/Cargo.toml @@ -7,4 +7,5 @@ edition = "2021" [dependencies] async-trait = "0.1.68" -tokio = { version = "1.27.0", features = ["io-util"] } \ No newline at end of file +tokio = { version = "1.27.0", features = ["io-util"] } +eyre = "0.6.8" \ No newline at end of file diff --git a/yama_wormfile/src/boxed.rs b/yama_wormfile/src/boxed.rs index 996fdf8..3459b8b 100644 --- a/yama_wormfile/src/boxed.rs +++ b/yama_wormfile/src/boxed.rs @@ -3,6 +3,7 @@ use crate::{WormFileProvider, WormFileReader, WormFileWriter}; use async_trait::async_trait; use std::error::Error; use std::fmt::{Debug, Display, Formatter}; +use std::ops::DerefMut; use std::pin::Pin; pub struct BoxErr(Box); @@ -28,40 +29,43 @@ impl BoxErr { } #[async_trait] trait 
BoxableWormFileProvider: Debug + Send + Sync { - async fn is_dir_b(&self, path: &WormPath) -> Result; - async fn is_regular_file_b(&self, path: &WormPath) -> Result; - async fn list_b(&self, path: &WormPath) -> Result, BoxErr>; - async fn read_b(&self, path: &WormPath) -> Result>, BoxErr>; - async fn write_b(&self) -> Result>, BoxErr>; + async fn is_dir_b(&self, path: &WormPath) -> eyre::Result; + async fn is_regular_file_b(&self, path: &WormPath) -> eyre::Result; + async fn list_b(&self, path: &WormPath) -> eyre::Result>; + async fn read_b(&self, path: &WormPath) -> eyre::Result>>; + async fn write_b(&self) -> eyre::Result>>; + async fn delete_b(&self, path: &WormPath) -> eyre::Result<()>; } #[async_trait] impl BoxableWormFileProvider for T { - async fn is_dir_b(&self, path: &WormPath) -> Result { - self.is_dir(path).await.map_err(BoxErr::new) + async fn is_dir_b(&self, path: &WormPath) -> eyre::Result { + self.is_dir(path).await } - async fn is_regular_file_b(&self, path: &WormPath) -> Result { - self.is_regular_file(path).await.map_err(BoxErr::new) + async fn is_regular_file_b(&self, path: &WormPath) -> eyre::Result { + self.is_regular_file(path).await } - async fn list_b(&self, path: &WormPath) -> Result, BoxErr> { - self.list(path).await.map_err(BoxErr::new) + async fn list_b(&self, path: &WormPath) -> eyre::Result> { + self.list(path).await } - async fn read_b(&self, path: &WormPath) -> Result>, BoxErr> { + async fn read_b(&self, path: &WormPath) -> eyre::Result>> { self.read(path) .await - .map_err(BoxErr::new) .map(|wfr| Box::pin(wfr) as Pin>) } - async fn write_b(&self) -> Result>, BoxErr> { + async fn write_b(&self) -> eyre::Result>> { self.write() .await - .map_err(BoxErr::new) .map(|wfw| Box::pin(wfw) as Pin>) } + + async fn delete_b(&self, path: &WormPath) -> eyre::Result<()> { + self.delete(path).await + } } #[derive(Debug)] @@ -69,44 +73,47 @@ pub struct BoxedWormFileProvider { inner: Box, } +impl BoxedWormFileProvider { + pub fn new(inner: impl WormFileProvider + 'static) -> BoxedWormFileProvider { + Self { + inner: Box::new(inner), + } + } +} + #[async_trait] impl WormFileProvider for BoxedWormFileProvider { type WormFileReader = Pin>; type WormFileWriter = Pin>; - type Error = BoxErr; - async fn is_dir(&self, path: impl AsRef + Send) -> Result { + async fn is_dir(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref(); self.inner.is_dir_b(path).await } - async fn is_regular_file( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn is_regular_file(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref(); self.inner.is_regular_file_b(path).await } - async fn list( - &self, - path: impl AsRef + Send, - ) -> Result, Self::Error> { + async fn list(&self, path: impl AsRef + Send) -> eyre::Result> { let path = path.as_ref(); self.inner.list_b(path).await } - async fn read( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref(); self.inner.read_b(path).await } - async fn write(&self) -> Result { + async fn write(&self) -> eyre::Result { self.inner.write_b().await } + + async fn delete(&self, path: impl AsRef + Send) -> eyre::Result<()> { + let path = path.as_ref(); + self.inner.delete_b(path).await + } } #[async_trait] @@ -114,7 +121,16 @@ impl WormFileReader for Pin> {} #[async_trait] impl WormFileWriter for Pin> { - async fn finalise(self, target_path: &WormPath, replace: bool) -> std::io::Result<()> { - WormFileWriter::finalise(self, 
target_path, replace).await + async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> std::io::Result<()> { + self.deref_mut().finalise(target_path, replace).await } } + +// pub struct BoxedWormFileWriter(Pin>) +// +// #[async_trait] +// impl WormFileWriter for BoxedWormFileWriter { +// async fn finalise(self, target_path: &WormPath, replace: bool) -> std::io::Result<()> { +// self.0.finalise(ztarget_path, replace).await +// } +// } diff --git a/yama_wormfile/src/lib.rs b/yama_wormfile/src/lib.rs index db53e6e..762ca52 100644 --- a/yama_wormfile/src/lib.rs +++ b/yama_wormfile/src/lib.rs @@ -1,6 +1,5 @@ use crate::paths::{WormPath, WormPathBuf}; use async_trait::async_trait; -use std::error::Error; use std::fmt::Debug; use std::io; use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite}; @@ -12,37 +11,31 @@ pub mod paths; pub trait WormFileProvider: Debug + Send + Sync { type WormFileReader: WormFileReader; type WormFileWriter: WormFileWriter; - type Error: Error + Send + Sync + 'static; + // sad because no context: + //type Error: Error + Send + Sync + 'static; /// Tests whether the path is a directory. /// Does not fail if the path does not exist, even if the parent path doesn't exist — returns /// false in that case. /// Beware! Some backends do not support the concept of a directory and will happily report /// `true` for any path. - async fn is_dir(&self, path: impl AsRef + Send) -> Result; + async fn is_dir(&self, path: impl AsRef + Send) -> eyre::Result; /// Tests whether the path is a regular file. /// Does not fail if the path does not exist, even if the parent path doesn't exist — returns /// false in that case. - async fn is_regular_file(&self, path: impl AsRef + Send) - -> Result; + async fn is_regular_file(&self, path: impl AsRef + Send) -> eyre::Result; /// Lists all the files and directories in the specified path. /// /// If the path does not exist, gives an error. /// TODO a streaming version of this might be beneficial. - async fn list( - &self, - path: impl AsRef + Send, - ) -> Result, Self::Error>; + async fn list(&self, path: impl AsRef + Send) -> eyre::Result>; /// Reads a file. /// /// Fails if the file does not exist or is not a regular file. - async fn read( - &self, - path: impl AsRef + Send, - ) -> Result; + async fn read(&self, path: impl AsRef + Send) -> eyre::Result; /// Writes to a file. /// @@ -51,16 +44,21 @@ pub trait WormFileProvider: Debug + Send + Sync { /// /// When applicable, the file is first created in the `tmp` directory during writing and then /// moved into place afterwards. - async fn write(&self) -> Result; + async fn write(&self) -> eyre::Result; + + /// Deletes the given file. + async fn delete(&self, path: impl AsRef + Send) -> eyre::Result<()>; } -pub trait WormFileReader: AsyncRead + AsyncSeek + Debug + Send + Sync + 'static {} +pub trait WormFileReader: AsyncRead + AsyncSeek + Debug + Send + Sync + Unpin + 'static {} #[async_trait] -pub trait WormFileWriter: AsyncWrite + Debug + Send + Sync + 'static { +pub trait WormFileWriter: AsyncWrite + Debug + Send + Sync + Unpin + 'static { /// Finish writing the file. /// Moves the file atomically to `target_path`. /// If `replace` is false, will not overwrite a file. (May be best-effort depending on backend; /// intended as a sanity check rather than a flawless safeguard.) - async fn finalise(self, target_path: &WormPath, replace: bool) -> io::Result<()>; + /// + /// Must only be called once, but can't consume the WormFileWriter due to object safety... 
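+ /// Backends typically stage data in a temporary location while writing and then move or + /// copy it into place at `target_path` (see the filesystem and S3 implementations).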
+ async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> io::Result<()>; } diff --git a/yama_wormfile/src/paths.rs b/yama_wormfile/src/paths.rs index 860b554..ae2c3c7 100644 --- a/yama_wormfile/src/paths.rs +++ b/yama_wormfile/src/paths.rs @@ -72,6 +72,7 @@ impl AsRef for WormPath { /// Simplified version of `PathBuf` for use in WormFile situations. /// Owned form of `WormPath`. +#[derive(Clone)] #[repr(transparent)] pub struct WormPathBuf { inner: String, diff --git a/yama_wormfile_fs/Cargo.toml b/yama_wormfile_fs/Cargo.toml index e062022..1d05373 100644 --- a/yama_wormfile_fs/Cargo.toml +++ b/yama_wormfile_fs/Cargo.toml @@ -10,4 +10,5 @@ yama_wormfile = { version = "0.1.0", path = "../yama_wormfile" } async-trait = "0.1.68" tokio = { version = "1.27.0", features = ["io-std", "fs"] } -rand = "0.8.5" \ No newline at end of file +rand = "0.8.5" +eyre = "0.6.8" \ No newline at end of file diff --git a/yama_wormfile_fs/src/lib.rs b/yama_wormfile_fs/src/lib.rs index 69468f2..f0d6464 100644 --- a/yama_wormfile_fs/src/lib.rs +++ b/yama_wormfile_fs/src/lib.rs @@ -1,4 +1,5 @@ use async_trait::async_trait; +use eyre::Context as EyreContext; use std::fmt::{Debug, Formatter}; use std::io; use std::io::{ErrorKind, SeekFrom}; @@ -39,28 +40,27 @@ impl LocalWormFilesystem { impl WormFileProvider for LocalWormFilesystem { type WormFileReader = FileWormReader; type WormFileWriter = FileWormWriter; - type Error = io::Error; - async fn is_dir(&self, path: impl AsRef + Send) -> Result { + async fn is_dir(&self, path: impl AsRef + Send) -> eyre::Result { let path = self.resolve_real_path(path.as_ref()); Ok(tokio::fs::metadata(path).await?.is_dir()) } - async fn is_regular_file( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn is_regular_file(&self, path: impl AsRef + Send) -> eyre::Result { let path = self.resolve_real_path(path.as_ref()); Ok(tokio::fs::metadata(path).await?.is_file()) } - async fn list( - &self, - path: impl AsRef + Send, - ) -> Result, Self::Error> { + async fn list(&self, path: impl AsRef + Send) -> eyre::Result> { let worm_path = path.as_ref(); let real_path = self.resolve_real_path(worm_path); - let mut dir_reader = tokio::fs::read_dir(real_path).await?; + let mut dir_reader = match tokio::fs::read_dir(real_path).await { + Ok(ok) => ok, + Err(e) if e.kind() == ErrorKind::NotFound => { + return Ok(Vec::new()); + } + Err(other) => return Err(other.into()), + }; let mut out = Vec::new(); while let Some(next_ent) = dir_reader.next_entry().await? { if let Some(name_str) = next_ent.file_name().to_str() { @@ -70,10 +70,7 @@ impl WormFileProvider for LocalWormFilesystem { Ok(out) } - async fn read( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let worm_path = path.as_ref(); let real_path = self.resolve_real_path(worm_path); let file = OpenOptions::new().read(true).open(&real_path).await?; @@ -83,10 +80,15 @@ impl WormFileProvider for LocalWormFilesystem { }) } - async fn write(&self) -> Result { + async fn write(&self) -> eyre::Result { let tmp_dir = self.root_dir.join("tmp"); - if !tokio::fs::try_exists(&tmp_dir).await? { - tokio::fs::create_dir(&tmp_dir).await?; + if !tokio::fs::try_exists(&tmp_dir) + .await + .context("can't check exists")? 
+ { + tokio::fs::create_dir(&tmp_dir) + .await + .context("can't create ")?; } let (tmp_path, file) = loop { @@ -95,13 +97,18 @@ impl WormFileProvider for LocalWormFilesystem { let try_fn = format!("pid{pid}-{rand_num:08X}.writing"); let try_path = tmp_dir.join(try_fn); - match OpenOptions::new().create_new(true).open(&try_path).await { + match OpenOptions::new() + .write(true) + .create_new(true) + .open(&try_path) + .await + { Ok(file) => break (try_path, file), Err(err) => { if err.kind() == ErrorKind::AlreadyExists { continue; } else { - return Err(err); + return Err(err).context("can't create_new in "); } } } @@ -113,6 +120,15 @@ impl WormFileProvider for LocalWormFilesystem { root_dir: self.root_dir.clone(), }) } + + async fn delete(&self, path: impl AsRef + Send) -> eyre::Result<()> { + let worm_path = path.as_ref(); + let real_path = self.resolve_real_path(worm_path); + tokio::fs::remove_file(&real_path) + .await + .with_context(|| format!("failed to remove_file({real_path:?})"))?; + Ok(()) + } } pub struct FileWormReader { @@ -183,7 +199,7 @@ impl AsyncWrite for FileWormWriter { #[async_trait] impl WormFileWriter for FileWormWriter { - async fn finalise(mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { + async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { self.flush().await?; let FileWormWriter { diff --git a/yama_wormfile_s3/Cargo.toml b/yama_wormfile_s3/Cargo.toml index be0df27..0893483 100644 --- a/yama_wormfile_s3/Cargo.toml +++ b/yama_wormfile_s3/Cargo.toml @@ -18,4 +18,9 @@ thiserror = "1.0.40" tokio-stream = "0.1.12" tokio-util = "0.7.7" bytes = "1.4.0" -uuid = { version = "1.3.0", features = ["fast-rng", "v4"] } \ No newline at end of file +uuid = { version = "1.3.0", features = ["fast-rng", "v4"] } + +eyre = "0.6.8" + +[dev-dependencies] +tokio = { version = "1.27.0", features = ["full"] } diff --git a/yama_wormfile_s3/examples/s3demo.rs b/yama_wormfile_s3/examples/s3demo.rs new file mode 100644 index 0000000..974e021 --- /dev/null +++ b/yama_wormfile_s3/examples/s3demo.rs @@ -0,0 +1,43 @@ +use s3::creds::Credentials; +use s3::{Bucket, Region}; +use std::time::Instant; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use yama_wormfile::paths::WormPath; +use yama_wormfile::{WormFileProvider, WormFileWriter}; +use yama_wormfile_s3::S3WormFilesystem; + +#[tokio::main] +async fn main() -> eyre::Result<()> { + let bucket = Bucket::new( + "yama-test-bucket", + Region::Custom { + region: "unknown".to_owned(), + endpoint: "https://gateway.storjshare.io".to_owned(), + }, + Credentials::from_env_specific(Some("S3_ACCESS"), Some("S3_SECRET"), None, None)?, + )? 
+ .with_path_style(); + + let s3fs = S3WormFilesystem::new(bucket, "yamademo/".to_owned()).unwrap(); + + let mut w = s3fs.write().await.unwrap(); + w.write_all(b"hi lols").await?; + w.flush().await?; + w.finalise(WormPath::new("worms/xyz2").unwrap(), true) + .await?; + + let start = Instant::now(); + let mut r = s3fs.read(WormPath::new("worms/xyz").unwrap()).await?; + let mut buf = Vec::new(); + r.read_u16().await?; + r.read_to_end(&mut buf).await?; + let end = Instant::now(); + + eprintln!( + "{:?} {}", + std::str::from_utf8(&buf), + (end - start).as_millis() + ); + + Ok(()) +} diff --git a/yama_wormfile_s3/src/lib.rs b/yama_wormfile_s3/src/lib.rs index a7e45ed..34340e9 100644 --- a/yama_wormfile_s3/src/lib.rs +++ b/yama_wormfile_s3/src/lib.rs @@ -35,7 +35,10 @@ impl S3WormFilesystem { format!("{}{}", self.path_prefix, path.as_ref().as_str()) } - async fn head_object(&self, full_path: &str) -> Result, S3Error> { + async fn head_object( + &self, + full_path: &str, + ) -> eyre::Result, S3Error> { let (head, status) = self.bucket.head_object(full_path).await?; if status == 404 { return Ok(None); @@ -54,24 +57,17 @@ impl S3WormFilesystem { impl WormFileProvider for S3WormFilesystem { type WormFileReader = S3WormReader; type WormFileWriter = S3WormWriter; - type Error = S3Error; - async fn is_dir(&self, _path: impl AsRef + Send) -> Result { + async fn is_dir(&self, _path: impl AsRef + Send) -> eyre::Result { Ok(true) } - async fn is_regular_file( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn is_regular_file(&self, path: impl AsRef + Send) -> eyre::Result { let full_path = self.resolve_real_path(path.as_ref()); Ok(self.head_object(&full_path).await?.is_some()) } - async fn list( - &self, - path: impl AsRef + Send, - ) -> Result, Self::Error> { + async fn list(&self, path: impl AsRef + Send) -> eyre::Result> { let path = path.as_ref(); let full_path = self.resolve_real_path(path); let list = self @@ -92,10 +88,7 @@ impl WormFileProvider for S3WormFilesystem { .collect()) } - async fn read( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref(); let full_path = self.resolve_real_path(path); let head = self.head_object(&full_path).await?.ok_or_else(|| { @@ -116,7 +109,7 @@ impl WormFileProvider for S3WormFilesystem { }) } - async fn write(&self) -> Result { + async fn write(&self) -> eyre::Result { let (tx, mut rx) = duplex(8192); // The rx half won't be doing any writing. 
rx.shutdown().await?; @@ -139,12 +132,18 @@ impl WormFileProvider for S3WormFilesystem { Ok(S3WormWriter { tx, - join_handle, + join_handle: Some(join_handle), temp_path, bucket: self.bucket.clone(), path_prefix: self.path_prefix.clone(), }) } + + async fn delete(&self, path: impl AsRef + Send) -> eyre::Result<()> { + let full_path = self.resolve_real_path(path.as_ref()); + self.bucket.delete_object(full_path).await?; + Ok(()) + } } pub struct S3WormReader { @@ -229,7 +228,7 @@ impl AsyncRead for S3WormReader { let (reader, read_range) = self.reader.as_mut().unwrap(); let orig_remaining = buf.remaining(); let read = ready!(Pin::new(reader).poll_read(cx, buf)); - let bytes_read = (buf.remaining() - orig_remaining) as u64; + let bytes_read = (orig_remaining - buf.remaining()) as u64; if bytes_read == 0 && read_range.start != read_range.end { // Unexpected EOF @@ -298,7 +297,7 @@ impl WormFileReader for S3WormReader {} pub struct S3WormWriter { tx: DuplexStream, temp_path: String, - join_handle: JoinHandle>, + join_handle: Option>>, bucket: Bucket, path_prefix: String, } @@ -332,11 +331,12 @@ impl AsyncWrite for S3WormWriter { #[async_trait] impl WormFileWriter for S3WormWriter { - async fn finalise(mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { + async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { self.tx.shutdown().await?; - let resp_code = self .join_handle + .take() + .unwrap() .await? .map_err(|e| io::Error::new(ErrorKind::Other, e))?; if resp_code != 200 { @@ -349,24 +349,28 @@ impl WormFileWriter for S3WormWriter { let full_target_path = format!("{}{}", self.path_prefix, target_path.as_str()); if !replace { - let (_head, head_code) = self - .bucket - .head_object(&full_target_path) - .await - .map_err(|e| io::Error::new(ErrorKind::Other, e))?; - if head_code != 404 { - return Err(io::Error::new( - ErrorKind::Other, - "won't replace file; HEAD of target path not 404", - )); + match self.bucket.head_object(&full_target_path).await { + Ok((_head, _head_code)) => { + return Err(io::Error::new( + ErrorKind::Other, + "won't replace file; HEAD of target path not 404", + )); + } + Err(S3Error::Http(404, _)) => { + // Fine + } + Err(other) => { + return Err(io::Error::new(ErrorKind::Other, other)); + } } } // S3 moves are done as a copy + delete + let full_temp_prefix = format!("{}{}", self.path_prefix, self.temp_path); let response_code = self .bucket - .copy_object_internal(&self.temp_path, &full_target_path) + .copy_object_internal(&full_temp_prefix, &full_target_path) .await .map_err(|e| io::Error::new(ErrorKind::Other, e))?; if response_code != 200 { @@ -376,17 +380,11 @@ impl WormFileWriter for S3WormWriter { )); } - let response_code = self + let _ = self .bucket - .delete_object(&self.temp_path) + .delete_object(&full_temp_prefix) .await .map_err(|e| io::Error::new(ErrorKind::Other, e))?; - if response_code.status_code() != 200 { - return Err(io::Error::new( - ErrorKind::Other, - "non-200 response for delete object", - )); - } Ok(()) } diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml index f53b32b..6348a0c 100644 --- a/yama_wormfile_sftp/Cargo.toml +++ b/yama_wormfile_sftp/Cargo.toml @@ -14,4 +14,5 @@ openssh-sftp-client = "0.12.2" async-trait = "0.1.68" tokio = { version = "1.27.0", features = ["io-std"] } rand = "0.8.5" -thiserror = "1.0.40" \ No newline at end of file +thiserror = "1.0.40" +eyre = "0.6.8" \ No newline at end of file diff --git a/yama_wormfile_sftp/src/lib.rs 
b/yama_wormfile_sftp/src/lib.rs index 3c2ebf8..b931e9d 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -1,6 +1,7 @@ extern crate core; use async_trait::async_trait; +use eyre::{bail, Context as EyreContext, ContextCompat}; use openssh::{KnownHosts, RemoteChild, Session, Stdio}; use openssh_sftp_client::error::SftpErrorKind; use openssh_sftp_client::file::TokioCompatFile; @@ -15,8 +16,8 @@ use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use thiserror::Error; use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; +use tokio::runtime::Handle; use yama_wormfile::paths::{WormPath, WormPathBuf}; use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter}; @@ -58,7 +59,7 @@ struct FileWithSftpConn { } impl SftpConn { - pub async fn create(ssh_connect: &str, root_dir: impl Into) -> YWSResult { + pub async fn create(ssh_connect: &str, root_dir: impl Into) -> eyre::Result { let root_dir = root_dir.into(); let session = Session::connect(ssh_connect, KnownHosts::Strict).await?; @@ -71,7 +72,7 @@ impl SftpConn { .stdout(Stdio::piped()) .spawn() .await - .map_err(SftpWormFileError::from) + .map_err(|e| eyre::Error::from(e)) }) }, sftp_builder: |ssh_child| { @@ -82,7 +83,7 @@ impl SftpConn { Default::default(), ) .await - .map_err(SftpWormFileError::from) + .map_err(|e| eyre::Error::from(e)) }) }, // fs_builder: |sftp| Box::pin(async move { @@ -103,7 +104,7 @@ impl SftpConn { fs } - async fn create_dir_all(&self, worm_path_as_pathbuf: PathBuf) -> YWSResult<()> { + async fn create_dir_all(&self, worm_path_as_pathbuf: PathBuf) -> eyre::Result<()> { let mut fs = self.get_fs(); let mut stack = vec![]; @@ -118,13 +119,15 @@ impl SftpConn { stack.push(at_path); } Err(sftp_err) => { - return Err(SftpWormFileError::SftpError(sftp_err)); + return Err(sftp_err).context("other sftp err when create_dir_all (main loop)"); } } } while let Some(path) = stack.pop() { - fs.create_dir(path).await?; + fs.create_dir(path) + .await + .context("failed to create dir in create_dir_all")?; } Ok(()) @@ -137,25 +140,11 @@ impl Debug for SftpConn { } } -#[derive(Debug, Error)] -pub enum SftpWormFileError { - #[error("ssh error: {0:?}")] - SshError(#[from] openssh::Error), - - #[error("sftp error: {0:?}")] - SftpError(#[from] openssh_sftp_client::Error), - - #[error("error: {0}")] - Message(String), -} - -type YWSResult = Result; - impl SftpWormFilesystem { pub async fn new( ssh_connect: &str, root_dir: impl Into, - ) -> YWSResult { + ) -> eyre::Result { let root_dir = root_dir.into(); let conn = Arc::new(SftpConn::create(ssh_connect, &root_dir).await?); @@ -168,9 +157,7 @@ impl SftpWormFilesystem { .unwrap() .is_dir() { - return Err(SftpWormFileError::Message(format!( - "{root_dir:?} is not a dir on SFTP remote." 
- ))); + bail!("{root_dir:?} is not a dir on SFTP remote."); } Ok(SftpWormFilesystem { conn, root_dir }) @@ -187,40 +174,41 @@ impl SftpWormFilesystem { impl WormFileProvider for SftpWormFilesystem { type WormFileReader = SftpWormReader; type WormFileWriter = SftpWormWriter; - type Error = SftpWormFileError; - async fn is_dir(&self, path: impl AsRef + Send) -> Result { + async fn is_dir(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref().as_str(); let mut fs = self.get_fs(); match fs.metadata(path).await { Ok(meta) => Ok(meta.file_type().unwrap().is_dir()), Err(SftpError(SftpErrorKind::NoSuchFile, _)) => Ok(false), - Err(sftp_err) => Err(SftpWormFileError::SftpError(sftp_err)), + Err(sftp_err) => Err(sftp_err.into()), } } - async fn is_regular_file( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn is_regular_file(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref().as_str(); let mut fs = self.get_fs(); match fs.metadata(path).await { Ok(meta) => Ok(meta.file_type().unwrap().is_file()), Err(SftpError(SftpErrorKind::NoSuchFile, _)) => Ok(false), - Err(sftp_err) => Err(SftpWormFileError::SftpError(sftp_err)), + Err(sftp_err) => Err(sftp_err.into()), } } - async fn list( - &self, - path: impl AsRef + Send, - ) -> Result, Self::Error> { + async fn list(&self, path: impl AsRef + Send) -> eyre::Result> { let worm_path = path.as_ref(); let path = worm_path.as_str(); let mut fs = self.get_fs(); - let mut remote_dir = fs.open_dir(path).await?; + let mut remote_dir = match fs.open_dir(path).await { + Ok(ok) => ok, + Err(openssh_sftp_client::Error::SftpError(SftpErrorKind::NoSuchFile, _msg)) => { + return Ok(Vec::new()); + } + Err(other) => { + return Err(other.into()); + } + }; let dir_reader = remote_dir.read_dir().await?; Ok(dir_reader @@ -239,72 +227,104 @@ impl WormFileProvider for SftpWormFilesystem { .collect()) } - async fn read( - &self, - path: impl AsRef + Send, - ) -> Result { + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let real_path = self.root_dir.join(path.as_ref().as_str()); let real_path2 = real_path.clone(); // the `Send` in the below line is very important... - let file_with_conn: FileWithSftpConn = FileWithSftpConnAsyncSendTryBuilder { + let mut file_with_conn: FileWithSftpConn = FileWithSftpConnAsyncSendTryBuilder { conn: self.conn.clone(), file_builder: |conn| { Box::pin(async move { - let file = conn - .borrow_sftp() - .open(real_path) - .await - .map_err(SftpWormFileError::from)?; - Ok::<_, SftpWormFileError>(Some(TokioCompatFile::new(file))) + let file = conn.borrow_sftp().open(real_path).await?; + Ok::<_, eyre::Report>(Some(TokioCompatFile::new(file))) }) }, } .try_build() .await?; + // yucky hacks... but we need to get to the file to get the length out, so we can seek from the end... + let file_length = tokio::task::block_in_place(|| { + file_with_conn.with_file_mut(|file| { + Handle::current().block_on(async move { + file.as_mut() + .unwrap() + .metadata() + .await? + .len() + .context("no len in SFTP file metadata!") + }) + }) + })?; + Ok(SftpWormReader { path: real_path2, file_with_conn, + length: file_length, }) } - async fn write(&self) -> Result { - // let tmp_dir = self.root_dir.join("tmp"); - // if !tokio::fs::try_exists(&tmp_dir).await? 
{ - // tokio::fs::create_dir(&tmp_dir).await?; - // } - // - // let (tmp_path, file) = loop { - // let rand_num: u32 = rand::random(); - // let pid = std::process::id(); - // - // let try_fn = format!("pid{pid}-{rand_num:08X}.writing"); - // let try_path = tmp_dir.join(try_fn); - // match OpenOptions::new().create_new(true).open(&try_path).await { - // Ok(file) => break (try_path, file), - // Err(err) => { - // if err.kind() == ErrorKind::AlreadyExists { - // continue; - // } else { - // return Err(err); - // } - // } - // } - // }; - // - // Ok(SftpWormWriter { - // temp_path: tmp_path, - // file, - // root_dir: self.root_dir.clone(), - // }) - todo!() + async fn write(&self) -> eyre::Result { + if !self + .is_dir(WormPath::new("tmp").unwrap()) + .await + .context("can't check exists")? + { + self.get_fs() + .create_dir(Path::new("tmp")) + .await + .context("failed to make ")?; + } + loop { + let rand_num: u32 = rand::random(); + let pid = std::process::id(); + + let try_path = format!("tmp/pid{pid}-{rand_num:08X}.writing"); + + let real_path = self.root_dir.join(try_path.as_str()); + // the `Send` in the below line is very important... + + if self + .is_regular_file(WormPath::new(&try_path).unwrap()) + .await + .context("can't check that try_path doesn't exist")? + { + continue; + } + + let file_with_conn: FileWithSftpConn = FileWithSftpConnAsyncSendTryBuilder { + conn: self.conn.clone(), + file_builder: |conn| { + Box::pin(async move { + let file = conn.borrow_sftp().create(real_path).await?; + Ok::<_, eyre::Report>(Some(TokioCompatFile::new(file))) + }) + }, + } + .try_build() + .await?; + + break Ok(SftpWormWriter { + temp_path: WormPathBuf::new(try_path).unwrap(), + file_with_conn: Some(file_with_conn), + }); + } + } + + async fn delete(&self, path: impl AsRef + Send) -> eyre::Result<()> { + let worm_path = path.as_ref(); + let path = worm_path.as_str(); + let mut fs = self.get_fs(); + fs.remove_file(path).await?; + Ok(()) } } pub struct SftpWormReader { path: PathBuf, file_with_conn: FileWithSftpConn, + length: u64, } impl Debug for SftpWormReader { @@ -325,7 +345,13 @@ impl AsyncRead for SftpWormReader { } impl AsyncSeek for SftpWormReader { - fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { + fn start_seek(mut self: Pin<&mut Self>, mut position: SeekFrom) -> io::Result<()> { + if let SeekFrom::End(pos) = position { + // SFTP doesn't support seek from the end, so implement it manually... 
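+            // (`self.length` was captured from the file's SFTP metadata when the reader was
+            // opened, so an end-relative offset can be rewritten as an absolute position here.)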
+ position = SeekFrom::Start((self.length as i64 + pos).try_into().map_err(|_| { + io::Error::new(ErrorKind::InvalidInput, "SeekFrom::End out of bounds") + })?); + } self.file_with_conn .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).start_seek(position)) } @@ -339,9 +365,8 @@ impl AsyncSeek for SftpWormReader { impl WormFileReader for SftpWormReader {} pub struct SftpWormWriter { - temp_path: PathBuf, - file_with_conn: FileWithSftpConn, - root_dir: PathBuf, + temp_path: WormPathBuf, + file_with_conn: Option, } impl Debug for SftpWormWriter { @@ -357,11 +382,15 @@ impl AsyncWrite for SftpWormWriter { buf: &[u8], ) -> Poll> { self.file_with_conn + .as_mut() + .unwrap() .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_write(cx, buf)) } fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { self.file_with_conn + .as_mut() + .unwrap() .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_flush(cx)) } @@ -370,21 +399,23 @@ impl AsyncWrite for SftpWormWriter { cx: &mut Context<'_>, ) -> Poll> { self.file_with_conn + .as_mut() + .unwrap() .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_shutdown(cx)) } } #[async_trait] impl WormFileWriter for SftpWormWriter { - async fn finalise(mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { + async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { self.flush().await?; let SftpWormWriter { - root_dir, temp_path, - mut file_with_conn, + file_with_conn, .. } = self; + let mut file_with_conn = file_with_conn.take().unwrap(); let file = file_with_conn.with_file_mut(|file| file.take().unwrap()); file.close() @@ -394,20 +425,17 @@ impl WormFileWriter for SftpWormWriter { let conn: Arc = file_with_conn.into_heads().conn; let mut fs = conn.get_fs(); - let worm_path = target_path; - // Directories will be created as needed. - if let Some(parent) = PathBuf::from(worm_path.as_str()).parent() { + if let Some(parent) = PathBuf::from(target_path.as_str()).parent() { conn.create_dir_all(parent.to_owned()) .await - .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + .map_err(|e| io::Error::new(ErrorKind::Other, e.to_string()))?; } - // Avoid allowing a replacement if not intended. // But this is currently not atomic, so it's just a sanity check rather than a foolproof // safeguard! if !replace { - match fs.metadata(worm_path.as_str()).await { + match fs.metadata(target_path.as_str()).await { Ok(_) => { return Err(io::Error::new( ErrorKind::AlreadyExists, @@ -418,46 +446,15 @@ impl WormFileWriter for SftpWormWriter { // ideal. nop. } Err(sftp_err) => { - return Err(SftpWormFileError::SftpError(sftp_err)) - .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + return Err(io::Error::new(ErrorKind::Other, sftp_err.to_string())); } } } // Perform the move. 
- fs.rename(root_dir.join(&temp_path), root_dir.join(worm_path.as_str())) + fs.rename(&temp_path.as_ref().as_str(), target_path.as_str()) .await .map_err(|e| io::Error::new(ErrorKind::Other, e))?; Ok(()) } } - -#[tokio::test] -async fn test_lol() { - let swf = SftpWormFilesystem::new("scone@sallie", "").await.unwrap(); - let _ = swf.is_dir(WormPath::new("maddy").unwrap()).await; - - match swf.is_dir(WormPath::new("maddyss").unwrap()).await { - Ok(x) => eprintln!("{x:?}"), - Err(SftpWormFileError::SftpError(openssh_sftp_client::Error::SftpError( - openssh_sftp_client::error::SftpErrorKind::NoSuchFile, - _, - ))) => { - eprintln!("NSF"); - } - Err(other) => { - eprintln!("other {other:?}"); - } - } - - let x = swf.list(WormPath::new("maddy").unwrap()).await.unwrap(); - eprintln!("{x:?}"); - - // if let Err(e) = swf.sftp.close().await { - // eprintln!("sftp {e:?}"); - // } - // - // if let Err(e) = swf.ssh.close().await { - // eprintln!("sftp {e:?}"); - // } -} From a8e1cc45efe4c80b84f185b257ae0254aee0af62 Mon Sep 17 00:00:00 2001 From: Olivier Date: Wed, 3 May 2023 21:03:33 +0100 Subject: [PATCH 06/51] CHECKPOINT overhaul 2 --- datman_cli_readme.txt | 12 ++++++++++++ docs.old/SUMMARY.md | 11 +++++++++++ {docs => docs.old}/datman/getting_started.md | 0 {docs => docs.old}/datman/index.md | 0 {docs => docs.old}/datman/remote_backups.md | 0 {docs => docs.old}/yama/getting_started.md | 0 {docs => docs.old}/yama/index.md | 0 {docs => docs.old}/yama/internals.md | 0 .../yama/internals/pointers-and-nodes.md | 0 {docs => docs.old}/yama/internals/raw-piles.md | 0 docs/SUMMARY.md | 11 ----------- docs/yama/zstd.md | 5 +++++ yama_pile/src/tree.rs | 2 +- 13 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 datman_cli_readme.txt create mode 100644 docs.old/SUMMARY.md rename {docs => docs.old}/datman/getting_started.md (100%) rename {docs => docs.old}/datman/index.md (100%) rename {docs => docs.old}/datman/remote_backups.md (100%) rename {docs => docs.old}/yama/getting_started.md (100%) rename {docs => docs.old}/yama/index.md (100%) rename {docs => docs.old}/yama/internals.md (100%) rename {docs => docs.old}/yama/internals/pointers-and-nodes.md (100%) rename {docs => docs.old}/yama/internals/raw-piles.md (100%) create mode 100644 docs/yama/zstd.md diff --git a/datman_cli_readme.txt b/datman_cli_readme.txt new file mode 100644 index 0000000..0db6fd6 --- /dev/null +++ b/datman_cli_readme.txt @@ -0,0 +1,12 @@ + +`datman backup-one ` +`datman backup-all ` + Backs up now (either just one source or all sources) to a destination. + + With `--config <>`, use a specified Datman config file; otherwise try current directory. + + + +.. +`datman extract ` with same filtering options as now. + (unimportant: Yama tools should be decent for this right now.) 
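+
+Hypothetical example (names such as `my-database`, `my-pile` and the config path are
+illustrative only; check the CLI help for the exact argument order and flags):
+  `datman backup-one my-database my-pile`
+  `datman backup-all my-pile --config /etc/datman/datman.toml`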
diff --git a/docs.old/SUMMARY.md b/docs.old/SUMMARY.md new file mode 100644 index 0000000..851c8cf --- /dev/null +++ b/docs.old/SUMMARY.md @@ -0,0 +1,11 @@ +# Summary + +- [Yama](./yama/index.md) + - [Getting Started](./yama/getting_started.md) + - [Internals](./yama/internals.md) + - [Raw Piles](./yama/internals/raw-piles.md) + - [Pointers and Nodes](./yama/internals/pointers-and-nodes.md) +- [Datman](./datman/index.md) + - [Getting Started](./datman/getting_started.md) + - [Remote Backups](./datman/remote_backups.md) + diff --git a/docs/datman/getting_started.md b/docs.old/datman/getting_started.md similarity index 100% rename from docs/datman/getting_started.md rename to docs.old/datman/getting_started.md diff --git a/docs/datman/index.md b/docs.old/datman/index.md similarity index 100% rename from docs/datman/index.md rename to docs.old/datman/index.md diff --git a/docs/datman/remote_backups.md b/docs.old/datman/remote_backups.md similarity index 100% rename from docs/datman/remote_backups.md rename to docs.old/datman/remote_backups.md diff --git a/docs/yama/getting_started.md b/docs.old/yama/getting_started.md similarity index 100% rename from docs/yama/getting_started.md rename to docs.old/yama/getting_started.md diff --git a/docs/yama/index.md b/docs.old/yama/index.md similarity index 100% rename from docs/yama/index.md rename to docs.old/yama/index.md diff --git a/docs/yama/internals.md b/docs.old/yama/internals.md similarity index 100% rename from docs/yama/internals.md rename to docs.old/yama/internals.md diff --git a/docs/yama/internals/pointers-and-nodes.md b/docs.old/yama/internals/pointers-and-nodes.md similarity index 100% rename from docs/yama/internals/pointers-and-nodes.md rename to docs.old/yama/internals/pointers-and-nodes.md diff --git a/docs/yama/internals/raw-piles.md b/docs.old/yama/internals/raw-piles.md similarity index 100% rename from docs/yama/internals/raw-piles.md rename to docs.old/yama/internals/raw-piles.md diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 851c8cf..e69de29 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -1,11 +0,0 @@ -# Summary - -- [Yama](./yama/index.md) - - [Getting Started](./yama/getting_started.md) - - [Internals](./yama/internals.md) - - [Raw Piles](./yama/internals/raw-piles.md) - - [Pointers and Nodes](./yama/internals/pointers-and-nodes.md) -- [Datman](./datman/index.md) - - [Getting Started](./datman/getting_started.md) - - [Remote Backups](./datman/remote_backups.md) - diff --git a/docs/yama/zstd.md b/docs/yama/zstd.md new file mode 100644 index 0000000..477aa21 --- /dev/null +++ b/docs/yama/zstd.md @@ -0,0 +1,5 @@ +# Using a Zstd dictionary with Yama + +## Creating a Zstd dictionary + + diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index 3802953..757f216 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -874,7 +874,7 @@ mod tests { }; let mut child_full = child_diff.clone(); - integrate_node_in_place(&mut child_full, &parent).unwrap(); + integrate_node_in_place(&mut child_full, &parent); let expected_child_full = TreeNode::Directory { ownership: FilesystemOwnership { uid: 47, gid: 49 }, From 00dec17da0f2eb51ffd46763cb485b31634377d7 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Wed, 3 May 2023 23:50:55 +0100 Subject: [PATCH 07/51] overhaul: streaming store support --- Cargo.lock | 169 +++++++++++++++++++++++++++++++++++++++++++ yama/Cargo.toml | 1 + yama/src/bin/yama.rs | 118 ++++++++++++++++++++++++++---- yama/src/storing.rs | 116 +++++++++++++++++++++++------ 4 files changed, 
367 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 180fe3d..c94e3eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,6 +49,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "ambient-authority" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec8ad6edb4840b78c5c3d88de606b22252d552b55f3a4699fbb10fc070ec3049" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -371,6 +377,47 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +[[package]] +name = "cap-fs-ext" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0c86006edbaf13bbe0cdf2d7492cff638cd24cd6b717fa2aadcab09b532353" +dependencies = [ + "cap-primitives", + "cap-std", + "io-lifetimes", + "windows-sys 0.48.0", +] + +[[package]] +name = "cap-primitives" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f377e5b016d3d2b9d150b8e8f711d88d42046b89294572d504596f19e59ca" +dependencies = [ + "ambient-authority", + "fs-set-times", + "io-extras", + "io-lifetimes", + "ipnet", + "maybe-owned", + "rustix", + "windows-sys 0.48.0", + "winx", +] + +[[package]] +name = "cap-std" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14bfc13243563bf62ee9a31b6659d2fc2bf20e75f2d3d58d87a0c420778e1399" +dependencies = [ + "cap-primitives", + "io-extras", + "io-lifetimes", + "rustix", +] + [[package]] name = "cc" version = "1.0.73" @@ -766,6 +813,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "duplex" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f4b4ccdcb95f0ced5ddc8e3dbac4a2f029e9433c5ee94e9b9d7c148c86ffcd4" + [[package]] name = "dust_style_filetree_display" version = "0.8.5" @@ -881,6 +934,17 @@ dependencies = [ "instant", ] +[[package]] +name = "fd-lock" +version = "3.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ae6b3d9530211fb3b12a95374b8b0823be812f53d09e18c5675c0146b09642" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "fiat-crypto" version = "0.1.20" @@ -930,6 +994,17 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-set-times" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7833d0f115a013d51c55950a3b09d30e4b057be9961b709acb9b5b17a1108861" +dependencies = [ + "io-lifetimes", + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "futures" version = "0.3.28" @@ -1334,6 +1409,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "io-extras" +version = "0.17.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde93d48f0d9277f977a333eca8313695ddd5301dc96f7e02aeddcb0dd99096f" +dependencies = [ + "io-lifetimes", + "os_pipe", + "windows-sys 0.48.0", +] + [[package]] name = "io-lifetimes" version = "1.0.9" @@ -1342,9 +1428,26 @@ checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" dependencies = [ "hermit-abi 0.3.1", "libc", + "os_pipe", "windows-sys 0.45.0", ] +[[package]] +name = 
"io-streams" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b18f85497e7fd4b4d3ada035e29273dde90f5b188349fda32a7cb1bc4457afbe" +dependencies = [ + "duplex", + "io-extras", + "io-lifetimes", + "memchr", + "os_pipe", + "parking", + "rustix", + "system-interface", +] + [[package]] name = "ipnet" version = "2.5.0" @@ -1495,6 +1598,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "maybe-owned" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" + [[package]] name = "md5" version = "0.7.0" @@ -1825,6 +1934,16 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "os_pipe" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ae859aa07428ca9a929b936690f8b12dc5f11dd8c6992a18ca93919f28bc177" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "ouroboros" version = "0.15.6" @@ -1864,6 +1983,12 @@ dependencies = [ "libm", ] +[[package]] +name = "parking" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14f2252c834a40ed9bb5422029649578e63aa341ac401f74e719dd1afda8394e" + [[package]] name = "parking_lot" version = "0.11.2" @@ -2279,8 +2404,10 @@ dependencies = [ "bitflags 1.3.2", "errno", "io-lifetimes", + "itoa", "libc", "linux-raw-sys", + "once_cell", "windows-sys 0.45.0", ] @@ -2542,6 +2669,19 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "socketpair" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "365cc8b798dfcc4f5518e75655521d47f089ded0ef7df335c637e30b6cc717de" +dependencies = [ + "io-extras", + "io-lifetimes", + "rustix", + "uuid", + "windows-sys 0.48.0", +] + [[package]] name = "spin" version = "0.5.2" @@ -2750,6 +2890,23 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "system-interface" +version = "0.25.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928ebd55ab758962e230f51ca63735c5b283f26292297c81404289cda5d78631" +dependencies = [ + "bitflags 1.3.2", + "cap-fs-ext", + "fd-lock", + "io-lifetimes", + "os_pipe", + "rustix", + "socketpair", + "windows-sys 0.48.0", + "winx", +] + [[package]] name = "tempfile" version = "3.5.0" @@ -3680,6 +3837,17 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "winx" +version = "0.35.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c52a121f0fbf9320d5f2a9a5d82f6cb7557eda5e8b47fc3e7f359ec866ae960" +dependencies = [ + "bitflags 1.3.2", + "io-lifetimes", + "windows-sys 0.48.0", +] + [[package]] name = "x25519-dalek" version = "2.0.0-rc.2" @@ -3713,6 +3881,7 @@ dependencies = [ "hostname", "ignore", "indicatif", + "io-streams", "memmap2", "patricia_tree", "serde", diff --git a/yama/Cargo.toml b/yama/Cargo.toml index 28680e4..d546e24 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -47,6 +47,7 @@ flume = "0.10.14" async-recursion = "1.0.4" toml = "0.7.3" +io-streams = "0.14.3" dust_style_filetree_display = "0.8.5" diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index cb19ab1..79d28b8 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -23,6 +23,7 @@ use std::iter::Iterator; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; use indicatif::ProgressStyle; use tokio::io::{stdin, AsyncBufReadExt, BufReader}; use tracing::{info, info_span, 
warn, Span, Instrument}; @@ -32,12 +33,13 @@ use tracing_subscriber::filter::filter_fn; use tracing_subscriber::Layer; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; +use users::{get_current_gid, get_current_uid}; use yama::extract::flatten_treenode; use yama::init::{generate_master_keyring, pack_keyring}; use yama::open::{open_keyring_interactive, open_pile, pre_open_keyring, update_cache}; use yama::pile_connector::PileConnectionScheme; use yama::scan::create_uidgid_lookup_tables; -use yama::storing::{assemble_and_write_indices, StoragePipeline}; +use yama::storing::{assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState}; use yama::{extract, get_hostname, init, PROGRESS_BAR_STYLE, scan}; use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; use yama_midlevel_crypto::chunk_id::ChunkIdKey; @@ -47,9 +49,7 @@ use yama_pile::definitions::{ use yama_pile::locks::LockKind; use yama_pile::pointers::Pointer; use yama_pile::tree::unpopulated::ScanEntry; -use yama_pile::tree::{ - assemble_tree_from_scan_entries, differentiate_node_in_place, RootTreeNode, TreeNode, -}; +use yama_pile::tree::{assemble_tree_from_scan_entries, differentiate_node_in_place, FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode}; use yama_pile::FILE_YAMA_CONNECTOR; #[derive(Clone, Debug)] @@ -147,14 +147,11 @@ pub enum YamaCommand { #[command(subcommand)] Keyring(KeyringCommand), - /// Store a file, directory or input stream to a Yama pile. + /// Store a file or directory to a Yama pile. Store { source: PathBuf, destination: PileAndPointer, - #[arg(long)] - stdin: bool, - #[arg(long)] overwrite: bool, @@ -165,6 +162,19 @@ pub enum YamaCommand { parent: Option, }, + /// Store an input stream to the Yama pile. + StoreStdin { + destination: PileAndPointer, + + #[arg(long)] + overwrite: bool, + + /// A name to give to the file that the stream is stored as. Otherwise it will just be called + /// 'stream'. + #[arg(short = 'n', long)] + name: Option, + }, + /// Extract a file, directory or output stream from a Yama pile. Extract { source: PileAndPointerWithSubTree, @@ -172,9 +182,6 @@ pub enum YamaCommand { #[arg(long)] stdout: bool, - - #[arg(long)] - overwrite: bool, }, // TODO Mount { ... }, @@ -346,11 +353,9 @@ async fn main() -> eyre::Result<()> { YamaCommand::Store { source, destination, - stdin, overwrite, parent, } => { - ensure!(!stdin, "stdin not supported yet"); let pile_connector_path = destination.pile_path.unwrap_or(PathBuf::from(".")); let keyring = pre_open_keyring(&pile_connector_path).await?; let keyring = open_keyring_interactive(keyring).await?; @@ -491,12 +496,93 @@ async fn main() -> eyre::Result<()> { .context("failed to write pointer")?; Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; - } + }, + YamaCommand::StoreStdin { + destination, + overwrite, + name, + } => { + let pile_connector_path = destination.pile_path.unwrap_or(PathBuf::from(".")); + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + let pwc = open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} store {:?}", get_hostname(), destination.pointer), + ) + .await?; + update_cache(&pwc).await?; + + let pwc = Arc::new(pwc); + + let store_span = info_span!("storing"); + // store_span.pb_set_style(&ProgressStyle::default_bar()); + // TODO INDETERMINATE PROGRESS BAR with bytes shown? 
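+            // The total size of stdin isn't known up front, so the length set below is a
+            // placeholder; the bar mostly just signals that a store is in progress.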
+ store_span.pb_set_style(&ProgressStyle::default_bar().template( + PROGRESS_BAR_STYLE, + ).unwrap()); + store_span.pb_set_message("storing files"); + store_span.pb_set_length(1u64); + // TODO Dirty + let store_span_entered = store_span.enter(); + + + let mut storing_state = StoringState::new(pwc.clone()).await.context("failed to create storing state")?; + let mut sbw = StoringBloblogWriters::default(); + let stdin = std::io::BufReader::new(io_streams::StreamReader::stdin().context("failed to open stdin")?); + let (chunkref, size) = storing_state.store_full_stream(stdin, &mut sbw).context("Failed to store stream into Yama pile")?; + + sbw.finish_bloblogs(&mut storing_state).await.context("Failed to finish bloblogs")?; + + info!("Stream stored, writing indices..."); + + // Write indices for the new bloblogs we have created. This is a prerequisite for creating a pointer. + let chunkmaps = storing_state.new_bloblogs; + + assemble_and_write_indices(&pwc, chunkmaps) + .await + .context("failed to assemble and write indices")?; + + info!("All indices stored, writing pointer..."); + + // Assemble and write a pointer + let uid = get_current_uid() as u16; + let gid = get_current_gid() as u16; + let tree = TreeNode::NormalFile { + mtime: SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_millis() as u64).unwrap_or(0), + ownership: FilesystemOwnership { uid, gid }, + permissions: FilesystemPermissions { mode: 0o600 }, + size, + content: chunkref, + }; + let (uids, gids) = + create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?; + + pwc.pile + .write_pointer( + destination.pointer.0.as_str(), + overwrite, + &Pointer { + parent: None, + root: RootTreeNode { + name: name.unwrap_or_else(|| String::from("stream")), + node: tree, + }, + uids, + gids, + }, + ) + .await + .context("failed to write pointer")?; + + Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; + }, YamaCommand::Extract { source, destination, stdout, - overwrite, } => { ensure!(!stdout, "stdout not supported yet"); let pile_connector_path = source @@ -555,7 +641,7 @@ async fn main() -> eyre::Result<()> { Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; } - other => todo!(), + _other => todo!(), } Ok(()) diff --git a/yama/src/storing.rs b/yama/src/storing.rs index fe454b6..0331d94 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -1,10 +1,11 @@ use crate::pile_with_cache::PileWithCache; use dashmap::DashSet; use eyre::{bail, Context}; -use fastcdc::v2020::FastCDC; +use fastcdc::v2020::{FastCDC, StreamCDC}; use flume::{Receiver, RecvError, SendError, Sender}; use std::cmp::Reverse; use std::collections::{BTreeMap, BTreeSet}; +use std::io::Read; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::Arc; @@ -45,6 +46,28 @@ pub struct StoringState { pub compressor: zstd::bulk::Compressor<'static>, } +impl StoringState { + pub async fn new(pwc: Arc>) -> eyre::Result { + let compressor = match pwc.pile.pile_config.zstd_dict.as_ref() { + None => { + Compressor::new(get_zstd_level()).context("can't create dictless compressor")? 
+ } + Some(dict_bytes) => Compressor::with_dictionary(get_zstd_level(), dict_bytes) + .context("can't create dictful compressor")?, + }; + + let chunk_id_key = pwc.pile.pile_config.chunk_id_key; + Ok(StoringState { + cache_conn: pwc.localcache.read().await?, + new_unflushed_chunks: Arc::new(Default::default()), + new_bloblogs: vec![], + pwc, + chunk_id_key, + compressor, + }) + } +} + struct StoringIntermediate { /// New bloblogs that we have created but not yet written out indices for. pub new_bloblogs: Vec<(BloblogId, BTreeMap)>, @@ -68,7 +91,7 @@ pub struct StoringBloblogWriters { } impl StoringBloblogWriters { - async fn finish_bloblogs(&mut self, ss: &mut StoringState) -> eyre::Result<()> { + pub async fn finish_bloblogs(&mut self, ss: &mut StoringState) -> eyre::Result<()> { if let Some(writer_to_finish) = self.file_contents.take() { let (_bloblog_path, bloblog_id, chunkmap) = writer_to_finish.finish().await?; ss.new_bloblogs.push((bloblog_id, chunkmap)); @@ -140,6 +163,42 @@ impl StoringState { }) } + fn store_full_stream_returning_chunks( + &mut self, + store_stream: impl Read, + slot: &mut Option>>>, + ) -> eyre::Result<(Vec, u64)> { + task::block_in_place(|| { + let mut stream_length = 0u64; + let mut result = Vec::new(); + for chunk in StreamCDC::new(store_stream, FASTCDC_MIN, FASTCDC_AVG, FASTCDC_MAX) { + let chunk = chunk.context("failed to read in for StreamCDC")?; + let chunk_bytes = chunk.data.as_slice(); + stream_length += chunk_bytes.len() as u64; + let chunk_id = ChunkId::compute(chunk_bytes, &self.chunk_id_key); + result.push(chunk_id); + let is_new = Handle::current().block_on(async { + Ok::( + self.cache_conn.is_chunk_new(chunk_id).await? + && self.new_unflushed_chunks.insert(chunk_id), + ) + })?; + + if is_new { + let compressed_bytes = self.compressor.compress(&chunk_bytes)?; + + Handle::current().block_on(async { + let writer = self.obtain_bloblog_writer(slot).await?; + writer.write_chunk(chunk_id, &compressed_bytes).await?; + Ok::<(), eyre::Report>(()) + })?; + } + } + + Ok((result, stream_length)) + }) + } + pub fn store_full_slice( &mut self, store_slice: &[u8], @@ -170,6 +229,39 @@ impl StoringState { depth, }) } + + /// Stores a full stream (`Read`) and returns the recursive chunk ref plus the length of the + /// stream. + pub fn store_full_stream( + &mut self, + store_stream: impl Read, + sbw: &mut StoringBloblogWriters, + ) -> eyre::Result<(RecursiveChunkRef, u64)> { + // First calculate all the chunk IDs needed to be written here. + let (mut chunk_ids, stream_length) = + self.store_full_stream_returning_chunks(store_stream, &mut sbw.file_contents)?; + let mut depth = 0; + + // If we have the wrong number of chunks, we should chunk the chunk list... + while chunk_ids.len() != 1 { + let mut metachunks_list_bytes: Vec = Vec::with_capacity(chunk_ids.len() * 32); + for chunk_id in chunk_ids { + metachunks_list_bytes.extend_from_slice(&chunk_id.to_bytes()); + } + + // TODO It might be nice to store these in opposite order, so a read is a true sequential + // scan. + // i.e. (depth=3) (depth=2) (depth=1) (depth=0) ... 
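+            // Each pass serialises the current list of chunk IDs, stores that as chunks of its
+            // own, and continues with the resulting IDs, so the loop ends with a single root
+            // chunk; `depth` records how many levels of indirection a reader must expand.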
+ chunk_ids = self + .store_full_slice_returning_chunks(&metachunks_list_bytes, &mut sbw.metachunks)?; + depth += 1; + } + + Ok((RecursiveChunkRef { + chunk_id: chunk_ids[0], + depth, + }, stream_length)) + } } async fn store_file( @@ -242,25 +334,7 @@ impl StoragePipeline { for spw_num in 0..workers { let job_rx = job_rx.clone(); let result_tx = result_tx.clone(); - let pwc = pwc.clone(); - - let compressor = match pwc.pile.pile_config.zstd_dict.as_ref() { - None => { - Compressor::new(get_zstd_level()).context("can't create dictless compressor")? - } - Some(dict_bytes) => Compressor::with_dictionary(get_zstd_level(), dict_bytes) - .context("can't create dictful compressor")?, - }; - - let chunk_id_key = pwc.pile.pile_config.chunk_id_key; - let storing_state = StoringState { - cache_conn: pwc.localcache.read().await?, - new_unflushed_chunks: Arc::new(Default::default()), - new_bloblogs: vec![], - pwc, - chunk_id_key, - compressor, - }; + let storing_state = StoringState::new(pwc.clone()).await.context("failed to create storing state")?; // make a logging span for the Storage Pipeline Workers let spw_span = info_span!("spw", n = spw_num); join_set.spawn( From 8e5649597b4af5fd067067d061782e8aad40a206 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Thu, 4 May 2023 23:56:35 +0100 Subject: [PATCH 08/51] overhaul: streaming extract support --- yama/src/bin/yama.rs | 83 ++++++++++++++++++++++++++++++++++++++++---- yama/src/extract.rs | 52 +++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index 79d28b8..031f499 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -16,7 +16,7 @@ along with Yama. If not, see . */ use clap::{Parser, Subcommand}; -use eyre::{bail, ensure, eyre, Context, ContextCompat}; +use eyre::{bail, eyre, Context, ContextCompat}; use patricia_tree::PatriciaMap; use std::borrow::Cow; use std::iter::Iterator; @@ -179,9 +179,11 @@ pub enum YamaCommand { Extract { source: PileAndPointerWithSubTree, destination: PathBuf, + }, - #[arg(long)] - stdout: bool, + /// Extract an output stream from a Yama pile. + ExtractStdout { + source: PileAndPointerWithSubTree, }, // TODO Mount { ... }, @@ -582,9 +584,7 @@ async fn main() -> eyre::Result<()> { YamaCommand::Extract { source, destination, - stdout, } => { - ensure!(!stdout, "stdout not supported yet"); let pile_connector_path = source .pile_path .as_ref() @@ -640,7 +640,78 @@ async fn main() -> eyre::Result<()> { .await?; Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; - } + }, + YamaCommand::ExtractStdout { + source, + } => { + let pile_connector_path = source + .pile_path + .as_ref() + .map(|p| p.as_ref()) + .unwrap_or(Path::new(".")); + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + let pwc = Arc::new(open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} store {:?}", get_hostname(), source.pointer), + ) + .await?); + update_cache(&pwc).await?; + + let pointer = pwc + .read_pointer_fully_integrated(source.pointer.0.as_str()) + .await + .context("failed to read pointer")? 
+ .with_context(|| { + format!( + "it appears that the pointer {:?} does not exist", + source.pointer + ) + })?; + assert!(pointer.parent.is_none()); + + let node = if source.sub_tree.is_empty() { + &pointer.root.node + } else { + let mut current = &pointer.root.node; + for subpath in source.sub_tree.split('/') { + if let TreeNode::Directory { children, .. } = current { + current = children.get(subpath).with_context(|| { + format!("can't descend into {subpath}: doesn't exist in directory.") + })?; + } else { + bail!("can't descend into {subpath}; parent isn't a directory..."); + } + } + current + }; + + let chunkref = match node { + TreeNode::NormalFile { content, .. } => { + content + } + TreeNode::Directory { .. } => { + bail!("Can't extract `Directory` to stdout!"); + } + TreeNode::SymbolicLink { .. } => { + bail!("Can't extract `SymbolicLink` to stdout!"); + } + TreeNode::Deleted => { + bail!("Can't extract `Deleted` to stdout!"); + } + }; + + let extract_span = info_span!("extract_files"); + let stream = std::io::BufWriter::new(io_streams::StreamWriter::stdout().context("failed to open stdout")?); + extract::unpack_sync_stream(&pwc, *chunkref, stream) + .instrument(extract_span) + .await?; + + Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; + }, _other => todo!(), } diff --git a/yama/src/extract.rs b/yama/src/extract.rs index 9549c85..cf5275d 100644 --- a/yama/src/extract.rs +++ b/yama/src/extract.rs @@ -7,6 +7,7 @@ use patricia_tree::PatriciaMap; use std::cmp::Reverse; use std::collections::{BTreeMap, BTreeSet}; use std::fs::Permissions; +use std::io::Write; use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use std::sync::Arc; @@ -235,6 +236,57 @@ pub async fn unpack_files( }.instrument(unpack_span).await } +pub async fn unpack_sync_stream(pwc: &Arc>, + chunkref: RecursiveChunkRef, + mut stream: impl Write, +) -> eyre::Result<()> { + let expanded_chunkrefs = expand_chunkrefs( + pwc, + vec![((), chunkref)].into_iter(), + ) + .await?; + + let total_chunks = expanded_chunkrefs.iter().map(|(_, cs)| cs.len() as u64).sum::(); + let unpack_span = info_span!("unpack_files"); + + async move { + let unpack_span = Span::current(); + unpack_span.pb_set_style(&ProgressStyle::default_bar().template( + PROGRESS_BAR_STYLE, + ).unwrap()); + unpack_span.pb_set_message("unpack"); + unpack_span.pb_set_length(total_chunks); + + let (file_part_retriever, _) = + lookup_chunkrefs_and_create_retriever(pwc, expanded_chunkrefs).await?; + let mut done = false; + + while let Ok(next_part) = file_part_retriever.recv_async().await { + match next_part { + RetrieverResp::Blob { blob, .. 
} => { + tokio::task::block_in_place(|| { + stream.write_all(&blob) + }).context("Failed to write to output stream on Blob")?; + + unpack_span.pb_inc(1); + } + RetrieverResp::JobComplete(_) => { + tokio::task::block_in_place(|| { + stream.flush() + }).context("Failed to flush output stream on JobComplete")?; + done = true; + } + } + } + + if !done { + bail!("There were errors extracting."); + } + + Ok(()) + }.instrument(unpack_span).await +} + async fn file_unpacker_writer(path: PathBuf, permissions: FilesystemPermissions, restore_permissions: bool, rx: Receiver>>) -> eyre::Result<()> { let mut oo = OpenOptions::new(); oo.write(true).create_new(true); From dabf7c5cf04346b673c822d3a85b27f13fe01119 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sat, 20 May 2023 13:11:30 +0100 Subject: [PATCH 09/51] overhaul: datman support --- Cargo.lock | 27 +- datman/Cargo.toml | 23 + datman/src/backup.rs | 471 ++++++++++++++++++ datman/src/bin/datman.rs | 262 +++++++++- datman/src/datetime.rs | 26 + datman/src/descriptor_config.rs | 126 +++++ datman/src/extract.rs | 182 +++++++ datman/src/lib.rs | 5 + datman/src/pointer_names.rs | 20 + yama/Cargo.toml | 2 +- yama/src/bin/yama.rs | 342 ++++++++++--- yama/src/bin/yamascan.rs | 11 +- yama/src/check.rs | 64 +++ yama/src/extract.rs | 97 ++-- yama/src/lib.rs | 4 +- yama/src/open.rs | 20 +- yama/src/retriever.rs | 27 +- yama/src/retriever/decompressor.rs | 10 +- yama/src/scan.rs | 17 +- yama/src/storing.rs | 186 +++---- yama/src/vacuum.rs | 5 +- yama/src/vacuum/delete_unrefd_bloblogs.rs | 1 + yama/src/vacuum/forget_chunks.rs | 171 +++++++ yama/src/vacuum/merge_indices.rs | 127 +++++ .../src/vacuum/repack_bloblogs_and_indices.rs | 191 +++++++ .../20230413133342_local_index_cache.sql | 31 +- yama_localcache/src/lib.rs | 73 +++ yama_pile/src/definitions.rs | 4 +- yama_pile/src/keyring.rs | 35 +- yama_pile/src/lib.rs | 98 +++- yama_pile/src/locks.rs | 26 +- yama_pile/src/tree.rs | 5 + yama_pile/src/utils.rs | 1 + yama_wormfile/src/boxed.rs | 15 +- yama_wormfile/src/lib.rs | 14 + yama_wormfile_fs/src/lib.rs | 31 +- yama_wormfile_s3/src/lib.rs | 22 +- yama_wormfile_sftp/src/lib.rs | 42 +- 38 files changed, 2549 insertions(+), 265 deletions(-) create mode 100644 datman/src/backup.rs create mode 100644 datman/src/datetime.rs create mode 100644 datman/src/descriptor_config.rs create mode 100644 datman/src/extract.rs create mode 100644 datman/src/pointer_names.rs create mode 100644 yama/src/check.rs create mode 100644 yama/src/vacuum/delete_unrefd_bloblogs.rs create mode 100644 yama/src/vacuum/forget_chunks.rs create mode 100644 yama/src/vacuum/merge_indices.rs create mode 100644 yama/src/vacuum/repack_bloblogs_and_indices.rs diff --git a/Cargo.lock b/Cargo.lock index c94e3eb..8ab3762 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -746,7 +746,24 @@ dependencies = [ name = "datman" version = "0.7.0-alpha.1" dependencies = [ + "chrono", + "clap", + "dashmap", "eyre", + "indicatif", + "patricia_tree", + "serde", + "serde_json", + "tokio", + "toml", + "tracing", + "tracing-indicatif", + "tracing-subscriber", + "users", + "yama", + "yama_midlevel_crypto", + "yama_pile", + "yama_wormfile", ] [[package]] @@ -3036,9 +3053,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.27.0" +version = "1.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" +checksum = 
"0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105" dependencies = [ "autocfg", "bytes", @@ -3050,7 +3067,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -3065,9 +3082,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", diff --git a/datman/Cargo.toml b/datman/Cargo.toml index fa5e7c8..bab8e89 100644 --- a/datman/Cargo.toml +++ b/datman/Cargo.toml @@ -12,3 +12,26 @@ description = "A chunked and deduplicated backup system using Yama" [dependencies] eyre = "0.6.8" +clap = { version = "4.2.2", features = ["derive", "env"] } +tracing = "0.1.37" +tracing-subscriber = { version = "0.3.16", features = ["tracing-log", "env-filter"] } +tracing-indicatif = "0.3.0" +indicatif = "0.17.3" +serde = { version = "1.0.160", features = ["derive"] } +serde_json = "1.0.96" +toml = "0.7.3" +tokio = { version = "1.28.0", features = ["fs", "macros", "rt-multi-thread"] } +dashmap = "5.4.0" +chrono = "0.4.24" +users = "0.11.0" + +yama = { version = "0.7.0-alpha.1", path = "../yama" } +yama_pile = { path = "../yama_pile" } +#yama_localcache = { path = "../yama_localcache" } +yama_wormfile = { path = "../yama_wormfile" } +#yama_wormfile_fs = { path = "../yama_wormfile_fs" } +#yama_wormfile_s3 = { path = "../yama_wormfile_s3" } +#yama_wormfile_sftp = { path = "../yama_wormfile_sftp" } +yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } + +patricia_tree = "0.5.7" \ No newline at end of file diff --git a/datman/src/backup.rs b/datman/src/backup.rs new file mode 100644 index 0000000..d980858 --- /dev/null +++ b/datman/src/backup.rs @@ -0,0 +1,471 @@ +use crate::descriptor_config::{SourceDescriptor, SourceDescriptorInner, VirtualSourceKind}; +use crate::pointer_names::{get_pointer_name_at, POINTER_NAME_DATETIME_SPLITTER}; +use chrono::{DateTime, Utc}; +use dashmap::DashSet; +use eyre::{bail, eyre, Context, ContextCompat}; +use indicatif::ProgressStyle; +use patricia_tree::PatriciaMap; +use std::borrow::Cow; +use std::collections::{BTreeMap, HashMap}; +use std::io::Write; +use std::path::PathBuf; +use std::process::{Child, Command, Stdio}; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; +use tokio::task::JoinSet; +use tracing::{debug, info, info_span, Instrument, Span}; +use tracing_indicatif::span_ext::IndicatifSpanExt; +use users::{get_current_gid, get_current_uid}; +use yama::pile_with_cache::PileWithCache; +use yama::scan::create_uidgid_lookup_tables; +use yama::storing::{ + assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState, +}; +use yama::{scan, PROGRESS_BAR_STYLE}; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_pile::definitions::{BlobLocator, BloblogId, IndexBloblogEntry, RecursiveChunkRef}; +use yama_pile::pointers::Pointer; +use yama_pile::tree::unpopulated::ScanEntry; +use yama_pile::tree::{ + assemble_tree_from_scan_entries, differentiate_node_in_place, FilesystemOwnership, + FilesystemPermissions, RootTreeNode, TreeNode, +}; +use yama_wormfile::boxed::BoxedWormFileProvider; + +pub async fn backup( + pwc: Arc>, + sources_to_backup: BTreeMap, +) -> eyre::Result<()> { + // Locate suitable parent pointers + let parents_to_use = 
find_suitable_parent_pointers(&pwc, &sources_to_backup) + .await + .context("failed to look for suitable parent pointers")?; + let now = Utc::now(); + + // (dirtrees) Scan + let dir_sources = scan_dir_sources(&sources_to_backup, parents_to_use, now) + .await + .context("failed to scan directory sources")?; + + let new_unflushed_chunks: Arc> = Arc::new(Default::default()); + + // (dirtrees) Start a storage pipeline and submit jobs to it + let task_store_dirs = { + let new_unflushed_chunks = new_unflushed_chunks.clone(); + let pwc = pwc.clone(); + + let bds_span = info_span!("storing"); + tokio::spawn( + async move { + backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) + .await + .context("whilst backing up dir sources") + } + .instrument(bds_span), + ) + }; + + // (virtual source streams) Store to bloblog writers + let task_store_virtuals = { + let bvs_span = info_span!("storing_virts"); + let new_unflushed_chunks = new_unflushed_chunks.clone(); + let pwc = pwc.clone(); + + tokio::spawn( + async move { + backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks) + .await + .context("whilst backing up virtual sources") + } + .instrument(bvs_span), + ) + }; + + let (dir_sources_and_chunkmaps, virt_sources) = + tokio::join!(task_store_dirs, task_store_virtuals); + let dir_sources_and_chunkmaps: BackupDirSourcesReturn = dir_sources_and_chunkmaps??; + let mut virt_sources: Vec = virt_sources??; + + let mut chunkmaps = dir_sources_and_chunkmaps.chunkmaps; + for source in &mut virt_sources { + chunkmaps.extend( + std::mem::take(&mut source.chunkmaps) + .into_iter() + .map(|(k, nb)| { + ( + k, + IndexBloblogEntry { + chunks: nb, + forgotten_bytes: 0, + }, + ) + }), + ); + } + + // Chunkmaps, indices and write pointers + assemble_and_write_indices(&pwc, chunkmaps) + .await + .context("failed to assemble and write indices")?; + + info!("All indices stored, writing pointer..."); + + for (dir_source_prep, chunk_file_map) in dir_sources_and_chunkmaps.dir_source_returns { + // Assemble and write a pointer + let mut tree = + assemble_tree_from_scan_entries(dir_source_prep.scan_entry_map, chunk_file_map) + .context("failed to assemble tree")?; + let (uids, gids) = + create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?; + + if let Some(ref parent_node) = dir_source_prep.parent { + differentiate_node_in_place(&mut tree, &parent_node.root.node) + .context("failed to differentiate?")?; + } + + pwc.pile + .write_pointer( + &dir_source_prep.new_pointer_name, + false, + &Pointer { + parent: dir_source_prep.parent_name.clone(), + root: RootTreeNode { + name: dir_source_prep + .path + .file_name() + .map(|oss| oss.to_str()) + .flatten() + .unwrap_or("") + .to_owned(), + node: tree, + }, + uids, + gids, + }, + ) + .await + .context("failed to write pointer")?; + } + + for virtual_source in virt_sources { + pwc.pile + .write_pointer(&virtual_source.pointer_name, false, &virtual_source.pointer) + .await + .context("failed to write pointer")?; + } + + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? + .close() + .await?; + + Ok(()) +} + +/// Given access to a PWC and a map of sources to back up, returns a map of pointer names to use as parents. +/// For virtual sources, no parent is chosen. +/// For directory sources, the most recent pointer from the same source is chosen as a parent. 
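+/// ("Most recent" is judged by taking the last listed pointer whose name starts with the
+/// source name followed by the datetime splitter, relying on pointer names embedding their
+/// creation time.)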
+async fn find_suitable_parent_pointers( + pwc: &PileWithCache, + sources_to_backup: &BTreeMap, +) -> eyre::Result> { + let mut result = BTreeMap::new(); + + let pointers = pwc + .pile + .list_pointers() + .await + .context("failed to list pointers")?; + + for (source_name, source) in sources_to_backup.iter() { + if source.is_directory_source() { + let starter = format!("{source_name}{POINTER_NAME_DATETIME_SPLITTER}"); + if let Some(most_recent_pointer) = pointers + .iter() + .rev() + .filter(|pn| pn.starts_with(&starter)) + .next() + { + debug!("for {source_name:?}, using parent {most_recent_pointer:?}"); + let pointer = pwc + .read_pointer_fully_integrated(&most_recent_pointer) + .await + .context("failed to read parent pointer")? + .context("no parent pointer despite having just listed it")?; + result.insert( + source_name.to_owned(), + (most_recent_pointer.clone(), pointer), + ); + } + } + } + + Ok(result) +} + +struct DirSourcePrep { + scan_entry_map: PatriciaMap, + parent_name: Option, + parent: Option, + path: PathBuf, + new_pointer_name: String, +} + +async fn scan_dir_sources( + sources_to_backup: &BTreeMap, + mut parents: BTreeMap, + now: DateTime, +) -> eyre::Result> { + let mut joinset = JoinSet::new(); + + for (source_name, source) in sources_to_backup { + if let SourceDescriptorInner::DirectorySource { + path, + cross_filesystems, + ignore, + } = &source.inner + { + let path = path.to_owned(); + let cross_filesystems = *cross_filesystems; + debug!("TODO: xf={cross_filesystems}"); + let ignore = ignore.to_owned(); + let (parent_name, parent) = parents.remove(source_name).unzip(); + let new_pointer_name = get_pointer_name_at(&source_name, now); + joinset.spawn_blocking(move || -> eyre::Result { + let scan_entry_map = scan::scan(&path, &ignore).context("Failed to scan")?; + Ok(DirSourcePrep { + scan_entry_map, + parent_name, + parent, + path, + new_pointer_name, + }) + }); + } + } + + let mut result = Vec::new(); + + while let Some(dsp_res_res) = joinset.join_next().await { + result.push(dsp_res_res??); + } + Ok(result) +} + +struct BackupDirSourcesReturn { + pub chunkmaps: BTreeMap, + pub dir_source_returns: Vec<(DirSourcePrep, PatriciaMap<(RecursiveChunkRef, u64)>)>, +} + +async fn backup_dir_sources( + dir_sources: Vec, + pwc: Arc>, + new_unflushed_chunks: Arc>, +) -> eyre::Result { + let mut chunk_file_maps = Vec::new(); + let mut pruned_scan_entry_maps = Vec::new(); + + // First collect all that stuff together... + for dir_source in &dir_sources { + let (chunk_file_map, pruned_scan_entry_map) = if let Some(ref parent_node) = + dir_source.parent + { + let (cfm, pruned) = + scan::prepopulate_unmodified(&parent_node.root.node, &dir_source.scan_entry_map); + + (cfm, Cow::Owned(pruned)) + } else { + ( + PatriciaMap::<(RecursiveChunkRef, u64)>::new(), + Cow::Borrowed(&dir_source.scan_entry_map), + ) + }; + chunk_file_maps.push(chunk_file_map); + pruned_scan_entry_maps.push(pruned_scan_entry_map); + } + + let store_span = Span::current(); + // store_span.pb_set_style(&ProgressStyle::default_bar()); + store_span.pb_set_style( + &ProgressStyle::default_bar() + .template(PROGRESS_BAR_STYLE) + .unwrap(), + ); + store_span.pb_set_message("storing files"); + store_span.pb_set_length( + pruned_scan_entry_maps + .iter() + .map(|pruned_scan_entry_map| { + pruned_scan_entry_map + .values() + .filter(|v| matches!(v, ScanEntry::NormalFile { .. 
})) + .count() as u64 + }) + .sum(), + ); + + // + let (pipeline, pipeline_job_tx) = + StoragePipeline::launch_new(4, pwc.clone(), new_unflushed_chunks).await?; + + let dir_sources2 = &dir_sources; + let (submitter_task, receiver_task) = tokio::join!( + async move { + let pipeline_job_tx = pipeline_job_tx; + for (dir_source_idx, dir_source) in dir_sources2.iter().enumerate() { + for (name_bytes, scan_entry) in pruned_scan_entry_maps[dir_source_idx].iter() { + if let ScanEntry::NormalFile { .. } = scan_entry { + let name = std::str::from_utf8(name_bytes.as_slice()) + .context("name is not str")?; + // TODO(bug): if source name is a file, this doesn't work (.join("")) + pipeline_job_tx + .send_async(( + (dir_source_idx, name.to_owned()), + dir_source.path.join(name), + )) + .await + .map_err(|_| eyre!("unable to send to pipeline."))?; + } + } + } + + drop(pipeline_job_tx); + Ok::<_, eyre::Report>(()) + }, + async { + while let Ok(((dir_source_idx, job_id), rec_chunk_ref, real_size)) = + pipeline.next_result().await + { + chunk_file_maps[dir_source_idx].insert_str(&job_id, (rec_chunk_ref, real_size)); + Span::current().pb_inc(1); + } + // eprintln!("fin rec"); + Ok::<_, eyre::Report>(()) + } + ); + + submitter_task?; + receiver_task?; + + assert_eq!(dir_sources.len(), chunk_file_maps.len()); + + let chunkmaps = pipeline.finish_into_chunkmaps().await?; + + Ok(BackupDirSourcesReturn { + chunkmaps, + dir_source_returns: dir_sources + .into_iter() + .zip(chunk_file_maps.into_iter()) + .collect(), + }) +} + +async fn backup_virtual_sources( + sources: &BTreeMap, + now: DateTime, + pwc: Arc>, + new_unflushed_chunks: Arc>, +) -> eyre::Result> { + let mut joinset: JoinSet> = JoinSet::new(); + + for (source_name, source) in sources { + if source.is_virtual_source() { + joinset.spawn(backup_virtual_source( + get_pointer_name_at(source_name, now), + source.clone(), + pwc.clone(), + new_unflushed_chunks.clone(), + )); + } + } + + let mut results = Vec::new(); + while let Some(result_res_res) = joinset.join_next().await { + results.push(result_res_res??); + } + + Ok(results) +} + +struct VirtualSourceReturn { + pub pointer_name: String, + pub pointer: Pointer, + pub chunkmaps: Vec<(BloblogId, BTreeMap)>, +} + +async fn backup_virtual_source( + pointer_name: String, + source: SourceDescriptor, + pwc: Arc>, + new_unflushed_chunks: Arc>, +) -> eyre::Result { + let SourceDescriptorInner::VirtualSource(virtual_source) = &source.inner else { + bail!("bug: non-VS SDI passed to BVS"); + }; + + let mut storing_state = StoringState::new(pwc.clone(), new_unflushed_chunks) + .await + .context("failed to create storing state")?; + let mut sbw = StoringBloblogWriters::default(); + let ((chunkref, size), mut sbw, mut storing_state) = tokio::task::spawn_blocking({ + let virtual_source = virtual_source.clone(); + move || -> eyre::Result<((RecursiveChunkRef, u64), StoringBloblogWriters, StoringState)> { + let child = open_stdout_backup_process(&virtual_source.extra_args, &virtual_source.helper)?; + Ok((storing_state.store_full_stream(child.stdout.unwrap(), &mut sbw).context("Failed to store stream into Yama pile")?, sbw, storing_state)) + } + }).await??; + + sbw.finish_bloblogs(&mut storing_state) + .await + .context("Failed to finish bloblogs")?; + let chunkmaps = storing_state.new_bloblogs; + + // Assemble and write a pointer + let uid = get_current_uid() as u16; + let gid = get_current_gid() as u16; + let tree = TreeNode::NormalFile { + mtime: SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis() as 
u64) + .unwrap_or(0), + ownership: FilesystemOwnership { uid, gid }, + permissions: FilesystemPermissions { mode: 0o600 }, + size, + content: chunkref, + }; + let (uids, gids) = + create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?; + + let VirtualSourceKind::Stdout { filename } = &virtual_source.kind; + + Ok(VirtualSourceReturn { + pointer_name, + pointer: Pointer { + parent: None, + root: RootTreeNode { + name: filename.clone(), + node: tree, + }, + uids, + gids, + }, + chunkmaps, + }) +} + +pub fn open_stdout_backup_process( + extra_args: &HashMap, + program_name: &str, +) -> eyre::Result { + let mut child = Command::new(format!("datman-helper-{}-backup", program_name)) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .stdin(Stdio::piped()) + .spawn()?; + let mut child_stdin = child.stdin.as_mut().unwrap(); + serde_json::to_writer(&mut child_stdin, extra_args)?; + child_stdin.flush()?; + // close stdin! + child.stdin = None; + Ok(child) +} diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 20b202f..3f3e027 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -15,6 +15,266 @@ You should have received a copy of the GNU General Public License along with Yama. If not, see . */ -pub fn main() -> eyre::Result<()> { +use clap::{Parser, Subcommand}; +use datman::backup::backup; +use datman::descriptor_config::{load_descriptor, SourceDescriptor}; +use datman::extract::{ + extract, load_pointers_for_extraction, merge_roots_for_batch_extract, select_to_extract, +}; +use eyre::{bail, Context, ContextCompat}; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; +use std::str::FromStr; +use tracing::info; +use tracing_indicatif::IndicatifLayer; +use tracing_subscriber::filter::filter_fn; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::Layer; +use yama::get_hostname; +use yama::open::open_lock_and_update_cache; + +#[derive(Clone, Debug)] +pub struct PileAndPointer { + pub pile_path: Option, + pub pointer: PointerName, +} + +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct PointerName(String); + +impl FromStr for PointerName { + type Err = eyre::Error; + + fn from_str(s: &str) -> Result { + if !s + .chars() + .all(|c| c.is_alphanumeric() || ['_', '+', '-', ':'].contains(&c)) + { + bail!("Bad pointer name: {s:?}"); + } + Ok(PointerName(s.to_owned())) + } +} + +impl FromStr for PileAndPointer { + type Err = eyre::Error; + + fn from_str(s: &str) -> Result { + match s.split_once(":") { + None => Ok(PileAndPointer { + pile_path: None, + pointer: PointerName::from_str(s)?, + }), + Some((pile_path, pointer)) => Ok(PileAndPointer { + pile_path: Some(PathBuf::from(pile_path)), + pointer: PointerName::from_str(pointer)?, + }), + } + } +} + +#[derive(Clone, Debug)] +pub struct PileAndPointerWithSubTree { + pub pile_path: Option, + pub pointer: PointerName, + // TODO how to represent... 
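+    // (Added note) Accepted syntax, per the FromStr impl below: an optional pile path before the
+    // first ':', then the pointer name, then an optional sub-tree after the first '/',
+    // e.g. "../pile:mypointer/some/subtree", "mypointer/docs", or just "mypointer".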
+ pub sub_tree: String, +} + +impl FromStr for PileAndPointerWithSubTree { + type Err = eyre::Error; + + fn from_str(s: &str) -> Result { + let (pile_path, pointer_and_subtree) = match s.split_once(":") { + None => (None, s), + Some((pile_path, pointer)) => (Some(PathBuf::from(pile_path)), pointer), + }; + + if let Some(slash) = pointer_and_subtree.find('/') { + Ok(PileAndPointerWithSubTree { + pile_path, + pointer: PointerName::from_str(&pointer_and_subtree[0..slash])?, + sub_tree: pointer_and_subtree[slash + 1..].to_owned(), + }) + } else { + Ok(PileAndPointerWithSubTree { + pile_path, + pointer: PointerName::from_str(&pointer_and_subtree)?, + sub_tree: String::new(), + }) + } + } +} + +#[derive(Parser, Clone, Debug)] +pub struct DatmanArgs { + #[arg(long, env = "DATMAN_CONFIG", default_value = "datman.toml")] + config: PathBuf, + + #[command(subcommand)] + command: DatmanCommand, +} + +#[derive(Subcommand, Clone, Debug)] +pub enum DatmanCommand { + BackupOne { + source_name: String, + pile_name: String, + }, + + BackupAll { + pile_name: String, + }, + + ExtractOne { + pile_name: String, + source_name: String, + destination: PathBuf, + }, + + ExtractAll { + pile_name: String, + destination: PathBuf, + }, +} + +const PROGRESS_SPANS: &'static [&'static str] = &[ + "store_file", + "storing", + "unpack_files", + "expand_chunkrefs", + "extract_files", +]; + +#[tokio::main] +pub async fn main() -> eyre::Result<()> { + let indicatif_layer = IndicatifLayer::new(); + let stderr_writer = indicatif_layer.get_stderr_writer(); + let indicatif_layer = indicatif_layer.with_filter(filter_fn(|span_metadata| { + span_metadata.target().starts_with("yama") && PROGRESS_SPANS.contains(&span_metadata.name()) + })); + + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "sqlx=warn,yama=debug,datman=debug,info".into()), + ) + .with(tracing_subscriber::fmt::layer().with_writer(stderr_writer)) + .with(indicatif_layer) + .init(); + + let args: DatmanArgs = dbg!(DatmanArgs::parse()); + + let descriptor = load_descriptor(&args.config) + .await + .context("failed to load Datman descriptor")?; + dbg!(&descriptor); + + match args.command { + DatmanCommand::BackupOne { + source_name, + pile_name, + } => { + let pile_connector_path = descriptor + .piles + .get(&pile_name) + .cloned() + .context("no pile by that name")?; + let lock_name = format!("{} datman backup {:?}", get_hostname(), source_name); + let pwc = open_lock_and_update_cache(pile_connector_path, lock_name).await?; + + let source = descriptor + .sources + .get(&source_name) + .context("no source by that name")?; + let my_hostname = get_hostname(); + if &source.host != &my_hostname { + bail!( + "Current hostname is {:?}, not {:?} as expected for this source.", + my_hostname, + source.host + ); + } + + let mut sources_to_backup = BTreeMap::new(); + sources_to_backup.insert(source_name.clone(), source.clone()); + + backup(pwc, sources_to_backup).await?; + } + DatmanCommand::BackupAll { pile_name } => { + let pile_connector_path = descriptor + .piles + .get(&pile_name) + .cloned() + .context("no pile by that name")?; + let lock_name = format!("{} datman backupall", get_hostname()); + let pwc = open_lock_and_update_cache(pile_connector_path, lock_name).await?; + + let my_hostname = get_hostname(); + let sources_to_backup: BTreeMap = descriptor + .sources + .clone() + .into_iter() + .filter(|(_, source)| &source.host == &my_hostname) + .collect(); + + if sources_to_backup.len() == 0 { + bail!( + "No 
sources to back up! The current hostname is {:?}; is it correct?", + my_hostname + ); + } + + info!( + "Backing up the following {} sources: {:?}", + sources_to_backup.len(), + sources_to_backup.keys().collect::>() + ); + + backup(pwc, sources_to_backup).await?; + } + DatmanCommand::ExtractOne { + pile_name, + source_name, + destination, + } => { + let pile_connector_path = descriptor + .piles + .get(&pile_name) + .cloned() + .context("no pile by that name")?; + let lock_name = format!("{} datman extract {:?}", get_hostname(), source_name); + let pwc = open_lock_and_update_cache(pile_connector_path, lock_name).await?; + + let mut sources = BTreeSet::new(); + sources.insert(source_name.clone()); + let selected = select_to_extract(&pwc, sources, None, None, false).await?; + let mut for_extraction = load_pointers_for_extraction(pwc.clone(), selected).await?; + assert_eq!(for_extraction.len(), 1); + let root_node = for_extraction.remove(&source_name).unwrap(); + extract(pwc, root_node.node, &destination).await?; + } + DatmanCommand::ExtractAll { + pile_name, + destination, + } => { + let pile_connector_path = descriptor + .piles + .get(&pile_name) + .cloned() + .context("no pile by that name")?; + let lock_name = format!("{} datman extractall", get_hostname()); + let pwc = open_lock_and_update_cache(pile_connector_path, lock_name).await?; + + let sources = descriptor.sources.keys().cloned().collect(); + let selected = select_to_extract(&pwc, sources, None, None, false).await?; + let for_extraction = load_pointers_for_extraction(pwc.clone(), selected).await?; + let merged_node = merge_roots_for_batch_extract(for_extraction); + extract(pwc, merged_node, &destination).await?; + } + } + Ok(()) } diff --git a/datman/src/datetime.rs b/datman/src/datetime.rs new file mode 100644 index 0000000..3291945 --- /dev/null +++ b/datman/src/datetime.rs @@ -0,0 +1,26 @@ +use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone}; +use eyre::bail; +use std::str::FromStr; + +pub struct HumanDateTime(pub DateTime); + +impl FromStr for HumanDateTime { + type Err = eyre::Error; + + fn from_str(s: &str) -> Result { + if let Ok(date_only) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { + let local_datetime = Local + .from_local_datetime(&date_only.and_hms_opt(0, 0, 0).unwrap()) + .unwrap(); + Ok(HumanDateTime(local_datetime)) + } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { + let local_datetime = Local.from_local_datetime(&date_and_time).unwrap(); + Ok(HumanDateTime(local_datetime)) + } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { + let local_datetime = Local.from_local_datetime(&date_and_time).unwrap(); + Ok(HumanDateTime(local_datetime)) + } else { + bail!("Couldn't parse using any format. Use one of: 2021-05-16 OR 2021-05-16T17:42:14 OR 2021-05-16 17:42:14"); + } + } +} diff --git a/datman/src/descriptor_config.rs b/datman/src/descriptor_config.rs new file mode 100644 index 0000000..2dba230 --- /dev/null +++ b/datman/src/descriptor_config.rs @@ -0,0 +1,126 @@ +/* +This file is part of Yama. + +Yama is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Yama is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Yama. If not, see . +*/ + +use eyre::{Context, ContextCompat}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +// TODO how do we handle?: +// - (important) yama push of one pile to another +// - backup policy stuff like 'minimum backup frequency' ... show when it's not been done +// - backup policy stuff like 'minimum on two different disks, not powered at the same time...' + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct Descriptor { + /// Sources + pub sources: HashMap, + + /// Paths to destination Yama Piles. Remote Piles need a local virtual pile to specify the layers. + pub piles: HashMap, + + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub retention: Option, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct RetentionPolicyConfig { + pub daily: u32, + pub weekly: u32, + pub monthly: u32, + pub yearly: u32, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct SourceDescriptor { + /// The host to run this backup task on. + pub host: String, + #[serde(flatten)] + pub inner: SourceDescriptorInner, +} + +impl SourceDescriptor { + pub fn is_directory_source(&self) -> bool { + matches!(&self.inner, &SourceDescriptorInner::DirectorySource { .. }) + } + + pub fn is_virtual_source(&self) -> bool { + matches!(&self.inner, &SourceDescriptorInner::VirtualSource { .. }) + } +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +#[serde(untagged)] +pub enum SourceDescriptorInner { + DirectorySource { + path: PathBuf, + #[serde(default)] + cross_filesystems: bool, + + /// TODO Paths to ignore + #[serde(default)] + ignore: Vec, + }, + + VirtualSource(VirtualSource), +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct VirtualSource { + /// The name of the helper program that will be used to do this backup. + pub helper: String, + + /// The label that will be assigned to this source. + pub label: String, + + /// The kind of virtual source (how it operates). + pub kind: VirtualSourceKind, + + #[serde(flatten)] + pub extra_args: HashMap, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +#[serde(untagged)] +pub enum VirtualSourceKind { + Stdout { + #[serde(rename = "stdout")] + filename: String, + }, + // TODO(feature) TempDir +} + +/// Loads a descriptor and resolves relative paths contained within. 
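As a rough illustration of the shape these types accept (a hypothetical, untested sketch: the field names come from the derives above, but the exact TOML layout has not been verified here), extra keys on a virtual source, such as the `database` key below, are collected into the flattened `extra_args` map and, per `open_stdout_backup_process` in backup.rs, serialised as JSON onto the helper's stdin:

    // Hypothetical datman.toml matching the Descriptor/SourceDescriptor derives above
    // (assumes the surrounding module's imports; `database` is only an example extra argument).
    const EXAMPLE_DESCRIPTOR: &str = r#"
    [piles]
    main = "../main-pile"

    [sources.my_home]
    host = "my-laptop"
    path = "/home/me"
    ignore = ["/home/me/.cache"]

    [sources.my_database]
    host = "db-server"
    helper = "postgres"
    label = "main-db"
    kind = { stdout = "dump.sql" }
    database = "mydb"
    "#;

    fn parse_example() -> eyre::Result<Descriptor> {
        // toml 0.7 is already a datman dependency in this patch series.
        Ok(toml::de::from_str(EXAMPLE_DESCRIPTOR)?)
    }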
+pub async fn load_descriptor(path: &Path) -> eyre::Result { + let text = tokio::fs::read_to_string(path).await?; + let mut descriptor: Descriptor = toml::de::from_str(&text)?; + + let dir = path + .parent() + .context("there must be a parent path for the descriptor file")?; + + // Absolutise pile paths + for (_, pile_path) in descriptor.piles.iter_mut() { + *pile_path = dir + .join(&*pile_path) + .canonicalize() + .context("Failed to canonicalise path in descriptor")?; + } + + Ok(descriptor) +} diff --git a/datman/src/extract.rs b/datman/src/extract.rs new file mode 100644 index 0000000..561283a --- /dev/null +++ b/datman/src/extract.rs @@ -0,0 +1,182 @@ +use crate::datetime::HumanDateTime; +use crate::pointer_names::split_pointer_name; +use chrono::{DateTime, Utc}; +use eyre::{bail, eyre, Context, ContextCompat}; +use std::collections::btree_map::Entry; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; +use std::sync::Arc; +use tracing::{info_span, warn, Instrument}; +use yama::extract; +use yama::extract::flatten_treenode; +use yama::pile_with_cache::PileWithCache; +use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode}; +use yama_wormfile::boxed::BoxedWormFileProvider; + +/// Given a list of source names and conditions to find pointers within, +/// returns a mapping of source names to pointers. +pub async fn select_to_extract( + pwc: &PileWithCache, + sources: BTreeSet, + before: Option, + after: Option, + accept_partial: bool, +) -> eyre::Result> { + let before = before.map(|dt| dt.0.with_timezone(&Utc)); + let after = after.map(|dt| dt.0.with_timezone(&Utc)); + let pointers_list = pwc + .pile + .list_pointers() + .await + .context("failed to list pointers")?; + + select_to_extract_impl(pointers_list, sources, before, after, accept_partial) +} + +/// Given a list of source names and conditions to find pointers within, +/// returns a mapping of source names to pointers. +fn select_to_extract_impl( + pointers_list: Vec, + sources: BTreeSet, + before: Option>, + after: Option>, + accept_partial: bool, +) -> eyre::Result> { + if after.is_some() && before.is_some() { + bail!("Can't specify both before and after!"); + } + + let mut pointers_by_source: BTreeMap = BTreeMap::new(); + + for pointer in pointers_list { + if let Some((source_name, pointer_datetime)) = split_pointer_name(&pointer) { + if !sources.contains(&source_name) { + // Not a source that we're interested in. + continue; + } + if let Some(before) = before { + if before < pointer_datetime { + // datetime is after the 'before' time + continue; + } + } else if let Some(after) = after { + if pointer_datetime < after { + // datetime is before the 'after' time + continue; + } + } + + match pointers_by_source.entry(source_name) { + Entry::Vacant(ve) => { + ve.insert(pointer); + } + Entry::Occupied(mut oe) => { + let current_choice = oe.get_mut(); + let (_, current_datetime) = split_pointer_name(¤t_choice).unwrap(); + let should_replace = if after.is_some() { + // if we want the first one after a time, we want the earliest option! 
+ // so replace if new datetime is earlier than current + pointer_datetime < current_datetime + } else { + // replace if new datetime is after current datetime + current_datetime < pointer_datetime + }; + if should_replace { + *current_choice = pointer; + } + } + } + }; + } + + if pointers_by_source.is_empty() { + bail!("No pointers selected for ANY of the sources: {sources:?}"); + } + + let missing: Vec<&String> = sources + .iter() + .filter(|src| !pointers_by_source.contains_key(*src)) + .collect(); + if !missing.is_empty() { + if accept_partial { + warn!("Some sources didn't have any pointers selected: {missing:?}. Continuing because --accept-partial passed."); + } else { + bail!("Some sources didn't have any pointers selected: {missing:?}. Pass --accept-partial if this is intended anyway."); + } + } + + Ok(pointers_by_source) +} + +pub async fn load_pointers_for_extraction( + pwc: Arc>, + what_to_extract: BTreeMap, +) -> eyre::Result> { + let mut result = BTreeMap::new(); + for (source_name, pointer_name) in &what_to_extract { + let pointer = pwc + .read_pointer_fully_integrated(&pointer_name) + .await? + .context("pointer doesn't exist??")?; + // TODO(ownership): adapt uid/gids here + result.insert(source_name.clone(), pointer.root); + } + Ok(result) +} + +pub fn merge_roots_for_batch_extract(extracts: BTreeMap) -> TreeNode { + let mut children = BTreeMap::new(); + + for (name, entry) in extracts { + if matches!(entry.node, TreeNode::NormalFile { .. }) { + let mut children2 = BTreeMap::new(); + children2.insert(entry.name, entry.node); + children.insert( + name, + TreeNode::Directory { + ownership: FilesystemOwnership { + // TODO(ownership): populate this correctly (current user?) + uid: 0, + gid: 0, + }, + permissions: FilesystemPermissions { mode: 0o700 }, + children: children2, + }, + ); + } else { + children.insert(name, entry.node); + } + } + + TreeNode::Directory { + ownership: FilesystemOwnership { + // TODO(ownership): populate this correctly (current user?) + uid: 0, + gid: 0, + }, + permissions: FilesystemPermissions { mode: 0o700 }, + children, + } +} + +pub async fn extract( + pwc: Arc>, + node: TreeNode, + destination: &Path, +) -> eyre::Result<()> { + let flat = flatten_treenode(&node)?; + drop(node); + + extract::unpack_nonfiles(destination, &flat.nonfiles, false, true).await?; + + let extract_span = info_span!("extract_files"); + extract::unpack_files(&pwc, destination, &flat.files, false, true) + .instrument(extract_span) + .await?; + + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? 
+ .close() + .await?; + Ok(()) +} diff --git a/datman/src/lib.rs b/datman/src/lib.rs index 8b13789..c835c9a 100644 --- a/datman/src/lib.rs +++ b/datman/src/lib.rs @@ -1 +1,6 @@ +pub mod backup; +pub mod descriptor_config; +pub mod extract; +pub mod datetime; +pub mod pointer_names; diff --git a/datman/src/pointer_names.rs b/datman/src/pointer_names.rs new file mode 100644 index 0000000..1721ebe --- /dev/null +++ b/datman/src/pointer_names.rs @@ -0,0 +1,20 @@ +use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; + +pub const POINTER_DATETIME_FORMAT: &'static str = "%F_%T"; +pub const POINTER_NAME_DATETIME_SPLITTER: &'static str = "+"; + +pub fn get_pointer_name_at(source_name: &str, datetime: DateTime) -> String { + format!( + "{}{}{}", + source_name, + POINTER_NAME_DATETIME_SPLITTER, + datetime.format(POINTER_DATETIME_FORMAT).to_string() + ) +} + +pub fn split_pointer_name(pointer_name: &str) -> Option<(String, DateTime)> { + let (source_name, date_time_str) = pointer_name.rsplit_once(POINTER_NAME_DATETIME_SPLITTER)?; + let date_time = NaiveDateTime::parse_from_str(date_time_str, POINTER_DATETIME_FORMAT).ok()?; + let date_time = Utc.from_utc_datetime(&date_time); + Some((source_name.to_owned(), date_time)) +} diff --git a/yama/Cargo.toml b/yama/Cargo.toml index d546e24..55d1e61 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -30,7 +30,7 @@ yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } clap = { version = "4.2.2", features = ["derive"] } -tokio = { version = "1.27.0", features = ["io-std"] } +tokio = { version = "1.28.1", features = ["full"] } appdirs = "0.2.0" twox-hash = "1.6.3" hostname = "0.3.1" diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index 031f499..a6d6b02 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -17,6 +17,7 @@ along with Yama. If not, see . 
use clap::{Parser, Subcommand}; use eyre::{bail, eyre, Context, ContextCompat}; +use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; use std::borrow::Cow; use std::iter::Iterator; @@ -24,32 +25,41 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; -use indicatif::ProgressStyle; use tokio::io::{stdin, AsyncBufReadExt, BufReader}; -use tracing::{info, info_span, warn, Span, Instrument}; -use tracing_indicatif::IndicatifLayer; +use tracing::{info, info_span, warn, Instrument, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; +use tracing_indicatif::IndicatifLayer; use tracing_subscriber::filter::filter_fn; -use tracing_subscriber::Layer; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::Layer; use users::{get_current_gid, get_current_uid}; use yama::extract::flatten_treenode; use yama::init::{generate_master_keyring, pack_keyring}; use yama::open::{open_keyring_interactive, open_pile, pre_open_keyring, update_cache}; use yama::pile_connector::PileConnectionScheme; use yama::scan::create_uidgid_lookup_tables; -use yama::storing::{assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState}; -use yama::{extract, get_hostname, init, PROGRESS_BAR_STYLE, scan}; +use yama::storing::{ + assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState, +}; +use yama::vacuum::forget_chunks::{find_forgettable_chunks, forget_chunks}; +use yama::vacuum::merge_indices::{MERGE_TARGET_SIZE, MERGE_THRESHOLD_SIZE}; +use yama::vacuum::repack_bloblogs_and_indices::{ + get_bloblogs_stats, perform_repack, select_bloblogs_for_repack, +}; +use yama::{check, extract, get_hostname, init, scan, vacuum, PROGRESS_BAR_STYLE}; use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; use yama_midlevel_crypto::chunk_id::ChunkIdKey; use yama_pile::definitions::{ - PackedPileConfig, PileConfig, RecursiveChunkRef, SUPPORTED_YAMA_PILE_VERSION, + IndexBloblogEntry, PackedPileConfig, PileConfig, RecursiveChunkRef, SUPPORTED_YAMA_PILE_VERSION, }; use yama_pile::locks::LockKind; use yama_pile::pointers::Pointer; use yama_pile::tree::unpopulated::ScanEntry; -use yama_pile::tree::{assemble_tree_from_scan_entries, differentiate_node_in_place, FilesystemOwnership, FilesystemPermissions, RootTreeNode, TreeNode}; +use yama_pile::tree::{ + assemble_tree_from_scan_entries, differentiate_node_in_place, FilesystemOwnership, + FilesystemPermissions, RootTreeNode, TreeNode, +}; use yama_pile::FILE_YAMA_CONNECTOR; #[derive(Clone, Debug)] @@ -68,7 +78,7 @@ impl FromStr for PointerName { fn from_str(s: &str) -> Result { if !s .chars() - .all(|c| c.is_alphanumeric() || ['_', '+', '-'].contains(&c)) + .all(|c| c.is_alphanumeric() || ['_', '+', '-', ':'].contains(&c)) { bail!("Bad pointer name: {s:?}"); } @@ -182,9 +192,7 @@ pub enum YamaCommand { }, /// Extract an output stream from a Yama pile. - ExtractStdout { - source: PileAndPointerWithSubTree, - }, + ExtractStdout { source: PileAndPointerWithSubTree }, // TODO Mount { ... }, Check { @@ -198,10 +206,31 @@ pub enum YamaCommand { intensive: bool, }, // TODO lsp, rmp + /// Perform maintenance tasks, usually freeing up space or clumping together files to reduce + /// clutter. + Vacuum { + /// Perform all maintenance and space-saving tasks. + #[arg(long, short = 'a')] + all: bool, - // TODO vacuum + /// Merge indices together. Implied by -a. 
+ #[arg(long, short = 'm')] + merge: bool, - // TODO `locks` to inspect locks + /// Forget chunks from indices. Implied by -a. + /// This process is slow because it involves walking all pointers to see which chunks can be + /// forgotten. + #[arg(long, short = 'f')] + forget: bool, + + /// Repack bloblogs and corresponding indices. Implied by -a. + #[arg(long, short = 'r')] + repack: bool, + + /// Delete unreferenced bloblogs. Implied by -a. + #[arg(long, short = 'd')] + delete_unrefd_bloblogs: bool, + }, // TODO `locks` to inspect locks } #[derive(Subcommand, Clone, Debug)] @@ -236,16 +265,21 @@ pub enum KeyringCommand { }, // TODO ChangePassword } -const PROGRESS_SPANS: &'static [&'static str] = &["store_file", "storing", "unpack_files", "expand_chunkrefs", "extract_files"]; +const PROGRESS_SPANS: &'static [&'static str] = &[ + "store_file", + "storing", + "unpack_files", + "expand_chunkrefs", + "extract_files", +]; #[tokio::main] async fn main() -> eyre::Result<()> { let indicatif_layer = IndicatifLayer::new(); let stderr_writer = indicatif_layer.get_stderr_writer(); - let indicatif_layer = indicatif_layer - .with_filter(filter_fn(|span_metadata| { - span_metadata.target().starts_with("yama") && PROGRESS_SPANS.contains(&span_metadata.name()) - })); + let indicatif_layer = indicatif_layer.with_filter(filter_fn(|span_metadata| { + span_metadata.target().starts_with("yama") && PROGRESS_SPANS.contains(&span_metadata.name()) + })); tracing_subscriber::registry() .with( @@ -307,9 +341,11 @@ async fn main() -> eyre::Result<()> { None } else { let zstd_dict_path = zstd_dict.unwrap(); - Some(Arc::new(tokio::fs::read(&zstd_dict_path) - .await - .with_context(|| format!("failed to read Zstd dict at {zstd_dict_path:?}"))?)) + Some(Arc::new( + tokio::fs::read(&zstd_dict_path).await.with_context(|| { + format!("failed to read Zstd dict at {zstd_dict_path:?}") + })?, + )) }; let pile_config = PileConfig { @@ -408,15 +444,23 @@ async fn main() -> eyre::Result<()> { let store_span = info_span!("storing"); // store_span.pb_set_style(&ProgressStyle::default_bar()); - store_span.pb_set_style(&ProgressStyle::default_bar().template( - PROGRESS_BAR_STYLE, - ).unwrap()); + store_span.pb_set_style( + &ProgressStyle::default_bar() + .template(PROGRESS_BAR_STYLE) + .unwrap(), + ); store_span.pb_set_message("storing files"); - store_span.pb_set_length(pruned_scan_entry_map.values() - .filter(|v| matches!(v, ScanEntry::NormalFile { .. })).count() as u64); + store_span.pb_set_length( + pruned_scan_entry_map + .values() + .filter(|v| matches!(v, ScanEntry::NormalFile { .. })) + .count() as u64, + ); let store_span_entered = store_span.enter(); - let (pipeline, pipeline_job_tx) = StoragePipeline::launch_new(4, pwc.clone()).await?; + let new_unflushed_chunks = Arc::new(Default::default()); + let (pipeline, pipeline_job_tx) = + StoragePipeline::launch_new(4, pwc.clone(), new_unflushed_chunks).await?; let source2 = source.clone(); let (submitter_task, receiver_task) = tokio::join!( @@ -426,8 +470,13 @@ async fn main() -> eyre::Result<()> { if let ScanEntry::NormalFile { .. 
} = scan_entry { let name = std::str::from_utf8(name_bytes.as_slice()) .context("name is not str")?; + let path = if name != "" { + source2.join(name) + } else { + source2.clone() + }; pipeline_job_tx - .send_async((name.to_owned(), source2.join(name))) + .send_async((name.to_owned(), path)) .await .map_err(|_| eyre!("unable to send to pipeline."))?; } @@ -497,8 +546,11 @@ async fn main() -> eyre::Result<()> { .await .context("failed to write pointer")?; - Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; - }, + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? + .close() + .await?; + } YamaCommand::StoreStdin { destination, overwrite, @@ -514,7 +566,7 @@ async fn main() -> eyre::Result<()> { LockKind::Shared, format!("{} store {:?}", get_hostname(), destination.pointer), ) - .await?; + .await?; update_cache(&pwc).await?; let pwc = Arc::new(pwc); @@ -522,30 +574,54 @@ async fn main() -> eyre::Result<()> { let store_span = info_span!("storing"); // store_span.pb_set_style(&ProgressStyle::default_bar()); // TODO INDETERMINATE PROGRESS BAR with bytes shown? - store_span.pb_set_style(&ProgressStyle::default_bar().template( - PROGRESS_BAR_STYLE, - ).unwrap()); + store_span.pb_set_style( + &ProgressStyle::default_bar() + .template(PROGRESS_BAR_STYLE) + .unwrap(), + ); store_span.pb_set_message("storing files"); store_span.pb_set_length(1u64); // TODO Dirty - let store_span_entered = store_span.enter(); + let _store_span_entered = store_span.enter(); - - let mut storing_state = StoringState::new(pwc.clone()).await.context("failed to create storing state")?; + let new_unflushed_chunks = Arc::new(Default::default()); + let mut storing_state = StoringState::new(pwc.clone(), new_unflushed_chunks) + .await + .context("failed to create storing state")?; let mut sbw = StoringBloblogWriters::default(); - let stdin = std::io::BufReader::new(io_streams::StreamReader::stdin().context("failed to open stdin")?); - let (chunkref, size) = storing_state.store_full_stream(stdin, &mut sbw).context("Failed to store stream into Yama pile")?; + let stdin = std::io::BufReader::new( + io_streams::StreamReader::stdin().context("failed to open stdin")?, + ); + let (chunkref, size) = storing_state + .store_full_stream(stdin, &mut sbw) + .context("Failed to store stream into Yama pile")?; - sbw.finish_bloblogs(&mut storing_state).await.context("Failed to finish bloblogs")?; + sbw.finish_bloblogs(&mut storing_state) + .await + .context("Failed to finish bloblogs")?; info!("Stream stored, writing indices..."); // Write indices for the new bloblogs we have created. This is a prerequisite for creating a pointer. 
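+        // (Added note) Each new bloblog's chunk map is wrapped below in an IndexBloblogEntry with
+        // forgotten_bytes: 0, since a freshly written bloblog has had nothing forgotten from it yet.
+        // Presumably the vacuum 'forget' pass added elsewhere in this series is what raises that
+        // counter later.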
let chunkmaps = storing_state.new_bloblogs; - assemble_and_write_indices(&pwc, chunkmaps) - .await - .context("failed to assemble and write indices")?; + assemble_and_write_indices( + &pwc, + chunkmaps + .into_iter() + .map(|(k, nb)| { + ( + k, + IndexBloblogEntry { + chunks: nb, + forgotten_bytes: 0, + }, + ) + }) + .collect(), + ) + .await + .context("failed to assemble and write indices")?; info!("All indices stored, writing pointer..."); @@ -553,7 +629,10 @@ async fn main() -> eyre::Result<()> { let uid = get_current_uid() as u16; let gid = get_current_gid() as u16; let tree = TreeNode::NormalFile { - mtime: SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_millis() as u64).unwrap_or(0), + mtime: SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0), ownership: FilesystemOwnership { uid, gid }, permissions: FilesystemPermissions { mode: 0o600 }, size, @@ -579,8 +658,11 @@ async fn main() -> eyre::Result<()> { .await .context("failed to write pointer")?; - Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; - }, + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? + .close() + .await?; + } YamaCommand::Extract { source, destination, @@ -593,13 +675,15 @@ async fn main() -> eyre::Result<()> { let keyring = pre_open_keyring(&pile_connector_path).await?; let keyring = open_keyring_interactive(keyring).await?; - let pwc = Arc::new(open_pile( - &pile_connector_path, - keyring, - LockKind::Shared, - format!("{} store {:?}", get_hostname(), source.pointer), - ) - .await?); + let pwc = Arc::new( + open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} store {:?}", get_hostname(), source.pointer), + ) + .await?, + ); update_cache(&pwc).await?; let pointer = pwc @@ -639,11 +723,12 @@ async fn main() -> eyre::Result<()> { .instrument(extract_span) .await?; - Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; - }, - YamaCommand::ExtractStdout { - source, - } => { + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? + .close() + .await?; + } + YamaCommand::ExtractStdout { source } => { let pile_connector_path = source .pile_path .as_ref() @@ -652,13 +737,15 @@ async fn main() -> eyre::Result<()> { let keyring = pre_open_keyring(&pile_connector_path).await?; let keyring = open_keyring_interactive(keyring).await?; - let pwc = Arc::new(open_pile( - &pile_connector_path, - keyring, - LockKind::Shared, - format!("{} store {:?}", get_hostname(), source.pointer), - ) - .await?); + let pwc = Arc::new( + open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} store {:?}", get_hostname(), source.pointer), + ) + .await?, + ); update_cache(&pwc).await?; let pointer = pwc @@ -690,9 +777,7 @@ async fn main() -> eyre::Result<()> { }; let chunkref = match node { - TreeNode::NormalFile { content, .. } => { - content - } + TreeNode::NormalFile { content, .. } => content, TreeNode::Directory { .. 
} => { bail!("Can't extract `Directory` to stdout!"); } @@ -705,13 +790,126 @@ async fn main() -> eyre::Result<()> { }; let extract_span = info_span!("extract_files"); - let stream = std::io::BufWriter::new(io_streams::StreamWriter::stdout().context("failed to open stdout")?); + let stream = std::io::BufWriter::new( + io_streams::StreamWriter::stdout().context("failed to open stdout")?, + ); extract::unpack_sync_stream(&pwc, *chunkref, stream) .instrument(extract_span) .await?; - Arc::try_unwrap(pwc).map_err(|_| eyre!("pwc still in use; can't close down gracefully"))?.close().await?; - }, + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? + .close() + .await?; + } + YamaCommand::Check { + pointers, + shallow, + intensive, + } => { + if !pointers && !shallow && !intensive { + bail!("Check level not chosen. Try -2"); + } + if pointers { + bail!("pointers check not implemented yet. Try -2"); + } + if intensive { + bail!("intensive check not implemented yet. Try -2"); + } + + let pile_connector_path = Path::new("."); + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + let pwc = Arc::new( + open_pile( + &pile_connector_path, + keyring, + LockKind::Shared, + format!("{} check", get_hostname()), + ) + .await?, + ); + update_cache(&pwc).await?; + + if shallow { + check::check_pointers_point_to_indexed_chunks(&pwc) + .await + .context("shallow check failed")?; + } + + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? + .close() + .await?; + } + YamaCommand::Vacuum { + all, + merge, + forget, + repack, + delete_unrefd_bloblogs, + } => { + let pile_connector_path = Path::new("."); + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + // TODO figure out how we use the pendingexclusive thing again... + let pwc = Arc::new( + open_pile( + &pile_connector_path, + keyring, + LockKind::Exclusive, + format!("{} vacuum", get_hostname()), + ) + .await?, + ); + update_cache(&pwc).await?; + + if all || merge { + let to_merge = vacuum::merge_indices::select_indices_for_merge( + &pwc, + MERGE_TARGET_SIZE, + MERGE_THRESHOLD_SIZE, + ) + .await + .context("failed to select indices for merge")?; + vacuum::merge_indices::merge_indices(&pwc, to_merge) + .await + .context("failed to merge indices")?; + update_cache(&pwc).await?; + } + + if all || forget { + // TODO: allow running on smaller sets of indices than all of them + let all_indices = { + let mut cache_conn = pwc.localcache.read().await?; + cache_conn.list_indices().await? + }; + + let forgettable_chunks = find_forgettable_chunks(&pwc, all_indices.clone()).await?; + info!("{} chunks can be forgotten", forgettable_chunks.len()); + forget_chunks(&pwc, all_indices, forgettable_chunks).await?; + update_cache(&pwc).await?; + } + + if all || repack { + let bloblog_stats = get_bloblogs_stats(&pwc).await?; + let to_repack = select_bloblogs_for_repack(bloblog_stats).await?; + info!("{} repack groups to be processed.", to_repack.len()); + perform_repack(pwc.clone(), to_repack).await?; + update_cache(&pwc).await?; + } + + if all || delete_unrefd_bloblogs { + todo!(); + } + + Arc::try_unwrap(pwc) + .map_err(|_| eyre!("pwc still in use; can't close down gracefully"))? 
+ .close() + .await?; + } _other => todo!(), } diff --git a/yama/src/bin/yamascan.rs b/yama/src/bin/yamascan.rs index 6b88a87..bf58473 100644 --- a/yama/src/bin/yamascan.rs +++ b/yama/src/bin/yamascan.rs @@ -99,16 +99,15 @@ async fn main() -> eyre::Result<()> { &root_display_node, false, ) - }, - YamaScanCommand::Ignore { - path, unanchored - } => { + } + YamaScanCommand::Ignore { path, unanchored } => { let mut oo = OpenOptions::new() .read(true) .write(true) .create(true) .truncate(false) - .open(".yamaignore").await + .open(".yamaignore") + .await .context("failed to open .yamaignore for r/w")?; let pos = oo.seek(SeekFrom::End(0)).await?; if pos > 1 { @@ -127,7 +126,7 @@ async fn main() -> eyre::Result<()> { oo.flush().await?; drop(oo); - }, + } _other => todo!(), } diff --git a/yama/src/check.rs b/yama/src/check.rs new file mode 100644 index 0000000..7b08197 --- /dev/null +++ b/yama/src/check.rs @@ -0,0 +1,64 @@ +use crate::extract::expand_chunkrefs; +use crate::pile_with_cache::PileWithCache; +use eyre::{bail, ContextCompat}; +use std::collections::BTreeSet; +use std::sync::Arc; +use tracing::info; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_pile::tree::TreeNode; +use yama_wormfile::boxed::BoxedWormFileProvider; + +/// Check that all pointers point to chunks that exist **in our local cache**. +pub async fn check_pointers_point_to_indexed_chunks( + pwc: &Arc>, +) -> eyre::Result<()> { + let pointer_names = pwc.pile.list_pointers().await?; + let mut rcrs_to_check = BTreeSet::new(); + for pointer_name in &pointer_names { + let pointer = pwc + .pile + .read_pointer(pointer_name) + .await? + .context("pointer vanished")?; + if let Some(parent_name) = pointer.parent { + if !pointer_names.contains(pointer_name) { + bail!("{parent_name:?}, the parent of {pointer_name:?}, does not exist"); + } + } + + pointer + .root + .node + .visit( + &mut |node, _| { + if let TreeNode::NormalFile { content, .. } = node { + rcrs_to_check.insert(*content); + } + Ok(()) + }, + String::new(), + ) + .unwrap(); + } + + let chunk_ids: BTreeSet = + expand_chunkrefs(pwc, rcrs_to_check.into_iter().map(|x| ((), x))) + .await? + .into_iter() + .map(|(_, x)| x) + .flatten() + .collect(); + + info!("{} chunks to check for existence", chunk_ids.len()); + + let mut cache = pwc.localcache.read().await?; + + let resolved_chunks = cache.locate_chunks(&chunk_ids).await?; + + if chunk_ids.len() != resolved_chunks.len() { + bail!("Not all chunk IDs could be resolved. 
TODO: this check error is currently not granular enough."); + } + info!("All {} chunks accounted for!", resolved_chunks.len()); + + Ok(()) +} diff --git a/yama/src/extract.rs b/yama/src/extract.rs index cf5275d..61a2e8a 100644 --- a/yama/src/extract.rs +++ b/yama/src/extract.rs @@ -1,17 +1,18 @@ use crate::pile_with_cache::PileWithCache; use crate::retriever::decompressor::PipelineDecompressor; use crate::retriever::{create_fixed_retriever, FileId, JobChunkReq, JobId, RetrieverResp}; -use eyre::{bail, ensure, Context, ContextCompat, eyre}; +use crate::PROGRESS_BAR_STYLE; +use eyre::{bail, ensure, eyre, Context, ContextCompat}; use flume::Receiver; +use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; use std::cmp::Reverse; use std::collections::{BTreeMap, BTreeSet}; use std::fs::Permissions; use std::io::Write; use std::os::unix::fs::PermissionsExt; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; -use indicatif::ProgressStyle; use tokio::fs::OpenOptions; use tokio::io::AsyncWriteExt; use tokio::task::JoinSet; @@ -22,7 +23,6 @@ use yama_pile::definitions::{BloblogId, RecursiveChunkRef}; use yama_pile::tree::unpopulated::ScanEntry; use yama_pile::tree::{FilesystemPermissions, TreeNode}; use yama_wormfile::boxed::BoxedWormFileProvider; -use crate::PROGRESS_BAR_STYLE; #[derive(Clone, Debug, Default)] pub struct FlattenedTree { @@ -93,7 +93,7 @@ pub fn flatten_treenode(root_node: &TreeNode) -> eyre::Result { /// Create directories and symbolic links. pub async fn unpack_nonfiles( - root: &PathBuf, + root: &Path, nonfiles: &PatriciaMap, restore_ownership: bool, restore_permissions: bool, @@ -133,7 +133,7 @@ pub async fn unpack_nonfiles( // TODO(perf): move out file writes into separate tasks... pub async fn unpack_files( pwc: &Arc>, - root: &PathBuf, + root: &Path, files: &PatriciaMap<(ScanEntry, RecursiveChunkRef)>, restore_ownership: bool, restore_permissions: bool, @@ -149,7 +149,10 @@ pub async fn unpack_files( ) .await?; - let total_chunks = expanded_chunkrefs.iter().map(|(_, cs)| cs.len() as u64).sum::(); + let total_chunks = expanded_chunkrefs + .iter() + .map(|(_, cs)| cs.len() as u64) + .sum::(); let unpack_span = info_span!("unpack_files"); async move { @@ -236,24 +239,26 @@ pub async fn unpack_files( }.instrument(unpack_span).await } -pub async fn unpack_sync_stream(pwc: &Arc>, - chunkref: RecursiveChunkRef, - mut stream: impl Write, +pub async fn unpack_sync_stream( + pwc: &Arc>, + chunkref: RecursiveChunkRef, + mut stream: impl Write, ) -> eyre::Result<()> { - let expanded_chunkrefs = expand_chunkrefs( - pwc, - vec![((), chunkref)].into_iter(), - ) - .await?; + let expanded_chunkrefs = expand_chunkrefs(pwc, vec![((), chunkref)].into_iter()).await?; - let total_chunks = expanded_chunkrefs.iter().map(|(_, cs)| cs.len() as u64).sum::(); + let total_chunks = expanded_chunkrefs + .iter() + .map(|(_, cs)| cs.len() as u64) + .sum::(); let unpack_span = info_span!("unpack_files"); async move { let unpack_span = Span::current(); - unpack_span.pb_set_style(&ProgressStyle::default_bar().template( - PROGRESS_BAR_STYLE, - ).unwrap()); + unpack_span.pb_set_style( + &ProgressStyle::default_bar() + .template(PROGRESS_BAR_STYLE) + .unwrap(), + ); unpack_span.pb_set_message("unpack"); unpack_span.pb_set_length(total_chunks); @@ -264,16 +269,14 @@ pub async fn unpack_sync_stream(pwc: &Arc>, while let Ok(next_part) = file_part_retriever.recv_async().await { match next_part { RetrieverResp::Blob { blob, .. 
} => { - tokio::task::block_in_place(|| { - stream.write_all(&blob) - }).context("Failed to write to output stream on Blob")?; + tokio::task::block_in_place(|| stream.write_all(&blob)) + .context("Failed to write to output stream on Blob")?; unpack_span.pb_inc(1); } RetrieverResp::JobComplete(_) => { - tokio::task::block_in_place(|| { - stream.flush() - }).context("Failed to flush output stream on JobComplete")?; + tokio::task::block_in_place(|| stream.flush()) + .context("Failed to flush output stream on JobComplete")?; done = true; } } @@ -284,10 +287,17 @@ pub async fn unpack_sync_stream(pwc: &Arc>, } Ok(()) - }.instrument(unpack_span).await + } + .instrument(unpack_span) + .await } -async fn file_unpacker_writer(path: PathBuf, permissions: FilesystemPermissions, restore_permissions: bool, rx: Receiver>>) -> eyre::Result<()> { +async fn file_unpacker_writer( + path: PathBuf, + permissions: FilesystemPermissions, + restore_permissions: bool, + rx: Receiver>>, +) -> eyre::Result<()> { let mut oo = OpenOptions::new(); oo.write(true).create_new(true); if restore_permissions { @@ -301,15 +311,12 @@ async fn file_unpacker_writer(path: PathBuf, permissions: FilesystemPermissions, loop { match rx.recv_async().await { Ok(Some(next_block)) => { - file.write_all(&next_block) - .await?; - }, + file.write_all(&next_block).await?; + } Ok(None) => { - file.flush() - .await - .context("failed to flush")?; + file.flush().await.context("failed to flush")?; return Ok(()); - }, + } Err(_) => { bail!("rx for file unpacking into {path:?} disconnected unexpectedly"); } @@ -317,7 +324,7 @@ async fn file_unpacker_writer(path: PathBuf, permissions: FilesystemPermissions, } } -async fn expand_chunkrefs( +pub(crate) async fn expand_chunkrefs( pwc: &Arc>, chunkrefs: impl Iterator, ) -> eyre::Result)>> { @@ -337,13 +344,21 @@ async fn expand_chunkrefs( } let ec_span = info_span!("expand_chunkrefs"); - ec_span.pb_set_style(&ProgressStyle::default_bar().template( - PROGRESS_BAR_STYLE, - ).unwrap()); - ec_span.pb_set_length(ts_and_chunks.iter().map(|(_, cs)| cs.len() as u64).sum::()); + ec_span.pb_set_style( + &ProgressStyle::default_bar() + .template(PROGRESS_BAR_STYLE) + .unwrap(), + ); + ec_span.pb_set_length( + ts_and_chunks + .iter() + .map(|(_, cs)| cs.len() as u64) + .sum::(), + ); ec_span.pb_set_message(&format!("resolve (d={next_depth})")); let expanded_ts_and_chunks = expand_chunkrefs_one_layer(pwc, ts_and_chunks) - .instrument(ec_span).await?; + .instrument(ec_span) + .await?; by_depth .entry(Reverse(next_depth - 1)) .or_default() @@ -413,7 +428,7 @@ async fn lookup_chunkrefs_and_create_retriever( Ok((retriever, out_by_job)) } -async fn expand_chunkrefs_one_layer( +pub(crate) async fn expand_chunkrefs_one_layer( pwc: &Arc>, input: Vec<(T, Vec)>, ) -> eyre::Result)>> { diff --git a/yama/src/lib.rs b/yama/src/lib.rs index d391b1c..fbb9589 100644 --- a/yama/src/lib.rs +++ b/yama/src/lib.rs @@ -1,6 +1,7 @@ pub mod init; pub mod open; +pub mod check; pub mod extract; pub mod scan; pub mod storing; @@ -11,7 +12,8 @@ pub mod pile_with_cache; pub mod retriever; -pub const PROGRESS_BAR_STYLE: &'static str = "[{elapsed_precise}]/[{eta}] {wide_bar:.cyan/blue} {pos:>7}/{len:7} {msg}"; +pub const PROGRESS_BAR_STYLE: &'static str = + "[{elapsed_precise}]/[{eta}] {wide_bar:.cyan/blue} {pos:>7}/{len:7} {msg}"; pub fn get_hostname() -> String { hostname::get() diff --git a/yama/src/open.rs b/yama/src/open.rs index 3ccc6aa..ca7d86f 100644 --- a/yama/src/open.rs +++ b/yama/src/open.rs @@ -4,7 +4,7 @@ use eyre::{bail, Context, 
ContextCompat}; use std::borrow::Cow; use std::collections::BTreeSet; use std::hash::{Hash, Hasher}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use tokio::io::{AsyncBufReadExt, BufReader}; use tracing::debug; @@ -98,7 +98,10 @@ pub async fn open_pile( connection_scheme.hash(&mut hasher); let u64_hash = hasher.finish(); - let base_name = connector_in_dir + let canon_connector_in_dir = connector_in_dir + .canonicalize() + .unwrap_or(connector_in_dir.to_owned()); + let base_name = canon_connector_in_dir .file_name() .map(|f| f.to_string_lossy()) .unwrap_or(Cow::Borrowed("_")); @@ -165,3 +168,16 @@ pub async fn update_cache(pwc: &PileWithCache) -> eyre::R Ok(()) } + +pub async fn open_lock_and_update_cache( + pile_connector_path: PathBuf, + lock_name: String, +) -> eyre::Result>> { + let keyring = pre_open_keyring(&pile_connector_path).await?; + let keyring = open_keyring_interactive(keyring).await?; + + let pwc = open_pile(&pile_connector_path, keyring, LockKind::Shared, lock_name).await?; + update_cache(&pwc).await?; + + Ok(Arc::new(pwc)) +} diff --git a/yama/src/retriever.rs b/yama/src/retriever.rs index 2f7cfa2..47bbe30 100644 --- a/yama/src/retriever.rs +++ b/yama/src/retriever.rs @@ -48,6 +48,7 @@ struct FileRegionMarker { pub subjob: u32, } +#[derive(Debug)] struct OpenFileState { pub req_tx: Sender, pub offset: u64, @@ -61,16 +62,13 @@ struct OpenFileReq { pub subjob: u32, } +#[derive(Debug)] struct ActiveJobState { pub subjobs: Vec, pub next_subjob: u32, pub inflight: u32, } -pub struct Retriever { - job_tx: Sender<(JobId, Vec)>, -} - struct RetrieverInternals { pwc: Arc>, jobs_queue: BTreeMap>, @@ -141,6 +139,7 @@ impl RetrieverInternals { offset: u64, length: u64, ) -> eyre::Result<()> { + // debug!("sched {job:?}->{subjob:?}"); open_file .req_tx .send_async(OpenFileReq { @@ -205,7 +204,8 @@ impl RetrieverInternals { }) .await .expect("completions shut"); - // eprintln!("completion of{next_job:?}"); + + // debug!("read,acking! {:?}", next_job); ack_tx.send_async(next_job.job).await?; } @@ -213,7 +213,12 @@ impl RetrieverInternals { } async fn retrieval_task(&mut self) -> eyre::Result<()> { + // let mut icount = 0u64; loop { + // icount += 1; + // debug!("[{icount}] active jobs {:#?}", self.active_jobs); + // debug!("[{icount}] open files {:#?}", self.open_files); + // 0. Try to progress open jobs if they are staring right at the bytes they need... let mut to_remove = Vec::new(); for (active_job_id, active_job) in &mut self.active_jobs { @@ -226,9 +231,17 @@ impl RetrieverInternals { to_remove.push(*active_job_id); continue; } + // Which file we are 'staring at' and requesting a run of chunks from + let mut stare_file = None; 'single_job_staring: loop { let desired_blob = &active_job.subjobs[active_job.next_subjob as usize]; + if stare_file.is_some() && stare_file != Some(desired_blob.file) { + // We have changed which file we are looking at, we can't request any further + // because they might get retrieved out of order. + break 'single_job_staring; + } if let Some(open_file) = self.open_files.get_mut(&desired_blob.file) { + stare_file = Some(desired_blob.file); if open_file.offset == desired_blob.offset { Self::file_request( open_file, @@ -255,12 +268,15 @@ impl RetrieverInternals { // this job is to be finished! 
break 'single_job_staring; } + } else { + break 'single_job_staring; } } else { break 'single_job_staring; } } } + for remove in to_remove { self.active_jobs.remove(&remove); // eprintln!("job complete {remove:?}"); @@ -354,6 +370,7 @@ impl RetrieverInternals { files_to_open.insert(desired_blob.file); } } + if !files_to_open.is_empty() { for file in files_to_open { self.open_file(file).await?; diff --git a/yama/src/retriever/decompressor.rs b/yama/src/retriever/decompressor.rs index 5bdbc5b..0720888 100644 --- a/yama/src/retriever/decompressor.rs +++ b/yama/src/retriever/decompressor.rs @@ -175,10 +175,10 @@ impl PipelineDecompressor { ); } - let state = self - .processing - .get_mut(&job) - .context("bad job/not starting at 0 for job")?; + // debug!("blob {job:?} {subjob:?}"); + let state = self.processing.get_mut(&job).with_context(|| { + format!("bad job/not starting at 0 for job {job:?} (subjob={subjob:?})") + })?; ensure!( state.next_enqueue_subjob == subjob, "out of order Blob commands" @@ -196,6 +196,8 @@ impl PipelineDecompressor { .context("bad job to complete")?; state.complete = true; + // debug!("complete {job:?}"); + let can_remove = state.next_submit_subjob == state.next_enqueue_subjob; if can_remove { diff --git a/yama/src/scan.rs b/yama/src/scan.rs index 2f6631a..dba486f 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -1,4 +1,4 @@ -use eyre::{bail, eyre, Context}; +use eyre::{bail, eyre, Context, ContextCompat}; use ignore::WalkBuilder; use patricia_tree::PatriciaMap; use std::collections::{BTreeMap, BTreeSet}; @@ -95,6 +95,19 @@ pub fn relative_path(base: &Path, leaf: &Path) -> Option { /// Aborts if any errors (permission, bad .yamaignore files, etc) are encountered. /// In the future, we possibly want to consider allowing pub fn scan(root: &Path, ignores: &Vec) -> eyre::Result> { + let mut entries: PatriciaMap = PatriciaMap::new(); + + if !root.is_dir() { + let metadata = std::fs::symlink_metadata(root).context("reading metadata of root")?; + entries.insert( + "", + scan_one_no_recurse(root, metadata) + .context("failed to generate scan entry for root")? + .context("root probably doesn't exist, or is ignored?")?, + ); + return Ok(entries); + } + let mut walker = WalkBuilder::new(root); walker .standard_filters(false) @@ -108,8 +121,6 @@ pub fn scan(root: &Path, ignores: &Vec) -> eyre::Result = PatriciaMap::new(); - for entry in walker { let entry = entry?; diff --git a/yama/src/storing.rs b/yama/src/storing.rs index 0331d94..cd8c65c 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -5,6 +5,7 @@ use fastcdc::v2020::{FastCDC, StreamCDC}; use flume::{Receiver, RecvError, SendError, Sender}; use std::cmp::Reverse; use std::collections::{BTreeMap, BTreeSet}; +use std::fmt::Debug; use std::io::Read; use std::path::{Path, PathBuf}; use std::pin::Pin; @@ -47,7 +48,10 @@ pub struct StoringState { } impl StoringState { - pub async fn new(pwc: Arc>) -> eyre::Result { + pub async fn new( + pwc: Arc>, + new_unflushed_chunks: Arc>, + ) -> eyre::Result { let compressor = match pwc.pile.pile_config.zstd_dict.as_ref() { None => { Compressor::new(get_zstd_level()).context("can't create dictless compressor")? 
@@ -59,7 +63,7 @@ impl StoringState { let chunk_id_key = pwc.pile.pile_config.chunk_id_key; Ok(StoringState { cache_conn: pwc.localcache.read().await?, - new_unflushed_chunks: Arc::new(Default::default()), + new_unflushed_chunks, new_bloblogs: vec![], pwc, chunk_id_key, @@ -130,6 +134,35 @@ impl StoringState { Ok(slot.as_mut().unwrap()) } + /// For internal use only. + fn process_chunk( + &mut self, + chunk_bytes: &[u8], + result: &mut Vec, + slot: &mut Option>>>, + ) -> eyre::Result<()> { + let chunk_id = ChunkId::compute(chunk_bytes, &self.chunk_id_key); + result.push(chunk_id); + let is_new = Handle::current().block_on(async { + Ok::( + self.cache_conn.is_chunk_new(chunk_id).await? + && self.new_unflushed_chunks.insert(chunk_id), + ) + })?; + + if is_new { + let compressed_bytes = self.compressor.compress(&chunk_bytes)?; + + Handle::current().block_on(async { + let writer = self.obtain_bloblog_writer(slot).await?; + writer.write_chunk(chunk_id, &compressed_bytes).await?; + Ok::<(), eyre::Report>(()) + })?; + } + + Ok(()) + } + fn store_full_slice_returning_chunks( &mut self, store_slice: &[u8], @@ -137,26 +170,14 @@ impl StoringState { ) -> eyre::Result> { task::block_in_place(|| { let mut result = Vec::new(); + for chunk in FastCDC::new(store_slice, FASTCDC_MIN, FASTCDC_AVG, FASTCDC_MAX) { let chunk_bytes = &store_slice[chunk.offset..chunk.offset + chunk.length]; - let chunk_id = ChunkId::compute(chunk_bytes, &self.chunk_id_key); - result.push(chunk_id); - let is_new = Handle::current().block_on(async { - Ok::( - self.cache_conn.is_chunk_new(chunk_id).await? - && self.new_unflushed_chunks.insert(chunk_id), - ) - })?; + self.process_chunk(chunk_bytes, &mut result, slot)? + } - if is_new { - let compressed_bytes = self.compressor.compress(&chunk_bytes)?; - - Handle::current().block_on(async { - let writer = self.obtain_bloblog_writer(slot).await?; - writer.write_chunk(chunk_id, &compressed_bytes).await?; - Ok::<(), eyre::Report>(()) - })?; - } + if result.is_empty() { + self.process_chunk(&[], &mut result, slot)?; } Ok(result) @@ -175,24 +196,11 @@ impl StoringState { let chunk = chunk.context("failed to read in for StreamCDC")?; let chunk_bytes = chunk.data.as_slice(); stream_length += chunk_bytes.len() as u64; - let chunk_id = ChunkId::compute(chunk_bytes, &self.chunk_id_key); - result.push(chunk_id); - let is_new = Handle::current().block_on(async { - Ok::( - self.cache_conn.is_chunk_new(chunk_id).await? 
- && self.new_unflushed_chunks.insert(chunk_id), - ) - })?; + self.process_chunk(chunk_bytes, &mut result, slot)?; + } - if is_new { - let compressed_bytes = self.compressor.compress(&chunk_bytes)?; - - Handle::current().block_on(async { - let writer = self.obtain_bloblog_writer(slot).await?; - writer.write_chunk(chunk_id, &compressed_bytes).await?; - Ok::<(), eyre::Report>(()) - })?; - } + if result.is_empty() { + self.process_chunk(&[], &mut result, slot)?; } Ok((result, stream_length)) @@ -257,10 +265,13 @@ impl StoringState { depth += 1; } - Ok((RecursiveChunkRef { - chunk_id: chunk_ids[0], - depth, - }, stream_length)) + Ok(( + RecursiveChunkRef { + chunk_id: chunk_ids[0], + depth, + }, + stream_length, + )) } } @@ -276,14 +287,14 @@ async fn store_file( Ok((chunkref, size_of_file as u64)) } -pub struct StoragePipeline { - result_rx: Receiver<(String, RecursiveChunkRef, u64)>, +pub struct StoragePipeline { + result_rx: Receiver<(JobName, RecursiveChunkRef, u64)>, join_set: JoinSet>, } -async fn storage_pipeline_worker( - job_rx: Receiver<(String, PathBuf)>, - result_tx: Sender<(String, RecursiveChunkRef, u64)>, +async fn storage_pipeline_worker( + job_rx: Receiver<(JobName, PathBuf)>, + result_tx: Sender<(JobName, RecursiveChunkRef, u64)>, mut storing_state: StoringState, ) -> eyre::Result { let mut bloblog_writers = StoringBloblogWriters::default(); @@ -292,22 +303,24 @@ async fn storage_pipeline_worker( while let Ok((job_id, file_path)) = job_rx.recv_async().await { let span = info_span!("store_file", file=?file_path); - let span_enter = span.enter(); - // debug!("SPW job {job_id:?}"); - let (rec_chunk_ref, file_length) = - store_file(&file_path, &mut storing_state, &mut bloblog_writers) - .await - .with_context(|| format!("failed to store {file_path:?}"))?; - // debug!("SPW good {job_id:?}"); - if let Err(SendError(to_be_sent)) = result_tx - .send_async((job_id, rec_chunk_ref, file_length)) - .await - { - bail!("Can't return result for {to_be_sent:?} — result_tx shut down."); - } - drop(span_enter); - drop(span); + async { + // debug!("SPW job {job_id:?}"); + let (rec_chunk_ref, file_length) = + store_file(&file_path, &mut storing_state, &mut bloblog_writers) + .await + .with_context(|| format!("failed to store {file_path:?}"))?; + // debug!("SPW good {job_id:?}"); + if let Err(SendError(to_be_sent)) = result_tx + .send_async((job_id, rec_chunk_ref, file_length)) + .await + { + bail!("Can't return result for {to_be_sent:?} — result_tx shut down."); + } + Ok(()) + } + .instrument(span) + .await? 
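+            // Wrapping the per-file work in an async block and attaching the span via .instrument keeps the span entered across the awaits above; a plain span.enter() guard would not survive the yield points.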
} debug!("SPW shutdown"); @@ -322,11 +335,12 @@ fn get_zstd_level() -> i32 { return 12; } -impl StoragePipeline { +impl StoragePipeline { pub async fn launch_new( workers: u32, pwc: Arc>, - ) -> eyre::Result<(StoragePipeline, Sender<(String, PathBuf)>)> { + new_unflushed_chunks: Arc>, + ) -> eyre::Result<(StoragePipeline, Sender<(JobName, PathBuf)>)> { let (job_tx, job_rx) = flume::bounded(16); let (result_tx, result_rx) = flume::bounded(4); @@ -334,7 +348,9 @@ impl StoragePipeline { for spw_num in 0..workers { let job_rx = job_rx.clone(); let result_tx = result_tx.clone(); - let storing_state = StoringState::new(pwc.clone()).await.context("failed to create storing state")?; + let storing_state = StoringState::new(pwc.clone(), new_unflushed_chunks.clone()) + .await + .context("failed to create storing state")?; // make a logging span for the Storage Pipeline Workers let spw_span = info_span!("spw", n = spw_num); join_set.spawn( @@ -359,48 +375,48 @@ impl StoragePipeline { } #[inline] - pub async fn next_result(&self) -> Result<(String, RecursiveChunkRef, u64), RecvError> { + pub async fn next_result(&self) -> Result<(JobName, RecursiveChunkRef, u64), RecvError> { self.result_rx.recv_async().await } /// Must be sure that all results have been collected first. pub async fn finish_into_chunkmaps( mut self, - ) -> eyre::Result)>> { + ) -> eyre::Result> { if let Ok(msg) = self.result_rx.recv_async().await { bail!("Haven't processed all results yet! {msg:?}"); } - let mut chunkmap = Vec::new(); + let mut chunkmap = BTreeMap::new(); while let Some(join_resres) = self.join_set.join_next().await { - chunkmap.extend(join_resres??.new_bloblogs); + chunkmap.extend(join_resres??.new_bloblogs.into_iter().map(|(k, nb)| { + ( + k, + IndexBloblogEntry { + chunks: nb, + forgotten_bytes: 0, + }, + ) + })); } Ok(chunkmap) } } -fn assemble_indices(chunkmap: Vec<(BloblogId, BTreeMap)>) -> Vec { +fn assemble_indices(chunkmap: BTreeMap) -> Vec { let mut sorted_map = BTreeMap::new(); for (idx, chunkmap) in chunkmap.into_iter().enumerate() { - let size_of_chunkmap = chunkmap.1.len() + 1; + let size_of_chunkmap = chunkmap.1.chunks.len() + 1; sorted_map.insert(Reverse((size_of_chunkmap, idx)), chunkmap); } let mut indices = Vec::new(); - while let Some(k) = sorted_map.keys().cloned().next() { - let (Reverse((size, _)), (bloblog_id, bloblog_chunks)) = - sorted_map.remove_entry(&k).unwrap(); + while let Some((Reverse((size, _)), (bloblog_id, bloblog_chunks))) = sorted_map.pop_first() { let mut new_index_contents = BTreeMap::new(); - new_index_contents.insert( - bloblog_id, - IndexBloblogEntry { - chunks: bloblog_chunks, - forgotten_bytes: 0, - }, - ); + new_index_contents.insert(bloblog_id, bloblog_chunks); let mut new_index_size_so_far = size; while new_index_size_so_far < DESIRED_INDEX_SIZE_ENTRIES && !sorted_map.is_empty() { @@ -417,13 +433,9 @@ fn assemble_indices(chunkmap: Vec<(BloblogId, BTreeMap)>) let (Reverse((add_size, _)), (bloblog_id, bloblog_chunks)) = sorted_map.remove_entry(&k).unwrap(); new_index_size_so_far += add_size; - new_index_contents.insert( - bloblog_id, - IndexBloblogEntry { - chunks: bloblog_chunks, - forgotten_bytes: 0, - }, - ); + new_index_contents.insert(bloblog_id, bloblog_chunks); + } else { + break; } } @@ -458,7 +470,7 @@ async fn write_indices( pub async fn assemble_and_write_indices( pwc: &PileWithCache, - chunkmap: Vec<(BloblogId, BTreeMap)>, + chunkmap: BTreeMap, ) -> eyre::Result<()> { let indices = assemble_indices(chunkmap); write_indices(pwc, indices).await diff --git 
a/yama/src/vacuum.rs b/yama/src/vacuum.rs index 8b13789..8957c12 100644 --- a/yama/src/vacuum.rs +++ b/yama/src/vacuum.rs @@ -1 +1,4 @@ - +pub mod delete_unrefd_bloblogs; +pub mod forget_chunks; +pub mod merge_indices; +pub mod repack_bloblogs_and_indices; diff --git a/yama/src/vacuum/delete_unrefd_bloblogs.rs b/yama/src/vacuum/delete_unrefd_bloblogs.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yama/src/vacuum/delete_unrefd_bloblogs.rs @@ -0,0 +1 @@ + diff --git a/yama/src/vacuum/forget_chunks.rs b/yama/src/vacuum/forget_chunks.rs new file mode 100644 index 0000000..b7ac4bc --- /dev/null +++ b/yama/src/vacuum/forget_chunks.rs @@ -0,0 +1,171 @@ +use crate::extract::expand_chunkrefs_one_layer; +use crate::pile_with_cache::PileWithCache; +use eyre::{bail, ensure, Context, ContextCompat}; +use std::collections::{BTreeMap, BTreeSet}; +use std::sync::Arc; +use tracing::info; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_pile::definitions::IndexId; +use yama_pile::tree::TreeNode; +use yama_wormfile::boxed::BoxedWormFileProvider; + +pub async fn find_forgettable_chunks( + pwc: &Arc>, + indices: BTreeSet, +) -> eyre::Result> { + let mut unseen_chunk_ids = BTreeSet::new(); + + // Find all chunks in the given indices + { + let mut cache_conn = pwc.localcache.read().await?; + for index_id in &indices { + unseen_chunk_ids.extend(cache_conn.list_chunks_in_index(*index_id).await?); + } + }; + + let chunks_to_scan = prepare_chunkrefs_to_scan(pwc).await?; + scan_chunks(pwc, &mut unseen_chunk_ids, chunks_to_scan) + .await + .context("failed to do a sweep")?; + + Ok(unseen_chunk_ids) +} + +async fn prepare_chunkrefs_to_scan( + pwc: &Arc>, +) -> eyre::Result>> { + let pointer_names = pwc + .pile + .list_pointers() + .await + .context("failed to list pointers")?; + let mut chunks_to_scan_by_depth: BTreeMap> = BTreeMap::new(); + + for pointer_name in &pointer_names { + let pointer = pwc + .pile + .read_pointer(pointer_name) + .await? + .context("pointer vanished")?; + if let Some(parent_name) = pointer.parent { + if !pointer_names.contains(pointer_name) { + bail!("{parent_name:?}, the parent of {pointer_name:?}, does not exist"); + } + } + + pointer + .root + .node + .visit( + &mut |node, _| { + if let TreeNode::NormalFile { content, .. } = node { + chunks_to_scan_by_depth + .entry(content.depth) + .or_default() + .insert(content.chunk_id); + } + Ok(()) + }, + String::new(), + ) + .unwrap(); + } + + Ok(chunks_to_scan_by_depth) +} + +/// Scans the recursive chunkrefs that are passed in, ticking off chunks from the `unseen` set as +/// we go. +async fn scan_chunks( + pwc: &Arc>, + unseen: &mut BTreeSet, + chunks_to_scan_by_depth: BTreeMap>, +) -> eyre::Result<()> { + let mut to_scan: Vec<(u32, Vec)> = chunks_to_scan_by_depth + .into_iter() + .flat_map(|(depth, chunkset)| { + chunkset + .into_iter() + .map(move |chunk_id| (depth, vec![chunk_id])) + }) + .collect(); + + while !to_scan.is_empty() { + // Mark as seen. + for (_, chunk_ids) in &to_scan { + for chunk_id in chunk_ids { + unseen.remove(chunk_id); + } + } + + // Don't descend further into zero-depth elements. + to_scan = to_scan + .into_iter() + .filter(|(depth, _)| *depth > 0) + .collect(); + + // Decrement depth counters. + to_scan = expand_chunkrefs_one_layer(pwc, to_scan) + .await? 
+ .into_iter() + .map(|(old_depth, chunkids)| (old_depth - 1, chunkids)) + .collect(); + } + + Ok(()) +} + +pub async fn forget_chunks( + pwc: &Arc>, + indices: BTreeSet, + forgettable: BTreeSet, +) -> eyre::Result<()> { + let mut indices_to_rewrite = Vec::new(); + // First do a cache-only check to see which indices need rewriting. + { + let mut cache_conn = pwc.localcache.read().await?; + for index_id in &indices { + let chunks_in_this_index = cache_conn.list_chunks_in_index(*index_id).await?; + if !chunks_in_this_index.is_disjoint(&forgettable) { + indices_to_rewrite.push(index_id); + } + } + } + + info!( + "{} indices to rewrite in order to forget chunks", + indices_to_rewrite.len() + ); + + // Go through each index and clean out whatever needs forgetting (then re-create the index and + // remove the old one). + for index_id in indices_to_rewrite { + let mut index = pwc.pile.read_index(*index_id).await?; + let mut changed = false; + for bloblog_entry in index.bloblogs.values_mut() { + let removable: Vec = bloblog_entry + .chunks + .keys() + .filter(|ci| forgettable.contains(ci)) + .cloned() + .collect(); + changed |= !removable.is_empty(); + for chunk_id in removable { + bloblog_entry.forgotten_bytes += + bloblog_entry.chunks.remove(&chunk_id).unwrap().length; + } + } + + ensure!(changed, "no change to index {index_id:?}"); + + index.supersedes.clear(); + index.supersedes.insert(*index_id); + + // TODO APPLY THE NEW INDEX DIRECTLY (how do we do that again?) + let new_index_id = pwc.pile.create_index(&index).await?; + ensure!(new_index_id != *index_id, "index ID bounce"); + pwc.pile.delete_index_dangerous_exclusive(*index_id).await?; + } + + Ok(()) +} diff --git a/yama/src/vacuum/merge_indices.rs b/yama/src/vacuum/merge_indices.rs new file mode 100644 index 0000000..a82c3e4 --- /dev/null +++ b/yama/src/vacuum/merge_indices.rs @@ -0,0 +1,127 @@ +use crate::pile_with_cache::PileWithCache; +use eyre::{bail, Context}; +use std::collections::btree_map::Entry; +use std::collections::BTreeSet; +use std::sync::Arc; +use tracing::{debug, warn}; +use yama_pile::definitions::{Index, IndexId}; +use yama_wormfile::boxed::BoxedWormFileProvider; + +pub const MERGE_THRESHOLD_SIZE: u32 = 2 * 1024 * 1024; +pub const MERGE_TARGET_SIZE: u32 = 16 * 1024 * 1024; + +/// Selects indices for merge. +/// +/// Criteria: +/// - size is less than the `threshold_size` +/// - (FUTURE; TODO) two indices that cover the same bloblog should be merged +pub async fn select_indices_for_merge( + pwc: &Arc>, + target_size: u32, + threshold_size: u32, +) -> eyre::Result>> { + let mut result = Vec::new(); + let mut mergeable_indices: BTreeSet<(u64, IndexId)> = pwc + .pile + .list_indices_with_meta() + .await? + .into_iter() + .filter(|(_, meta)| meta.file_size < threshold_size as u64) + .map(|(index_id, meta)| (meta.file_size, index_id)) + .collect(); + + while mergeable_indices.len() >= 2 { + let mut merge_set = BTreeSet::new(); + let mut merge_size = 0u64; + + let (first_size, first_index) = mergeable_indices.pop_first().unwrap(); + merge_size += first_size; + merge_set.insert(first_index); + + while let Some((size, index)) = mergeable_indices.first() { + if merge_size + *size < target_size as u64 { + merge_size += *size; + merge_set.insert(*index); + mergeable_indices.pop_first(); + } else { + break; + } + } + + if merge_set.len() > 1 { + result.push(merge_set); + } + } + + Ok(result) +} + +/// Merges some indices, deleting them in the process. +/// Requires exclusive lock. 
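+/// Bloblog entries from all merged indices are unioned; if two indices record different locators for the same chunk, the merge bails out.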
+/// (Note: in the future we could only supersede the indices, which only needs a shared lock. +/// However you need an exclusive lock to eventually delete superseded indices...). +pub async fn merge_indices( + pwc: &Arc>, + merge_sets: Vec>, +) -> eyre::Result<()> { + for merge_set in merge_sets { + let mut final_index = Index { + supersedes: merge_set.clone(), + bloblogs: Default::default(), + }; + + for index_id in &merge_set { + let index_being_subsumed = pwc.pile.read_index(*index_id).await?; + // TODO: do we need to worry about the 'supersedes' property on the index here? + // I think not, or at least not if the superseded indices don't exist, + // but worth thinking about in the future if we don't immediately delete + // superseded indices... + for (bloblog_id, bloblog_entry) in index_being_subsumed.bloblogs { + match final_index.bloblogs.entry(bloblog_id) { + Entry::Vacant(ve) => { + ve.insert(bloblog_entry); + } + Entry::Occupied(mut oe) => { + let new_entry = oe.get_mut(); + let (existing_chunks, new_chunks): (Vec<_>, Vec<_>) = bloblog_entry + .chunks + .into_iter() + .partition(|(chunk_id, _)| new_entry.chunks.contains_key(chunk_id)); + for (chunk_id, locator) in new_chunks { + // Subtract from the forgotten byte count, since this may be us re-remembering bytes out of safety... + new_entry.forgotten_bytes = + new_entry.forgotten_bytes.saturating_sub(locator.length); + let is_new = new_entry.chunks.insert(chunk_id, locator).is_none(); + assert!(is_new); + } + for (chunk_id, locator) in existing_chunks { + if &new_entry.chunks[&chunk_id] != &locator { + bail!("Attempted to merge indices that disagree about {bloblog_id:?}/{chunk_id:?}"); + } + } + } + } + } + } + + let merged_index_id = pwc + .pile + .create_index(&final_index) + .await + .context("failed to create merged index")?; + + if merge_set.contains(&merged_index_id) { + // I don't see how this could be possible, but let's avoid deleting the new index if it somehow is a merge of itself... + warn!("strange: created index ID is one of its own merges..."); + continue; + } + + debug!("merged indices {merge_set:?} into {merged_index_id:?}; deleting mergees"); + for index_to_delete in merge_set { + pwc.pile + .delete_index_dangerous_exclusive(index_to_delete) + .await?; + } + } + Ok(()) +} diff --git a/yama/src/vacuum/repack_bloblogs_and_indices.rs b/yama/src/vacuum/repack_bloblogs_and_indices.rs new file mode 100644 index 0000000..454dc6f --- /dev/null +++ b/yama/src/vacuum/repack_bloblogs_and_indices.rs @@ -0,0 +1,191 @@ +use crate::pile_with_cache::PileWithCache; +use crate::storing::assemble_and_write_indices; +use eyre::ContextCompat; +use std::collections::btree_map::Entry; +use std::collections::{BTreeMap, BTreeSet}; +use std::sync::Arc; +use yama_localcache::BloblogStats; +use yama_midlevel_crypto::chunk_id::ChunkId; +use yama_pile::definitions::{BloblogId, IndexBloblogEntry}; +use yama_wormfile::boxed::BoxedWormFileProvider; + +/// Repack bloblogs that have this much forgotten space in them. +pub const REPACK_BLOBLOGS_TO_RECLAIM_SPACE_BYTES: u64 = 32 * 1024 * 1024; + +/// Defines what a 'small bloblog' is (one that is below a certain size, excluding forgotten bytes). +pub const SMALL_BLOBLOG_THRESHOLD: u64 = 64 * 1024 * 1024; + +/// Clump together small bloblogs when together they would hit or exceed this size. +pub const REPACK_BLOBLOGS_TO_CLUMP_TOGETHER_SMALL_BLOBLOGS_BYTES: u64 = 2 * 1024 * 1024 * 1024; + +/// The target size to reach when repacking, in terms of blob bytes. 
+pub const REPACK_TARGET_SIZE: u64 = 4 * 1024 * 1024; + +/// The limit size to use when repacking, in terms of blob bytes. +pub const REPACK_TARGET_LIMIT: u64 = 5 * 1024 * 1024; + +/// Gets bloblogs' stats. Only considers bloblogs referenced by exactly one index, so we don't +/// have to deal with unifying indices. +pub async fn get_bloblogs_stats( + pwc: &Arc>, +) -> eyre::Result> { + let mut cache_conn = pwc.localcache.read().await?; + let indices = cache_conn.list_indices().await?; + let mut bloblogs: BTreeMap> = BTreeMap::new(); + + for index in indices { + for (bloblog, stats) in cache_conn.index_bloblog_stats(index).await? { + match bloblogs.entry(bloblog) { + Entry::Vacant(ve) => { + ve.insert(Some(stats)); + } + Entry::Occupied(mut oe) => { + // only allow one stats per bloblog, then replace with None. + oe.insert(None); + } + } + } + } + + Ok(bloblogs + .into_iter() + .flat_map(|(k, v)| v.map(|v| (k, v))) + .collect()) +} + +/// Choose some bloblogs to repack. Assumes an updated local cache. +/// +/// Only bloblogs referenced by exactly one index will be considered for repacking. +pub async fn select_bloblogs_for_repack( + stats: BTreeMap, +) -> eyre::Result>> { + let mut repack_for_space: BTreeSet = stats + .iter() + .filter(|(_, v)| v.forgotten_bytes >= REPACK_BLOBLOGS_TO_RECLAIM_SPACE_BYTES) + .map(|(&k, _)| k) + .collect(); + let maybe_repack_for_clumping: BTreeSet = stats + .iter() + .filter(|(_, v)| v.blob_size <= SMALL_BLOBLOG_THRESHOLD) + .map(|(&k, _)| k) + .collect(); + + let should_repack_for_clumping = maybe_repack_for_clumping.len() > 1 + && maybe_repack_for_clumping + .iter() + .map(|bi| stats[bi].blob_size) + .sum::() + > REPACK_BLOBLOGS_TO_CLUMP_TOGETHER_SMALL_BLOBLOGS_BYTES; + + let to_repack = repack_for_space.clone(); + if should_repack_for_clumping { + repack_for_space.extend(maybe_repack_for_clumping); + } + + let mut to_repack: BTreeSet<(u64, BloblogId)> = to_repack + .into_iter() + .map(|bi| (stats[&bi].blob_size, bi)) + .collect(); + + let mut repack_sets = Vec::new(); + + while !to_repack.is_empty() { + let mut new_repack_group = BTreeMap::new(); + let mut new_repack_group_size = 0u64; + + let (first_sz, first_to_repack) = to_repack.pop_last().unwrap(); + new_repack_group_size += first_sz; + new_repack_group.insert(first_to_repack, stats[&first_to_repack].clone()); + + while new_repack_group_size < REPACK_TARGET_SIZE { + let Some((first_size, _)) = to_repack.first() else { break; }; + if new_repack_group_size + *first_size > REPACK_TARGET_LIMIT { + break; + } + let (extra_size, extra_bloblog_id) = to_repack.pop_first().unwrap(); + new_repack_group_size += extra_size; + new_repack_group.insert(extra_bloblog_id, stats[&extra_bloblog_id].clone()); + } + + // now check the repack group is good + if new_repack_group + .keys() + .any(|bi| repack_for_space.contains(bi)) + || new_repack_group_size > REPACK_BLOBLOGS_TO_CLUMP_TOGETHER_SMALL_BLOBLOGS_BYTES + { + repack_sets.push(new_repack_group); + } + } + + Ok(repack_sets) +} + +pub async fn perform_repack( + pwc: Arc>, + repack_sets: Vec>, +) -> eyre::Result<()> { + // 1. 
Write new bloblogs + let mut indices_buffer = BTreeMap::new(); + let mut index_parts: BTreeMap = BTreeMap::new(); + for repack_set in &repack_sets { + let mut new_bloblog = pwc.pile.create_bloblog().await?; + + for (old_bloblog_id, old_bloblog_stats) in repack_set { + let index_id = old_bloblog_stats.in_index; + if !indices_buffer.contains_key(&index_id) { + indices_buffer.insert(index_id, pwc.pile.read_index(index_id).await?); + } + let index_bloblog_entry = indices_buffer + .get_mut(&index_id) + .unwrap() + .bloblogs + .remove(&old_bloblog_id) + .context("bug: no IBE despite rewrite from context of this index")?; + let mut old_bloblog = pwc.pile.read_bloblog(*old_bloblog_id).await?; + let locators: BTreeMap = index_bloblog_entry + .chunks + .into_iter() + .map(|(blob, locator)| (locator.offset, blob)) + .collect(); + for chunk_id in locators.into_values() { + let chunk = old_bloblog + .read_chunk(chunk_id) + .await? + .context("bug or corrupt bloblog: promised chunk missing")?; + new_bloblog.write_chunk(chunk_id, &chunk).await?; + } + } + + let (_wormpath, new_bloblog_id, new_bloblog_index_info) = new_bloblog.finish().await?; + index_parts.insert( + new_bloblog_id, + IndexBloblogEntry { + chunks: new_bloblog_index_info, + forgotten_bytes: 0, + }, + ); + } + + // 2. Write new indices, but make sure to also write out index entries for unaffected bloblogs + // that appear in the indices we want to replace shortly. + for (_, index) in indices_buffer.iter_mut() { + index_parts.extend(std::mem::take(&mut index.bloblogs)); + } + assemble_and_write_indices(&pwc, index_parts).await?; + + // 3. Delete old indices + for index_id in indices_buffer.into_keys() { + pwc.pile.delete_index_dangerous_exclusive(index_id).await?; + } + + // 4. Delete old bloblogs + for repack_group in repack_sets { + for bloblog_id in repack_group.into_keys() { + pwc.pile + .delete_bloblog_dangerous_exclusive(bloblog_id) + .await?; + } + } + + Ok(()) +} diff --git a/yama_localcache/migrations/20230413133342_local_index_cache.sql b/yama_localcache/migrations/20230413133342_local_index_cache.sql index 1fb4975..aa24e90 100644 --- a/yama_localcache/migrations/20230413133342_local_index_cache.sql +++ b/yama_localcache/migrations/20230413133342_local_index_cache.sql @@ -6,23 +6,32 @@ CREATE TABLE indices ( ); CREATE UNIQUE INDEX indices_index_sha256 ON indices(index_sha256); -CREATE TABLE blobs ( - chunk_id TEXT NOT NULL, - bloblog_short_id INTEGER NOT NULL REFERENCES bloblogs(bloblog_short_id), - index_short_id INTEGER NOT NULL REFERENCES indices(index_short_id), - offset INTEGER NOT NULL, - size INTEGER NOT NULL, - PRIMARY KEY (chunk_id, bloblog_short_id, index_short_id) -); -CREATE INDEX blobs_bloblog_short_id ON blobs(bloblog_short_id); -CREATE INDEX blobs_index_short_id ON blobs(index_short_id); - CREATE TABLE bloblogs ( bloblog_short_id INTEGER PRIMARY KEY NOT NULL, bloblog_sha256 TEXT NOT NULL ); CREATE UNIQUE INDEX bloblogs_bloblog_sha256 ON bloblogs(bloblog_sha256); +-- Track the relationship between indices and bloblogs +CREATE TABLE indices_bloblogs ( + index_short_id INTEGER NOT NULL REFERENCES indices(index_short_id), + bloblog_short_id INTEGER NOT NULL REFERENCES bloblogs(bloblog_short_id), + forgotten_bytes INTEGER NOT NULL, + PRIMARY KEY (index_short_id, bloblog_short_id) +); + +CREATE TABLE blobs ( + chunk_id TEXT NOT NULL, + bloblog_short_id INTEGER NOT NULL, + index_short_id INTEGER NOT NULL, + offset INTEGER NOT NULL, + size INTEGER NOT NULL, + PRIMARY KEY (chunk_id, bloblog_short_id, index_short_id), + 
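+    -- composite foreign key below: every blob row's (index, bloblog) pair must exist in indices_bloblogs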
FOREIGN KEY (index_short_id, bloblog_short_id) REFERENCES indices_bloblogs(index_short_id, bloblog_short_id) +); +CREATE INDEX blobs_bloblog_short_id ON blobs(bloblog_short_id); +CREATE INDEX blobs_index_short_id ON blobs(index_short_id); + CREATE TABLE indices_supersede ( superseded_sha256 TEXT NOT NULL, successor_sha256 TEXT NOT NULL REFERENCES indices(index_sha256), diff --git a/yama_localcache/src/lib.rs b/yama_localcache/src/lib.rs index 50c8c14..4584adb 100644 --- a/yama_localcache/src/lib.rs +++ b/yama_localcache/src/lib.rs @@ -150,6 +150,13 @@ impl StoreConnection { Some(row) => row.bloblog_short_id, }; + let forgotten_bytes = index_bloblog_entry.forgotten_bytes as i64; + query!(" + INSERT INTO indices_bloblogs (index_short_id, bloblog_short_id, forgotten_bytes) + VALUES (?, ?, ?) + ", index_short_id, bloblog_short_id, forgotten_bytes) + .execute(&mut *txn) + .await?; for (chunk_id, chunk_locator) in index_bloblog_entry.chunks.iter() { let chunk_id_txt = chunk_id.to_string(); @@ -201,6 +208,15 @@ impl StoreConnection { .execute(&mut *txn) .await?; + query!( + " + DELETE FROM indices_bloblogs WHERE index_short_id = ? + ", + index_short_id + ) + .execute(&mut *txn) + .await?; + query!( " DELETE FROM indices WHERE index_short_id = ? @@ -255,6 +271,8 @@ impl StoreConnection { } } + /// Returns all chunk locations. + /// If a chunk does not exist, it is just not returned in the output map. pub async fn locate_chunks( &mut self, chunk_ids: &BTreeSet, @@ -332,4 +350,59 @@ impl StoreConnection { .is_none(); Ok(is_new) } + + pub async fn list_chunks_in_index( + &mut self, + index_id: IndexId, + ) -> eyre::Result> { + let index_id_text = index_id.to_string(); + let row_results = query!( + " + SELECT chunk_id AS \"chunk_id!\" FROM indices i + LEFT JOIN blobs b USING (index_short_id) + WHERE index_sha256 = ? + ", + index_id_text + ) + .map(|row| { + ChunkId::from_str(&row.chunk_id).context("failed to decode ChunkId in local cache") + }) + .fetch_all(&mut *self.conn) + .await?; + row_results.into_iter().collect() + } + + pub async fn index_bloblog_stats( + &mut self, + index_id: IndexId, + ) -> eyre::Result> { + let index_id_text = index_id.to_string(); + let row_results = query!(" + SELECT bloblog_sha256 AS bloblog_id, ib.forgotten_bytes AS forgotten_bytes, COUNT(size) AS \"num_chunks!: i64\", SUM(size) AS \"num_bytes!: i64\" FROM indices i + LEFT JOIN indices_bloblogs ib USING (index_short_id) + LEFT JOIN bloblogs b USING (bloblog_short_id) + LEFT JOIN blobs USING (index_short_id, bloblog_short_id) + WHERE index_sha256 = ? + GROUP BY bloblog_sha256 + ", index_id_text) + .map(|row| { + Ok((BloblogId::try_from(row.bloblog_id.as_ref())?, BloblogStats { + in_index: index_id, + blob_size: row.num_bytes as u64, + forgotten_bytes: row.forgotten_bytes as u64, + num_chunks: row.num_chunks as u32, + })) + }) + .fetch_all(&mut *self.conn) + .await?; + row_results.into_iter().collect() + } +} + +#[derive(Clone, Debug)] +pub struct BloblogStats { + pub in_index: IndexId, + pub blob_size: u64, + pub forgotten_bytes: u64, + pub num_chunks: u32, } diff --git a/yama_pile/src/definitions.rs b/yama_pile/src/definitions.rs index 2db35c7..993f233 100644 --- a/yama_pile/src/definitions.rs +++ b/yama_pile/src/definitions.rs @@ -26,7 +26,7 @@ pub struct BloblogFooter { pub type PackedBloblogFooter = AsymBox>>; /// Locator for a blob within a bloblog. 
-#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Eq, PartialEq)] pub struct BlobLocator { pub offset: u64, pub length: u64, @@ -104,7 +104,7 @@ pub struct IndexBloblogEntry { pub type PackedIndex = AsymBox>>; -#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct RecursiveChunkRef { /// The root Chunk ID. pub chunk_id: ChunkId, diff --git a/yama_pile/src/keyring.rs b/yama_pile/src/keyring.rs index 0427840..abe53de 100644 --- a/yama_pile/src/keyring.rs +++ b/yama_pile/src/keyring.rs @@ -25,29 +25,58 @@ pub struct Keyring { pub fn generate_r_w_keys() -> (ReaderKey, WriterKey) { let (encrypt, decrypt) = generate_asym_keypair(); let (sign, verify) = asym_signing_keypair(); - (ReaderKey { decrypt, verify }, WriterKey { encrypt, sign }) + ( + ReaderKey::new(decrypt, verify), + WriterKey::new(encrypt, sign), + ) } #[derive(Clone, Serialize, Deserialize)] pub struct WriterKey { + // boxed because these take up a lot of stack space otherwise! + #[serde(flatten)] + inner: Box, +} + +#[derive(Clone, Serialize, Deserialize)] +struct WriterKeyInner { encrypt: EncryptingKey, sign: SigningKey, } impl WriterKey { + pub fn new(encrypt: EncryptingKey, sign: SigningKey) -> Self { + Self { + inner: Box::new(WriterKeyInner { encrypt, sign }), + } + } + pub fn make_locked_asymbox(&self, contents: T) -> AsymBox { - AsymBox::new(contents, &self.sign, &self.encrypt).unwrap() + AsymBox::new(contents, &self.inner.sign, &self.inner.encrypt).unwrap() } } #[derive(Clone, Serialize, Deserialize)] pub struct ReaderKey { + // boxed because these take up a lot of stack space otherwise! + #[serde(flatten)] + inner: Box, +} + +#[derive(Clone, Serialize, Deserialize)] +pub struct ReaderKeyInner { decrypt: DecryptingKey, verify: VerifyingKey, } impl ReaderKey { + pub fn new(decrypt: DecryptingKey, verify: VerifyingKey) -> Self { + Self { + inner: Box::new(ReaderKeyInner { decrypt, verify }), + } + } + pub fn unlock_asymbox(&self, asymbox: AsymBox) -> Option { - asymbox.unlock(&self.decrypt, &self.verify) + asymbox.unlock(&self.inner.decrypt, &self.inner.verify) } } diff --git a/yama_pile/src/lib.rs b/yama_pile/src/lib.rs index e3b3939..254cf45 100644 --- a/yama_pile/src/lib.rs +++ b/yama_pile/src/lib.rs @@ -8,13 +8,13 @@ use crate::locks::{LockHandle, LockKind}; use crate::pointers::{PackedPointer, Pointer}; use crate::utils::HashedWormWriter; use eyre::{bail, Context, ContextCompat}; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::sync::Arc; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; use yama_midlevel_crypto::zstd_box::Zstd; use yama_wormfile::paths::{WormPath, WormPathBuf}; -use yama_wormfile::{WormFileProvider, WormFileWriter}; +use yama_wormfile::{WormFileMeta, WormFileProvider, WormFileWriter}; pub mod definitions; @@ -109,10 +109,35 @@ impl Pile { Ok(BloblogReader::new(worm_reader, &self.keyring).await?) } + /// Delete a bloblog from the pile. + /// This is dangerous: should only really be done in the vacuum operation. + /// Requires an exclusive lock! 
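+    /// (The caller is assumed to have already made sure that no live index still references this bloblog.)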
+ /// + /// + pub async fn delete_bloblog_dangerous_exclusive( + &self, + bloblog_id: BloblogId, + ) -> eyre::Result<()> { + if !self.lock.is_active_now(LockKind::Exclusive) { + bail!("can't delete bloblog: exclusive lock not active"); + } + let bloblog_path = WormPathBuf::new(format!( + "bloblogs/{}/{}", + hex::encode(&bloblog_id.0 .0[0..1]), + bloblog_id.0.to_string() + )) + .unwrap(); + self.provider.delete(bloblog_path.as_ref()).await?; + Ok(()) + } + /// Create a new index, returning the index ID. /// /// Requires key: w_bloblog_footer pub async fn create_index(&self, index: &Index) -> eyre::Result { + if !self.lock.is_active_now(LockKind::Shared) { + bail!("can't create index: lock not active"); + } let worm_writer = self.provider.write().await?; let mut writer = HashedWormWriter::new(worm_writer); let packed_index: PackedIndex = self @@ -133,6 +158,9 @@ impl Pile { /// List all indices present in the pile. pub async fn list_indices(&self) -> eyre::Result> { + if !self.lock.is_active_now(LockKind::Shared) { + bail!("can't list indices: lock not active"); + } let files = self .provider .list(WormPath::new("indices").unwrap()) @@ -152,10 +180,37 @@ impl Pile { Ok(result) } + /// List all indices present in the pile, with their metadata. + pub async fn list_indices_with_meta(&self) -> eyre::Result> { + if !self.lock.is_active_now(LockKind::Shared) { + bail!("can't list indices: lock not active"); + } + let files = self + .provider + .list_meta(WormPath::new("indices").unwrap()) + .await + .context("failed to list indices")?; + let mut result = BTreeMap::new(); + for (file, meta) in files { + let (_, filename) = file + .as_ref() + .as_str() + .rsplit_once('/') + .context("index listing entry should split at /")?; + let index_id = IndexId::try_from(filename) + .with_context(|| format!("not a valid index ID: {filename:?}"))?; + result.insert(index_id, meta); + } + Ok(result) + } + /// Read an index from the pile. /// /// Requires key: r_bloblog_footer pub async fn read_index(&self, index_id: IndexId) -> eyre::Result { + if !self.lock.is_active_now(LockKind::Shared) { + bail!("can't read index: lock not active"); + } let r_bloblog_footer = self .keyring .r_bloblog_footer @@ -174,6 +229,20 @@ impl Pile { Ok(index) } + /// Delete an index from the pile. + /// This is dangerous: should only really be done in the vacuum operation. + /// Requires an exclusive lock! 
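+    /// (Assumed to be called only once a replacement index covering any still-needed chunks has been written, as the vacuum routines do.)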
+ /// + /// + pub async fn delete_index_dangerous_exclusive(&self, index_id: IndexId) -> eyre::Result<()> { + if !self.lock.is_active_now(LockKind::Exclusive) { + bail!("can't delete index: exclusive lock not active"); + } + let target = WormPathBuf::new(format!("indices/{}", index_id.0)).unwrap(); + self.provider.delete(target.as_ref()).await?; + Ok(()) + } + pub async fn read_pointer(&self, name: &str) -> eyre::Result> { let r_pointer = self .keyring @@ -234,14 +303,33 @@ impl Pile { Ok(()) } - pub async fn close(mut self) -> eyre::Result<()> { + pub async fn list_pointers(&self) -> eyre::Result> { + let files = self + .provider + .list(WormPath::new("pointers").unwrap()) + .await?; + Ok(files + .into_iter() + .map(|file| { + let (_dir, pointer) = file.as_ref().as_str().rsplit_once('/').unwrap(); + pointer.to_owned() + }) + .collect()) + } + + pub async fn close(self) -> eyre::Result<()> { match Arc::try_unwrap(self.lock) { Ok(lock) => { - lock.close().await + lock.close() + .await .context("failed to release lock gracefully")?; } Err(arc) => { - bail!("could not close pile gracefully; lock arc has {} strong refs and {} weak refs", Arc::strong_count(&arc), Arc::weak_count(&arc)); + bail!( + "could not close pile gracefully; lock arc has {} strong refs and {} weak refs", + Arc::strong_count(&arc), + Arc::weak_count(&arc) + ); } } Ok(()) diff --git a/yama_pile/src/locks.rs b/yama_pile/src/locks.rs index 0d54f82..2ce48ec 100644 --- a/yama_pile/src/locks.rs +++ b/yama_pile/src/locks.rs @@ -1,6 +1,6 @@ use crate::keyring::{Keyring, ReaderKey, WriterKey}; use chrono::{DateTime, Duration, Utc}; -use eyre::{bail, Context, ContextCompat, eyre}; +use eyre::{bail, eyre, Context, ContextCompat}; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; @@ -74,19 +74,23 @@ pub struct LockHandle { impl Drop for LockHandle { fn drop(&mut self) { - if let Some(lock_release_tx) = self.lock_release_tx - .take() { - lock_release_tx - .send(()) - .expect("can't drop lock"); + if let Some(lock_release_tx) = self.lock_release_tx.take() { + lock_release_tx.send(()).expect("can't drop lock"); } } } impl LockHandle { pub async fn close(mut self) -> eyre::Result<()> { - self.lock_release_tx.take().unwrap().send(()).map_err(|_| eyre!("can't drop lock"))?; - self.lock_task_join_handle.take().unwrap().await + self.lock_release_tx + .take() + .unwrap() + .send(()) + .map_err(|_| eyre!("can't drop lock"))?; + self.lock_task_join_handle + .take() + .unwrap() + .await .context("lock task fail")?; Ok(()) } @@ -141,7 +145,7 @@ impl LockHandle { let stage1_locks = scan_locks(provider.as_ref(), &r_locks, now).await?; if let Some(blocker) = find_lock_blocker(&stage1_locks, &lock_id, kind) { let lock = &stage1_locks[blocker]; - warn!("{:?} lock {} held by {} currently expiring at {} is blocking our potential lock.", lock.kind, lock_id, lock.holder, lock.expires_at); + warn!("{:?} lock {} held by {:?} currently expiring at {} is blocking our potential lock.", lock.kind, lock_id, lock.holder, lock.expires_at); tokio::time::sleep(tokio::time::Duration::from_secs( (lock.expires_at - now).num_seconds().max(0) as u64 + 10, @@ -161,7 +165,7 @@ impl LockHandle { let stage2_locks = scan_locks(provider.as_ref(), &r_locks, now).await?; if let Some(blocker) = find_lock_blocker(&stage2_locks, &lock_id, kind) { let lock = &stage2_locks[blocker]; - warn!("{:?} lock {} held by {} currently expiring at {} blocked our lock; backing out.", lock.kind, lock_id, lock.holder, lock.expires_at); + warn!("{:?} 
lock {} held by {:?} currently expiring at {} blocked our lock; backing out.", lock.kind, lock_id, lock.holder, lock.expires_at); // Back out our lock. provider.delete(lock_path.as_ref()).await?; @@ -197,7 +201,7 @@ impl LockHandle { lock_path, lock_id, lock_release_tx: Some(lock_release_tx), - lock_task_join_handle + lock_task_join_handle, }); } } diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index 757f216..5dd1267 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -41,7 +41,9 @@ pub struct RootTreeNode { } #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +#[serde(tag = "t")] pub enum TreeNode { + #[serde(rename = "F")] NormalFile { /// modification time in ms mtime: u64, @@ -53,6 +55,7 @@ pub enum TreeNode { #[serde(flatten)] content: RecursiveChunkRef, }, + #[serde(rename = "D")] Directory { #[serde(flatten)] ownership: FilesystemOwnership, @@ -60,12 +63,14 @@ pub enum TreeNode { permissions: FilesystemPermissions, children: BTreeMap, }, + #[serde(rename = "L")] SymbolicLink { #[serde(flatten)] ownership: FilesystemOwnership, target: String, }, // TODO is there any other kind of file we need to store? + #[serde(rename = "X")] Deleted, } diff --git a/yama_pile/src/utils.rs b/yama_pile/src/utils.rs index 1a7e50e..dec283b 100644 --- a/yama_pile/src/utils.rs +++ b/yama_pile/src/utils.rs @@ -131,6 +131,7 @@ impl AsyncWrite for SymStreamWriter { ) -> Poll> { let mut enc_buf = buf.to_vec(); // Safety: Deny use of unencrypted `buf` from here on. + #[allow(unused)] let buf = (); let offset = self.offset; self.sym_stream_key.apply_xor(offset, &mut enc_buf); diff --git a/yama_wormfile/src/boxed.rs b/yama_wormfile/src/boxed.rs index 3459b8b..d5d9a45 100644 --- a/yama_wormfile/src/boxed.rs +++ b/yama_wormfile/src/boxed.rs @@ -1,5 +1,5 @@ use crate::paths::{WormPath, WormPathBuf}; -use crate::{WormFileProvider, WormFileReader, WormFileWriter}; +use crate::{WormFileMeta, WormFileProvider, WormFileReader, WormFileWriter}; use async_trait::async_trait; use std::error::Error; use std::fmt::{Debug, Display, Formatter}; @@ -32,6 +32,7 @@ trait BoxableWormFileProvider: Debug + Send + Sync { async fn is_dir_b(&self, path: &WormPath) -> eyre::Result; async fn is_regular_file_b(&self, path: &WormPath) -> eyre::Result; async fn list_b(&self, path: &WormPath) -> eyre::Result>; + async fn list_meta_b(&self, path: &WormPath) -> eyre::Result>; async fn read_b(&self, path: &WormPath) -> eyre::Result>>; async fn write_b(&self) -> eyre::Result>>; async fn delete_b(&self, path: &WormPath) -> eyre::Result<()>; @@ -51,6 +52,10 @@ impl BoxableWormFileProvider for T { self.list(path).await } + async fn list_meta_b(&self, path: &WormPath) -> eyre::Result> { + self.list_meta(path).await + } + async fn read_b(&self, path: &WormPath) -> eyre::Result>> { self.read(path) .await @@ -101,6 +106,14 @@ impl WormFileProvider for BoxedWormFileProvider { self.inner.list_b(path).await } + async fn list_meta( + &self, + path: impl AsRef + Send, + ) -> eyre::Result> { + let path = path.as_ref(); + self.inner.list_meta_b(path).await + } + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let path = path.as_ref(); self.inner.read_b(path).await diff --git a/yama_wormfile/src/lib.rs b/yama_wormfile/src/lib.rs index 762ca52..860d9eb 100644 --- a/yama_wormfile/src/lib.rs +++ b/yama_wormfile/src/lib.rs @@ -32,6 +32,15 @@ pub trait WormFileProvider: Debug + Send + Sync { /// TODO a streaming version of this might be beneficial. 
async fn list(&self, path: impl AsRef + Send) -> eyre::Result>; + /// Lists all the files and directories in the specified path, with metadata. + /// + /// If the path does not exist, gives an error. + /// TODO a streaming version of this might be beneficial. + async fn list_meta( + &self, + path: impl AsRef + Send, + ) -> eyre::Result>; + /// Reads a file. /// /// Fails if the file does not exist or is not a regular file. @@ -50,6 +59,11 @@ pub trait WormFileProvider: Debug + Send + Sync { async fn delete(&self, path: impl AsRef + Send) -> eyre::Result<()>; } +#[derive(Clone, Debug)] +pub struct WormFileMeta { + pub file_size: u64, +} + pub trait WormFileReader: AsyncRead + AsyncSeek + Debug + Send + Sync + Unpin + 'static {} #[async_trait] diff --git a/yama_wormfile_fs/src/lib.rs b/yama_wormfile_fs/src/lib.rs index f0d6464..8bc1d49 100644 --- a/yama_wormfile_fs/src/lib.rs +++ b/yama_wormfile_fs/src/lib.rs @@ -3,13 +3,14 @@ use eyre::Context as EyreContext; use std::fmt::{Debug, Formatter}; use std::io; use std::io::{ErrorKind, SeekFrom}; +use std::os::unix::fs::MetadataExt; use std::path::PathBuf; use std::pin::Pin; use std::task::{Context, Poll}; use tokio::fs::{File, OpenOptions}; use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; use yama_wormfile::paths::{WormPath, WormPathBuf}; -use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter}; +use yama_wormfile::{WormFileMeta, WormFileProvider, WormFileReader, WormFileWriter}; /// WormFileProvider that uses the local filesystem, in a given root directory. #[derive(Debug)] @@ -70,6 +71,34 @@ impl WormFileProvider for LocalWormFilesystem { Ok(out) } + async fn list_meta( + &self, + path: impl AsRef + Send, + ) -> eyre::Result> { + let worm_path = path.as_ref(); + let real_path = self.resolve_real_path(worm_path); + let mut dir_reader = match tokio::fs::read_dir(real_path).await { + Ok(ok) => ok, + Err(e) if e.kind() == ErrorKind::NotFound => { + return Ok(Vec::new()); + } + Err(other) => return Err(other.into()), + }; + let mut out = Vec::new(); + while let Some(next_ent) = dir_reader.next_entry().await? { + if let Some(name_str) = next_ent.file_name().to_str() { + let metadata = next_ent.metadata().await?; + out.push(( + worm_path.join(name_str).unwrap(), + WormFileMeta { + file_size: metadata.size(), + }, + )); + } + } + Ok(out) + } + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let worm_path = path.as_ref(); let real_path = self.resolve_real_path(worm_path); diff --git a/yama_wormfile_s3/src/lib.rs b/yama_wormfile_s3/src/lib.rs index 34340e9..9934a25 100644 --- a/yama_wormfile_s3/src/lib.rs +++ b/yama_wormfile_s3/src/lib.rs @@ -12,7 +12,7 @@ use tokio::io::{duplex, AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, DuplexS use tokio::task::JoinHandle; use uuid::Uuid; use yama_wormfile::paths::{WormPath, WormPathBuf}; -use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter}; +use yama_wormfile::{WormFileMeta, WormFileProvider, WormFileReader, WormFileWriter}; /// WormFileProvider that uses an S3 bucket, with a given path prefix. #[derive(Debug)] @@ -68,6 +68,18 @@ impl WormFileProvider for S3WormFilesystem { } async fn list(&self, path: impl AsRef + Send) -> eyre::Result> { + Ok(self + .list_meta(path) + .await? 
+ .into_iter() + .map(|(name, _meta)| name) + .collect()) + } + + async fn list_meta( + &self, + path: impl AsRef + Send, + ) -> eyre::Result> { let path = path.as_ref(); let full_path = self.resolve_real_path(path); let list = self @@ -84,6 +96,14 @@ impl WormFileProvider for S3WormFilesystem { .strip_prefix(&self.path_prefix) .map(|s| WormPathBuf::new(s.to_owned())) .flatten() + .map(|x| { + ( + x, + WormFileMeta { + file_size: obj.size, + }, + ) + }) }) .collect()) } diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs index b931e9d..8dd930e 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -19,7 +19,7 @@ use std::task::{Context, Poll}; use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; use tokio::runtime::Handle; use yama_wormfile::paths::{WormPath, WormPathBuf}; -use yama_wormfile::{WormFileProvider, WormFileReader, WormFileWriter}; +use yama_wormfile::{WormFileMeta, WormFileProvider, WormFileReader, WormFileWriter}; /// WormFileProvider that uses an SFTP connection, in a given root directory. #[derive(Debug)] @@ -227,6 +227,46 @@ impl WormFileProvider for SftpWormFilesystem { .collect()) } + async fn list_meta( + &self, + path: impl AsRef + Send, + ) -> eyre::Result> { + let worm_path = path.as_ref(); + let path = worm_path.as_str(); + let mut fs = self.get_fs(); + + let mut remote_dir = match fs.open_dir(path).await { + Ok(ok) => ok, + Err(openssh_sftp_client::Error::SftpError(SftpErrorKind::NoSuchFile, _msg)) => { + return Ok(Vec::new()); + } + Err(other) => { + return Err(other.into()); + } + }; + let dir_reader = remote_dir.read_dir().await?; + + Ok(dir_reader + .iter() + .filter_map(|entry| { + if let Some(name_str) = entry.filename().as_os_str().to_str() { + if name_str.is_empty() || name_str == "." || name_str == ".." 
{ + None + } else { + Some(( + worm_path.join(name_str).expect("pre-checked"), + WormFileMeta { + file_size: entry.metadata().len().expect("no size on SFTP file?"), + }, + )) + } + } else { + None + } + }) + .collect()) + } + async fn read(&self, path: impl AsRef + Send) -> eyre::Result { let real_path = self.root_dir.join(path.as_ref().as_str()); From 70663ad01650ba7ebddb0f16d148d79841d5c77d Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sat, 20 May 2023 13:21:58 +0100 Subject: [PATCH 10/51] Fix progress bar in datman --- datman/src/bin/datman.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 3f3e027..2d7ecf0 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -153,7 +153,8 @@ pub async fn main() -> eyre::Result<()> { let indicatif_layer = IndicatifLayer::new(); let stderr_writer = indicatif_layer.get_stderr_writer(); let indicatif_layer = indicatif_layer.with_filter(filter_fn(|span_metadata| { - span_metadata.target().starts_with("yama") && PROGRESS_SPANS.contains(&span_metadata.name()) + (span_metadata.target().starts_with("yama") || span_metadata.target().starts_with("datman")) + && PROGRESS_SPANS.contains(&span_metadata.name()) })); tracing_subscriber::registry() From 3a2ece31b657cb447117a45a523c9d3994d3dd83 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sat, 20 May 2023 13:22:06 +0100 Subject: [PATCH 11/51] Fix query on empty local cache --- yama_localcache/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yama_localcache/src/lib.rs b/yama_localcache/src/lib.rs index 4584adb..3f372df 100644 --- a/yama_localcache/src/lib.rs +++ b/yama_localcache/src/lib.rs @@ -359,7 +359,7 @@ impl StoreConnection { let row_results = query!( " SELECT chunk_id AS \"chunk_id!\" FROM indices i - LEFT JOIN blobs b USING (index_short_id) + JOIN blobs b USING (index_short_id) WHERE index_sha256 = ? ", index_id_text From a47924dc803ace00aff9e5973e23687b0d96ee54 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Sat, 20 May 2023 14:16:13 +0100 Subject: [PATCH 12/51] Fix flake and add 7-prefix so we can use it alongside the stable version --- flake.nix | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/flake.nix b/flake.nix index c7b9927..08c8695 100644 --- a/flake.nix +++ b/flake.nix @@ -38,12 +38,26 @@ # paths = [fenixRustToolchain.rustc fenixRustToolchain.cargo fenixRustToolchain.clippy fenixRustToolchain.rustfmt fenixRustToolchain.rustPlatform.rustcSrc]; # }; - naersk-lib = naersk.lib."${system}"; + #naersk-lib = naersk.lib."${system}"; + naersk-lib = pkgs.callPackage naersk { + cargo = fenixRustToolchain; + rustc = fenixRustToolchain; + }; rustComponents = naersk-lib.buildPackage { pname = "yama"; root = ./.; + overrideMain = attrs: { + # Set up the dev database, needed for compile-time query checking. 
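+        # (dev_db.sh is presumed to use the sqlx-cli put on PATH here to create the SQLite dev database that sqlx's query! macros check against.)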
+ preConfigure = '' + export PATH="${pkgs.sqlx-cli}/bin:$PATH" + pushd yama_localcache + bash dev_db.sh + popd + ''; + }; + buildInputs = with pkgs; [ openssl pkgconfig @@ -71,7 +85,7 @@ installPhase = '' # set -eu mkdir $out $out/bin - ln -s ${rustComponents}/bin/{yama,datman} $out/bin + ln -s ${rustComponents}/bin/{yama,datman,yamascan} $out/bin ln -s ${mysqlHelper}/bin/datman-helper-mysql-{backup,restore} $out/bin ln -s ${postgresHelper}/bin/datman-helper-postgres-{backup,restore} $out/bin ln -s ${pkgs.lz4}/bin/lz4 $out/bin/ @@ -82,8 +96,10 @@ postInstall = '' # set -eu - for fn in $out/bin/{datman,yama,datman-helper-{mysql,postgres}-{backup,restore}}; do + for fnbase in {datman,yama,yamascan,datman-helper-{mysql,postgres}-{backup,restore}}; do + fn="$out/bin/$fnbase" wrapProgram $fn --suffix PATH : $out/bin + mv "$out/bin/$fnbase" "$out/bin/7$fnbase" done ''; }; From 470420665f599b831909a67b8b2adbb561087a2c Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Mon, 22 May 2023 20:44:24 +0100 Subject: [PATCH 13/51] Tweaks that tracked down SFTP infinite buffer problem --- .gitignore | 3 +- Cargo.lock | 24 +++-- datman/src/bin/datman.rs | 5 +- yama/src/storing.rs | 4 +- yama_wormfile_sftp/Cargo.toml | 6 +- yama_wormfile_sftp/src/lib.rs | 166 +++++++++++++++++----------------- 6 files changed, 110 insertions(+), 98 deletions(-) diff --git a/.gitignore b/.gitignore index 0761fab..328550d 100644 --- a/.gitignore +++ b/.gitignore @@ -22,4 +22,5 @@ __pycache__ yama7demo yamaSFTPdemo -yama_localcache/testdb.sqlite \ No newline at end of file +yama_localcache/testdb.sqlite +sftp7demo \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 8ab3762..fa720c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1826,15 +1826,17 @@ dependencies = [ [[package]] name = "openssh-sftp-client" -version = "0.12.2" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa8e5f26e549bd266d9bcd9e5b4fd344729985ef1a7f5ac3e51f3f96a4a620" +checksum = "866d0eab409a2fcb6b8c3838fdbf10d7399d486548c19179a80f1c1142e93348" dependencies = [ "bytes", "derive_destructure2", + "futures-core", "once_cell", "openssh-sftp-client-lowlevel", "openssh-sftp-error", + "pin-project", "scopeguard", "tokio", "tokio-io-utility", @@ -1843,9 +1845,9 @@ dependencies = [ [[package]] name = "openssh-sftp-client-lowlevel" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "406bf41d8372365497d5645e802a8dfe22008b8183edbe6c79e4b75614431daa" +checksum = "f4975d0a824e82d4f61e3edf870254ce97bd7f8154751d2afdd97c7f43e57dff" dependencies = [ "awaitable", "bytes", @@ -1860,9 +1862,9 @@ dependencies = [ [[package]] name = "openssh-sftp-error" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d836b428ead150165d1178ed0aa672791c13b3ae9616ea1e34d13730a2cb486" +checksum = "f4c3356e914b8006417188efd534105d5bcb230b4a9fd67782a6b4a4e15fa006" dependencies = [ "awaitable-error", "openssh-sftp-protocol-error", @@ -3124,9 +3126,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" dependencies = [ "futures-core", "pin-project-lite", @@ -3135,9 +3137,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" 
+version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ "bytes", "futures-core", @@ -4020,6 +4022,8 @@ dependencies = [ "rand", "thiserror", "tokio", + "tokio-stream", + "tracing", "yama_wormfile", ] diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 2d7ecf0..70e59c6 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -159,8 +159,9 @@ pub async fn main() -> eyre::Result<()> { tracing_subscriber::registry() .with( - tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| "sqlx=warn,yama=debug,datman=debug,info".into()), + tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| { + "sqlx=warn,yama=debug,datman=debug,yama_wormfile_sftp=debug,info".into() + }), ) .with(tracing_subscriber::fmt::layer().with_writer(stderr_writer)) .with(indicatif_layer) diff --git a/yama/src/storing.rs b/yama/src/storing.rs index cd8c65c..4d2a8b5 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -323,9 +323,9 @@ async fn storage_pipeline_worker( .await? } - debug!("SPW shutdown"); - + debug!("finishing bloblogs"); bloblog_writers.finish_bloblogs(&mut storing_state).await?; + debug!("finished bloblogs!"); Ok(StoringIntermediate::from(storing_state)) } diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml index 6348a0c..70e4812 100644 --- a/yama_wormfile_sftp/Cargo.toml +++ b/yama_wormfile_sftp/Cargo.toml @@ -10,9 +10,11 @@ yama_wormfile = { version = "0.1.0", path = "../yama_wormfile" } ouroboros = "0.15.6" openssh = "0.9.9" -openssh-sftp-client = "0.12.2" +openssh-sftp-client = "0.13.5" async-trait = "0.1.68" tokio = { version = "1.27.0", features = ["io-std"] } +tokio-stream = "0.1.14" rand = "0.8.5" thiserror = "1.0.40" -eyre = "0.6.8" \ No newline at end of file +eyre = "0.6.8" +tracing = "0.1.37" diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs index 8dd930e..740ceee 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -4,8 +4,8 @@ use async_trait::async_trait; use eyre::{bail, Context as EyreContext, ContextCompat}; use openssh::{KnownHosts, RemoteChild, Session, Stdio}; use openssh_sftp_client::error::SftpErrorKind; -use openssh_sftp_client::file::TokioCompatFile; -use openssh_sftp_client::fs::Fs; +use openssh_sftp_client::file::{File, TokioCompatFile}; +use openssh_sftp_client::fs::{DirEntry, Fs}; use openssh_sftp_client::Error::SftpError; use openssh_sftp_client::Sftp; use ouroboros::self_referencing; @@ -15,9 +15,11 @@ use std::io::{ErrorKind, SeekFrom}; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::Arc; -use std::task::{Context, Poll}; +use std::task::{ready, Context, Poll}; +use std::time::Duration; use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; -use tokio::runtime::Handle; +use tokio_stream::StreamExt; +use tracing::debug; use yama_wormfile::paths::{WormPath, WormPathBuf}; use yama_wormfile::{WormFileMeta, WormFileProvider, WormFileReader, WormFileWriter}; @@ -50,14 +52,6 @@ struct SftpConn { // fs: Fs<'this>, } -#[self_referencing] -struct FileWithSftpConn { - conn: Arc, - #[borrows(conn)] - #[covariant] - file: Option>, -} - impl SftpConn { pub async fn create(ssh_connect: &str, root_dir: impl Into) -> eyre::Result { let root_dir = root_dir.into(); @@ -98,13 +92,27 @@ impl SftpConn { 
Ok(res) } - pub fn get_fs(&self) -> Fs<'_> { + pub fn get_fs(&self) -> Fs { let mut fs = self.borrow_sftp().fs(); fs.set_cwd(&self.borrow_root_dir()); fs } async fn create_dir_all(&self, worm_path_as_pathbuf: PathBuf) -> eyre::Result<()> { + // Try twice to try and shake out race conditions if another worker is doing this exactly + // now... + if self + .create_dir_all_impl_once(worm_path_as_pathbuf.clone()) + .await + .is_ok() + { + return Ok(()); + } + tokio::time::sleep(Duration::from_secs(3)).await; + self.create_dir_all_impl_once(worm_path_as_pathbuf).await + } + + async fn create_dir_all_impl_once(&self, worm_path_as_pathbuf: PathBuf) -> eyre::Result<()> { let mut fs = self.get_fs(); let mut stack = vec![]; @@ -163,7 +171,7 @@ impl SftpWormFilesystem { Ok(SftpWormFilesystem { conn, root_dir }) } - fn get_fs(&self) -> Fs<'_> { + fn get_fs(&self) -> Fs { let mut fs = self.conn.borrow_sftp().fs(); fs.set_cwd(&self.root_dir); fs @@ -200,7 +208,7 @@ impl WormFileProvider for SftpWormFilesystem { let path = worm_path.as_str(); let mut fs = self.get_fs(); - let mut remote_dir = match fs.open_dir(path).await { + let remote_dir = match fs.open_dir(path).await { Ok(ok) => ok, Err(openssh_sftp_client::Error::SftpError(SftpErrorKind::NoSuchFile, _msg)) => { return Ok(Vec::new()); @@ -209,7 +217,10 @@ impl WormFileProvider for SftpWormFilesystem { return Err(other.into()); } }; - let dir_reader = remote_dir.read_dir().await?; + let dir_reader: Vec = remote_dir + .read_dir() + .collect::, _>>() + .await?; Ok(dir_reader .iter() @@ -235,7 +246,7 @@ impl WormFileProvider for SftpWormFilesystem { let path = worm_path.as_str(); let mut fs = self.get_fs(); - let mut remote_dir = match fs.open_dir(path).await { + let remote_dir = match fs.open_dir(path).await { Ok(ok) => ok, Err(openssh_sftp_client::Error::SftpError(SftpErrorKind::NoSuchFile, _msg)) => { return Ok(Vec::new()); @@ -244,7 +255,10 @@ impl WormFileProvider for SftpWormFilesystem { return Err(other.into()); } }; - let dir_reader = remote_dir.read_dir().await?; + let dir_reader: Vec = remote_dir + .read_dir() + .collect::, _>>() + .await?; Ok(dir_reader .iter() @@ -271,36 +285,18 @@ impl WormFileProvider for SftpWormFilesystem { let real_path = self.root_dir.join(path.as_ref().as_str()); let real_path2 = real_path.clone(); - // the `Send` in the below line is very important... - let mut file_with_conn: FileWithSftpConn = FileWithSftpConnAsyncSendTryBuilder { - conn: self.conn.clone(), - file_builder: |conn| { - Box::pin(async move { - let file = conn.borrow_sftp().open(real_path).await?; - Ok::<_, eyre::Report>(Some(TokioCompatFile::new(file))) - }) - }, - } - .try_build() - .await?; - // yucky hacks... but we need to get to the file to get the length out, so we can seek from the end... - let file_length = tokio::task::block_in_place(|| { - file_with_conn.with_file_mut(|file| { - Handle::current().block_on(async move { - file.as_mut() - .unwrap() - .metadata() - .await? - .len() - .context("no len in SFTP file metadata!") - }) - }) - })?; + let mut file: File = self.conn.borrow_sftp().open(real_path).await?; + let file_length = file + .metadata() + .await? 
+ .len() + .context("no len in SFTP file metadata!")?; + let file = Some(Box::pin(TokioCompatFile::new(file))); Ok(SftpWormReader { path: real_path2, - file_with_conn, + file, length: file_length, }) } @@ -333,21 +329,15 @@ impl WormFileProvider for SftpWormFilesystem { continue; } - let file_with_conn: FileWithSftpConn = FileWithSftpConnAsyncSendTryBuilder { - conn: self.conn.clone(), - file_builder: |conn| { - Box::pin(async move { - let file = conn.borrow_sftp().create(real_path).await?; - Ok::<_, eyre::Report>(Some(TokioCompatFile::new(file))) - }) - }, - } - .try_build() - .await?; + let file = Some(Box::pin(TokioCompatFile::new( + self.conn.borrow_sftp().create(real_path).await?, + ))); break Ok(SftpWormWriter { temp_path: WormPathBuf::new(try_path).unwrap(), - file_with_conn: Some(file_with_conn), + file, + conn: self.conn.clone(), + unflushed_written_bytes: 0, }); } } @@ -363,7 +353,7 @@ impl WormFileProvider for SftpWormFilesystem { pub struct SftpWormReader { path: PathBuf, - file_with_conn: FileWithSftpConn, + file: Option>>, length: u64, } @@ -379,8 +369,7 @@ impl AsyncRead for SftpWormReader { cx: &mut Context<'_>, buf: &mut ReadBuf<'_>, ) -> Poll> { - self.file_with_conn - .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_read(cx, buf)) + Pin::new(self.file.as_mut().unwrap()).poll_read(cx, buf) } } @@ -392,13 +381,11 @@ impl AsyncSeek for SftpWormReader { io::Error::new(ErrorKind::InvalidInput, "SeekFrom::End out of bounds") })?); } - self.file_with_conn - .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).start_seek(position)) + Pin::new(self.file.as_mut().unwrap()).start_seek(position) } fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.file_with_conn - .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_complete(cx)) + Pin::new(self.file.as_mut().unwrap()).poll_complete(cx) } } @@ -406,7 +393,9 @@ impl WormFileReader for SftpWormReader {} pub struct SftpWormWriter { temp_path: WormPathBuf, - file_with_conn: Option, + file: Option>>, + conn: Arc, + unflushed_written_bytes: u64, } impl Debug for SftpWormWriter { @@ -415,54 +404,66 @@ impl Debug for SftpWormWriter { } } +/// Maximum number of unflushed bytes to have written, before we force-schedule a flush. +const MAX_UNFLUSHED_WRITTEN_BYTES: u64 = 32 * 1024 * 1024; + impl AsyncWrite for SftpWormWriter { fn poll_write( mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &[u8], ) -> Poll> { - self.file_with_conn - .as_mut() - .unwrap() - .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_write(cx, buf)) + // Workaround for openssh-sftp-client bug that means that writes are infinitely buffered: + // only allow a certain number of bytes to be written before flushing. + if self.unflushed_written_bytes > MAX_UNFLUSHED_WRITTEN_BYTES { + // Flush and only continue if a flush was a no-op/immediately performed. 
+ //ready!(self.as_mut().poll_flush(cx))?; + } + let r = Pin::new(self.file.as_mut().unwrap()).poll_write(cx, buf); + if r.is_ready() { + self.unflushed_written_bytes += buf.len() as u64; + } + r } fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.file_with_conn - .as_mut() - .unwrap() - .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_flush(cx)) + let r = Pin::new(self.file.as_mut().unwrap()).poll_flush(cx); + if r.is_ready() { + self.unflushed_written_bytes = 0; + } + r } fn poll_shutdown( mut self: Pin<&mut Self>, cx: &mut Context<'_>, ) -> Poll> { - self.file_with_conn - .as_mut() - .unwrap() - .with_file_mut(|file| Pin::new(file.as_mut().unwrap()).poll_shutdown(cx)) + Pin::new(self.file.as_mut().unwrap()).poll_shutdown(cx) } } #[async_trait] impl WormFileWriter for SftpWormWriter { async fn finalise(&mut self, target_path: &WormPath, replace: bool) -> io::Result<()> { + debug!("finalising SFTP file to {target_path:?}"); self.flush().await?; + debug!("flushed SFTP file to {target_path:?}"); let SftpWormWriter { temp_path, - file_with_conn, + file, + conn, .. } = self; - let mut file_with_conn = file_with_conn.take().unwrap(); - let file = file_with_conn.with_file_mut(|file| file.take().unwrap()); - file.close() + let file = file.take().unwrap(); + let file: TokioCompatFile = TokioCompatFile::clone(&file); + // This looks wrong but might be OK. Hack mostly needed due to the very awkward Pinning. + file.into_inner() + .close() .await .map_err(|e| io::Error::new(ErrorKind::Other, e))?; - let conn: Arc = file_with_conn.into_heads().conn; let mut fs = conn.get_fs(); // Directories will be created as needed. @@ -471,6 +472,7 @@ impl WormFileWriter for SftpWormWriter { .await .map_err(|e| io::Error::new(ErrorKind::Other, e.to_string()))?; } + debug!("created dirs SFTP file to {target_path:?}"); // Avoid allowing a replacement if not intended. // But this is currently not atomic, so it's just a sanity check rather than a foolproof // safeguard! @@ -491,10 +493,12 @@ impl WormFileWriter for SftpWormWriter { } } + debug!("moving SFTP file to {target_path:?}"); // Perform the move. 
fs.rename(&temp_path.as_ref().as_str(), target_path.as_str()) .await .map_err(|e| io::Error::new(ErrorKind::Other, e))?; + debug!("moved SFTP file to {target_path:?}"); Ok(()) } } From 5ffbf4fc1c0ca91d69081e89220228875e3a2a27 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Fri, 26 May 2023 21:00:22 +0100 Subject: [PATCH 14/51] Add some debug lines for Bad PMap issue --- datman/src/backup.rs | 9 ++++++++- yama_pile/src/tree.rs | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/datman/src/backup.rs b/datman/src/backup.rs index d980858..9c32975 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -2,7 +2,7 @@ use crate::descriptor_config::{SourceDescriptor, SourceDescriptorInner, VirtualS use crate::pointer_names::{get_pointer_name_at, POINTER_NAME_DATETIME_SPLITTER}; use chrono::{DateTime, Utc}; use dashmap::DashSet; -use eyre::{bail, eyre, Context, ContextCompat}; +use eyre::{bail, eyre, Context, ContextCompat, ensure}; use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; use std::borrow::Cow; @@ -309,6 +309,9 @@ async fn backup_dir_sources( StoragePipeline::launch_new(4, pwc.clone(), new_unflushed_chunks).await?; let dir_sources2 = &dir_sources; + let mut submitted = 0; + let mut completed = 0; + let submitted_mut = &mut submitted; let (submitter_task, receiver_task) = tokio::join!( async move { let pipeline_job_tx = pipeline_job_tx; @@ -325,6 +328,7 @@ async fn backup_dir_sources( )) .await .map_err(|_| eyre!("unable to send to pipeline."))?; + *submitted_mut += 1; } } } @@ -337,6 +341,7 @@ async fn backup_dir_sources( pipeline.next_result().await { chunk_file_maps[dir_source_idx].insert_str(&job_id, (rec_chunk_ref, real_size)); + completed += 1; Span::current().pb_inc(1); } // eprintln!("fin rec"); @@ -347,6 +352,8 @@ async fn backup_dir_sources( submitter_task?; receiver_task?; + ensure!(completed == submitted, "completed: {completed:?} != submitted {submitted:?}"); + assert_eq!(dir_sources.len(), chunk_file_maps.len()); let chunkmaps = pipeline.finish_into_chunkmaps().await?; diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index 5dd1267..f860494 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -411,7 +411,7 @@ pub fn assemble_tree_from_scan_entries( } => { let (content, size) = chunkings .remove(&key_string) - .context("bad chunkings PMap: missing entry")?; + .with_context(|| format!("bad chunkings PMap: missing entry: {key_string:?}"))?; // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. // That's fine. We'll patch this up later. 
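The submitted/completed counters added above follow a common producer/consumer accounting pattern: count every job pushed into the channel-backed pipeline and every result received back, then compare the two totals once both sides have joined. Below is a minimal, self-contained sketch of that pattern — u32 jobs and an echo worker stand in for the real storage pipeline; this is an illustration only, not the actual datman code.

// Sketch only: simplified jobs and an echo worker replace the real storage pipeline.
use eyre::{ensure, eyre};

#[tokio::main]
async fn main() -> eyre::Result<()> {
    let (job_tx, job_rx) = flume::bounded::<u32>(8);
    let (result_tx, result_rx) = flume::bounded::<u32>(8);

    // Stand-in worker: echoes every job back as a "result".
    tokio::spawn(async move {
        while let Ok(job) = job_rx.recv_async().await {
            let _ = result_tx.send_async(job).await;
        }
    });

    let mut submitted = 0u64;
    let mut completed = 0u64;
    let submitted_mut = &mut submitted;

    let (submit_res, _) = tokio::join!(
        async move {
            // Move the sender in so it is dropped (closing the pipeline) when we finish.
            let job_tx = job_tx;
            for job in 0..100u32 {
                job_tx
                    .send_async(job)
                    .await
                    .map_err(|_| eyre!("unable to send to pipeline."))?;
                *submitted_mut += 1;
            }
            Ok::<_, eyre::Report>(())
        },
        async {
            // Ends once the worker drops its result sender.
            while result_rx.recv_async().await.is_ok() {
                completed += 1;
            }
        },
    );
    submit_res?;

    // The invariant the patch checks: every submitted job must come back as a result.
    ensure!(
        completed == submitted,
        "completed: {completed:?} != submitted {submitted:?}"
    );
    Ok(())
}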
From 1bd46b934d6b068b80e9a498b46011d0e38eedee Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Fri, 26 May 2023 22:58:37 +0100 Subject: [PATCH 15/51] Avoid use of PatriciaMap.insert_str to avoid causing bug --- datman/src/backup.rs | 2 +- yama/src/bin/yama.rs | 2 +- yama/src/scan.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datman/src/backup.rs b/datman/src/backup.rs index 9c32975..8db181c 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -340,7 +340,7 @@ async fn backup_dir_sources( while let Ok(((dir_source_idx, job_id), rec_chunk_ref, real_size)) = pipeline.next_result().await { - chunk_file_maps[dir_source_idx].insert_str(&job_id, (rec_chunk_ref, real_size)); + chunk_file_maps[dir_source_idx].insert(&job_id, (rec_chunk_ref, real_size)); completed += 1; Span::current().pb_inc(1); } diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index a6d6b02..fcdbdb2 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -487,7 +487,7 @@ async fn main() -> eyre::Result<()> { async { while let Ok((job_id, rec_chunk_ref, real_size)) = pipeline.next_result().await { - chunk_file_map.insert_str(&job_id, (rec_chunk_ref, real_size)); + chunk_file_map.insert(&job_id, (rec_chunk_ref, real_size)); Span::current().pb_inc(1); } // eprintln!("fin rec"); diff --git a/yama/src/scan.rs b/yama/src/scan.rs index dba486f..10dfe1c 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -157,7 +157,7 @@ pub fn scan(root: &Path, ignores: &Vec) -> eyre::Result Date: Fri, 26 May 2023 22:58:41 +0100 Subject: [PATCH 16/51] Cleanups --- datman/src/backup.rs | 7 +++++-- yama_pile/src/tree.rs | 6 +++--- yama_wormfile_sftp/src/lib.rs | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/datman/src/backup.rs b/datman/src/backup.rs index 8db181c..eb3a6a1 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -2,7 +2,7 @@ use crate::descriptor_config::{SourceDescriptor, SourceDescriptorInner, VirtualS use crate::pointer_names::{get_pointer_name_at, POINTER_NAME_DATETIME_SPLITTER}; use chrono::{DateTime, Utc}; use dashmap::DashSet; -use eyre::{bail, eyre, Context, ContextCompat, ensure}; +use eyre::{bail, ensure, eyre, Context, ContextCompat}; use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; use std::borrow::Cow; @@ -352,7 +352,10 @@ async fn backup_dir_sources( submitter_task?; receiver_task?; - ensure!(completed == submitted, "completed: {completed:?} != submitted {submitted:?}"); + ensure!( + completed == submitted, + "completed: {completed:?} != submitted {submitted:?}" + ); assert_eq!(dir_sources.len(), chunk_file_maps.len()); diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index f860494..3d0e7fa 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -409,9 +409,9 @@ pub fn assemble_tree_from_scan_entries( permissions, size: _unverified_size_ignore, } => { - let (content, size) = chunkings - .remove(&key_string) - .with_context(|| format!("bad chunkings PMap: missing entry: {key_string:?}"))?; + let (content, size) = chunkings.remove(&key_string).with_context(|| { + format!("bad chunkings PMap: missing entry: {key_string:?}") + })?; // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. // That's fine. We'll patch this up later. 
diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs index 740ceee..550e240 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -15,7 +15,7 @@ use std::io::{ErrorKind, SeekFrom}; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll}; use std::time::Duration; use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, AsyncWriteExt, ReadBuf}; use tokio_stream::StreamExt; From 32e514bd2e7e6a7356b28974bd433e2f0c299d55 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Fri, 26 May 2023 23:30:14 +0100 Subject: [PATCH 17/51] Introduce Option<> on file store entries --- datman/src/backup.rs | 9 ++++----- yama/src/bin/yama.rs | 7 +++---- yama/src/scan.rs | 4 ++-- yama/src/storing.rs | 23 +++++++++++------------ yama_pile/src/tree.rs | 9 +++++---- 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/datman/src/backup.rs b/datman/src/backup.rs index eb3a6a1..0a2c492 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -254,7 +254,7 @@ async fn scan_dir_sources( struct BackupDirSourcesReturn { pub chunkmaps: BTreeMap, - pub dir_source_returns: Vec<(DirSourcePrep, PatriciaMap<(RecursiveChunkRef, u64)>)>, + pub dir_source_returns: Vec<(DirSourcePrep, PatriciaMap>)>, } async fn backup_dir_sources( @@ -276,7 +276,7 @@ async fn backup_dir_sources( (cfm, Cow::Owned(pruned)) } else { ( - PatriciaMap::<(RecursiveChunkRef, u64)>::new(), + PatriciaMap::>::new(), Cow::Borrowed(&dir_source.scan_entry_map), ) }; @@ -337,10 +337,9 @@ async fn backup_dir_sources( Ok::<_, eyre::Report>(()) }, async { - while let Ok(((dir_source_idx, job_id), rec_chunk_ref, real_size)) = - pipeline.next_result().await + while let Ok(((dir_source_idx, job_id), file_store_opt)) = pipeline.next_result().await { - chunk_file_maps[dir_source_idx].insert(&job_id, (rec_chunk_ref, real_size)); + chunk_file_maps[dir_source_idx].insert(&job_id, file_store_opt); completed += 1; Span::current().pb_inc(1); } diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index fcdbdb2..0fc1fa5 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -437,7 +437,7 @@ async fn main() -> eyre::Result<()> { (cfm, Cow::Owned(pruned)) } else { ( - PatriciaMap::<(RecursiveChunkRef, u64)>::new(), + PatriciaMap::>::new(), Cow::Borrowed(&scan_entry_map), ) }; @@ -485,9 +485,8 @@ async fn main() -> eyre::Result<()> { Ok::<_, eyre::Report>(()) }, async { - while let Ok((job_id, rec_chunk_ref, real_size)) = pipeline.next_result().await - { - chunk_file_map.insert(&job_id, (rec_chunk_ref, real_size)); + while let Ok((job_id, file_store_opt)) = pipeline.next_result().await { + chunk_file_map.insert(&job_id, file_store_opt); Span::current().pb_inc(1); } // eprintln!("fin rec"); diff --git a/yama/src/scan.rs b/yama/src/scan.rs index 10dfe1c..b08f622 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -224,7 +224,7 @@ pub fn prepopulate_unmodified( parent_tree: &TreeNode, scan_entry_map: &PatriciaMap, ) -> ( - PatriciaMap<(RecursiveChunkRef, u64)>, + PatriciaMap>, PatriciaMap, ) { let mut reusable_chunkings = PatriciaMap::new(); @@ -253,7 +253,7 @@ pub fn prepopulate_unmodified( && prev_permissions == permissions { // Nothing seems to have changed about this file, let's just reuse the `content` from last time. 
- reusable_chunkings.insert(path, (*prev_content, *size)); + reusable_chunkings.insert(path, Some((*prev_content, *size))); pruned_scan_entry_map.remove(path); } } diff --git a/yama/src/storing.rs b/yama/src/storing.rs index 4d2a8b5..9c06d2f 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -279,22 +279,22 @@ async fn store_file( file_path: &Path, storing_state: &mut StoringState, sbw: &mut StoringBloblogWriters, -) -> eyre::Result<(RecursiveChunkRef, u64)> { +) -> eyre::Result> { let file = File::open(file_path).await?.into_std().await; let mapped = unsafe { memmap2::Mmap::map(&file) }?; let size_of_file = mapped.as_ref().len(); let chunkref = storing_state.store_full_slice(mapped.as_ref(), sbw)?; - Ok((chunkref, size_of_file as u64)) + Ok(Some((chunkref, size_of_file as u64))) } pub struct StoragePipeline { - result_rx: Receiver<(JobName, RecursiveChunkRef, u64)>, + result_rx: Receiver<(JobName, Option<(RecursiveChunkRef, u64)>)>, join_set: JoinSet>, } async fn storage_pipeline_worker( job_rx: Receiver<(JobName, PathBuf)>, - result_tx: Sender<(JobName, RecursiveChunkRef, u64)>, + result_tx: Sender<(JobName, Option<(RecursiveChunkRef, u64)>)>, mut storing_state: StoringState, ) -> eyre::Result { let mut bloblog_writers = StoringBloblogWriters::default(); @@ -306,14 +306,11 @@ async fn storage_pipeline_worker( async { // debug!("SPW job {job_id:?}"); - let (rec_chunk_ref, file_length) = - store_file(&file_path, &mut storing_state, &mut bloblog_writers) - .await - .with_context(|| format!("failed to store {file_path:?}"))?; - // debug!("SPW good {job_id:?}"); - if let Err(SendError(to_be_sent)) = result_tx - .send_async((job_id, rec_chunk_ref, file_length)) + let file_store_opt = store_file(&file_path, &mut storing_state, &mut bloblog_writers) .await + .with_context(|| format!("failed to store {file_path:?}"))?; + // debug!("SPW good {job_id:?}"); + if let Err(SendError(to_be_sent)) = result_tx.send_async((job_id, file_store_opt)).await { bail!("Can't return result for {to_be_sent:?} — result_tx shut down."); } @@ -375,7 +372,9 @@ impl StoragePipeline { } #[inline] - pub async fn next_result(&self) -> Result<(JobName, RecursiveChunkRef, u64), RecvError> { + pub async fn next_result( + &self, + ) -> Result<(JobName, Option<(RecursiveChunkRef, u64)>), RecvError> { self.result_rx.recv_async().await } diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index 3d0e7fa..0641d03 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -392,7 +392,7 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) { pub fn assemble_tree_from_scan_entries( scan: PatriciaMap, - mut chunkings: PatriciaMap<(RecursiveChunkRef, u64)>, + mut chunkings: PatriciaMap>, ) -> eyre::Result { let mut dirs: BTreeMap> = BTreeMap::new(); // special-case the root ("") @@ -409,9 +409,10 @@ pub fn assemble_tree_from_scan_entries( permissions, size: _unverified_size_ignore, } => { - let (content, size) = chunkings.remove(&key_string).with_context(|| { - format!("bad chunkings PMap: missing entry: {key_string:?}") - })?; + let (content, size) = chunkings + .remove(&key_string) + .with_context(|| format!("bad chunkings PMap: missing entry: {key_string:?}"))? + .unwrap(); // TODO // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. // That's fine. We'll patch this up later. 
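To clarify the Option introduced above: Some((chunk_ref, size)) means the file was chunked and stored, while None is meant for a file that vanished between scan and store, so tree assembly can skip it instead of failing — the unwrap() marked TODO is the remaining gap, closed by the next patch. A minimal sketch of that intent follows; ChunkRef is a placeholder for RecursiveChunkRef and the actual chunking/bloblog writing is elided.

// Sketch only: placeholder ChunkRef, real chunking elided.
use std::io;
use std::path::Path;
use tokio::fs::File;

#[derive(Debug, Clone, Copy)]
struct ChunkRef;

async fn store_file_sketch(file_path: &Path) -> eyre::Result<Option<(ChunkRef, u64)>> {
    let file = match File::open(file_path).await {
        Ok(file) => file,
        // Present at scan time but deleted since: record "nothing stored"
        // rather than failing the whole backup run.
        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
        Err(other) => return Err(other.into()),
    };
    let size = file.metadata().await?.len();
    // ... chunk the contents and write them to a bloblog here ...
    Ok(Some((ChunkRef, size)))
}

On the consuming side, tree assembly then only inserts a NormalFile node when the entry is Some, which is what eventually replaces the temporary unwrap().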
From 53886aad461104e8ea480fdc245fc0a7bc391b45 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Fri, 26 May 2023 23:43:23 +0100 Subject: [PATCH 18/51] Only produce warnings if files vanish during store --- yama/src/storing.rs | 16 ++++++++++++++-- yama_pile/src/tree.rs | 34 +++++++++++++++++----------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/yama/src/storing.rs b/yama/src/storing.rs index 9c06d2f..06bc841 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -6,6 +6,7 @@ use flume::{Receiver, RecvError, SendError, Sender}; use std::cmp::Reverse; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Debug; +use std::io; use std::io::Read; use std::path::{Path, PathBuf}; use std::pin::Pin; @@ -14,7 +15,7 @@ use tokio::fs::File; use tokio::runtime::Handle; use tokio::task; use tokio::task::JoinSet; -use tracing::{debug, error, info_span, Instrument}; +use tracing::{debug, error, info_span, warn, Instrument}; use yama_localcache::StoreConnection; use yama_midlevel_crypto::chunk_id::{ChunkId, ChunkIdKey}; use yama_pile::bloblogs::BloblogWriter; @@ -275,12 +276,23 @@ impl StoringState { } } +/// Stores a file, returning Ok(Some(...)) if fine, Ok(None) if the file doesn't exist (vanished) +/// or Err(...) for any other error. async fn store_file( file_path: &Path, storing_state: &mut StoringState, sbw: &mut StoringBloblogWriters, ) -> eyre::Result> { - let file = File::open(file_path).await?.into_std().await; + let file = match File::open(file_path).await { + Ok(file) => file.into_std().await, + Err(err) if err.kind() == io::ErrorKind::NotFound => { + warn!("file vanished: {file_path:?}"); + return Ok(None); + } + Err(other) => { + bail!("error storing {file_path:?}: {other:?}"); + } + }; let mapped = unsafe { memmap2::Mmap::map(&file) }?; let size_of_file = mapped.as_ref().len(); let chunkref = storing_state.store_full_slice(mapped.as_ref(), sbw)?; diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index 0641d03..945a52d 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -409,25 +409,25 @@ pub fn assemble_tree_from_scan_entries( permissions, size: _unverified_size_ignore, } => { - let (content, size) = chunkings + if let Some((content, size)) = chunkings .remove(&key_string) .with_context(|| format!("bad chunkings PMap: missing entry: {key_string:?}"))? - .unwrap(); // TODO - - // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. - // That's fine. We'll patch this up later. - dirs.get_mut(parent_dir_name) - .context("bad PMap: parent not seen first")? - .insert( - child_name.to_owned(), - TreeNode::NormalFile { - mtime, - ownership, - permissions, - size, - content, - }, - ); + { + // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. + // That's fine. We'll patch this up later. + dirs.get_mut(parent_dir_name) + .context("bad PMap: parent not seen first")? 
+ .insert( + child_name.to_owned(), + TreeNode::NormalFile { + mtime, + ownership, + permissions, + size, + content, + }, + ); + } } ScanEntry::Directory { ownership, From 1ac9bb6d8dc696ba46e5e7294eec92894bbb3f58 Mon Sep 17 00:00:00 2001 From: Olivier Date: Tue, 8 Aug 2023 21:17:27 +0100 Subject: [PATCH 19/51] Add `yama keyring create` command --- yama/src/bin/yama.rs | 77 +++++++++- yama/src/init.rs | 22 +-- yama/src/open.rs | 22 +-- yama_midlevel_crypto/src/asym_keyx.rs | 4 +- yama_midlevel_crypto/src/asym_signed.rs | 12 ++ yama_midlevel_crypto/src/keys_kyber.rs | 4 +- yama_midlevel_crypto/src/keys_x25519.rs | 8 +- yama_pile/Cargo.toml | 2 +- yama_pile/src/keyring.rs | 191 +++++++++++++++++++++++- 9 files changed, 297 insertions(+), 45 deletions(-) diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index 0fc1fa5..4391214 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -20,6 +20,7 @@ use eyre::{bail, eyre, Context, ContextCompat}; use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; use std::borrow::Cow; +use std::collections::BTreeSet; use std::iter::Iterator; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -35,8 +36,10 @@ use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::Layer; use users::{get_current_gid, get_current_uid}; use yama::extract::flatten_treenode; -use yama::init::{generate_master_keyring, pack_keyring}; -use yama::open::{open_keyring_interactive, open_pile, pre_open_keyring, update_cache}; +use yama::init::pack_keyring; +use yama::open::{ + open_keyring_interactive, open_pile, pre_open_keyring, pre_open_keyring_at_path, update_cache, +}; use yama::pile_connector::PileConnectionScheme; use yama::scan::create_uidgid_lookup_tables; use yama::storing::{ @@ -53,6 +56,7 @@ use yama_midlevel_crypto::chunk_id::ChunkIdKey; use yama_pile::definitions::{ IndexBloblogEntry, PackedPileConfig, PileConfig, RecursiveChunkRef, SUPPORTED_YAMA_PILE_VERSION, }; +use yama_pile::keyring::{Keyring, INDIVIDUAL_KEYS, KEY_GROUPS}; use yama_pile::locks::LockKind; use yama_pile::pointers::Pointer; use yama_pile::tree::unpopulated::ScanEntry; @@ -331,7 +335,7 @@ async fn main() -> eyre::Result<()> { Some(line.trim().to_owned()) }; - let master_keyring = generate_master_keyring(); + let master_keyring = Keyring::generate_new_master(); let master_key_packed = pack_keyring( master_keyring.clone(), master_password.as_ref().map(|s| s.as_ref()), @@ -909,7 +913,72 @@ async fn main() -> eyre::Result<()> { .close() .await?; } - _other => todo!(), + YamaCommand::Keyring(KeyringCommand::Inspect { file }) => { + let keyring = pre_open_keyring_at_path(&file).await?; + let _keyring = open_keyring_interactive(keyring).await?; + + todo!(); + } + YamaCommand::Keyring(KeyringCommand::Create { + new, + from, + with, + no_password, + weak, + }) => { + let mut keys_to_copy = BTreeSet::new(); + + if weak { + unimplemented!("can't generate weak keys yet: unimplemented."); + } + + if new.exists() { + bail!("{new:?} already exists; won't overwrite!"); + } + + for key in with.split(",") { + if let Some((_, key_group)) = KEY_GROUPS.iter().find(|(k, _)| *k == key) { + keys_to_copy.extend(key_group.into_iter()); + } else if INDIVIDUAL_KEYS.contains(&key) { + keys_to_copy.insert(key); + } else { + bail!("Not a known key or key group: {key:?}"); + } + } + + let keyring = match &from { + Some(path) => pre_open_keyring_at_path(path).await?, + None => pre_open_keyring(Path::new(".")).await?, + }; + let keyring = open_keyring_interactive(keyring).await?; + + let 
new_keyring = keyring + .partial_copy(&keys_to_copy) + .context("failed to make partial copy of keyring")?; + + let new_keyring_password = if no_password { + warn!("Not setting a new keyring password. The new keyring will be unprotected."); + None + } else { + println!("enter new keyring password:"); + let stdin = stdin(); + let mut stdin_br = BufReader::new(stdin); + let mut line = String::new(); + stdin_br.read_line(&mut line).await?; + Some(line.trim().to_owned()) + }; + + let new_keyring_packed = pack_keyring( + new_keyring, + new_keyring_password.as_ref().map(|s| s.as_ref()), + )?; + + tokio::fs::write(&new, &new_keyring_packed.into_byte_vec()) + .await + .context("couldn't write to new keyring file")?; + + info!("new keyring created!"); + } } Ok(()) diff --git a/yama/src/init.rs b/yama/src/init.rs index 533c54c..b49ce54 100644 --- a/yama/src/init.rs +++ b/yama/src/init.rs @@ -5,7 +5,7 @@ use yama_midlevel_crypto::byte_layer::{ByteLayer, CborSerde}; use yama_midlevel_crypto::key_derivation::KeyDerivationParameters; use yama_midlevel_crypto::sym_box::SymBox; use yama_pile::definitions::{PackedKeyring, PackedPileConfig, UnlockedOrLockedKeyring}; -use yama_pile::keyring::{generate_r_w_keys, Keyring}; +use yama_pile::keyring::Keyring; use yama_pile::{DIR_BLOBLOGS, DIR_INDICES, DIR_LOCKS, FILE_MASTER_KEYRING, FILE_YAMA_CONFIG}; use yama_wormfile::paths::WormPath; use yama_wormfile::{WormFileProvider, WormFileWriter}; @@ -71,26 +71,6 @@ pub async fn init_pile( Ok(()) } -pub fn generate_master_keyring() -> Keyring { - let (r_config, w_config) = generate_r_w_keys(); - let (r_bloblog_footer, w_bloblog_footer) = generate_r_w_keys(); - let (r_bloblog_contents, w_bloblog_contents) = generate_r_w_keys(); - let (r_locks, w_locks) = generate_r_w_keys(); - let (r_pointer, w_pointer) = generate_r_w_keys(); - Keyring { - r_config: Some(r_config), - w_config: Some(w_config), - r_bloblog_footer: Some(r_bloblog_footer), - w_bloblog_footer: Some(w_bloblog_footer), - r_bloblog_contents: Some(r_bloblog_contents), - w_bloblog_contents: Some(w_bloblog_contents), - r_locks: Some(r_locks), - w_locks: Some(w_locks), - r_pointer: Some(r_pointer), - w_pointer: Some(w_pointer), - } -} - // todo move this pub fn pack_keyring(unpacked: Keyring, password: Option<&str>) -> eyre::Result { let packed = if let Some(password) = password { diff --git a/yama/src/open.rs b/yama/src/open.rs index ca7d86f..3af5503 100644 --- a/yama/src/open.rs +++ b/yama/src/open.rs @@ -22,15 +22,7 @@ pub async fn pre_open_keyring(connector_in_dir: &Path) -> eyre::Result eyre::Result eyre::Result { + let packed_keyring_bytes = tokio::fs::read(&keyring_path) + .await + .with_context(|| format!("failed to read keyring file at {:?}", keyring_path))?; + let packed_keyring = PackedKeyring::from_byte_vec(packed_keyring_bytes) + .deserialise() + .with_context(|| format!("failed to deserialise keyring file at {:?}", keyring_path))?; + return Ok(packed_keyring); +} + pub async fn open_keyring_interactive(input: UnlockedOrLockedKeyring) -> eyre::Result { match input { UnlockedOrLockedKeyring::Locked { deriver, lockbox } => { diff --git a/yama_midlevel_crypto/src/asym_keyx.rs b/yama_midlevel_crypto/src/asym_keyx.rs index 9fdbbbc..7b8c073 100644 --- a/yama_midlevel_crypto/src/asym_keyx.rs +++ b/yama_midlevel_crypto/src/asym_keyx.rs @@ -13,13 +13,13 @@ pub struct AsymKeyExchange<'bytes> { inner: Cow<'bytes, [u8]>, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct EncryptingKey { x25519: 
X25519PublicKey, kyber: KyberPublicKey, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct DecryptingKey { x25519: X25519PrivateKey, kyber: KyberPrivateKey, diff --git a/yama_midlevel_crypto/src/asym_signed.rs b/yama_midlevel_crypto/src/asym_signed.rs index 9b6c7f8..18d0e5b 100644 --- a/yama_midlevel_crypto/src/asym_signed.rs +++ b/yama_midlevel_crypto/src/asym_signed.rs @@ -19,11 +19,23 @@ pub struct SigningKey { ed25519: Ed25519PrivateKey, } +impl PartialEq for SigningKey { + fn eq(&self, other: &Self) -> bool { + self.ed25519.to_keypair_bytes() == other.ed25519.to_keypair_bytes() + } +} + #[derive(Clone)] pub struct VerifyingKey { ed25519: Ed25519PublicKey, } +impl PartialEq for VerifyingKey { + fn eq(&self, other: &Self) -> bool { + self.ed25519.as_bytes() == other.ed25519.as_bytes() + } +} + pub fn asym_signing_keypair() -> (SigningKey, VerifyingKey) { let mut rng = thread_rng(); let keypair = ed25519_dalek::SigningKey::generate(&mut rng); diff --git a/yama_midlevel_crypto/src/keys_kyber.rs b/yama_midlevel_crypto/src/keys_kyber.rs index d238a33..2aa04f5 100644 --- a/yama_midlevel_crypto/src/keys_kyber.rs +++ b/yama_midlevel_crypto/src/keys_kyber.rs @@ -4,12 +4,12 @@ use serde::de::Error; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::borrow::Cow; -#[derive(Clone)] +#[derive(Clone, Eq, PartialEq)] pub struct KyberPublicKey { inner: pqc_kyber::PublicKey, } -#[derive(Clone)] +#[derive(Clone, Eq, PartialEq)] pub struct KyberPrivateKey { inner: pqc_kyber::SecretKey, } diff --git a/yama_midlevel_crypto/src/keys_x25519.rs b/yama_midlevel_crypto/src/keys_x25519.rs index cdb98c5..791d514 100644 --- a/yama_midlevel_crypto/src/keys_x25519.rs +++ b/yama_midlevel_crypto/src/keys_x25519.rs @@ -9,7 +9,13 @@ pub struct X25519PrivateKey { pub(crate) inner: x25519_dalek::StaticSecret, } -#[derive(Clone)] +impl PartialEq for X25519PrivateKey { + fn eq(&self, other: &Self) -> bool { + self.inner.as_bytes() == other.inner.as_bytes() + } +} + +#[derive(Clone, PartialEq)] #[repr(transparent)] pub struct X25519PublicKey { pub(crate) inner: x25519_dalek::PublicKey, diff --git a/yama_pile/Cargo.toml b/yama_pile/Cargo.toml index f0af66e..40c6138 100644 --- a/yama_pile/Cargo.toml +++ b/yama_pile/Cargo.toml @@ -15,7 +15,7 @@ rand = "0.8.5" patricia_tree = "0.5.7" hex = "0.4.3" -tokio = { version = "1.27.0", features = ["io-util", "macros"] } +tokio = { version = "1.27.0", features = ["io-util", "macros", "sync", "rt", "time"] } serde = { version = "1.0.159", features = ["derive", "rc"] } chrono = { version = "0.4.24", features = ["serde"] } diff --git a/yama_pile/src/keyring.rs b/yama_pile/src/keyring.rs index abe53de..068496c 100644 --- a/yama_pile/src/keyring.rs +++ b/yama_pile/src/keyring.rs @@ -1,10 +1,47 @@ +use eyre::ContextCompat; use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; use yama_midlevel_crypto::asym_box::AsymBox; use yama_midlevel_crypto::asym_keyx::{generate_asym_keypair, DecryptingKey, EncryptingKey}; use yama_midlevel_crypto::asym_signed::{asym_signing_keypair, SigningKey, VerifyingKey}; use yama_midlevel_crypto::byte_layer::ByteLayer; -#[derive(Clone, Serialize, Deserialize)] +/// Exhaustive list of all keys +pub const INDIVIDUAL_KEYS: &[&str] = &[ + "r_config", + "w_config", + "r_bloblog_footer", + "w_bloblog_footer", + "r_bloblog_contents", + "w_bloblog_contents", + "r_locks", + "w_locks", + "r_pointer", + "w_pointer", +]; + +/// Opinionated named groups of keys intended for specific 
purposes. +/// ALL: all keys +/// BACKUP: +pub const KEY_GROUPS: &[(&str, &[&str])] = &[ + ("ALL", INDIVIDUAL_KEYS), + ( + "BACKUP", + &[ + "r_config", + "r_bloblog_footer", + "w_bloblog_footer", + "w_bloblog_contents", + "r_locks", + "w_locks", + "r_pointer", + "w_pointer", + ], + ), + // TODO add more key groups... ("RESTORE", &[]) +]; + +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct Keyring { pub r_config: Option, pub w_config: Option, @@ -22,6 +59,133 @@ pub struct Keyring { pub w_pointer: Option, } +impl Keyring { + pub fn generate_new_master() -> Self { + let (r_config, w_config) = generate_r_w_keys(); + let (r_bloblog_footer, w_bloblog_footer) = generate_r_w_keys(); + let (r_bloblog_contents, w_bloblog_contents) = generate_r_w_keys(); + let (r_locks, w_locks) = generate_r_w_keys(); + let (r_pointer, w_pointer) = generate_r_w_keys(); + Keyring { + r_config: Some(r_config), + w_config: Some(w_config), + r_bloblog_footer: Some(r_bloblog_footer), + w_bloblog_footer: Some(w_bloblog_footer), + r_bloblog_contents: Some(r_bloblog_contents), + w_bloblog_contents: Some(w_bloblog_contents), + r_locks: Some(r_locks), + w_locks: Some(w_locks), + r_pointer: Some(r_pointer), + w_pointer: Some(w_pointer), + } + } + + pub fn partial_copy(&self, keys_to_copy: &BTreeSet<&str>) -> eyre::Result { + Ok(Keyring { + r_config: if keys_to_copy.contains("r_config") { + Some( + self.r_config + .as_ref() + .context("no r_config in original keyring")? + .clone(), + ) + } else { + None + }, + w_config: if keys_to_copy.contains("w_config") { + Some( + self.w_config + .as_ref() + .context("no w_config in original keyring")? + .clone(), + ) + } else { + None + }, + r_bloblog_footer: if keys_to_copy.contains("r_bloblog_footer") { + Some( + self.r_bloblog_footer + .as_ref() + .context("no r_bloblog_footer in original keyring")? + .clone(), + ) + } else { + None + }, + w_bloblog_footer: if keys_to_copy.contains("w_bloblog_footer") { + Some( + self.w_bloblog_footer + .as_ref() + .context("no w_bloblog_footer in original keyring")? + .clone(), + ) + } else { + None + }, + r_bloblog_contents: if keys_to_copy.contains("r_bloblog_contents") { + Some( + self.r_bloblog_contents + .as_ref() + .context("no r_bloblog_contents in original keyring")? + .clone(), + ) + } else { + None + }, + w_bloblog_contents: if keys_to_copy.contains("w_bloblog_contents") { + Some( + self.w_bloblog_contents + .as_ref() + .context("no w_bloblog_contents in original keyring")? + .clone(), + ) + } else { + None + }, + r_locks: if keys_to_copy.contains("r_locks") { + Some( + self.r_locks + .as_ref() + .context("no r_locks in original keyring")? + .clone(), + ) + } else { + None + }, + w_locks: if keys_to_copy.contains("w_locks") { + Some( + self.w_locks + .as_ref() + .context("no w_locks in original keyring")? + .clone(), + ) + } else { + None + }, + r_pointer: if keys_to_copy.contains("r_pointer") { + Some( + self.r_pointer + .as_ref() + .context("no r_pointer in original keyring")? + .clone(), + ) + } else { + None + }, + w_pointer: if keys_to_copy.contains("w_pointer") { + Some( + self.w_pointer + .as_ref() + .context("no w_pointer in original keyring")? 
+ .clone(), + ) + } else { + None + }, + }) + } +} + pub fn generate_r_w_keys() -> (ReaderKey, WriterKey) { let (encrypt, decrypt) = generate_asym_keypair(); let (sign, verify) = asym_signing_keypair(); @@ -31,14 +195,14 @@ pub fn generate_r_w_keys() -> (ReaderKey, WriterKey) { ) } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct WriterKey { // boxed because these take up a lot of stack space otherwise! #[serde(flatten)] inner: Box, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] struct WriterKeyInner { encrypt: EncryptingKey, sign: SigningKey, @@ -56,14 +220,14 @@ impl WriterKey { } } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct ReaderKey { // boxed because these take up a lot of stack space otherwise! #[serde(flatten)] inner: Box, } -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq)] pub struct ReaderKeyInner { decrypt: DecryptingKey, verify: VerifyingKey, @@ -80,3 +244,20 @@ impl ReaderKey { asymbox.unlock(&self.inner.decrypt, &self.inner.verify) } } + +#[cfg(test)] +mod tests { + use crate::keyring::{Keyring, INDIVIDUAL_KEYS}; + use std::collections::BTreeSet; + + #[test] + fn test_keyring_partial_copy_full() { + let kr = Keyring::generate_new_master(); + + let keys_to_copy: BTreeSet<&str> = INDIVIDUAL_KEYS.into_iter().cloned().collect(); + assert!( + kr.partial_copy(&keys_to_copy).unwrap() == kr, + "keyrings not identical!" + ); + } +} From 00fa9d0951b81567d45e629ecc235836d1badab0 Mon Sep 17 00:00:00 2001 From: Olivier Date: Thu, 10 Aug 2023 20:02:24 +0100 Subject: [PATCH 20/51] SFTP wormfile: pull SSH key from YAMA_SSH_KEY if set --- yama_wormfile_sftp/src/lib.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs index 550e240..95c1401 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -2,7 +2,7 @@ extern crate core; use async_trait::async_trait; use eyre::{bail, Context as EyreContext, ContextCompat}; -use openssh::{KnownHosts, RemoteChild, Session, Stdio}; +use openssh::{KnownHosts, RemoteChild, Session, SessionBuilder, Stdio}; use openssh_sftp_client::error::SftpErrorKind; use openssh_sftp_client::file::{File, TokioCompatFile}; use openssh_sftp_client::fs::{DirEntry, Fs}; @@ -55,7 +55,12 @@ struct SftpConn { impl SftpConn { pub async fn create(ssh_connect: &str, root_dir: impl Into) -> eyre::Result { let root_dir = root_dir.into(); - let session = Session::connect(ssh_connect, KnownHosts::Strict).await?; + let mut session_builder = SessionBuilder::default(); + if let Ok(ssh_key_path) = std::env::var("YAMA_SSH_KEY") { + session_builder.keyfile(&ssh_key_path); + } + session_builder.known_hosts_check(KnownHosts::Add); + let session = session_builder.connect(ssh_connect).await?; let res = SftpConnAsyncTryBuilder { ssh: session, From c812532541e96e7686ea54676d58e30db63f9060 Mon Sep 17 00:00:00 2001 From: Olivier Date: Thu, 10 Aug 2023 20:02:32 +0100 Subject: [PATCH 21/51] Add --gradual flag to datman backup commands: allows softlimiting the size of a backup round --- Cargo.lock | 14 ++++ datman/Cargo.toml | 1 + datman/src/backup.rs | 27 +++++++- datman/src/bin/datman.rs | 13 ++-- yama/Cargo.toml | 4 ++ yama/src/scan.rs | 143 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 195 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
fa720c5..5337350 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -377,6 +377,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +[[package]] +name = "bytesize" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38fcc2979eff34a4b84e1cf9a1e3da42a7d44b3b690a40cdcb23e3d556cfb2e5" + [[package]] name = "cap-fs-ext" version = "1.0.10" @@ -746,6 +752,7 @@ dependencies = [ name = "datman" version = "0.7.0-alpha.1" dependencies = [ + "bytesize", "chrono", "clap", "dashmap", @@ -1589,6 +1596,12 @@ dependencies = [ "nu-ansi-term", ] +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + [[package]] name = "match_cfg" version = "0.1.0" @@ -3901,6 +3914,7 @@ dependencies = [ "ignore", "indicatif", "io-streams", + "maplit", "memmap2", "patricia_tree", "serde", diff --git a/datman/Cargo.toml b/datman/Cargo.toml index bab8e89..678baef 100644 --- a/datman/Cargo.toml +++ b/datman/Cargo.toml @@ -24,6 +24,7 @@ tokio = { version = "1.28.0", features = ["fs", "macros", "rt-multi-thread"] } dashmap = "5.4.0" chrono = "0.4.24" users = "0.11.0" +bytesize = "1.2.0" yama = { version = "0.7.0-alpha.1", path = "../yama" } yama_pile = { path = "../yama_pile" } diff --git a/datman/src/backup.rs b/datman/src/backup.rs index 0a2c492..c9e9516 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -1,6 +1,8 @@ use crate::descriptor_config::{SourceDescriptor, SourceDescriptorInner, VirtualSourceKind}; use crate::pointer_names::{get_pointer_name_at, POINTER_NAME_DATETIME_SPLITTER}; +use bytesize::ByteSize; use chrono::{DateTime, Utc}; +use clap::Args; use dashmap::DashSet; use eyre::{bail, ensure, eyre, Context, ContextCompat}; use indicatif::ProgressStyle; @@ -17,7 +19,7 @@ use tracing::{debug, info, info_span, Instrument, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; use users::{get_current_gid, get_current_uid}; use yama::pile_with_cache::PileWithCache; -use yama::scan::create_uidgid_lookup_tables; +use yama::scan::{create_uidgid_lookup_tables, limit_scan_entry_map_to_size}; use yama::storing::{ assemble_and_write_indices, StoragePipeline, StoringBloblogWriters, StoringState, }; @@ -32,9 +34,19 @@ use yama_pile::tree::{ }; use yama_wormfile::boxed::BoxedWormFileProvider; +#[derive(Args, Clone, Debug)] +pub struct BackupOptions { + /// Number of bytes to back up in one go. Intended for gradually getting a backup started. + /// Supports suffixes like MiB and MB. + /// Applies per-source. Does not apply to virtual sources. 
+ #[clap(long)] + gradual: Option, +} + pub async fn backup( pwc: Arc>, sources_to_backup: BTreeMap, + options: &BackupOptions, ) -> eyre::Result<()> { // Locate suitable parent pointers let parents_to_use = find_suitable_parent_pointers(&pwc, &sources_to_backup) @@ -55,9 +67,10 @@ pub async fn backup( let pwc = pwc.clone(); let bds_span = info_span!("storing"); + let options = options.clone(); tokio::spawn( async move { - backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) + backup_dir_sources(dir_sources, pwc, new_unflushed_chunks, &options) .await .context("whilst backing up dir sources") } @@ -261,6 +274,7 @@ async fn backup_dir_sources( dir_sources: Vec, pwc: Arc>, new_unflushed_chunks: Arc>, + options: &BackupOptions, ) -> eyre::Result { let mut chunk_file_maps = Vec::new(); let mut pruned_scan_entry_maps = Vec::new(); @@ -281,7 +295,14 @@ async fn backup_dir_sources( ) }; chunk_file_maps.push(chunk_file_map); - pruned_scan_entry_maps.push(pruned_scan_entry_map); + + pruned_scan_entry_maps.push(match options.gradual { + Some(gradual_size_limit) => Cow::Owned(limit_scan_entry_map_to_size( + pruned_scan_entry_map.into_owned(), + gradual_size_limit.as_u64(), + )), + None => pruned_scan_entry_map, + }); } let store_span = Span::current(); diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 70e59c6..926e00e 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -16,7 +16,7 @@ along with Yama. If not, see . */ use clap::{Parser, Subcommand}; -use datman::backup::backup; +use datman::backup::{backup, BackupOptions}; use datman::descriptor_config::{load_descriptor, SourceDescriptor}; use datman::extract::{ extract, load_pointers_for_extraction, merge_roots_for_batch_extract, select_to_extract, @@ -122,10 +122,14 @@ pub enum DatmanCommand { BackupOne { source_name: String, pile_name: String, + #[clap(flatten)] + options: BackupOptions, }, BackupAll { pile_name: String, + #[clap(flatten)] + options: BackupOptions, }, ExtractOne { @@ -178,6 +182,7 @@ pub async fn main() -> eyre::Result<()> { DatmanCommand::BackupOne { source_name, pile_name, + options, } => { let pile_connector_path = descriptor .piles @@ -203,9 +208,9 @@ pub async fn main() -> eyre::Result<()> { let mut sources_to_backup = BTreeMap::new(); sources_to_backup.insert(source_name.clone(), source.clone()); - backup(pwc, sources_to_backup).await?; + backup(pwc, sources_to_backup, &options).await?; } - DatmanCommand::BackupAll { pile_name } => { + DatmanCommand::BackupAll { pile_name, options } => { let pile_connector_path = descriptor .piles .get(&pile_name) @@ -235,7 +240,7 @@ pub async fn main() -> eyre::Result<()> { sources_to_backup.keys().collect::>() ); - backup(pwc, sources_to_backup).await?; + backup(pwc, sources_to_backup, &options).await?; } DatmanCommand::ExtractOne { pile_name, diff --git a/yama/Cargo.toml b/yama/Cargo.toml index 55d1e61..e88212b 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -51,3 +51,7 @@ io-streams = "0.14.3" dust_style_filetree_display = "0.8.5" + + +[dev-dependencies] +maplit = "1.0.2" \ No newline at end of file diff --git a/yama/src/scan.rs b/yama/src/scan.rs index b08f622..82600d1 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -1,6 +1,7 @@ use eyre::{bail, eyre, Context, ContextCompat}; use ignore::WalkBuilder; use patricia_tree::PatriciaMap; +use std::cmp::max; use std::collections::{BTreeMap, BTreeSet}; use std::fs::{read_link, Metadata}; use std::io::ErrorKind; @@ -272,3 +273,145 @@ pub fn integrate_uid_or_gid_map(new: &mut BTreeMap, 
old: &BTreeMap< new.entry(*old_uid).or_insert_with(|| old_user.clone()); } } + +/// Given a scan entry map, creates another one whose size is limited to not containing more than +/// the given number of file bytes to be stored. +/// There is one exception: if an individual file exceeds the max size, the returned scan entry map +/// may contain just that one file. +/// +/// Useful for gradually starting backups without having to do the first in one shot. +pub fn limit_scan_entry_map_to_size( + scan_entry_map: PatriciaMap, + soft_max_size: u64, +) -> PatriciaMap { + let mut accum_size: u64 = 0; + let mut have_file = false; + let mut result = PatriciaMap::new(); + let mut unincluded_directories = PatriciaMap::new(); + + for (path_bytes, entry) in scan_entry_map.into_iter() { + if accum_size >= soft_max_size { + // we're already full! + break; + } + let size_of_entry = match entry { + ScanEntry::NormalFile { size, .. } => { + // even zero-byte files are not for free, so don't let them be. + max(size, 4096) + } + ScanEntry::Directory { .. } => { + // slightly made up number, but typical size of an inode? + 4096 + } + ScanEntry::SymbolicLink { .. } => { + // slightly made up number, but typical size of an inode? + 4096 + } + }; + + let size_limit_would_be_respected = accum_size + size_of_entry <= soft_max_size; + let this_is_the_only_file = !have_file && matches!(&entry, ScanEntry::NormalFile { .. }); + if size_limit_would_be_respected || this_is_the_only_file { + have_file |= matches!(&entry, ScanEntry::NormalFile { .. }); + result.insert(&path_bytes, entry); + accum_size += size_of_entry; + + // Pull out parent directories so our subset always contains the parents for their children. + let mut path_fragment = &path_bytes[..]; + while let Some((index, _)) = path_fragment + .iter() + .enumerate() + .rev() + .find(|(_idx, char_byte)| **char_byte == b'/') + { + path_fragment = &path_bytes[0..index]; + + if let Some(directory) = unincluded_directories.remove(path_fragment) { + result.insert(path_fragment, directory); + accum_size += 4096; + } + } + } else if matches!(&entry, &ScanEntry::Directory { .. }) { + // put the directory to one side in case we need it... + unincluded_directories.insert(path_bytes, entry); + } + } + + result +} + +#[cfg(test)] +mod tests { + use crate::scan::limit_scan_entry_map_to_size; + use maplit::btreeset; + use patricia_tree::PatriciaMap; + use std::collections::BTreeSet; + use yama_pile::tree::unpopulated::ScanEntry; + use yama_pile::tree::{FilesystemOwnership, FilesystemPermissions}; + + #[test] + fn test_limit_scan_entry_map_to_size() { + let mut orig = PatriciaMap::new(); + + orig.insert( + "somedir".as_bytes(), + ScanEntry::Directory { + ownership: FilesystemOwnership { uid: 0, gid: 0 }, + permissions: FilesystemPermissions { mode: 0 }, + }, + ); + orig.insert( + "somedir/a_small_file".as_bytes(), + ScanEntry::NormalFile { + mtime: 0, + ownership: FilesystemOwnership { uid: 0, gid: 0 }, + permissions: FilesystemPermissions { mode: 0 }, + size: 4, + }, + ); + orig.insert( + "somedir/somefile".as_bytes(), + ScanEntry::NormalFile { + mtime: 0, + ownership: FilesystemOwnership { uid: 0, gid: 0 }, + permissions: FilesystemPermissions { mode: 0 }, + size: 8192, + }, + ); + + // 16k = 4k (dir) + 8k (somefile) + 4k (small file; minimum) + assert_eq!( + limit_scan_entry_map_to_size(orig.clone(), 16384) + .keys() + .collect::>(), + btreeset! 
{ + b"somedir".to_vec(), + b"somedir/a_small_file".to_vec(), + b"somedir/somefile".to_vec(), + } + ); + + // now we don't have room for the big file. + assert_eq!( + limit_scan_entry_map_to_size(orig.clone(), 16383) + .keys() + .collect::>(), + btreeset! { + b"somedir".to_vec(), + b"somedir/a_small_file".to_vec(), + } + ); + + // because we must always include at least one file so we make forward progress, it doesn't + // matter that this violates the size limit. + assert_eq!( + limit_scan_entry_map_to_size(orig.clone(), 1) + .keys() + .collect::>(), + btreeset! { + b"somedir".to_vec(), + b"somedir/a_small_file".to_vec(), + } + ); + } +} From 2e08b2df4714ca9f25c7815dc68799e15d46f203 Mon Sep 17 00:00:00 2001 From: Olivier Date: Thu, 10 Aug 2023 21:33:03 +0100 Subject: [PATCH 22/51] Set Zstd level to 16 --- yama/src/storing.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yama/src/storing.rs b/yama/src/storing.rs index 06bc841..354cbef 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -341,7 +341,7 @@ async fn storage_pipeline_worker( fn get_zstd_level() -> i32 { // TODO Read from env? - return 12; + return 16; } impl StoragePipeline { From 22beea0c507271d64c48bcc0b44758619bbb2475 Mon Sep 17 00:00:00 2001 From: Olivier Date: Fri, 11 Aug 2023 20:00:31 +0100 Subject: [PATCH 23/51] Fix bug when using gradual scans --- datman/src/backup.rs | 83 ++++++++++++++++++++++++++----------------- yama/src/bin/yama.rs | 21 ++++++----- yama/src/scan.rs | 41 +++++++++++++++++++-- yama_pile/src/tree.rs | 5 +-- 4 files changed, 104 insertions(+), 46 deletions(-) diff --git a/datman/src/backup.rs b/datman/src/backup.rs index c9e9516..330cdd0 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -7,7 +7,6 @@ use dashmap::DashSet; use eyre::{bail, ensure, eyre, Context, ContextCompat}; use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; -use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; use std::io::Write; use std::path::PathBuf; @@ -55,7 +54,7 @@ pub async fn backup( let now = Utc::now(); // (dirtrees) Scan - let dir_sources = scan_dir_sources(&sources_to_backup, parents_to_use, now) + let dir_sources = scan_dir_sources(&sources_to_backup, parents_to_use, now, options) .await .context("failed to scan directory sources")?; @@ -67,10 +66,9 @@ pub async fn backup( let pwc = pwc.clone(); let bds_span = info_span!("storing"); - let options = options.clone(); tokio::spawn( async move { - backup_dir_sources(dir_sources, pwc, new_unflushed_chunks, &options) + backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) .await .context("whilst backing up dir sources") } @@ -125,9 +123,12 @@ pub async fn backup( for (dir_source_prep, chunk_file_map) in dir_sources_and_chunkmaps.dir_source_returns { // Assemble and write a pointer - let mut tree = - assemble_tree_from_scan_entries(dir_source_prep.scan_entry_map, chunk_file_map) - .context("failed to assemble tree")?; + let mut tree = assemble_tree_from_scan_entries( + dir_source_prep.new_scan_entry_map, + dir_source_prep.reused_scan_entry_map, + chunk_file_map, + ) + .context("failed to assemble tree")?; let (uids, gids) = create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?; @@ -217,17 +218,22 @@ async fn find_suitable_parent_pointers( } struct DirSourcePrep { - scan_entry_map: PatriciaMap, + /// New entries only. + new_scan_entry_map: PatriciaMap, + /// Files: Reused entries only. Directories: can be partially changed but there's no chunking to be done. 
+ reused_scan_entry_map: PatriciaMap, parent_name: Option, parent: Option, path: PathBuf, new_pointer_name: String, + chunk_file_map: PatriciaMap>, } async fn scan_dir_sources( sources_to_backup: &BTreeMap, mut parents: BTreeMap, now: DateTime, + options: &BackupOptions, ) -> eyre::Result> { let mut joinset = JoinSet::new(); @@ -244,10 +250,40 @@ async fn scan_dir_sources( let ignore = ignore.to_owned(); let (parent_name, parent) = parents.remove(source_name).unzip(); let new_pointer_name = get_pointer_name_at(&source_name, now); + let options = options.clone(); joinset.spawn_blocking(move || -> eyre::Result { let scan_entry_map = scan::scan(&path, &ignore).context("Failed to scan")?; + + // TODO This whole section is messy. + // Maybe we should consider combining prepopulate_unmodified and limit_scan_entry_map_to_size + // as the latter might benefit from being able to see what is in the parent pointer... + + let (chunk_file_map, pruned_scan_entry_map, prepopulated_scan_entry_map) = + if let Some(ref parent_node) = parent { + let (cfm, prepopulated, pruned) = + scan::prepopulate_unmodified(&parent_node.root.node, &scan_entry_map); + + (cfm, pruned, prepopulated) + } else { + ( + PatriciaMap::>::new(), + scan_entry_map, + PatriciaMap::new(), + ) + }; + + let pruned_scan_entry_map = match options.gradual { + Some(gradual_size_limit) => limit_scan_entry_map_to_size( + pruned_scan_entry_map, + gradual_size_limit.as_u64(), + ), + None => pruned_scan_entry_map, + }; + Ok(DirSourcePrep { - scan_entry_map, + chunk_file_map, + new_scan_entry_map: pruned_scan_entry_map, + reused_scan_entry_map: prepopulated_scan_entry_map, parent_name, parent, path, @@ -271,38 +307,19 @@ struct BackupDirSourcesReturn { } async fn backup_dir_sources( - dir_sources: Vec, + mut dir_sources: Vec, pwc: Arc>, new_unflushed_chunks: Arc>, - options: &BackupOptions, ) -> eyre::Result { let mut chunk_file_maps = Vec::new(); let mut pruned_scan_entry_maps = Vec::new(); // First collect all that stuff together... 
+ for dir_source in &mut dir_sources { + chunk_file_maps.push(std::mem::take(&mut dir_source.chunk_file_map)); + } for dir_source in &dir_sources { - let (chunk_file_map, pruned_scan_entry_map) = if let Some(ref parent_node) = - dir_source.parent - { - let (cfm, pruned) = - scan::prepopulate_unmodified(&parent_node.root.node, &dir_source.scan_entry_map); - - (cfm, Cow::Owned(pruned)) - } else { - ( - PatriciaMap::>::new(), - Cow::Borrowed(&dir_source.scan_entry_map), - ) - }; - chunk_file_maps.push(chunk_file_map); - - pruned_scan_entry_maps.push(match options.gradual { - Some(gradual_size_limit) => Cow::Owned(limit_scan_entry_map_to_size( - pruned_scan_entry_map.into_owned(), - gradual_size_limit.as_u64(), - )), - None => pruned_scan_entry_map, - }); + pruned_scan_entry_maps.push(&dir_source.new_scan_entry_map); } let store_span = Span::current(); diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index 4391214..ec34327 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -19,7 +19,6 @@ use clap::{Parser, Subcommand}; use eyre::{bail, eyre, Context, ContextCompat}; use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; -use std::borrow::Cow; use std::collections::BTreeSet; use std::iter::Iterator; use std::path::{Path, PathBuf}; @@ -433,16 +432,17 @@ async fn main() -> eyre::Result<()> { let pwc = Arc::new(pwc); - let (mut chunk_file_map, pruned_scan_entry_map) = + let (mut chunk_file_map, pruned_scan_entry_map, prepopulated_scan_entry_map) = if let Some(ref parent_node) = parent_pointer { - let (cfm, pruned) = + let (cfm, pruned, prepopulated) = scan::prepopulate_unmodified(&parent_node.root.node, &scan_entry_map); - (cfm, Cow::Owned(pruned)) + (cfm, pruned, prepopulated) } else { ( PatriciaMap::>::new(), - Cow::Borrowed(&scan_entry_map), + scan_entry_map, + PatriciaMap::new(), ) }; @@ -467,10 +467,11 @@ async fn main() -> eyre::Result<()> { StoragePipeline::launch_new(4, pwc.clone(), new_unflushed_chunks).await?; let source2 = source.clone(); + let pruned_scan_entry_map2 = &pruned_scan_entry_map; let (submitter_task, receiver_task) = tokio::join!( async move { let pipeline_job_tx = pipeline_job_tx; - for (name_bytes, scan_entry) in pruned_scan_entry_map.iter() { + for (name_bytes, scan_entry) in pruned_scan_entry_map2.iter() { if let ScanEntry::NormalFile { .. } = scan_entry { let name = std::str::from_utf8(name_bytes.as_slice()) .context("name is not str")?; @@ -517,8 +518,12 @@ async fn main() -> eyre::Result<()> { info!("All indices stored, writing pointer..."); // Assemble and write a pointer - let mut tree = assemble_tree_from_scan_entries(scan_entry_map, chunk_file_map) - .context("failed to assemble tree")?; + let mut tree = assemble_tree_from_scan_entries( + pruned_scan_entry_map, + prepopulated_scan_entry_map, + chunk_file_map, + ) + .context("failed to assemble tree")?; let (uids, gids) = create_uidgid_lookup_tables(&tree).context("failed to create uid/gid tables")?; diff --git a/yama/src/scan.rs b/yama/src/scan.rs index 82600d1..8a956ff 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -220,19 +220,23 @@ fn scan_one_no_recurse(path: &Path, metadata: Metadata) -> eyre::Result, ) -> ( PatriciaMap>, PatriciaMap, + PatriciaMap, ) { let mut reusable_chunkings = PatriciaMap::new(); + let mut prepopulated_scan_entry_map = PatriciaMap::new(); let mut pruned_scan_entry_map = scan_entry_map.clone(); parent_tree .visit( &mut |tree_node, path| { + // TODO We should consider prepopulating symlinks and empty dirs too, if they're + // included in the parent. 
if let TreeNode::NormalFile { mtime: prev_mtime, ownership: prev_ownership, @@ -255,7 +259,26 @@ pub fn prepopulate_unmodified( { // Nothing seems to have changed about this file, let's just reuse the `content` from last time. reusable_chunkings.insert(path, Some((*prev_content, *size))); - pruned_scan_entry_map.remove(path); + prepopulated_scan_entry_map.insert( + path, + pruned_scan_entry_map.remove(path).expect("checked removal"), + ); + + // Pull out parent directories so our subset always contains the parents for their children. + let mut path_fragment = path.as_bytes(); + while let Some((index, _)) = path_fragment + .iter() + .enumerate() + .rev() + .find(|(_idx, char_byte)| **char_byte == b'/') + { + path_fragment = &path_fragment[0..index]; + + if let Some(directory) = pruned_scan_entry_map.remove(path_fragment) + { + prepopulated_scan_entry_map.insert(path_fragment, directory); + } + } } } } @@ -265,7 +288,11 @@ pub fn prepopulate_unmodified( String::new(), ) .expect("no reason to fail"); - (reusable_chunkings, pruned_scan_entry_map) + ( + reusable_chunkings, + prepopulated_scan_entry_map, + pruned_scan_entry_map, + ) } pub fn integrate_uid_or_gid_map(new: &mut BTreeMap, old: &BTreeMap) { @@ -287,6 +314,7 @@ pub fn limit_scan_entry_map_to_size( let mut accum_size: u64 = 0; let mut have_file = false; let mut result = PatriciaMap::new(); + // let mut removeds = PatriciaSet::new(); let mut unincluded_directories = PatriciaMap::new(); for (path_bytes, entry) in scan_entry_map.into_iter() { @@ -334,9 +362,16 @@ pub fn limit_scan_entry_map_to_size( } else if matches!(&entry, &ScanEntry::Directory { .. }) { // put the directory to one side in case we need it... unincluded_directories.insert(path_bytes, entry); + } else { + // removeds.insert(path_bytes); } } + // for (key, _) in unincluded_directories { + // removeds.insert(key); + // } + + // (result, removeds) result } diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index 945a52d..dea030f 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -391,14 +391,15 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) { } pub fn assemble_tree_from_scan_entries( - scan: PatriciaMap, + new_scan: PatriciaMap, + reused_scan: PatriciaMap, mut chunkings: PatriciaMap>, ) -> eyre::Result { let mut dirs: BTreeMap> = BTreeMap::new(); // special-case the root ("") dirs.insert(String::new(), BTreeMap::new()); - for (key, entry) in scan.into_iter() { + for (key, entry) in reused_scan.into_iter().chain(new_scan.into_iter()) { let key_string = String::from_utf8(key).context("bad UTF-8 in PMap")?; let (parent_dir_name, child_name) = key_string.rsplit_once('/').unwrap_or(("", &key_string)); From 31ffb1ce86d1b41ebdc236535a6489af2ec40fae Mon Sep 17 00:00:00 2001 From: Olivier Date: Fri, 11 Aug 2023 20:18:03 +0100 Subject: [PATCH 24/51] Add logging for new bug with gradual scans --- datman/src/backup.rs | 19 +++++++++++++++++-- yama/src/scan.rs | 4 ++-- yama_pile/src/tree.rs | 6 +++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/datman/src/backup.rs b/datman/src/backup.rs index 330cdd0..f406cd0 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -7,7 +7,7 @@ use dashmap::DashSet; use eyre::{bail, ensure, eyre, Context, ContextCompat}; use indicatif::ProgressStyle; use patricia_tree::PatriciaMap; -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::io::Write; use std::path::PathBuf; use std::process::{Child, Command, Stdio}; @@ -260,9 +260,24 
@@ async fn scan_dir_sources( let (chunk_file_map, pruned_scan_entry_map, prepopulated_scan_entry_map) = if let Some(ref parent_node) = parent { - let (cfm, prepopulated, pruned) = + let (cfm, pruned, prepopulated) = scan::prepopulate_unmodified(&parent_node.root.node, &scan_entry_map); + // debug + let pruned_keys: BTreeSet = pruned + .keys() + .map(|b| String::from_utf8(b).unwrap()) + .collect(); + let prepop_keys: BTreeSet = prepopulated + .keys() + .map(|b| String::from_utf8(b).unwrap()) + .collect(); + let ix_keys: BTreeSet<&String> = + pruned_keys.intersection(&prepop_keys).collect(); + if !ix_keys.is_empty() { + bail!("bug: intersecting prepop and prune keys: {ix_keys:?}"); + } + (cfm, pruned, prepopulated) } else { ( diff --git a/yama/src/scan.rs b/yama/src/scan.rs index 8a956ff..eca4209 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -220,7 +220,7 @@ fn scan_one_no_recurse(path: &Path, metadata: Metadata) -> eyre::Result, @@ -290,8 +290,8 @@ pub fn prepopulate_unmodified( .expect("no reason to fail"); ( reusable_chunkings, - prepopulated_scan_entry_map, pruned_scan_entry_map, + prepopulated_scan_entry_map, ) } diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index dea030f..f3b843b 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -390,6 +390,8 @@ pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) { } } +/// `new_scan` and `reused_scan` should be disjoint, but `reused_scan` must contain all directories +/// needed for itself; `new_scan` is allowed to reuse those directories. pub fn assemble_tree_from_scan_entries( new_scan: PatriciaMap, reused_scan: PatriciaMap, @@ -434,7 +436,9 @@ pub fn assemble_tree_from_scan_entries( ownership, permissions, } => { - dirs.insert(key_string.clone(), BTreeMap::new()); + if !dirs.insert(key_string.clone(), BTreeMap::new()).is_none() { + bail!("bug: {key_string:?} already in dirs..."); + } // note: for the root, this inserts the root directory entry as a child called "" within the root. // That's fine. We'll patch this up later. dirs.get_mut(parent_dir_name) From e85c8c372db44dcba265c951fe3ee686c342f8e6 Mon Sep 17 00:00:00 2001 From: Olivier Date: Fri, 11 Aug 2023 20:20:20 +0100 Subject: [PATCH 25/51] Don't special-case the root --- yama_pile/src/tree.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index f3b843b..e908990 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -398,8 +398,6 @@ pub fn assemble_tree_from_scan_entries( mut chunkings: PatriciaMap>, ) -> eyre::Result { let mut dirs: BTreeMap> = BTreeMap::new(); - // special-case the root ("") - dirs.insert(String::new(), BTreeMap::new()); for (key, entry) in reused_scan.into_iter().chain(new_scan.into_iter()) { let key_string = String::from_utf8(key).context("bad UTF-8 in PMap")?; From 6b72672d29f0cde481fc1c582c41e0231725bd3b Mon Sep 17 00:00:00 2001 From: Olivier Date: Fri, 11 Aug 2023 20:24:59 +0100 Subject: [PATCH 26/51] Fix bug in path iteration leading to bug in gradual scans --- yama/src/scan.rs | 38 ++++++++++++++++++++------------------ yama_pile/src/tree.rs | 4 +++- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/yama/src/scan.rs b/yama/src/scan.rs index eca4209..370bfbb 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -265,15 +265,7 @@ pub fn prepopulate_unmodified( ); // Pull out parent directories so our subset always contains the parents for their children. 
- let mut path_fragment = path.as_bytes(); - while let Some((index, _)) = path_fragment - .iter() - .enumerate() - .rev() - .find(|(_idx, char_byte)| **char_byte == b'/') - { - path_fragment = &path_fragment[0..index]; - + for path_fragment in iterate_dirs_upwards(path.as_bytes()) { if let Some(directory) = pruned_scan_entry_map.remove(path_fragment) { prepopulated_scan_entry_map.insert(path_fragment, directory); @@ -345,15 +337,7 @@ pub fn limit_scan_entry_map_to_size( accum_size += size_of_entry; // Pull out parent directories so our subset always contains the parents for their children. - let mut path_fragment = &path_bytes[..]; - while let Some((index, _)) = path_fragment - .iter() - .enumerate() - .rev() - .find(|(_idx, char_byte)| **char_byte == b'/') - { - path_fragment = &path_bytes[0..index]; - + for path_fragment in iterate_dirs_upwards(&path_bytes) { if let Some(directory) = unincluded_directories.remove(path_fragment) { result.insert(path_fragment, directory); accum_size += 4096; @@ -375,6 +359,24 @@ pub fn limit_scan_entry_map_to_size( result } +/// Returns a list of all the parent paths of the given path (in bytes), +/// including the root, in order from leaf to root. +pub fn iterate_dirs_upwards(path_bytes: &[u8]) -> Vec<&[u8]> { + let mut result = Vec::new(); + let mut path_fragment = &path_bytes[..]; + while let Some((index, _)) = path_fragment + .iter() + .enumerate() + .rev() + .find(|(_idx, char_byte)| **char_byte == b'/') + { + path_fragment = &path_bytes[0..index]; + result.push(path_fragment); + } + result.push(&path_bytes[0..0]); + result +} + #[cfg(test)] mod tests { use crate::scan::limit_scan_entry_map_to_size; diff --git a/yama_pile/src/tree.rs b/yama_pile/src/tree.rs index e908990..cda68ca 100644 --- a/yama_pile/src/tree.rs +++ b/yama_pile/src/tree.rs @@ -417,7 +417,9 @@ pub fn assemble_tree_from_scan_entries( // note: for the root, this inserts the root file entry as a child called "" within a fake root 'directory'. // That's fine. We'll patch this up later. dirs.get_mut(parent_dir_name) - .context("bad PMap: parent not seen first")? + .with_context(|| { + format!("bad PMap: parent {parent_dir_name} not seen first") + })? .insert( child_name.to_owned(), TreeNode::NormalFile { From 5137ac0640a07555731d6ef2631faacfbe6c64c9 Mon Sep 17 00:00:00 2001 From: Olivier Date: Fri, 11 Aug 2023 20:31:08 +0100 Subject: [PATCH 27/51] Fix ignore rules --- yama/src/bin/yamascan.rs | 10 +++++++--- yama/src/scan.rs | 11 ++++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/yama/src/bin/yamascan.rs b/yama/src/bin/yamascan.rs index bf58473..dfea42b 100644 --- a/yama/src/bin/yamascan.rs +++ b/yama/src/bin/yamascan.rs @@ -52,7 +52,11 @@ pub enum YamaScanCommand { /// Show dust-style usage graph of the current directory, excluding excluded files. #[command(alias = "du")] - Usage {}, + Usage { + /// Specify an ignore rule. Can use multiple times. 
+ #[arg(short = 'I', long = "ignore")] + ignore: Vec, + }, } #[tokio::main] @@ -66,7 +70,7 @@ async fn main() -> eyre::Result<()> { .init(); match YamaScanCommand::parse() { - YamaScanCommand::Usage {} => { + YamaScanCommand::Usage { ignore } => { let idd = InitialDisplayData { short_paths: true, is_reversed: false, @@ -76,7 +80,7 @@ async fn main() -> eyre::Result<()> { iso: false, }; - let scan = scan::scan(Path::new("."), &Vec::new()).context("Couldn't scan")?; + let scan = scan::scan(Path::new("."), &ignore).context("Couldn't scan")?; let top_nodes = assemble_display_tree_from_scan_entries(scan)?.children; let root_display_node = dust_style_filetree_display::filter::get_biggest( diff --git a/yama/src/scan.rs b/yama/src/scan.rs index 370bfbb..526989a 100644 --- a/yama/src/scan.rs +++ b/yama/src/scan.rs @@ -1,4 +1,5 @@ use eyre::{bail, eyre, Context, ContextCompat}; +use ignore::overrides::OverrideBuilder; use ignore::WalkBuilder; use patricia_tree::PatriciaMap; use std::cmp::max; @@ -117,9 +118,17 @@ pub fn scan(root: &Path, ignores: &Vec) -> eyre::Result Date: Fri, 11 Aug 2023 21:44:14 +0100 Subject: [PATCH 28/51] Update SFTP client to get fix for infinite buffering memory leak --- Cargo.lock | 4 ++-- yama_wormfile_sftp/Cargo.toml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5337350..ce703ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1839,9 +1839,9 @@ dependencies = [ [[package]] name = "openssh-sftp-client" -version = "0.13.5" +version = "0.13.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "866d0eab409a2fcb6b8c3838fdbf10d7399d486548c19179a80f1c1142e93348" +checksum = "bff0b8012752f6cdf35c0483c0bd0cc15f4b229284f1f5a7aaf2a66cd7e8fde9" dependencies = [ "bytes", "derive_destructure2", diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml index 70e4812..9166407 100644 --- a/yama_wormfile_sftp/Cargo.toml +++ b/yama_wormfile_sftp/Cargo.toml @@ -10,7 +10,8 @@ yama_wormfile = { version = "0.1.0", path = "../yama_wormfile" } ouroboros = "0.15.6" openssh = "0.9.9" -openssh-sftp-client = "0.13.5" +# Need >=0.13.6 which includes a fix for massive memory leak (infinite buffering): https://github.com/openssh-rust/openssh-sftp-client/issues/89 +openssh-sftp-client = "0.13.9" async-trait = "0.1.68" tokio = { version = "1.27.0", features = ["io-std"] } tokio-stream = "0.1.14" From a9379dba144870bb7300f82c9464940c048ecfdb Mon Sep 17 00:00:00 2001 From: Olivier Date: Fri, 11 Aug 2023 22:19:12 +0100 Subject: [PATCH 29/51] Actually add a limit to prevent infinite buffering memory --- yama_wormfile_sftp/src/lib.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/yama_wormfile_sftp/src/lib.rs b/yama_wormfile_sftp/src/lib.rs index 95c1401..2bae46f 100644 --- a/yama_wormfile_sftp/src/lib.rs +++ b/yama_wormfile_sftp/src/lib.rs @@ -7,11 +7,12 @@ use openssh_sftp_client::error::SftpErrorKind; use openssh_sftp_client::file::{File, TokioCompatFile}; use openssh_sftp_client::fs::{DirEntry, Fs}; use openssh_sftp_client::Error::SftpError; -use openssh_sftp_client::Sftp; +use openssh_sftp_client::{Sftp, SftpOptions}; use ouroboros::self_referencing; use std::fmt::{Debug, Formatter}; use std::io; use std::io::{ErrorKind, SeekFrom}; +use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::Arc; @@ -76,10 +77,16 @@ impl SftpConn { }, sftp_builder: |ssh_child| { Box::pin(async move { + let sftp_opts = SftpOptions::new() + // Don't buffer infinitely 
when writing files: 32 MiB per file in flight + // will do. + .tokio_compat_file_write_limit( + NonZeroUsize::new(32 * 1024 * 1024).unwrap(), + ); Sftp::new( ssh_child.stdin().take().unwrap(), ssh_child.stdout().take().unwrap(), - Default::default(), + sftp_opts, ) .await .map_err(|e| eyre::Error::from(e)) From 2c14654d2970425afdfa71df347eea85786daa6d Mon Sep 17 00:00:00 2001 From: Olivier Date: Sun, 13 Aug 2023 16:30:50 +0100 Subject: [PATCH 30/51] Add a small amount of async_backtrace tracking --- Cargo.lock | 88 ++++++++++++++++++++++++++++++ datman/Cargo.toml | 3 +- datman/src/backup.rs | 28 ++++------ datman/src/bin/datman.rs | 3 + yama/Cargo.toml | 3 +- yama/src/bin/yama.rs | 3 + yama/src/debugging.rs | 20 +++++++ yama/src/lib.rs | 2 + yama/src/retriever.rs | 8 +-- yama/src/retriever/decompressor.rs | 4 +- yama/src/storing.rs | 24 +++++--- yama_localcache/Cargo.toml | 3 +- yama_pile/Cargo.toml | 3 +- yama_pile/src/locks.rs | 4 +- yama_wormfile_sftp/Cargo.toml | 1 + 15 files changed, 160 insertions(+), 37 deletions(-) create mode 100644 yama/src/debugging.rs diff --git a/Cargo.lock b/Cargo.lock index ce703ff..9cd0c99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,6 +168,34 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +[[package]] +name = "async-backtrace" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a2a4168316d920764f19e516d4d0f5b079ecdf0ced2ea2ef08a266102a38802" +dependencies = [ + "async-backtrace-attributes", + "dashmap", + "futures", + "itertools", + "loom", + "once_cell", + "pin-project-lite", + "rustc-hash", + "static_assertions", +] + +[[package]] +name = "async-backtrace-attributes" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "840d2e9edec91ac974365978efc6f00781ff497e706a12306fff29ae92f8ad46" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + [[package]] name = "async-recursion" version = "1.0.4" @@ -752,6 +780,7 @@ dependencies = [ name = "datman" version = "0.7.0-alpha.1" dependencies = [ + "async-backtrace", "bytesize", "chrono", "clap", @@ -1129,6 +1158,19 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1586,6 +1628,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lscolors" version = "0.13.0" @@ -2427,6 +2482,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.37.6" @@ -2464,6 +2525,12 @@ dependencies = [ "base64 0.21.0", ] +[[package]] +name = "rustversion" +version = "1.0.14" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "rxml" version = "0.8.2" @@ -2507,6 +2574,12 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.1.0" @@ -2652,6 +2725,16 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -3903,6 +3986,7 @@ name = "yama" version = "0.7.0-alpha.1" dependencies = [ "appdirs", + "async-backtrace", "async-recursion", "clap", "dashmap", @@ -3918,6 +4002,7 @@ dependencies = [ "memmap2", "patricia_tree", "serde", + "signal-hook", "tokio", "toml", "tracing", @@ -3939,6 +4024,7 @@ dependencies = [ name = "yama_localcache" version = "0.1.0" dependencies = [ + "async-backtrace", "eyre", "itertools", "sqlx", @@ -3971,6 +4057,7 @@ dependencies = [ name = "yama_pile" version = "0.1.0" dependencies = [ + "async-backtrace", "backtrace", "chrono", "eyre", @@ -4028,6 +4115,7 @@ dependencies = [ name = "yama_wormfile_sftp" version = "0.1.0" dependencies = [ + "async-backtrace", "async-trait", "eyre", "openssh", diff --git a/datman/Cargo.toml b/datman/Cargo.toml index 678baef..2757610 100644 --- a/datman/Cargo.toml +++ b/datman/Cargo.toml @@ -35,4 +35,5 @@ yama_wormfile = { path = "../yama_wormfile" } #yama_wormfile_sftp = { path = "../yama_wormfile_sftp" } yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } -patricia_tree = "0.5.7" \ No newline at end of file +patricia_tree = "0.5.7" +async-backtrace = "0.2.6" \ No newline at end of file diff --git a/datman/src/backup.rs b/datman/src/backup.rs index f406cd0..ae53e22 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -66,14 +66,12 @@ pub async fn backup( let pwc = pwc.clone(); let bds_span = info_span!("storing"); - tokio::spawn( - async move { - backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) - .await - .context("whilst backing up dir sources") - } - .instrument(bds_span), - ) + tokio::spawn(async_backtrace::frame!(async move { + backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) + .await + .context("whilst backing up dir sources") + } + .instrument(bds_span))) }; // (virtual source streams) Store to bloblog writers @@ -82,14 +80,12 @@ pub async fn backup( let new_unflushed_chunks = new_unflushed_chunks.clone(); let pwc = pwc.clone(); - tokio::spawn( - async move { - backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks) - .await - .context("whilst backing up virtual sources") - } - .instrument(bvs_span), - ) + tokio::spawn(async_backtrace::frame!(async move { + backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks) + .await + .context("whilst backing up virtual sources") + } + .instrument(bvs_span))) }; let (dir_sources_and_chunkmaps, virt_sources) = diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 926e00e..5bdba6f 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -31,6 +31,7 @@ use tracing_subscriber::filter::filter_fn; use 
tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::Layer; +use yama::debugging::register_sigusr1_backtrace_helper; use yama::get_hostname; use yama::open::open_lock_and_update_cache; @@ -171,6 +172,8 @@ pub async fn main() -> eyre::Result<()> { .with(indicatif_layer) .init(); + register_sigusr1_backtrace_helper(); + let args: DatmanArgs = dbg!(DatmanArgs::parse()); let descriptor = load_descriptor(&args.config) diff --git a/yama/Cargo.toml b/yama/Cargo.toml index e88212b..6139cf0 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -51,7 +51,8 @@ io-streams = "0.14.3" dust_style_filetree_display = "0.8.5" - +async-backtrace = "0.2.6" +signal-hook = "0.3.17" [dev-dependencies] maplit = "1.0.2" \ No newline at end of file diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index ec34327..0887fb6 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -34,6 +34,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::Layer; use users::{get_current_gid, get_current_uid}; +use yama::debugging::register_sigusr1_backtrace_helper; use yama::extract::flatten_treenode; use yama::init::pack_keyring; use yama::open::{ @@ -293,6 +294,8 @@ async fn main() -> eyre::Result<()> { .with(indicatif_layer) .init(); + register_sigusr1_backtrace_helper(); + match dbg!(YamaCommand::parse()) { YamaCommand::Init { sftp, diff --git a/yama/src/debugging.rs b/yama/src/debugging.rs new file mode 100644 index 0000000..0a07d0c --- /dev/null +++ b/yama/src/debugging.rs @@ -0,0 +1,20 @@ +use tokio::signal::unix::SignalKind; +use tracing::warn; + +/// Registers a signal handler on SIGUSR1 that dumps a backtrace of the tokio task tree. +/// +/// May be useful for debugging deadlocks etc. 
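+///
+/// To trigger a dump, send the signal to the running `yama` or `datman` process
+/// (e.g. `kill -USR1 <pid>`); the task tree from `async_backtrace::taskdump_tree`
+/// is logged at WARN level.
+///
+/// Must be called from within a Tokio runtime, since it spawns the listener task.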
+pub fn register_sigusr1_backtrace_helper() { + tokio::spawn(async { + while let Some(()) = tokio::signal::unix::signal(SignalKind::user_defined1()) + .unwrap() + .recv() + .await + { + warn!( + "SIGUSR1 received; debug task backtrace:\n{}", + async_backtrace::taskdump_tree(false) + ); + } + }); +} diff --git a/yama/src/lib.rs b/yama/src/lib.rs index fbb9589..f8f452c 100644 --- a/yama/src/lib.rs +++ b/yama/src/lib.rs @@ -21,3 +21,5 @@ pub fn get_hostname() -> String { .into_string() .expect("Hostname string must be sensible.") } + +pub mod debugging; diff --git a/yama/src/retriever.rs b/yama/src/retriever.rs index 47bbe30..b744d38 100644 --- a/yama/src/retriever.rs +++ b/yama/src/retriever.rs @@ -108,11 +108,11 @@ pub fn create_fixed_retriever( rint.set_up_job(job_id, job); } - tokio::spawn(async move { + tokio::spawn(async_backtrace::frame!(async move { if let Err(e) = rint.retrieval_task().await { error!("retriever failed: {e:?}"); } - }); + })); Ok(results_rx) } @@ -165,13 +165,13 @@ impl RetrieverInternals { let (subjob_tx, subjob_rx) = flume::unbounded(); - tokio::spawn(async move { + tokio::spawn(async_backtrace::frame!(async move { if let Err(e) = Self::reader_task(bloblog_reader, subjob_rx, ack_tx, completion_tx).await { error!("error in reader for {bloblog_id:?}: {e:?}"); } - }); + })); self.open_files.insert( file_id, diff --git a/yama/src/retriever/decompressor.rs b/yama/src/retriever/decompressor.rs index 0720888..232dd01 100644 --- a/yama/src/retriever/decompressor.rs +++ b/yama/src/retriever/decompressor.rs @@ -58,11 +58,11 @@ impl PipelineDecompressor { processing: Default::default(), }; - tokio::spawn(async move { + tokio::spawn(async_backtrace::frame!(async move { if let Err(e) = pd.decompressor_manager().await { eprintln!("pipeline decompressor error: {e:?}"); } - }); + })); Ok(out_rx) } diff --git a/yama/src/storing.rs b/yama/src/storing.rs index 354cbef..3c6f473 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -112,6 +112,7 @@ impl StoringBloblogWriters { impl StoringState { /// Acquire a bloblog writer handle, reusing the existing one in the slot if suitable. + #[async_backtrace::framed] async fn obtain_bloblog_writer<'a>( &mut self, slot: &'a mut Option>>>, @@ -136,6 +137,7 @@ impl StoringState { } /// For internal use only. + #[async_backtrace::framed] fn process_chunk( &mut self, chunk_bytes: &[u8], @@ -164,6 +166,7 @@ impl StoringState { Ok(()) } + #[async_backtrace::framed] fn store_full_slice_returning_chunks( &mut self, store_slice: &[u8], @@ -185,6 +188,7 @@ impl StoringState { }) } + #[async_backtrace::framed] fn store_full_stream_returning_chunks( &mut self, store_stream: impl Read, @@ -208,6 +212,7 @@ impl StoringState { }) } + #[async_backtrace::framed] pub fn store_full_slice( &mut self, store_slice: &[u8], @@ -241,6 +246,7 @@ impl StoringState { /// Stores a full stream (`Read`) and returns the recursive chunk ref plus the length of the /// stream. + #[async_backtrace::framed] pub fn store_full_stream( &mut self, store_stream: impl Read, @@ -278,6 +284,7 @@ impl StoringState { /// Stores a file, returning Ok(Some(...)) if fine, Ok(None) if the file doesn't exist (vanished) /// or Err(...) for any other error. 
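///
/// The `#[async_backtrace::framed]` attribute added below wraps this future in an
/// `async-backtrace` frame so that it shows up in the SIGUSR1 task-tree dump
/// (see `yama/src/debugging.rs`).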
+#[async_backtrace::framed] async fn store_file( file_path: &Path, storing_state: &mut StoringState, @@ -304,6 +311,7 @@ pub struct StoragePipeline { join_set: JoinSet>, } +#[async_backtrace::framed] async fn storage_pipeline_worker( job_rx: Receiver<(JobName, PathBuf)>, result_tx: Sender<(JobName, Option<(RecursiveChunkRef, u64)>)>, @@ -362,16 +370,14 @@ impl StoragePipeline { .context("failed to create storing state")?; // make a logging span for the Storage Pipeline Workers let spw_span = info_span!("spw", n = spw_num); - join_set.spawn( - async move { - let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await; - if let Err(ref err) = result { - error!("Error in SPW {err:?}"); - } - result + join_set.spawn(async_backtrace::frame!(async move { + let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await; + if let Err(ref err) = result { + error!("Error in SPW {err:?}"); } - .instrument(spw_span), - ); + result + } + .instrument(spw_span))); } Ok(( diff --git a/yama_localcache/Cargo.toml b/yama_localcache/Cargo.toml index 3ef059f..d12ccf8 100644 --- a/yama_localcache/Cargo.toml +++ b/yama_localcache/Cargo.toml @@ -12,4 +12,5 @@ eyre = "0.6.8" tokio = "1.27.0" yama_pile = { path = "../yama_pile" } yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } -itertools = "0.10.5" \ No newline at end of file +itertools = "0.10.5" +async-backtrace = "0.2.6" \ No newline at end of file diff --git a/yama_pile/Cargo.toml b/yama_pile/Cargo.toml index 40c6138..54c16a5 100644 --- a/yama_pile/Cargo.toml +++ b/yama_pile/Cargo.toml @@ -22,4 +22,5 @@ chrono = { version = "0.4.24", features = ["serde"] } uuid = { version = "1.3.0", features = ["fast-rng", "v4"] } unix_mode = "0.1.3" -backtrace = "0.3.67" \ No newline at end of file +backtrace = "0.3.67" +async-backtrace = "0.2.6" \ No newline at end of file diff --git a/yama_pile/src/locks.rs b/yama_pile/src/locks.rs index 2ce48ec..76a2a97 100644 --- a/yama_pile/src/locks.rs +++ b/yama_pile/src/locks.rs @@ -181,7 +181,7 @@ impl LockHandle { // Good. Now start a background task for refreshing it as necessary. // TODO spawn this onto a joinset and then make sure we release locks at end of program... 
let lock_path2 = lock_path.clone(); - let lock_task_join_handle = Some(tokio::spawn(async move { + let lock_task_join_handle = Some(tokio::spawn(async_backtrace::frame!(async move { if let Err(err) = lock_renewal( provider, lock_path2, @@ -194,7 +194,7 @@ impl LockHandle { { error!("Lock renewal task failed: {err:?}"); } - })); + }))); break Ok(LockHandle { lock, diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml index 9166407..98a4007 100644 --- a/yama_wormfile_sftp/Cargo.toml +++ b/yama_wormfile_sftp/Cargo.toml @@ -19,3 +19,4 @@ rand = "0.8.5" thiserror = "1.0.40" eyre = "0.6.8" tracing = "0.1.37" +async-backtrace = "0.2.6" From 96deadd270628b63a606a3fb4d5aeddc02ece9a5 Mon Sep 17 00:00:00 2001 From: Olivier Date: Sun, 13 Aug 2023 16:42:32 +0100 Subject: [PATCH 31/51] Remove old yama and datman from tree --- datman.old/Cargo.toml | 38 - datman.old/README.md | 13 - datman.old/src/bin/datman.rs | 468 ------ datman.old/src/commands.rs | 66 - datman.old/src/commands/backup.rs | 391 ----- datman.old/src/commands/extract.rs | 182 --- datman.old/src/commands/ibrowse.rs | 253 ---- datman.old/src/commands/ilabel.rs | 267 ---- datman.old/src/commands/prune.rs | 220 --- datman.old/src/commands/pushpull.rs | 306 ---- datman.old/src/commands/report.rs | 456 ------ datman.old/src/descriptor.rs | 116 -- datman.old/src/labelling.rs | 288 ---- datman.old/src/lib.rs | 12 - datman.old/src/remote.rs | 2 - .../src/remote/backup_source_requester.rs | 304 ---- .../src/remote/backup_source_responder.rs | 187 --- datman.old/src/tree.rs | 359 ----- yama.old/Cargo.toml | 44 - yama.old/README.md | 25 - yama.old/src/bin/yama.rs | 311 ---- yama.old/src/chunking.rs | 241 ---- yama.old/src/commands.rs | 183 --- yama.old/src/debug.rs | 213 --- yama.old/src/definitions.rs | 357 ----- yama.old/src/lib.rs | 10 - yama.old/src/operations.rs | 80 -- yama.old/src/operations/checking.rs | 438 ------ yama.old/src/operations/cleanup.rs | 64 - yama.old/src/operations/extracting.rs | 370 ----- yama.old/src/operations/legacy_pushpull.rs | 333 ----- yama.old/src/operations/storing.rs | 342 ----- yama.old/src/pile.rs | 393 ----- yama.old/src/pile/access_guard.rs | 141 -- yama.old/src/pile/compression.rs | 359 ----- yama.old/src/pile/integrity.rs | 160 --- yama.old/src/pile/local_sqlitebloblogs.rs | 1258 ----------------- yama.old/src/progress.rs | 42 - yama.old/src/remote.rs | 102 -- yama.old/src/remote/requester.rs | 495 ------- yama.old/src/remote/responder.rs | 374 ----- yama.old/src/tree.rs | 718 ---------- yama.old/src/utils.rs | 140 -- 43 files changed, 11121 deletions(-) delete mode 100644 datman.old/Cargo.toml delete mode 100644 datman.old/README.md delete mode 100644 datman.old/src/bin/datman.rs delete mode 100644 datman.old/src/commands.rs delete mode 100644 datman.old/src/commands/backup.rs delete mode 100644 datman.old/src/commands/extract.rs delete mode 100644 datman.old/src/commands/ibrowse.rs delete mode 100644 datman.old/src/commands/ilabel.rs delete mode 100644 datman.old/src/commands/prune.rs delete mode 100644 datman.old/src/commands/pushpull.rs delete mode 100644 datman.old/src/commands/report.rs delete mode 100644 datman.old/src/descriptor.rs delete mode 100644 datman.old/src/labelling.rs delete mode 100644 datman.old/src/lib.rs delete mode 100644 datman.old/src/remote.rs delete mode 100644 datman.old/src/remote/backup_source_requester.rs delete mode 100644 datman.old/src/remote/backup_source_responder.rs delete mode 100644 datman.old/src/tree.rs delete mode 100644 yama.old/Cargo.toml delete 
mode 100644 yama.old/README.md delete mode 100644 yama.old/src/bin/yama.rs delete mode 100644 yama.old/src/chunking.rs delete mode 100644 yama.old/src/commands.rs delete mode 100644 yama.old/src/debug.rs delete mode 100644 yama.old/src/definitions.rs delete mode 100644 yama.old/src/lib.rs delete mode 100644 yama.old/src/operations.rs delete mode 100644 yama.old/src/operations/checking.rs delete mode 100644 yama.old/src/operations/cleanup.rs delete mode 100644 yama.old/src/operations/extracting.rs delete mode 100644 yama.old/src/operations/legacy_pushpull.rs delete mode 100644 yama.old/src/operations/storing.rs delete mode 100644 yama.old/src/pile.rs delete mode 100644 yama.old/src/pile/access_guard.rs delete mode 100644 yama.old/src/pile/compression.rs delete mode 100644 yama.old/src/pile/integrity.rs delete mode 100644 yama.old/src/pile/local_sqlitebloblogs.rs delete mode 100644 yama.old/src/progress.rs delete mode 100644 yama.old/src/remote.rs delete mode 100644 yama.old/src/remote/requester.rs delete mode 100644 yama.old/src/remote/responder.rs delete mode 100644 yama.old/src/tree.rs delete mode 100644 yama.old/src/utils.rs diff --git a/datman.old/Cargo.toml b/datman.old/Cargo.toml deleted file mode 100644 index 58c910a..0000000 --- a/datman.old/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -[package] -name = "datman" -version = "0.7.0-alpha.1" -authors = ["Olivier 'reivilibre' "] -edition = "2021" -repository = "https://bics.ga/reivilibre/yama" -license = "GPL-3.0-or-later" - -description = "A chunked and deduplicated backup system using Yama" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -clap = { version = "3.1.18", features = ["derive"] } -crossbeam-channel = "0.5.1" -anyhow = "1.0" -thiserror = "1.0" -serde = { version = "1.0.104", features = ["derive"] } -serde_json = "1.0.64" -toml = "0.5.5" -log = "0.4" -env_logger = "0.7.1" -indicatif = "0.14.0" -arc-interner = "0.5.1" -zstd = "0.11.2" # 0.11.2+zstd.1.5.2 -byteorder = "1" -termion = "1.5.6" -glob = "0.3.0" -humansize = "1.1.1" -chrono = "0.4.19" -itertools = "0.10.1" -hostname = "0.3.1" -yama = { path = "../yama", version = "0.7.0-alpha.1" } -metrics = "0.17.1" -bare-metrics-recorder = { version = "0.1.0" } -comfy-table = "6.0.0-rc.1" -libc = "0.2.126" -io-streams = "0.11.0" \ No newline at end of file diff --git a/datman.old/README.md b/datman.old/README.md deleted file mode 100644 index 934b50e..0000000 --- a/datman.old/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# datman: DATa MANager - -Datman is a tool to make it easier to use Yama for backups. - -Features: - -* Chunk-based deduplication -* (optional) Compression using Zstd and a specifiable dictionary -* (optional) Encryption -* Ability to back up to remote machines over SSH -* Labelling of files in a backup source; different destinations can choose to backup either all or a subset of the labels. - -See the documentation for more information. diff --git a/datman.old/src/bin/datman.rs b/datman.old/src/bin/datman.rs deleted file mode 100644 index 01f03ea..0000000 --- a/datman.old/src/bin/datman.rs +++ /dev/null @@ -1,468 +0,0 @@ -/* -This file is part of Yama. - -Yama is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. 
- -Yama is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Yama. If not, see . -*/ - -use std::fs::File; -use std::io::{BufReader, BufWriter, Write}; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; - -use clap::Parser; -use env_logger::Env; - -use anyhow::{bail, Context}; -use bare_metrics_recorder::recording::BareMetricsRecorderCore; -use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc}; -use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination}; -use datman::commands::ilabel::interactive_labelling_session; -use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy}; -use datman::commands::{init_descriptor, pushpull}; -use datman::descriptor::{load_descriptor, SourceDescriptor}; -use datman::get_hostname; -use datman::remote::backup_source_requester::backup_remote_source_to_destination; -use datman::remote::backup_source_responder; -use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; -use itertools::Itertools; -use log::info; -use std::str::FromStr; -use yama::commands::load_pile_descriptor; -use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel}; - -pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠️ \x1b[25m\x1b[22m"; -pub const BOLD: &str = "\x1b[1m"; -pub const BOLD_OFF: &str = "\x1b[22m"; -pub const WHITE: &str = "\x1b[37m"; -pub const RED: &str = "\x1b[31m"; -pub const GREEN: &str = "\x1b[32m"; - -#[derive(Parser)] -pub enum DatmanCommand { - /// Initialise a datman descriptor in this directory. - Init {}, - - /// - Status {}, - - #[clap(name = "ilabel")] - InteractiveLabelling { - /// Name of the source to label. - source_name: String, - }, - - #[clap(name = "ibrowse")] - InteractiveBrowsing { - /// Name of the source to browse. - source_name: String, - }, - - /// Back up a source locally or over the network. - BackupOne { - /// Name of the source to back up. - source_name: String, - - /// Name of the destination to back up to. - destination_name: String, - }, - - BackupAll { - /// Name of the remote to back up. - /// Special value 'self' means 'this host only'. - /// Special value 'all' means 'all hosts'. - remote_name: String, - - /// Name of the destination to back up to. - destination_name: String, - }, - - Extract { - /// Name of the 'source' to extract - /// Omit for 'all'. - #[clap(short)] - source_name: Option, - - /// If specified, will get the first backup after this date. - #[clap(long)] - after: Option, - - /// If specified, will get the last backup before this date. The default behaviour is to get the latest. - #[clap(long)] - before: Option, - - /// If not specified, time-restricted extractions that don't have a pointer for every source - /// will instead lead to an error. - #[clap(long)] - accept_partial: bool, // TODO unimplemented. - - /// Name of the pile to extract from - pile_name: String, - - /// Place to extract to. - destination: PathBuf, - - /// Skip applying metadata. Might be needed to extract without superuser privileges. - #[clap(long)] - skip_metadata: bool, - }, - - Report { - /// Name of the pile to report on. - pile_name: String, - - /// Don't summarise months. 
- #[clap(long)] - individual: bool, - }, - - #[clap(name = "_backup_source_responder")] - InternalBackupSourceResponder, - - /// Pulls all pointers from a remote pile to a local pile. - /// Does not yet support label filtering, but will do in the future. - Pull { - /// e.g. 'myserver:main' - remote_and_remote_pile: String, - - pile_name: String, - }, - - /// Applies a retention policy by removing unnecessary backups. - /// Does not reclaim space by itself: use - /// `yama check --apply-gc --shallow` - /// & `yama compact` - /// to do that. - Prune { pile_name: String }, - - #[clap(name = "_pull_responder_offerer")] - InternalPullResponderOfferer { - datman_path: PathBuf, - pile_name: String, - }, -} - -pub struct HumanDateTime(pub DateTime); - -impl FromStr for HumanDateTime { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - if let Ok(date_only) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { - let local_date = chrono::offset::Local.from_local_date(&date_only).unwrap(); - let local_datetime = local_date.and_hms(0, 0, 0); - Ok(HumanDateTime(local_datetime)) - } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S") { - let local_datetime = chrono::offset::Local - .from_local_datetime(&date_and_time) - .unwrap(); - Ok(HumanDateTime(local_datetime)) - } else if let Ok(date_and_time) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { - let local_datetime = chrono::offset::Local - .from_local_datetime(&date_and_time) - .unwrap(); - Ok(HumanDateTime(local_datetime)) - } else { - bail!("Couldn't parse using either format. Use one of: 2021-05-16 OR 2021-05-16T17:42:14 OR 2021-05-16 17:42:14"); - } - } -} - -fn with_obvious_successfail_message(result: anyhow::Result) -> anyhow::Result { - match &result { - Ok(_) => { - eprintln!("Operation {}successful{}.", GREEN, WHITE); - } - Err(error) => { - eprintln!("{:?}", error); - eprintln!( - "{}{}Operation {}{}FAILED{}!{}", - FAILURE_SYMBOL_OBNOXIOUS_FLASHING, WHITE, RED, BOLD, WHITE, BOLD_OFF - ); - } - }; - result -} - -fn with_exitcode(result: anyhow::Result) { - match &result { - Ok(_) => { - std::process::exit(0); - } - Err(_) => { - std::process::exit(5); - } - }; -} - -fn main() -> anyhow::Result<()> { - env_logger::Builder::from_env(Env::default().default_filter_or("info")).init(); - - let now = Utc::now(); - - let (shard, _stopper) = BareMetricsRecorderCore::new(File::create(format!( - "/tmp/datman_{}.baremetrics", - now.format("%F_%H%M%S") - ))?) - .start("datman".to_string())?; - shard.install_as_metrics_recorder()?; - - let opts: DatmanCommand = DatmanCommand::parse(); - - match opts { - DatmanCommand::Init {} => { - init_descriptor(Path::new(".")).unwrap(); - } - DatmanCommand::Status { .. 
} => { - unimplemented!(); - } - DatmanCommand::InteractiveLabelling { source_name } => { - interactive_labelling_session(Path::new("."), source_name)?; - } - DatmanCommand::InteractiveBrowsing { source_name } => { - datman::commands::ibrowse::session(Path::new("."), source_name)?; - } - DatmanCommand::BackupOne { - source_name, - destination_name, - } => { - let my_hostname = get_hostname(); - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let source = &descriptor.sources[&source_name]; - let destination = &descriptor.piles[&destination_name]; - - let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); - pbar.set_style( - ProgressStyle::default_bar().template( - "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", - ), - ); - pbar.set_message("storing"); - - let is_remote = if let SourceDescriptor::DirectorySource { hostname, .. } = source { - hostname != &my_hostname - } else { - false - }; - - let result = if is_remote { - backup_remote_source_to_destination( - source, - destination, - &descriptor, - Path::new("."), - &source_name, - &destination_name, - yama::utils::get_number_of_workers("YAMA_CHUNKERS"), - pbar, - ) - } else { - backup_source_to_destination( - source, - destination, - &descriptor, - Path::new("."), - &source_name, - &destination_name, - yama::utils::get_number_of_workers("YAMA_CHUNKERS"), - &mut pbar, - ) - }; - with_exitcode(with_obvious_successfail_message(result)) - } - DatmanCommand::BackupAll { - remote_name, - destination_name, - } => { - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let destination = &descriptor.piles[&destination_name]; - - let mut pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); - pbar.set_style( - ProgressStyle::default_bar().template( - "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", - ), - ); - pbar.set_message("storing"); - - backup_all_sources_to_destination( - destination, - &descriptor, - Path::new("."), - &destination_name, - yama::utils::get_number_of_workers("YAMA_CHUNKERS"), - &mut pbar, - remote_name, - ) - .unwrap(); - } - DatmanCommand::Extract { - source_name, - after, - before, - accept_partial, - pile_name, - destination, - skip_metadata, - } => { - if !accept_partial { - bail!("Specify --accept-partial until running without it is supported."); - } - - if after.is_some() && before.is_some() { - bail!("Can't specify both before and after!"); - } - - let before = before.map(|dt| dt.0.with_timezone(&Utc)); - let after = after.map(|dt| dt.0.with_timezone(&Utc)); - - datman::commands::extract::extract( - &destination, - Path::new("."), - source_name.as_ref().map(|x| x.as_ref()), - &pile_name, - before.into(), - after.into(), - !skip_metadata, - !skip_metadata, - !skip_metadata, - yama::utils::get_number_of_workers("YAMA_EXTRACTORS"), - )?; - } - - DatmanCommand::InternalBackupSourceResponder => { - info!("Datman responder at {:?}", std::env::current_exe()?); - backup_source_responder::handler_stdio()?; - } - - DatmanCommand::Report { - pile_name, - individual, - } => { - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let destination = &descriptor.piles[&pile_name]; - let report = - datman::commands::report::generate_report(destination, &descriptor, !individual)?; - - datman::commands::report::print_filesystem_space(&destination.path)?; - datman::commands::report::print_report(&report)?; - } - DatmanCommand::Pull { - remote_and_remote_pile, - pile_name, - } => { - let (hostname, 
remote_datman_path, remote_pile_name) = remote_and_remote_pile - .split(':') - .collect_tuple() - .context("You must pull from a remote pile specified as remote:path:pile.")?; - - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let source = &descriptor.piles[&pile_name]; - - let pile_desc = load_pile_descriptor(&source.path)?; - let (pile, bypass_raw_pile) = open_pile_with_work_bypass( - &source.path, - &pile_desc, - BypassLevel::CompressionBypass, - )?; - - let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10)); - pbar.set_style( - ProgressStyle::default_bar().template( - "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", - ), - ); - pbar.set_message("pulling"); - - let remote_host_descriptor = descriptor - .remote_hosts - .get(hostname) - .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?; - - let mut connection = Command::new("ssh") - .arg(&remote_host_descriptor.user_at_host) - .arg("--") - .arg( - &remote_host_descriptor - .path_to_datman - .as_ref() - .map(|x| x.as_str()) - .unwrap_or("datman"), - ) - .arg("_pull_responder_offerer") - .arg(remote_datman_path) - .arg(remote_pile_name) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::inherit()) - .spawn()?; - - let mut reader = BufReader::new(connection.stdout.take().unwrap()); - let mut writer = BufWriter::new(connection.stdin.take().unwrap()); - - pushpull::accepting_side( - &pile, - &bypass_raw_pile, - &mut reader, - &mut writer, - Box::new(pbar), - )?; - } - - DatmanCommand::Prune { pile_name } => { - let descriptor = load_descriptor(Path::new(".")).unwrap(); - let retention_policy = descriptor - .retention - .context("No retention policy set in descriptor")?; - let dest_desc = &descriptor.piles[&pile_name]; - - let pile_desc = load_pile_descriptor(&dest_desc.path)?; - - prune_with_retention_policy( - &dest_desc.path, - &pile_desc, - &RetentionPolicy::from_config(retention_policy), - true, - )?; - } - - DatmanCommand::InternalPullResponderOfferer { - datman_path, - pile_name, - } => { - let descriptor = load_descriptor(&datman_path).unwrap(); - let source = &descriptor.piles[&pile_name]; - - let pile_desc = load_pile_descriptor(&source.path)?; - let (pile, bypass_raw_pile) = open_pile_with_work_bypass( - &source.path, - &pile_desc, - BypassLevel::CompressionBypass, - )?; - - let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?); - let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?); - - pushpull::offering_side( - &pile, - &bypass_raw_pile, - &mut stdin, - &mut stdout, - Box::new(()), - )?; - - stdout.flush()?; - } - } - Ok(()) -} diff --git a/datman.old/src/commands.rs b/datman.old/src/commands.rs deleted file mode 100644 index 58d171d..0000000 --- a/datman.old/src/commands.rs +++ /dev/null @@ -1,66 +0,0 @@ -/* -This file is part of Yama. - -Yama is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -Yama is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Yama. If not, see . 
-*/ - -use std::collections::HashMap; -use std::fs::File; -use std::io::Write; -use std::path::Path; - -use crate::descriptor::{Descriptor, RetentionPolicyConfig, SourceDescriptor}; - -pub mod backup; -pub mod extract; -pub mod ibrowse; -pub mod ilabel; -pub mod prune; -pub mod pushpull; -pub mod report; - -pub fn init_descriptor(path: &Path) -> anyhow::Result<()> { - std::fs::create_dir_all(path)?; - std::fs::create_dir(path.join("labelling"))?; - - let mut datman_toml_file = File::create(path.join("datman.toml"))?; - - let source: HashMap = Default::default(); - /*source.insert("demo1".to_owned(), SourceDescriptor::DirectorySource { - hostname: "demohost1".to_string(), - directory: PathBuf::from("/dev/null") - }); - source.insert("demo2".to_owned(), SourceDescriptor::VirtualSource { blah: "".to_string(), label: "wat".to_string() });*/ - - let bytes = toml::to_vec(&Descriptor { - labels: vec![ - "pocket".to_owned(), - "precious".to_owned(), - "bulky".to_owned(), - ], - sources: source, - piles: Default::default(), - remote_hosts: Default::default(), - retention: Some(RetentionPolicyConfig { - daily: 14, - weekly: 12, - monthly: 24, - yearly: 9001, - }), - })?; - - datman_toml_file.write_all(&bytes)?; - - Ok(()) -} diff --git a/datman.old/src/commands/backup.rs b/datman.old/src/commands/backup.rs deleted file mode 100644 index e701445..0000000 --- a/datman.old/src/commands/backup.rs +++ /dev/null @@ -1,391 +0,0 @@ -/* -This file is part of Yama. - -Yama is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -Yama is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Yama. If not, see . 
-*/ - -use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor, VirtualSourceKind}; -use crate::get_hostname; -use crate::labelling::{ - label_node, load_labelling_rules, str_to_label, Label, LabellingRules, State, -}; -use crate::tree::{scan, FileTree, FileTree1}; -use anyhow::{anyhow, bail}; -use arc_interner::ArcIntern; -use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; -use log::{info, warn}; -use std::collections::{HashMap, HashSet}; -use std::fmt::Debug; -use std::io::Write; -use std::path::Path; -use std::process::{Child, Command, Stdio}; -use std::sync::Arc; -use yama::chunking::SENSIBLE_THRESHOLD; -use yama::commands::{load_pile_descriptor, open_pile, store_tree_node}; -use yama::definitions::{ - FilesystemOwnership, FilesystemPermissions, PointerData, RecursiveChunkRef, RootTreeNode, - TreeNode, -}; -use yama::progress::ProgressTracker; - -pub const POINTER_DATETIME_FORMAT: &'static str = "%F_%T"; -pub const POINTER_FIELD_SEPARATOR: char = '+'; - -pub fn get_pointer_name_at(source_name: &str, datetime: DateTime) -> String { - format!( - "{}{}{}", - source_name, - POINTER_FIELD_SEPARATOR, - datetime.format(POINTER_DATETIME_FORMAT).to_string() - ) -} - -pub fn split_pointer_name(pointer_name: &str) -> Option<(String, DateTime)> { - let (source_name, date_time_str) = pointer_name.rsplit_once("+")?; - let date_time = NaiveDateTime::parse_from_str(date_time_str, POINTER_DATETIME_FORMAT).ok()?; - let date_time = Utc.from_utc_datetime(&date_time); - Some((source_name.to_owned(), date_time)) -} - -pub fn open_stdout_backup_process( - extra_args: &HashMap, - program_name: &str, -) -> anyhow::Result { - let mut child = Command::new(format!("datman-helper-{}-backup", program_name)) - .stdout(Stdio::piped()) - .stderr(Stdio::inherit()) - .stdin(Stdio::piped()) - .spawn()?; - let mut child_stdin = child.stdin.as_mut().unwrap(); - serde_json::to_writer(&mut child_stdin, extra_args)?; - child_stdin.flush()?; - // close stdin! - child.stdin = None; - Ok(child) -} - -pub fn label_filter_and_convert( - tree: FileTree1<()>, - descriptor: &Descriptor, - source_name: &str, - rules: &LabellingRules, - dest: &DestPileDescriptor, -) -> anyhow::Result> { - info!("Labelling."); - let mut tree = tree.replace_meta(&None); - let labels = descriptor - .labels - .iter() - .map(|l| Label(ArcIntern::new(l.clone()))) - .collect(); - label_node("".to_owned(), None, &mut tree, &labels, rules)?; - - let included_labels: HashSet