Start an attempt to create a high-performance chunking pipeline
This commit is contained in:
parent
ccb50f2dd9
commit
cc60ae88a4
|
@ -185,16 +185,6 @@ dependencies = [
|
||||||
"tiny-keccak",
|
"tiny-keccak",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-channel"
|
|
||||||
version = "0.4.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87"
|
|
||||||
dependencies = [
|
|
||||||
"crossbeam-utils 0.7.2",
|
|
||||||
"maybe-uninit",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
|
@ -202,7 +192,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"crossbeam-utils 0.8.5",
|
"crossbeam-utils",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -213,7 +203,7 @@ checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"crossbeam-epoch",
|
"crossbeam-epoch",
|
||||||
"crossbeam-utils 0.8.5",
|
"crossbeam-utils",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -223,23 +213,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
|
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"crossbeam-utils 0.8.5",
|
"crossbeam-utils",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"memoffset",
|
"memoffset",
|
||||||
"scopeguard",
|
"scopeguard",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-utils"
|
|
||||||
version = "0.7.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
"cfg-if 0.1.10",
|
|
||||||
"lazy_static",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-utils"
|
name = "crossbeam-utils"
|
||||||
version = "0.8.5"
|
version = "0.8.5"
|
||||||
|
@ -275,7 +254,7 @@ dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"crossbeam-channel 0.4.4",
|
"crossbeam-channel",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"glob",
|
"glob",
|
||||||
"hostname",
|
"hostname",
|
||||||
|
@ -292,6 +271,17 @@ dependencies = [
|
||||||
"zstd",
|
"zstd",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derivative"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dirs-next"
|
name = "dirs-next"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
|
@ -565,12 +555,6 @@ version = "0.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
|
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "maybe-uninit"
|
|
||||||
version = "2.0.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.4.0"
|
version = "2.4.0"
|
||||||
|
@ -792,9 +776,9 @@ version = "1.9.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
|
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crossbeam-channel 0.5.1",
|
"crossbeam-channel",
|
||||||
"crossbeam-deque",
|
"crossbeam-deque",
|
||||||
"crossbeam-utils 0.8.5",
|
"crossbeam-utils",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
]
|
]
|
||||||
|
@ -1205,8 +1189,9 @@ dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"crossbeam-channel 0.4.4",
|
"crossbeam-channel",
|
||||||
"crossbeam-utils 0.8.5",
|
"crossbeam-utils",
|
||||||
|
"derivative",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"fastcdc",
|
"fastcdc",
|
||||||
"glob",
|
"glob",
|
||||||
|
|
|
@ -12,7 +12,7 @@ description = "A chunked and deduplicated backup system using Yama"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = "= 3.0.0-beta.5"
|
clap = "= 3.0.0-beta.5"
|
||||||
crossbeam-channel = "0.4"
|
crossbeam-channel = "0.5.1"
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
serde = { version = "1.0.104", features = ["derive"] }
|
serde = { version = "1.0.104", features = ["derive"] }
|
||||||
|
|
|
@ -20,8 +20,8 @@ twox-hash = "1.5.0"
|
||||||
serde = { version = "1.0.104", features = ["derive"] }
|
serde = { version = "1.0.104", features = ["derive"] }
|
||||||
serde_bare = "0.3.0"
|
serde_bare = "0.3.0"
|
||||||
users = "0.9.1"
|
users = "0.9.1"
|
||||||
crossbeam-channel = "0.4"
|
crossbeam-channel = "0.5.1"
|
||||||
crossbeam-utils = "0.8.1"
|
crossbeam-utils = "0.8.5"
|
||||||
toml = "0.5.5"
|
toml = "0.5.5"
|
||||||
glob = "0.3.0"
|
glob = "0.3.0"
|
||||||
nix = "0.17.0"
|
nix = "0.17.0"
|
||||||
|
@ -38,6 +38,7 @@ rayon = "1.5.0"
|
||||||
rusqlite = "0.24.2"
|
rusqlite = "0.24.2"
|
||||||
chrono = "0.4.19"
|
chrono = "0.4.19"
|
||||||
rustyline = "7.1.0"
|
rustyline = "7.1.0"
|
||||||
|
derivative = "2.2.0"
|
||||||
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|
|
@ -18,8 +18,9 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
use crate::chunking::RecursiveUnchunker;
|
use crate::chunking::RecursiveUnchunker;
|
||||||
use crate::commands::retrieve_tree_node;
|
use crate::commands::retrieve_tree_node;
|
||||||
use crate::definitions::{ChunkId, TreeNode};
|
use crate::definitions::{ChunkId, TreeNode};
|
||||||
use crate::pile::{Keyspace, Pile, RawPile};
|
use crate::pile::{ControllerMessage, Keyspace, Pile, RawPile, StoragePipelineSettings};
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
|
use crossbeam_channel::Sender;
|
||||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||||
use log::{error, info, warn};
|
use log::{error, info, warn};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
@ -121,6 +122,15 @@ impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
|
||||||
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
||||||
self.underlying.check_lowlevel()
|
self.underlying.check_lowlevel()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
self.underlying
|
||||||
|
.build_storage_pipeline(settings, controller_send)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
|
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
|
||||||
|
|
|
@ -21,6 +21,7 @@ use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::chunking::calculate_chunkid;
|
use crate::chunking::calculate_chunkid;
|
||||||
use crate::definitions::{ChunkId, PointerData};
|
use crate::definitions::{ChunkId, PointerData};
|
||||||
|
use crossbeam_channel::Sender;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::sync::{Arc, Condvar, Mutex};
|
use std::sync::{Arc, Condvar, Mutex};
|
||||||
|
@ -75,7 +76,21 @@ pub struct DebugStatistics {
|
||||||
pub total_chunk_size: u64,
|
pub total_chunk_size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait RawPile: Send + Sync + Debug {
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct StoragePipelineSettings {
|
||||||
|
pub num_compressors: u32,
|
||||||
|
pub compressor_input_bound: u32,
|
||||||
|
pub writer_input_bound: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum ControllerMessage {
|
||||||
|
Failure {
|
||||||
|
worker_id: Arc<String>,
|
||||||
|
error_message: String,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait RawPile: Send + Sync + Debug + 'static {
|
||||||
// TODO expose verification errors?
|
// TODO expose verification errors?
|
||||||
fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool>;
|
fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool>;
|
||||||
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>>;
|
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>>;
|
||||||
|
@ -99,6 +114,12 @@ pub trait RawPile: Send + Sync + Debug {
|
||||||
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RawPile for Box<dyn RawPile> {
|
impl RawPile for Box<dyn RawPile> {
|
||||||
|
@ -129,6 +150,15 @@ impl RawPile for Box<dyn RawPile> {
|
||||||
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
||||||
self.as_ref().debug_statistics()
|
self.as_ref().debug_statistics()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
self.as_ref()
|
||||||
|
.build_storage_pipeline(settings, controller_send)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<RP: RawPile> RawPile for Arc<RP> {
|
impl<RP: RawPile> RawPile for Arc<RP> {
|
||||||
|
@ -159,6 +189,15 @@ impl<RP: RawPile> RawPile for Arc<RP> {
|
||||||
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
||||||
self.as_ref().debug_statistics()
|
self.as_ref().debug_statistics()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
self.as_ref()
|
||||||
|
.build_storage_pipeline(settings, controller_send)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
|
@ -21,14 +21,16 @@ use std::thread::JoinHandle;
|
||||||
|
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
|
use derivative::Derivative;
|
||||||
use log::error;
|
use log::error;
|
||||||
use zstd::block::{Compressor, Decompressor};
|
use zstd::block::{Compressor, Decompressor};
|
||||||
|
|
||||||
use crate::pile::{DebugStatistics, Keyspace, RawPile};
|
use crate::definitions::ChunkId;
|
||||||
|
use crate::pile::{ControllerMessage, DebugStatistics, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
|
|
||||||
pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
|
pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct CompressionSettings {
|
pub struct CompressionSettings {
|
||||||
/// Raw dictionary to pass to Zstd for compression and decompression
|
/// Raw dictionary to pass to Zstd for compression and decompression
|
||||||
pub dictionary: Arc<Vec<u8>>,
|
pub dictionary: Arc<Vec<u8>>,
|
||||||
|
@ -40,11 +42,15 @@ pub struct CompressionSettings {
|
||||||
pub num_decompressors: u32,
|
pub num_decompressors: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Derivative)]
|
||||||
|
#[derivative(Clone(bound = ""))]
|
||||||
|
// we need to use derivative's Clone impl because Arc<R> causes R to have a bound on Clone
|
||||||
|
// even though that's not needed. https://github.com/rust-lang/rust/issues/26925
|
||||||
pub struct RawPileCompressor<R: RawPile> {
|
pub struct RawPileCompressor<R: RawPile> {
|
||||||
underlying: R,
|
underlying: Arc<R>,
|
||||||
compressor: Sender<(Vec<u8>, Sender<Vec<u8>>)>,
|
compressor: Option<Sender<(Vec<u8>, Sender<Vec<u8>>)>>,
|
||||||
decompressor: Sender<(Vec<u8>, Sender<Vec<u8>>)>,
|
decompressor: Option<Sender<(Vec<u8>, Sender<Vec<u8>>)>>,
|
||||||
|
settings: Arc<CompressionSettings>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: RawPile> RawPileCompressor<R> {
|
impl<R: RawPile> RawPileCompressor<R> {
|
||||||
|
@ -52,6 +58,20 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
underlying: R,
|
underlying: R,
|
||||||
settings: CompressionSettings,
|
settings: CompressionSettings,
|
||||||
) -> anyhow::Result<(Self, Vec<JoinHandle<()>>)> {
|
) -> anyhow::Result<(Self, Vec<JoinHandle<()>>)> {
|
||||||
|
if settings.num_compressors == 0 && settings.num_decompressors == 0 {
|
||||||
|
// optimisation for when we're only building a pipeline: we don't want to
|
||||||
|
return Ok((
|
||||||
|
RawPileCompressor {
|
||||||
|
underlying: Arc::new(underlying),
|
||||||
|
compressor: None,
|
||||||
|
decompressor: None,
|
||||||
|
|
||||||
|
settings: Arc::new(settings),
|
||||||
|
},
|
||||||
|
Vec::with_capacity(0),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
let (com_s, com_r) = crossbeam_channel::bounded(4);
|
let (com_s, com_r) = crossbeam_channel::bounded(4);
|
||||||
let (dec_s, dec_r) = crossbeam_channel::bounded(4);
|
let (dec_s, dec_r) = crossbeam_channel::bounded(4);
|
||||||
|
|
||||||
|
@ -85,9 +105,10 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
RawPileCompressor {
|
RawPileCompressor {
|
||||||
underlying,
|
underlying: Arc::new(underlying),
|
||||||
compressor: com_s,
|
compressor: Some(com_s),
|
||||||
decompressor: dec_s,
|
decompressor: Some(dec_s),
|
||||||
|
settings: Arc::new(settings),
|
||||||
},
|
},
|
||||||
handles,
|
handles,
|
||||||
))
|
))
|
||||||
|
@ -124,6 +145,8 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
fn decompress(&self, data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
fn decompress(&self, data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
||||||
let (ret_s, ret_r) = crossbeam_channel::bounded(0);
|
let (ret_s, ret_r) = crossbeam_channel::bounded(0);
|
||||||
self.decompressor
|
self.decompressor
|
||||||
|
.as_ref()
|
||||||
|
.expect("No decompressors configured")
|
||||||
.send((data.to_vec(), ret_s))
|
.send((data.to_vec(), ret_s))
|
||||||
.or(Err(anyhow!("couldn't send to decompressor")))?;
|
.or(Err(anyhow!("couldn't send to decompressor")))?;
|
||||||
|
|
||||||
|
@ -133,11 +156,23 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
fn compress(&self, compressed_data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
fn compress(&self, compressed_data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
||||||
let (ret_s, ret_r) = crossbeam_channel::bounded(0);
|
let (ret_s, ret_r) = crossbeam_channel::bounded(0);
|
||||||
self.compressor
|
self.compressor
|
||||||
|
.as_ref()
|
||||||
|
.expect("No compressors configured")
|
||||||
.send((compressed_data.to_vec(), ret_s))
|
.send((compressed_data.to_vec(), ret_s))
|
||||||
.or(Err(anyhow!("couldn't send to compressor")))?;
|
.or(Err(anyhow!("couldn't send to compressor")))?;
|
||||||
|
|
||||||
Ok(ret_r.recv().or(Err(anyhow!("couldn't receive result")))?)
|
Ok(ret_r.recv().or(Err(anyhow!("couldn't receive result")))?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn storage_pipeline_worker(
|
||||||
|
&self,
|
||||||
|
next_stage: Sender<(ChunkId, Vec<u8>)>,
|
||||||
|
input: Receiver<(ChunkId, Vec<u8>)>,
|
||||||
|
controller_send: &Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
todo!();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: RawPile> RawPile for RawPileCompressor<R> {
|
impl<R: RawPile> RawPile for RawPileCompressor<R> {
|
||||||
|
@ -177,4 +212,40 @@ impl<R: RawPile> RawPile for RawPileCompressor<R> {
|
||||||
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
||||||
self.underlying.debug_statistics()
|
self.underlying.debug_statistics()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
// this one should have a few threads behind it! yarr!
|
||||||
|
let subsequent_pipeline = self
|
||||||
|
.underlying
|
||||||
|
.build_storage_pipeline(settings.clone(), controller_send.clone())?;
|
||||||
|
|
||||||
|
let (input_to_this_stage, receiver) =
|
||||||
|
crossbeam_channel::bounded(settings.compressor_input_bound as usize);
|
||||||
|
|
||||||
|
for compressor_number in 0..settings.num_compressors {
|
||||||
|
let subsequent_pipeline = subsequent_pipeline.clone();
|
||||||
|
let receiver = receiver.clone();
|
||||||
|
let controller_send = controller_send.clone();
|
||||||
|
let this = (*self).clone();
|
||||||
|
thread::spawn(move || {
|
||||||
|
let worker_id = Arc::new(format!("compressor-{}", compressor_number));
|
||||||
|
if let Err(err) =
|
||||||
|
this.storage_pipeline_worker(subsequent_pipeline, receiver, &controller_send)
|
||||||
|
{
|
||||||
|
controller_send
|
||||||
|
.send(ControllerMessage::Failure {
|
||||||
|
worker_id,
|
||||||
|
error_message: format!("err {:?}", err),
|
||||||
|
})
|
||||||
|
.expect("This is BAD: failed to send failure message to controller.");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(input_to_this_stage)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,9 @@ use log::warn;
|
||||||
use sodiumoxide::crypto::secretbox;
|
use sodiumoxide::crypto::secretbox;
|
||||||
use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES};
|
use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES};
|
||||||
|
|
||||||
use crate::pile::{Keyspace, RawPile};
|
use crate::definitions::ChunkId;
|
||||||
|
use crate::pile::{ControllerMessage, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
|
use crossbeam_channel::Sender;
|
||||||
|
|
||||||
/// A RawPile that provides encryption of chunk contents.
|
/// A RawPile that provides encryption of chunk contents.
|
||||||
/// Please note that keys are not currently encrypted, so this scheme is not CPA-secure.
|
/// Please note that keys are not currently encrypted, so this scheme is not CPA-secure.
|
||||||
|
@ -109,4 +111,12 @@ impl<R: RawPile> RawPile for RawPileEncryptor<R> {
|
||||||
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
||||||
self.underlying.check_lowlevel()
|
self.underlying.check_lowlevel()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,9 +19,10 @@ use std::hash::Hasher;
|
||||||
|
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::definitions::XXH64_SEED;
|
use crate::definitions::{ChunkId, XXH64_SEED};
|
||||||
use crate::pile::{DebugStatistics, Keyspace, RawPile};
|
use crate::pile::{ControllerMessage, DebugStatistics, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
use crate::utils::bytes_to_hexstring;
|
use crate::utils::bytes_to_hexstring;
|
||||||
|
use crossbeam_channel::Sender;
|
||||||
|
|
||||||
/// This RawPile enables checking the integrity of stored chunks.
|
/// This RawPile enables checking the integrity of stored chunks.
|
||||||
/// This is done by storing a hash along with the chunk contents, which can later be verified.
|
/// This is done by storing a hash along with the chunk contents, which can later be verified.
|
||||||
|
@ -113,4 +114,12 @@ impl<RP: RawPile> RawPile for RawPileIntegrityChecker<RP> {
|
||||||
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
fn debug_statistics(&self) -> anyhow::Result<Option<DebugStatistics>> {
|
||||||
self.underlying.debug_statistics()
|
self.underlying.debug_statistics()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,11 +18,11 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
use std::collections::{HashMap, VecDeque};
|
use std::collections::{HashMap, VecDeque};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::fs;
|
|
||||||
use std::fs::{read_dir, File, OpenOptions};
|
use std::fs::{read_dir, File, OpenOptions};
|
||||||
use std::io::{Read, Seek, SeekFrom, Write};
|
use std::io::{Read, Seek, SeekFrom, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{Arc, Condvar, Mutex};
|
use std::sync::{Arc, Condvar, Mutex};
|
||||||
|
use std::{fs, thread};
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||||
|
@ -32,9 +32,11 @@ use rusqlite::{params, Error};
|
||||||
use rusqlite::{Connection, OptionalExtension};
|
use rusqlite::{Connection, OptionalExtension};
|
||||||
|
|
||||||
use crate::definitions::ChunkId;
|
use crate::definitions::ChunkId;
|
||||||
use crate::pile::{DebugStatistics, Keyspace, RawPile};
|
use crate::pile::{ControllerMessage, DebugStatistics, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
use crate::utils::bytes_to_hexstring;
|
use crate::utils::bytes_to_hexstring;
|
||||||
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
use rusqlite::ffi::ErrorCode::ConstraintViolation;
|
use rusqlite::ffi::ErrorCode::ConstraintViolation;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
/// Bloblogs will not be reused if they are already 2 GiB large.
|
/// Bloblogs will not be reused if they are already 2 GiB large.
|
||||||
pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
|
pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
|
||||||
|
@ -193,11 +195,11 @@ impl Inner {
|
||||||
/// Because random access is important for performance, an additional SQLite database is used
|
/// Because random access is important for performance, an additional SQLite database is used
|
||||||
/// as a map from chunk IDs to their positions in the blob logs, allowing readers to seek to the
|
/// as a map from chunk IDs to their positions in the blob logs, allowing readers to seek to the
|
||||||
/// appropriate place and read a chunk randomly.
|
/// appropriate place and read a chunk randomly.
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct SqliteBloblogPile {
|
pub struct SqliteBloblogPile {
|
||||||
inner: Arc<Mutex<Inner>>,
|
inner: Arc<Mutex<Inner>>,
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
writers_reach_zero: Condvar,
|
writers_reach_zero: Arc<Condvar>,
|
||||||
should_batch_pointer_writes: bool,
|
should_batch_pointer_writes: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,6 +369,79 @@ impl SqliteBloblogPile {
|
||||||
let mut inner = self.inner.lock().unwrap();
|
let mut inner = self.inner.lock().unwrap();
|
||||||
inner.flush()
|
inner.flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn storage_pipeline_worker(
|
||||||
|
&self,
|
||||||
|
incoming: Receiver<(ChunkId, Vec<u8>)>,
|
||||||
|
controller_sender: &Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
// can hold on to the same bloblog as long as we'd like!
|
||||||
|
const POINTERS_BUFFER_SIZE: usize = 256;
|
||||||
|
let mut pointers_buffered = Vec::with_capacity(POINTERS_BUFFER_SIZE);
|
||||||
|
|
||||||
|
fn flush_pointers(
|
||||||
|
this: &SqliteBloblogPile,
|
||||||
|
pointers_buffered: &mut Vec<BloblogPointer>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_blob(
|
||||||
|
this: &SqliteBloblogPile,
|
||||||
|
bloblog_id: BloblogId,
|
||||||
|
bloblog: &mut Bloblog,
|
||||||
|
pointers_buffered: &mut Vec<BloblogPointer>,
|
||||||
|
(chunk_id, chunk): (ChunkId, Vec<u8>),
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let offset = bloblog.write_blob(&chunk_id, &chunk)?;
|
||||||
|
let pointer = BloblogPointer {
|
||||||
|
bloblog: bloblog_id,
|
||||||
|
offset,
|
||||||
|
};
|
||||||
|
pointers_buffered.push(pointer);
|
||||||
|
|
||||||
|
if pointers_buffered.len() >= POINTERS_BUFFER_SIZE {
|
||||||
|
flush_pointers(this, pointers_buffered)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Ok(chunk) = incoming.recv() {
|
||||||
|
let (bloblog_id, bloglog_mutex) = self.get_writing_bloblog()?;
|
||||||
|
let mut bloblog = bloglog_mutex.lock().expect("Failed to lock bloblog?");
|
||||||
|
write_blob(
|
||||||
|
self,
|
||||||
|
bloblog_id,
|
||||||
|
&mut bloblog,
|
||||||
|
&mut pointers_buffered,
|
||||||
|
chunk,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
while let Ok(chunk) = incoming.recv_timeout(Duration::from_secs(5)) {
|
||||||
|
write_blob(
|
||||||
|
self,
|
||||||
|
bloblog_id,
|
||||||
|
&mut bloblog,
|
||||||
|
&mut pointers_buffered,
|
||||||
|
chunk,
|
||||||
|
)?;
|
||||||
|
if bloblog.filesize()? > MAX_BLOBLOG_REUSE_SIZE {
|
||||||
|
// get a new bloblog to write with.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
drop(bloblog);
|
||||||
|
self.return_writing_bloblog(bloblog_id, bloglog_mutex)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
flush_pointers(self, &mut pointers_buffered)?;
|
||||||
|
|
||||||
|
// we MUST have flushed ALL the pointers by now.
|
||||||
|
assert!(pointers_buffered.is_empty());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for SqliteBloblogPile {
|
impl Drop for SqliteBloblogPile {
|
||||||
|
@ -594,6 +669,29 @@ impl RawPile for SqliteBloblogPile {
|
||||||
total_chunk_size,
|
total_chunk_size,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
let (sender, incoming) = crossbeam_channel::bounded(settings.writer_input_bound as usize);
|
||||||
|
|
||||||
|
let this = self.clone();
|
||||||
|
|
||||||
|
thread::spawn(move || {
|
||||||
|
let worker_id = Arc::new(format!("bloblogwriter"));
|
||||||
|
if let Err(err) = this.storage_pipeline_worker(incoming, &controller_send) {
|
||||||
|
controller_send
|
||||||
|
.send(ControllerMessage::Failure {
|
||||||
|
worker_id,
|
||||||
|
error_message: format!("err {:?}", err),
|
||||||
|
})
|
||||||
|
.expect("This is BAD: failed to send failure message to controller.");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct KeyIterator {
|
struct KeyIterator {
|
||||||
|
|
|
@ -8,7 +8,8 @@ use anyhow::anyhow;
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
use log::{error, info};
|
use log::{error, info};
|
||||||
|
|
||||||
use crate::pile::{Keyspace, RawPile};
|
use crate::definitions::ChunkId;
|
||||||
|
use crate::pile::{ControllerMessage, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
|
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
|
||||||
|
|
||||||
|
@ -303,6 +304,15 @@ impl RawPile for Requester {
|
||||||
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for LowLevelCheck.")),
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for LowLevelCheck.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_storage_pipeline(
|
||||||
|
&self,
|
||||||
|
settings: StoragePipelineSettings,
|
||||||
|
controller_send: Sender<ControllerMessage>,
|
||||||
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
|
// this one is a little bit more complex.
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ListKeyIterator {
|
pub struct ListKeyIterator {
|
||||||
|
|
Loading…
Reference in New Issue