Remove sled-backed storage

This commit is contained in:
Olivier 'reivilibre' 2021-06-16 12:44:45 +01:00
parent 09f65234a6
commit effccb5868
4 changed files with 3 additions and 174 deletions

View File

@ -12,10 +12,6 @@ This Raw Pile stores data on disk.
Chunk contents are written into 'blob logs'. The position of a chunk's contents is then stored in a SQLite index, to allow much faster random read time to the chunk.
### Sled Raw Pile
This Raw Pile stores data on disk. It has known issues (high memory usage; many-file disk layout) and is likely to be removed soon.
## Transformers

View File

@ -12,7 +12,6 @@ use crate::chunking::{RecursiveChunker, RecursiveUnchunker, SENSIBLE_THRESHOLD};
use crate::definitions::{PointerData, RecursiveChunkRef, RootTreeNode, TreeNode}; use crate::definitions::{PointerData, RecursiveChunkRef, RootTreeNode, TreeNode};
use crate::pile::compression::{CompressionSettings, RawPileCompressor}; use crate::pile::compression::{CompressionSettings, RawPileCompressor};
use crate::pile::integrity::RawPileIntegrityChecker; use crate::pile::integrity::RawPileIntegrityChecker;
use crate::pile::local_sled::LocalSledRawPile;
use crate::pile::local_sqlitebloblogs::SqliteBloblogPile; use crate::pile::local_sqlitebloblogs::SqliteBloblogPile;
use crate::pile::{Pile, PileDescriptor, PileStorage, RawPile}; use crate::pile::{Pile, PileDescriptor, PileStorage, RawPile};
use crate::tree::{integrate_node_in_place, merge_uid_or_gid_tables}; use crate::tree::{integrate_node_in_place, merge_uid_or_gid_tables};
@ -58,30 +57,6 @@ pub fn open_pile(dir: &Path, desc: &PileDescriptor) -> anyhow::Result<Pile<Box<d
PileStorage::RemoteOnly => { PileStorage::RemoteOnly => {
bail!("This is a remote-only pile. No local storage allowed."); bail!("This is a remote-only pile. No local storage allowed.");
} }
PileStorage::Sled => {
let sled_raw_pile = LocalSledRawPile::open(dir)?;
let raw_pile: Box<dyn RawPile> = match desc.compression {
None => Box::new(sled_raw_pile),
Some(comp_level) => {
let mut dictionary = Vec::new();
let dict_path = dir.join("important_zstd.dict");
File::open(dict_path)?.read_to_end(&mut dictionary)?;
let (compressed_pile, _handles) = RawPileCompressor::new(
sled_raw_pile,
CompressionSettings {
dictionary: Arc::new(dictionary),
level: comp_level as i32,
num_compressors: 4, // TODO make this configurable!
num_decompressors: 4,
},
)?;
Box::new(compressed_pile)
}
};
Ok(Pile::new(raw_pile))
}
PileStorage::SqliteIndexedBloblog => { PileStorage::SqliteIndexedBloblog => {
let blob_raw_pile = RawPileIntegrityChecker::new(SqliteBloblogPile::open(dir)?); let blob_raw_pile = RawPileIntegrityChecker::new(SqliteBloblogPile::open(dir)?);
let raw_pile: Box<dyn RawPile> = match desc.compression { let raw_pile: Box<dyn RawPile> = match desc.compression {
@ -108,7 +83,6 @@ pub fn open_pile(dir: &Path, desc: &PileDescriptor) -> anyhow::Result<Pile<Box<d
}; };
Ok(Pile::new(raw_pile)) Ok(Pile::new(raw_pile))
} }
PileStorage::BarePushSled => unimplemented!(),
} }
} }

View File

@ -10,7 +10,6 @@ use std::sync::{Condvar, Mutex};
pub mod compression; pub mod compression;
pub mod encryption; pub mod encryption;
pub mod integrity; pub mod integrity;
pub mod local_sled;
pub mod local_sqlitebloblogs; pub mod local_sqlitebloblogs;
#[derive(Serialize, Deserialize, Debug, Clone)] #[derive(Serialize, Deserialize, Debug, Clone)]
@ -27,13 +26,11 @@ pub struct PileDescriptor {
pub enum PileStorage { pub enum PileStorage {
/// No local storage. Pile is only usable for remotes. /// No local storage. Pile is only usable for remotes.
RemoteOnly, RemoteOnly,
/// Local storage backed by the Sled key-value store. DEPRECATED.
Sled,
/// Local storage backed by bloblogs that are indexed by a SQLite database. /// Local storage backed by bloblogs that are indexed by a SQLite database.
SqliteIndexedBloblog, SqliteIndexedBloblog,
/// Local temporary storage in which chunks are only kept for long enough to send them to // Local temporary storage in which chunks are only kept for long enough to send them to
/// remotes. Unimplemented at present. // remotes. Unimplemented at present.
BarePushSled, // TODO THIS IS NOT THE CORRECT NAME ANYWAY BarePushSled,
} }
#[derive(Serialize, Deserialize, Debug, Clone)] #[derive(Serialize, Deserialize, Debug, Clone)]

View File

@ -1,138 +0,0 @@
use std::hash::Hasher;
use std::path::Path;
use anyhow::bail;
use sled::{Db, Tree};
use crate::definitions::XXH64_SEED;
use crate::pile::{Keyspace, RawPile};
use crate::utils::bytes_to_hexstring;
/// A raw pile backed by a local sled key-value database on disk.
/// Deprecated per the surrounding commit: known issues include high memory
/// usage and a many-file disk layout.
pub struct LocalSledRawPile {
// The underlying sled database handle; the trees below belong to it.
pub db: Db,
// Chunk contents; values carry a trailing XXH64 checksum (see `write`).
pub chunks_tree: Tree,
// Chunk hashes; stored without a checksum suffix.
pub hashes_tree: Tree,
// Pointers; values carry a trailing XXH64 checksum (see `write`).
pub pointers_tree: Tree,
}
impl LocalSledRawPile {
    /// Opens (creating if absent) the sled database at `directory`/pile.sled,
    /// configured for low space usage with compression off and a 32 MiB cache,
    /// and opens the three trees used as keyspaces.
    pub fn open(directory: &Path) -> anyhow::Result<Self> {
        let database = sled::Config::default()
            .mode(sled::Mode::LowSpace)
            .use_compression(false)
            .cache_capacity(32 * 1024 * 1024 /* 32 MiB */)
            .path(directory.join("pile.sled"))
            .open()?;

        let chunks_tree = database.open_tree("chunks")?;
        let hashes_tree = database.open_tree("chunkhashes")?;
        let pointers_tree = database.open_tree("pointers")?;

        Ok(LocalSledRawPile {
            db: database,
            chunks_tree,
            hashes_tree,
            pointers_tree,
        })
    }

    /// Returns the sled tree that stores the given keyspace.
    fn which_tree(&self, kind: Keyspace) -> &Tree {
        match kind {
            Keyspace::Chunk => &self.chunks_tree,
            Keyspace::ChunkHash => &self.hashes_tree,
            Keyspace::Pointer => &self.pointers_tree,
        }
    }
}
impl RawPile for LocalSledRawPile {
    /// Returns whether `key` exists in the given keyspace.
    fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
        Ok(self.which_tree(kind).contains_key(key)?)
    }

    /// Reads the value for `key`, returning `None` if absent.
    ///
    /// For the `Chunk` and `Pointer` keyspaces, verifies and strips the
    /// trailing 8-byte XXH64 checksum that `write` appended; bails on a
    /// checksum mismatch or on an entry too short to carry the suffix.
    fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
        // TODO(perf): wouldn't it be nice to not need to copy these?
        let mut data_opt = self.which_tree(kind).get(key)?.map(|x| x.to_vec());
        if let Some(data_then_hash) = data_opt.as_mut() {
            if kind == Keyspace::Chunk || kind == Keyspace::Pointer {
                let len = data_then_hash.len();
                // Guard against truncated/corrupt entries: without this,
                // `len - 8` below underflows and panics when len < 8.
                if len < 8 {
                    bail!(
                        "Low-level corruption: entry of {} bytes is too short to carry an XXH64 suffix",
                        len
                    );
                }
                let data = &data_then_hash[..len - 8];
                let xxhash = &data_then_hash[len - 8..];
                let mut hasher = twox_hash::XxHash64::with_seed(XXH64_SEED);
                hasher.write(data);
                let computed_hash = hasher.finish().to_be_bytes();
                if &computed_hash[..] != xxhash {
                    bail!(
                        "Low-level hash discrepancy; expected {} computed {} at chunk {}",
                        bytes_to_hexstring(xxhash),
                        bytes_to_hexstring(&computed_hash),
                        bytes_to_hexstring(data)
                    );
                }
                // remove the hash from the end
                data_then_hash.drain(len - 8..);
            }
        }
        Ok(data_opt)
    }

    /// Writes `value` under `key`.
    ///
    /// For the `Chunk` and `Pointer` keyspaces, appends an 8-byte big-endian
    /// XXH64 checksum of the value so `read` can detect corruption cheaply.
    fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
        let tree = self.which_tree(kind);
        if kind == Keyspace::Chunk || kind == Keyspace::Pointer {
            // add a hash on the end for easy and quick checking later on.
            let mut value = value.to_vec();
            let mut hasher = twox_hash::XxHash64::with_seed(XXH64_SEED);
            hasher.write(&value);
            let computed_hash = hasher.finish().to_be_bytes();
            value.extend_from_slice(&computed_hash);
            tree.insert(key, value)?;
        } else {
            tree.insert(key, value)?;
        }
        Ok(())
    }

    /// Deletes `key` from the given keyspace; removing an absent key is a no-op.
    fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
        self.which_tree(kind).remove(key)?;
        Ok(())
    }

    /// Returns an iterator over all keys in the given keyspace, each yielded
    /// as an owned `Vec<u8>` (sled iteration errors are propagated per item).
    fn list_keys(
        &self,
        kind: Keyspace,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
        Ok(Box::new(self.which_tree(kind).iter().keys().map(
            |result_ivec| result_ivec.map(|ivec| ivec.to_vec()).map_err(|e| e.into()),
        )))
    }

    // NOTE: `list_keyvalue_pairs` is deliberately not overridden here: a naive
    // implementation would leak the internal XXH64 suffix on Chunk/Pointer
    // values — it would need to verify and strip the suffix as `read` does.

    /// Flushes the sled database to disk.
    fn flush(&self) -> anyhow::Result<()> {
        self.db.flush()?;
        Ok(())
    }

    /// Low-level integrity check — never implemented for the sled backend.
    fn check_lowlevel(&self) -> anyhow::Result<bool> {
        unimplemented!()
    }
}