Compare commits
No commits in common. "develop" and "v0.5.0-alpha.3" have entirely different histories.
develop
...
v0.5.0-alp
|
@ -15,5 +15,3 @@
|
||||||
__pycache__
|
__pycache__
|
||||||
/datman-helper-postgres/datman_helper_postgres.egg-info
|
/datman-helper-postgres/datman_helper_postgres.egg-info
|
||||||
/datman-helper-mysql/datman_helper_mysql.egg-info
|
/datman-helper-mysql/datman_helper_mysql.egg-info
|
||||||
/result
|
|
||||||
|
|
||||||
|
|
|
@ -5,19 +5,27 @@ platform: linux/amd64
|
||||||
|
|
||||||
pipeline:
|
pipeline:
|
||||||
unitTests:
|
unitTests:
|
||||||
image: "rust:1.65.0"
|
image: "docker.bics.ga/rei_ci/rust-sccache:latest-amd64"
|
||||||
pull: true
|
|
||||||
commands:
|
commands:
|
||||||
- DEBIAN_FRONTEND=noninteractive apt-get -qq update > /dev/null
|
- DEBIAN_FRONTEND=noninteractive apt-get -qq update > /dev/null
|
||||||
- DEBIAN_FRONTEND=noninteractive apt-get -yqq install pkg-config libssl-dev build-essential libsqlite3-dev > /dev/null
|
- DEBIAN_FRONTEND=noninteractive apt-get -yqq install pkg-config libssl-dev build-essential libsqlite3-dev > /dev/null
|
||||||
- cargo build --all
|
- cargo build --all
|
||||||
- cargo test --all
|
- cargo test --all
|
||||||
|
- sccache --show-stats
|
||||||
|
environment:
|
||||||
|
RUSTC_WRAPPER: /usr/local/bin/sccache
|
||||||
|
SCCACHE_S3_USE_SSL: "true"
|
||||||
|
SCCACHE_ENDPOINT: "richie.m4.tanukitsu.net:443"
|
||||||
|
secrets:
|
||||||
|
- sccache_bucket
|
||||||
|
- aws_access_key_id
|
||||||
|
- aws_secret_access_key
|
||||||
when:
|
when:
|
||||||
event: [push, pull_request]
|
event: [push, pull_request]
|
||||||
|
|
||||||
|
|
||||||
testSuite:
|
testSuite:
|
||||||
image: "rust:1.65.0"
|
image: "docker.bics.ga/rei_ci/rust-sccache:latest-amd64"
|
||||||
commands:
|
commands:
|
||||||
- DEBIAN_FRONTEND=noninteractive apt-get -qq update > /dev/null
|
- DEBIAN_FRONTEND=noninteractive apt-get -qq update > /dev/null
|
||||||
- DEBIAN_FRONTEND=noninteractive apt-get -yqq -o=Dpkg::Use-Pty=0 install pkg-config libssl-dev build-essential libsqlite3-dev python3.9 python3.9-venv postgresql postgresql-client mariadb-server mariadb-client zstd lz4 > /dev/null
|
- DEBIAN_FRONTEND=noninteractive apt-get -yqq -o=Dpkg::Use-Pty=0 install pkg-config libssl-dev build-essential libsqlite3-dev python3.9 python3.9-venv postgresql postgresql-client mariadb-server mariadb-client zstd lz4 > /dev/null
|
||||||
|
@ -30,14 +38,22 @@ pipeline:
|
||||||
- cargo install -q --path yama
|
- cargo install -q --path yama
|
||||||
- cargo install -q --path datman
|
- cargo install -q --path datman
|
||||||
- python3.9 -m venv testsuite/.venv
|
- python3.9 -m venv testsuite/.venv
|
||||||
- ./testsuite/.venv/bin/pip install ./testsuite ./datman-helper-postgres ./datman-helper-mysql
|
- ./testsuite/.venv/bin/pip install -e testsuite -e datman-helper-postgres -e datman-helper-mysql
|
||||||
- cd testsuite && . .venv/bin/activate && TEST_POSTGRES=$(hostname),testsuitedb,root TEST_MYSQL=$(hostname),testsuitemydb,root green
|
- cd testsuite && . .venv/bin/activate && TEST_POSTGRES=$(hostname),testsuitedb,root TEST_MYSQL=$(hostname),testsuitemydb,root green
|
||||||
|
- sccache --show-stats
|
||||||
|
environment:
|
||||||
|
RUSTC_WRAPPER: /usr/local/bin/sccache
|
||||||
|
SCCACHE_S3_USE_SSL: "true"
|
||||||
|
SCCACHE_ENDPOINT: "richie.m4.tanukitsu.net:443"
|
||||||
|
secrets:
|
||||||
|
- sccache_bucket
|
||||||
|
- aws_access_key_id
|
||||||
|
- aws_secret_access_key
|
||||||
when:
|
when:
|
||||||
event: [push, pull_request]
|
event: [push, pull_request]
|
||||||
|
|
||||||
deployManual:
|
deployManual:
|
||||||
image: "docker.emunest.net/rei_ci/mdbook:latest-amd64"
|
image: "docker.bics.ga/rei_ci/mdbook:latest-amd64"
|
||||||
pull: true
|
|
||||||
when:
|
when:
|
||||||
branch:
|
branch:
|
||||||
- develop
|
- develop
|
||||||
|
|
|
@ -4,8 +4,7 @@ platform: linux/${ARCH}
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
ARCH:
|
ARCH:
|
||||||
# I don't have an arm64 runner at the moment.
|
- arm64
|
||||||
#- arm64
|
|
||||||
- amd64
|
- amd64
|
||||||
|
|
||||||
.a1: &when
|
.a1: &when
|
||||||
|
@ -16,10 +15,7 @@ pipeline:
|
||||||
buildRelease:
|
buildRelease:
|
||||||
when: *when
|
when: *when
|
||||||
|
|
||||||
# Disabled for now because I'm trying to get infinite build times to stop :-(.
|
image: "docker.bics.ga/rei_ci/rust-sccache:latest-${ARCH}"
|
||||||
# Suspect a kernel bug but any workaround will do for now.
|
|
||||||
#image: "docker.bics.ga/rei_ci/rust-sccache:latest-${ARCH}"
|
|
||||||
image: "rust:1.61"
|
|
||||||
commands:
|
commands:
|
||||||
- apt-get -qq update && apt-get -yqq install pkg-config libssl-dev build-essential libolm-dev cmake
|
- apt-get -qq update && apt-get -yqq install pkg-config libssl-dev build-essential libolm-dev cmake
|
||||||
- cargo build --release
|
- cargo build --release
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,11 +1,6 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
"yama",
|
"yama",
|
||||||
"datman",
|
"datman"
|
||||||
]
|
]
|
||||||
|
|
||||||
[profile.release]
|
|
||||||
# Include FULL debug information in the release binaries
|
|
||||||
debug = 2
|
|
||||||
# When this feature stabilises, it will be possible to split the debug information into a file alongside the binary
|
|
||||||
#split-debuginfo = "packed"
|
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
package = []
|
|
||||||
|
|
||||||
[metadata]
|
|
||||||
lock-version = "1.1"
|
|
||||||
python-versions = "^3.8"
|
|
||||||
content-hash = "fafb334cb038533f851c23d0b63254223abf72ce4f02987e7064b0c95566699a"
|
|
||||||
|
|
||||||
[metadata.files]
|
|
|
@ -1,19 +0,0 @@
|
||||||
[tool.poetry]
|
|
||||||
name = "datman-helper-mysql"
|
|
||||||
version = "0.1.0"
|
|
||||||
description = "MySQL integration for Datman"
|
|
||||||
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
|
|
||||||
license = "GPL-3.0-or-later"
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
|
||||||
python = "^3.8"
|
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
|
||||||
datman-helper-mysql-backup="datman_helper_mysql.backup:cli"
|
|
||||||
datman-helper-mysql-restore="datman_helper_mysql.restore:cli"
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = ["poetry-core>=1.0.0"]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from shutil import rmtree
|
||||||
|
|
||||||
|
from setuptools import Command, find_packages, setup
|
||||||
|
|
||||||
|
# Package meta-data.
|
||||||
|
NAME = "datman_helper_mysql"
|
||||||
|
DESCRIPTION = "MySQL integration for Datman"
|
||||||
|
URL = "https://bics.ga/reivilibre/yama"
|
||||||
|
EMAIL = "reivi@librepush.net"
|
||||||
|
AUTHOR = "Olivier 'reivilibre'"
|
||||||
|
REQUIRES_PYTHON = ">=3.7.0"
|
||||||
|
VERSION = "0.1.0"
|
||||||
|
|
||||||
|
# What packages are required for this module to be executed?
|
||||||
|
REQUIRED = []
|
||||||
|
|
||||||
|
|
||||||
|
# What packages are optional?
|
||||||
|
EXTRAS = {}
|
||||||
|
|
||||||
|
# The rest you shouldn't have to touch too much :)
|
||||||
|
# ------------------------------------------------
|
||||||
|
# Except, perhaps the License and Trove Classifiers!
|
||||||
|
# If you do change the License, remember to change the Trove Classifier for that!
|
||||||
|
|
||||||
|
here = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
# Import the README and use it as the long-description.
|
||||||
|
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
|
||||||
|
try:
|
||||||
|
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
|
||||||
|
long_description = "\n" + f.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
long_description = DESCRIPTION
|
||||||
|
|
||||||
|
# Load the package's __version__.py module as a dictionary.
|
||||||
|
about = {}
|
||||||
|
if not VERSION:
|
||||||
|
project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
|
||||||
|
with open(os.path.join(here, project_slug, "__version__.py")) as f:
|
||||||
|
exec(f.read(), about)
|
||||||
|
else:
|
||||||
|
about["__version__"] = VERSION
|
||||||
|
|
||||||
|
|
||||||
|
class UploadCommand(Command):
|
||||||
|
"""Support setup.py upload."""
|
||||||
|
|
||||||
|
description = "Build and publish the package."
|
||||||
|
user_options = []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def status(s):
|
||||||
|
"""Prints things in bold."""
|
||||||
|
print("\033[1m{0}\033[0m".format(s))
|
||||||
|
|
||||||
|
def initialize_options(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def finalize_options(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
try:
|
||||||
|
self.status("Removing previous builds…")
|
||||||
|
rmtree(os.path.join(here, "dist"))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.status("Building Source and Wheel (universal) distribution…")
|
||||||
|
os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))
|
||||||
|
|
||||||
|
self.status("Uploading the package to PyPI via Twine…")
|
||||||
|
os.system("twine upload dist/*")
|
||||||
|
|
||||||
|
self.status("Pushing git tags…")
|
||||||
|
os.system("git tag v{0}".format(about["__version__"]))
|
||||||
|
os.system("git push --tags")
|
||||||
|
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
# Where the magic happens:
|
||||||
|
setup(
|
||||||
|
name=NAME,
|
||||||
|
version=about["__version__"],
|
||||||
|
description=DESCRIPTION,
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type="text/markdown",
|
||||||
|
author=AUTHOR,
|
||||||
|
author_email=EMAIL,
|
||||||
|
python_requires=REQUIRES_PYTHON,
|
||||||
|
url=URL,
|
||||||
|
packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
|
||||||
|
# If your package is a single module, use this instead of 'packages':
|
||||||
|
# py_modules=['mypackage'],
|
||||||
|
entry_points={
|
||||||
|
"console_scripts": [
|
||||||
|
"datman-helper-mysql-backup=datman_helper_mysql.backup:cli",
|
||||||
|
"datman-helper-mysql-restore=datman_helper_mysql.restore:cli",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
install_requires=REQUIRED,
|
||||||
|
extras_require=EXTRAS,
|
||||||
|
include_package_data=True,
|
||||||
|
# TODO license='GPL3',
|
||||||
|
classifiers=[
|
||||||
|
# Trove classifiers
|
||||||
|
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
|
||||||
|
"Programming Language :: Python",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
],
|
||||||
|
)
|
|
@ -39,7 +39,10 @@ def cli():
|
||||||
# The process (if any) that is our LZ4 decompressor.
|
# The process (if any) that is our LZ4 decompressor.
|
||||||
lz4_process = None
|
lz4_process = None
|
||||||
|
|
||||||
dump_command = ["pg_dump", database_to_use]
|
dump_command = [
|
||||||
|
"pg_dump",
|
||||||
|
database_to_use
|
||||||
|
]
|
||||||
|
|
||||||
if host_to_use is not None:
|
if host_to_use is not None:
|
||||||
if use_lz4:
|
if use_lz4:
|
||||||
|
@ -60,19 +63,21 @@ def cli():
|
||||||
# (rather than lz4 covering it).
|
# (rather than lz4 covering it).
|
||||||
command = [
|
command = [
|
||||||
"ssh",
|
"ssh",
|
||||||
f"{user_to_use}@{host_to_use}"
|
f"{user_to_use}@{host_to_use}" if user_to_use is not None else f"{host_to_use}",
|
||||||
if user_to_use is not None
|
|
||||||
else f"{host_to_use}",
|
|
||||||
"bash",
|
"bash",
|
||||||
"-o",
|
"-o",
|
||||||
"pipefail",
|
"pipefail",
|
||||||
"-c",
|
"-c",
|
||||||
shlex.quote(" ".join(dump_command)),
|
shlex.quote(" ".join(dump_command))
|
||||||
]
|
]
|
||||||
elif user_to_use is not None:
|
elif user_to_use is not None:
|
||||||
current_username = pwd.getpwuid(os.getuid()).pw_name
|
current_username = pwd.getpwuid(os.getuid()).pw_name
|
||||||
if current_username != user_to_use:
|
if current_username != user_to_use:
|
||||||
command = ["sudo", "-u", user_to_use] + dump_command
|
command = [
|
||||||
|
"sudo",
|
||||||
|
"-u",
|
||||||
|
user_to_use
|
||||||
|
] + dump_command
|
||||||
else:
|
else:
|
||||||
command = dump_command
|
command = dump_command
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
package = []
|
|
||||||
|
|
||||||
[metadata]
|
|
||||||
lock-version = "1.1"
|
|
||||||
python-versions = "^3.8"
|
|
||||||
content-hash = "fafb334cb038533f851c23d0b63254223abf72ce4f02987e7064b0c95566699a"
|
|
||||||
|
|
||||||
[metadata.files]
|
|
|
@ -1,19 +0,0 @@
|
||||||
[tool.poetry]
|
|
||||||
name = "datman-helper-postgres"
|
|
||||||
version = "0.1.0"
|
|
||||||
description = "Postgres integration for Datman"
|
|
||||||
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
|
|
||||||
license = "GPL-3.0-or-later"
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
|
||||||
python = "^3.8"
|
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
|
||||||
datman-helper-postgres-backup="datman_helper_postgres.backup:cli"
|
|
||||||
datman-helper-postgres-restore="datman_helper_postgres.restore:cli"
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = ["poetry-core>=1.0.0"]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from shutil import rmtree
|
||||||
|
|
||||||
|
from setuptools import Command, find_packages, setup
|
||||||
|
|
||||||
|
# Package meta-data.
|
||||||
|
NAME = "datman_helper_postgres"
|
||||||
|
DESCRIPTION = "Postgres integration for Datman"
|
||||||
|
URL = "https://bics.ga/reivilibre/yama"
|
||||||
|
EMAIL = "reivi@librepush.net"
|
||||||
|
AUTHOR = "Olivier 'reivilibre'"
|
||||||
|
REQUIRES_PYTHON = ">=3.7.0"
|
||||||
|
VERSION = "0.1.0"
|
||||||
|
|
||||||
|
# What packages are required for this module to be executed?
|
||||||
|
REQUIRED = []
|
||||||
|
|
||||||
|
|
||||||
|
# What packages are optional?
|
||||||
|
EXTRAS = {}
|
||||||
|
|
||||||
|
# The rest you shouldn't have to touch too much :)
|
||||||
|
# ------------------------------------------------
|
||||||
|
# Except, perhaps the License and Trove Classifiers!
|
||||||
|
# If you do change the License, remember to change the Trove Classifier for that!
|
||||||
|
|
||||||
|
here = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
# Import the README and use it as the long-description.
|
||||||
|
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
|
||||||
|
try:
|
||||||
|
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
|
||||||
|
long_description = "\n" + f.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
long_description = DESCRIPTION
|
||||||
|
|
||||||
|
# Load the package's __version__.py module as a dictionary.
|
||||||
|
about = {}
|
||||||
|
if not VERSION:
|
||||||
|
project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
|
||||||
|
with open(os.path.join(here, project_slug, "__version__.py")) as f:
|
||||||
|
exec(f.read(), about)
|
||||||
|
else:
|
||||||
|
about["__version__"] = VERSION
|
||||||
|
|
||||||
|
|
||||||
|
class UploadCommand(Command):
|
||||||
|
"""Support setup.py upload."""
|
||||||
|
|
||||||
|
description = "Build and publish the package."
|
||||||
|
user_options = []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def status(s):
|
||||||
|
"""Prints things in bold."""
|
||||||
|
print("\033[1m{0}\033[0m".format(s))
|
||||||
|
|
||||||
|
def initialize_options(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def finalize_options(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
try:
|
||||||
|
self.status("Removing previous builds…")
|
||||||
|
rmtree(os.path.join(here, "dist"))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.status("Building Source and Wheel (universal) distribution…")
|
||||||
|
os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))
|
||||||
|
|
||||||
|
self.status("Uploading the package to PyPI via Twine…")
|
||||||
|
os.system("twine upload dist/*")
|
||||||
|
|
||||||
|
self.status("Pushing git tags…")
|
||||||
|
os.system("git tag v{0}".format(about["__version__"]))
|
||||||
|
os.system("git push --tags")
|
||||||
|
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
# Where the magic happens:
|
||||||
|
setup(
|
||||||
|
name=NAME,
|
||||||
|
version=about["__version__"],
|
||||||
|
description=DESCRIPTION,
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type="text/markdown",
|
||||||
|
author=AUTHOR,
|
||||||
|
author_email=EMAIL,
|
||||||
|
python_requires=REQUIRES_PYTHON,
|
||||||
|
url=URL,
|
||||||
|
packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
|
||||||
|
# If your package is a single module, use this instead of 'packages':
|
||||||
|
# py_modules=['mypackage'],
|
||||||
|
entry_points={
|
||||||
|
"console_scripts": [
|
||||||
|
"datman-helper-postgres-backup=datman_helper_postgres.backup:cli",
|
||||||
|
"datman-helper-postgres-restore=datman_helper_postgres.restore:cli",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
install_requires=REQUIRED,
|
||||||
|
extras_require=EXTRAS,
|
||||||
|
include_package_data=True,
|
||||||
|
# TODO license='GPL3',
|
||||||
|
classifiers=[
|
||||||
|
# Trove classifiers
|
||||||
|
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
|
||||||
|
"Programming Language :: Python",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
],
|
||||||
|
)
|
|
@ -1,8 +1,8 @@
|
||||||
[package]
|
[package]
|
||||||
name = "datman"
|
name = "datman"
|
||||||
version = "0.6.0-alpha.5"
|
version = "0.5.0-alpha.2"
|
||||||
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
|
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
|
||||||
edition = "2021"
|
edition = "2018"
|
||||||
repository = "https://bics.ga/reivilibre/yama"
|
repository = "https://bics.ga/reivilibre/yama"
|
||||||
license = "GPL-3.0-or-later"
|
license = "GPL-3.0-or-later"
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ description = "A chunked and deduplicated backup system using Yama"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { version = "3.1.18", features = ["derive"] }
|
clap = "= 3.0.0-beta.5"
|
||||||
crossbeam-channel = "0.5.1"
|
crossbeam-channel = "0.5.1"
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
|
@ -22,7 +22,7 @@ log = "0.4"
|
||||||
env_logger = "0.7.1"
|
env_logger = "0.7.1"
|
||||||
indicatif = "0.14.0"
|
indicatif = "0.14.0"
|
||||||
arc-interner = "0.5.1"
|
arc-interner = "0.5.1"
|
||||||
zstd = "0.11.2" # 0.11.2+zstd.1.5.2
|
zstd = "0.6.0" # 0.6.0+zstd.1.4.8
|
||||||
byteorder = "1"
|
byteorder = "1"
|
||||||
termion = "1.5.6"
|
termion = "1.5.6"
|
||||||
glob = "0.3.0"
|
glob = "0.3.0"
|
||||||
|
@ -30,9 +30,6 @@ humansize = "1.1.1"
|
||||||
chrono = "0.4.19"
|
chrono = "0.4.19"
|
||||||
itertools = "0.10.1"
|
itertools = "0.10.1"
|
||||||
hostname = "0.3.1"
|
hostname = "0.3.1"
|
||||||
yama = { path = "../yama", version = "0.6.0-alpha.5" }
|
yama = { path = "../yama", version = "0.5.0-alpha.1" }
|
||||||
metrics = "0.17.1"
|
metrics = "0.17.1"
|
||||||
bare-metrics-recorder = { version = "0.1.0" }
|
bare-metrics-recorder = { version = "0.1.0" }
|
||||||
comfy-table = "6.0.0-rc.1"
|
|
||||||
libc = "0.2.126"
|
|
||||||
io-streams = "0.11.0"
|
|
|
@ -8,6 +8,5 @@ Features:
|
||||||
* (optional) Compression using Zstd and a specifiable dictionary
|
* (optional) Compression using Zstd and a specifiable dictionary
|
||||||
* (optional) Encryption
|
* (optional) Encryption
|
||||||
* Ability to back up to remote machines over SSH
|
* Ability to back up to remote machines over SSH
|
||||||
* Labelling of files in a backup source; different destinations can choose to backup either all or a subset of the labels.
|
|
||||||
|
|
||||||
See the documentation for more information.
|
See the documentation for more information.
|
||||||
|
|
|
@ -16,30 +16,24 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufReader, BufWriter, Write};
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::{Command, Stdio};
|
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::bail;
|
||||||
use bare_metrics_recorder::recording::BareMetricsRecorderCore;
|
use bare_metrics_recorder::recording::BareMetricsRecorderCore;
|
||||||
use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
|
use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
|
||||||
use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination};
|
use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination};
|
||||||
use datman::commands::ilabel::interactive_labelling_session;
|
use datman::commands::ilabel::interactive_labelling_session;
|
||||||
use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy};
|
use datman::commands::init_descriptor;
|
||||||
use datman::commands::{init_descriptor, pushpull};
|
|
||||||
use datman::descriptor::{load_descriptor, SourceDescriptor};
|
use datman::descriptor::{load_descriptor, SourceDescriptor};
|
||||||
use datman::get_hostname;
|
use datman::get_hostname;
|
||||||
use datman::remote::backup_source_requester::backup_remote_source_to_destination;
|
use datman::remote::backup_source_requester::backup_remote_source_to_destination;
|
||||||
use datman::remote::backup_source_responder;
|
use datman::remote::backup_source_responder;
|
||||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||||
use itertools::Itertools;
|
|
||||||
use log::info;
|
use log::info;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use yama::commands::load_pile_descriptor;
|
|
||||||
use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel};
|
|
||||||
|
|
||||||
pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠️ \x1b[25m\x1b[22m";
|
pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠️ \x1b[25m\x1b[22m";
|
||||||
pub const BOLD: &str = "\x1b[1m";
|
pub const BOLD: &str = "\x1b[1m";
|
||||||
|
@ -117,39 +111,8 @@ pub enum DatmanCommand {
|
||||||
skip_metadata: bool,
|
skip_metadata: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
Report {
|
|
||||||
/// Name of the pile to report on.
|
|
||||||
pile_name: String,
|
|
||||||
|
|
||||||
/// Don't summarise months.
|
|
||||||
#[clap(long)]
|
|
||||||
individual: bool,
|
|
||||||
},
|
|
||||||
|
|
||||||
#[clap(name = "_backup_source_responder")]
|
#[clap(name = "_backup_source_responder")]
|
||||||
InternalBackupSourceResponder,
|
InternalBackupSourceResponder,
|
||||||
|
|
||||||
/// Pulls all pointers from a remote pile to a local pile.
|
|
||||||
/// Does not yet support label filtering, but will do in the future.
|
|
||||||
Pull {
|
|
||||||
/// e.g. 'myserver:main'
|
|
||||||
remote_and_remote_pile: String,
|
|
||||||
|
|
||||||
pile_name: String,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Applies a retention policy by removing unnecessary backups.
|
|
||||||
/// Does not reclaim space by itself: use
|
|
||||||
/// `yama check --apply-gc --shallow`
|
|
||||||
/// & `yama compact`
|
|
||||||
/// to do that.
|
|
||||||
Prune { pile_name: String },
|
|
||||||
|
|
||||||
#[clap(name = "_pull_responder_offerer")]
|
|
||||||
InternalPullResponderOfferer {
|
|
||||||
datman_path: PathBuf,
|
|
||||||
pile_name: String,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct HumanDateTime(pub DateTime<Local>);
|
pub struct HumanDateTime(pub DateTime<Local>);
|
||||||
|
@ -210,7 +173,7 @@ fn main() -> anyhow::Result<()> {
|
||||||
|
|
||||||
let now = Utc::now();
|
let now = Utc::now();
|
||||||
|
|
||||||
let (shard, _stopper) = BareMetricsRecorderCore::new(File::create(format!(
|
let (shard, stopper) = BareMetricsRecorderCore::new(File::create(format!(
|
||||||
"/tmp/datman_{}.baremetrics",
|
"/tmp/datman_{}.baremetrics",
|
||||||
now.format("%F_%H%M%S")
|
now.format("%F_%H%M%S")
|
||||||
))?)
|
))?)
|
||||||
|
@ -227,10 +190,10 @@ fn main() -> anyhow::Result<()> {
|
||||||
unimplemented!();
|
unimplemented!();
|
||||||
}
|
}
|
||||||
DatmanCommand::InteractiveLabelling { source_name } => {
|
DatmanCommand::InteractiveLabelling { source_name } => {
|
||||||
interactive_labelling_session(Path::new("."), source_name)?;
|
interactive_labelling_session(Path::new("."), source_name).unwrap();
|
||||||
}
|
}
|
||||||
DatmanCommand::InteractiveBrowsing { source_name } => {
|
DatmanCommand::InteractiveBrowsing { source_name } => {
|
||||||
datman::commands::ibrowse::session(Path::new("."), source_name)?;
|
datman::commands::ibrowse::session(Path::new("."), source_name).unwrap();
|
||||||
}
|
}
|
||||||
DatmanCommand::BackupOne {
|
DatmanCommand::BackupOne {
|
||||||
source_name,
|
source_name,
|
||||||
|
@ -344,125 +307,6 @@ fn main() -> anyhow::Result<()> {
|
||||||
info!("Datman responder at {:?}", std::env::current_exe()?);
|
info!("Datman responder at {:?}", std::env::current_exe()?);
|
||||||
backup_source_responder::handler_stdio()?;
|
backup_source_responder::handler_stdio()?;
|
||||||
}
|
}
|
||||||
|
|
||||||
DatmanCommand::Report {
|
|
||||||
pile_name,
|
|
||||||
individual,
|
|
||||||
} => {
|
|
||||||
let descriptor = load_descriptor(Path::new(".")).unwrap();
|
|
||||||
let destination = &descriptor.piles[&pile_name];
|
|
||||||
let report =
|
|
||||||
datman::commands::report::generate_report(destination, &descriptor, !individual)?;
|
|
||||||
|
|
||||||
datman::commands::report::print_filesystem_space(&destination.path)?;
|
|
||||||
datman::commands::report::print_report(&report)?;
|
|
||||||
}
|
|
||||||
DatmanCommand::Pull {
|
|
||||||
remote_and_remote_pile,
|
|
||||||
pile_name,
|
|
||||||
} => {
|
|
||||||
let (hostname, remote_datman_path, remote_pile_name) = remote_and_remote_pile
|
|
||||||
.split(':')
|
|
||||||
.collect_tuple()
|
|
||||||
.context("You must pull from a remote pile specified as remote:path:pile.")?;
|
|
||||||
|
|
||||||
let descriptor = load_descriptor(Path::new(".")).unwrap();
|
|
||||||
let source = &descriptor.piles[&pile_name];
|
|
||||||
|
|
||||||
let pile_desc = load_pile_descriptor(&source.path)?;
|
|
||||||
let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
|
|
||||||
&source.path,
|
|
||||||
&pile_desc,
|
|
||||||
BypassLevel::CompressionBypass,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
|
|
||||||
pbar.set_style(
|
|
||||||
ProgressStyle::default_bar().template(
|
|
||||||
"[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
|
|
||||||
),
|
|
||||||
);
|
|
||||||
pbar.set_message("pulling");
|
|
||||||
|
|
||||||
let remote_host_descriptor = descriptor
|
|
||||||
.remote_hosts
|
|
||||||
.get(hostname)
|
|
||||||
.ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?;
|
|
||||||
|
|
||||||
let mut connection = Command::new("ssh")
|
|
||||||
.arg(&remote_host_descriptor.user_at_host)
|
|
||||||
.arg("--")
|
|
||||||
.arg(
|
|
||||||
&remote_host_descriptor
|
|
||||||
.path_to_datman
|
|
||||||
.as_ref()
|
|
||||||
.map(|x| x.as_str())
|
|
||||||
.unwrap_or("datman"),
|
|
||||||
)
|
|
||||||
.arg("_pull_responder_offerer")
|
|
||||||
.arg(remote_datman_path)
|
|
||||||
.arg(remote_pile_name)
|
|
||||||
.stdin(Stdio::piped())
|
|
||||||
.stdout(Stdio::piped())
|
|
||||||
.stderr(Stdio::inherit())
|
|
||||||
.spawn()?;
|
|
||||||
|
|
||||||
let mut reader = BufReader::new(connection.stdout.take().unwrap());
|
|
||||||
let mut writer = BufWriter::new(connection.stdin.take().unwrap());
|
|
||||||
|
|
||||||
pushpull::accepting_side(
|
|
||||||
&pile,
|
|
||||||
&bypass_raw_pile,
|
|
||||||
&mut reader,
|
|
||||||
&mut writer,
|
|
||||||
Box::new(pbar),
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
DatmanCommand::Prune { pile_name } => {
|
|
||||||
let descriptor = load_descriptor(Path::new(".")).unwrap();
|
|
||||||
let retention_policy = descriptor
|
|
||||||
.retention
|
|
||||||
.context("No retention policy set in descriptor")?;
|
|
||||||
let dest_desc = &descriptor.piles[&pile_name];
|
|
||||||
|
|
||||||
let pile_desc = load_pile_descriptor(&dest_desc.path)?;
|
|
||||||
|
|
||||||
prune_with_retention_policy(
|
|
||||||
&dest_desc.path,
|
|
||||||
&pile_desc,
|
|
||||||
&RetentionPolicy::from_config(retention_policy),
|
|
||||||
true,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
DatmanCommand::InternalPullResponderOfferer {
|
|
||||||
datman_path,
|
|
||||||
pile_name,
|
|
||||||
} => {
|
|
||||||
let descriptor = load_descriptor(&datman_path).unwrap();
|
|
||||||
let source = &descriptor.piles[&pile_name];
|
|
||||||
|
|
||||||
let pile_desc = load_pile_descriptor(&source.path)?;
|
|
||||||
let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
|
|
||||||
&source.path,
|
|
||||||
&pile_desc,
|
|
||||||
BypassLevel::CompressionBypass,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?);
|
|
||||||
let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?);
|
|
||||||
|
|
||||||
pushpull::offering_side(
|
|
||||||
&pile,
|
|
||||||
&bypass_raw_pile,
|
|
||||||
&mut stdin,
|
|
||||||
&mut stdout,
|
|
||||||
Box::new(()),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
stdout.flush()?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,15 +20,12 @@ use std::fs::File;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::descriptor::{Descriptor, RetentionPolicyConfig, SourceDescriptor};
|
use crate::descriptor::{Descriptor, SourceDescriptor};
|
||||||
|
|
||||||
pub mod backup;
|
pub mod backup;
|
||||||
pub mod extract;
|
pub mod extract;
|
||||||
pub mod ibrowse;
|
pub mod ibrowse;
|
||||||
pub mod ilabel;
|
pub mod ilabel;
|
||||||
pub mod prune;
|
|
||||||
pub mod pushpull;
|
|
||||||
pub mod report;
|
|
||||||
|
|
||||||
pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
|
pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
|
||||||
std::fs::create_dir_all(path)?;
|
std::fs::create_dir_all(path)?;
|
||||||
|
@ -52,12 +49,6 @@ pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
|
||||||
sources: source,
|
sources: source,
|
||||||
piles: Default::default(),
|
piles: Default::default(),
|
||||||
remote_hosts: Default::default(),
|
remote_hosts: Default::default(),
|
||||||
retention: Some(RetentionPolicyConfig {
|
|
||||||
daily: 14,
|
|
||||||
weekly: 12,
|
|
||||||
monthly: 24,
|
|
||||||
yearly: 9001,
|
|
||||||
}),
|
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
datman_toml_file.write_all(&bytes)?;
|
datman_toml_file.write_all(&bytes)?;
|
||||||
|
|
|
@ -17,13 +17,11 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor, VirtualSourceKind};
|
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor, VirtualSourceKind};
|
||||||
use crate::get_hostname;
|
use crate::get_hostname;
|
||||||
use crate::labelling::{
|
use crate::labelling::{label_node, load_labelling_rules, str_to_label, Label, State};
|
||||||
label_node, load_labelling_rules, str_to_label, Label, LabellingRules, State,
|
|
||||||
};
|
|
||||||
use crate::tree::{scan, FileTree, FileTree1};
|
use crate::tree::{scan, FileTree, FileTree1};
|
||||||
use anyhow::{anyhow, bail};
|
use anyhow::{anyhow, bail};
|
||||||
use arc_interner::ArcIntern;
|
use arc_interner::ArcIntern;
|
||||||
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use log::{info, warn};
|
use log::{info, warn};
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
@ -51,13 +49,6 @@ pub fn get_pointer_name_at(source_name: &str, datetime: DateTime<Utc>) -> String
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Splits a pointer name into its source name and its timestamp.
///
/// The name is cut at its last `+`. The part after it is parsed with
/// `POINTER_DATETIME_FORMAT` and interpreted as a UTC date-time.
///
/// Returns `None` when the name contains no `+` or when the trailing part does
/// not parse with that format.
pub fn split_pointer_name(pointer_name: &str) -> Option<(String, DateTime<Utc>)> {
    pointer_name.rsplit_once("+").and_then(|(source, stamp)| {
        let naive = NaiveDateTime::parse_from_str(stamp, POINTER_DATETIME_FORMAT).ok()?;
        Some((source.to_owned(), Utc.from_utc_datetime(&naive)))
    })
}
|
|
||||||
|
|
||||||
pub fn open_stdout_backup_process(
|
pub fn open_stdout_backup_process(
|
||||||
extra_args: &HashMap<String, toml::Value>,
|
extra_args: &HashMap<String, toml::Value>,
|
||||||
program_name: &str,
|
program_name: &str,
|
||||||
|
@ -78,8 +69,8 @@ pub fn open_stdout_backup_process(
|
||||||
pub fn label_filter_and_convert(
|
pub fn label_filter_and_convert(
|
||||||
tree: FileTree1<()>,
|
tree: FileTree1<()>,
|
||||||
descriptor: &Descriptor,
|
descriptor: &Descriptor,
|
||||||
|
desc_path: &Path,
|
||||||
source_name: &str,
|
source_name: &str,
|
||||||
rules: &LabellingRules,
|
|
||||||
dest: &DestPileDescriptor,
|
dest: &DestPileDescriptor,
|
||||||
) -> anyhow::Result<Option<TreeNode>> {
|
) -> anyhow::Result<Option<TreeNode>> {
|
||||||
info!("Labelling.");
|
info!("Labelling.");
|
||||||
|
@ -89,7 +80,8 @@ pub fn label_filter_and_convert(
|
||||||
.iter()
|
.iter()
|
||||||
.map(|l| Label(ArcIntern::new(l.clone())))
|
.map(|l| Label(ArcIntern::new(l.clone())))
|
||||||
.collect();
|
.collect();
|
||||||
label_node("".to_owned(), None, &mut tree, &labels, rules)?;
|
let rules = load_labelling_rules(desc_path, source_name)?;
|
||||||
|
label_node("".to_owned(), None, &mut tree, &labels, &rules)?;
|
||||||
|
|
||||||
let included_labels: HashSet<Label> = dest.included_labels.iter().map(str_to_label).collect();
|
let included_labels: HashSet<Label> = dest.included_labels.iter().map(str_to_label).collect();
|
||||||
|
|
||||||
|
@ -148,23 +140,17 @@ pub fn backup_source_to_destination<PT: ProgressTracker>(
|
||||||
SourceDescriptor::DirectorySource {
|
SourceDescriptor::DirectorySource {
|
||||||
hostname: _,
|
hostname: _,
|
||||||
directory,
|
directory,
|
||||||
cross_filesystems,
|
|
||||||
} => {
|
} => {
|
||||||
info!("Looking to backup {} to {}", source_name, dest_name);
|
info!("Looking to backup {} to {}", source_name, dest_name);
|
||||||
let rules = load_labelling_rules(desc_path, source_name)?;
|
|
||||||
let exclusions = rules.get_exclusions_set(directory);
|
|
||||||
|
|
||||||
info!("Scanning.");
|
info!("Scanning.");
|
||||||
let tree = scan(directory, !*cross_filesystems, &exclusions)?
|
let tree = scan(directory)?.ok_or_else(|| anyhow!("Source does not exist."))?;
|
||||||
.ok_or_else(|| anyhow!("Source does not exist."))?;
|
|
||||||
|
|
||||||
let absolute_source_path = desc_path.join(directory);
|
let absolute_source_path = desc_path.join(directory);
|
||||||
let absolute_dest_path = desc_path.join(&dest.path);
|
let absolute_dest_path = desc_path.join(&dest.path);
|
||||||
let pile_descriptor = load_pile_descriptor(&absolute_dest_path)?;
|
let pile_descriptor = load_pile_descriptor(&absolute_dest_path)?;
|
||||||
let pile = open_pile(&absolute_dest_path, &pile_descriptor)?;
|
let pile = open_pile(&absolute_dest_path, &pile_descriptor)?;
|
||||||
|
|
||||||
let root = if let Some(root) =
|
let root = if let Some(root) =
|
||||||
label_filter_and_convert(tree, descriptor, source_name, &rules, dest)?
|
label_filter_and_convert(tree, descriptor, desc_path, source_name, dest)?
|
||||||
{
|
{
|
||||||
root
|
root
|
||||||
} else {
|
} else {
|
||||||
|
@ -216,6 +202,7 @@ pub fn backup_source_to_destination<PT: ProgressTracker>(
|
||||||
parent,
|
parent,
|
||||||
num_workers,
|
num_workers,
|
||||||
progress_bar,
|
progress_bar,
|
||||||
|
true,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
info!("Stored!");
|
info!("Stored!");
|
||||||
|
|
|
@ -15,7 +15,6 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::BTreeSet;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::{anyhow, bail};
|
use anyhow::{anyhow, bail};
|
||||||
|
@ -69,19 +68,15 @@ pub fn session(path: &Path, source_name: String) -> anyhow::Result<()> {
|
||||||
.get(&source_name)
|
.get(&source_name)
|
||||||
.ok_or_else(|| anyhow!("Could not find source {:?}!", source_name))?;
|
.ok_or_else(|| anyhow!("Could not find source {:?}!", source_name))?;
|
||||||
|
|
||||||
let (directory, one_filesystem) = match source_descriptor {
|
let directory = match source_descriptor {
|
||||||
SourceDescriptor::DirectorySource {
|
SourceDescriptor::DirectorySource { directory, .. } => directory,
|
||||||
directory,
|
|
||||||
cross_filesystems,
|
|
||||||
..
|
|
||||||
} => (directory, !*cross_filesystems),
|
|
||||||
SourceDescriptor::VirtualSource { .. } => {
|
SourceDescriptor::VirtualSource { .. } => {
|
||||||
bail!("Cannot browse virtual source.");
|
bail!("Cannot browse virtual source.");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
println!("Scanning source; this might take a little while...");
|
println!("Scanning source; this might take a little while...");
|
||||||
let mut dir_scan: FileTree1<Option<State>> = scan(directory, one_filesystem, &BTreeSet::new())?
|
let mut dir_scan: FileTree1<Option<State>> = scan(directory)?
|
||||||
.ok_or_else(|| anyhow!("Empty source."))?
|
.ok_or_else(|| anyhow!("Empty source."))?
|
||||||
.replace_meta(&None);
|
.replace_meta(&None);
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,6 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::BTreeSet;
|
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::{StdinLock, Stdout, Write};
|
use std::io::{StdinLock, Stdout, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
@ -187,13 +186,12 @@ pub fn interactive_labelling_session(path: &Path, source_name: String) -> anyhow
|
||||||
if let SourceDescriptor::DirectorySource {
|
if let SourceDescriptor::DirectorySource {
|
||||||
hostname,
|
hostname,
|
||||||
directory,
|
directory,
|
||||||
cross_filesystems,
|
|
||||||
} = source
|
} = source
|
||||||
{
|
{
|
||||||
let my_hostname = get_hostname();
|
let my_hostname = get_hostname();
|
||||||
let mut dir_scan = if &my_hostname == hostname {
|
let mut dir_scan = if &my_hostname == hostname {
|
||||||
info!("Scanning source; this might take a little while...");
|
info!("Scanning source; this might take a little while...");
|
||||||
scan(directory, !*cross_filesystems, &BTreeSet::new())?
|
scan(directory)?
|
||||||
.ok_or_else(|| anyhow!("Empty source."))?
|
.ok_or_else(|| anyhow!("Empty source."))?
|
||||||
.replace_meta(&None)
|
.replace_meta(&None)
|
||||||
} else {
|
} else {
|
||||||
|
@ -208,13 +206,8 @@ pub fn interactive_labelling_session(path: &Path, source_name: String) -> anyhow
|
||||||
|
|
||||||
// then request to scan
|
// then request to scan
|
||||||
info!("Requesting scan from remote source... (this may take some time)");
|
info!("Requesting scan from remote source... (this may take some time)");
|
||||||
let scan = backup_source_requester::scanning(
|
let scan =
|
||||||
&mut read,
|
backup_source_requester::scanning(&mut read, &mut write, directory.as_ref())?
|
||||||
&mut write,
|
|
||||||
directory.as_ref(),
|
|
||||||
!*cross_filesystems,
|
|
||||||
&BTreeSet::new(),
|
|
||||||
)?
|
|
||||||
.ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?
|
.ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?
|
||||||
.replace_meta(&None);
|
.replace_meta(&None);
|
||||||
|
|
||||||
|
|
|
@ -1,220 +0,0 @@
|
||||||
use crate::commands::backup::split_pointer_name;
|
|
||||||
use crate::descriptor::RetentionPolicyConfig;
|
|
||||||
use anyhow::{bail, Context};
|
|
||||||
use log::info;
|
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
|
||||||
use std::io;
|
|
||||||
use std::path::Path;
|
|
||||||
use yama::commands::open_pile;
|
|
||||||
use yama::operations::remove_pointer_safely;
|
|
||||||
use yama::pile::PileDescriptor;
|
|
||||||
|
|
||||||
pub struct RetentionBand {
|
|
||||||
pub interval_s: u64,
|
|
||||||
pub number_to_retain: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct RetentionPolicy {
|
|
||||||
pub retention_bands: Vec<RetentionBand>,
|
|
||||||
}
|
|
||||||
|
|
||||||
const DAY: u64 = 86400;
|
|
||||||
const WEEK: u64 = 7 * DAY;
|
|
||||||
const MONTH: u64 = 31 * DAY;
|
|
||||||
const YEAR: u64 = 365 * DAY;
|
|
||||||
|
|
||||||
impl RetentionPolicy {
|
|
||||||
pub fn from_config(descriptor: RetentionPolicyConfig) -> RetentionPolicy {
|
|
||||||
let mut policy = RetentionPolicy {
|
|
||||||
retention_bands: vec![],
|
|
||||||
};
|
|
||||||
|
|
||||||
if descriptor.daily != 0 {
|
|
||||||
policy.retention_bands.push(RetentionBand {
|
|
||||||
interval_s: DAY,
|
|
||||||
number_to_retain: descriptor.daily,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if descriptor.weekly != 0 {
|
|
||||||
policy.retention_bands.push(RetentionBand {
|
|
||||||
interval_s: WEEK,
|
|
||||||
number_to_retain: descriptor.weekly,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if descriptor.monthly != 0 {
|
|
||||||
policy.retention_bands.push(RetentionBand {
|
|
||||||
interval_s: MONTH,
|
|
||||||
number_to_retain: descriptor.monthly,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if descriptor.yearly != 0 {
|
|
||||||
policy.retention_bands.push(RetentionBand {
|
|
||||||
interval_s: YEAR,
|
|
||||||
number_to_retain: descriptor.yearly,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
policy
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the set of snapshots to remove.
|
|
||||||
pub fn apply_returning_prunable(
|
|
||||||
&self,
|
|
||||||
snapshots_by_unix_time: BTreeMap<u64, String>,
|
|
||||||
) -> BTreeSet<String> {
|
|
||||||
if snapshots_by_unix_time.is_empty() {
|
|
||||||
return BTreeSet::new();
|
|
||||||
}
|
|
||||||
let mut snapshots_included: BTreeSet<u64> = BTreeSet::new();
|
|
||||||
|
|
||||||
// Always mark the most recent snapshot as retained!
|
|
||||||
let last_snapshot = snapshots_by_unix_time.keys().rev().next().unwrap();
|
|
||||||
snapshots_included.insert(*last_snapshot);
|
|
||||||
|
|
||||||
let now_time = *last_snapshot;
|
|
||||||
|
|
||||||
for band in &self.retention_bands {
|
|
||||||
for multiple in 1..=band.number_to_retain {
|
|
||||||
let target_time = now_time - (multiple as u64) * band.interval_s;
|
|
||||||
if let Some((k, _)) = snapshots_by_unix_time.range(0..=target_time).rev().next() {
|
|
||||||
snapshots_included.insert(*k);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find all prunable (unincluded) snapshots.
|
|
||||||
snapshots_by_unix_time
|
|
||||||
.into_iter()
|
|
||||||
.filter(|(k, _v)| !snapshots_included.contains(k))
|
|
||||||
.map(|(_k, v)| v)
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn prune_with_retention_policy(
|
|
||||||
pile_path: &Path,
|
|
||||||
pile_desc: &PileDescriptor,
|
|
||||||
policy: &RetentionPolicy,
|
|
||||||
prompt_first: bool,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let pile = open_pile(&pile_path, &pile_desc).context("Failed to open pile")?;
|
|
||||||
|
|
||||||
let pointers = pile
|
|
||||||
.list_pointers()
|
|
||||||
.context("Failed to list pointers in pile")?;
|
|
||||||
|
|
||||||
let mut pointers_to_keep: BTreeSet<String> = pointers.iter().cloned().collect();
|
|
||||||
|
|
||||||
let pointers_to_remove = get_prunable_pointers(&policy, pointers);
|
|
||||||
|
|
||||||
for remove in &pointers_to_remove {
|
|
||||||
pointers_to_keep.remove(remove);
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Gory details:\n---\nKeep: {pointers_to_keep:?}\n---\nRemove: {pointers_to_remove:?}");
|
|
||||||
info!(
|
|
||||||
"{} pointers to remove ({} to keep) based on retention policy.",
|
|
||||||
pointers_to_remove.len(),
|
|
||||||
pointers_to_keep.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
if prompt_first {
|
|
||||||
println!("Would you like to proceed? [y/N]: ");
|
|
||||||
let mut buffer = String::new();
|
|
||||||
let stdin = io::stdin(); // We get `Stdin` here.
|
|
||||||
stdin.read_line(&mut buffer)?;
|
|
||||||
if buffer.trim().to_ascii_lowercase() != "y" {
|
|
||||||
bail!("Aborted by user.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for to_remove in pointers_to_remove {
|
|
||||||
let res = remove_pointer_safely(&pile, &to_remove).context("removing prunable pointers");
|
|
||||||
|
|
||||||
pile.flush()
|
|
||||||
.context("flushing pile after removing pointers")?;
|
|
||||||
|
|
||||||
res?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_prunable_pointers(policy: &RetentionPolicy, pointers: Vec<String>) -> BTreeSet<String> {
|
|
||||||
let mut split_pointers_by_name: BTreeMap<String, BTreeMap<u64, String>> = BTreeMap::new();
|
|
||||||
|
|
||||||
for pointer in pointers {
|
|
||||||
let (name, datetime) = if let Some(x) = split_pointer_name(&pointer) {
|
|
||||||
x
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
split_pointers_by_name
|
|
||||||
.entry(name)
|
|
||||||
.or_default()
|
|
||||||
.insert(datetime.timestamp().try_into().unwrap(), pointer);
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut pointers_to_remove = BTreeSet::new();
|
|
||||||
|
|
||||||
for (_pointer_base_name, ts_to_pointer) in split_pointers_by_name {
|
|
||||||
let to_remove = policy.apply_returning_prunable(ts_to_pointer);
|
|
||||||
|
|
||||||
pointers_to_remove.extend(to_remove);
|
|
||||||
}
|
|
||||||
|
|
||||||
pointers_to_remove
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod test {
    use crate::commands::prune::{get_prunable_pointers, RetentionPolicy};
    use crate::descriptor::RetentionPolicyConfig;

    /// With a weekly-only policy of three slots, each source keeps its newest
    /// snapshot plus the ones found one, two and three weeks before it;
    /// everything else is reported as prunable, in sorted order.
    #[test]
    fn test_prunable_pointers() {
        let pointers: Vec<String> = [
            "alice+2022-09-28_05:00:00",
            "alice+2022-09-28_02:00:00",
            "alice+2022-09-21_05:00:00",
            "alice+2022-09-14_05:00:00",
            "alice+2022-09-08_05:00:00",
            "alice+2022-09-07_05:00:00",
            "alice+2022-09-01_05:00:00",
            "bob+2022-09-28_06:00:00",
            "bob+2022-09-28_03:00:00",
            "bob+2022-09-21_06:00:00",
            "bob+2022-09-14_06:00:00",
            "bob+2022-09-08_06:00:00",
            "bob+2022-09-07_06:00:00",
            "bob+2022-09-01_06:00:00",
        ]
        .iter()
        .map(|name| name.to_string())
        .collect();

        let policy = RetentionPolicy::from_config(RetentionPolicyConfig {
            daily: 0,
            weekly: 3,
            monthly: 0,
            yearly: 0,
        });

        let prunable: Vec<String> = get_prunable_pointers(&policy, pointers)
            .into_iter()
            .collect();

        let expected = vec![
            "alice+2022-09-01_05:00:00",
            "alice+2022-09-08_05:00:00",
            "alice+2022-09-28_02:00:00",
            "bob+2022-09-01_06:00:00",
            "bob+2022-09-08_06:00:00",
            "bob+2022-09-28_03:00:00",
        ];

        assert_eq!(prunable, expected);
    }
}
|
|
|
@ -1,306 +0,0 @@
|
||||||
// Push and Pull support for Datman
|
|
||||||
|
|
||||||
use anyhow::{bail, ensure, Context};
|
|
||||||
use log::info;
|
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
|
||||||
use std::io::{Read, Write};
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
|
||||||
use yama::chunking::RecursiveUnchunker;
|
|
||||||
use yama::commands::retrieve_tree_node;
|
|
||||||
use yama::definitions::{ChunkId, PointerData, RecursiveChunkRef, TreeNode};
|
|
||||||
use yama::pile::{Keyspace, Pile, PipelineDescription, RawPile};
|
|
||||||
use yama::progress::ProgressTracker;
|
|
||||||
use yama::remote::{read_message, write_message};
|
|
||||||
|
|
||||||
pub fn offer_pointers<W: Write, RP: RawPile>(
|
|
||||||
pile: &Pile<RP>,
|
|
||||||
writer: &mut W,
|
|
||||||
) -> anyhow::Result<BTreeMap<String, PointerData>> {
|
|
||||||
let mut pointers_to_offer: BTreeMap<String, PointerData> = BTreeMap::new();
|
|
||||||
|
|
||||||
for pointer_name in pile.list_pointers()? {
|
|
||||||
let pointer_data = pile
|
|
||||||
.read_pointer(&pointer_name)?
|
|
||||||
.context("Listed pointer not present")?;
|
|
||||||
|
|
||||||
pointers_to_offer.insert(pointer_name, pointer_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
write_message(writer, &pointers_to_offer)?;
|
|
||||||
Ok(pointers_to_offer)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn ensure_compatible_bypasses(
|
|
||||||
my_full: &Vec<PipelineDescription>,
|
|
||||||
my_bypass: &Vec<PipelineDescription>,
|
|
||||||
their_full: &Vec<PipelineDescription>,
|
|
||||||
their_bypass: &Vec<PipelineDescription>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
ensure!(
|
|
||||||
my_full.starts_with(&my_bypass),
|
|
||||||
"Our full pipeline is not an extension of the bypass pipeline."
|
|
||||||
);
|
|
||||||
ensure!(
|
|
||||||
their_full.starts_with(&their_bypass),
|
|
||||||
"Their full pipeline is not an extension of their bypass pipeline."
|
|
||||||
);
|
|
||||||
|
|
||||||
let my_bypassed_parts = &my_full[my_bypass.len()..];
|
|
||||||
let their_bypassed_parts = &their_full[their_bypass.len()..];
|
|
||||||
|
|
||||||
ensure!(
|
|
||||||
my_bypassed_parts == their_bypassed_parts,
|
|
||||||
"Our bypassed parts and their bypassed parts are not the same.\nOurs: {:?}\nTheirs: {:?}",
|
|
||||||
my_bypassed_parts,
|
|
||||||
their_bypassed_parts
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn negotiate_bypassed_pile<R: Read, W: Write>(
|
|
||||||
pile: &Pile<Arc<Box<dyn RawPile>>>,
|
|
||||||
bypass_pile: &Box<dyn RawPile>,
|
|
||||||
reader: &mut R,
|
|
||||||
writer: &mut W,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let my_full_pipeline = pile.raw_pile.describe_pipeline()?;
|
|
||||||
let my_bypass_pipeline = bypass_pile.describe_pipeline()?;
|
|
||||||
|
|
||||||
write_message(writer, &my_full_pipeline)?;
|
|
||||||
write_message(writer, &my_bypass_pipeline)?;
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
let their_full_pipeline = read_message::<_, Vec<PipelineDescription>>(reader)?;
|
|
||||||
let their_bypass_pipeline = read_message::<_, Vec<PipelineDescription>>(reader)?;
|
|
||||||
|
|
||||||
ensure_compatible_bypasses(
|
|
||||||
&my_full_pipeline,
|
|
||||||
&my_bypass_pipeline,
|
|
||||||
&their_full_pipeline,
|
|
||||||
&their_bypass_pipeline,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn collect_chunk_ids(
|
|
||||||
pile: &Pile<Arc<Box<dyn RawPile>>>,
|
|
||||||
root: &TreeNode,
|
|
||||||
chunk_ids: &mut BTreeSet<ChunkId>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
root.visit(
|
|
||||||
&mut |tree_node, _| {
|
|
||||||
match tree_node {
|
|
||||||
TreeNode::NormalFile { content, .. } => {
|
|
||||||
collect_chunk_ids_from_chunkref(pile, content, chunk_ids)?;
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
"".to_owned(),
|
|
||||||
)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn collect_chunk_ids_from_chunkref(
|
|
||||||
pile: &Pile<Arc<Box<dyn RawPile>>>,
|
|
||||||
chunk_ref: &RecursiveChunkRef,
|
|
||||||
collection: &mut BTreeSet<ChunkId>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
if chunk_ref.depth == 0 {
|
|
||||||
collection.insert(chunk_ref.chunk_id);
|
|
||||||
} else {
|
|
||||||
let shallower_chunk_ref = RecursiveChunkRef {
|
|
||||||
chunk_id: chunk_ref.chunk_id,
|
|
||||||
depth: chunk_ref.depth - 1,
|
|
||||||
};
|
|
||||||
let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
|
|
||||||
let mut next_chunk_id: ChunkId = Default::default();
|
|
||||||
loop {
|
|
||||||
let read = unchunker.read(&mut next_chunk_id[..])?;
|
|
||||||
if read == 0 {
|
|
||||||
break;
|
|
||||||
} else if read < next_chunk_id.len() {
|
|
||||||
unchunker.read_exact(&mut next_chunk_id[read..])?;
|
|
||||||
}
|
|
||||||
collection.insert(next_chunk_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn offering_side<R: Read, W: Write>(
|
|
||||||
pile: &Pile<Arc<Box<dyn RawPile>>>,
|
|
||||||
bypass_pile: &Box<dyn RawPile>,
|
|
||||||
reader: &mut R,
|
|
||||||
writer: &mut W,
|
|
||||||
mut progress: Box<dyn ProgressTracker>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let version = env!("CARGO_PKG_VERSION");
|
|
||||||
let expecting = format!("Datman Pull Accepter {}", version);
|
|
||||||
write_message(writer, &format!("Datman Pull Offerer {}", version))?;
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
let found: String = read_message(reader)?;
|
|
||||||
ensure!(
|
|
||||||
found == expecting,
|
|
||||||
"Version mismatch. Expecting {:?} got {:?}",
|
|
||||||
expecting,
|
|
||||||
found
|
|
||||||
);
|
|
||||||
|
|
||||||
// First 'negotiate' (for now: assert) a pile bypass.
|
|
||||||
// This lets us avoid decompressing things before recompressing them at the other end,
|
|
||||||
// assuming both ends use the same dictionary.
|
|
||||||
negotiate_bypassed_pile(pile, &bypass_pile, reader, writer)?;
|
|
||||||
|
|
||||||
let offered_pointers = offer_pointers(pile, writer)?;
|
|
||||||
let wanted_pointers = read_message::<_, BTreeSet<String>>(reader)?;
|
|
||||||
|
|
||||||
let mut chunks_to_offer: BTreeSet<ChunkId> = BTreeSet::new();
|
|
||||||
|
|
||||||
for pointer_name in &wanted_pointers {
|
|
||||||
let pointer_data = offered_pointers
|
|
||||||
.get(pointer_name)
|
|
||||||
.with_context(|| format!("Requested pointer {:?} was not offered", pointer_name))?;
|
|
||||||
|
|
||||||
collect_chunk_ids_from_chunkref(pile, &pointer_data.chunk_ref, &mut chunks_to_offer)?;
|
|
||||||
|
|
||||||
let root_node = retrieve_tree_node(pile, pointer_data.chunk_ref.clone())?;
|
|
||||||
collect_chunk_ids(pile, &root_node.node, &mut chunks_to_offer)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
write_message(writer, &chunks_to_offer)?;
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
let chunks_to_skip: BTreeSet<ChunkId> = read_message(reader)?;
|
|
||||||
let chunks_to_send: Vec<ChunkId> = chunks_to_offer
|
|
||||||
.difference(&chunks_to_skip)
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
drop(chunks_to_offer);
|
|
||||||
drop(chunks_to_skip);
|
|
||||||
|
|
||||||
let start_sort_by_hints = Instant::now();
|
|
||||||
let chunks_to_send_with_hints: BTreeSet<(u64, ChunkId)> = chunks_to_send
|
|
||||||
.into_iter()
|
|
||||||
.map(|chunk_id| {
|
|
||||||
pile.raw_pile
|
|
||||||
.chunk_id_transfer_ordering_hint(&chunk_id)
|
|
||||||
.map(|hint| (hint, chunk_id))
|
|
||||||
})
|
|
||||||
.collect::<anyhow::Result<_>>()?;
|
|
||||||
let time_to_sort_by_hints = Instant::now() - start_sort_by_hints;
|
|
||||||
info!(
|
|
||||||
"{} s to sort {} chunks by their hints",
|
|
||||||
time_to_sort_by_hints.as_secs_f32(),
|
|
||||||
chunks_to_send_with_hints.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
progress.set_max_size(chunks_to_send_with_hints.len() as u64);
|
|
||||||
progress.set_current(0);
|
|
||||||
for (_hint, chunk_id) in chunks_to_send_with_hints {
|
|
||||||
let chunk_data = bypass_pile
|
|
||||||
.read(Keyspace::Chunk, &chunk_id)?
|
|
||||||
.context("Chunk vanished")?;
|
|
||||||
|
|
||||||
write_message(writer, &Some((chunk_id, chunk_data)))?;
|
|
||||||
progress.inc_progress(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
write_message(writer, &None::<Option<(ChunkId, Vec<u8>)>>)?;
|
|
||||||
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn accepting_side<R: Read, W: Write>(
|
|
||||||
pile: &Pile<Arc<Box<dyn RawPile>>>,
|
|
||||||
bypass_pile: &Box<dyn RawPile>,
|
|
||||||
reader: &mut R,
|
|
||||||
writer: &mut W,
|
|
||||||
mut progress: Box<dyn ProgressTracker>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let version = env!("CARGO_PKG_VERSION");
|
|
||||||
let expecting = format!("Datman Pull Offerer {}", version);
|
|
||||||
write_message(writer, &format!("Datman Pull Accepter {}", version))?;
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
let found: String = read_message(reader)?;
|
|
||||||
ensure!(
|
|
||||||
found == expecting,
|
|
||||||
"Version mismatch. Expecting {:?} got {:?}",
|
|
||||||
expecting,
|
|
||||||
found
|
|
||||||
);
|
|
||||||
|
|
||||||
// First 'negotiate' (for now: assert) a pile bypass.
|
|
||||||
// This lets us avoid decompressing things before recompressing them at the other end,
|
|
||||||
// assuming both ends use the same dictionary.
|
|
||||||
negotiate_bypassed_pile(pile, &bypass_pile, reader, writer)?;
|
|
||||||
|
|
||||||
let offered_pointers: BTreeMap<String, PointerData> = read_message(reader)?;
|
|
||||||
let mut wanted_pointers: BTreeSet<String> = BTreeSet::new();
|
|
||||||
|
|
||||||
for (pointer_name, pointer_data) in &offered_pointers {
|
|
||||||
if pile.read_pointer(pointer_name)?.is_none() {
|
|
||||||
wanted_pointers.insert(pointer_name.clone());
|
|
||||||
if let Some(parent) = &pointer_data.parent_pointer {
|
|
||||||
if pile.read_pointer(parent)?.is_none() && !offered_pointers.contains_key(parent) {
|
|
||||||
bail!("Offered pointer {:?} requires parent {:?} which we don't have and isn't offered.", pointer_name, parent);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
write_message(writer, &wanted_pointers)?;
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
let offered_chunks: BTreeSet<ChunkId> = read_message(reader)?;
|
|
||||||
let mut chunks_to_skip: BTreeSet<ChunkId> = BTreeSet::new();
|
|
||||||
for chunk_id in &offered_chunks {
|
|
||||||
if pile.chunk_exists(chunk_id)? {
|
|
||||||
chunks_to_skip.insert(*chunk_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
write_message(writer, &chunks_to_skip)?;
|
|
||||||
writer.flush()?;
|
|
||||||
|
|
||||||
let num_chunks_to_recv = offered_chunks.len() - chunks_to_skip.len();
|
|
||||||
|
|
||||||
let mut chunks_to_recv: BTreeSet<ChunkId> = offered_chunks
|
|
||||||
.difference(&chunks_to_skip)
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
drop(offered_chunks);
|
|
||||||
drop(chunks_to_skip);
|
|
||||||
|
|
||||||
progress.set_max_size(num_chunks_to_recv as u64);
|
|
||||||
progress.set_current(0);
|
|
||||||
|
|
||||||
while let Some((chunk_id, chunk_data)) = read_message::<_, Option<(ChunkId, Vec<u8>)>>(reader)?
|
|
||||||
{
|
|
||||||
ensure!(
|
|
||||||
chunks_to_recv.remove(&chunk_id),
|
|
||||||
"Received unexpected chunk"
|
|
||||||
);
|
|
||||||
bypass_pile.write(Keyspace::Chunk, &chunk_id, &chunk_data)?;
|
|
||||||
progress.inc_progress(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ensure!(chunks_to_recv.is_empty(), "Unreceived chunks.");
|
|
||||||
|
|
||||||
for (pointer_name, pointer_data) in &offered_pointers {
|
|
||||||
pile.write_pointer(pointer_name, pointer_data)?;
|
|
||||||
}
|
|
||||||
pile.flush()?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -1,456 +0,0 @@
|
||||||
use crate::commands::backup::split_pointer_name;
|
|
||||||
use crate::descriptor::{Descriptor, DestPileDescriptor};
|
|
||||||
use anyhow::Context;
|
|
||||||
use chrono::{Date, DateTime, Utc};
|
|
||||||
use comfy_table::presets::UTF8_FULL;
|
|
||||||
use comfy_table::{Attribute, Cell, Color, ContentArrangement, Table};
|
|
||||||
use humansize::FileSize;
|
|
||||||
use itertools::Itertools;
|
|
||||||
use log::info;
|
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
|
||||||
use std::ffi::CString;
|
|
||||||
use std::io::Read;
|
|
||||||
use std::mem;
|
|
||||||
use std::mem::size_of;
|
|
||||||
use std::os::unix::ffi::OsStrExt;
|
|
||||||
use std::os::unix::fs::MetadataExt;
|
|
||||||
use std::path::Path;
|
|
||||||
use yama::chunking::RecursiveUnchunker;
|
|
||||||
use yama::commands::{load_pile_descriptor, open_pile, retrieve_tree_node};
|
|
||||||
use yama::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
|
|
||||||
use yama::pile::{DebugStatistics, Pile, RawPile};
|
|
||||||
|
|
||||||
// This module generates reports for a Datman system.
|
|
||||||
// Referenced Chunk IDs are counted and used to give an indication of size.
|
|
||||||
// Chunk IDs are summarised into u32s to reduce memory usage. Since the report is approximate,
|
|
||||||
// it doesn't matter if there are a few collisions (although they are still fairly unlikely to
|
|
||||||
// affect much).
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Report {
|
|
||||||
pub last_source_backups: BTreeMap<String, Option<DateTime<Utc>>>,
|
|
||||||
|
|
||||||
pub chunk_usages_aggregated: bool,
|
|
||||||
pub chunk_usage: BTreeMap<String, Sizes>,
|
|
||||||
|
|
||||||
pub debug_stats: Option<DebugStatistics>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Chunk counts attributed to one pointer (or pointer group) in a report.
///
/// All fields count chunks, not bytes; `Default` yields all zeroes.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Sizes {
    /// Total number of chunks that we refer to.
    pub total: u32,

    /// Each referred chunk is counted once here, but divided by the number of sharers.
    /// We are 'morally responsible' for this many chunks.
    pub moral: u32,

    /// Number of chunks that only we point to.
    pub unique: u32,

    /// Number of chunks for which we are the oldest (lexicographically earliest) pointer to point
    /// to those chunks.
    pub rollup: u32,
}
|
|
||||||
|
|
||||||
type CondensedChunkId = u32;
|
|
||||||
|
|
||||||
fn condense_chunk_id(chunk_id: ChunkId) -> CondensedChunkId {
|
|
||||||
CondensedChunkId::from_be_bytes(
|
|
||||||
chunk_id[0..size_of::<CondensedChunkId>()]
|
|
||||||
.try_into()
|
|
||||||
.unwrap(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate_report(
|
|
||||||
dest_pile_descriptor: &DestPileDescriptor,
|
|
||||||
descriptor: &Descriptor,
|
|
||||||
aggregate_chunk_usage_by_month: bool,
|
|
||||||
) -> anyhow::Result<Report> {
|
|
||||||
let pile_descriptor = load_pile_descriptor(&dest_pile_descriptor.path)?;
|
|
||||||
let pile = open_pile(&dest_pile_descriptor.path, &pile_descriptor)?;
|
|
||||||
|
|
||||||
let debug_stats = pile.raw_pile.debug_statistics()?;
|
|
||||||
|
|
||||||
let mut pointers_to_parent_and_chunkids = BTreeMap::new();
|
|
||||||
let mut pointergroups_to_pointers: BTreeMap<String, Vec<String>> = BTreeMap::new();
|
|
||||||
|
|
||||||
info!("Collecting chunk IDs... This will probably be slow.");
|
|
||||||
for pointer_name in pile.list_pointers()? {
|
|
||||||
let pointer = pile
|
|
||||||
.read_pointer(&pointer_name)?
|
|
||||||
.context("listed pointer doesn't exist")?;
|
|
||||||
let root_node = retrieve_tree_node(&pile, pointer.chunk_ref)?;
|
|
||||||
let pointer_chunk_ids = collect_chunk_ids(&pile, &root_node.node)?;
|
|
||||||
|
|
||||||
let pointergroup = if aggregate_chunk_usage_by_month {
|
|
||||||
let (base, date_time) =
|
|
||||||
split_pointer_name(&pointer_name).context("Can't split pointer name")?;
|
|
||||||
format!("{}+{}", base, date_time.format("%Y-%m"))
|
|
||||||
} else {
|
|
||||||
pointer_name.clone()
|
|
||||||
};
|
|
||||||
|
|
||||||
pointergroups_to_pointers
|
|
||||||
.entry(pointergroup)
|
|
||||||
.or_default()
|
|
||||||
.push(pointer_name.clone());
|
|
||||||
|
|
||||||
pointers_to_parent_and_chunkids
|
|
||||||
.insert(pointer_name, (pointer.parent_pointer, pointer_chunk_ids));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now we iterate in reverse order, making a list of count of Chunk IDs.
|
|
||||||
// At the same time, we can also calculate 'rollup' sizes.
|
|
||||||
let mut chunk_sharer_counts: BTreeMap<CondensedChunkId, u16> = BTreeMap::new();
|
|
||||||
|
|
||||||
let mut pointergroup_stats: BTreeMap<String, Sizes> = BTreeMap::new();
|
|
||||||
|
|
||||||
for (pointergroup_name, pointers_in_group) in pointergroups_to_pointers.iter().rev() {
|
|
||||||
let mut deduped_chunks = BTreeSet::new();
|
|
||||||
|
|
||||||
for pointer_name in pointers_in_group {
|
|
||||||
deduped_chunks.extend(iter_over_all_chunkids_incl_parents(
|
|
||||||
&pointers_to_parent_and_chunkids,
|
|
||||||
&pointer_name,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut rollup_count = 0;
|
|
||||||
for chunk in deduped_chunks {
|
|
||||||
let count = chunk_sharer_counts.entry(chunk).or_default();
|
|
||||||
*count += 1;
|
|
||||||
if *count == 1 {
|
|
||||||
rollup_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let entry = pointergroup_stats
|
|
||||||
.entry(pointergroup_name.to_owned())
|
|
||||||
.or_default();
|
|
||||||
entry.rollup = rollup_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now go through again and update all the stats!
|
|
||||||
for (pointergroup_name, pointers_in_group) in &pointergroups_to_pointers {
|
|
||||||
let mut deduped_chunks = BTreeSet::new();
|
|
||||||
|
|
||||||
for pointer_name in pointers_in_group {
|
|
||||||
deduped_chunks.extend(iter_over_all_chunkids_incl_parents(
|
|
||||||
&pointers_to_parent_and_chunkids,
|
|
||||||
&pointer_name,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut unique_count = 0;
|
|
||||||
let mut shared_count_by_sharers = [0u32; 256];
|
|
||||||
let total_count = deduped_chunks.len();
|
|
||||||
for chunk in deduped_chunks {
|
|
||||||
let count = chunk_sharer_counts[&chunk];
|
|
||||||
if count == 1 {
|
|
||||||
unique_count += 1;
|
|
||||||
} else {
|
|
||||||
let num_sharers = (count as usize).min(256);
|
|
||||||
shared_count_by_sharers[num_sharers - 1] += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut sharers_sum: f64 = 0.0;
|
|
||||||
for (sharers_minus_one, count) in shared_count_by_sharers.into_iter().enumerate() {
|
|
||||||
sharers_sum += (count as f64) / (sharers_minus_one + 1) as f64;
|
|
||||||
}
|
|
||||||
|
|
||||||
let entry = pointergroup_stats
|
|
||||||
.entry(pointergroup_name.to_owned())
|
|
||||||
.or_default();
|
|
||||||
entry.moral = (sharers_sum.ceil() as u32) + unique_count;
|
|
||||||
entry.unique = unique_count;
|
|
||||||
entry.total = total_count as u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut last_backed_up = BTreeMap::new();
|
|
||||||
for source_name in descriptor.sources.keys().cloned() {
|
|
||||||
last_backed_up.insert(source_name, None);
|
|
||||||
}
|
|
||||||
|
|
||||||
for pointer_name in pointers_to_parent_and_chunkids.keys() {
|
|
||||||
if let Some((source_name, date_time)) = split_pointer_name(&pointer_name) {
|
|
||||||
last_backed_up.insert(source_name, Some(date_time));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Report {
|
|
||||||
last_source_backups: last_backed_up,
|
|
||||||
chunk_usage: pointergroup_stats,
|
|
||||||
chunk_usages_aggregated: aggregate_chunk_usage_by_month,
|
|
||||||
debug_stats,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Does not filter duplicates...
|
|
||||||
fn iter_over_all_chunkids_incl_parents<'a>(
|
|
||||||
pointers_to_parent_and_chunkids: &'a BTreeMap<
|
|
||||||
String,
|
|
||||||
(Option<String>, BTreeSet<CondensedChunkId>),
|
|
||||||
>,
|
|
||||||
pointer_name: &'a str,
|
|
||||||
) -> Box<dyn Iterator<Item = CondensedChunkId> + 'a> {
|
|
||||||
let (parent, chunks) = &pointers_to_parent_and_chunkids[pointer_name];
|
|
||||||
match parent {
|
|
||||||
None => Box::new(chunks.iter().copied()),
|
|
||||||
Some(parent) => Box::new(chunks.iter().copied().chain(
|
|
||||||
iter_over_all_chunkids_incl_parents(pointers_to_parent_and_chunkids, &parent),
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn collect_chunk_ids<RP: RawPile>(
|
|
||||||
pile: &Pile<RP>,
|
|
||||||
root: &TreeNode,
|
|
||||||
) -> anyhow::Result<BTreeSet<CondensedChunkId>> {
|
|
||||||
let mut chunk_ids = BTreeSet::new();
|
|
||||||
root.visit(
|
|
||||||
&mut |tree_node, _| {
|
|
||||||
match tree_node {
|
|
||||||
TreeNode::NormalFile { content, .. } => {
|
|
||||||
collect_chunk_ids_from_chunkref(pile, content, &mut chunk_ids)?;
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
"".to_owned(),
|
|
||||||
)?;
|
|
||||||
Ok(chunk_ids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn collect_chunk_ids_from_chunkref<RP: RawPile>(
|
|
||||||
pile: &Pile<RP>,
|
|
||||||
chunk_ref: &RecursiveChunkRef,
|
|
||||||
collection: &mut BTreeSet<CondensedChunkId>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
if chunk_ref.depth == 0 {
|
|
||||||
collection.insert(condense_chunk_id(chunk_ref.chunk_id));
|
|
||||||
} else {
|
|
||||||
let shallower_chunk_ref = RecursiveChunkRef {
|
|
||||||
chunk_id: chunk_ref.chunk_id,
|
|
||||||
depth: chunk_ref.depth - 1,
|
|
||||||
};
|
|
||||||
let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
|
|
||||||
let mut next_chunk_id: ChunkId = Default::default();
|
|
||||||
loop {
|
|
||||||
let read = unchunker.read(&mut next_chunk_id[..])?;
|
|
||||||
if read == 0 {
|
|
||||||
break;
|
|
||||||
} else if read < next_chunk_id.len() {
|
|
||||||
unchunker.read_exact(&mut next_chunk_id[read..])?;
|
|
||||||
}
|
|
||||||
collection.insert(condense_chunk_id(next_chunk_id));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Prints the whole report to stdout: the backup-times table first, then the pile-size table.
///
/// # Errors
///
/// Returns the first error produced by either sub-report; the size table is not printed if
/// the time table fails.
pub fn print_report(report: &Report) -> anyhow::Result<()> {
    print_time_report(report).and_then(|()| print_size_report(report))
}
|
|
||||||
|
|
||||||
pub fn print_time_report(report: &Report) -> anyhow::Result<()> {
|
|
||||||
println!("\nBackup times");
|
|
||||||
let mut table = Table::new();
|
|
||||||
table
|
|
||||||
.load_preset(UTF8_FULL)
|
|
||||||
.set_content_arrangement(ContentArrangement::DynamicFullWidth)
|
|
||||||
.enforce_styling();
|
|
||||||
|
|
||||||
table.set_header(vec![
|
|
||||||
Cell::new("Source name").fg(Color::Cyan),
|
|
||||||
Cell::new("Last backed up").fg(Color::Cyan),
|
|
||||||
]);
|
|
||||||
|
|
||||||
let today = Utc::today();
|
|
||||||
|
|
||||||
let sort_by_dates: Vec<(Option<Date<Utc>>, String)> = report
|
|
||||||
.last_source_backups
|
|
||||||
.iter()
|
|
||||||
.map(|(name, datetime)| (datetime.map(|dt| dt.date()), name.to_owned()))
|
|
||||||
.sorted()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
for (date, source_name) in sort_by_dates {
|
|
||||||
match date {
|
|
||||||
None => {
|
|
||||||
table.add_row(vec![
|
|
||||||
Cell::new(source_name).fg(Color::Magenta),
|
|
||||||
Cell::new("NEVER").fg(Color::Red).add_attributes(vec![
|
|
||||||
Attribute::SlowBlink,
|
|
||||||
Attribute::RapidBlink,
|
|
||||||
Attribute::Bold,
|
|
||||||
]),
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
Some(date) => {
|
|
||||||
let number_of_days = today.signed_duration_since(date).num_days();
|
|
||||||
let num_days_human = if number_of_days > 0 {
|
|
||||||
format!("{number_of_days} days ago")
|
|
||||||
} else {
|
|
||||||
format!("today")
|
|
||||||
};
|
|
||||||
|
|
||||||
let colour = if number_of_days < 2 {
|
|
||||||
Color::Green
|
|
||||||
} else if number_of_days < 14 {
|
|
||||||
Color::Yellow
|
|
||||||
} else {
|
|
||||||
Color::Red
|
|
||||||
};
|
|
||||||
|
|
||||||
let formatted_date = date.format("%F");
|
|
||||||
|
|
||||||
let mut val_cell =
|
|
||||||
Cell::new(format!("{formatted_date} {num_days_human}")).fg(colour);
|
|
||||||
if number_of_days > 28 {
|
|
||||||
val_cell = val_cell.add_attribute(Attribute::SlowBlink);
|
|
||||||
}
|
|
||||||
|
|
||||||
table.add_row(vec![Cell::new(source_name).fg(Color::Magenta), val_cell]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("{table}");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn print_size_report(report: &Report) -> anyhow::Result<()> {
|
|
||||||
println!("\nPile size");
|
|
||||||
let mut table = Table::new();
|
|
||||||
table
|
|
||||||
.load_preset(UTF8_FULL)
|
|
||||||
.set_content_arrangement(ContentArrangement::DynamicFullWidth)
|
|
||||||
.enforce_styling();
|
|
||||||
//.set_width(100);
|
|
||||||
table.set_header(vec![
|
|
||||||
Cell::new("Pointer name").fg(Color::Cyan),
|
|
||||||
Cell::new("Rollup size").fg(Color::Magenta),
|
|
||||||
Cell::new("Unique size").fg(Color::Magenta),
|
|
||||||
Cell::new("Moral size").fg(Color::Magenta),
|
|
||||||
Cell::new("Total size").fg(Color::Magenta),
|
|
||||||
]);
|
|
||||||
|
|
||||||
let average_chunk_size = report
|
|
||||||
.debug_stats
|
|
||||||
.as_ref()
|
|
||||||
.map(|stats| stats.total_chunk_size as f64 / stats.number_of_chunks as f64);
|
|
||||||
for (pointer_name, sizes) in &report.chunk_usage {
|
|
||||||
table.add_row(vec![
|
|
||||||
Cell::new(pointer_name).fg(Color::Blue),
|
|
||||||
Cell::new(format_size(sizes.rollup, average_chunk_size)).fg(Color::Yellow),
|
|
||||||
Cell::new(format_size(sizes.unique, average_chunk_size)).fg(Color::Yellow),
|
|
||||||
Cell::new(format_size(sizes.moral, average_chunk_size)).fg(Color::Yellow),
|
|
||||||
Cell::new(format_size(sizes.total, average_chunk_size)).fg(Color::Yellow),
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("{table}");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn format_size(chunks: u32, average_chunk_size: Option<f64>) -> String {
|
|
||||||
let est_size_suffix = average_chunk_size
|
|
||||||
.map(|bytes_per_chunk| {
|
|
||||||
let num_bytes = (chunks as f64 * bytes_per_chunk) as u64;
|
|
||||||
let mut format = humansize::file_size_opts::BINARY;
|
|
||||||
format.decimal_places = 1;
|
|
||||||
format!(" ~{}", num_bytes.file_size(format).unwrap())
|
|
||||||
})
|
|
||||||
.unwrap_or_default();
|
|
||||||
format!("{} c{}", chunks, est_size_suffix)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn calculate_total_filesize_of_dir(dir: &Path) -> anyhow::Result<u64> {
|
|
||||||
let mut total = 0;
|
|
||||||
for file in std::fs::read_dir(dir)? {
|
|
||||||
let file = file?;
|
|
||||||
let metadata = file.metadata()?;
|
|
||||||
total += metadata.size();
|
|
||||||
if metadata.is_dir() {
|
|
||||||
total += calculate_total_filesize_of_dir(&file.path())?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(total)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn print_filesystem_space(pile_path: &Path) -> anyhow::Result<()> {
|
|
||||||
let usage_for_pile = calculate_total_filesize_of_dir(&pile_path)?;
|
|
||||||
|
|
||||||
let path_c = CString::new(pile_path.as_os_str().as_bytes()).unwrap();
|
|
||||||
let stats = unsafe {
|
|
||||||
let mut stats: libc::statfs = mem::zeroed();
|
|
||||||
match libc::statfs(path_c.as_ptr(), &mut stats) {
|
|
||||||
0 => Ok(stats),
|
|
||||||
other => Err(std::io::Error::from_raw_os_error(other)),
|
|
||||||
}
|
|
||||||
}?;
|
|
||||||
|
|
||||||
// On a BTRFS system with 2 disks in RAID1, note (about df -h):
|
|
||||||
// - 'Size' shows the average size of the two disks. I think of it as 'ideal size'.
|
|
||||||
// - 'Avail' seems to show the actual number of bytes usable.
|
|
||||||
// - 'Used' seems to show the actual number of bytes used.
|
|
||||||
// In short: probably avoid relying on 'size'.
|
|
||||||
|
|
||||||
let block_size = stats.f_bsize as i64;
|
|
||||||
let used_bytes = (stats.f_blocks - stats.f_bfree) as i64 * block_size;
|
|
||||||
let avail_bytes = stats.f_bavail as i64 * block_size;
|
|
||||||
let usable_bytes = used_bytes + avail_bytes;
|
|
||||||
let theoretical_size = stats.f_blocks as i64 * block_size;
|
|
||||||
|
|
||||||
let mut format = humansize::file_size_opts::BINARY;
|
|
||||||
format.decimal_places = 1;
|
|
||||||
format.decimal_zeroes = 1;
|
|
||||||
|
|
||||||
println!("\nFilesystem Information");
|
|
||||||
|
|
||||||
let mut table = Table::new();
|
|
||||||
table
|
|
||||||
.load_preset(UTF8_FULL)
|
|
||||||
.set_content_arrangement(ContentArrangement::DynamicFullWidth)
|
|
||||||
.enforce_styling();
|
|
||||||
//.set_width(100);
|
|
||||||
table.set_header(vec![
|
|
||||||
Cell::new("Theoretical Size").fg(Color::Cyan),
|
|
||||||
Cell::new("Usable Size").fg(Color::Cyan),
|
|
||||||
Cell::new("Used").fg(Color::Cyan),
|
|
||||||
Cell::new("Used for Pile").fg(Color::Cyan),
|
|
||||||
Cell::new("Available").fg(Color::Cyan),
|
|
||||||
]);
|
|
||||||
|
|
||||||
let available_space_colour = if avail_bytes < 8 * 1024 * 1024 * 1024 {
|
|
||||||
Color::Red
|
|
||||||
} else if avail_bytes < 64 * 1024 * 1024 * 1024 {
|
|
||||||
Color::Yellow
|
|
||||||
} else {
|
|
||||||
Color::Green
|
|
||||||
};
|
|
||||||
|
|
||||||
table.add_row(vec![
|
|
||||||
Cell::new(format!(
|
|
||||||
"{:>9}",
|
|
||||||
theoretical_size.file_size(&format).unwrap()
|
|
||||||
))
|
|
||||||
.fg(Color::Blue),
|
|
||||||
Cell::new(format!("{:>9}", usable_bytes.file_size(&format).unwrap())).fg(Color::Blue),
|
|
||||||
Cell::new(format!("{:>9}", used_bytes.file_size(&format).unwrap())).fg(Color::Blue),
|
|
||||||
Cell::new(format!("{:>9}", usage_for_pile.file_size(&format).unwrap())).fg(Color::Blue),
|
|
||||||
Cell::new(format!("{:>9}", avail_bytes.file_size(&format).unwrap()))
|
|
||||||
.fg(available_space_colour),
|
|
||||||
]);
|
|
||||||
|
|
||||||
print!("{table}");
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -38,10 +38,6 @@ pub struct Descriptor {
|
||||||
pub piles: HashMap<String, DestPileDescriptor>,
|
pub piles: HashMap<String, DestPileDescriptor>,
|
||||||
|
|
||||||
pub remote_hosts: HashMap<String, RemoteHostDescriptor>,
|
pub remote_hosts: HashMap<String, RemoteHostDescriptor>,
|
||||||
|
|
||||||
#[serde(default)]
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub retention: Option<RetentionPolicyConfig>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||||
|
@ -50,22 +46,12 @@ pub struct RemoteHostDescriptor {
|
||||||
pub path_to_datman: Option<String>,
|
pub path_to_datman: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retention settings, with one `u32` per calendar granularity.
///
/// NOTE(review): presumably each field is the number of backups of that granularity to
/// keep — confirm against the code that consumes this configuration.
#[derive(Clone, Serialize, Deserialize, Debug)]
|
|
||||||
pub struct RetentionPolicyConfig {
|
|
||||||
pub daily: u32,
|
|
||||||
pub weekly: u32,
|
|
||||||
pub monthly: u32,
|
|
||||||
pub yearly: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||||
#[serde(untagged)]
|
#[serde(untagged)]
|
||||||
pub enum SourceDescriptor {
|
pub enum SourceDescriptor {
|
||||||
DirectorySource {
|
DirectorySource {
|
||||||
hostname: String,
|
hostname: String,
|
||||||
directory: PathBuf,
|
directory: PathBuf,
|
||||||
#[serde(default)]
|
|
||||||
cross_filesystems: bool,
|
|
||||||
},
|
},
|
||||||
VirtualSource {
|
VirtualSource {
|
||||||
/// The name of the helper program that will be used to do this backup.
|
/// The name of the helper program that will be used to do this backup.
|
||||||
|
|
|
@ -15,10 +15,10 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::{BTreeSet, HashMap};
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader, Write};
|
use std::io::{BufRead, BufReader, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
|
@ -222,23 +222,6 @@ impl LabellingRules {
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_exclusions_set(&self, base: &Path) -> BTreeSet<PathBuf> {
|
|
||||||
let mut exclusions = BTreeSet::new();
|
|
||||||
|
|
||||||
for (ext_path, state) in &self.position_based_rules {
|
|
||||||
assert!(ext_path.is_empty() || ext_path.starts_with('/'));
|
|
||||||
let full_path = PathBuf::from(format!(
|
|
||||||
"{}{ext_path}",
|
|
||||||
base.to_str().expect("base path must always be utf-8")
|
|
||||||
));
|
|
||||||
if state == &Excluded {
|
|
||||||
exclusions.insert(full_path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
exclusions
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Uninteractively label the nodes.
|
/// Uninteractively label the nodes.
|
||||||
|
|
|
@ -1,19 +1,15 @@
|
||||||
use crate::commands::backup::{get_pointer_name_at, label_filter_and_convert};
|
use crate::commands::backup::{get_pointer_name_at, label_filter_and_convert};
|
||||||
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor};
|
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor};
|
||||||
use crate::labelling::load_labelling_rules;
|
|
||||||
use crate::tree::FileTree;
|
use crate::tree::FileTree;
|
||||||
use anyhow::{anyhow, bail};
|
use anyhow::{anyhow, bail};
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use log::info;
|
use log::info;
|
||||||
use std::collections::BTreeSet;
|
|
||||||
use std::io::{Read, Write};
|
use std::io::{Read, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
use std::process::{Child, Command, Stdio};
|
use std::process::{Child, Command, Stdio};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use yama::commands::{load_pile_descriptor, open_pile};
|
use yama::commands::{load_pile_descriptor, open_pile};
|
||||||
use yama::definitions::{PartialPointerData, TreeNode};
|
use yama::definitions::TreeNode;
|
||||||
use yama::operations::storing::{pointer_ops_prepare_to_store, pointers_ops_after_store};
|
|
||||||
use yama::pile::access_guard::PileGuard;
|
|
||||||
use yama::pile::{Pile, RawPile, StoragePipelineSettings};
|
use yama::pile::{Pile, RawPile, StoragePipelineSettings};
|
||||||
use yama::progress::ProgressTracker;
|
use yama::progress::ProgressTracker;
|
||||||
use yama::remote::responder::{Responder, ResponderWritingPipeline};
|
use yama::remote::responder::{Responder, ResponderWritingPipeline};
|
||||||
|
@ -49,14 +45,10 @@ pub fn scanning<R: Read, W: Write>(
|
||||||
read: &mut R,
|
read: &mut R,
|
||||||
write: &mut W,
|
write: &mut W,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
one_filesystem: bool,
|
|
||||||
exclusions: &BTreeSet<PathBuf>,
|
|
||||||
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
|
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
|
||||||
info!("Scanning.");
|
info!("Scanning.");
|
||||||
write_message(write, &"scan")?;
|
write_message(write, &"scan")?;
|
||||||
write_message(write, &path)?;
|
write_message(write, &path)?;
|
||||||
write_message(write, &one_filesystem)?;
|
|
||||||
write_message(write, exclusions)?;
|
|
||||||
write.flush()?;
|
write.flush()?;
|
||||||
let scan_result: Option<FileTree<(), (), (), ()>> = read_message(read)?;
|
let scan_result: Option<FileTree<(), (), (), ()>> = read_message(read)?;
|
||||||
|
|
||||||
|
@ -72,15 +64,19 @@ pub fn chunking<
|
||||||
read: R,
|
read: R,
|
||||||
mut write: W,
|
mut write: W,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
|
pointer_name: String,
|
||||||
tree_node: &TreeNode,
|
tree_node: &TreeNode,
|
||||||
raw_pile: Arc<RP>,
|
raw_pile: Arc<RP>,
|
||||||
|
parent: Option<String>,
|
||||||
progress_bar: PT,
|
progress_bar: PT,
|
||||||
use_writing_pipeline: bool,
|
use_writing_pipeline: bool,
|
||||||
) -> anyhow::Result<(R, W, PartialPointerData)> {
|
) -> anyhow::Result<(R, W)> {
|
||||||
info!("Chunking.");
|
info!("Chunking.");
|
||||||
write_message(&mut write, &"chunk")?;
|
write_message(&mut write, &"chunk")?;
|
||||||
write_message(&mut write, &path)?;
|
write_message(&mut write, &path)?;
|
||||||
|
write_message(&mut write, &pointer_name)?;
|
||||||
write_message(&mut write, tree_node)?;
|
write_message(&mut write, tree_node)?;
|
||||||
|
write_message(&mut write, &parent)?;
|
||||||
write.flush()?;
|
write.flush()?;
|
||||||
|
|
||||||
let (writing_pipeline, control_rx) = if use_writing_pipeline {
|
let (writing_pipeline, control_rx) = if use_writing_pipeline {
|
||||||
|
@ -101,13 +97,11 @@ pub fn chunking<
|
||||||
(None, None)
|
(None, None)
|
||||||
};
|
};
|
||||||
|
|
||||||
let guarded_pile = PileGuard::new(Arc::clone(&raw_pile), true);
|
|
||||||
|
|
||||||
let (r_handle, w_handle, join_handles) = Responder::start(
|
let (r_handle, w_handle, join_handles) = Responder::start(
|
||||||
read,
|
read,
|
||||||
write,
|
write,
|
||||||
get_number_of_workers("YAMA_RESPONDERS") as u16,
|
get_number_of_workers("YAMA_RESPONDERS") as u16,
|
||||||
Arc::new(guarded_pile),
|
raw_pile,
|
||||||
writing_pipeline,
|
writing_pipeline,
|
||||||
progress_bar,
|
progress_bar,
|
||||||
);
|
);
|
||||||
|
@ -117,7 +111,7 @@ pub fn chunking<
|
||||||
for handle in join_handles {
|
for handle in join_handles {
|
||||||
handle.join().expect("Join handle should not fail");
|
handle.join().expect("Join handle should not fail");
|
||||||
}
|
}
|
||||||
let mut read = r_handle.join().unwrap();
|
let read = r_handle.join().unwrap();
|
||||||
let write = w_handle.join().unwrap();
|
let write = w_handle.join().unwrap();
|
||||||
|
|
||||||
if let Some(control_rx) = control_rx {
|
if let Some(control_rx) = control_rx {
|
||||||
|
@ -128,9 +122,7 @@ pub fn chunking<
|
||||||
|
|
||||||
info!("Remote finished chunking.");
|
info!("Remote finished chunking.");
|
||||||
|
|
||||||
let pointer_data: PartialPointerData = read_message(&mut read)?;
|
Ok((read, write))
|
||||||
|
|
||||||
Ok((read, write, pointer_data))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn quit<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
|
pub fn quit<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
|
||||||
|
@ -183,7 +175,6 @@ pub fn backup_remote_source_to_destination<PT: ProgressTracker + Send + 'static>
|
||||||
SourceDescriptor::DirectorySource {
|
SourceDescriptor::DirectorySource {
|
||||||
hostname,
|
hostname,
|
||||||
directory,
|
directory,
|
||||||
cross_filesystems,
|
|
||||||
} => {
|
} => {
|
||||||
let remote_host_descriptor = descriptor
|
let remote_host_descriptor = descriptor
|
||||||
.remote_hosts
|
.remote_hosts
|
||||||
|
@ -203,22 +194,13 @@ pub fn backup_remote_source_to_destination<PT: ProgressTracker + Send + 'static>
|
||||||
info!("Connecting...");
|
info!("Connecting...");
|
||||||
introduction(&mut read, &mut write)?;
|
introduction(&mut read, &mut write)?;
|
||||||
|
|
||||||
let rules = load_labelling_rules(desc_path, source_name)?;
|
|
||||||
let exclusions = rules.get_exclusions_set(directory);
|
|
||||||
|
|
||||||
// then request to scan
|
// then request to scan
|
||||||
info!("Requesting scan... (this may take some time)");
|
info!("Requesting scan... (this may take some time)");
|
||||||
let scan_result = scanning(
|
let scan_result = scanning(&mut read, &mut write, directory.as_ref())?
|
||||||
&mut read,
|
|
||||||
&mut write,
|
|
||||||
directory.as_ref(),
|
|
||||||
!*cross_filesystems,
|
|
||||||
&exclusions,
|
|
||||||
)?
|
|
||||||
.ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?;
|
.ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?;
|
||||||
|
|
||||||
let mut root =
|
let root =
|
||||||
label_filter_and_convert(scan_result, descriptor, source_name, &rules, dest)?
|
label_filter_and_convert(scan_result, descriptor, desc_path, source_name, dest)?
|
||||||
.ok_or_else(|| anyhow!("Empty filter..."))?;
|
.ok_or_else(|| anyhow!("Empty filter..."))?;
|
||||||
|
|
||||||
let absolute_dest_path = desc_path.join(&dest.path);
|
let absolute_dest_path = desc_path.join(&dest.path);
|
||||||
|
@ -266,27 +248,19 @@ pub fn backup_remote_source_to_destination<PT: ProgressTracker + Send + 'static>
|
||||||
let raw_pile = Arc::new(pile.raw_pile);
|
let raw_pile = Arc::new(pile.raw_pile);
|
||||||
let pile = Pile::new(raw_pile.clone());
|
let pile = Pile::new(raw_pile.clone());
|
||||||
|
|
||||||
pointer_ops_prepare_to_store(&pile, &mut root, &parent)?;
|
let (mut read, mut write) = chunking(
|
||||||
|
|
||||||
info!(
|
|
||||||
"Have pointer_name = {:?}, parent = {:?}",
|
|
||||||
pointer_name, parent
|
|
||||||
);
|
|
||||||
|
|
||||||
let (mut read, mut write, pointer_data) = chunking(
|
|
||||||
read,
|
read,
|
||||||
write,
|
write,
|
||||||
directory.as_ref(),
|
directory.as_ref(),
|
||||||
|
pointer_name.clone(),
|
||||||
&root,
|
&root,
|
||||||
raw_pile,
|
raw_pile,
|
||||||
|
parent,
|
||||||
progress_bar,
|
progress_bar,
|
||||||
true,
|
true,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
quit(&mut read, &mut write)?;
|
quit(&mut read, &mut write)?;
|
||||||
|
|
||||||
pointers_ops_after_store(&pile, &pointer_name, &pointer_data.complete(parent))?;
|
|
||||||
|
|
||||||
pile.flush()?;
|
pile.flush()?;
|
||||||
|
|
||||||
info!("Stored! Checking for existence...");
|
info!("Stored! Checking for existence...");
|
||||||
|
|
|
@ -1,25 +1,21 @@
|
||||||
// This file implements the responder side of the backup source protocol -- the protocol used
|
// This file implements the responder side of the backup source protocol -- the protocol used
|
||||||
// to connect to remote backup sources.
|
// to connect to remote backup sources.
|
||||||
|
|
||||||
use std::collections::BTreeSet;
|
use crate::tree::scan;
|
||||||
|
use anyhow::bail;
|
||||||
|
use crossbeam_channel::Sender;
|
||||||
|
use log::info;
|
||||||
use std::io::{stdin, stdout, Read, Write};
|
use std::io::{stdin, stdout, Read, Write};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use yama::definitions::TreeNode;
|
||||||
use anyhow::bail;
|
|
||||||
use crossbeam_channel::Sender;
|
|
||||||
use log::info;
|
|
||||||
|
|
||||||
use yama::definitions::{PartialPointerData, TreeNode};
|
|
||||||
use yama::pile::{Pile, RawPile};
|
use yama::pile::{Pile, RawPile};
|
||||||
use yama::progress::ProgressTracker;
|
use yama::progress::ProgressTracker;
|
||||||
use yama::remote::requester::Requester;
|
use yama::remote::requester::Requester;
|
||||||
use yama::remote::{read_message, write_message, RequestBody, ResponseBody};
|
use yama::remote::{read_message, write_message, RequestBody, ResponseBody};
|
||||||
use yama::utils::get_number_of_workers;
|
use yama::utils::get_number_of_workers;
|
||||||
|
|
||||||
use crate::tree::scan;
|
|
||||||
|
|
||||||
pub fn introduction<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
|
pub fn introduction<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
|
||||||
let version = env!("CARGO_PKG_VERSION");
|
let version = env!("CARGO_PKG_VERSION");
|
||||||
write_message(
|
write_message(
|
||||||
|
@ -43,14 +39,48 @@ pub fn introduction<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::R
|
||||||
|
|
||||||
pub fn scanning<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
|
pub fn scanning<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
|
||||||
let path: PathBuf = read_message(read)?;
|
let path: PathBuf = read_message(read)?;
|
||||||
let one_filesystem: bool = read_message(read)?;
|
let scan_result = scan(&path)?;
|
||||||
let exclusions: BTreeSet<PathBuf> = read_message(read)?;
|
|
||||||
let scan_result = scan(&path, one_filesystem, &exclusions)?;
|
|
||||||
write_message(write, &scan_result)?;
|
write_message(write, &scan_result)?;
|
||||||
write.flush()?;
|
write.flush()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn chunking<R: Read + Send + 'static, W: Write + Send + 'static>(
|
||||||
|
mut read: R,
|
||||||
|
write: W,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let path: PathBuf = read_message(&mut read)?;
|
||||||
|
let pointer_name: String = read_message(&mut read)?;
|
||||||
|
let tree_node: TreeNode = read_message(&mut read)?;
|
||||||
|
let parent: Option<String> = read_message(&mut read)?;
|
||||||
|
|
||||||
|
let (yama_requester, requester_join_handles) = Requester::new(read, write);
|
||||||
|
|
||||||
|
let raw_pile: Box<dyn RawPile> = Box::new(yama_requester);
|
||||||
|
|
||||||
|
let pile = Pile::new(raw_pile);
|
||||||
|
|
||||||
|
// TODO TODO progress
|
||||||
|
let progress_bar = &mut ();
|
||||||
|
|
||||||
|
yama::operations::storing::store_fully(
|
||||||
|
Arc::new(pile),
|
||||||
|
&path,
|
||||||
|
&pointer_name,
|
||||||
|
tree_node,
|
||||||
|
parent,
|
||||||
|
get_number_of_workers("YAMA_CHUNKERS"),
|
||||||
|
progress_bar,
|
||||||
|
true,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
for join_handle in requester_join_handles {
|
||||||
|
join_handle.join().expect("Expected to join handle");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ProgressSender {
|
pub struct ProgressSender {
|
||||||
pub last_sent: Instant,
|
pub last_sent: Instant,
|
||||||
pub current_progress: u64,
|
pub current_progress: u64,
|
||||||
|
@ -61,7 +91,7 @@ pub struct ProgressSender {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ProgressSender {
|
impl ProgressSender {
|
||||||
pub fn send_now(&mut self, _include_message: bool) {
|
pub fn send_now(&mut self, include_message: bool) {
|
||||||
self.sender
|
self.sender
|
||||||
.send((
|
.send((
|
||||||
RequestBody::Progress {
|
RequestBody::Progress {
|
||||||
|
@ -75,6 +105,7 @@ impl ProgressSender {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn send_if_overdue(&mut self) {
|
pub fn send_if_overdue(&mut self) {
|
||||||
|
//info!("send if overdue...");
|
||||||
if Instant::now().duration_since(self.last_sent).as_millis() >= 1024 {
|
if Instant::now().duration_since(self.last_sent).as_millis() >= 1024 {
|
||||||
self.send_now(false);
|
self.send_now(false);
|
||||||
}
|
}
|
||||||
|
@ -98,17 +129,23 @@ impl ProgressTracker for ProgressSender {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO use io-streams crate and get rid of the duplication!!
|
pub fn chunking_stdio() -> anyhow::Result<()> {
|
||||||
pub fn chunking_stdio() -> anyhow::Result<PartialPointerData> {
|
let (path, pointer_name, tree_node, parent) = {
|
||||||
let (path, tree_node) = {
|
|
||||||
let stdin = stdin();
|
let stdin = stdin();
|
||||||
let mut read = stdin.lock();
|
let mut read = stdin.lock();
|
||||||
let path: PathBuf = read_message(&mut read)?;
|
let path: PathBuf = read_message(&mut read)?;
|
||||||
|
let pointer_name: String = read_message(&mut read)?;
|
||||||
let tree_node: TreeNode = read_message(&mut read)?;
|
let tree_node: TreeNode = read_message(&mut read)?;
|
||||||
(path, tree_node)
|
let parent: Option<String> = read_message(&mut read)?;
|
||||||
|
(path, pointer_name, tree_node, parent)
|
||||||
};
|
};
|
||||||
|
|
||||||
let (pointer_data, requester_join_handles) = {
|
info!(
|
||||||
|
"Have pointer_name = {:?}, parent = {:?}",
|
||||||
|
pointer_name, parent
|
||||||
|
);
|
||||||
|
|
||||||
|
let requester_join_handles = {
|
||||||
let (yama_requester, requester_join_handles) = Requester::new_from_stdio();
|
let (yama_requester, requester_join_handles) = Requester::new_from_stdio();
|
||||||
let command_sender = yama_requester.clone_command_sender();
|
let command_sender = yama_requester.clone_command_sender();
|
||||||
info!("progress sender in use");
|
info!("progress sender in use");
|
||||||
|
@ -124,15 +161,18 @@ pub fn chunking_stdio() -> anyhow::Result<PartialPointerData> {
|
||||||
|
|
||||||
let pile = Pile::new(raw_pile);
|
let pile = Pile::new(raw_pile);
|
||||||
|
|
||||||
let pointer_data = yama::operations::storing::store_without_pointer_ops(
|
yama::operations::storing::store_fully(
|
||||||
&Arc::new(pile),
|
Arc::new(pile),
|
||||||
&path,
|
&path,
|
||||||
|
&pointer_name,
|
||||||
tree_node,
|
tree_node,
|
||||||
|
parent,
|
||||||
get_number_of_workers("YAMA_CHUNKERS"),
|
get_number_of_workers("YAMA_CHUNKERS"),
|
||||||
&mut progress_bar,
|
&mut progress_bar,
|
||||||
|
true,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
(pointer_data, requester_join_handles)
|
requester_join_handles
|
||||||
};
|
};
|
||||||
|
|
||||||
info!("Waiting to join.");
|
info!("Waiting to join.");
|
||||||
|
@ -143,7 +183,20 @@ pub fn chunking_stdio() -> anyhow::Result<PartialPointerData> {
|
||||||
|
|
||||||
info!("Chunking completed.");
|
info!("Chunking completed.");
|
||||||
|
|
||||||
Ok(pointer_data)
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn handler<R: Read + Send + 'static, W: Write + Send + 'static>(
|
||||||
|
mut read: R,
|
||||||
|
mut write: W,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
introduction(&mut read, &mut write)?;
|
||||||
|
|
||||||
|
scanning(&mut read, &mut write)?;
|
||||||
|
|
||||||
|
chunking(read, write)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn handler_stdio() -> anyhow::Result<()> {
|
pub fn handler_stdio() -> anyhow::Result<()> {
|
||||||
|
@ -166,11 +219,9 @@ pub fn handler_stdio() -> anyhow::Result<()> {
|
||||||
info!("Chunking.");
|
info!("Chunking.");
|
||||||
drop(read);
|
drop(read);
|
||||||
drop(write);
|
drop(write);
|
||||||
let pointer_data = chunking_stdio()?;
|
chunking_stdio()?;
|
||||||
read = stdin.lock();
|
read = stdin.lock();
|
||||||
write = stdout.lock();
|
write = stdout.lock();
|
||||||
write_message(&mut write, &pointer_data)?;
|
|
||||||
write.flush()?;
|
|
||||||
}
|
}
|
||||||
"exit" => {
|
"exit" => {
|
||||||
write_message(&mut write, &"exit")?;
|
write_message(&mut write, &"exit")?;
|
||||||
|
|
|
@ -15,16 +15,16 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
use std::collections::BTreeMap;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::fs::{read_link, symlink_metadata, DirEntry, Metadata};
|
use std::fs::{read_link, symlink_metadata, DirEntry, Metadata};
|
||||||
use std::io::ErrorKind;
|
use std::io::ErrorKind;
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||||
use log::{debug, info, warn};
|
use log::warn;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
pub use yama::definitions::FilesystemOwnership;
|
pub use yama::definitions::FilesystemOwnership;
|
||||||
|
@ -216,18 +216,12 @@ pub fn mtime_msec(metadata: &Metadata) -> u64 {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan the filesystem to produce a Tree, using a default progress bar.
|
/// Scan the filesystem to produce a Tree, using a default progress bar.
|
||||||
pub fn scan(
|
pub fn scan(path: &Path) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
|
||||||
path: &Path,
|
|
||||||
one_filesystem: bool,
|
|
||||||
exclusions: &BTreeSet<PathBuf>,
|
|
||||||
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
|
|
||||||
let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(2));
|
let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(2));
|
||||||
pbar.set_style(ProgressStyle::default_spinner().template("{spinner} {pos:7} {msg}"));
|
pbar.set_style(ProgressStyle::default_spinner().template("{spinner} {pos:7} {msg}"));
|
||||||
pbar.set_message("dir scan");
|
pbar.set_message("dir scan");
|
||||||
|
|
||||||
let one_filesystem = if one_filesystem { Some(None) } else { None };
|
let result = scan_with_progress_bar(path, &pbar);
|
||||||
|
|
||||||
let result = scan_with_progress_bar(path, &pbar, one_filesystem, exclusions);
|
|
||||||
pbar.finish_at_current_pos();
|
pbar.finish_at_current_pos();
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
@ -236,15 +230,7 @@ pub fn scan(
|
||||||
pub fn scan_with_progress_bar(
|
pub fn scan_with_progress_bar(
|
||||||
path: &Path,
|
path: &Path,
|
||||||
progress_bar: &ProgressBar,
|
progress_bar: &ProgressBar,
|
||||||
mut one_filesystem: Option<Option<u64>>,
|
|
||||||
exclusions: &BTreeSet<PathBuf>,
|
|
||||||
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
|
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
|
||||||
if exclusions.contains(path) {
|
|
||||||
// Don't enter excluded paths.
|
|
||||||
debug!("Not descending into excluded path: {:?}", path);
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
let metadata_res = symlink_metadata(path);
|
let metadata_res = symlink_metadata(path);
|
||||||
progress_bar.inc(1);
|
progress_bar.inc(1);
|
||||||
if let Err(e) = &metadata_res {
|
if let Err(e) = &metadata_res {
|
||||||
|
@ -263,14 +249,6 @@ pub fn scan_with_progress_bar(
|
||||||
let metadata = metadata_res?;
|
let metadata = metadata_res?;
|
||||||
let filetype = metadata.file_type();
|
let filetype = metadata.file_type();
|
||||||
|
|
||||||
if let Some(one_filesystem) = one_filesystem.as_mut() {
|
|
||||||
let this_fs = metadata.dev();
|
|
||||||
if *one_filesystem.get_or_insert(this_fs) != this_fs {
|
|
||||||
info!("Stopping at filesystem boundary: {:?}", path);
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*let name = path
|
/*let name = path
|
||||||
.file_name()
|
.file_name()
|
||||||
.ok_or(anyhow!("No filename, wat"))?
|
.ok_or(anyhow!("No filename, wat"))?
|
||||||
|
@ -316,23 +294,15 @@ pub fn scan_with_progress_bar(
|
||||||
|
|
||||||
for entry in dir_read? {
|
for entry in dir_read? {
|
||||||
let entry: DirEntry = entry?;
|
let entry: DirEntry = entry?;
|
||||||
|
let scanned = scan_with_progress_bar(&entry.path(), progress_bar)?;
|
||||||
if entry.file_name() == ".datmanskip" {
|
|
||||||
// Directories with .datmanskip in them are to be skipped entirely.
|
|
||||||
// TODO(perf): should this be checked upfront before some children may already
|
|
||||||
// have been scanned?
|
|
||||||
debug!("Skipping {path:?} because it has a .datmanskip file.");
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
let scanned =
|
|
||||||
scan_with_progress_bar(&entry.path(), progress_bar, one_filesystem, exclusions)?;
|
|
||||||
if let Some(scanned) = scanned {
|
if let Some(scanned) = scanned {
|
||||||
if let Ok(filename) = entry.file_name().into_string() {
|
children.insert(
|
||||||
children.insert(filename, scanned);
|
entry
|
||||||
} else {
|
.file_name()
|
||||||
warn!("Non-UTF-8 filename; ignoring: {:?}", entry.file_name())
|
.into_string()
|
||||||
}
|
.expect("OsString not String"),
|
||||||
|
scanned,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
177
flake.lock
177
flake.lock
|
@ -1,177 +0,0 @@
|
||||||
{
|
|
||||||
"nodes": {
|
|
||||||
"flake-utils": {
|
|
||||||
"inputs": {
|
|
||||||
"systems": "systems"
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1710146030,
|
|
||||||
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
|
||||||
"owner": "numtide",
|
|
||||||
"repo": "flake-utils",
|
|
||||||
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "numtide",
|
|
||||||
"repo": "flake-utils",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"naersk": {
|
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": [
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1662220400,
|
|
||||||
"narHash": "sha256-9o2OGQqu4xyLZP9K6kNe1pTHnyPz0Wr3raGYnr9AIgY=",
|
|
||||||
"owner": "nix-community",
|
|
||||||
"repo": "naersk",
|
|
||||||
"rev": "6944160c19cb591eb85bbf9b2f2768a935623ed3",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "nix-community",
|
|
||||||
"repo": "naersk",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nix-github-actions": {
|
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": [
|
|
||||||
"poetry2nix",
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1703863825,
|
|
||||||
"narHash": "sha256-rXwqjtwiGKJheXB43ybM8NwWB8rO2dSRrEqes0S7F5Y=",
|
|
||||||
"owner": "nix-community",
|
|
||||||
"repo": "nix-github-actions",
|
|
||||||
"rev": "5163432afc817cf8bd1f031418d1869e4c9d5547",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "nix-community",
|
|
||||||
"repo": "nix-github-actions",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nixpkgs": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1714971268,
|
|
||||||
"narHash": "sha256-IKwMSwHj9+ec660l+I4tki/1NRoeGpyA2GdtdYpAgEw=",
|
|
||||||
"owner": "NixOS",
|
|
||||||
"repo": "nixpkgs",
|
|
||||||
"rev": "27c13997bf450a01219899f5a83bd6ffbfc70d3c",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"id": "nixpkgs",
|
|
||||||
"ref": "nixos-23.11",
|
|
||||||
"type": "indirect"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"poetry2nix": {
|
|
||||||
"inputs": {
|
|
||||||
"flake-utils": "flake-utils",
|
|
||||||
"nix-github-actions": "nix-github-actions",
|
|
||||||
"nixpkgs": [
|
|
||||||
"nixpkgs"
|
|
||||||
],
|
|
||||||
"systems": "systems_2",
|
|
||||||
"treefmt-nix": "treefmt-nix"
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1715017507,
|
|
||||||
"narHash": "sha256-RN2Vsba56PfX02DunWcZYkMLsipp928h+LVAWMYmbZg=",
|
|
||||||
"owner": "nix-community",
|
|
||||||
"repo": "poetry2nix",
|
|
||||||
"rev": "e6b36523407ae6a7a4dfe29770c30b3a3563b43a",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "nix-community",
|
|
||||||
"repo": "poetry2nix",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"root": {
|
|
||||||
"inputs": {
|
|
||||||
"naersk": "naersk",
|
|
||||||
"nixpkgs": "nixpkgs",
|
|
||||||
"poetry2nix": "poetry2nix",
|
|
||||||
"utils": "utils"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"systems": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1681028828,
|
|
||||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"systems_2": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1681028828,
|
|
||||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
|
||||||
"owner": "nix-systems",
|
|
||||||
"repo": "default",
|
|
||||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"id": "systems",
|
|
||||||
"type": "indirect"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"treefmt-nix": {
|
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": [
|
|
||||||
"poetry2nix",
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1714058656,
|
|
||||||
"narHash": "sha256-Qv4RBm4LKuO4fNOfx9wl40W2rBbv5u5m+whxRYUMiaA=",
|
|
||||||
"owner": "numtide",
|
|
||||||
"repo": "treefmt-nix",
|
|
||||||
"rev": "c6aaf729f34a36c445618580a9f95a48f5e4e03f",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "numtide",
|
|
||||||
"repo": "treefmt-nix",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"utils": {
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1659877975,
|
|
||||||
"narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
|
|
||||||
"owner": "numtide",
|
|
||||||
"repo": "flake-utils",
|
|
||||||
"rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "numtide",
|
|
||||||
"repo": "flake-utils",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"root": "root",
|
|
||||||
"version": 7
|
|
||||||
}
|
|
92
flake.nix
92
flake.nix
|
@ -1,92 +0,0 @@
|
||||||
{
|
|
||||||
description = "Yama and Datman";
|
|
||||||
|
|
||||||
inputs = {
|
|
||||||
utils.url = "github:numtide/flake-utils";
|
|
||||||
naersk = {
|
|
||||||
url = "github:nix-community/naersk";
|
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
|
||||||
};
|
|
||||||
nixpkgs.url = "nixpkgs/nixos-23.11";
|
|
||||||
poetry2nix = {
|
|
||||||
url = "github:nix-community/poetry2nix";
|
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
outputs = { self, nixpkgs, utils, naersk, poetry2nix }:
|
|
||||||
utils.lib.eachDefaultSystem (system: let
|
|
||||||
pkgs = nixpkgs.legacyPackages."${system}";
|
|
||||||
inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; }) mkPoetryApplication;
|
|
||||||
naersk-lib = naersk.lib."${system}";
|
|
||||||
|
|
||||||
rustComponents = naersk-lib.buildPackage {
|
|
||||||
pname = "yama";
|
|
||||||
root = ./.;
|
|
||||||
|
|
||||||
buildInputs = with pkgs; [
|
|
||||||
openssl
|
|
||||||
pkg-config
|
|
||||||
sqlite
|
|
||||||
];
|
|
||||||
};
|
|
||||||
|
|
||||||
mysqlHelper = mkPoetryApplication {
|
|
||||||
projectDir = ./datman-helper-mysql;
|
|
||||||
};
|
|
||||||
|
|
||||||
postgresHelper = mkPoetryApplication {
|
|
||||||
projectDir = ./datman-helper-postgres;
|
|
||||||
};
|
|
||||||
|
|
||||||
# We want to produce a package with all of these together, with wrappers that let them
|
|
||||||
# refer to each other by name (i.e. have each other on the path).
|
|
||||||
# Datman needs the helpers on the path.
|
|
||||||
# The helpers need lz4 on the path.
|
|
||||||
allInOne = pkgs.stdenv.mkDerivation {
|
|
||||||
name = "datman-aio";
|
|
||||||
|
|
||||||
src = "${pkgs.emptyDirectory}";
|
|
||||||
|
|
||||||
installPhase = ''
|
|
||||||
# set -eu
|
|
||||||
mkdir $out $out/bin
|
|
||||||
ln -s ${rustComponents}/bin/{yama,datman} $out/bin
|
|
||||||
ln -s ${mysqlHelper}/bin/datman-helper-mysql-{backup,restore} $out/bin
|
|
||||||
ln -s ${postgresHelper}/bin/datman-helper-postgres-{backup,restore} $out/bin
|
|
||||||
ln -s ${pkgs.lz4}/bin/lz4 $out/bin/
|
|
||||||
runHook postInstall
|
|
||||||
'';
|
|
||||||
|
|
||||||
buildInputs = [ pkgs.makeWrapper ];
|
|
||||||
|
|
||||||
postInstall = ''
|
|
||||||
# set -eu
|
|
||||||
for fn in $out/bin/{datman,yama,datman-helper-{mysql,postgres}-{backup,restore}}; do
|
|
||||||
wrapProgram $fn --suffix PATH : $out/bin
|
|
||||||
done
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
in rec {
|
|
||||||
# `nix build`
|
|
||||||
packages.yama = allInOne;
|
|
||||||
|
|
||||||
defaultPackage = packages.yama;
|
|
||||||
|
|
||||||
# NixOS Modules
|
|
||||||
# nixosModules = {
|
|
||||||
# yama = import ./nixos_modules/yama.nix self;
|
|
||||||
# };
|
|
||||||
|
|
||||||
# `nix run`
|
|
||||||
apps.yama = utils.lib.mkApp {
|
|
||||||
drv = rustComponents;
|
|
||||||
};
|
|
||||||
defaultApp = apps.yama;
|
|
||||||
|
|
||||||
# `nix develop`
|
|
||||||
devShell = pkgs.mkShell {
|
|
||||||
nativeBuildInputs = with pkgs; [ rustc cargo ];
|
|
||||||
};
|
|
||||||
});
|
|
||||||
}
|
|
|
@ -4,7 +4,7 @@ if [ $# -ge 1 ]
|
||||||
then
|
then
|
||||||
files=$*
|
files=$*
|
||||||
else
|
else
|
||||||
files="testsuite/setup.py testsuite/datmantests testsuite/helpers testsuite/yamatests datman-helper-postgres/datman_helper_postgres datman-helper-mysql/datman_helper_mysql"
|
files="testsuite/setup.py testsuite/datmantests testsuite/helpers testsuite/yamatests datman-helper-postgres/datman_helper_postgres datman-helper-postgres/setup.py datman-helper-mysql/datman_helper_mysql datman-helper-mysql/setup.py"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Linting these locations: $files"
|
echo "Linting these locations: $files"
|
||||||
|
|
50
shell.nix
50
shell.nix
|
@ -1,50 +0,0 @@
|
||||||
{ pkgs ? import <nixpkgs> {} }:
|
|
||||||
|
|
||||||
let
|
|
||||||
# We may need some packages from nixpkgs-unstable
|
|
||||||
#unstable = import <nixpkgs-unstable> {};
|
|
||||||
|
|
||||||
rust-toolchain = pkgs.symlinkJoin {
|
|
||||||
name = "rust-toolchain";
|
|
||||||
paths = [pkgs.rustc pkgs.cargo pkgs.rustfmt pkgs.rustPlatform.rustcSrc];
|
|
||||||
};
|
|
||||||
in
|
|
||||||
|
|
||||||
pkgs.mkShell {
|
|
||||||
|
|
||||||
buildInputs = [
|
|
||||||
rust-toolchain
|
|
||||||
|
|
||||||
pkgs.pkg-config
|
|
||||||
|
|
||||||
pkgs.alsa-lib
|
|
||||||
pkgs.sqlite
|
|
||||||
#pkgs.libclang # ??
|
|
||||||
];
|
|
||||||
|
|
||||||
nativeBuildInputs = [
|
|
||||||
pkgs.openssl
|
|
||||||
pkgs.python3
|
|
||||||
];
|
|
||||||
|
|
||||||
# Needed for bindgen when binding to avahi
|
|
||||||
LIBCLANG_PATH="${pkgs.llvmPackages_latest.libclang.lib}/lib";
|
|
||||||
|
|
||||||
# Cargo culted:
|
|
||||||
# Add to rustc search path
|
|
||||||
RUSTFLAGS = (builtins.map (a: ''-L ${a}/lib'') [
|
|
||||||
]);
|
|
||||||
# Add to bindgen search path
|
|
||||||
BINDGEN_EXTRA_CLANG_ARGS =
|
|
||||||
# Includes with normal include path
|
|
||||||
(builtins.map (a: ''-I"${a}/include"'') [
|
|
||||||
])
|
|
||||||
# Includes with special directory paths
|
|
||||||
++ [
|
|
||||||
''-I"${pkgs.llvmPackages_latest.libclang.lib}/lib/clang/${pkgs.llvmPackages_latest.libclang.version}/include"''
|
|
||||||
#''-I"${pkgs.glib.dev}/include/glib-2.0"''
|
|
||||||
#''-I${pkgs.glib.out}/lib/glib-2.0/include/''
|
|
||||||
];
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
|
@ -251,8 +251,7 @@ kind = {{ stdout = "blahblah.txt" }}
|
||||||
seed = 7555
|
seed = 7555
|
||||||
print(f"seed: {seed}")
|
print(f"seed: {seed}")
|
||||||
rng.seed(seed)
|
rng.seed(seed)
|
||||||
# min_files is 8 because we need enough files to use each label for this
|
# min_files is 8 because we need enough files to use each label for this test to succeed.
|
||||||
# test to succeed.
|
|
||||||
initial_descriptor, _ = generate_random_dir(rng, src_path, 32, min_files=8)
|
initial_descriptor, _ = generate_random_dir(rng, src_path, 32, min_files=8)
|
||||||
labellings = generate_labels(initial_descriptor, rng)
|
labellings = generate_labels(initial_descriptor, rng)
|
||||||
save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)
|
save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)
|
||||||
|
@ -299,81 +298,3 @@ kind = {{ stdout = "blahblah.txt" }}
|
||||||
)
|
)
|
||||||
|
|
||||||
td.cleanup()
|
td.cleanup()
|
||||||
|
|
||||||
def test_backup_incremental_with_mid_delete(self):
|
|
||||||
td = TemporaryDirectory("test_backup_incremental_with_mid_delete")
|
|
||||||
tdpath = Path(td.name)
|
|
||||||
|
|
||||||
datman_path = tdpath.joinpath("datman")
|
|
||||||
src_path = datman_path.joinpath("srca")
|
|
||||||
yama_path = datman_path.joinpath("main")
|
|
||||||
|
|
||||||
set_up_simple_datman(datman_path)
|
|
||||||
set_up_simple_yama(yama_path)
|
|
||||||
|
|
||||||
rng = Random()
|
|
||||||
seed = rng.randint(0, 9001)
|
|
||||||
print(f"seed: {seed}")
|
|
||||||
rng.seed(seed)
|
|
||||||
initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
|
|
||||||
|
|
||||||
print("storing")
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
|
|
||||||
# now mutate and store incremental
|
|
||||||
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
|
|
||||||
time.sleep(2)
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
|
|
||||||
# now mutate and store incremental again!
|
|
||||||
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
|
|
||||||
mutated_descriptor = scan_dir(src_path)
|
|
||||||
time.sleep(2)
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
|
|
||||||
pointer_names = [
|
|
||||||
line
|
|
||||||
for line in subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
|
|
||||||
.decode()
|
|
||||||
.split("\n")
|
|
||||||
if line
|
|
||||||
]
|
|
||||||
self.assertEqual(len(pointer_names), 3)
|
|
||||||
self.assertLess(pointer_names[0], pointer_names[1])
|
|
||||||
self.assertLess(pointer_names[1], pointer_names[2])
|
|
||||||
|
|
||||||
print(f"removing mid pointer {pointer_names[1]}")
|
|
||||||
subprocess.check_call(
|
|
||||||
("yama", "debug", "rmp", pointer_names[1]),
|
|
||||||
cwd=yama_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
print("extracting last pointer to check still valid")
|
|
||||||
dest_path = tdpath.joinpath("desta")
|
|
||||||
subprocess.check_call(
|
|
||||||
(
|
|
||||||
"datman",
|
|
||||||
"extract",
|
|
||||||
"--skip-metadata",
|
|
||||||
"--accept-partial",
|
|
||||||
"main",
|
|
||||||
"../desta",
|
|
||||||
),
|
|
||||||
cwd=datman_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
# this will be wrapped in a directory that starts with the name srca+
|
|
||||||
extracted_dir_descriptor_wrapper = scan_dir(dest_path)
|
|
||||||
|
|
||||||
contents = extracted_dir_descriptor_wrapper.contents
|
|
||||||
self.assertEqual(len(contents), 1)
|
|
||||||
key, value = next(iter(contents.items()))
|
|
||||||
self.assertTrue(key.startswith("srca+"))
|
|
||||||
|
|
||||||
self.assertIsInstance(value, DirectoryDescriptor)
|
|
||||||
key, value = next(iter(value.contents.items()))
|
|
||||||
self.assertEqual(key, "srca")
|
|
||||||
|
|
||||||
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
|
|
||||||
|
|
||||||
td.cleanup()
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Set
|
|
||||||
|
|
||||||
|
|
||||||
def set_up_simple_yama(path: Path):
|
def set_up_simple_yama(path: Path):
|
||||||
|
@ -11,13 +10,3 @@ def set_up_simple_yama(path: Path):
|
||||||
"example_zstd.dict"
|
"example_zstd.dict"
|
||||||
)
|
)
|
||||||
shutil.copyfile(example_zstd_path, path.joinpath("important_zstd.dict"))
|
shutil.copyfile(example_zstd_path, path.joinpath("important_zstd.dict"))
|
||||||
|
|
||||||
|
|
||||||
def list_bloblog_ids(pile: Path) -> Set[int]:
|
|
||||||
result = set()
|
|
||||||
for p in pile.joinpath("bloblog").iterdir():
|
|
||||||
try:
|
|
||||||
result.add(int(p.name))
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
return result
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ REQUIRED = ["green", "attrs", "immutabledict"]
|
||||||
|
|
||||||
|
|
||||||
# What packages are optional?
|
# What packages are optional?
|
||||||
EXTRAS = {"dev": ["black==22.10.0", "flake8==3.9.2", "isort==5.9.2"]}
|
EXTRAS = {"dev": ["black==21.7b0", "flake8==3.9.2", "isort==5.9.2"]}
|
||||||
|
|
||||||
# The rest you shouldn't have to touch too much :)
|
# The rest you shouldn't have to touch too much :)
|
||||||
# ------------------------------------------------
|
# ------------------------------------------------
|
||||||
|
|
|
@ -1,175 +0,0 @@
|
||||||
import subprocess
|
|
||||||
from pathlib import Path
|
|
||||||
from random import Random
|
|
||||||
from tempfile import TemporaryDirectory
|
|
||||||
from unittest import TestCase
|
|
||||||
|
|
||||||
from helpers import (
|
|
||||||
DirectoryDescriptor,
|
|
||||||
generate_random_dir,
|
|
||||||
randomly_mutate_directory_in_descriptor,
|
|
||||||
scan_dir,
|
|
||||||
)
|
|
||||||
from helpers.datman_helpers import set_up_simple_datman
|
|
||||||
from helpers.yama_helpers import list_bloblog_ids, set_up_simple_yama
|
|
||||||
|
|
||||||
|
|
||||||
class TestYamaCompact(TestCase):
|
|
||||||
def test_compaction_merge_two_small_bloblogs(self):
|
|
||||||
td = TemporaryDirectory("test_check_fails_after_random_corruption")
|
|
||||||
tdpath = Path(td.name)
|
|
||||||
|
|
||||||
datman_path = tdpath.joinpath("datman")
|
|
||||||
src_path = datman_path.joinpath("srca")
|
|
||||||
yama_path = datman_path.joinpath("main")
|
|
||||||
|
|
||||||
set_up_simple_datman(datman_path)
|
|
||||||
set_up_simple_yama(yama_path)
|
|
||||||
|
|
||||||
rng = Random()
|
|
||||||
seed = rng.randint(0, 9001)
|
|
||||||
print(f"seed: {seed}")
|
|
||||||
rng.seed(seed)
|
|
||||||
later_expected_descriptor, _ = generate_random_dir(rng, src_path, 32)
|
|
||||||
|
|
||||||
# Back up twice: that way we should get at least two bloblogs!
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
old_bloblog_ids = list_bloblog_ids(yama_path)
|
|
||||||
self.assertGreater(
|
|
||||||
len(old_bloblog_ids), 1, "Should be many bloblogs at this point"
|
|
||||||
)
|
|
||||||
|
|
||||||
subprocess.check_call(
|
|
||||||
(
|
|
||||||
"yama",
|
|
||||||
"compact",
|
|
||||||
"--mergeable",
|
|
||||||
"2",
|
|
||||||
"--small",
|
|
||||||
str(2 * 1024 * 1024 * 1024),
|
|
||||||
),
|
|
||||||
cwd=yama_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
new_bloblog_ids = list_bloblog_ids(yama_path)
|
|
||||||
self.assertEqual(
|
|
||||||
len(new_bloblog_ids), 1, "Should only be 1 bloblog at this point."
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
list(new_bloblog_ids)[0],
|
|
||||||
max(old_bloblog_ids) + 1,
|
|
||||||
"New bloblog ID should be 1 greater than the max old one.",
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_gc_then_compact(self):
|
|
||||||
td = TemporaryDirectory("test_gc_then_compact")
|
|
||||||
tdpath = Path(td.name)
|
|
||||||
|
|
||||||
datman_path = tdpath.joinpath("datman")
|
|
||||||
src_path = datman_path.joinpath("srca")
|
|
||||||
yama_path = datman_path.joinpath("main")
|
|
||||||
|
|
||||||
set_up_simple_datman(datman_path)
|
|
||||||
set_up_simple_yama(yama_path)
|
|
||||||
|
|
||||||
rng = Random()
|
|
||||||
seed = rng.randint(0, 9001)
|
|
||||||
print(f"seed: {seed}")
|
|
||||||
rng.seed(seed)
|
|
||||||
initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
|
|
||||||
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
orig_pointer_name = (
|
|
||||||
subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
|
|
||||||
.decode()
|
|
||||||
.split("\n")[0]
|
|
||||||
)
|
|
||||||
|
|
||||||
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
|
|
||||||
mutated_descriptor = scan_dir(src_path)
|
|
||||||
|
|
||||||
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
|
|
||||||
|
|
||||||
old_bloblog_ids = list_bloblog_ids(yama_path)
|
|
||||||
|
|
||||||
# Try a GC and check that it's a no-op
|
|
||||||
subprocess.check_call(
|
|
||||||
("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
|
|
||||||
)
|
|
||||||
subprocess.check_call(
|
|
||||||
(
|
|
||||||
"yama",
|
|
||||||
"compact",
|
|
||||||
"--mergeable",
|
|
||||||
"2000",
|
|
||||||
"--reclaim",
|
|
||||||
"1",
|
|
||||||
"--max-dealloc",
|
|
||||||
"1",
|
|
||||||
),
|
|
||||||
cwd=yama_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
unchanged_bloblog_ids = list_bloblog_ids(yama_path)
|
|
||||||
self.assertEqual(
|
|
||||||
old_bloblog_ids,
|
|
||||||
unchanged_bloblog_ids,
|
|
||||||
"No GC: no compaction should have happened.",
|
|
||||||
)
|
|
||||||
|
|
||||||
subprocess.check_call(
|
|
||||||
("yama", "debug", "rmp", orig_pointer_name), cwd=yama_path
|
|
||||||
)
|
|
||||||
|
|
||||||
# Try a GC and check that it did something
|
|
||||||
subprocess.check_call(
|
|
||||||
("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
|
|
||||||
)
|
|
||||||
subprocess.check_call(
|
|
||||||
(
|
|
||||||
"yama",
|
|
||||||
"compact",
|
|
||||||
"--mergeable",
|
|
||||||
"2000",
|
|
||||||
"--reclaim",
|
|
||||||
"1",
|
|
||||||
"--max-dealloc",
|
|
||||||
"1",
|
|
||||||
),
|
|
||||||
cwd=yama_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
new_bloblog_ids = list_bloblog_ids(yama_path)
|
|
||||||
self.assertNotEqual(
|
|
||||||
old_bloblog_ids, new_bloblog_ids, "GC: compaction should have happened."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check that we can still extract the files!
|
|
||||||
dest_path = tdpath.joinpath("desta")
|
|
||||||
subprocess.check_call(
|
|
||||||
(
|
|
||||||
"datman",
|
|
||||||
"extract",
|
|
||||||
"--skip-metadata",
|
|
||||||
"--accept-partial",
|
|
||||||
"main",
|
|
||||||
"../desta",
|
|
||||||
),
|
|
||||||
cwd=datman_path,
|
|
||||||
)
|
|
||||||
|
|
||||||
extracted_dir_descriptor_wrapper = scan_dir(dest_path)
|
|
||||||
|
|
||||||
contents = extracted_dir_descriptor_wrapper.contents
|
|
||||||
self.assertEqual(len(contents), 1)
|
|
||||||
key, value = next(iter(contents.items()))
|
|
||||||
self.assertTrue(key.startswith("srca+"))
|
|
||||||
|
|
||||||
self.assertIsInstance(value, DirectoryDescriptor)
|
|
||||||
key, value = next(iter(value.contents.items()))
|
|
||||||
self.assertEqual(key, "srca")
|
|
||||||
|
|
||||||
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
|
|
||||||
|
|
||||||
td.cleanup()
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "yama"
|
name = "yama"
|
||||||
version = "0.6.0-alpha.5"
|
version = "0.5.0-alpha.2"
|
||||||
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
|
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
description = "Deduplicated, compressed and encrypted content pile manager"
|
description = "Deduplicated, compressed and encrypted content pile manager"
|
||||||
|
@ -11,10 +11,11 @@ license = "GPL-3.0-or-later"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
fastcdc = "1.0.6"
|
fastcdc = "1.0.2"
|
||||||
zstd = "0.11.2" # 0.11.2+zstd.1.5.2
|
zstd = "0.6.0" # 0.6.0+zstd.1.4.8
|
||||||
clap = { version = "3.1.18", features = ["derive"] }
|
sshish = "0.1.0"
|
||||||
blake = "2.0.2"
|
clap = "= 3.0.0-beta.5"
|
||||||
|
blake = "2.0.0"
|
||||||
twox-hash = "1.5.0"
|
twox-hash = "1.5.0"
|
||||||
serde = { version = "1.0.104", features = ["derive"] }
|
serde = { version = "1.0.104", features = ["derive"] }
|
||||||
serde_bare = "0.3.0"
|
serde_bare = "0.3.0"
|
||||||
|
@ -40,6 +41,5 @@ rustyline = "7.1.0"
|
||||||
derivative = "2.2.0"
|
derivative = "2.2.0"
|
||||||
metrics = "0.17.1"
|
metrics = "0.17.1"
|
||||||
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
temp-dir = "0.1.11"
|
temp-dir = "0.1.11"
|
||||||
|
|
|
@ -18,24 +18,21 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
|
use clap::{crate_authors, crate_description, crate_version, Parser};
|
||||||
use log::info;
|
use log::info;
|
||||||
|
|
||||||
use clap::Parser;
|
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile};
|
use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile};
|
||||||
use yama::debug::{debug_command, DebugCommand};
|
use yama::debug::{debug_command, DebugCommand};
|
||||||
use yama::operations::checking::VacuumMode;
|
use yama::operations::checking::VacuumMode;
|
||||||
use yama::operations::legacy_pushpull::{
|
use yama::operations::pushpull::{determine_bypass_level, open_pile_with_work_bypass, push_to};
|
||||||
determine_bypass_level, open_pile_with_work_bypass, push_to,
|
use yama::operations::{checking, extracting};
|
||||||
};
|
|
||||||
use yama::operations::{checking, cleanup, extracting};
|
|
||||||
use yama::pile::local_sqlitebloblogs::CompactionThresholds;
|
|
||||||
use yama::pile::{Pile, PileDescriptor, RawPile};
|
use yama::pile::{Pile, PileDescriptor, RawPile};
|
||||||
use yama::{commands, debug};
|
use yama::{commands, debug};
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[clap(version = env!("CARGO_PKG_VERSION"), author = env!("CARGO_PKG_AUTHORS"), about = env!("CARGO_PKG_DESCRIPTION"))]
|
#[clap(version = crate_version!(), author = crate_authors!(), about = crate_description!())]
|
||||||
struct Opts {
|
struct Opts {
|
||||||
/// Chooses a different pile to be the working pile.
|
/// Chooses a different pile to be the working pile.
|
||||||
/// If specified, must be the name of a remote in yama.toml.
|
/// If specified, must be the name of a remote in yama.toml.
|
||||||
|
@ -57,9 +54,8 @@ enum PileCommand {
|
||||||
pointer_name: String,
|
pointer_name: String,
|
||||||
|
|
||||||
/// Limited expression(s) of files to retrieve.
|
/// Limited expression(s) of files to retrieve.
|
||||||
/// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE.
|
|
||||||
#[clap(short, long)]
|
#[clap(short, long)]
|
||||||
subset: Option<String>,
|
subset: Vec<PathBuf>,
|
||||||
|
|
||||||
destination: PathBuf,
|
destination: PathBuf,
|
||||||
|
|
||||||
|
@ -84,29 +80,6 @@ enum PileCommand {
|
||||||
shallow: bool,
|
shallow: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
Compact {
|
|
||||||
/// Don't actually perform any compaction; just plan it out.
|
|
||||||
#[clap(long)]
|
|
||||||
dry_run: bool,
|
|
||||||
|
|
||||||
/// Allocated size under which a bloblog is considered small.
|
|
||||||
#[clap(long = "small")]
|
|
||||||
small_thresh: Option<u64>,
|
|
||||||
|
|
||||||
/// Minimum amount of space to reclaim in order to run compaction for reclaim.
|
|
||||||
#[clap(long = "reclaim")]
|
|
||||||
min_reclaim: Option<u64>,
|
|
||||||
|
|
||||||
/// Maximum amount of space that can be deallocated in a bloblog before we consider it
|
|
||||||
/// worthwhile to replace.
|
|
||||||
#[clap(long = "max-dealloc")]
|
|
||||||
max_deallocated: Option<u64>,
|
|
||||||
|
|
||||||
/// Minimum number of mergeable small bloblogs in order to run compaction for merge.
|
|
||||||
#[clap(long)]
|
|
||||||
mergeable: Option<u32>,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Enter a debug prompt for manually operating on the yama pile.
|
/// Enter a debug prompt for manually operating on the yama pile.
|
||||||
Debug { supplied_command: Vec<String> },
|
Debug { supplied_command: Vec<String> },
|
||||||
|
|
||||||
|
@ -161,25 +134,10 @@ fn wrapped_main() -> anyhow::Result<i32> {
|
||||||
|
|
||||||
fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?;
|
fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?;
|
||||||
|
|
||||||
let mut node_to_extract = &mut root_tree_node.node;
|
|
||||||
|
|
||||||
if let Some(subset) = subset {
|
|
||||||
for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) {
|
|
||||||
match node_to_extract.child(path_to_descend) {
|
|
||||||
Ok(new_node) => {
|
|
||||||
node_to_extract = new_node;
|
|
||||||
}
|
|
||||||
Err(msg) => {
|
|
||||||
bail!("Can't descend into {path_to_descend:?}: {msg}");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo allow disabling apply metadata
|
// todo allow disabling apply metadata
|
||||||
extracting::extract(
|
extracting::extract(
|
||||||
destination,
|
destination,
|
||||||
node_to_extract,
|
&mut root_tree_node.node,
|
||||||
&pile,
|
&pile,
|
||||||
true,
|
true,
|
||||||
workers.unwrap_or(2),
|
workers.unwrap_or(2),
|
||||||
|
@ -215,29 +173,6 @@ fn wrapped_main() -> anyhow::Result<i32> {
|
||||||
return Ok(1);
|
return Ok(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PileCommand::Compact {
|
|
||||||
dry_run,
|
|
||||||
small_thresh,
|
|
||||||
min_reclaim,
|
|
||||||
max_deallocated,
|
|
||||||
mergeable,
|
|
||||||
} => {
|
|
||||||
let this_dir = Path::new(".");
|
|
||||||
let descriptor =
|
|
||||||
load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
|
|
||||||
cleanup::compact(
|
|
||||||
this_dir,
|
|
||||||
&descriptor,
|
|
||||||
!*dry_run,
|
|
||||||
true,
|
|
||||||
CompactionThresholds {
|
|
||||||
minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024),
|
|
||||||
minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64),
|
|
||||||
cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024),
|
|
||||||
cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024),
|
|
||||||
},
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
PileCommand::Init {} => {
|
PileCommand::Init {} => {
|
||||||
commands::init(".".as_ref())?;
|
commands::init(".".as_ref())?;
|
||||||
}
|
}
|
||||||
|
|
|
@ -161,8 +161,6 @@ impl<'cst, CST: ChunkSubmissionTarget> Write for RecursiveChunker<'cst, CST> {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn calculate_chunkid(chunk: &[u8]) -> ChunkId {
|
pub fn calculate_chunkid(chunk: &[u8]) -> ChunkId {
|
||||||
// TODO(newver) Allow pluggable chunkID calculations so that encrypted storage can work without
|
|
||||||
// leaking contents.
|
|
||||||
let mut chunk_id: ChunkId = Default::default();
|
let mut chunk_id: ChunkId = Default::default();
|
||||||
blake::hash(256, &chunk, &mut chunk_id).expect("BLAKE problem");
|
blake::hash(256, &chunk, &mut chunk_id).expect("BLAKE problem");
|
||||||
chunk_id
|
chunk_id
|
||||||
|
|
|
@ -22,6 +22,7 @@ use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::{anyhow, bail, Context};
|
use anyhow::{anyhow, bail, Context};
|
||||||
|
use clap::crate_version;
|
||||||
use log::warn;
|
use log::warn;
|
||||||
|
|
||||||
use crate::chunking::{RecursiveChunker, RecursiveUnchunker, SENSIBLE_THRESHOLD};
|
use crate::chunking::{RecursiveChunker, RecursiveUnchunker, SENSIBLE_THRESHOLD};
|
||||||
|
@ -47,7 +48,7 @@ pub fn init(dir: &Path) -> anyhow::Result<()> {
|
||||||
let mut file = File::create(yama_toml)?;
|
let mut file = File::create(yama_toml)?;
|
||||||
|
|
||||||
let desc = PileDescriptor {
|
let desc = PileDescriptor {
|
||||||
yama_version: env!("CARGO_PKG_VERSION").to_owned(),
|
yama_version: crate_version!().to_owned(),
|
||||||
storage: PileStorage::SqliteIndexedBloblog,
|
storage: PileStorage::SqliteIndexedBloblog,
|
||||||
compression: Some(12),
|
compression: Some(12),
|
||||||
};
|
};
|
||||||
|
|
|
@ -15,12 +15,13 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use crate::commands::retrieve_tree_node;
|
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node, store_tree_node};
|
||||||
use crate::definitions::{FilesystemOwnership, FilesystemPermissions, TreeNode};
|
use crate::definitions::{FilesystemOwnership, FilesystemPermissions, TreeNode};
|
||||||
use crate::operations::remove_pointer_safely;
|
|
||||||
use crate::pile::{Pile, PileDescriptor, RawPile};
|
use crate::pile::{Pile, PileDescriptor, RawPile};
|
||||||
|
use crate::tree::integrate_node_in_place;
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
use log::info;
|
||||||
use rustyline::error::ReadlineError;
|
use rustyline::error::ReadlineError;
|
||||||
use rustyline::Editor;
|
use rustyline::Editor;
|
||||||
|
|
||||||
|
@ -122,7 +123,50 @@ pub fn debug_command<RP: RawPile>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DebugCommand::DeletePointer { name } => {
|
DebugCommand::DeletePointer { name } => {
|
||||||
remove_pointer_safely(pile, name)?;
|
// retrieve this pointer
|
||||||
|
let mut this_pointer = pile.read_pointer(name.as_str())?.ok_or_else(|| {
|
||||||
|
anyhow!("Pointer {:?} does not exist so can not be deleted.", name)
|
||||||
|
})?;
|
||||||
|
let mut this_node = retrieve_tree_node(&pile, this_pointer.chunk_ref.clone())?;
|
||||||
|
|
||||||
|
// fully integrate the pointer
|
||||||
|
fully_integrate_pointer_node(&pile, &mut this_node.node, &mut this_pointer)?;
|
||||||
|
assert!(this_pointer.parent_pointer.is_none());
|
||||||
|
|
||||||
|
// now integrate any pointers that rely on this one
|
||||||
|
// so that they no longer rely on this one.
|
||||||
|
for pointer in pile.list_pointers()?.iter() {
|
||||||
|
if pointer == name {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Some(mut pointer_data) = pile.read_pointer(pointer.as_str())? {
|
||||||
|
if let Some(parent_pointer) = pointer_data.parent_pointer.as_ref() {
|
||||||
|
if parent_pointer == name {
|
||||||
|
info!("Pointer is now an orphan: {:?}", pointer);
|
||||||
|
|
||||||
|
// need to integrate this node, so retrieve it
|
||||||
|
let mut node = retrieve_tree_node(&pile, pointer_data.chunk_ref)?;
|
||||||
|
|
||||||
|
// integrate it in-place
|
||||||
|
integrate_node_in_place(&mut node.node, &this_node.node)?;
|
||||||
|
|
||||||
|
// mark it as orphaned (no parent)
|
||||||
|
pointer_data.parent_pointer = None;
|
||||||
|
|
||||||
|
// store the orphaned node
|
||||||
|
let new_chunk_ref = store_tree_node(&pile, &node)?;
|
||||||
|
// associate the orphaned node with the orphaned pointer
|
||||||
|
pointer_data.chunk_ref = new_chunk_ref;
|
||||||
|
// write the pointer back.
|
||||||
|
pile.write_pointer(pointer.as_str(), &pointer_data)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// then delete the pointer
|
||||||
|
pile.delete_pointer(name)?;
|
||||||
|
info!("Deleted pointer: {:?}", name);
|
||||||
}
|
}
|
||||||
DebugCommand::PointerInfo { name } => {
|
DebugCommand::PointerInfo { name } => {
|
||||||
let this_pointer = pile
|
let this_pointer = pile
|
||||||
|
|
|
@ -35,24 +35,6 @@ pub struct PointerData {
|
||||||
pub gid_lookup: BTreeMap<u16, Option<String>>,
|
pub gid_lookup: BTreeMap<u16, Option<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct PartialPointerData {
|
|
||||||
pub chunk_ref: RecursiveChunkRef,
|
|
||||||
pub uid_lookup: BTreeMap<u16, Option<String>>,
|
|
||||||
pub gid_lookup: BTreeMap<u16, Option<String>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialPointerData {
|
|
||||||
pub fn complete(self, parent_pointer: Option<String>) -> PointerData {
|
|
||||||
PointerData {
|
|
||||||
chunk_ref: self.chunk_ref,
|
|
||||||
parent_pointer,
|
|
||||||
uid_lookup: self.uid_lookup,
|
|
||||||
gid_lookup: self.gid_lookup,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||||
pub struct RecursiveChunkRef {
|
pub struct RecursiveChunkRef {
|
||||||
/// The root Chunk ID.
|
/// The root Chunk ID.
|
||||||
|
@ -270,19 +252,6 @@ impl TreeNode {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Recurses into a child by name, or returns Err with a reason.
|
|
||||||
pub fn child(&mut self, name: &str) -> Result<&mut TreeNode, &'static str> {
|
|
||||||
match self {
|
|
||||||
TreeNode::NormalFile { .. } => Err("not a directory: normal file"),
|
|
||||||
TreeNode::Directory { children, .. } => match children.get_mut(name) {
|
|
||||||
None => Err("child not in directory"),
|
|
||||||
Some(node) => Ok(node),
|
|
||||||
},
|
|
||||||
TreeNode::SymbolicLink { .. } => Err("not a directory: symlink"),
|
|
||||||
TreeNode::Deleted => Err("not a directory: deleted"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
|
|
@ -1,80 +1,4 @@
|
||||||
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node, store_tree_node};
|
|
||||||
use crate::pile::{Pile, RawPile};
|
|
||||||
use crate::tree::{differentiate_node_in_place, integrate_node_in_place};
|
|
||||||
use anyhow::{anyhow, Context};
|
|
||||||
use log::info;
|
|
||||||
|
|
||||||
pub mod checking;
|
pub mod checking;
|
||||||
pub mod cleanup;
|
|
||||||
pub mod extracting;
|
pub mod extracting;
|
||||||
pub mod legacy_pushpull;
|
pub mod pushpull;
|
||||||
pub mod storing;
|
pub mod storing;
|
||||||
|
|
||||||
pub fn remove_pointer_safely<P: RawPile>(pile: &Pile<P>, name: &str) -> anyhow::Result<()> {
|
|
||||||
// retrieve this pointer
|
|
||||||
let mut this_pointer = pile
|
|
||||||
.read_pointer(name)?
|
|
||||||
.ok_or_else(|| anyhow!("Pointer {:?} does not exist so can not be deleted.", name))?;
|
|
||||||
let mut this_node = retrieve_tree_node(&pile, this_pointer.chunk_ref.clone())
|
|
||||||
.context("retrieving 'this' node")?;
|
|
||||||
|
|
||||||
let new_parent_name = this_pointer.parent_pointer.clone();
|
|
||||||
fully_integrate_pointer_node(pile, &mut this_node.node, &mut this_pointer)
|
|
||||||
.context("integrating new parent")?;
|
|
||||||
|
|
||||||
let new_parent = if let Some(ref new_parent_name) = new_parent_name {
|
|
||||||
let mut new_parent_pointer = pile
|
|
||||||
.read_pointer(new_parent_name.as_str())?
|
|
||||||
.ok_or_else(|| anyhow!("Parent pointer {:?} does not exist.", name))?;
|
|
||||||
let mut new_parent_node = retrieve_tree_node(&pile, new_parent_pointer.chunk_ref.clone())?;
|
|
||||||
fully_integrate_pointer_node(pile, &mut new_parent_node.node, &mut new_parent_pointer)?;
|
|
||||||
Some((new_parent_pointer, new_parent_node))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// now integrate any pointers that rely on this one
|
|
||||||
// so that they no longer rely on this one.
|
|
||||||
for pointer in pile.list_pointers()?.iter() {
|
|
||||||
if pointer == name {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if let Some(mut pointer_data) = pile.read_pointer(pointer.as_str())? {
|
|
||||||
if let Some(parent_pointer) = pointer_data.parent_pointer.as_ref() {
|
|
||||||
if parent_pointer == name {
|
|
||||||
info!("Pointer would be orphaned: {:?}; integrating", pointer);
|
|
||||||
|
|
||||||
// need to integrate this node, so retrieve it
|
|
||||||
let mut node = retrieve_tree_node(&pile, pointer_data.chunk_ref)?;
|
|
||||||
|
|
||||||
// integrate it in-place
|
|
||||||
integrate_node_in_place(&mut node.node, &this_node.node)?;
|
|
||||||
|
|
||||||
if let Some((_, ref new_parent_node)) = new_parent {
|
|
||||||
// then differentiate with respect to the NEW parent
|
|
||||||
differentiate_node_in_place(&mut node.node, &new_parent_node.node)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// pass through the parent
|
|
||||||
pointer_data.parent_pointer = new_parent_name.clone();
|
|
||||||
|
|
||||||
// store the updated version of the pointer
|
|
||||||
let new_chunk_ref = store_tree_node(&pile, &node)?;
|
|
||||||
// associate the new node with the new version of the pointer
|
|
||||||
pointer_data.chunk_ref = new_chunk_ref;
|
|
||||||
// write the pointer back.
|
|
||||||
pile.write_pointer(pointer.as_str(), &pointer_data)?;
|
|
||||||
|
|
||||||
// we must flush chunks before deleting the pointer
|
|
||||||
pile.flush()
|
|
||||||
.context("flushing after writing pointer back")?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// then delete the pointer
|
|
||||||
pile.delete_pointer(name)?;
|
|
||||||
info!("Deleted pointer: {:?}", name);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
|
@ -18,16 +18,12 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
use crate::chunking::RecursiveUnchunker;
|
use crate::chunking::RecursiveUnchunker;
|
||||||
use crate::commands::retrieve_tree_node;
|
use crate::commands::retrieve_tree_node;
|
||||||
use crate::definitions::{ChunkId, TreeNode};
|
use crate::definitions::{ChunkId, TreeNode};
|
||||||
use crate::pile::{
|
use crate::pile::{ControllerMessage, Keyspace, Pile, RawPile, StoragePipelineSettings};
|
||||||
ControllerMessage, Keyspace, Pile, PipelineDescription, RawPile, StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use crossbeam_channel::Sender;
|
use crossbeam_channel::Sender;
|
||||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||||
use itertools::Itertools;
|
|
||||||
use log::{error, info, warn};
|
use log::{error, info, warn};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::io::{Read, Write};
|
use std::io::{Read, Write};
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
@ -112,10 +108,6 @@ impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
|
||||||
self.underlying.delete(kind, key)
|
self.underlying.delete(kind, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.underlying.delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -139,14 +131,6 @@ impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
|
||||||
self.underlying
|
self.underlying
|
||||||
.build_storage_pipeline(settings, controller_send)
|
.build_storage_pipeline(settings, controller_send)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
self.underlying.describe_pipeline()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
|
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
|
||||||
|
@ -413,21 +397,9 @@ pub fn check_shallow<RP: RawPile>(
|
||||||
|
|
||||||
// actually do the vacuum!
|
// actually do the vacuum!
|
||||||
info!("Going to vacuum them up.");
|
info!("Going to vacuum them up.");
|
||||||
for vacuum_ids_chunk in to_vacuum
|
for vacuum_id in to_vacuum {
|
||||||
.into_iter()
|
pile.raw_pile.delete(Keyspace::Chunk, &vacuum_id)?;
|
||||||
.chunks(512)
|
pbar.inc(1);
|
||||||
.into_iter()
|
|
||||||
.map(|c| c.collect::<Vec<ChunkId>>())
|
|
||||||
{
|
|
||||||
pile.raw_pile.delete_many(
|
|
||||||
Keyspace::Chunk,
|
|
||||||
vacuum_ids_chunk
|
|
||||||
.iter()
|
|
||||||
.map(|ci| ci.as_slice())
|
|
||||||
.collect::<Vec<&[u8]>>()
|
|
||||||
.as_slice(),
|
|
||||||
)?;
|
|
||||||
pbar.inc(vacuum_ids_chunk.len().try_into().unwrap());
|
|
||||||
}
|
}
|
||||||
pile.flush()?;
|
pile.flush()?;
|
||||||
pbar.finish_and_clear();
|
pbar.finish_and_clear();
|
||||||
|
|
|
@ -1,64 +0,0 @@
|
||||||
use crate::pile::local_sqlitebloblogs::{CompactionThresholds, SqliteBloblogPile};
|
|
||||||
use crate::pile::{PileDescriptor, PileStorage};
|
|
||||||
use anyhow::{bail, Context};
|
|
||||||
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
|
|
||||||
use log::info;
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
pub fn compact(
|
|
||||||
pile_path: &Path,
|
|
||||||
pile_desc: &PileDescriptor,
|
|
||||||
actually_run: bool,
|
|
||||||
make_progress_bar: bool,
|
|
||||||
thresholds: CompactionThresholds,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let pbar = if make_progress_bar {
|
|
||||||
ProgressBar::with_draw_target(1000 as u64, ProgressDrawTarget::stdout_with_hz(10))
|
|
||||||
} else {
|
|
||||||
ProgressBar::hidden()
|
|
||||||
};
|
|
||||||
pbar.set_style(
|
|
||||||
ProgressStyle::default_bar()
|
|
||||||
.template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
|
|
||||||
);
|
|
||||||
pbar.set_message("compacting");
|
|
||||||
|
|
||||||
match pile_desc.storage {
|
|
||||||
PileStorage::SqliteIndexedBloblog => {
|
|
||||||
let bloblog_pile = SqliteBloblogPile::open(&pile_path)
|
|
||||||
.context("Failed to open SQLite-indexed Bloblog Pile")?;
|
|
||||||
compact_bloblogs(bloblog_pile, pbar, actually_run, thresholds)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
other @ PileStorage::RemoteOnly => {
|
|
||||||
bail!("Cannot use compaction on this kind of pile: {other:?}!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn compact_bloblogs(
|
|
||||||
bloblog_pile: SqliteBloblogPile,
|
|
||||||
pbar: ProgressBar,
|
|
||||||
actually_run: bool,
|
|
||||||
thresholds: CompactionThresholds,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
info!("=== Analysing for compaction ===");
|
|
||||||
let analysis = bloblog_pile.analyse_for_compaction()?;
|
|
||||||
let chunks_total: u64 = analysis.values().map(|bs| bs.chunks_total).sum();
|
|
||||||
let chunks_deleted: u64 = analysis.values().map(|bs| bs.chunks_deleted).sum();
|
|
||||||
let bytes_total: u64 = analysis.values().map(|bs| bs.bytes_total).sum();
|
|
||||||
let bytes_deleted: u64 = analysis.values().map(|bs| bs.bytes_deleted).sum();
|
|
||||||
|
|
||||||
info!("{} bloblogs in this pile, with {chunks_total} chunks ({bytes_total} B) of which {chunks_deleted} ({bytes_deleted} B) are deleted.", analysis.len());
|
|
||||||
|
|
||||||
info!("=== Planning compaction ===");
|
|
||||||
let plan = bloblog_pile.plan_compaction(&thresholds, analysis)?;
|
|
||||||
info!("Planned compaction: replace {} bloblogs (of which {} are small), freeing up {} B and rewriting {} B", plan.bloblogs_to_replace.len(), plan.small_bloblogs, plan.reclaimable_space, plan.bytes_to_write);
|
|
||||||
|
|
||||||
if actually_run {
|
|
||||||
info!("=== Compacting ===");
|
|
||||||
bloblog_pile.perform_compaction(Box::new(pbar), plan)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -2,7 +2,7 @@ use crate::chunking::RecursiveUnchunker;
|
||||||
use crate::commands::fully_load_pointer;
|
use crate::commands::fully_load_pointer;
|
||||||
use crate::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
|
use crate::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
|
||||||
use crate::operations::checking::VacuumRawPile;
|
use crate::operations::checking::VacuumRawPile;
|
||||||
use crate::operations::legacy_pushpull::PushWorkerToManagerMessage::{NewTask, TaskDone};
|
use crate::operations::pushpull::PushWorkerToManagerMessage::{NewTask, TaskDone};
|
||||||
use crate::pile::compression::{CompressionSettings, RawPileCompressor};
|
use crate::pile::compression::{CompressionSettings, RawPileCompressor};
|
||||||
use crate::pile::integrity::RawPileIntegrityChecker;
|
use crate::pile::integrity::RawPileIntegrityChecker;
|
||||||
use crate::pile::local_sqlitebloblogs::SqliteBloblogPile;
|
use crate::pile::local_sqlitebloblogs::SqliteBloblogPile;
|
|
@ -29,9 +29,7 @@ use log::{error, warn};
|
||||||
use crate::chunking::{ChunkSubmissionTarget, RecursiveChunker, SENSIBLE_THRESHOLD};
|
use crate::chunking::{ChunkSubmissionTarget, RecursiveChunker, SENSIBLE_THRESHOLD};
|
||||||
use crate::commands;
|
use crate::commands;
|
||||||
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node};
|
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node};
|
||||||
use crate::definitions::{
|
use crate::definitions::{PointerData, RecursiveChunkRef, RootTreeNode, TreeNode};
|
||||||
PartialPointerData, PointerData, RecursiveChunkRef, RootTreeNode, TreeNode,
|
|
||||||
};
|
|
||||||
use crate::pile::{existence_checker_stage, Pile, RawPile, StoragePipelineSettings};
|
use crate::pile::{existence_checker_stage, Pile, RawPile, StoragePipelineSettings};
|
||||||
use crate::progress::ProgressTracker;
|
use crate::progress::ProgressTracker;
|
||||||
use crate::tree::{create_uidgid_lookup_tables, differentiate_node_in_place};
|
use crate::tree::{create_uidgid_lookup_tables, differentiate_node_in_place};
|
||||||
|
@ -242,29 +240,7 @@ pub fn store_fully<PT: ProgressTracker>(
|
||||||
parent: Option<String>,
|
parent: Option<String>,
|
||||||
num_workers: u8,
|
num_workers: u8,
|
||||||
progress_bar: &mut PT,
|
progress_bar: &mut PT,
|
||||||
) -> anyhow::Result<()> {
|
use_pipelined_storage: bool,
|
||||||
pointer_ops_prepare_to_store(&pile, &mut root_node, &parent)?;
|
|
||||||
let pointer_data =
|
|
||||||
store_without_pointer_ops(&pile, &root_dir, root_node, num_workers, progress_bar)?
|
|
||||||
.complete(parent);
|
|
||||||
pointers_ops_after_store(&pile, &new_pointer_name, &pointer_data)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn pointers_ops_after_store(
|
|
||||||
pile: &Pile<impl RawPile>,
|
|
||||||
new_pointer_name: &str,
|
|
||||||
pointer_data: &PointerData,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
pile.write_pointer(&new_pointer_name, &pointer_data)?;
|
|
||||||
pile.flush()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn pointer_ops_prepare_to_store(
|
|
||||||
pile: &Pile<impl RawPile>,
|
|
||||||
mut root_node: &mut TreeNode,
|
|
||||||
parent: &Option<String>,
|
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
if let Some(parent) = parent.as_ref() {
|
if let Some(parent) = parent.as_ref() {
|
||||||
let mut parent_pointer = pile.read_pointer(parent)?.ok_or_else(|| {
|
let mut parent_pointer = pile.read_pointer(parent)?.ok_or_else(|| {
|
||||||
|
@ -278,16 +254,8 @@ pub fn pointer_ops_prepare_to_store(
|
||||||
fully_integrate_pointer_node(&pile, &mut parent_node.node, &mut parent_pointer)?;
|
fully_integrate_pointer_node(&pile, &mut parent_node.node, &mut parent_pointer)?;
|
||||||
differentiate_node_in_place(&mut root_node, &parent_node.node)?;
|
differentiate_node_in_place(&mut root_node, &parent_node.node)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn store_without_pointer_ops<PT: ProgressTracker>(
|
if use_pipelined_storage {
|
||||||
pile: &Arc<Pile<Box<dyn RawPile>>>,
|
|
||||||
root_dir: &PathBuf,
|
|
||||||
mut root_node: TreeNode,
|
|
||||||
num_workers: u8,
|
|
||||||
progress_bar: &mut PT,
|
|
||||||
) -> anyhow::Result<PartialPointerData> {
|
|
||||||
// TODO make these configurable
|
// TODO make these configurable
|
||||||
let sps = StoragePipelineSettings {
|
let sps = StoragePipelineSettings {
|
||||||
num_compressors: get_number_of_workers("YAMA_PL_COMPRESSORS") as u32,
|
num_compressors: get_number_of_workers("YAMA_PL_COMPRESSORS") as u32,
|
||||||
|
@ -297,10 +265,7 @@ pub fn store_without_pointer_ops<PT: ProgressTracker>(
|
||||||
let (control_tx, control_rx) = crossbeam_channel::unbounded();
|
let (control_tx, control_rx) = crossbeam_channel::unbounded();
|
||||||
let pile2 = pile.clone();
|
let pile2 = pile.clone();
|
||||||
let pipeline = pile.raw_pile.build_storage_pipeline(sps, control_tx)?;
|
let pipeline = pile.raw_pile.build_storage_pipeline(sps, control_tx)?;
|
||||||
|
|
||||||
// TODO(newver) The existence checker stage should be able to be swapped between different implementations.
|
|
||||||
let pipeline = existence_checker_stage(pile2, pipeline);
|
let pipeline = existence_checker_stage(pile2, pipeline);
|
||||||
|
|
||||||
store(
|
store(
|
||||||
&root_dir,
|
&root_dir,
|
||||||
&mut root_node,
|
&mut root_node,
|
||||||
|
@ -313,6 +278,15 @@ pub fn store_without_pointer_ops<PT: ProgressTracker>(
|
||||||
while let Ok(_) = control_rx.recv() {
|
while let Ok(_) = control_rx.recv() {
|
||||||
// TODO nothing for now.
|
// TODO nothing for now.
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
store(
|
||||||
|
&root_dir,
|
||||||
|
&mut root_node,
|
||||||
|
pile.as_ref(),
|
||||||
|
progress_bar,
|
||||||
|
num_workers,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
let mut uid_lookup = BTreeMap::new();
|
let mut uid_lookup = BTreeMap::new();
|
||||||
let mut gid_lookup = BTreeMap::new();
|
let mut gid_lookup = BTreeMap::new();
|
||||||
|
@ -323,20 +297,19 @@ pub fn store_without_pointer_ops<PT: ProgressTracker>(
|
||||||
let chunk_ref = commands::store_tree_node(
|
let chunk_ref = commands::store_tree_node(
|
||||||
&pile,
|
&pile,
|
||||||
&RootTreeNode {
|
&RootTreeNode {
|
||||||
name: root_dir
|
name: root_dir.file_name().unwrap().to_str().unwrap().to_owned(),
|
||||||
.file_name()
|
|
||||||
.map(|s| s.to_str())
|
|
||||||
.flatten()
|
|
||||||
.unwrap_or("_root")
|
|
||||||
.to_owned(),
|
|
||||||
node: root_node,
|
node: root_node,
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let pointer_data = PartialPointerData {
|
let pointer_data = PointerData {
|
||||||
chunk_ref,
|
chunk_ref,
|
||||||
|
parent_pointer: parent,
|
||||||
uid_lookup,
|
uid_lookup,
|
||||||
gid_lookup,
|
gid_lookup,
|
||||||
};
|
};
|
||||||
Ok(pointer_data)
|
|
||||||
|
pile.write_pointer(&new_pointer_name, &pointer_data)?;
|
||||||
|
pile.flush()?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,7 +26,6 @@ use std::collections::HashSet;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::sync::{Arc, Condvar, Mutex};
|
use std::sync::{Arc, Condvar, Mutex};
|
||||||
|
|
||||||
pub mod access_guard;
|
|
||||||
pub mod compression;
|
pub mod compression;
|
||||||
pub mod encryption;
|
pub mod encryption;
|
||||||
pub mod integrity;
|
pub mod integrity;
|
||||||
|
@ -88,8 +87,6 @@ pub fn existence_checker_stage<RP: RawPile>(
|
||||||
pile: Arc<Pile<RP>>,
|
pile: Arc<Pile<RP>>,
|
||||||
next_stage: Sender<(ChunkId, Vec<u8>)>,
|
next_stage: Sender<(ChunkId, Vec<u8>)>,
|
||||||
) -> Sender<(ChunkId, Vec<u8>)> {
|
) -> Sender<(ChunkId, Vec<u8>)> {
|
||||||
// TODO(newver) Do better than this.
|
|
||||||
|
|
||||||
let shared_seen_set: Arc<Mutex<HashSet<ChunkId>>> = Default::default();
|
let shared_seen_set: Arc<Mutex<HashSet<ChunkId>>> = Default::default();
|
||||||
let (tx, rx) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(32);
|
let (tx, rx) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(32);
|
||||||
|
|
||||||
|
@ -126,22 +123,12 @@ pub enum ControllerMessage {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
|
||||||
pub enum PipelineDescription {
|
|
||||||
Store,
|
|
||||||
Remote,
|
|
||||||
Integrity,
|
|
||||||
Compression { dictionary_fingerprint: u64 },
|
|
||||||
Encryption,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait RawPile: Send + Sync + Debug + 'static {
|
pub trait RawPile: Send + Sync + Debug + 'static {
|
||||||
// TODO expose verification errors?
|
// TODO expose verification errors?
|
||||||
fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool>;
|
fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool>;
|
||||||
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>>;
|
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>>;
|
||||||
fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()>;
|
fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()>;
|
||||||
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()>;
|
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()>;
|
||||||
fn delete_many(&self, kind: Keyspace, key: &[&[u8]]) -> anyhow::Result<()>;
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -166,12 +153,6 @@ pub trait RawPile: Send + Sync + Debug + 'static {
|
||||||
settings: StoragePipelineSettings,
|
settings: StoragePipelineSettings,
|
||||||
controller_send: Sender<ControllerMessage>,
|
controller_send: Sender<ControllerMessage>,
|
||||||
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>>;
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>>;
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>>;
|
|
||||||
|
|
||||||
/// Return a u64 order token that indicates the optimum order to read this chunk in
|
|
||||||
/// compared to other chunks.
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RawPile for Box<dyn RawPile> {
|
impl RawPile for Box<dyn RawPile> {
|
||||||
|
@ -187,9 +168,6 @@ impl RawPile for Box<dyn RawPile> {
|
||||||
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
||||||
self.as_ref().delete(kind, key)
|
self.as_ref().delete(kind, key)
|
||||||
}
|
}
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.as_ref().delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -214,14 +192,6 @@ impl RawPile for Box<dyn RawPile> {
|
||||||
self.as_ref()
|
self.as_ref()
|
||||||
.build_storage_pipeline(settings, controller_send)
|
.build_storage_pipeline(settings, controller_send)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
self.as_ref().describe_pipeline()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.as_ref().chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<RP: RawPile> RawPile for Arc<RP> {
|
impl<RP: RawPile> RawPile for Arc<RP> {
|
||||||
|
@ -237,9 +207,6 @@ impl<RP: RawPile> RawPile for Arc<RP> {
|
||||||
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
||||||
self.as_ref().delete(kind, key)
|
self.as_ref().delete(kind, key)
|
||||||
}
|
}
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.as_ref().delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -264,14 +231,6 @@ impl<RP: RawPile> RawPile for Arc<RP> {
|
||||||
self.as_ref()
|
self.as_ref()
|
||||||
.build_storage_pipeline(settings, controller_send)
|
.build_storage_pipeline(settings, controller_send)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
self.as_ref().describe_pipeline()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.as_ref().chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
|
@ -1,141 +0,0 @@
|
||||||
use crate::chunking::calculate_chunkid;
|
|
||||||
use crate::definitions::ChunkId;
|
|
||||||
use crate::pile::{
|
|
||||||
ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
use anyhow::{anyhow, bail};
|
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
|
||||||
use derivative::Derivative;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::thread;
|
|
||||||
|
|
||||||
/// PileGuard is a wrapper around a pile that prevents data exfiltration and malicious corruption.
|
|
||||||
/// It's basically a firewall for a Pile?
|
|
||||||
/// Preventing malicious corruption requires the chunks to be unprocessed. This way, their ID can be
|
|
||||||
/// checked by this module.
|
|
||||||
#[derive(Debug, Derivative)]
|
|
||||||
#[derivative(Clone(bound = ""))]
|
|
||||||
// we need to use derivative's Clone impl because Arc<R> causes R to have a bound on Clone
|
|
||||||
// even though that's not needed. https://github.com/rust-lang/rust/issues/26925
|
|
||||||
pub struct PileGuard<R: Clone + RawPile> {
|
|
||||||
underlying: R,
|
|
||||||
/// Whether to verify chunk IDs to prevent malicious corruption
|
|
||||||
verify_chunk_ids: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pipeline(
|
|
||||||
subsequent_pipeline: Sender<(ChunkId, Vec<u8>)>,
|
|
||||||
input: Receiver<(ChunkId, Vec<u8>)>,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
while let Ok((claimed_chunk_id, chunk)) = input.recv() {
|
|
||||||
let actual_chunk_id = calculate_chunkid(&chunk);
|
|
||||||
if actual_chunk_id != claimed_chunk_id {
|
|
||||||
bail!("CHUNK ID MISMATCH — is this forgery? (malicious storage process?) claimed{:?} actually{:?}", claimed_chunk_id, actual_chunk_id);
|
|
||||||
}
|
|
||||||
subsequent_pipeline
|
|
||||||
.send((claimed_chunk_id, chunk))
|
|
||||||
.map_err(|_| anyhow!("Subsequent step closed"))?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: Clone + RawPile> PileGuard<R> {
|
|
||||||
pub fn new(underlying: R, verify_chunk_ids: bool) -> Self {
|
|
||||||
PileGuard {
|
|
||||||
underlying,
|
|
||||||
verify_chunk_ids,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: Clone + RawPile> RawPile for PileGuard<R> {
|
|
||||||
fn exists(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<bool> {
|
|
||||||
match kind {
|
|
||||||
Keyspace::Chunk => self.underlying.exists(kind, key),
|
|
||||||
Keyspace::ChunkHash => {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
Keyspace::Pointer => {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read(&self, _kind: Keyspace, _key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write(&self, kind: Keyspace, _key: &[u8], _value: &[u8]) -> anyhow::Result<()> {
|
|
||||||
match kind {
|
|
||||||
Keyspace::Chunk => {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
Keyspace::ChunkHash => {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
Keyspace::Pointer => {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete(&self, _kind: Keyspace, _key: &[u8]) -> anyhow::Result<()> {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete_many(&self, _kind: Keyspace, _keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
|
||||||
&self,
|
|
||||||
_kind: Keyspace,
|
|
||||||
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<Vec<u8>>>>> {
|
|
||||||
bail!("Access denied");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn flush(&self) -> anyhow::Result<()> {
|
|
||||||
self.underlying.flush()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
|
||||||
self.underlying.check_lowlevel()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_storage_pipeline(
|
|
||||||
&self,
|
|
||||||
settings: StoragePipelineSettings,
|
|
||||||
controller_send: Sender<ControllerMessage>,
|
|
||||||
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
|
||||||
let subsequent_pipeline = self
|
|
||||||
.underlying
|
|
||||||
.build_storage_pipeline(settings.clone(), controller_send.clone())?;
|
|
||||||
|
|
||||||
let (input_to_this_stage, receiver) = crossbeam_channel::bounded(8);
|
|
||||||
|
|
||||||
thread::Builder::new()
|
|
||||||
.name("yama Aguard".to_owned())
|
|
||||||
.spawn(move || {
|
|
||||||
if let Err(err) = pipeline(subsequent_pipeline, receiver) {
|
|
||||||
controller_send
|
|
||||||
.send(ControllerMessage::Failure {
|
|
||||||
worker_id: Arc::new(String::from("accessguard")),
|
|
||||||
error_message: format!("err {:?}", err),
|
|
||||||
})
|
|
||||||
.expect("This is BAD: failed to send failure message to controller.");
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
Ok(input_to_this_stage)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
// TODO(question) Should we be described in the pipeline?
|
|
||||||
self.underlying.describe_pipeline()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -15,7 +15,6 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::thread::JoinHandle;
|
use std::thread::JoinHandle;
|
||||||
|
@ -25,13 +24,10 @@ use crossbeam_channel::{Receiver, Sender};
|
||||||
use derivative::Derivative;
|
use derivative::Derivative;
|
||||||
use log::error;
|
use log::error;
|
||||||
use metrics::{register_counter, Unit};
|
use metrics::{register_counter, Unit};
|
||||||
use zstd::bulk::{Compressor, Decompressor};
|
use zstd::block::{Compressor, Decompressor};
|
||||||
|
|
||||||
use crate::definitions::ChunkId;
|
use crate::definitions::ChunkId;
|
||||||
use crate::pile::{
|
use crate::pile::{ControllerMessage, DebugStatistics, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
ControllerMessage, DebugStatistics, Keyspace, PipelineDescription, RawPile,
|
|
||||||
StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
|
pub const DECOMPRESS_CAPACITY: usize = 32 * 1024 * 1024;
|
||||||
|
|
||||||
|
@ -154,10 +150,9 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
queue: Receiver<(Vec<u8>, Sender<Vec<u8>>)>,
|
queue: Receiver<(Vec<u8>, Sender<Vec<u8>>)>,
|
||||||
settings: CompressionSettings,
|
settings: CompressionSettings,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let mut compressor =
|
let mut compressor = Compressor::with_dict(settings.dictionary.as_ref().clone());
|
||||||
Compressor::with_dictionary(settings.level, settings.dictionary.as_ref())?;
|
|
||||||
while let Ok((job, response_sender)) = queue.recv() {
|
while let Ok((job, response_sender)) = queue.recv() {
|
||||||
let result = compressor.compress(&job)?;
|
let result = compressor.compress(&job, settings.level)?;
|
||||||
response_sender
|
response_sender
|
||||||
.send(result)
|
.send(result)
|
||||||
.or(Err(anyhow!("Couldn't send compression result")))?;
|
.or(Err(anyhow!("Couldn't send compression result")))?;
|
||||||
|
@ -169,7 +164,7 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
queue: Receiver<(Vec<u8>, Sender<Vec<u8>>)>,
|
queue: Receiver<(Vec<u8>, Sender<Vec<u8>>)>,
|
||||||
settings: CompressionSettings,
|
settings: CompressionSettings,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let mut decompressor = Decompressor::with_dictionary(settings.dictionary.as_ref())?;
|
let mut decompressor = Decompressor::with_dict(settings.dictionary.as_ref().clone());
|
||||||
while let Ok((job, response_sender)) = queue.recv() {
|
while let Ok((job, response_sender)) = queue.recv() {
|
||||||
let result = decompressor.decompress(&job, DECOMPRESS_CAPACITY)?;
|
let result = decompressor.decompress(&job, DECOMPRESS_CAPACITY)?;
|
||||||
response_sender
|
response_sender
|
||||||
|
@ -234,11 +229,11 @@ impl<R: RawPile> RawPileCompressor<R> {
|
||||||
"id" => worker_id
|
"id" => worker_id
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut compressor =
|
let mut compressor = Compressor::with_dict(self.settings.dictionary.as_ref().clone());
|
||||||
Compressor::with_dictionary(self.settings.level, self.settings.dictionary.as_ref())?;
|
let level = self.settings.level;
|
||||||
while let Ok((chunk_id, bytes)) = input.recv() {
|
while let Ok((chunk_id, bytes)) = input.recv() {
|
||||||
let in_bytes = bytes.len();
|
let in_bytes = bytes.len();
|
||||||
let bytes = compressor.compress(&bytes)?;
|
let bytes = compressor.compress(&bytes, level)?;
|
||||||
let out_bytes = bytes.len();
|
let out_bytes = bytes.len();
|
||||||
next_stage.send((chunk_id, bytes))?;
|
next_stage.send((chunk_id, bytes))?;
|
||||||
// Per-worker metrics
|
// Per-worker metrics
|
||||||
|
@ -278,10 +273,6 @@ impl<R: RawPile> RawPile for RawPileCompressor<R> {
|
||||||
self.underlying.delete(kind, key)
|
self.underlying.delete(kind, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.underlying.delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -339,21 +330,4 @@ impl<R: RawPile> RawPile for RawPileCompressor<R> {
|
||||||
|
|
||||||
Ok(input_to_this_stage)
|
Ok(input_to_this_stage)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
let mut underlying = self.underlying.describe_pipeline()?;
|
|
||||||
|
|
||||||
let mut dict_fingerprint_u256 = [0; 32];
|
|
||||||
blake::hash(256, &self.settings.dictionary, &mut dict_fingerprint_u256)?;
|
|
||||||
let dictionary_fingerprint: u64 =
|
|
||||||
u64::from_be_bytes(dict_fingerprint_u256[0..8].try_into().unwrap());
|
|
||||||
underlying.push(PipelineDescription::Compression {
|
|
||||||
dictionary_fingerprint,
|
|
||||||
});
|
|
||||||
Ok(underlying)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,9 +21,7 @@ use sodiumoxide::crypto::secretbox;
|
||||||
use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES};
|
use sodiumoxide::crypto::secretbox::{Key, Nonce, NONCEBYTES};
|
||||||
|
|
||||||
use crate::definitions::ChunkId;
|
use crate::definitions::ChunkId;
|
||||||
use crate::pile::{
|
use crate::pile::{ControllerMessage, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
use crossbeam_channel::Sender;
|
use crossbeam_channel::Sender;
|
||||||
|
|
||||||
/// A RawPile that provides encryption of chunk contents.
|
/// A RawPile that provides encryption of chunk contents.
|
||||||
|
@ -101,10 +99,6 @@ impl<R: RawPile> RawPile for RawPileEncryptor<R> {
|
||||||
self.underlying.delete(kind, key)
|
self.underlying.delete(kind, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.underlying.delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -120,19 +114,9 @@ impl<R: RawPile> RawPile for RawPileEncryptor<R> {
|
||||||
|
|
||||||
fn build_storage_pipeline(
|
fn build_storage_pipeline(
|
||||||
&self,
|
&self,
|
||||||
_settings: StoragePipelineSettings,
|
settings: StoragePipelineSettings,
|
||||||
_controller_send: Sender<ControllerMessage>,
|
controller_send: Sender<ControllerMessage>,
|
||||||
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
let mut underlying = self.underlying.describe_pipeline()?;
|
|
||||||
underlying.push(PipelineDescription::Encryption);
|
|
||||||
Ok(underlying)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,10 +20,7 @@ use std::hash::Hasher;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::definitions::{ChunkId, XXH64_SEED};
|
use crate::definitions::{ChunkId, XXH64_SEED};
|
||||||
use crate::pile::{
|
use crate::pile::{ControllerMessage, DebugStatistics, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
ControllerMessage, DebugStatistics, Keyspace, PipelineDescription, RawPile,
|
|
||||||
StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
use crate::utils::bytes_to_hexstring;
|
use crate::utils::bytes_to_hexstring;
|
||||||
use crossbeam_channel::Sender;
|
use crossbeam_channel::Sender;
|
||||||
|
|
||||||
|
@ -98,10 +95,6 @@ impl<RP: RawPile> RawPile for RawPileIntegrityChecker<RP> {
|
||||||
self.underlying.delete(kind, key)
|
self.underlying.delete(kind, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
self.underlying.delete_many(kind, keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -147,14 +140,4 @@ impl<RP: RawPile> RawPile for RawPileIntegrityChecker<RP> {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
Ok(input)
|
Ok(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
let mut underlying = self.underlying.describe_pipeline()?;
|
|
||||||
underlying.push(PipelineDescription::Integrity);
|
|
||||||
Ok(underlying)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,32 +15,28 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
|
use std::collections::hash_map::Entry;
|
||||||
|
use std::collections::{HashMap, VecDeque};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::fs::{read_dir, remove_file, File, OpenOptions};
|
use std::fs::{read_dir, File, OpenOptions};
|
||||||
use std::io::{Read, Seek, SeekFrom, Write};
|
use std::io::{Read, Seek, SeekFrom, Write};
|
||||||
use std::os::unix::fs::MetadataExt;
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{Arc, Condvar, Mutex};
|
use std::sync::{Arc, Condvar, Mutex};
|
||||||
use std::time::Duration;
|
|
||||||
use std::{fs, thread};
|
use std::{fs, thread};
|
||||||
|
|
||||||
use anyhow::{bail, ensure, Context};
|
use anyhow::{bail, Context};
|
||||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
|
||||||
use log::{info, warn};
|
use log::{info, warn};
|
||||||
use nix::unistd::sync;
|
use nix::unistd::sync;
|
||||||
use rusqlite::ffi::ErrorCode::ConstraintViolation;
|
use rusqlite::{params, Error, ErrorCode};
|
||||||
use rusqlite::{params, Error, ErrorCode, Transaction, TransactionBehavior, NO_PARAMS};
|
|
||||||
use rusqlite::{Connection, OptionalExtension};
|
use rusqlite::{Connection, OptionalExtension};
|
||||||
|
|
||||||
use crate::definitions::ChunkId;
|
use crate::definitions::ChunkId;
|
||||||
use crate::pile::{
|
use crate::pile::{ControllerMessage, DebugStatistics, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
ControllerMessage, DebugStatistics, Keyspace, PipelineDescription, RawPile,
|
use crate::utils::bytes_to_hexstring;
|
||||||
StoragePipelineSettings,
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
};
|
use rusqlite::ffi::ErrorCode::ConstraintViolation;
|
||||||
use crate::progress::ProgressTracker;
|
use std::time::Duration;
|
||||||
use crate::utils::{bytes_to_hexstring, LruMap};
|
|
||||||
|
|
||||||
/// Bloblogs will not be reused if they are already 2 GiB large.
|
/// Bloblogs will not be reused if they are already 2 GiB large.
|
||||||
pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
|
pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
|
||||||
|
@ -48,14 +44,6 @@ pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
|
||||||
/// This many pointers will be batched up for writing.
|
/// This many pointers will be batched up for writing.
|
||||||
pub const POINTER_WRITE_BATCHES: usize = 2048;
|
pub const POINTER_WRITE_BATCHES: usize = 2048;
|
||||||
|
|
||||||
/// This many bloblogs will be kept open for reading, at maximum.
|
|
||||||
pub const BLOBLOG_MAX_READING_FILE_COUNT: usize = 128;
|
|
||||||
|
|
||||||
/// Size of a blob header within a bloblog.
|
|
||||||
/// 32 byte Chunk Id
|
|
||||||
/// 4 byte (u32) Blob size
|
|
||||||
pub const BLOB_HEADER_SIZE: u64 = 32 + 4;
|
|
||||||
|
|
||||||
/// A file storing a log of blobs.
|
/// A file storing a log of blobs.
|
||||||
/// Format:
|
/// Format:
|
||||||
/// Repeated:
|
/// Repeated:
|
||||||
|
@ -145,8 +133,8 @@ pub type BloblogId = u32;
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Inner {
|
pub struct Inner {
|
||||||
next_bloblog_id: BloblogId,
|
next_bloblog_id: BloblogId,
|
||||||
writer_bloblogs: Vec<(BloblogId, Arc<Mutex<Bloblog>>)>,
|
writer_bloblogs: Vec<BloblogId>,
|
||||||
reader_bloblogs: LruMap<BloblogId, Arc<Mutex<Bloblog>>>,
|
open_bloblogs: HashMap<BloblogId, Arc<Mutex<Bloblog>>>, // TODO want an LRU cache with a weak hashmap...?
|
||||||
connection: Connection,
|
connection: Connection,
|
||||||
writers_in_progress: u16,
|
writers_in_progress: u16,
|
||||||
// We batch up pointer writes because sync() performance really hurts us if we do them one by
|
// We batch up pointer writes because sync() performance really hurts us if we do them one by
|
||||||
|
@ -154,13 +142,14 @@ pub struct Inner {
|
||||||
queued_pointer_writes: HashMap<ChunkId, BloblogPointer>,
|
queued_pointer_writes: HashMap<ChunkId, BloblogPointer>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn raw_put_chunk_pointer_txn(
|
impl Inner {
|
||||||
txn: &Transaction,
|
pub fn raw_put_chunk_pointer(
|
||||||
|
&self,
|
||||||
chunk_id: &ChunkId,
|
chunk_id: &ChunkId,
|
||||||
bloblog: BloblogId,
|
bloblog: BloblogId,
|
||||||
offset_i64: i64,
|
offset_i64: i64,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
match txn.execute(
|
match self.connection.execute(
|
||||||
"INSERT INTO chunks (chunk_id, bloblog, offset) VALUES (?1, ?2, ?3)",
|
"INSERT INTO chunks (chunk_id, bloblog, offset) VALUES (?1, ?2, ?3)",
|
||||||
params![&chunk_id[..], bloblog, offset_i64],
|
params![&chunk_id[..], bloblog, offset_i64],
|
||||||
) {
|
) {
|
||||||
|
@ -173,41 +162,27 @@ fn raw_put_chunk_pointer_txn(
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
Err(Error::SqliteFailure(e, str).into())
|
Err(Error::SqliteFailure(e, str))?;
|
||||||
|
unreachable!();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(other) => Err(other.into()),
|
other => {
|
||||||
|
other?;
|
||||||
|
unreachable!();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Inner {
|
|
||||||
pub fn raw_put_chunk_pointer(
|
|
||||||
&mut self,
|
|
||||||
chunk_id: &ChunkId,
|
|
||||||
bloblog: BloblogId,
|
|
||||||
offset_i64: i64,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let txn = self.connection.transaction()?;
|
|
||||||
raw_put_chunk_pointer_txn(&txn, chunk_id, bloblog, offset_i64)?;
|
|
||||||
txn.commit()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn flush(&mut self) -> anyhow::Result<()> {
|
pub fn flush(&mut self) -> anyhow::Result<()> {
|
||||||
// Create a non-allocated hashmap to satisfy borrow checker, then swap it in and out
|
// Create a non-allocated hashmap to satisfy borrow checker, then swap it in and out
|
||||||
let mut queued_pointer_writes = HashMap::with_capacity(0);
|
let mut queued_pointer_writes = HashMap::with_capacity(0);
|
||||||
std::mem::swap(&mut self.queued_pointer_writes, &mut queued_pointer_writes);
|
std::mem::swap(&mut self.queued_pointer_writes, &mut queued_pointer_writes);
|
||||||
|
|
||||||
let txn = self.connection.transaction()?;
|
|
||||||
|
|
||||||
for (chunk_id, pointer) in queued_pointer_writes.drain() {
|
for (chunk_id, pointer) in queued_pointer_writes.drain() {
|
||||||
let offset_i64 =
|
let offset_i64 =
|
||||||
i64::try_from(pointer.offset).expect("ouch! can't turn u64 into i64...");
|
i64::try_from(pointer.offset).expect("ouch! can't turn u64 into i64...");
|
||||||
raw_put_chunk_pointer_txn(&txn, &chunk_id, pointer.bloblog, offset_i64)?;
|
self.raw_put_chunk_pointer(&chunk_id, pointer.bloblog, offset_i64)?;
|
||||||
}
|
}
|
||||||
std::mem::swap(&mut self.queued_pointer_writes, &mut queued_pointer_writes);
|
std::mem::swap(&mut self.queued_pointer_writes, &mut queued_pointer_writes);
|
||||||
|
|
||||||
txn.commit()?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -269,18 +244,11 @@ impl SqliteBloblogPile {
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable WAL mode for significantly better write performance.
|
|
||||||
connection.execute_batch(
|
|
||||||
"
|
|
||||||
PRAGMA journal_mode=WAL;
|
|
||||||
",
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok(SqliteBloblogPile {
|
Ok(SqliteBloblogPile {
|
||||||
inner: Arc::new(Mutex::new(Inner {
|
inner: Arc::new(Mutex::new(Inner {
|
||||||
next_bloblog_id: 0,
|
next_bloblog_id: 0,
|
||||||
writer_bloblogs: Vec::new(),
|
writer_bloblogs: Vec::new(),
|
||||||
reader_bloblogs: LruMap::new(BLOBLOG_MAX_READING_FILE_COUNT),
|
open_bloblogs: HashMap::new(),
|
||||||
connection,
|
connection,
|
||||||
writers_in_progress: 0,
|
writers_in_progress: 0,
|
||||||
queued_pointer_writes: Default::default(),
|
queued_pointer_writes: Default::default(),
|
||||||
|
@ -293,33 +261,23 @@ impl SqliteBloblogPile {
|
||||||
|
|
||||||
fn open_bloblog(&self, bloblog_id: BloblogId) -> anyhow::Result<Arc<Mutex<Bloblog>>> {
|
fn open_bloblog(&self, bloblog_id: BloblogId) -> anyhow::Result<Arc<Mutex<Bloblog>>> {
|
||||||
let mut inner = self.inner.lock().unwrap();
|
let mut inner = self.inner.lock().unwrap();
|
||||||
|
Ok(match inner.open_bloblogs.entry(bloblog_id) {
|
||||||
match inner.reader_bloblogs.get(&bloblog_id) {
|
Entry::Occupied(entry) => entry.get().clone(),
|
||||||
Some(bloblog) => Ok(bloblog.clone()),
|
Entry::Vacant(entry) => {
|
||||||
None => {
|
|
||||||
let bloblog = Arc::new(Mutex::new(Bloblog::open(
|
let bloblog = Arc::new(Mutex::new(Bloblog::open(
|
||||||
&self.path.join(&bloblog_id.to_string()),
|
&self.path.join(&bloblog_id.to_string()),
|
||||||
)?));
|
)?));
|
||||||
inner.reader_bloblogs.insert(bloblog_id, bloblog.clone());
|
entry.insert(bloblog.clone());
|
||||||
Ok(bloblog)
|
bloblog
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_writing_bloblog(&self) -> anyhow::Result<(BloblogId, Arc<Mutex<Bloblog>>)> {
|
fn get_writing_bloblog(&self) -> anyhow::Result<(BloblogId, Arc<Mutex<Bloblog>>)> {
|
||||||
let mut inner = self.inner.lock().unwrap();
|
let mut inner = self.inner.lock().unwrap();
|
||||||
|
let writing_bloblog_id: BloblogId = match inner.writer_bloblogs.pop() {
|
||||||
inner.writers_in_progress += 1;
|
None => {
|
||||||
|
loop {
|
||||||
if let Some(writing_bloblog) = inner.writer_bloblogs.pop() {
|
|
||||||
// We already have an open bloblog to give back.
|
|
||||||
return Ok(writing_bloblog);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No open bloblogs to reuse; create a new one.
|
|
||||||
// It's very important to create a fresh one here; we definitely don't want to use a file
|
|
||||||
// that someone else is using!
|
|
||||||
let writing_bloblog_id = loop {
|
|
||||||
let pre_inc = inner.next_bloblog_id;
|
let pre_inc = inner.next_bloblog_id;
|
||||||
inner.next_bloblog_id += 1;
|
inner.next_bloblog_id += 1;
|
||||||
|
|
||||||
|
@ -328,18 +286,26 @@ impl SqliteBloblogPile {
|
||||||
if !bloblog_path.exists() {
|
if !bloblog_path.exists() {
|
||||||
break pre_inc;
|
break pre_inc;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(id) => id,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let result = Ok((
|
||||||
|
writing_bloblog_id,
|
||||||
|
match inner.open_bloblogs.entry(writing_bloblog_id) {
|
||||||
|
Entry::Occupied(entry) => entry.get().clone(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
let bloblog = Arc::new(Mutex::new(Bloblog::open(
|
let bloblog = Arc::new(Mutex::new(Bloblog::open(
|
||||||
&self.path.join(&writing_bloblog_id.to_string()),
|
&self.path.join(&writing_bloblog_id.to_string()),
|
||||||
)?));
|
)?));
|
||||||
|
entry.insert(bloblog.clone());
|
||||||
// MAYBE FUTURE // Insert a weak reference so we can easily get a reader for this if desired.
|
bloblog
|
||||||
// inner.open_bloblogs.insert(writing_bloblog_id, Arc::downgrade(&bloblog));
|
}
|
||||||
// For now, I don't think we actually care about reading a bloblog that we've written
|
},
|
||||||
// (at least not usually?)
|
));
|
||||||
|
inner.writers_in_progress += 1;
|
||||||
Ok((writing_bloblog_id, bloblog))
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Should be called once the bloblog has been finished writing to for the moment.
|
/// Should be called once the bloblog has been finished writing to for the moment.
|
||||||
|
@ -352,7 +318,7 @@ impl SqliteBloblogPile {
|
||||||
let size = bloblog.lock().unwrap().filesize()?;
|
let size = bloblog.lock().unwrap().filesize()?;
|
||||||
let mut inner = self.inner.lock().unwrap();
|
let mut inner = self.inner.lock().unwrap();
|
||||||
if size < MAX_BLOBLOG_REUSE_SIZE {
|
if size < MAX_BLOBLOG_REUSE_SIZE {
|
||||||
inner.writer_bloblogs.push((id, bloblog));
|
inner.writer_bloblogs.push(id);
|
||||||
}
|
}
|
||||||
inner.writers_in_progress -= 1;
|
inner.writers_in_progress -= 1;
|
||||||
if inner.writers_in_progress == 0 {
|
if inner.writers_in_progress == 0 {
|
||||||
|
@ -378,33 +344,8 @@ impl SqliteBloblogPile {
|
||||||
.optional()?)
|
.optional()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_chunk_pointers(
|
|
||||||
&self,
|
|
||||||
chunk_ids: &[&[u8]],
|
|
||||||
) -> anyhow::Result<Vec<Option<BloblogPointer>>> {
|
|
||||||
let mut inner = self.inner.lock().unwrap();
|
|
||||||
let txn = inner.connection.transaction()?;
|
|
||||||
let mut result = Vec::with_capacity(chunk_ids.len());
|
|
||||||
{
|
|
||||||
let mut stmt = txn.prepare("SELECT bloblog, offset FROM chunks WHERE chunk_id = ?1")?;
|
|
||||||
for &chunk_id in chunk_ids {
|
|
||||||
let bloglog_pointer: Option<BloblogPointer> = stmt
|
|
||||||
.query_row(params![chunk_id], |row| {
|
|
||||||
Ok(BloblogPointer {
|
|
||||||
bloblog: row.get(0)?,
|
|
||||||
offset: row.get::<_, i64>(1)? as u64,
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.optional()?;
|
|
||||||
result.push(bloglog_pointer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
txn.commit()?;
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn put_chunk_pointer(&self, chunk_id: &ChunkId, pointer: BloblogPointer) -> anyhow::Result<()> {
|
fn put_chunk_pointer(&self, chunk_id: &ChunkId, pointer: BloblogPointer) -> anyhow::Result<()> {
|
||||||
let mut inner = self.inner.lock().unwrap();
|
let inner = self.inner.lock().unwrap();
|
||||||
let offset_i64 = i64::try_from(pointer.offset).expect("ouch! can't turn u64 into i64...");
|
let offset_i64 = i64::try_from(pointer.offset).expect("ouch! can't turn u64 into i64...");
|
||||||
inner.raw_put_chunk_pointer(chunk_id, pointer.bloblog, offset_i64)
|
inner.raw_put_chunk_pointer(chunk_id, pointer.bloblog, offset_i64)
|
||||||
}
|
}
|
||||||
|
@ -442,7 +383,7 @@ impl SqliteBloblogPile {
|
||||||
pointers_buffered: &mut Vec<(ChunkId, BloblogPointer)>,
|
pointers_buffered: &mut Vec<(ChunkId, BloblogPointer)>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let mut inner = this.inner.lock().unwrap();
|
let mut inner = this.inner.lock().unwrap();
|
||||||
let txn = inner.connection.transaction()?;
|
let mut txn = inner.connection.transaction()?;
|
||||||
{
|
{
|
||||||
let mut stmt = txn.prepare(
|
let mut stmt = txn.prepare(
|
||||||
"INSERT OR FAIL INTO chunks (chunk_id, bloblog, offset) VALUES (?1, ?2, ?3)",
|
"INSERT OR FAIL INTO chunks (chunk_id, bloblog, offset) VALUES (?1, ?2, ?3)",
|
||||||
|
@ -528,341 +469,6 @@ impl SqliteBloblogPile {
|
||||||
assert!(pointers_buffered.is_empty());
|
assert!(pointers_buffered.is_empty());
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Look at the bloblogs in this pile and see where space may be reclaimable if we were to
|
|
||||||
/// compact.
|
|
||||||
///
|
|
||||||
/// Next step: plan_compaction
|
|
||||||
pub fn analyse_for_compaction(&self) -> anyhow::Result<BTreeMap<BloblogId, BloblogStats>> {
|
|
||||||
let mut inner = self.inner.lock().unwrap();
|
|
||||||
// Lock the database right away.
|
|
||||||
let txn = inner
|
|
||||||
.connection
|
|
||||||
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
|
|
||||||
let mut stmt = txn.prepare(
|
|
||||||
"
|
|
||||||
SELECT bloblog, COUNT(c.offset), COUNT(d.offset), SUM(COALESCE(d.size, 0))
|
|
||||||
FROM chunks c LEFT JOIN deleted d USING (bloblog, offset)
|
|
||||||
GROUP BY bloblog
|
|
||||||
",
|
|
||||||
)?;
|
|
||||||
|
|
||||||
struct UnpopulatedBloblogStats {
|
|
||||||
pub bloblog_id: BloblogId,
|
|
||||||
pub chunks_total: u64,
|
|
||||||
pub chunks_deleted: u64,
|
|
||||||
pub bytes_deleted: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
let unpopul_bloblog_stats = stmt.query_map(NO_PARAMS, |row| {
|
|
||||||
Ok(UnpopulatedBloblogStats {
|
|
||||||
bloblog_id: row.get(0)?,
|
|
||||||
chunks_total: row.get::<_, i64>(1)?.try_into().expect("i64 -> u64"),
|
|
||||||
chunks_deleted: row.get::<_, i64>(2)?.try_into().expect("i64 -> u64"),
|
|
||||||
bytes_deleted: row.get::<_, i64>(3)?.try_into().expect("i64 -> u64"),
|
|
||||||
})
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let mut final_stats = BTreeMap::new();
|
|
||||||
|
|
||||||
for unpopul_stat in unpopul_bloblog_stats {
|
|
||||||
let UnpopulatedBloblogStats {
|
|
||||||
bloblog_id,
|
|
||||||
chunks_total,
|
|
||||||
chunks_deleted,
|
|
||||||
bytes_deleted,
|
|
||||||
} = unpopul_stat?;
|
|
||||||
let bloblog_path = self.path.join(&bloblog_id.to_string());
|
|
||||||
let bytes_total = std::fs::metadata(&bloblog_path)
|
|
||||||
.with_context(|| format!("Failed to get metadata for bloblog: {:?}", bloblog_path))?
|
|
||||||
.size();
|
|
||||||
|
|
||||||
final_stats.insert(
|
|
||||||
bloblog_id,
|
|
||||||
BloblogStats {
|
|
||||||
chunks_total,
|
|
||||||
chunks_deleted,
|
|
||||||
bytes_total,
|
|
||||||
// Add a slight correction since we can count the blob headers of deleted blobs
|
|
||||||
// as deleted.
|
|
||||||
bytes_deleted: bytes_deleted + chunks_deleted * BLOB_HEADER_SIZE,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(final_stats)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Look at the analysis of compaction and, using the specified thresholds, come up with a plan
|
|
||||||
/// to perform compaction.
|
|
||||||
///
|
|
||||||
/// May return an empty plan if compaction isn't worthwhile.
|
|
||||||
///
|
|
||||||
/// Previous step: analyse_for_compaction
|
|
||||||
/// Next step: perform_compaction
|
|
||||||
pub fn plan_compaction(
|
|
||||||
&self,
|
|
||||||
thresholds: &CompactionThresholds,
|
|
||||||
analysis: BTreeMap<BloblogId, BloblogStats>,
|
|
||||||
) -> anyhow::Result<CompactionPlan> {
|
|
||||||
let bloblogs_to_replace: BTreeMap<BloblogId, BloblogStats> = analysis
|
|
||||||
.into_iter()
|
|
||||||
.filter(|(_id, stats)| thresholds.should_replace_bloblog(stats))
|
|
||||||
.collect();
|
|
||||||
let reclaimable_space: u64 = bloblogs_to_replace
|
|
||||||
.values()
|
|
||||||
.map(|bs| bs.bytes_deleted)
|
|
||||||
.sum();
|
|
||||||
let bytes_to_write: u64 = bloblogs_to_replace
|
|
||||||
.values()
|
|
||||||
.map(|bs| bs.bytes_total - bs.bytes_deleted)
|
|
||||||
.sum();
|
|
||||||
let small_bloblogs: u32 = bloblogs_to_replace
|
|
||||||
.values()
|
|
||||||
.filter(|bs| bs.bytes_total - bs.bytes_deleted < thresholds.cond_if_less_allocated_than)
|
|
||||||
.count() as u32;
|
|
||||||
|
|
||||||
if reclaimable_space < thresholds.minimum_to_reclaim
|
|
||||||
&& small_bloblogs < thresholds.minimum_small_bloblogs_to_merge
|
|
||||||
{
|
|
||||||
// Nothing worth doing: return an empty plan.
|
|
||||||
return Ok(CompactionPlan {
|
|
||||||
bloblogs_to_replace: Default::default(),
|
|
||||||
bytes_to_write: 0,
|
|
||||||
reclaimable_space: 0,
|
|
||||||
small_bloblogs: 0,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(CompactionPlan {
|
|
||||||
bloblogs_to_replace: bloblogs_to_replace.keys().copied().collect(),
|
|
||||||
bytes_to_write,
|
|
||||||
reclaimable_space,
|
|
||||||
small_bloblogs,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Given a compaction plan, perform the compaction.
|
|
||||||
/// There shouldn't be any decisions left to be made at this point: just action.
|
|
||||||
///
|
|
||||||
/// TODO flock the bloblogs to be removed and make readers and writers also flock them too.
|
|
||||||
///
|
|
||||||
/// TODO find a way to deal with bloblogs that are entirely unreferenced from the index
|
|
||||||
/// (e.g. bloblogs that weren't written properly, e.g. if compaction fails.)
|
|
||||||
pub fn perform_compaction(
|
|
||||||
&self,
|
|
||||||
mut progress: Box<dyn ProgressTracker>,
|
|
||||||
plan: CompactionPlan,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
|
|
||||||
struct ReplacedBlobRow {
|
|
||||||
pub old_bloblog: BloblogId,
|
|
||||||
pub old_offset: u64,
|
|
||||||
pub chunk_id: ChunkId,
|
|
||||||
}
|
|
||||||
|
|
||||||
if plan.bloblogs_to_replace.is_empty() {
|
|
||||||
info!("No compaction to be done.");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut to_preserve = BTreeSet::new();
|
|
||||||
let mut replacements = BTreeMap::new();
|
|
||||||
|
|
||||||
progress.set_max_size(plan.bytes_to_write);
|
|
||||||
|
|
||||||
// First find all the blobs we need to replace.
|
|
||||||
{
|
|
||||||
let mut inner = self.inner.lock().unwrap();
|
|
||||||
// Lock the database right away.
|
|
||||||
let txn = inner
|
|
||||||
.connection
|
|
||||||
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
|
|
||||||
let mut stmt = txn.prepare(
|
|
||||||
"
|
|
||||||
SELECT chunk_id, c.offset
|
|
||||||
FROM chunks c LEFT JOIN deleted d USING (bloblog, offset)
|
|
||||||
WHERE bloblog = ?1 AND d.offset IS NULL
|
|
||||||
",
|
|
||||||
)?;
|
|
||||||
for bloblog in plan.bloblogs_to_replace.iter().copied() {
|
|
||||||
to_preserve.extend(
|
|
||||||
stmt.query_map([bloblog], |row| {
|
|
||||||
let mut chunk_id = ChunkId::default();
|
|
||||||
chunk_id.copy_from_slice(row.get::<_, Vec<u8>>(0).unwrap().as_slice());
|
|
||||||
Ok(ReplacedBlobRow {
|
|
||||||
old_bloblog: bloblog,
|
|
||||||
chunk_id,
|
|
||||||
old_offset: row.get::<_, i64>(1).unwrap().try_into().unwrap(),
|
|
||||||
})
|
|
||||||
})?
|
|
||||||
.collect::<Result<Vec<ReplacedBlobRow>, _>>()?,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Then make the replacements
|
|
||||||
info!("Rewriting bloblogs...");
|
|
||||||
let mut buf = Vec::new();
|
|
||||||
let mut iterator = to_preserve.into_iter();
|
|
||||||
loop {
|
|
||||||
let (new_bloblog_id, bloglog_mutex) = self.get_writing_bloblog()?;
|
|
||||||
let mut new_bloblog = bloglog_mutex.lock().expect("Failed to lock bloblog?");
|
|
||||||
let mut is_more = false;
|
|
||||||
|
|
||||||
while let Some(preserve) = iterator.next() {
|
|
||||||
is_more = true;
|
|
||||||
|
|
||||||
// Get hold of the old bloblog
|
|
||||||
let old_bloblog = self.open_bloblog(preserve.old_bloblog)?;
|
|
||||||
let mut old_bloblog = old_bloblog.lock().unwrap();
|
|
||||||
|
|
||||||
// Transfer the blob
|
|
||||||
buf.clear();
|
|
||||||
old_bloblog.read_blob(preserve.old_offset, &preserve.chunk_id, &mut buf)?;
|
|
||||||
let new_offset = new_bloblog.write_blob(&preserve.chunk_id, &buf)?;
|
|
||||||
|
|
||||||
// Make a note of the replacement
|
|
||||||
replacements.insert(
|
|
||||||
preserve,
|
|
||||||
BloblogPointer {
|
|
||||||
bloblog: new_bloblog_id,
|
|
||||||
offset: new_offset,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
progress.inc_progress(buf.len() as u64);
|
|
||||||
|
|
||||||
if new_bloblog.filesize()? > MAX_BLOBLOG_REUSE_SIZE {
|
|
||||||
// get a new bloblog to write with.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
drop(new_bloblog);
|
|
||||||
self.return_writing_bloblog(new_bloblog_id, bloglog_mutex)?;
|
|
||||||
|
|
||||||
if !is_more {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
info!("Applying replacements...");
|
|
||||||
{
|
|
||||||
let mut inner = self.inner.lock().unwrap();
|
|
||||||
// Lock the database right away.
|
|
||||||
let txn = inner
|
|
||||||
.connection
|
|
||||||
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
|
|
||||||
let mut stmt = txn.prepare(
|
|
||||||
"
|
|
||||||
UPDATE chunks
|
|
||||||
SET bloblog = ?1, offset = ?2
|
|
||||||
WHERE chunk_id = ?3
|
|
||||||
",
|
|
||||||
)?;
|
|
||||||
|
|
||||||
for (replacement_row, new_pos) in replacements {
|
|
||||||
ensure!(
|
|
||||||
stmt.execute(params![
|
|
||||||
new_pos.bloblog,
|
|
||||||
new_pos.offset as i64,
|
|
||||||
&replacement_row.chunk_id as &[u8]
|
|
||||||
])? == 1,
|
|
||||||
"Wrong number of rows updated for replacement!"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
drop(stmt);
|
|
||||||
txn.commit().context("committing replacements")?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO fsync new bloblogs
|
|
||||||
|
|
||||||
info!("Deleting old bloblogs...");
|
|
||||||
{
|
|
||||||
let mut inner = self.inner.lock().unwrap();
|
|
||||||
// Lock the database right away.
|
|
||||||
let txn = inner
|
|
||||||
.connection
|
|
||||||
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
|
|
||||||
|
|
||||||
for bloblog_id in plan.bloblogs_to_replace.iter().copied() {
|
|
||||||
let deleted_chunks = txn.execute(
|
|
||||||
"
|
|
||||||
DELETE FROM chunks WHERE bloblog = ?1
|
|
||||||
",
|
|
||||||
params![bloblog_id],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let deleted_deleted = txn.execute(
|
|
||||||
"
|
|
||||||
DELETE FROM deleted WHERE bloblog = ?1
|
|
||||||
",
|
|
||||||
params![bloblog_id],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
ensure!(deleted_chunks == deleted_deleted, "Undeleted chunks left in bloblog {bloblog_id}: CHUNKS={deleted_chunks} DELETED={deleted_deleted}");
|
|
||||||
|
|
||||||
let bloblog_path = self.path.join(bloblog_id.to_string());
|
|
||||||
remove_file(&bloblog_path).with_context(|| {
|
|
||||||
format!("Failed to remove obsolete bloblog: {:?}", bloblog_path)
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
|
|
||||||
txn.commit()?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Usage statistics for a single bloblog, as produced by `analyse_for_compaction`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BloblogStats {
    /// Number of chunks the index records in this bloblog (deleted ones included).
    pub chunks_total: u64,
    /// Number of those chunks that are marked as deleted.
    pub chunks_deleted: u64,
    /// Size of the bloblog file on disk, in bytes.
    pub bytes_total: u64,
    /// Bytes occupied by deleted chunks, including their blob headers.
    pub bytes_deleted: u64,
}
|
|
||||||
|
|
||||||
pub struct CompactionPlan {
|
|
||||||
pub bloblogs_to_replace: BTreeSet<BloblogId>,
|
|
||||||
pub bytes_to_write: u64,
|
|
||||||
pub reclaimable_space: u64,
|
|
||||||
pub small_bloblogs: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct CompactionThresholds {
|
|
||||||
/// Minimum bytes to be reclaimable overall for compaction to be worthwhile.
|
|
||||||
pub minimum_to_reclaim: u64,
|
|
||||||
|
|
||||||
/// (alternative reason) Minimum number of files to be undersized in order for compaction
|
|
||||||
/// to be worthwhile.
|
|
||||||
/// This gives us a way to make compaction run if we have lots of tiny bloblogs.
|
|
||||||
pub minimum_small_bloblogs_to_merge: u32,
|
|
||||||
|
|
||||||
/// A bloblog will be replaced if the deallocated size is greater than this.
|
|
||||||
pub cond_if_more_deallocated_than: u64,
|
|
||||||
|
|
||||||
/// A bloblog will be replaced if the allocated size is less than this.
|
|
||||||
pub cond_if_less_allocated_than: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CompactionThresholds {
|
|
||||||
pub fn should_replace_bloblog(&self, bloblog_stats: &BloblogStats) -> bool {
|
|
||||||
let allocated = bloblog_stats.bytes_total - bloblog_stats.bytes_deleted;
|
|
||||||
// Note that this will also trigger for fully-deallocated files if
|
|
||||||
let is_small = allocated < self.cond_if_less_allocated_than;
|
|
||||||
let has_large_deallocations =
|
|
||||||
bloblog_stats.bytes_deleted > self.cond_if_more_deallocated_than;
|
|
||||||
is_small || has_large_deallocations
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Summary counters describing what a compaction run did.
///
/// NOTE(review): nothing in the visible code constructs this type yet. The byte counters are
/// `u32`, which cannot represent 4 GiB or more, whereas the rest of the compaction code
/// tracks byte counts as `u64` — confirm the intended width before relying on these fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactionOutcome {
    /// Number of bloblogs removed — presumably the old, replaced ones; confirm with callers.
    pub bloblogs_deleted: u32,
    /// Number of bloblogs created — presumably the newly written ones; confirm with callers.
    pub bloblogs_created: u32,
    /// Bytes removed by the run.
    pub bytes_deleted: u32,
    /// Bytes written by the run.
    pub bytes_created: u32,
}
|
}
|
||||||
|
|
||||||
impl Drop for SqliteBloblogPile {
|
impl Drop for SqliteBloblogPile {
|
||||||
|
@ -1007,59 +613,6 @@ impl RawPile for SqliteBloblogPile {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
|
|
||||||
match kind {
|
|
||||||
Keyspace::Chunk => {
|
|
||||||
let mut chunk_pointers_by_bloblog: BTreeMap<BloblogId, Vec<(u64, &[u8])>> =
|
|
||||||
BTreeMap::new();
|
|
||||||
|
|
||||||
for (chunk_pointer, chunk_id) in self
|
|
||||||
.get_chunk_pointers(keys)
|
|
||||||
.context("failed to get chunk pointers")?
|
|
||||||
.into_iter()
|
|
||||||
.zip(keys)
|
|
||||||
.filter_map(|(pointer, &chunk_id)| match pointer {
|
|
||||||
Some(pointer) => Some((pointer, chunk_id)),
|
|
||||||
None => None,
|
|
||||||
})
|
|
||||||
{
|
|
||||||
chunk_pointers_by_bloblog
|
|
||||||
.entry(chunk_pointer.bloblog)
|
|
||||||
.or_default()
|
|
||||||
.push((chunk_pointer.offset, chunk_id));
|
|
||||||
}
|
|
||||||
let mut inner = self.inner.lock().unwrap();
|
|
||||||
let txn = inner.connection.transaction()?;
|
|
||||||
{
|
|
||||||
let mut stmt = txn.prepare(
|
|
||||||
"INSERT OR IGNORE INTO deleted (bloblog, offset, size)
|
|
||||||
VALUES (?1, ?2, ?3)",
|
|
||||||
)?;
|
|
||||||
for (bloblog_id, entries) in chunk_pointers_by_bloblog {
|
|
||||||
let bloblog_mutex = self.open_bloblog(bloblog_id)?;
|
|
||||||
let mut bloblog = bloblog_mutex.lock().unwrap();
|
|
||||||
for (chunk_offset, raw_chunk_id) in entries {
|
|
||||||
let mut chunk_id: ChunkId = Default::default();
|
|
||||||
chunk_id.copy_from_slice(raw_chunk_id);
|
|
||||||
let size = bloblog.blob_len(chunk_offset, &chunk_id)?;
|
|
||||||
let offset_i64 = i64::try_from(chunk_offset)
|
|
||||||
.expect("ouch! can't turn u64 into i64...");
|
|
||||||
stmt.execute(params![bloblog_id, offset_i64, size])?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
txn.commit().context("Failed to commit chunk deletions")?;
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
for &key in keys {
|
|
||||||
self.delete(kind, key)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -1153,9 +706,7 @@ impl RawPile for SqliteBloblogPile {
|
||||||
|
|
||||||
let this = self.clone();
|
let this = self.clone();
|
||||||
|
|
||||||
thread::Builder::new()
|
thread::spawn(move || {
|
||||||
.name("SQLBloblogStPpln".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
let worker_id = Arc::new(format!("bloblogwriter"));
|
let worker_id = Arc::new(format!("bloblogwriter"));
|
||||||
if let Err(err) = this.storage_pipeline_worker(incoming) {
|
if let Err(err) = this.storage_pipeline_worker(incoming) {
|
||||||
controller_send
|
controller_send
|
||||||
|
@ -1165,25 +716,10 @@ impl RawPile for SqliteBloblogPile {
|
||||||
})
|
})
|
||||||
.expect("This is BAD: failed to send failure message to controller.");
|
.expect("This is BAD: failed to send failure message to controller.");
|
||||||
}
|
}
|
||||||
})
|
});
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
Ok(sender)
|
Ok(sender)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
Ok(vec![PipelineDescription::Store])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compute a sort key for a chunk from where it is stored.
///
/// The bloblog ID goes in the bits above the low 40 and the offset (truncated to 40 bits) in
/// the low bits, so sorting by the hint orders chunks by bloblog and then by offset.
/// Fails if the chunk has no pointer in the index.
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
    // Scheme: 24-bit bloblog ID
    // followed by 40-bit offset
    const OFFSET_BITS: u32 = 40;
    const OFFSET_MASK: u64 = (1 << OFFSET_BITS) - 1;

    let pointer = self
        .get_chunk_pointer(chunk_id)?
        .context("Can't get chunk ID transfer ordering hint for chunk without pointer.")?;

    let bloblog_part = (pointer.bloblog as u64) << OFFSET_BITS;
    let offset_part = pointer.offset & OFFSET_MASK;
    Ok(bloblog_part | offset_part)
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct KeyIterator {
|
struct KeyIterator {
|
||||||
|
@ -1229,9 +765,8 @@ impl Iterator for KeyIterator {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use temp_dir::TempDir;
|
|
||||||
|
|
||||||
use crate::pile::local_sqlitebloblogs::Bloblog;
|
use crate::pile::local_sqlitebloblogs::Bloblog;
|
||||||
|
use temp_dir::TempDir;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
pub fn bloblog_read_write_test() {
|
pub fn bloblog_read_write_test() {
|
||||||
|
|
|
@ -22,7 +22,7 @@ use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||||
use serde::de::DeserializeOwned;
|
use serde::de::DeserializeOwned;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::pile::{Keyspace, PipelineDescription};
|
use crate::pile::Keyspace;
|
||||||
|
|
||||||
pub mod requester;
|
pub mod requester;
|
||||||
pub mod responder;
|
pub mod responder;
|
||||||
|
@ -60,7 +60,6 @@ pub enum RequestBody {
|
||||||
},
|
},
|
||||||
Flush,
|
Flush,
|
||||||
LowLevelCheck,
|
LowLevelCheck,
|
||||||
Describe,
|
|
||||||
Shutdown,
|
Shutdown,
|
||||||
Progress {
|
Progress {
|
||||||
current: u64,
|
current: u64,
|
||||||
|
@ -74,7 +73,7 @@ pub struct Response {
|
||||||
body: ResponseBody,
|
body: ResponseBody,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
pub enum ResponseBody {
|
pub enum ResponseBody {
|
||||||
Success,
|
Success,
|
||||||
Failed(String),
|
Failed(String),
|
||||||
|
@ -84,7 +83,6 @@ pub enum ResponseBody {
|
||||||
batch: Vec<Vec<u8>>,
|
batch: Vec<Vec<u8>>,
|
||||||
next_token: u16,
|
next_token: u16,
|
||||||
},
|
},
|
||||||
Description(Vec<PipelineDescription>),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_message<R: Read, D: DeserializeOwned>(read: &mut R) -> anyhow::Result<D> {
|
pub fn read_message<R: Read, D: DeserializeOwned>(read: &mut R) -> anyhow::Result<D> {
|
||||||
|
|
|
@ -4,20 +4,14 @@ use std::sync::{Arc, Mutex};
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::thread::JoinHandle;
|
use std::thread::JoinHandle;
|
||||||
|
|
||||||
use anyhow::{anyhow, bail};
|
use anyhow::anyhow;
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
use log::{error, info};
|
use log::{error, info};
|
||||||
|
|
||||||
use crate::definitions::ChunkId;
|
use crate::definitions::ChunkId;
|
||||||
use crate::pile::{
|
use crate::pile::{ControllerMessage, Keyspace, RawPile, StoragePipelineSettings};
|
||||||
ControllerMessage, Keyspace, PipelineDescription, RawPile, StoragePipelineSettings,
|
|
||||||
};
|
|
||||||
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
|
use crate::remote::{read_message, write_message, Request, RequestBody, Response, ResponseBody};
|
||||||
use metrics::{
|
|
||||||
gauge, histogram, increment_counter, register_counter, register_gauge, register_histogram, Unit,
|
|
||||||
};
|
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
|
||||||
use std::time::Instant;
|
|
||||||
|
|
||||||
/// A kind of RawPile which can make requests to a RawPile over a pipe (e.g. TCP socket or an
|
/// A kind of RawPile which can make requests to a RawPile over a pipe (e.g. TCP socket or an
|
||||||
/// SSH connection).
|
/// SSH connection).
|
||||||
|
@ -32,13 +26,7 @@ impl Requester {
|
||||||
read: R,
|
read: R,
|
||||||
write: W,
|
write: W,
|
||||||
) -> (Self, Vec<JoinHandle<()>>) {
|
) -> (Self, Vec<JoinHandle<()>>) {
|
||||||
register_histogram!(
|
let in_flight: Arc<Mutex<HashMap<u16, Sender<ResponseBody>>>> =
|
||||||
"requester_cmd_response_time_ms",
|
|
||||||
Unit::Milliseconds,
|
|
||||||
"Time between request being issued and a response being received"
|
|
||||||
);
|
|
||||||
|
|
||||||
let in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>> =
|
|
||||||
Arc::new(Mutex::new(HashMap::new()));
|
Arc::new(Mutex::new(HashMap::new()));
|
||||||
let (command_sender, command_receiver) = crossbeam_channel::bounded(16);
|
let (command_sender, command_receiver) = crossbeam_channel::bounded(16);
|
||||||
let mut handles = Vec::new();
|
let mut handles = Vec::new();
|
||||||
|
@ -49,34 +37,22 @@ impl Requester {
|
||||||
// Spawn a reader
|
// Spawn a reader
|
||||||
let in_flight = in_flight.clone();
|
let in_flight = in_flight.clone();
|
||||||
let shutdown_signal = shutdown_signal.clone();
|
let shutdown_signal = shutdown_signal.clone();
|
||||||
handles.push(
|
handles.push(thread::spawn(move || {
|
||||||
thread::Builder::new()
|
|
||||||
.name("ReqstrReader".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
if let Err(e) = Self::reader(read, in_flight, shutdown_signal) {
|
if let Err(e) = Self::reader(read, in_flight, shutdown_signal) {
|
||||||
error!("reader failed: {:?}", e);
|
error!("reader failed: {:?}", e);
|
||||||
}
|
}
|
||||||
})
|
}));
|
||||||
.unwrap(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Spawn a writer
|
// Spawn a writer
|
||||||
let in_flight = in_flight.clone();
|
let in_flight = in_flight.clone();
|
||||||
let command_receiver = command_receiver.clone();
|
let command_receiver = command_receiver.clone();
|
||||||
handles.push(
|
handles.push(thread::spawn(move || {
|
||||||
thread::Builder::new()
|
if let Err(e) = Self::writer(write, in_flight, command_receiver, shutdown_signal) {
|
||||||
.name("ReqstrWriter".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
if let Err(e) =
|
|
||||||
Self::writer(write, in_flight, command_receiver, shutdown_signal)
|
|
||||||
{
|
|
||||||
error!("writer failed: {:?}", e);
|
error!("writer failed: {:?}", e);
|
||||||
}
|
}
|
||||||
})
|
}));
|
||||||
.unwrap(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
(
|
(
|
||||||
|
@ -88,7 +64,7 @@ impl Requester {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_from_stdio() -> (Self, Vec<JoinHandle<()>>) {
|
pub fn new_from_stdio() -> (Self, Vec<JoinHandle<()>>) {
|
||||||
let in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>> =
|
let in_flight: Arc<Mutex<HashMap<u16, Sender<ResponseBody>>>> =
|
||||||
Arc::new(Mutex::new(HashMap::new()));
|
Arc::new(Mutex::new(HashMap::new()));
|
||||||
let (command_sender, command_receiver) = crossbeam_channel::bounded(16);
|
let (command_sender, command_receiver) = crossbeam_channel::bounded(16);
|
||||||
let mut handles = Vec::new();
|
let mut handles = Vec::new();
|
||||||
|
@ -99,38 +75,26 @@ impl Requester {
|
||||||
// Spawn a reader
|
// Spawn a reader
|
||||||
let in_flight = in_flight.clone();
|
let in_flight = in_flight.clone();
|
||||||
let shutdown_signal = shutdown_signal.clone();
|
let shutdown_signal = shutdown_signal.clone();
|
||||||
handles.push(
|
handles.push(thread::spawn(move || {
|
||||||
thread::Builder::new()
|
|
||||||
.name("ReqstrReaderSI".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
let stdin = stdin();
|
let stdin = stdin();
|
||||||
let read = stdin.lock();
|
let read = stdin.lock();
|
||||||
if let Err(e) = Self::reader(read, in_flight, shutdown_signal) {
|
if let Err(e) = Self::reader(read, in_flight, shutdown_signal) {
|
||||||
error!("reader failed: {:?}", e);
|
error!("reader failed: {:?}", e);
|
||||||
}
|
}
|
||||||
})
|
}));
|
||||||
.unwrap(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Spawn a writer
|
// Spawn a writer
|
||||||
let in_flight = in_flight.clone();
|
let in_flight = in_flight.clone();
|
||||||
let command_receiver = command_receiver.clone();
|
let command_receiver = command_receiver.clone();
|
||||||
handles.push(
|
handles.push(thread::spawn(move || {
|
||||||
thread::Builder::new()
|
|
||||||
.name("ReqstrWriterSO".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
let stdout = stdout();
|
let stdout = stdout();
|
||||||
let write = stdout.lock();
|
let write = stdout.lock();
|
||||||
if let Err(e) =
|
if let Err(e) = Self::writer(write, in_flight, command_receiver, shutdown_signal) {
|
||||||
Self::writer(write, in_flight, command_receiver, shutdown_signal)
|
|
||||||
{
|
|
||||||
error!("writer failed: {:?}", e);
|
error!("writer failed: {:?}", e);
|
||||||
}
|
}
|
||||||
})
|
}));
|
||||||
.unwrap(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
(
|
(
|
||||||
|
@ -148,7 +112,7 @@ impl Requester {
|
||||||
/// Thread that reads messages and sends them along.
|
/// Thread that reads messages and sends them along.
|
||||||
fn reader<R: Read>(
|
fn reader<R: Read>(
|
||||||
mut read: R,
|
mut read: R,
|
||||||
in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>>,
|
in_flight: Arc<Mutex<HashMap<u16, Sender<ResponseBody>>>>,
|
||||||
shutdown_request_channel: Arc<(AtomicU16, AtomicBool)>,
|
shutdown_request_channel: Arc<(AtomicU16, AtomicBool)>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
loop {
|
loop {
|
||||||
|
@ -160,18 +124,9 @@ impl Requester {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut map = in_flight.lock().or(Err(anyhow!("Mutex poisoned")))?;
|
let map = in_flight.lock().or(Err(anyhow!("Mutex poisoned")))?;
|
||||||
|
map.get(&response.response_to)
|
||||||
// We free up the ID as we get the sender out of the map.
|
.ok_or(anyhow!("Didn't find response channel..."))?
|
||||||
let (resp_sender, req_instant) = map
|
|
||||||
.remove(&response.response_to)
|
|
||||||
.ok_or(anyhow!("Didn't find response channel..."))?;
|
|
||||||
|
|
||||||
let req_resp_time_in_millis =
|
|
||||||
Instant::now().duration_since(req_instant).as_millis() as f64;
|
|
||||||
histogram!("requester_cmd_response_time_ms", req_resp_time_in_millis);
|
|
||||||
|
|
||||||
resp_sender
|
|
||||||
.send(response.body)
|
.send(response.body)
|
||||||
.or(Err(anyhow!("Failed to send response to channel")))?;
|
.or(Err(anyhow!("Failed to send response to channel")))?;
|
||||||
}
|
}
|
||||||
|
@ -180,7 +135,7 @@ impl Requester {
|
||||||
/// Thread that writes messages.
|
/// Thread that writes messages.
|
||||||
fn writer<W: Write>(
|
fn writer<W: Write>(
|
||||||
mut write: W,
|
mut write: W,
|
||||||
in_flight: Arc<Mutex<HashMap<u16, (Sender<ResponseBody>, Instant)>>>,
|
in_flight: Arc<Mutex<HashMap<u16, Sender<ResponseBody>>>>,
|
||||||
command_receiver: Receiver<(RequestBody, Option<Sender<ResponseBody>>)>,
|
command_receiver: Receiver<(RequestBody, Option<Sender<ResponseBody>>)>,
|
||||||
shutdown_request_channel: Arc<(AtomicU16, AtomicBool)>,
|
shutdown_request_channel: Arc<(AtomicU16, AtomicBool)>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
@ -191,8 +146,7 @@ impl Requester {
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.find(|id| !map.contains_key(&id))
|
.find(|id| !map.contains_key(&id))
|
||||||
.expect("No ID found");
|
.expect("No ID found");
|
||||||
let now = Instant::now();
|
map.insert(request_id, response_channel);
|
||||||
map.insert(request_id, (response_channel, now));
|
|
||||||
request_id
|
request_id
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
|
@ -271,7 +225,8 @@ impl RawPile for Requester {
|
||||||
ResponseBody::Success => Ok(true),
|
ResponseBody::Success => Ok(true),
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
ResponseBody::NotExists => Ok(false),
|
ResponseBody::NotExists => Ok(false),
|
||||||
other => Err(anyhow!("Received {:?} for Exists", other)),
|
ResponseBody::Data(_) => Err(anyhow!("Received Data for exists.")),
|
||||||
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for exists.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
|
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>> {
|
||||||
|
@ -283,7 +238,7 @@ impl RawPile for Requester {
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
ResponseBody::NotExists => Ok(None),
|
ResponseBody::NotExists => Ok(None),
|
||||||
ResponseBody::Data(data) => Ok(Some(data)),
|
ResponseBody::Data(data) => Ok(Some(data)),
|
||||||
other => Err(anyhow!("Received {:?} for Read", other)),
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for read.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
|
fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()> {
|
||||||
|
@ -294,7 +249,9 @@ impl RawPile for Requester {
|
||||||
})? {
|
})? {
|
||||||
ResponseBody::Success => Ok(()),
|
ResponseBody::Success => Ok(()),
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
other => Err(anyhow!("Received {:?} for Write", other)),
|
ResponseBody::NotExists => Err(anyhow!("Received NotExists for write.")),
|
||||||
|
ResponseBody::Data(_) => Err(anyhow!("Received Data for write.")),
|
||||||
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for write.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
|
||||||
|
@ -304,15 +261,11 @@ impl RawPile for Requester {
|
||||||
})? {
|
})? {
|
||||||
ResponseBody::Success => Ok(()),
|
ResponseBody::Success => Ok(()),
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
other => Err(anyhow!("Received {:?} for Delete", other)),
|
ResponseBody::NotExists => Err(anyhow!("Received NotExists for delete.")),
|
||||||
|
ResponseBody::Data(_) => Err(anyhow!("Received Data for delete.")),
|
||||||
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for delete.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Delete several keys by issuing one `delete` request per key, in order, stopping at the
/// first failure.
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
    keys.iter().try_for_each(|&key| self.delete(kind, key))
}
|
|
||||||
fn list_keys(
|
fn list_keys(
|
||||||
&self,
|
&self,
|
||||||
kind: Keyspace,
|
kind: Keyspace,
|
||||||
|
@ -324,33 +277,38 @@ impl RawPile for Requester {
|
||||||
buffer: Vec::with_capacity(0),
|
buffer: Vec::with_capacity(0),
|
||||||
})),
|
})),
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
|
ResponseBody::NotExists => Err(anyhow!("Received NotExists for list_keys.")),
|
||||||
|
ResponseBody::Data(_) => Err(anyhow!("Received Data for list_keys.")),
|
||||||
ResponseBody::BatchData { batch, next_token } => Ok(Box::new(ListKeyIterator {
|
ResponseBody::BatchData { batch, next_token } => Ok(Box::new(ListKeyIterator {
|
||||||
command_sender: self.commands.clone(),
|
command_sender: self.commands.clone(),
|
||||||
batch_token: Some(next_token),
|
batch_token: Some(next_token),
|
||||||
buffer: batch,
|
buffer: batch,
|
||||||
})),
|
})),
|
||||||
other => Err(anyhow!("Received {:?} for List", other)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn flush(&self) -> anyhow::Result<()> {
|
fn flush(&self) -> anyhow::Result<()> {
|
||||||
match self.request(RequestBody::Flush)? {
|
match self.request(RequestBody::Flush)? {
|
||||||
ResponseBody::Success => Ok(()),
|
ResponseBody::Success => Ok(()),
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
other => Err(anyhow!("Received {:?} for Flush", other)),
|
ResponseBody::NotExists => Err(anyhow!("Received NotExists for Flush.")),
|
||||||
|
ResponseBody::Data(_) => Err(anyhow!("Received Data for Flush.")),
|
||||||
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for Flush.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
fn check_lowlevel(&self) -> anyhow::Result<bool> {
|
||||||
match self.request(RequestBody::LowLevelCheck)? {
|
match self.request(RequestBody::LowLevelCheck)? {
|
||||||
ResponseBody::Success => Ok(true),
|
ResponseBody::Success => Ok(true),
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
||||||
other => Err(anyhow!("Received {:?} for LowLevelCheck", other)),
|
ResponseBody::NotExists => Err(anyhow!("Received NotExists for LowLevelCheck.")),
|
||||||
|
ResponseBody::Data(_) => Err(anyhow!("Received Data for LowLevelCheck.")),
|
||||||
|
ResponseBody::BatchData { .. } => Err(anyhow!("Received BatchData for LowLevelCheck.")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_storage_pipeline(
|
fn build_storage_pipeline(
|
||||||
&self,
|
&self,
|
||||||
_settings: StoragePipelineSettings,
|
settings: StoragePipelineSettings,
|
||||||
_controller_send: Sender<ControllerMessage>,
|
controller_send: Sender<ControllerMessage>,
|
||||||
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>> {
|
||||||
// this one is a little bit more complex.
|
// this one is a little bit more complex.
|
||||||
// We want to be able to send off multiple write requests at once, but not too many, so we
|
// We want to be able to send off multiple write requests at once, but not too many, so we
|
||||||
|
@ -358,30 +316,13 @@ impl RawPile for Requester {
|
||||||
let (input, receiver) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(128);
|
let (input, receiver) = crossbeam_channel::bounded::<(ChunkId, Vec<u8>)>(128);
|
||||||
let command_sender = self.commands.clone();
|
let command_sender = self.commands.clone();
|
||||||
|
|
||||||
register_counter!(
|
std::thread::spawn(move || {
|
||||||
"requester_pipeline_cmds_issued",
|
|
||||||
Unit::Count,
|
|
||||||
"Number of write commands issued by the Requester's storage pipeline"
|
|
||||||
);
|
|
||||||
register_gauge!(
|
|
||||||
"requester_pipeline_writes_inflight",
|
|
||||||
Unit::Count,
|
|
||||||
"Number of write commands in-flight"
|
|
||||||
);
|
|
||||||
|
|
||||||
std::thread::Builder::new()
|
|
||||||
.name("ReqStPpln".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
let (response_tx, response_rx) = crossbeam_channel::bounded::<ResponseBody>(32);
|
let (response_tx, response_rx) = crossbeam_channel::bounded::<ResponseBody>(32);
|
||||||
let mut in_flight_writes = 0;
|
let mut in_flight_writes = 0;
|
||||||
const MAX_IN_FLIGHT_WRITES: u32 = 32;
|
const MAX_IN_FLIGHT_WRITES: u32 = 32;
|
||||||
let mut pipeline_still_going = true;
|
let mut pipeline_still_going = true;
|
||||||
|
|
||||||
while pipeline_still_going || in_flight_writes > 0 {
|
while pipeline_still_going || in_flight_writes > 0 {
|
||||||
gauge!(
|
|
||||||
"requester_pipeline_writes_inflight",
|
|
||||||
in_flight_writes as f64
|
|
||||||
);
|
|
||||||
// TODO this won't handle channel closure properly.
|
// TODO this won't handle channel closure properly.
|
||||||
if in_flight_writes < MAX_IN_FLIGHT_WRITES && pipeline_still_going {
|
if in_flight_writes < MAX_IN_FLIGHT_WRITES && pipeline_still_going {
|
||||||
crossbeam_channel::select! {
|
crossbeam_channel::select! {
|
||||||
|
@ -394,13 +335,20 @@ impl RawPile for Requester {
|
||||||
ResponseBody::Failed(string) => {
|
ResponseBody::Failed(string) => {
|
||||||
panic!("Requester pipeline fail {}", string);
|
panic!("Requester pipeline fail {}", string);
|
||||||
}
|
}
|
||||||
other => panic!("wtf {:?}", other),
|
ResponseBody::BatchData { .. } => {
|
||||||
|
panic!("wtf BatchData");
|
||||||
|
}
|
||||||
|
ResponseBody::NotExists => {
|
||||||
|
panic!("wtf NotExists");
|
||||||
|
}
|
||||||
|
ResponseBody::Data(_) => {
|
||||||
|
panic!("wtf Data");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
recv(receiver) -> resp => {
|
recv(receiver) -> resp => {
|
||||||
if let Ok((chunk_id, write)) = resp {
|
if let Ok((chunk_id, write)) = resp {
|
||||||
in_flight_writes += 1;
|
in_flight_writes += 1;
|
||||||
increment_counter!("requester_pipeline_cmds_issued");
|
|
||||||
command_sender.send((RequestBody::Write {
|
command_sender.send((RequestBody::Write {
|
||||||
kind: Keyspace::Chunk,
|
kind: Keyspace::Chunk,
|
||||||
key: chunk_id.to_vec(),
|
key: chunk_id.to_vec(),
|
||||||
|
@ -416,7 +364,6 @@ impl RawPile for Requester {
|
||||||
// Either the pipeline is stopping or we are too busy to accept new chunks,
|
// Either the pipeline is stopping or we are too busy to accept new chunks,
|
||||||
// so only process responses.
|
// so only process responses.
|
||||||
let resp = response_rx.recv().unwrap();
|
let resp = response_rx.recv().unwrap();
|
||||||
in_flight_writes -= 1;
|
|
||||||
match resp {
|
match resp {
|
||||||
ResponseBody::Success => {
|
ResponseBody::Success => {
|
||||||
// nop
|
// nop
|
||||||
|
@ -424,30 +371,22 @@ impl RawPile for Requester {
|
||||||
ResponseBody::Failed(string) => {
|
ResponseBody::Failed(string) => {
|
||||||
panic!("Requester pipeline fail {}", string);
|
panic!("Requester pipeline fail {}", string);
|
||||||
}
|
}
|
||||||
other => panic!("wtf {:?}", other),
|
ResponseBody::BatchData { .. } => {
|
||||||
|
panic!("wtf BatchData");
|
||||||
|
}
|
||||||
|
ResponseBody::NotExists => {
|
||||||
|
panic!("wtf NotExists");
|
||||||
|
}
|
||||||
|
ResponseBody::Data(_) => {
|
||||||
|
panic!("wtf Data");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
.unwrap();
|
});
|
||||||
|
|
||||||
Ok(input)
|
Ok(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
|
|
||||||
match self.request(RequestBody::Describe)? {
|
|
||||||
ResponseBody::Description(mut description) => {
|
|
||||||
description.push(PipelineDescription::Remote);
|
|
||||||
Ok(description)
|
|
||||||
}
|
|
||||||
ResponseBody::Failed(err_msg) => Err(anyhow!("Remote failure: {}", err_msg)),
|
|
||||||
other => Err(anyhow!("Received {:?} for Describe", other)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunk_id_transfer_ordering_hint(&self, _chunk_id: &ChunkId) -> anyhow::Result<u64> {
|
|
||||||
bail!("You probably shouldn't be using chunk ID transfer ordering hints with a remote.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ListKeyIterator {
|
pub struct ListKeyIterator {
|
||||||
|
@ -475,6 +414,8 @@ impl Iterator for ListKeyIterator {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
ResponseBody::Failed(err_msg) => Some(Err(anyhow!("Remote failure: {}", err_msg))),
|
ResponseBody::Failed(err_msg) => Some(Err(anyhow!("Remote failure: {}", err_msg))),
|
||||||
|
ResponseBody::NotExists => Some(Err(anyhow!("Received NotExists for NextBatch."))),
|
||||||
|
ResponseBody::Data(_) => Some(Err(anyhow!("Received Data for NextBatch."))),
|
||||||
ResponseBody::BatchData { batch, next_token } => {
|
ResponseBody::BatchData { batch, next_token } => {
|
||||||
self.batch_token = Some(next_token);
|
self.batch_token = Some(next_token);
|
||||||
self.buffer = batch;
|
self.buffer = batch;
|
||||||
|
@ -486,7 +427,6 @@ impl Iterator for ListKeyIterator {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
other => Some(Err(anyhow!("Received {:?} for NextBatch", other))),
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
|
|
@ -54,33 +54,26 @@ impl Responder {
|
||||||
// spawn the reader
|
// spawn the reader
|
||||||
let work_queue_send = work_queue_send.clone();
|
let work_queue_send = work_queue_send.clone();
|
||||||
let responder = responder.clone();
|
let responder = responder.clone();
|
||||||
thread::Builder::new()
|
thread::spawn(move || {
|
||||||
.name("RespdrReader".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
let mut read = read;
|
let mut read = read;
|
||||||
if let Err(e) = responder.reader(&mut read, work_queue_send, &mut progress_bar)
|
if let Err(e) = responder.reader(&mut read, work_queue_send, &mut progress_bar) {
|
||||||
{
|
|
||||||
error!("reader failed: {:?}", e);
|
error!("reader failed: {:?}", e);
|
||||||
}
|
}
|
||||||
read
|
read
|
||||||
})
|
})
|
||||||
.unwrap()
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let w_handle = {
|
let w_handle = {
|
||||||
// spawn the writer
|
// spawn the writer
|
||||||
let resp_recv = resp_recv.clone();
|
let resp_recv = resp_recv.clone();
|
||||||
let responder = responder.clone();
|
let responder = responder.clone();
|
||||||
thread::Builder::new()
|
thread::spawn(move || {
|
||||||
.name("RespdrWriter".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
let mut write = write;
|
let mut write = write;
|
||||||
if let Err(e) = responder.writer(&mut write, resp_recv) {
|
if let Err(e) = responder.writer(&mut write, resp_recv) {
|
||||||
error!("writer failed: {:?}", e);
|
error!("writer failed: {:?}", e);
|
||||||
}
|
}
|
||||||
write
|
write
|
||||||
})
|
})
|
||||||
.unwrap()
|
|
||||||
};
|
};
|
||||||
|
|
||||||
for worker_num in 0..num_workers {
|
for worker_num in 0..num_workers {
|
||||||
|
@ -89,17 +82,11 @@ impl Responder {
|
||||||
let work_queue_recv = work_queue_recv.clone();
|
let work_queue_recv = work_queue_recv.clone();
|
||||||
let resp_send = resp_send.clone();
|
let resp_send = resp_send.clone();
|
||||||
let pile = pile.clone();
|
let pile = pile.clone();
|
||||||
handles.push(
|
handles.push(thread::spawn(move || {
|
||||||
thread::Builder::new()
|
if let Err(e) = responder.worker(pile.as_ref(), work_queue_recv, resp_send) {
|
||||||
.name("RespdrWorker".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
if let Err(e) = responder.worker(pile.as_ref(), work_queue_recv, resp_send)
|
|
||||||
{
|
|
||||||
error!("worker {} failed: {:?}", worker_num, e);
|
error!("worker {} failed: {:?}", worker_num, e);
|
||||||
}
|
}
|
||||||
})
|
}));
|
||||||
.unwrap(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
(r_handle, w_handle, handles)
|
(r_handle, w_handle, handles)
|
||||||
|
@ -262,7 +249,7 @@ impl Responder {
|
||||||
.continuation_tokens
|
.continuation_tokens
|
||||||
.lock()
|
.lock()
|
||||||
.or(Err(anyhow!("Mutex poisoned")))?;
|
.or(Err(anyhow!("Mutex poisoned")))?;
|
||||||
let batch_token = (0u16..u16::MAX)
|
let batch_token = (0u16..u16::max_value())
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.find(|id| !map.contains_key(&id))
|
.find(|id| !map.contains_key(&id))
|
||||||
.expect("No ID found");
|
.expect("No ID found");
|
||||||
|
@ -349,20 +336,6 @@ impl Responder {
|
||||||
RequestBody::Progress { .. } => {
|
RequestBody::Progress { .. } => {
|
||||||
unreachable!("handled by readea")
|
unreachable!("handled by readea")
|
||||||
}
|
}
|
||||||
RequestBody::Describe => match pile.describe_pipeline() {
|
|
||||||
Ok(description) => Response {
|
|
||||||
response_to: request.id,
|
|
||||||
body: ResponseBody::Description(description),
|
|
||||||
},
|
|
||||||
Err(err) => {
|
|
||||||
warn!("Error whilst doing a raw describe_pipeline: {:?}", err);
|
|
||||||
let err = format!("{:?}", err);
|
|
||||||
Response {
|
|
||||||
response_to: request.id,
|
|
||||||
body: ResponseBody::Failed(err),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
responses
|
responses
|
||||||
|
|
|
@ -185,7 +185,7 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow
|
||||||
/// result is in-place.
|
/// result is in-place.
|
||||||
///
|
///
|
||||||
/// Preconditions:
|
/// Preconditions:
|
||||||
/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
|
/// - `old` must be an integrated pointer.
|
||||||
/// - `old` is the parent of `new`
|
/// - `old` is the parent of `new`
|
||||||
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
|
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
|
||||||
if let TreeNode::Directory { children, .. } = new {
|
if let TreeNode::Directory { children, .. } = new {
|
||||||
|
|
|
@ -15,7 +15,6 @@ You should have received a copy of the GNU General Public License
|
||||||
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
along with Yama. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
|
||||||
pub fn bytes_to_hexstring(chunkid: &[u8]) -> String {
|
pub fn bytes_to_hexstring(chunkid: &[u8]) -> String {
|
||||||
|
@ -43,98 +42,3 @@ pub fn get_number_of_workers(first_try_env_name: &str) -> u8 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct LruMap<K, V> {
|
|
||||||
capacity: usize,
|
|
||||||
last_access: BTreeSet<(u64, K)>,
|
|
||||||
items: BTreeMap<K, (V, u64)>,
|
|
||||||
counter: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<K: Ord + Clone, V> LruMap<K, V> {
|
|
||||||
pub fn new(capacity: usize) -> LruMap<K, V> {
|
|
||||||
LruMap {
|
|
||||||
capacity,
|
|
||||||
last_access: BTreeSet::new(),
|
|
||||||
items: BTreeMap::new(),
|
|
||||||
counter: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets an item from the LRU map.
|
|
||||||
pub fn get(&mut self, key: &K) -> Option<&V> {
|
|
||||||
match self.items.get_mut(key) {
|
|
||||||
Some((value, last_used_instant)) => {
|
|
||||||
assert!(
|
|
||||||
self.last_access.remove(&(*last_used_instant, key.clone())),
|
|
||||||
"Corrupt LRU map: freshen not correct."
|
|
||||||
);
|
|
||||||
let new_instant = self.counter;
|
|
||||||
self.counter += 1;
|
|
||||||
self.last_access.insert((new_instant, key.clone()));
|
|
||||||
*last_used_instant = new_instant;
|
|
||||||
Some(value)
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
|
|
||||||
let new_instant = self.counter;
|
|
||||||
self.counter += 1;
|
|
||||||
|
|
||||||
let retval = match self.items.insert(key.clone(), (value, new_instant)) {
|
|
||||||
Some((old_entry, old_instant)) => {
|
|
||||||
assert!(
|
|
||||||
self.last_access.remove(&(old_instant, key.clone())),
|
|
||||||
"Corrupt LRU map: insert not correct."
|
|
||||||
);
|
|
||||||
Some(old_entry)
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
self.last_access.insert((new_instant, key));
|
|
||||||
|
|
||||||
if retval.is_none() {
|
|
||||||
// We didn't replace any item, so we have grown by 1.
|
|
||||||
// Check if we need to evict.
|
|
||||||
if self.items.len() > self.capacity {
|
|
||||||
self.evict();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
retval
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn evict(&mut self) -> Option<(K, V)> {
|
|
||||||
if let Some(first_entry) = self.last_access.iter().next().cloned() {
|
|
||||||
self.last_access.remove(&first_entry);
|
|
||||||
let (_, key) = first_entry;
|
|
||||||
let (value, _) = self
|
|
||||||
.items
|
|
||||||
.remove(&key)
|
|
||||||
.expect("Corrupt LRU map: last access and items out of sync");
|
|
||||||
|
|
||||||
Some((key, value))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod test {
|
|
||||||
use crate::utils::LruMap;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_lru_map() {
|
|
||||||
let mut lmap = LruMap::new(3);
|
|
||||||
lmap.insert(1, 1);
|
|
||||||
lmap.insert(2, 1);
|
|
||||||
lmap.insert(3, 1);
|
|
||||||
assert_eq!(lmap.get(&1), Some(&1));
|
|
||||||
lmap.insert(4, 1);
|
|
||||||
assert_eq!(lmap.get(&2), None);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue