Compare commits

...

53 Commits

Author SHA1 Message Date
Olivier 'reivilibre' 565c99cf8c Update flake and fix it 2024-05-08 20:41:28 +01:00
Olivier 'reivilibre' b57dbad890 Simplify flake lock 2023-04-01 16:57:04 +01:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' 9001177143 Batch up chunk deletions in an attempt to make vacuuming more performant 2022-11-28 21:03:07 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' c9d64b2962 Make sure to flush + add some error contexts 2022-11-21 21:23:38 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' 50ff9bb36a Fix including trailing empty line as pointer name 2022-11-20 22:11:05 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' 7e41408815 Add test for incremental backup with mid delete 2022-11-20 20:58:45 +00:00 (CI: build failed, release succeeded)
    Just for validation that delete does the right thing
Olivier 'reivilibre' 4072c5ae82 Fix parent not being integrated before being used to differentiate whilst removing a pointer 2022-11-20 20:42:26 +00:00 (CI: build and release pending)
Olivier 'reivilibre' d3fe111a06 Replace debug rmp with new implementation 2022-11-20 19:44:21 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' 6e1e173cb6 Implement datman prune 2022-11-20 19:43:20 +00:00
Olivier 'reivilibre' fcc79ca95d Hopefully fix descriptors to compare in test 2022-11-20 10:02:27 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' c1de1341ef Tweak wording 2022-11-20 10:02:13 +00:00
Olivier 'reivilibre' e85c606c95 Make a no-op compaction really a no-op compaction 2022-11-20 08:57:22 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' 34c619ef41 Fix compact thresholds in tests to demonstrate what we need 2022-11-19 17:43:27 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' b9dce3ddfc rustfmt 2022-11-19 17:42:09 +00:00
Olivier 'reivilibre' 52202874f2 Update images to remove deprecated ones 2022-11-19 16:47:25 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' 69656131af Fix linter 2022-11-19 16:35:36 +00:00 (CI: build failed, release succeeded)
Olivier 'reivilibre' cc93997230 Linting 2022-11-19 16:35:33 +00:00
Olivier 'reivilibre' 41248fe396 Add tests for yama compact 2022-11-19 16:35:24 +00:00
Olivier 'reivilibre' e7eb9ef288 Update nix shell to have python 2022-11-19 16:33:14 +00:00
Olivier 'reivilibre' b5e9e55cad Add yama compact command 2022-11-19 15:49:09 +00:00
Olivier 'reivilibre' cf502b7f7e rustfmt 2022-11-19 15:28:36 +00:00
Olivier 'reivilibre' 58c5c3f039 Add compaction logic 2022-11-19 15:27:41 +00:00
Olivier 'reivilibre' 30b261d172 Add Nix shell for Rust devel 2022-11-19 13:13:19 +00:00
Olivier 'reivilibre' 0811c11c48 Add ability to extract subset of files from yama 2022-10-04 20:21:23 +01:00 (CI: build and release pending)
Olivier 'reivilibre' aa2722607e Skip directories with .datmanskip files 2022-07-25 10:35:47 +01:00 (CI: push and tag pipelines succeeded)
Olivier 'reivilibre' 8612804298 Do short exclusions for remote backups (also bump version as protocol version incompatible) 2022-07-25 10:35:47 +01:00
Olivier 'reivilibre' 080875bfce Add debug log for not descending 2022-07-25 10:35:47 +01:00
Olivier 'reivilibre' 098895d913 Do short exclusions for local backups 2022-07-25 10:35:47 +01:00
Olivier 'reivilibre' bd5e18bc9f Extract load_labelling_rules 2022-07-25 10:35:47 +01:00
Olivier 'reivilibre' e25e92b273 Introduce 'exclusions' parameter to scanner 2022-07-25 10:35:47 +01:00
Olivier 'reivilibre' 4aa1948350 Sort chunk IDs by hint to make pull more efficient 2022-07-23 22:43:35 +01:00 (CI: build and release succeeded)
Olivier 'reivilibre' ee9ca73224 Put reader bloblogs in an LRU map to prevent hitting open FD limit 2022-07-23 22:38:29 +01:00
Olivier 'reivilibre' 05c6d3e662 Ignore non-UTF-8 file names instead of panicking 2022-07-23 21:55:04 +01:00 (CI: push build and release pending; tag build failed, tag release succeeded)
Olivier 'reivilibre' 0b84c793bf Flush chunk pointers in one transaction 2022-06-15 21:13:58 +01:00 (CI: push and tag pipelines succeeded)
Olivier 'reivilibre' eef22e7009 Use WAL mode in SQLite bloblogs 2022-06-15 21:13:56 +01:00
Olivier 'reivilibre' 332563f5a7 Bump version 2022-06-14 22:55:04 +01:00 (CI: push and tag pipelines succeeded)
Olivier 'reivilibre' 14be0ef0a3 Add a version check to the pull protocol 2022-06-14 22:54:32 +01:00 (CI: build and release succeeded)
Olivier 'reivilibre' 375d68eb0e Don't forget the terminator 2022-06-14 19:57:34 +01:00 (CI: build pending, release succeeded)
Olivier 'reivilibre' fc29c6fca1 Add some writer flushes that are probably necessary 2022-06-14 19:57:14 +01:00
Olivier 'reivilibre' d384b1bcbd Write down basic implementation of datman pull 2022-06-14 19:56:45 +01:00
Olivier 'reivilibre' e357547777 Glue together an implementation for the pull responder 2022-06-14 19:54:21 +01:00
Olivier 'reivilibre' c83e2be66d Flesh out both sides 2022-06-14 08:54:17 +01:00
Olivier 'reivilibre' a24778209e Finish off the basic offering side implementation 2022-06-13 23:27:43 +01:00
Olivier 'reivilibre' bb8fc355f0 Lay down the basic structure of push/pull offerer 2022-06-13 23:15:46 +01:00
Olivier 'reivilibre' 14fc925dbc Make existing push/pull legacy 2022-06-13 23:15:34 +01:00
Olivier 'reivilibre' 9e51c2428e Report the size used by the pile itself in the report 2022-06-13 23:15:34 +01:00
Olivier 'reivilibre' 01c98cb415 Aggregate reports by month and reorder sections 2022-06-04 12:33:14 +01:00
Olivier 'reivilibre' 3637b00f38 Add lz4 to path to ensure the backup helpers can work 2022-06-03 11:42:08 +01:00 (CI: build and release succeeded)
Olivier 'reivilibre' 25b1e14d84 Make it less strict? 2022-06-02 22:26:24 +01:00 (CI: build and release succeeded)
Olivier 'reivilibre' ef70e0998e Attempt to package up the helpers alongside yama and datman 2022-06-02 20:30:22 +01:00 (CI: build and release succeeded)
Olivier 'reivilibre' 5fd9a72de8 Convert helpers to Poetry to make them easier to package for NixOS 2022-06-02 20:29:54 +01:00
Olivier 'reivilibre' 4244fb88a7 Update version 2022-06-01 09:51:56 +01:00 (CI: push and tag pipelines succeeded)
Olivier 'reivilibre' d62e864bee Don't crash when backing up the root directory because it has no name 2022-06-01 09:16:05 +01:00
51 changed files with 2604 additions and 517 deletions

.envrc (new file, 2 lines changed)

@@ -0,0 +1,2 @@
use nix

View File

@@ -5,28 +5,19 @@ platform: linux/amd64
 pipeline:
   unitTests:
-    image: "docker.bics.ga/rei_ci/rust-sccache:latest-amd64"
+    image: "rust:1.65.0"
     pull: true
     commands:
       - DEBIAN_FRONTEND=noninteractive apt-get -qq update > /dev/null
       - DEBIAN_FRONTEND=noninteractive apt-get -yqq install pkg-config libssl-dev build-essential libsqlite3-dev > /dev/null
       - cargo build --all
       - cargo test --all
-      - sccache --show-stats
-    environment:
-      RUSTC_WRAPPER: /usr/local/bin/sccache
-      SCCACHE_S3_USE_SSL: "true"
-      SCCACHE_ENDPOINT: "richie.m4.tanukitsu.net:443"
-    secrets:
-      - sccache_bucket
-      - aws_access_key_id
-      - aws_secret_access_key
     when:
       event: [push, pull_request]
   testSuite:
-    image: "docker.bics.ga/rei_ci/rust-sccache:latest-amd64"
+    image: "rust:1.65.0"
     commands:
       - DEBIAN_FRONTEND=noninteractive apt-get -qq update > /dev/null
       - DEBIAN_FRONTEND=noninteractive apt-get -yqq -o=Dpkg::Use-Pty=0 install pkg-config libssl-dev build-essential libsqlite3-dev python3.9 python3.9-venv postgresql postgresql-client mariadb-server mariadb-client zstd lz4 > /dev/null
@@ -39,22 +30,13 @@ pipeline:
       - cargo install -q --path yama
       - cargo install -q --path datman
       - python3.9 -m venv testsuite/.venv
-      - ./testsuite/.venv/bin/pip install -e testsuite -e datman-helper-postgres -e datman-helper-mysql
+      - ./testsuite/.venv/bin/pip install ./testsuite ./datman-helper-postgres ./datman-helper-mysql
       - cd testsuite && . .venv/bin/activate && TEST_POSTGRES=$(hostname),testsuitedb,root TEST_MYSQL=$(hostname),testsuitemydb,root green
-      - sccache --show-stats
-    environment:
-      RUSTC_WRAPPER: /usr/local/bin/sccache
-      SCCACHE_S3_USE_SSL: "true"
-      SCCACHE_ENDPOINT: "richie.m4.tanukitsu.net:443"
-    secrets:
-      - sccache_bucket
-      - aws_access_key_id
-      - aws_secret_access_key
     when:
       event: [push, pull_request]
   deployManual:
-    image: "docker.bics.ga/rei_ci/mdbook:latest-amd64"
+    image: "docker.emunest.net/rei_ci/mdbook:latest-amd64"
     pull: true
     when:
       branch:

Cargo.lock (generated, 288 lines changed)

@@ -43,6 +43,12 @@ dependencies = [
 "memchr",
 ]
+[[package]]
+name = "ambient-authority"
+version = "0.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec8ad6edb4840b78c5c3d88de606b22252d552b55f3a4699fbb10fc070ec3049"
 [[package]]
 name = "anyhow"
 version = "1.0.57"
@@ -135,6 +141,50 @@ version = "1.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+[[package]]
+name = "cap-fs-ext"
+version = "0.24.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e54b86398b5852ddd45784b1d9b196b98beb39171821bad4b8b44534a1e87927"
+dependencies = [
+"cap-primitives",
+"cap-std",
+"io-lifetimes",
+"winapi",
+]
+[[package]]
+name = "cap-primitives"
+version = "0.24.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb8fca3e81fae1d91a36e9784ca22a39ef623702b5f7904d89dc31f10184a178"
+dependencies = [
+"ambient-authority",
+"errno",
+"fs-set-times",
+"io-extras",
+"io-lifetimes",
+"ipnet",
+"maybe-owned",
+"rustix",
+"winapi",
+"winapi-util",
+"winx",
+]
+[[package]]
+name = "cap-std"
+version = "0.24.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2247568946095c7765ad2b441a56caffc08027734c634a6d5edda648f04e32eb"
+dependencies = [
+"cap-primitives",
+"io-extras",
+"io-lifetimes",
+"ipnet",
+"rustix",
+]
 [[package]]
 name = "cc"
 version = "1.0.73"
@@ -171,16 +221,16 @@ dependencies = [
 [[package]]
 name = "clap"
-version = "3.1.18"
+version = "3.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b"
+checksum = "1df386a2d0f35bdefc0642fd8bcb2cd28243959f028abfd22fbade6f7d30980e"
 dependencies = [
 "atty",
 "bitflags",
 "clap_derive",
 "clap_lex",
 "indexmap",
-"lazy_static",
+"once_cell",
 "strsim",
 "termcolor",
 "textwrap",
@@ -188,11 +238,11 @@ dependencies = [
 [[package]]
 name = "clap_derive"
-version = "3.1.18"
+version = "3.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c"
+checksum = "7b740354ad9fcf20e27b46d921be4bb3712f5b3c2c7a89ba68a72a8e51d3a47f"
 dependencies = [
-"heck 0.4.0",
+"heck",
 "proc-macro-error",
 "proc-macro2",
 "quote",
@@ -201,18 +251,18 @@ dependencies = [
 [[package]]
 name = "clap_lex"
-version = "0.2.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213"
+checksum = "5538cd660450ebeb4234cfecf8f2284b844ffc4c50531e66d584ad5b91293613"
 dependencies = [
 "os_str_bytes",
 ]
 [[package]]
 name = "comfy-table"
-version = "6.0.0-rc.1"
+version = "6.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a1a275e66c69adb0600a13650aed718c99337d9a185d353efa13ff1e05576c4"
+checksum = "121d8a5b0346092c18a4b2fd6f620d7a06f0eb7ac0a45860939a0884bc579c56"
 dependencies = [
 "crossterm",
 "strum",
@@ -354,7 +404,7 @@ dependencies = [
 [[package]]
 name = "datman"
-version = "0.6.0-alpha.1"
+version = "0.6.0-alpha.5"
 dependencies = [
 "anyhow",
 "arc-interner",
@@ -369,6 +419,7 @@ dependencies = [
 "hostname",
 "humansize",
 "indicatif",
+"io-streams",
 "itertools 0.10.3",
 "libc",
 "log",
@@ -414,6 +465,12 @@ dependencies = [
 "winapi",
 ]
+[[package]]
+name = "duplex"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1178dea852a9bad9cb4b8e9111fbf0379a4c288e5cf4e14c36c87b85fe0cbbfa"
 [[package]]
 name = "ed25519"
 version = "1.5.2"
@@ -448,6 +505,27 @@ dependencies = [
 "termcolor",
 ]
+[[package]]
+name = "errno"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
+dependencies = [
+"errno-dragonfly",
+"libc",
+"winapi",
+]
+[[package]]
+name = "errno-dragonfly"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
+dependencies = [
+"cc",
+"libc",
+]
 [[package]]
 name = "fallible-iterator"
 version = "0.2.0"
@@ -476,6 +554,17 @@ dependencies = [
 "miniz_oxide",
 ]
+[[package]]
+name = "fs-set-times"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7df62ee66ee2d532ea8d567b5a3f0d03ecd64636b98bad5be1e93dcc918b92aa"
+dependencies = [
+"io-lifetimes",
+"rustix",
+"winapi",
+]
 [[package]]
 name = "fs2"
 version = "0.4.3"
@@ -497,13 +586,13 @@ dependencies = [
 [[package]]
 name = "getrandom"
-version = "0.2.6"
+version = "0.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
+checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
 dependencies = [
 "cfg-if 1.0.0",
 "libc",
-"wasi 0.10.0+wasi-snapshot-preview1",
+"wasi 0.11.0+wasi-snapshot-preview1",
 ]
 [[package]]
@@ -550,15 +639,6 @@ dependencies = [
 "num-traits",
 ]
-[[package]]
-name = "heck"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
-dependencies = [
-"unicode-segmentation",
-]
 [[package]]
 name = "heck"
 version = "0.4.0"
@@ -622,6 +702,48 @@ dependencies = [
 "regex",
 ]
+[[package]]
+name = "io-extras"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0c937cc9891c12eaa8c63ad347e4a288364b1328b924886970b47a14ab8f8f8"
+dependencies = [
+"io-lifetimes",
+"os_pipe",
+"winapi",
+]
+[[package]]
+name = "io-lifetimes"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec58677acfea8a15352d42fc87d11d63596ade9239e0a7c9352914417515dbe6"
+dependencies = [
+"os_pipe",
+]
+[[package]]
+name = "io-streams"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fba6685e8e5efa7bd0ce8c4e92ac113b3b059b2e18bd1bf51f7cfab0f61b4b19"
+dependencies = [
+"duplex",
+"io-extras",
+"io-lifetimes",
+"memchr",
+"os_pipe",
+"parking",
+"rustix",
+"system-interface",
+]
+[[package]]
+name = "ipnet"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b"
 [[package]]
 name = "itertools"
 version = "0.9.0"
@@ -689,6 +811,12 @@ dependencies = [
 "vcpkg",
 ]
+[[package]]
+name = "linux-raw-sys"
+version = "0.0.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5284f00d480e1c39af34e72f8ad60b94f47007e3481cd3b731c1d67190ddc7b7"
 [[package]]
 name = "lock_api"
 version = "0.4.7"
@@ -714,6 +842,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
+[[package]]
+name = "maybe-owned"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4"
 [[package]]
 name = "memchr"
 version = "2.5.0"
@@ -861,6 +995,16 @@ version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225"
+[[package]]
+name = "os_pipe"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c92f2b54f081d635c77e7120862d48db8e91f7f21cef23ab1b4fe9971c59f55"
+dependencies = [
+"libc",
+"winapi",
+]
 [[package]]
 name = "os_str_bytes"
 version = "6.1.0"
@@ -868,10 +1012,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
 [[package]]
-name = "parking_lot"
-version = "0.12.0"
+name = "parking"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58"
+checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
+[[package]]
+name = "parking_lot"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
 dependencies = [
 "lock_api",
 "parking_lot_core",
@@ -1071,6 +1221,22 @@ dependencies = [
 "smallvec",
 ]
+[[package]]
+name = "rustix"
+version = "0.33.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "938a344304321a9da4973b9ff4f9f8db9caf4597dfd9dda6a60b523340a0fff0"
+dependencies = [
+"bitflags",
+"errno",
+"io-lifetimes",
+"itoa",
+"libc",
+"linux-raw-sys",
+"once_cell",
+"winapi",
+]
 [[package]]
 name = "rustversion"
 version = "1.0.6"
@@ -1210,6 +1376,19 @@ version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
+[[package]]
+name = "socketpair"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f0c0b3fc17356799222affc5a40345b7cc25b548c489c5a31eca0888ee2404c"
+dependencies = [
+"io-extras",
+"io-lifetimes",
+"rustix",
+"uuid",
+"winapi",
+]
 [[package]]
 name = "sodiumoxide"
 version = "0.2.7"
@@ -1236,17 +1415,17 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
 [[package]]
 name = "strum"
-version = "0.23.0"
+version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb"
+checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
 [[package]]
 name = "strum_macros"
-version = "0.23.1"
+version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38"
+checksum = "9550962e7cf70d9980392878dfaf1dcc3ece024f4cf3bf3c46b978d0bad61d6c"
 dependencies = [
-"heck 0.3.3",
+"heck",
 "proc-macro2",
 "quote",
 "rustversion",
@@ -1255,15 +1434,32 @@ dependencies = [
 [[package]]
 name = "syn"
-version = "1.0.95"
+version = "1.0.96"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942"
+checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf"
 dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
 ]
+[[package]]
+name = "system-interface"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e09bb3fb4e02ec4b87e182ea9718fadbc0fa3e50085b40a9af9690572b67f9e"
+dependencies = [
+"atty",
+"bitflags",
+"cap-fs-ext",
+"io-lifetimes",
+"os_pipe",
+"rustix",
+"socketpair",
+"winapi",
+"winx",
+]
 [[package]]
 name = "temp-dir"
 version = "0.1.11"
@@ -1369,9 +1565,9 @@ dependencies = [
 [[package]]
 name = "unicode-ident"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
+checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
 [[package]]
 name = "unicode-segmentation"
@@ -1400,6 +1596,15 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372"
+[[package]]
+name = "uuid"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
+dependencies = [
+"getrandom",
+]
 [[package]]
 name = "vcpkg"
 version = "0.2.15"
@@ -1515,9 +1720,20 @@ version = "0.36.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
+[[package]]
+name = "winx"
+version = "0.31.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d5973cb8cd94a77d03ad7e23bbe14889cb29805da1cec0e4aff75e21aebded"
+dependencies = [
+"bitflags",
+"io-lifetimes",
+"winapi",
+]
 [[package]]
 name = "yama"
-version = "0.6.0-alpha.1"
+version = "0.6.0-alpha.5"
 dependencies = [
 "anyhow",
 "blake",

datman-helper-mysql/poetry.lock (generated, new file, 8 lines)

@@ -0,0 +1,8 @@
package = []
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "fafb334cb038533f851c23d0b63254223abf72ce4f02987e7064b0c95566699a"
[metadata.files]

View File

@@ -0,0 +1,19 @@
[tool.poetry]
name = "datman-helper-mysql"
version = "0.1.0"
description = "MySQL integration for Datman"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
license = "GPL-3.0-or-later"
[tool.poetry.dependencies]
python = "^3.8"
[tool.poetry.dev-dependencies]
[tool.poetry.scripts]
datman-helper-mysql-backup="datman_helper_mysql.backup:cli"
datman-helper-mysql-restore="datman_helper_mysql.restore:cli"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

View File

@@ -1,119 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import os
import sys
from shutil import rmtree
from setuptools import Command, find_packages, setup
# Package meta-data.
NAME = "datman_helper_mysql"
DESCRIPTION = "MySQL integration for Datman"
URL = "https://bics.ga/reivilibre/yama"
EMAIL = "reivi@librepush.net"
AUTHOR = "Olivier 'reivilibre'"
REQUIRES_PYTHON = ">=3.7.0"
VERSION = "0.1.0"
# What packages are required for this module to be executed?
REQUIRED = []
# What packages are optional?
EXTRAS = {}
# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!
here = os.path.abspath(os.path.dirname(__file__))
# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
long_description = "\n" + f.read()
except FileNotFoundError:
long_description = DESCRIPTION
# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
with open(os.path.join(here, project_slug, "__version__.py")) as f:
exec(f.read(), about)
else:
about["__version__"] = VERSION
class UploadCommand(Command):
"""Support setup.py upload."""
description = "Build and publish the package."
user_options = []
@staticmethod
def status(s):
"""Prints things in bold."""
print("\033[1m{0}\033[0m".format(s))
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
try:
self.status("Removing previous builds…")
rmtree(os.path.join(here, "dist"))
except OSError:
pass
self.status("Building Source and Wheel (universal) distribution…")
os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))
self.status("Uploading the package to PyPI via Twine…")
os.system("twine upload dist/*")
self.status("Pushing git tags…")
os.system("git tag v{0}".format(about["__version__"]))
os.system("git push --tags")
sys.exit()
# Where the magic happens:
setup(
name=NAME,
version=about["__version__"],
description=DESCRIPTION,
long_description=long_description,
long_description_content_type="text/markdown",
author=AUTHOR,
author_email=EMAIL,
python_requires=REQUIRES_PYTHON,
url=URL,
packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
# If your package is a single module, use this instead of 'packages':
# py_modules=['mypackage'],
entry_points={
"console_scripts": [
"datman-helper-mysql-backup=datman_helper_mysql.backup:cli",
"datman-helper-mysql-restore=datman_helper_mysql.restore:cli",
],
},
install_requires=REQUIRED,
extras_require=EXTRAS,
include_package_data=True,
# TODO license='GPL3',
classifiers=[
# Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
"Programming Language :: Python",
"Programming Language :: Python :: 3",
],
)

View File

@@ -39,10 +39,7 @@ def cli():
     # The process (if any) that is our LZ4 decompressor.
     lz4_process = None
-    dump_command = [
-        "pg_dump",
-        database_to_use
-    ]
+    dump_command = ["pg_dump", database_to_use]
     if host_to_use is not None:
         if use_lz4:
@@ -63,21 +60,19 @@ def cli():
             # (rather than lz4 covering it).
             command = [
                 "ssh",
-                f"{user_to_use}@{host_to_use}" if user_to_use is not None else f"{host_to_use}",
+                f"{user_to_use}@{host_to_use}"
+                if user_to_use is not None
+                else f"{host_to_use}",
                 "bash",
                 "-o",
                 "pipefail",
                 "-c",
-                shlex.quote(" ".join(dump_command))
+                shlex.quote(" ".join(dump_command)),
             ]
     elif user_to_use is not None:
         current_username = pwd.getpwuid(os.getuid()).pw_name
         if current_username != user_to_use:
-            command = [
-                "sudo",
-                "-u",
-                user_to_use
-            ] + dump_command
+            command = ["sudo", "-u", user_to_use] + dump_command
         else:
             command = dump_command
     else:

datman-helper-postgres/poetry.lock (generated, new file, 8 lines)

@@ -0,0 +1,8 @@
package = []
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "fafb334cb038533f851c23d0b63254223abf72ce4f02987e7064b0c95566699a"
[metadata.files]

View File

@@ -0,0 +1,19 @@
[tool.poetry]
name = "datman-helper-postgres"
version = "0.1.0"
description = "Postgres integration for Datman"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
license = "GPL-3.0-or-later"
[tool.poetry.dependencies]
python = "^3.8"
[tool.poetry.dev-dependencies]
[tool.poetry.scripts]
datman-helper-postgres-backup="datman_helper_postgres.backup:cli"
datman-helper-postgres-restore="datman_helper_postgres.restore:cli"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

View File

@@ -1,119 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import os
import sys
from shutil import rmtree
from setuptools import Command, find_packages, setup
# Package meta-data.
NAME = "datman_helper_postgres"
DESCRIPTION = "Postgres integration for Datman"
URL = "https://bics.ga/reivilibre/yama"
EMAIL = "reivi@librepush.net"
AUTHOR = "Olivier 'reivilibre'"
REQUIRES_PYTHON = ">=3.7.0"
VERSION = "0.1.0"
# What packages are required for this module to be executed?
REQUIRED = []
# What packages are optional?
EXTRAS = {}
# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!
here = os.path.abspath(os.path.dirname(__file__))
# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
long_description = "\n" + f.read()
except FileNotFoundError:
long_description = DESCRIPTION
# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
with open(os.path.join(here, project_slug, "__version__.py")) as f:
exec(f.read(), about)
else:
about["__version__"] = VERSION
class UploadCommand(Command):
"""Support setup.py upload."""
description = "Build and publish the package."
user_options = []
@staticmethod
def status(s):
"""Prints things in bold."""
print("\033[1m{0}\033[0m".format(s))
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
try:
self.status("Removing previous builds…")
rmtree(os.path.join(here, "dist"))
except OSError:
pass
self.status("Building Source and Wheel (universal) distribution…")
os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))
self.status("Uploading the package to PyPI via Twine…")
os.system("twine upload dist/*")
self.status("Pushing git tags…")
os.system("git tag v{0}".format(about["__version__"]))
os.system("git push --tags")
sys.exit()
# Where the magic happens:
setup(
name=NAME,
version=about["__version__"],
description=DESCRIPTION,
long_description=long_description,
long_description_content_type="text/markdown",
author=AUTHOR,
author_email=EMAIL,
python_requires=REQUIRES_PYTHON,
url=URL,
packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
# If your package is a single module, use this instead of 'packages':
# py_modules=['mypackage'],
entry_points={
"console_scripts": [
"datman-helper-postgres-backup=datman_helper_postgres.backup:cli",
"datman-helper-postgres-restore=datman_helper_postgres.restore:cli",
],
},
install_requires=REQUIRED,
extras_require=EXTRAS,
include_package_data=True,
# TODO license='GPL3',
classifiers=[
# Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
"Programming Language :: Python",
"Programming Language :: Python :: 3",
],
)

View File

@@ -1,6 +1,6 @@
 [package]
 name = "datman"
-version = "0.6.0-alpha.2"
+version = "0.6.0-alpha.5"
 authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
 edition = "2021"
 repository = "https://bics.ga/reivilibre/yama"
@@ -30,8 +30,9 @@ humansize = "1.1.1"
 chrono = "0.4.19"
 itertools = "0.10.1"
 hostname = "0.3.1"
-yama = { path = "../yama", version = "0.6.0-alpha.1" }
+yama = { path = "../yama", version = "0.6.0-alpha.5" }
 metrics = "0.17.1"
 bare-metrics-recorder = { version = "0.1.0" }
 comfy-table = "6.0.0-rc.1"
 libc = "0.2.126"
+io-streams = "0.11.0"

View File

@@ -8,5 +8,6 @@ Features:
 * (optional) Compression using Zstd and a specifiable dictionary
 * (optional) Encryption
 * Ability to back up to remote machines over SSH
+* Labelling of files in a backup source; different destinations can choose to backup either all or a subset of the labels.

 See the documentation for more information.

View File

@@ -16,24 +16,30 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
 */
 use std::fs::File;
+use std::io::{BufReader, BufWriter, Write};
 use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
 use clap::Parser;
 use env_logger::Env;
-use anyhow::bail;
+use anyhow::{bail, Context};
 use bare_metrics_recorder::recording::BareMetricsRecorderCore;
 use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, TimeZone, Utc};
 use datman::commands::backup::{backup_all_sources_to_destination, backup_source_to_destination};
 use datman::commands::ilabel::interactive_labelling_session;
-use datman::commands::init_descriptor;
+use datman::commands::prune::{prune_with_retention_policy, RetentionPolicy};
+use datman::commands::{init_descriptor, pushpull};
 use datman::descriptor::{load_descriptor, SourceDescriptor};
 use datman::get_hostname;
 use datman::remote::backup_source_requester::backup_remote_source_to_destination;
 use datman::remote::backup_source_responder;
 use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
+use itertools::Itertools;
 use log::info;
 use std::str::FromStr;
+use yama::commands::load_pile_descriptor;
+use yama::operations::legacy_pushpull::{open_pile_with_work_bypass, BypassLevel};
 pub const FAILURE_SYMBOL_OBNOXIOUS_FLASHING: &str = "\x1b[5m\x1b[31m⚠ \x1b[25m\x1b[22m";
 pub const BOLD: &str = "\x1b[1m";
@@ -114,10 +120,36 @@ pub enum DatmanCommand {
     Report {
         /// Name of the pile to report on.
         pile_name: String,
+        /// Don't summarise months.
+        #[clap(long)]
+        individual: bool,
     },
     #[clap(name = "_backup_source_responder")]
     InternalBackupSourceResponder,
+    /// Pulls all pointers from a remote pile to a local pile.
+    /// Does not yet support label filtering, but will do in the future.
+    Pull {
+        /// e.g. 'myserver:main'
+        remote_and_remote_pile: String,
+        pile_name: String,
+    },
+    /// Applies a retention policy by removing unnecessary backups.
+    /// Does not reclaim space by itself: use
+    /// `yama check --apply-gc --shallow`
+    /// & `yama compact`
+    /// to do that.
+    Prune { pile_name: String },
+    #[clap(name = "_pull_responder_offerer")]
+    InternalPullResponderOfferer {
+        datman_path: PathBuf,
+        pile_name: String,
+    },
 }
 pub struct HumanDateTime(pub DateTime<Local>);
@@ -313,13 +345,123 @@ fn main() -> anyhow::Result<()> {
             backup_source_responder::handler_stdio()?;
         }
-        DatmanCommand::Report { pile_name } => {
+        DatmanCommand::Report {
+            pile_name,
+            individual,
+        } => {
             let descriptor = load_descriptor(Path::new(".")).unwrap();
             let destination = &descriptor.piles[&pile_name];
-            let report = datman::commands::report::generate_report(destination, &descriptor)?;
+            let report =
+                datman::commands::report::generate_report(destination, &descriptor, !individual)?;
+            datman::commands::report::print_report(&report)?;
             datman::commands::report::print_filesystem_space(&destination.path)?;
-            datman::commands::report::print_report(&report)?;
         }
+        DatmanCommand::Pull {
+            remote_and_remote_pile,
+            pile_name,
+        } => {
+            let (hostname, remote_datman_path, remote_pile_name) = remote_and_remote_pile
+                .split(':')
+                .collect_tuple()
+                .context("You must pull from a remote pile specified as remote:path:pile.")?;
+            let descriptor = load_descriptor(Path::new(".")).unwrap();
+            let source = &descriptor.piles[&pile_name];
+            let pile_desc = load_pile_descriptor(&source.path)?;
+            let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
+                &source.path,
+                &pile_desc,
+                BypassLevel::CompressionBypass,
+            )?;
+            let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(10));
+            pbar.set_style(
+                ProgressStyle::default_bar().template(
+                    "[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
+                ),
+            );
+            pbar.set_message("pulling");
+            let remote_host_descriptor = descriptor
+                .remote_hosts
+                .get(hostname)
+                .ok_or_else(|| anyhow::anyhow!("No remote host by that name: {:?}.", hostname))?;
+            let mut connection = Command::new("ssh")
+                .arg(&remote_host_descriptor.user_at_host)
+                .arg("--")
+                .arg(
+                    &remote_host_descriptor
+                        .path_to_datman
+                        .as_ref()
+                        .map(|x| x.as_str())
+                        .unwrap_or("datman"),
+                )
+                .arg("_pull_responder_offerer")
+                .arg(remote_datman_path)
+                .arg(remote_pile_name)
+                .stdin(Stdio::piped())
+                .stdout(Stdio::piped())
+                .stderr(Stdio::inherit())
+                .spawn()?;
+            let mut reader = BufReader::new(connection.stdout.take().unwrap());
+            let mut writer = BufWriter::new(connection.stdin.take().unwrap());
+            pushpull::accepting_side(
+                &pile,
+                &bypass_raw_pile,
+                &mut reader,
+                &mut writer,
+                Box::new(pbar),
+            )?;
+        }
+        DatmanCommand::Prune { pile_name } => {
+            let descriptor = load_descriptor(Path::new(".")).unwrap();
+            let retention_policy = descriptor
+                .retention
+                .context("No retention policy set in descriptor")?;
+            let dest_desc = &descriptor.piles[&pile_name];
+            let pile_desc = load_pile_descriptor(&dest_desc.path)?;
+            prune_with_retention_policy(
+                &dest_desc.path,
+                &pile_desc,
+                &RetentionPolicy::from_config(retention_policy),
+                true,
+            )?;
+        }
+        DatmanCommand::InternalPullResponderOfferer {
+            datman_path,
+            pile_name,
+        } => {
+            let descriptor = load_descriptor(&datman_path).unwrap();
+            let source = &descriptor.piles[&pile_name];
+            let pile_desc = load_pile_descriptor(&source.path)?;
+            let (pile, bypass_raw_pile) = open_pile_with_work_bypass(
+                &source.path,
+                &pile_desc,
+                BypassLevel::CompressionBypass,
+            )?;
+            let mut stdin = BufReader::new(io_streams::StreamReader::stdin()?);
+            let mut stdout = BufWriter::new(io_streams::StreamWriter::stdout()?);
+            pushpull::offering_side(
+                &pile,
+                &bypass_raw_pile,
+                &mut stdin,
+                &mut stdout,
+                Box::new(()),
+            )?;
+            stdout.flush()?;
+        }
     }
     Ok(())
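
The Pull arm above drives the whole exchange over the spawned ssh child's stdin and stdout, handing the buffered streams to pushpull::accepting_side while the remote end runs datman _pull_responder_offerer. The following self-contained Rust sketch is not part of the diff and shows only that transport pattern: it spawns plain cat as a stand-in for the ssh command and exchanges a single line instead of the real offer/accept messages.

use std::io::{BufRead, BufReader, BufWriter, Write};
use std::process::{Command, Stdio};

fn stdio_transport_sketch() -> std::io::Result<()> {
    // Stand-in child process; the real code spawns
    // `ssh <host> -- datman _pull_responder_offerer <path> <pile>`.
    let mut child = Command::new("cat")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;

    // Wrap the child's stdio in buffered streams, as the Pull arm does.
    let mut writer = BufWriter::new(child.stdin.take().unwrap());
    let mut reader = BufReader::new(child.stdout.take().unwrap());

    // One round trip, standing in for the protocol messages.
    writer.write_all(b"hello\n")?;
    writer.flush()?;

    let mut line = String::new();
    reader.read_line(&mut line)?;
    assert_eq!(line, "hello\n");

    drop(writer); // close the child's stdin so `cat` exits
    child.wait()?;
    Ok(())
}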

View File

@@ -20,12 +20,14 @@ use std::fs::File;
 use std::io::Write;
 use std::path::Path;
-use crate::descriptor::{Descriptor, SourceDescriptor};
+use crate::descriptor::{Descriptor, RetentionPolicyConfig, SourceDescriptor};
 pub mod backup;
 pub mod extract;
 pub mod ibrowse;
 pub mod ilabel;
+pub mod prune;
+pub mod pushpull;
 pub mod report;
 pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
@@ -50,6 +52,12 @@ pub fn init_descriptor(path: &Path) -> anyhow::Result<()> {
         sources: source,
         piles: Default::default(),
         remote_hosts: Default::default(),
+        retention: Some(RetentionPolicyConfig {
+            daily: 14,
+            weekly: 12,
+            monthly: 24,
+            yearly: 9001,
+        }),
     })?;
     datman_toml_file.write_all(&bytes)?;

View File

@@ -17,7 +17,9 @@ along with Yama. If not, see <https://www.gnu.org/licenses/>.
 use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor, VirtualSourceKind};
 use crate::get_hostname;
-use crate::labelling::{label_node, load_labelling_rules, str_to_label, Label, State};
+use crate::labelling::{
+    label_node, load_labelling_rules, str_to_label, Label, LabellingRules, State,
+};
 use crate::tree::{scan, FileTree, FileTree1};
 use anyhow::{anyhow, bail};
 use arc_interner::ArcIntern;
@@ -76,8 +78,8 @@ pub fn open_stdout_backup_process(
 pub fn label_filter_and_convert(
     tree: FileTree1<()>,
     descriptor: &Descriptor,
-    desc_path: &Path,
     source_name: &str,
+    rules: &LabellingRules,
     dest: &DestPileDescriptor,
 ) -> anyhow::Result<Option<TreeNode>> {
     info!("Labelling.");
@@ -87,8 +89,7 @@ pub fn label_filter_and_convert(
         .iter()
         .map(|l| Label(ArcIntern::new(l.clone())))
         .collect();
-    let rules = load_labelling_rules(desc_path, source_name)?;
-    label_node("".to_owned(), None, &mut tree, &labels, &rules)?;
+    label_node("".to_owned(), None, &mut tree, &labels, rules)?;
     let included_labels: HashSet<Label> = dest.included_labels.iter().map(str_to_label).collect();
@@ -150,16 +151,20 @@ pub fn backup_source_to_destination<PT: ProgressTracker>(
             cross_filesystems,
         } => {
             info!("Looking to backup {} to {}", source_name, dest_name);
+            let rules = load_labelling_rules(desc_path, source_name)?;
+            let exclusions = rules.get_exclusions_set(directory);
             info!("Scanning.");
-            let tree = scan(directory, !*cross_filesystems)?
+            let tree = scan(directory, !*cross_filesystems, &exclusions)?
                 .ok_or_else(|| anyhow!("Source does not exist."))?;
             let absolute_source_path = desc_path.join(directory);
             let absolute_dest_path = desc_path.join(&dest.path);
             let pile_descriptor = load_pile_descriptor(&absolute_dest_path)?;
             let pile = open_pile(&absolute_dest_path, &pile_descriptor)?;
             let root = if let Some(root) =
-                label_filter_and_convert(tree, descriptor, desc_path, source_name, dest)?
+                label_filter_and_convert(tree, descriptor, source_name, &rules, dest)?
             {
                 root
             } else {

View File

@@ -15,6 +15,7 @@ You should have received a copy of the GNU General Public License
 along with Yama. If not, see <https://www.gnu.org/licenses/>.
 */
+use std::collections::BTreeSet;
 use std::path::Path;
 use anyhow::{anyhow, bail};
@@ -80,7 +81,7 @@ pub fn session(path: &Path, source_name: String) -> anyhow::Result<()> {
     };
     println!("Scanning source; this might take a little while...");
-    let mut dir_scan: FileTree1<Option<State>> = scan(directory, one_filesystem)?
+    let mut dir_scan: FileTree1<Option<State>> = scan(directory, one_filesystem, &BTreeSet::new())?
         .ok_or_else(|| anyhow!("Empty source."))?
         .replace_meta(&None);

View File

@@ -15,6 +15,7 @@ You should have received a copy of the GNU General Public License
 along with Yama. If not, see <https://www.gnu.org/licenses/>.
 */
+use std::collections::BTreeSet;
 use std::io;
 use std::io::{StdinLock, Stdout, Write};
 use std::path::Path;
@@ -192,7 +193,7 @@ pub fn interactive_labelling_session(path: &Path, source_name: String) -> anyhow
     let my_hostname = get_hostname();
     let mut dir_scan = if &my_hostname == hostname {
         info!("Scanning source; this might take a little while...");
-        scan(directory, !*cross_filesystems)?
+        scan(directory, !*cross_filesystems, &BTreeSet::new())?
            .ok_or_else(|| anyhow!("Empty source."))?
            .replace_meta(&None)
     } else {
@@ -212,6 +213,7 @@ pub fn interactive_labelling_session(path: &Path, source_name: String) -> anyhow
             &mut write,
             directory.as_ref(),
             !*cross_filesystems,
+            &BTreeSet::new(),
         )?
         .ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?
         .replace_meta(&None);

View File

@@ -0,0 +1,220 @@
use crate::commands::backup::split_pointer_name;
use crate::descriptor::RetentionPolicyConfig;
use anyhow::{bail, Context};
use log::info;
use std::collections::{BTreeMap, BTreeSet};
use std::io;
use std::path::Path;
use yama::commands::open_pile;
use yama::operations::remove_pointer_safely;
use yama::pile::PileDescriptor;
pub struct RetentionBand {
pub interval_s: u64,
pub number_to_retain: u32,
}
pub struct RetentionPolicy {
pub retention_bands: Vec<RetentionBand>,
}
const DAY: u64 = 86400;
const WEEK: u64 = 7 * DAY;
const MONTH: u64 = 31 * DAY;
const YEAR: u64 = 365 * DAY;
impl RetentionPolicy {
pub fn from_config(descriptor: RetentionPolicyConfig) -> RetentionPolicy {
let mut policy = RetentionPolicy {
retention_bands: vec![],
};
if descriptor.daily != 0 {
policy.retention_bands.push(RetentionBand {
interval_s: DAY,
number_to_retain: descriptor.daily,
});
}
if descriptor.weekly != 0 {
policy.retention_bands.push(RetentionBand {
interval_s: WEEK,
number_to_retain: descriptor.weekly,
});
}
if descriptor.monthly != 0 {
policy.retention_bands.push(RetentionBand {
interval_s: MONTH,
number_to_retain: descriptor.monthly,
});
}
if descriptor.yearly != 0 {
policy.retention_bands.push(RetentionBand {
interval_s: YEAR,
number_to_retain: descriptor.yearly,
});
}
policy
}
/// Returns the set of snapshots to remove.
pub fn apply_returning_prunable(
&self,
snapshots_by_unix_time: BTreeMap<u64, String>,
) -> BTreeSet<String> {
if snapshots_by_unix_time.is_empty() {
return BTreeSet::new();
}
let mut snapshots_included: BTreeSet<u64> = BTreeSet::new();
// Always mark the most recent snapshot as retained!
let last_snapshot = snapshots_by_unix_time.keys().rev().next().unwrap();
snapshots_included.insert(*last_snapshot);
let now_time = *last_snapshot;
for band in &self.retention_bands {
for multiple in 1..=band.number_to_retain {
let target_time = now_time - (multiple as u64) * band.interval_s;
if let Some((k, _)) = snapshots_by_unix_time.range(0..=target_time).rev().next() {
snapshots_included.insert(*k);
}
}
}
// Find all prunable (unincluded) snapshots.
snapshots_by_unix_time
.into_iter()
.filter(|(k, _v)| !snapshots_included.contains(k))
.map(|(_k, v)| v)
.collect()
}
}
pub fn prune_with_retention_policy(
pile_path: &Path,
pile_desc: &PileDescriptor,
policy: &RetentionPolicy,
prompt_first: bool,
) -> anyhow::Result<()> {
let pile = open_pile(&pile_path, &pile_desc).context("Failed to open pile")?;
let pointers = pile
.list_pointers()
.context("Failed to list pointers in pile")?;
let mut pointers_to_keep: BTreeSet<String> = pointers.iter().cloned().collect();
let pointers_to_remove = get_prunable_pointers(&policy, pointers);
for remove in &pointers_to_remove {
pointers_to_keep.remove(remove);
}
info!("Gory details:\n---\nKeep: {pointers_to_keep:?}\n---\nRemove: {pointers_to_remove:?}");
info!(
"{} pointers to remove ({} to keep) based on retention policy.",
pointers_to_remove.len(),
pointers_to_keep.len()
);
if prompt_first {
println!("Would you like to proceed? [y/N]: ");
let mut buffer = String::new();
let stdin = io::stdin(); // We get `Stdin` here.
stdin.read_line(&mut buffer)?;
if buffer.trim().to_ascii_lowercase() != "y" {
bail!("Aborted by user.");
}
}
for to_remove in pointers_to_remove {
let res = remove_pointer_safely(&pile, &to_remove).context("removing prunable pointers");
pile.flush()
.context("flushing pile after removing pointers")?;
res?;
}
Ok(())
}
fn get_prunable_pointers(policy: &RetentionPolicy, pointers: Vec<String>) -> BTreeSet<String> {
let mut split_pointers_by_name: BTreeMap<String, BTreeMap<u64, String>> = BTreeMap::new();
for pointer in pointers {
let (name, datetime) = if let Some(x) = split_pointer_name(&pointer) {
x
} else {
continue;
};
split_pointers_by_name
.entry(name)
.or_default()
.insert(datetime.timestamp().try_into().unwrap(), pointer);
}
let mut pointers_to_remove = BTreeSet::new();
for (_pointer_base_name, ts_to_pointer) in split_pointers_by_name {
let to_remove = policy.apply_returning_prunable(ts_to_pointer);
pointers_to_remove.extend(to_remove);
}
pointers_to_remove
}
#[cfg(test)]
mod test {
use crate::commands::prune::{get_prunable_pointers, RetentionPolicy};
use crate::descriptor::RetentionPolicyConfig;
#[test]
fn test_prunable_pointers() {
let pointers = vec![
"alice+2022-09-28_05:00:00",
"alice+2022-09-28_02:00:00",
"alice+2022-09-21_05:00:00",
"alice+2022-09-14_05:00:00",
"alice+2022-09-08_05:00:00",
"alice+2022-09-07_05:00:00",
"alice+2022-09-01_05:00:00",
"bob+2022-09-28_06:00:00",
"bob+2022-09-28_03:00:00",
"bob+2022-09-21_06:00:00",
"bob+2022-09-14_06:00:00",
"bob+2022-09-08_06:00:00",
"bob+2022-09-07_06:00:00",
"bob+2022-09-01_06:00:00",
]
.into_iter()
.map(|s| s.to_owned())
.collect();
let policy = RetentionPolicy::from_config(RetentionPolicyConfig {
daily: 0,
weekly: 3,
monthly: 0,
yearly: 0,
});
assert_eq!(
get_prunable_pointers(&policy, pointers)
.into_iter()
.collect::<Vec<_>>(),
vec![
"alice+2022-09-01_05:00:00",
"alice+2022-09-08_05:00:00",
"alice+2022-09-28_02:00:00",
"bob+2022-09-01_06:00:00",
"bob+2022-09-08_06:00:00",
"bob+2022-09-28_03:00:00",
]
);
}
}
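
To make the band arithmetic above concrete, here is a small walkthrough written as it could sit alongside the test module in this new file. It is illustrative only and not part of the changeset: the timestamps and snapshot names are invented, while RetentionPolicy, RetentionBand and apply_returning_prunable are the items defined in this diff.

#[test]
fn retention_band_walkthrough() {
    use crate::commands::prune::{RetentionBand, RetentionPolicy};
    use std::collections::BTreeMap;

    // One daily band retaining 2 snapshots; the most recent snapshot is
    // always kept in addition to whatever the bands select.
    let policy = RetentionPolicy {
        retention_bands: vec![RetentionBand {
            interval_s: 86_400,
            number_to_retain: 2,
        }],
    };

    // Five snapshots keyed by unix time; the newest is at 200_000.
    let snapshots: BTreeMap<u64, String> = [
        (0, "t0"),
        (50_000, "t50k"),
        (86_400, "t1d"),
        (172_800, "t2d"),
        (200_000, "latest"),
    ]
    .into_iter()
    .map(|(t, name)| (t, name.to_owned()))
    .collect();

    // "latest" is always retained; the band then retains the newest snapshot
    // at or before 200_000 - 86_400 = 113_600 (that is "t1d") and at or before
    // 200_000 - 172_800 = 27_200 (that is "t0"). Everything else is prunable.
    let prunable = policy.apply_returning_prunable(snapshots);
    assert_eq!(
        prunable.into_iter().collect::<Vec<_>>(),
        vec!["t2d".to_owned(), "t50k".to_owned()]
    );
}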

View File

@@ -0,0 +1,306 @@
// Push and Pull support for Datman
use anyhow::{bail, ensure, Context};
use log::info;
use std::collections::{BTreeMap, BTreeSet};
use std::io::{Read, Write};
use std::sync::Arc;
use std::time::Instant;
use yama::chunking::RecursiveUnchunker;
use yama::commands::retrieve_tree_node;
use yama::definitions::{ChunkId, PointerData, RecursiveChunkRef, TreeNode};
use yama::pile::{Keyspace, Pile, PipelineDescription, RawPile};
use yama::progress::ProgressTracker;
use yama::remote::{read_message, write_message};
pub fn offer_pointers<W: Write, RP: RawPile>(
pile: &Pile<RP>,
writer: &mut W,
) -> anyhow::Result<BTreeMap<String, PointerData>> {
let mut pointers_to_offer: BTreeMap<String, PointerData> = BTreeMap::new();
for pointer_name in pile.list_pointers()? {
let pointer_data = pile
.read_pointer(&pointer_name)?
.context("Listed pointer not present")?;
pointers_to_offer.insert(pointer_name, pointer_data);
}
write_message(writer, &pointers_to_offer)?;
Ok(pointers_to_offer)
}
pub fn ensure_compatible_bypasses(
my_full: &Vec<PipelineDescription>,
my_bypass: &Vec<PipelineDescription>,
their_full: &Vec<PipelineDescription>,
their_bypass: &Vec<PipelineDescription>,
) -> anyhow::Result<()> {
ensure!(
my_full.starts_with(&my_bypass),
"Our full pipeline is not an extension of the bypass pipeline."
);
ensure!(
their_full.starts_with(&their_bypass),
"Their full pipeline is not an extension of their bypass pipeline."
);
let my_bypassed_parts = &my_full[my_bypass.len()..];
let their_bypassed_parts = &their_full[their_bypass.len()..];
ensure!(
my_bypassed_parts == their_bypassed_parts,
"Our bypassed parts and their bypassed parts are not the same.\nOurs: {:?}\nTheirs: {:?}",
my_bypassed_parts,
their_bypassed_parts
);
Ok(())
}
pub fn negotiate_bypassed_pile<R: Read, W: Write>(
pile: &Pile<Arc<Box<dyn RawPile>>>,
bypass_pile: &Box<dyn RawPile>,
reader: &mut R,
writer: &mut W,
) -> anyhow::Result<()> {
let my_full_pipeline = pile.raw_pile.describe_pipeline()?;
let my_bypass_pipeline = bypass_pile.describe_pipeline()?;
write_message(writer, &my_full_pipeline)?;
write_message(writer, &my_bypass_pipeline)?;
writer.flush()?;
let their_full_pipeline = read_message::<_, Vec<PipelineDescription>>(reader)?;
let their_bypass_pipeline = read_message::<_, Vec<PipelineDescription>>(reader)?;
ensure_compatible_bypasses(
&my_full_pipeline,
&my_bypass_pipeline,
&their_full_pipeline,
&their_bypass_pipeline,
)?;
Ok(())
}
fn collect_chunk_ids(
pile: &Pile<Arc<Box<dyn RawPile>>>,
root: &TreeNode,
chunk_ids: &mut BTreeSet<ChunkId>,
) -> anyhow::Result<()> {
root.visit(
&mut |tree_node, _| {
match tree_node {
TreeNode::NormalFile { content, .. } => {
collect_chunk_ids_from_chunkref(pile, content, chunk_ids)?;
}
_ => {}
}
Ok(())
},
"".to_owned(),
)?;
Ok(())
}
fn collect_chunk_ids_from_chunkref(
pile: &Pile<Arc<Box<dyn RawPile>>>,
chunk_ref: &RecursiveChunkRef,
collection: &mut BTreeSet<ChunkId>,
) -> anyhow::Result<()> {
if chunk_ref.depth == 0 {
collection.insert(chunk_ref.chunk_id);
} else {
let shallower_chunk_ref = RecursiveChunkRef {
chunk_id: chunk_ref.chunk_id,
depth: chunk_ref.depth - 1,
};
let mut unchunker = RecursiveUnchunker::new(pile, shallower_chunk_ref);
let mut next_chunk_id: ChunkId = Default::default();
loop {
let read = unchunker.read(&mut next_chunk_id[..])?;
if read == 0 {
break;
} else if read < next_chunk_id.len() {
unchunker.read_exact(&mut next_chunk_id[read..])?;
}
collection.insert(next_chunk_id);
}
}
Ok(())
}
pub fn offering_side<R: Read, W: Write>(
pile: &Pile<Arc<Box<dyn RawPile>>>,
bypass_pile: &Box<dyn RawPile>,
reader: &mut R,
writer: &mut W,
mut progress: Box<dyn ProgressTracker>,
) -> anyhow::Result<()> {
let version = env!("CARGO_PKG_VERSION");
let expecting = format!("Datman Pull Accepter {}", version);
write_message(writer, &format!("Datman Pull Offerer {}", version))?;
writer.flush()?;
let found: String = read_message(reader)?;
ensure!(
found == expecting,
"Version mismatch. Expecting {:?} got {:?}",
expecting,
found
);
// First 'negotiate' (for now: assert) a pile bypass.
// This lets us avoid decompressing things before recompressing them at the other end,
// assuming both ends use the same dictionary.
negotiate_bypassed_pile(pile, &bypass_pile, reader, writer)?;
let offered_pointers = offer_pointers(pile, writer)?;
let wanted_pointers = read_message::<_, BTreeSet<String>>(reader)?;
let mut chunks_to_offer: BTreeSet<ChunkId> = BTreeSet::new();
for pointer_name in &wanted_pointers {
let pointer_data = offered_pointers
.get(pointer_name)
.with_context(|| format!("Requested pointer {:?} was not offered", pointer_name))?;
collect_chunk_ids_from_chunkref(pile, &pointer_data.chunk_ref, &mut chunks_to_offer)?;
let root_node = retrieve_tree_node(pile, pointer_data.chunk_ref.clone())?;
collect_chunk_ids(pile, &root_node.node, &mut chunks_to_offer)?;
}
write_message(writer, &chunks_to_offer)?;
writer.flush()?;
let chunks_to_skip: BTreeSet<ChunkId> = read_message(reader)?;
let chunks_to_send: Vec<ChunkId> = chunks_to_offer
.difference(&chunks_to_skip)
.cloned()
.collect();
drop(chunks_to_offer);
drop(chunks_to_skip);
let start_sort_by_hints = Instant::now();
let chunks_to_send_with_hints: BTreeSet<(u64, ChunkId)> = chunks_to_send
.into_iter()
.map(|chunk_id| {
pile.raw_pile
.chunk_id_transfer_ordering_hint(&chunk_id)
.map(|hint| (hint, chunk_id))
})
.collect::<anyhow::Result<_>>()?;
let time_to_sort_by_hints = Instant::now() - start_sort_by_hints;
info!(
"{} s to sort {} chunks by their hints",
time_to_sort_by_hints.as_secs_f32(),
chunks_to_send_with_hints.len()
);
progress.set_max_size(chunks_to_send_with_hints.len() as u64);
progress.set_current(0);
for (_hint, chunk_id) in chunks_to_send_with_hints {
let chunk_data = bypass_pile
.read(Keyspace::Chunk, &chunk_id)?
.context("Chunk vanished")?;
write_message(writer, &Some((chunk_id, chunk_data)))?;
progress.inc_progress(1);
}
write_message(writer, &None::<Option<(ChunkId, Vec<u8>)>>)?;
writer.flush()?;
Ok(())
}
pub fn accepting_side<R: Read, W: Write>(
pile: &Pile<Arc<Box<dyn RawPile>>>,
bypass_pile: &Box<dyn RawPile>,
reader: &mut R,
writer: &mut W,
mut progress: Box<dyn ProgressTracker>,
) -> anyhow::Result<()> {
let version = env!("CARGO_PKG_VERSION");
let expecting = format!("Datman Pull Offerer {}", version);
write_message(writer, &format!("Datman Pull Accepter {}", version))?;
writer.flush()?;
let found: String = read_message(reader)?;
ensure!(
found == expecting,
"Version mismatch. Expecting {:?} got {:?}",
expecting,
found
);
// First 'negotiate' (for now: assert) a pile bypass.
// This lets us avoid decompressing things before recompressing them at the other end,
// assuming both ends use the same dictionary.
negotiate_bypassed_pile(pile, &bypass_pile, reader, writer)?;
let offered_pointers: BTreeMap<String, PointerData> = read_message(reader)?;
let mut wanted_pointers: BTreeSet<String> = BTreeSet::new();
for (pointer_name, pointer_data) in &offered_pointers {
if pile.read_pointer(pointer_name)?.is_none() {
wanted_pointers.insert(pointer_name.clone());
if let Some(parent) = &pointer_data.parent_pointer {
if pile.read_pointer(parent)?.is_none() && !offered_pointers.contains_key(parent) {
bail!("Offered pointer {:?} requires parent {:?} which we don't have and isn't offered.", pointer_name, parent);
}
}
}
}
write_message(writer, &wanted_pointers)?;
writer.flush()?;
let offered_chunks: BTreeSet<ChunkId> = read_message(reader)?;
let mut chunks_to_skip: BTreeSet<ChunkId> = BTreeSet::new();
for chunk_id in &offered_chunks {
if pile.chunk_exists(chunk_id)? {
chunks_to_skip.insert(*chunk_id);
}
}
write_message(writer, &chunks_to_skip)?;
writer.flush()?;
let num_chunks_to_recv = offered_chunks.len() - chunks_to_skip.len();
let mut chunks_to_recv: BTreeSet<ChunkId> = offered_chunks
.difference(&chunks_to_skip)
.cloned()
.collect();
drop(offered_chunks);
drop(chunks_to_skip);
progress.set_max_size(num_chunks_to_recv as u64);
progress.set_current(0);
while let Some((chunk_id, chunk_data)) = read_message::<_, Option<(ChunkId, Vec<u8>)>>(reader)?
{
ensure!(
chunks_to_recv.remove(&chunk_id),
"Received unexpected chunk"
);
bypass_pile.write(Keyspace::Chunk, &chunk_id, &chunk_data)?;
progress.inc_progress(1);
}
ensure!(chunks_to_recv.is_empty(), "Did not receive all expected chunks.");
for (pointer_name, pointer_data) in &offered_pointers {
pile.write_pointer(pointer_name, pointer_data)?;
}
pile.flush()?;
Ok(())
}
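The bypass negotiation above reduces to one rule: each side's bypass pipeline must be a prefix of its full pipeline, and the 'bypassed parts' left over after that prefix must be identical on both ends, otherwise chunks handed over at the bypass level (for example, still compressed with a particular dictionary) would be misinterpreted. The self-contained sketch below restates the check with plain strings standing in for PipelineDescription; the stage names are invented for illustration.

// Same shape as ensure_compatible_bypasses, restated over plain strings.
fn bypass_is_compatible(
    my_full: &[&str],
    my_bypass: &[&str],
    their_full: &[&str],
    their_bypass: &[&str],
) -> bool {
    if !my_full.starts_with(my_bypass) || !their_full.starts_with(their_bypass) {
        return false;
    }
    // The 'bypassed parts' (everything beyond the bypass prefix) must match exactly.
    let my_bypassed = &my_full[my_bypass.len()..];
    let their_bypassed = &their_full[their_bypass.len()..];
    my_bypassed == their_bypassed
}

fn main() {
    assert!(bypass_is_compatible(
        &["integrity", "compress(dict A)", "store"],
        &["integrity"],
        &["integrity", "compress(dict A)", "store"],
        &["integrity"],
    ));
    assert!(!bypass_is_compatible(
        &["integrity", "compress(dict A)", "store"],
        &["integrity"],
        &["integrity", "compress(dict B)", "store"],
        &["integrity"],
    ));
}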


@ -13,6 +13,7 @@ use std::io::Read;
use std::mem; use std::mem;
use std::mem::size_of; use std::mem::size_of;
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::MetadataExt;
use std::path::Path; use std::path::Path;
use yama::chunking::RecursiveUnchunker; use yama::chunking::RecursiveUnchunker;
use yama::commands::{load_pile_descriptor, open_pile, retrieve_tree_node}; use yama::commands::{load_pile_descriptor, open_pile, retrieve_tree_node};
@ -29,6 +30,7 @@ use yama::pile::{DebugStatistics, Pile, RawPile};
pub struct Report { pub struct Report {
pub last_source_backups: BTreeMap<String, Option<DateTime<Utc>>>, pub last_source_backups: BTreeMap<String, Option<DateTime<Utc>>>,
pub chunk_usages_aggregated: bool,
pub chunk_usage: BTreeMap<String, Sizes>, pub chunk_usage: BTreeMap<String, Sizes>,
pub debug_stats: Option<DebugStatistics>, pub debug_stats: Option<DebugStatistics>,
@ -64,6 +66,7 @@ fn condense_chunk_id(chunk_id: ChunkId) -> CondensedChunkId {
pub fn generate_report( pub fn generate_report(
dest_pile_descriptor: &DestPileDescriptor, dest_pile_descriptor: &DestPileDescriptor,
descriptor: &Descriptor, descriptor: &Descriptor,
aggregate_chunk_usage_by_month: bool,
) -> anyhow::Result<Report> { ) -> anyhow::Result<Report> {
let pile_descriptor = load_pile_descriptor(&dest_pile_descriptor.path)?; let pile_descriptor = load_pile_descriptor(&dest_pile_descriptor.path)?;
let pile = open_pile(&dest_pile_descriptor.path, &pile_descriptor)?; let pile = open_pile(&dest_pile_descriptor.path, &pile_descriptor)?;
@ -71,6 +74,7 @@ pub fn generate_report(
let debug_stats = pile.raw_pile.debug_statistics()?; let debug_stats = pile.raw_pile.debug_statistics()?;
let mut pointers_to_parent_and_chunkids = BTreeMap::new(); let mut pointers_to_parent_and_chunkids = BTreeMap::new();
let mut pointergroups_to_pointers: BTreeMap<String, Vec<String>> = BTreeMap::new();
info!("Collecting chunk IDs... This will probably be slow."); info!("Collecting chunk IDs... This will probably be slow.");
for pointer_name in pile.list_pointers()? { for pointer_name in pile.list_pointers()? {
@ -79,6 +83,20 @@ pub fn generate_report(
.context("listed pointer doesn't exist")?; .context("listed pointer doesn't exist")?;
let root_node = retrieve_tree_node(&pile, pointer.chunk_ref)?; let root_node = retrieve_tree_node(&pile, pointer.chunk_ref)?;
let pointer_chunk_ids = collect_chunk_ids(&pile, &root_node.node)?; let pointer_chunk_ids = collect_chunk_ids(&pile, &root_node.node)?;
let pointergroup = if aggregate_chunk_usage_by_month {
let (base, date_time) =
split_pointer_name(&pointer_name).context("Can't split pointer name")?;
format!("{}+{}", base, date_time.format("%Y-%m"))
} else {
pointer_name.clone()
};
pointergroups_to_pointers
.entry(pointergroup)
.or_default()
.push(pointer_name.clone());
pointers_to_parent_and_chunkids pointers_to_parent_and_chunkids
.insert(pointer_name, (pointer.parent_pointer, pointer_chunk_ids)); .insert(pointer_name, (pointer.parent_pointer, pointer_chunk_ids));
} }
@ -87,12 +105,18 @@ pub fn generate_report(
// At the same time, we can also calculate 'rollup' sizes. // At the same time, we can also calculate 'rollup' sizes.
let mut chunk_sharer_counts: BTreeMap<CondensedChunkId, u16> = BTreeMap::new(); let mut chunk_sharer_counts: BTreeMap<CondensedChunkId, u16> = BTreeMap::new();
let mut pointer_stats: BTreeMap<String, Sizes> = BTreeMap::new(); let mut pointergroup_stats: BTreeMap<String, Sizes> = BTreeMap::new();
for (pointergroup_name, pointers_in_group) in pointergroups_to_pointers.iter().rev() {
let mut deduped_chunks = BTreeSet::new();
for pointer_name in pointers_in_group {
deduped_chunks.extend(iter_over_all_chunkids_incl_parents(
&pointers_to_parent_and_chunkids,
&pointer_name,
))
}
for pointer_name in pointers_to_parent_and_chunkids.keys().rev() {
let deduped_chunks: BTreeSet<CondensedChunkId> =
iter_over_all_chunkids_incl_parents(&pointers_to_parent_and_chunkids, &pointer_name)
.collect();
let mut rollup_count = 0; let mut rollup_count = 0;
for chunk in deduped_chunks { for chunk in deduped_chunks {
let count = chunk_sharer_counts.entry(chunk).or_default(); let count = chunk_sharer_counts.entry(chunk).or_default();
@ -101,15 +125,23 @@ pub fn generate_report(
rollup_count += 1; rollup_count += 1;
} }
} }
let entry = pointer_stats.entry(pointer_name.to_owned()).or_default(); let entry = pointergroup_stats
.entry(pointergroup_name.to_owned())
.or_default();
entry.rollup = rollup_count; entry.rollup = rollup_count;
} }
// Now go through again and update all the stats! // Now go through again and update all the stats!
for pointer_name in pointers_to_parent_and_chunkids.keys().rev() { for (pointergroup_name, pointers_in_group) in &pointergroups_to_pointers {
let deduped_chunks: BTreeSet<CondensedChunkId> = let mut deduped_chunks = BTreeSet::new();
iter_over_all_chunkids_incl_parents(&pointers_to_parent_and_chunkids, &pointer_name)
.collect(); for pointer_name in pointers_in_group {
deduped_chunks.extend(iter_over_all_chunkids_incl_parents(
&pointers_to_parent_and_chunkids,
&pointer_name,
))
}
let mut unique_count = 0; let mut unique_count = 0;
let mut shared_count_by_sharers = [0u32; 256]; let mut shared_count_by_sharers = [0u32; 256];
let total_count = deduped_chunks.len(); let total_count = deduped_chunks.len();
@ -128,7 +160,9 @@ pub fn generate_report(
sharers_sum += (count as f64) / (sharers_minus_one + 1) as f64; sharers_sum += (count as f64) / (sharers_minus_one + 1) as f64;
} }
let entry = pointer_stats.entry(pointer_name.to_owned()).or_default(); let entry = pointergroup_stats
.entry(pointergroup_name.to_owned())
.or_default();
entry.moral = (sharers_sum.ceil() as u32) + unique_count; entry.moral = (sharers_sum.ceil() as u32) + unique_count;
entry.unique = unique_count; entry.unique = unique_count;
entry.total = total_count as u32; entry.total = total_count as u32;
@ -147,7 +181,8 @@ pub fn generate_report(
Ok(Report { Ok(Report {
last_source_backups: last_backed_up, last_source_backups: last_backed_up,
chunk_usage: pointer_stats, chunk_usage: pointergroup_stats,
chunk_usages_aggregated: aggregate_chunk_usage_by_month,
debug_stats, debug_stats,
}) })
} }
@ -337,7 +372,22 @@ fn format_size(chunks: u32, average_chunk_size: Option<f64>) -> String {
format!("{} c{}", chunks, est_size_suffix) format!("{} c{}", chunks, est_size_suffix)
} }
fn calculate_total_filesize_of_dir(dir: &Path) -> anyhow::Result<u64> {
let mut total = 0;
for file in std::fs::read_dir(dir)? {
let file = file?;
let metadata = file.metadata()?;
total += metadata.size();
if metadata.is_dir() {
total += calculate_total_filesize_of_dir(&file.path())?;
}
}
Ok(total)
}
pub fn print_filesystem_space(pile_path: &Path) -> anyhow::Result<()> { pub fn print_filesystem_space(pile_path: &Path) -> anyhow::Result<()> {
let usage_for_pile = calculate_total_filesize_of_dir(&pile_path)?;
let path_c = CString::new(pile_path.as_os_str().as_bytes()).unwrap(); let path_c = CString::new(pile_path.as_os_str().as_bytes()).unwrap();
let stats = unsafe { let stats = unsafe {
let mut stats: libc::statfs = mem::zeroed(); let mut stats: libc::statfs = mem::zeroed();
@ -375,6 +425,7 @@ pub fn print_filesystem_space(pile_path: &Path) -> anyhow::Result<()> {
Cell::new("Theoretical Size").fg(Color::Cyan), Cell::new("Theoretical Size").fg(Color::Cyan),
Cell::new("Usable Size").fg(Color::Cyan), Cell::new("Usable Size").fg(Color::Cyan),
Cell::new("Used").fg(Color::Cyan), Cell::new("Used").fg(Color::Cyan),
Cell::new("Used for Pile").fg(Color::Cyan),
Cell::new("Available").fg(Color::Cyan), Cell::new("Available").fg(Color::Cyan),
]); ]);
@ -394,6 +445,7 @@ pub fn print_filesystem_space(pile_path: &Path) -> anyhow::Result<()> {
.fg(Color::Blue), .fg(Color::Blue),
Cell::new(format!("{:>9}", usable_bytes.file_size(&format).unwrap())).fg(Color::Blue), Cell::new(format!("{:>9}", usable_bytes.file_size(&format).unwrap())).fg(Color::Blue),
Cell::new(format!("{:>9}", used_bytes.file_size(&format).unwrap())).fg(Color::Blue), Cell::new(format!("{:>9}", used_bytes.file_size(&format).unwrap())).fg(Color::Blue),
Cell::new(format!("{:>9}", usage_for_pile.file_size(&format).unwrap())).fg(Color::Blue),
Cell::new(format!("{:>9}", avail_bytes.file_size(&format).unwrap())) Cell::new(format!("{:>9}", avail_bytes.file_size(&format).unwrap()))
.fg(available_space_colour), .fg(available_space_colour),
]); ]);


@ -38,6 +38,10 @@ pub struct Descriptor {
pub piles: HashMap<String, DestPileDescriptor>, pub piles: HashMap<String, DestPileDescriptor>,
pub remote_hosts: HashMap<String, RemoteHostDescriptor>, pub remote_hosts: HashMap<String, RemoteHostDescriptor>,
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub retention: Option<RetentionPolicyConfig>,
} }
#[derive(Clone, Serialize, Deserialize, Debug)] #[derive(Clone, Serialize, Deserialize, Debug)]
@ -46,6 +50,14 @@ pub struct RemoteHostDescriptor {
pub path_to_datman: Option<String>, pub path_to_datman: Option<String>,
} }
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct RetentionPolicyConfig {
pub daily: u32,
pub weekly: u32,
pub monthly: u32,
pub yearly: u32,
}
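Because the new retention field is optional, descriptors that omit it keep parsing. A minimal sketch of that behaviour, assuming a TOML-format descriptor (an assumption for this sketch) and trimmed stand-in structs rather than datman's real Descriptor:

use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct RetentionSketch {
    daily: u32,
    weekly: u32,
    monthly: u32,
    yearly: u32,
}

#[derive(Deserialize, Debug)]
struct DescriptorSketch {
    #[serde(default)]
    retention: Option<RetentionSketch>,
}

fn main() {
    // A descriptor with a retention table...
    let with: DescriptorSketch =
        toml::from_str("[retention]\ndaily = 7\nweekly = 4\nmonthly = 12\nyearly = 2\n").unwrap();
    assert!(with.retention.is_some());

    // ...and one without: the field stays None, so existing descriptors keep working.
    let without: DescriptorSketch = toml::from_str("").unwrap();
    assert!(without.retention.is_none());
}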
#[derive(Clone, Serialize, Deserialize, Debug)] #[derive(Clone, Serialize, Deserialize, Debug)]
#[serde(untagged)] #[serde(untagged)]
pub enum SourceDescriptor { pub enum SourceDescriptor {


@ -15,10 +15,10 @@ You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>. along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/ */
use std::collections::HashMap; use std::collections::{BTreeSet, HashMap};
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader, Write}; use std::io::{BufRead, BufReader, Write};
use std::path::Path; use std::path::{Path, PathBuf};
use anyhow::anyhow; use anyhow::anyhow;
use anyhow::Context; use anyhow::Context;
@ -222,6 +222,23 @@ impl LabellingRules {
} }
None None
} }
pub fn get_exclusions_set(&self, base: &Path) -> BTreeSet<PathBuf> {
let mut exclusions = BTreeSet::new();
for (ext_path, state) in &self.position_based_rules {
assert!(ext_path.is_empty() || ext_path.starts_with('/'));
let full_path = PathBuf::from(format!(
"{}{ext_path}",
base.to_str().expect("base path must always be utf-8")
));
if state == &Excluded {
exclusions.insert(full_path);
}
}
exclusions
}
} }
/// Uninteractively label the nodes. /// Uninteractively label the nodes.


@ -1,11 +1,13 @@
use crate::commands::backup::{get_pointer_name_at, label_filter_and_convert}; use crate::commands::backup::{get_pointer_name_at, label_filter_and_convert};
use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor}; use crate::descriptor::{Descriptor, DestPileDescriptor, SourceDescriptor};
use crate::labelling::load_labelling_rules;
use crate::tree::FileTree; use crate::tree::FileTree;
use anyhow::{anyhow, bail}; use anyhow::{anyhow, bail};
use chrono::Utc; use chrono::Utc;
use log::info; use log::info;
use std::collections::BTreeSet;
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::path::Path; use std::path::{Path, PathBuf};
use std::process::{Child, Command, Stdio}; use std::process::{Child, Command, Stdio};
use std::sync::Arc; use std::sync::Arc;
use yama::commands::{load_pile_descriptor, open_pile}; use yama::commands::{load_pile_descriptor, open_pile};
@ -48,11 +50,13 @@ pub fn scanning<R: Read, W: Write>(
write: &mut W, write: &mut W,
path: &Path, path: &Path,
one_filesystem: bool, one_filesystem: bool,
exclusions: &BTreeSet<PathBuf>,
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> { ) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
info!("Scanning."); info!("Scanning.");
write_message(write, &"scan")?; write_message(write, &"scan")?;
write_message(write, &path)?; write_message(write, &path)?;
write_message(write, &one_filesystem)?; write_message(write, &one_filesystem)?;
write_message(write, exclusions)?;
write.flush()?; write.flush()?;
let scan_result: Option<FileTree<(), (), (), ()>> = read_message(read)?; let scan_result: Option<FileTree<(), (), (), ()>> = read_message(read)?;
@ -199,6 +203,9 @@ pub fn backup_remote_source_to_destination<PT: ProgressTracker + Send + 'static>
info!("Connecting..."); info!("Connecting...");
introduction(&mut read, &mut write)?; introduction(&mut read, &mut write)?;
let rules = load_labelling_rules(desc_path, source_name)?;
let exclusions = rules.get_exclusions_set(directory);
// then request to scan // then request to scan
info!("Requesting scan... (this may take some time)"); info!("Requesting scan... (this may take some time)");
let scan_result = scanning( let scan_result = scanning(
@ -206,11 +213,12 @@ pub fn backup_remote_source_to_destination<PT: ProgressTracker + Send + 'static>
&mut write, &mut write,
directory.as_ref(), directory.as_ref(),
!*cross_filesystems, !*cross_filesystems,
&exclusions,
)? )?
.ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?; .ok_or_else(|| anyhow!("Remote scan failed (does the directory exist?)"))?;
let mut root = let mut root =
label_filter_and_convert(scan_result, descriptor, desc_path, source_name, dest)? label_filter_and_convert(scan_result, descriptor, source_name, &rules, dest)?
.ok_or_else(|| anyhow!("Empty filter..."))?; .ok_or_else(|| anyhow!("Empty filter..."))?;
let absolute_dest_path = desc_path.join(&dest.path); let absolute_dest_path = desc_path.join(&dest.path);


@ -1,6 +1,7 @@
// This file implements the responder side of the backup source protocol -- the protocol used // This file implements the responder side of the backup source protocol -- the protocol used
// to connect to remote backup sources. // to connect to remote backup sources.
use std::collections::BTreeSet;
use std::io::{stdin, stdout, Read, Write}; use std::io::{stdin, stdout, Read, Write};
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
@ -43,7 +44,8 @@ pub fn introduction<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::R
pub fn scanning<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> { pub fn scanning<R: Read, W: Write>(read: &mut R, write: &mut W) -> anyhow::Result<()> {
let path: PathBuf = read_message(read)?; let path: PathBuf = read_message(read)?;
let one_filesystem: bool = read_message(read)?; let one_filesystem: bool = read_message(read)?;
let scan_result = scan(&path, one_filesystem)?; let exclusions: BTreeSet<PathBuf> = read_message(read)?;
let scan_result = scan(&path, one_filesystem, &exclusions)?;
write_message(write, &scan_result)?; write_message(write, &scan_result)?;
write.flush()?; write.flush()?;
Ok(()) Ok(())
@ -96,6 +98,7 @@ impl ProgressTracker for ProgressSender {
} }
} }
// TODO use io-streams crate and get rid of the duplication!!
pub fn chunking_stdio() -> anyhow::Result<PartialPointerData> { pub fn chunking_stdio() -> anyhow::Result<PartialPointerData> {
let (path, tree_node) = { let (path, tree_node) = {
let stdin = stdin(); let stdin = stdin();


@ -15,16 +15,16 @@ You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>. along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/ */
use std::collections::BTreeMap; use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Debug; use std::fmt::Debug;
use std::fs::{read_link, symlink_metadata, DirEntry, Metadata}; use std::fs::{read_link, symlink_metadata, DirEntry, Metadata};
use std::io::ErrorKind; use std::io::ErrorKind;
use std::os::unix::fs::MetadataExt; use std::os::unix::fs::MetadataExt;
use std::path::Path; use std::path::{Path, PathBuf};
use anyhow::anyhow; use anyhow::anyhow;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::{info, warn}; use log::{debug, info, warn};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
pub use yama::definitions::FilesystemOwnership; pub use yama::definitions::FilesystemOwnership;
@ -216,14 +216,18 @@ pub fn mtime_msec(metadata: &Metadata) -> u64 {
} }
/// Scan the filesystem to produce a Tree, using a default progress bar. /// Scan the filesystem to produce a Tree, using a default progress bar.
pub fn scan(path: &Path, one_filesystem: bool) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> { pub fn scan(
path: &Path,
one_filesystem: bool,
exclusions: &BTreeSet<PathBuf>,
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(2)); let pbar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout_with_hz(2));
pbar.set_style(ProgressStyle::default_spinner().template("{spinner} {pos:7} {msg}")); pbar.set_style(ProgressStyle::default_spinner().template("{spinner} {pos:7} {msg}"));
pbar.set_message("dir scan"); pbar.set_message("dir scan");
let one_filesystem = if one_filesystem { Some(None) } else { None }; let one_filesystem = if one_filesystem { Some(None) } else { None };
let result = scan_with_progress_bar(path, &pbar, one_filesystem); let result = scan_with_progress_bar(path, &pbar, one_filesystem, exclusions);
pbar.finish_at_current_pos(); pbar.finish_at_current_pos();
result result
} }
@ -233,7 +237,14 @@ pub fn scan_with_progress_bar(
path: &Path, path: &Path,
progress_bar: &ProgressBar, progress_bar: &ProgressBar,
mut one_filesystem: Option<Option<u64>>, mut one_filesystem: Option<Option<u64>>,
exclusions: &BTreeSet<PathBuf>,
) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> { ) -> anyhow::Result<Option<FileTree<(), (), (), ()>>> {
if exclusions.contains(path) {
// Don't enter excluded paths.
debug!("Not descending into excluded path: {:?}", path);
return Ok(None);
}
let metadata_res = symlink_metadata(path); let metadata_res = symlink_metadata(path);
progress_bar.inc(1); progress_bar.inc(1);
if let Err(e) = &metadata_res { if let Err(e) = &metadata_res {
@ -305,15 +316,23 @@ pub fn scan_with_progress_bar(
for entry in dir_read? { for entry in dir_read? {
let entry: DirEntry = entry?; let entry: DirEntry = entry?;
let scanned = scan_with_progress_bar(&entry.path(), progress_bar, one_filesystem)?;
if entry.file_name() == ".datmanskip" {
// Directories with .datmanskip in them are to be skipped entirely.
// TODO(perf): should this be checked upfront before some children may already
// have been scanned?
debug!("Skipping {path:?} because it has a .datmanskip file.");
return Ok(None);
}
let scanned =
scan_with_progress_bar(&entry.path(), progress_bar, one_filesystem, exclusions)?;
if let Some(scanned) = scanned { if let Some(scanned) = scanned {
children.insert( if let Ok(filename) = entry.file_name().into_string() {
entry children.insert(filename, scanned);
.file_name() } else {
.into_string() warn!("Non-UTF-8 filename; ignoring: {:?}", entry.file_name())
.expect("OsString not String"), }
scanned,
);
} }
} }


@ -1,15 +1,35 @@
{ {
"nodes": { "nodes": {
"naersk": { "flake-utils": {
"inputs": { "inputs": {
"nixpkgs": "nixpkgs" "systems": "systems"
}, },
"locked": { "locked": {
"lastModified": 1653413650, "lastModified": 1710146030,
"narHash": "sha256-wojDHjb+eU80MPH+3HQaK0liUy8EgR95rvmCl24i58Y=", "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"naersk": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1662220400,
"narHash": "sha256-9o2OGQqu4xyLZP9K6kNe1pTHnyPz0Wr3raGYnr9AIgY=",
"owner": "nix-community", "owner": "nix-community",
"repo": "naersk", "repo": "naersk",
"rev": "69daaceebe12c070cd5ae69ba38f277bbf033695", "rev": "6944160c19cb591eb85bbf9b2f2768a935623ed3",
"type": "github" "type": "github"
}, },
"original": { "original": {
@ -18,48 +38,131 @@
"type": "github" "type": "github"
} }
}, },
"nix-github-actions": {
"inputs": {
"nixpkgs": [
"poetry2nix",
"nixpkgs"
]
},
"locked": {
"lastModified": 1703863825,
"narHash": "sha256-rXwqjtwiGKJheXB43ybM8NwWB8rO2dSRrEqes0S7F5Y=",
"owner": "nix-community",
"repo": "nix-github-actions",
"rev": "5163432afc817cf8bd1f031418d1869e4c9d5547",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "nix-github-actions",
"type": "github"
}
},
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1653738054, "lastModified": 1714971268,
"narHash": "sha256-IaR8iLN4Ms3f5EjU1CJkXSc49ZzyS5qv03DtVAti6/s=", "narHash": "sha256-IKwMSwHj9+ec660l+I4tki/1NRoeGpyA2GdtdYpAgEw=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "17b62c338f2a0862a58bb6951556beecd98ccda9", "rev": "27c13997bf450a01219899f5a83bd6ffbfc70d3c",
"type": "github" "type": "github"
}, },
"original": { "original": {
"id": "nixpkgs", "id": "nixpkgs",
"ref": "nixos-23.11",
"type": "indirect" "type": "indirect"
} }
}, },
"nixpkgs_2": { "poetry2nix": {
"inputs": {
"flake-utils": "flake-utils",
"nix-github-actions": "nix-github-actions",
"nixpkgs": [
"nixpkgs"
],
"systems": "systems_2",
"treefmt-nix": "treefmt-nix"
},
"locked": { "locked": {
"lastModified": 1653738054, "lastModified": 1715017507,
"narHash": "sha256-IaR8iLN4Ms3f5EjU1CJkXSc49ZzyS5qv03DtVAti6/s=", "narHash": "sha256-RN2Vsba56PfX02DunWcZYkMLsipp928h+LVAWMYmbZg=",
"owner": "NixOS", "owner": "nix-community",
"repo": "nixpkgs", "repo": "poetry2nix",
"rev": "17b62c338f2a0862a58bb6951556beecd98ccda9", "rev": "e6b36523407ae6a7a4dfe29770c30b3a3563b43a",
"type": "github" "type": "github"
}, },
"original": { "original": {
"id": "nixpkgs", "owner": "nix-community",
"type": "indirect" "repo": "poetry2nix",
"type": "github"
} }
}, },
"root": { "root": {
"inputs": { "inputs": {
"naersk": "naersk", "naersk": "naersk",
"nixpkgs": "nixpkgs_2", "nixpkgs": "nixpkgs",
"poetry2nix": "poetry2nix",
"utils": "utils" "utils": "utils"
} }
}, },
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"systems_2": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"id": "systems",
"type": "indirect"
}
},
"treefmt-nix": {
"inputs": {
"nixpkgs": [
"poetry2nix",
"nixpkgs"
]
},
"locked": {
"lastModified": 1714058656,
"narHash": "sha256-Qv4RBm4LKuO4fNOfx9wl40W2rBbv5u5m+whxRYUMiaA=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "c6aaf729f34a36c445618580a9f95a48f5e4e03f",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "treefmt-nix",
"type": "github"
}
},
"utils": { "utils": {
"locked": { "locked": {
"lastModified": 1652776076, "lastModified": 1659877975,
"narHash": "sha256-gzTw/v1vj4dOVbpBSJX4J0DwUR6LIyXo7/SuuTJp1kM=", "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
"owner": "numtide", "owner": "numtide",
"repo": "flake-utils", "repo": "flake-utils",
"rev": "04c1b180862888302ddfb2e3ad9eaa63afc60cf8", "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
"type": "github" "type": "github"
}, },
"original": { "original": {


@ -3,26 +3,74 @@
inputs = { inputs = {
utils.url = "github:numtide/flake-utils"; utils.url = "github:numtide/flake-utils";
naersk.url = "github:nix-community/naersk"; naersk = {
url = "github:nix-community/naersk";
inputs.nixpkgs.follows = "nixpkgs";
};
nixpkgs.url = "nixpkgs/nixos-23.11";
poetry2nix = {
url = "github:nix-community/poetry2nix";
inputs.nixpkgs.follows = "nixpkgs";
};
}; };
outputs = { self, nixpkgs, utils, naersk }: outputs = { self, nixpkgs, utils, naersk, poetry2nix }:
utils.lib.eachDefaultSystem (system: let utils.lib.eachDefaultSystem (system: let
pkgs = nixpkgs.legacyPackages."${system}"; pkgs = nixpkgs.legacyPackages."${system}";
inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; }) mkPoetryApplication;
naersk-lib = naersk.lib."${system}"; naersk-lib = naersk.lib."${system}";
in rec {
# `nix build` rustComponents = naersk-lib.buildPackage {
packages.yama = naersk-lib.buildPackage {
pname = "yama"; pname = "yama";
root = ./.; root = ./.;
buildInputs = with pkgs; [ buildInputs = with pkgs; [
openssl openssl
pkgconfig pkg-config
sqlite sqlite
]; ];
}; };
mysqlHelper = mkPoetryApplication {
projectDir = ./datman-helper-mysql;
};
postgresHelper = mkPoetryApplication {
projectDir = ./datman-helper-postgres;
};
# We want to produce a package with all of these together, with wrappers that let them
# refer to each other by name (i.e. have each other on the path).
# Datman needs the helpers on the path.
# The helpers need lz4 on the path.
allInOne = pkgs.stdenv.mkDerivation {
name = "datman-aio";
src = "${pkgs.emptyDirectory}";
installPhase = ''
# set -eu
mkdir $out $out/bin
ln -s ${rustComponents}/bin/{yama,datman} $out/bin
ln -s ${mysqlHelper}/bin/datman-helper-mysql-{backup,restore} $out/bin
ln -s ${postgresHelper}/bin/datman-helper-postgres-{backup,restore} $out/bin
ln -s ${pkgs.lz4}/bin/lz4 $out/bin/
runHook postInstall
'';
buildInputs = [ pkgs.makeWrapper ];
postInstall = ''
# set -eu
for fn in $out/bin/{datman,yama,datman-helper-{mysql,postgres}-{backup,restore}}; do
wrapProgram $fn --suffix PATH : $out/bin
done
'';
};
in rec {
# `nix build`
packages.yama = allInOne;
defaultPackage = packages.yama; defaultPackage = packages.yama;
# NixOS Modules # NixOS Modules
@ -32,7 +80,7 @@
# `nix run` # `nix run`
apps.yama = utils.lib.mkApp { apps.yama = utils.lib.mkApp {
drv = packages.yama; drv = rustComponents;
}; };
defaultApp = apps.yama; defaultApp = apps.yama;


@ -4,7 +4,7 @@ if [ $# -ge 1 ]
then then
files=$* files=$*
else else
files="testsuite/setup.py testsuite/datmantests testsuite/helpers testsuite/yamatests datman-helper-postgres/datman_helper_postgres datman-helper-postgres/setup.py datman-helper-mysql/datman_helper_mysql datman-helper-mysql/setup.py" files="testsuite/setup.py testsuite/datmantests testsuite/helpers testsuite/yamatests datman-helper-postgres/datman_helper_postgres datman-helper-mysql/datman_helper_mysql"
fi fi
echo "Linting these locations: $files" echo "Linting these locations: $files"

shell.nix (new file, 50 lines)

@ -0,0 +1,50 @@
{ pkgs ? import <nixpkgs> {} }:
let
# We may need some packages from nixpkgs-unstable
#unstable = import <nixpkgs-unstable> {};
rust-toolchain = pkgs.symlinkJoin {
name = "rust-toolchain";
paths = [pkgs.rustc pkgs.cargo pkgs.rustfmt pkgs.rustPlatform.rustcSrc];
};
in
pkgs.mkShell {
buildInputs = [
rust-toolchain
pkgs.pkg-config
pkgs.alsa-lib
pkgs.sqlite
#pkgs.libclang # ??
];
nativeBuildInputs = [
pkgs.openssl
pkgs.python3
];
# Needed for bindgen when binding to avahi
LIBCLANG_PATH="${pkgs.llvmPackages_latest.libclang.lib}/lib";
# Cargo culted:
# Add to rustc search path
RUSTFLAGS = (builtins.map (a: ''-L ${a}/lib'') [
]);
# Add to bindgen search path
BINDGEN_EXTRA_CLANG_ARGS =
# Includes with normal include path
(builtins.map (a: ''-I"${a}/include"'') [
])
# Includes with special directory paths
++ [
''-I"${pkgs.llvmPackages_latest.libclang.lib}/lib/clang/${pkgs.llvmPackages_latest.libclang.version}/include"''
#''-I"${pkgs.glib.dev}/include/glib-2.0"''
#''-I${pkgs.glib.out}/lib/glib-2.0/include/''
];
}


@ -251,7 +251,8 @@ kind = {{ stdout = "blahblah.txt" }}
seed = 7555 seed = 7555
print(f"seed: {seed}") print(f"seed: {seed}")
rng.seed(seed) rng.seed(seed)
# min_files is 8 because we need enough files to use each label for this test to succeed. # min_files is 8 because we need enough files to use each label for this
# test to succeed.
initial_descriptor, _ = generate_random_dir(rng, src_path, 32, min_files=8) initial_descriptor, _ = generate_random_dir(rng, src_path, 32, min_files=8)
labellings = generate_labels(initial_descriptor, rng) labellings = generate_labels(initial_descriptor, rng)
save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings) save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)
@ -298,3 +299,81 @@ kind = {{ stdout = "blahblah.txt" }}
) )
td.cleanup() td.cleanup()
def test_backup_incremental_with_mid_delete(self):
td = TemporaryDirectory("test_backup_incremental_with_mid_delete")
tdpath = Path(td.name)
datman_path = tdpath.joinpath("datman")
src_path = datman_path.joinpath("srca")
yama_path = datman_path.joinpath("main")
set_up_simple_datman(datman_path)
set_up_simple_yama(yama_path)
rng = Random()
seed = rng.randint(0, 9001)
print(f"seed: {seed}")
rng.seed(seed)
initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
print("storing")
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
# now mutate and store incremental
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
time.sleep(2)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
# now mutate and store incremental again!
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
mutated_descriptor = scan_dir(src_path)
time.sleep(2)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
pointer_names = [
line
for line in subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
.decode()
.split("\n")
if line
]
self.assertEqual(len(pointer_names), 3)
self.assertLess(pointer_names[0], pointer_names[1])
self.assertLess(pointer_names[1], pointer_names[2])
print(f"removing mid pointer {pointer_names[1]}")
subprocess.check_call(
("yama", "debug", "rmp", pointer_names[1]),
cwd=yama_path,
)
print("extracting last pointer to check still valid")
dest_path = tdpath.joinpath("desta")
subprocess.check_call(
(
"datman",
"extract",
"--skip-metadata",
"--accept-partial",
"main",
"../desta",
),
cwd=datman_path,
)
# this will be wrapped in a directory that starts with the name srca+
extracted_dir_descriptor_wrapper = scan_dir(dest_path)
contents = extracted_dir_descriptor_wrapper.contents
self.assertEqual(len(contents), 1)
key, value = next(iter(contents.items()))
self.assertTrue(key.startswith("srca+"))
self.assertIsInstance(value, DirectoryDescriptor)
key, value = next(iter(value.contents.items()))
self.assertEqual(key, "srca")
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
td.cleanup()


@ -1,6 +1,7 @@
import shutil import shutil
import subprocess import subprocess
from pathlib import Path from pathlib import Path
from typing import Set
def set_up_simple_yama(path: Path): def set_up_simple_yama(path: Path):
@ -10,3 +11,13 @@ def set_up_simple_yama(path: Path):
"example_zstd.dict" "example_zstd.dict"
) )
shutil.copyfile(example_zstd_path, path.joinpath("important_zstd.dict")) shutil.copyfile(example_zstd_path, path.joinpath("important_zstd.dict"))
def list_bloblog_ids(pile: Path) -> Set[int]:
result = set()
for p in pile.joinpath("bloblog").iterdir():
try:
result.add(int(p.name))
except ValueError:
pass
return result


@ -22,7 +22,7 @@ REQUIRED = ["green", "attrs", "immutabledict"]
# What packages are optional? # What packages are optional?
EXTRAS = {"dev": ["black==21.7b0", "flake8==3.9.2", "isort==5.9.2"]} EXTRAS = {"dev": ["black==22.10.0", "flake8==3.9.2", "isort==5.9.2"]}
# The rest you shouldn't have to touch too much :) # The rest you shouldn't have to touch too much :)
# ------------------------------------------------ # ------------------------------------------------


@ -0,0 +1,175 @@
import subprocess
from pathlib import Path
from random import Random
from tempfile import TemporaryDirectory
from unittest import TestCase
from helpers import (
DirectoryDescriptor,
generate_random_dir,
randomly_mutate_directory_in_descriptor,
scan_dir,
)
from helpers.datman_helpers import set_up_simple_datman
from helpers.yama_helpers import list_bloblog_ids, set_up_simple_yama
class TestYamaCompact(TestCase):
def test_compaction_merge_two_small_bloblogs(self):
td = TemporaryDirectory("test_compaction_merge_two_small_bloblogs")
tdpath = Path(td.name)
datman_path = tdpath.joinpath("datman")
src_path = datman_path.joinpath("srca")
yama_path = datman_path.joinpath("main")
set_up_simple_datman(datman_path)
set_up_simple_yama(yama_path)
rng = Random()
seed = rng.randint(0, 9001)
print(f"seed: {seed}")
rng.seed(seed)
later_expected_descriptor, _ = generate_random_dir(rng, src_path, 32)
# Back up twice: that way we should get at least two bloblogs!
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
old_bloblog_ids = list_bloblog_ids(yama_path)
self.assertGreater(
len(old_bloblog_ids), 1, "Should be many bloblogs at this point"
)
subprocess.check_call(
(
"yama",
"compact",
"--mergeable",
"2",
"--small",
str(2 * 1024 * 1024 * 1024),
),
cwd=yama_path,
)
new_bloblog_ids = list_bloblog_ids(yama_path)
self.assertEqual(
len(new_bloblog_ids), 1, "Should only be 1 bloblog at this point."
)
self.assertEqual(
list(new_bloblog_ids)[0],
max(old_bloblog_ids) + 1,
"New bloblog ID should be 1 greater than the max old one.",
)
def test_gc_then_compact(self):
td = TemporaryDirectory("test_gc_then_compact")
tdpath = Path(td.name)
datman_path = tdpath.joinpath("datman")
src_path = datman_path.joinpath("srca")
yama_path = datman_path.joinpath("main")
set_up_simple_datman(datman_path)
set_up_simple_yama(yama_path)
rng = Random()
seed = rng.randint(0, 9001)
print(f"seed: {seed}")
rng.seed(seed)
initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
orig_pointer_name = (
subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
.decode()
.split("\n")[0]
)
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
mutated_descriptor = scan_dir(src_path)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
old_bloblog_ids = list_bloblog_ids(yama_path)
# Try a GC and check that it's a no-op
subprocess.check_call(
("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
)
subprocess.check_call(
(
"yama",
"compact",
"--mergeable",
"2000",
"--reclaim",
"1",
"--max-dealloc",
"1",
),
cwd=yama_path,
)
unchanged_bloblog_ids = list_bloblog_ids(yama_path)
self.assertEqual(
old_bloblog_ids,
unchanged_bloblog_ids,
"No GC: no compaction should have happened.",
)
subprocess.check_call(
("yama", "debug", "rmp", orig_pointer_name), cwd=yama_path
)
# Try a GC and check that it did something
subprocess.check_call(
("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
)
subprocess.check_call(
(
"yama",
"compact",
"--mergeable",
"2000",
"--reclaim",
"1",
"--max-dealloc",
"1",
),
cwd=yama_path,
)
new_bloblog_ids = list_bloblog_ids(yama_path)
self.assertNotEqual(
old_bloblog_ids, new_bloblog_ids, "GC: compaction should have happened."
)
# Check that we can still extract the files!
dest_path = tdpath.joinpath("desta")
subprocess.check_call(
(
"datman",
"extract",
"--skip-metadata",
"--accept-partial",
"main",
"../desta",
),
cwd=datman_path,
)
extracted_dir_descriptor_wrapper = scan_dir(dest_path)
contents = extracted_dir_descriptor_wrapper.contents
self.assertEqual(len(contents), 1)
key, value = next(iter(contents.items()))
self.assertTrue(key.startswith("srca+"))
self.assertIsInstance(value, DirectoryDescriptor)
key, value = next(iter(value.contents.items()))
self.assertEqual(key, "srca")
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
td.cleanup()


@ -1,6 +1,6 @@
[package] [package]
name = "yama" name = "yama"
version = "0.6.0-alpha.2" version = "0.6.0-alpha.5"
authors = ["Olivier 'reivilibre' <olivier@librepush.net>"] authors = ["Olivier 'reivilibre' <olivier@librepush.net>"]
edition = "2018" edition = "2018"
description = "Deduplicated, compressed and encrypted content pile manager" description = "Deduplicated, compressed and encrypted content pile manager"


@ -26,8 +26,11 @@ use std::sync::Arc;
use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile}; use yama::commands::{fully_integrate_pointer_node, load_pile_descriptor, open_pile};
use yama::debug::{debug_command, DebugCommand}; use yama::debug::{debug_command, DebugCommand};
use yama::operations::checking::VacuumMode; use yama::operations::checking::VacuumMode;
use yama::operations::pushpull::{determine_bypass_level, open_pile_with_work_bypass, push_to}; use yama::operations::legacy_pushpull::{
use yama::operations::{checking, extracting}; determine_bypass_level, open_pile_with_work_bypass, push_to,
};
use yama::operations::{checking, cleanup, extracting};
use yama::pile::local_sqlitebloblogs::CompactionThresholds;
use yama::pile::{Pile, PileDescriptor, RawPile}; use yama::pile::{Pile, PileDescriptor, RawPile};
use yama::{commands, debug}; use yama::{commands, debug};
@ -54,8 +57,9 @@ enum PileCommand {
pointer_name: String, pointer_name: String,
/// Limited expression(s) of files to retrieve. /// Limited expression(s) of files to retrieve.
/// LIMITATION OF CURRENT VERSION: ONLY ONE EXACT PATH ALLOWED, PLEASE.
#[clap(short, long)] #[clap(short, long)]
subset: Vec<PathBuf>, subset: Option<String>,
destination: PathBuf, destination: PathBuf,
@ -80,6 +84,29 @@ enum PileCommand {
shallow: bool, shallow: bool,
}, },
Compact {
/// Don't actually perform any compaction; just plan it out.
#[clap(long)]
dry_run: bool,
/// Allocated size under which a bloblog is considered small.
#[clap(long = "small")]
small_thresh: Option<u64>,
/// Minimum amount of space to reclaim in order to run compaction for reclaim.
#[clap(long = "reclaim")]
min_reclaim: Option<u64>,
/// Maximum amount of space that can be deallocated in a bloblog before we consider it
/// worthwhile to replace.
#[clap(long = "max-dealloc")]
max_deallocated: Option<u64>,
/// Minimum number of mergeable small bloblogs in order to run compaction for merge.
#[clap(long)]
mergeable: Option<u32>,
},
/// Enter a debug prompt for manually operating on the yama pile. /// Enter a debug prompt for manually operating on the yama pile.
Debug { supplied_command: Vec<String> }, Debug { supplied_command: Vec<String> },
@ -116,7 +143,7 @@ fn wrapped_main() -> anyhow::Result<i32> {
match &opts.command { match &opts.command {
PileCommand::Retrieve { PileCommand::Retrieve {
pointer_name, pointer_name,
subset: _, subset,
destination, destination,
num_workers: workers, num_workers: workers,
} => { } => {
@ -134,10 +161,25 @@ fn wrapped_main() -> anyhow::Result<i32> {
fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?; fully_integrate_pointer_node(&pile, &mut root_tree_node.node, &mut pointer)?;
let mut node_to_extract = &mut root_tree_node.node;
if let Some(subset) = subset {
for path_to_descend in subset.split('/').filter(|s| !s.is_empty()) {
match node_to_extract.child(path_to_descend) {
Ok(new_node) => {
node_to_extract = new_node;
}
Err(msg) => {
bail!("Can't descend into {path_to_descend:?}: {msg}");
}
}
}
}
// todo allow disabling apply metadata // todo allow disabling apply metadata
extracting::extract( extracting::extract(
destination, destination,
&mut root_tree_node.node, node_to_extract,
&pile, &pile,
true, true,
workers.unwrap_or(2), workers.unwrap_or(2),
@ -173,6 +215,29 @@ fn wrapped_main() -> anyhow::Result<i32> {
return Ok(1); return Ok(1);
} }
} }
PileCommand::Compact {
dry_run,
small_thresh,
min_reclaim,
max_deallocated,
mergeable,
} => {
let this_dir = Path::new(".");
let descriptor =
load_pile_descriptor(this_dir).context("Failed to load pile descriptor")?;
cleanup::compact(
this_dir,
&descriptor,
!*dry_run,
true,
CompactionThresholds {
minimum_to_reclaim: min_reclaim.unwrap_or(2 * 1024 * 1024 * 1024),
minimum_small_bloblogs_to_merge: mergeable.unwrap_or(64),
cond_if_more_deallocated_than: max_deallocated.unwrap_or(256 * 1024 * 1024),
cond_if_less_allocated_than: small_thresh.unwrap_or(64 * 1024 * 1024),
},
)?;
}
PileCommand::Init {} => { PileCommand::Init {} => {
commands::init(".".as_ref())?; commands::init(".".as_ref())?;
} }


@ -15,13 +15,12 @@ You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>. along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/ */
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node, store_tree_node}; use crate::commands::retrieve_tree_node;
use crate::definitions::{FilesystemOwnership, FilesystemPermissions, TreeNode}; use crate::definitions::{FilesystemOwnership, FilesystemPermissions, TreeNode};
use crate::operations::remove_pointer_safely;
use crate::pile::{Pile, PileDescriptor, RawPile}; use crate::pile::{Pile, PileDescriptor, RawPile};
use crate::tree::integrate_node_in_place;
use anyhow::anyhow; use anyhow::anyhow;
use clap::Parser; use clap::Parser;
use log::info;
use rustyline::error::ReadlineError; use rustyline::error::ReadlineError;
use rustyline::Editor; use rustyline::Editor;
@ -123,50 +122,7 @@ pub fn debug_command<RP: RawPile>(
} }
} }
DebugCommand::DeletePointer { name } => { DebugCommand::DeletePointer { name } => {
// retrieve this pointer remove_pointer_safely(pile, name)?;
let mut this_pointer = pile.read_pointer(name.as_str())?.ok_or_else(|| {
anyhow!("Pointer {:?} does not exist so can not be deleted.", name)
})?;
let mut this_node = retrieve_tree_node(&pile, this_pointer.chunk_ref.clone())?;
// fully integrate the pointer
fully_integrate_pointer_node(&pile, &mut this_node.node, &mut this_pointer)?;
assert!(this_pointer.parent_pointer.is_none());
// now integrate any pointers that rely on this one
// so that they no longer rely on this one.
for pointer in pile.list_pointers()?.iter() {
if pointer == name {
continue;
}
if let Some(mut pointer_data) = pile.read_pointer(pointer.as_str())? {
if let Some(parent_pointer) = pointer_data.parent_pointer.as_ref() {
if parent_pointer == name {
info!("Pointer is now an orphan: {:?}", pointer);
// need to integrate this node, so retrieve it
let mut node = retrieve_tree_node(&pile, pointer_data.chunk_ref)?;
// integrate it in-place
integrate_node_in_place(&mut node.node, &this_node.node)?;
// mark it as orphaned (no parent)
pointer_data.parent_pointer = None;
// store the orphaned node
let new_chunk_ref = store_tree_node(&pile, &node)?;
// associate the orphaned node with the orphaned pointer
pointer_data.chunk_ref = new_chunk_ref;
// write the pointer back.
pile.write_pointer(pointer.as_str(), &pointer_data)?;
}
}
}
}
// then delete the pointer
pile.delete_pointer(name)?;
info!("Deleted pointer: {:?}", name);
} }
DebugCommand::PointerInfo { name } => { DebugCommand::PointerInfo { name } => {
let this_pointer = pile let this_pointer = pile


@ -270,6 +270,19 @@ impl TreeNode {
} }
} }
} }
/// Recurses into a child by name, or returns Err with a reason.
pub fn child(&mut self, name: &str) -> Result<&mut TreeNode, &'static str> {
match self {
TreeNode::NormalFile { .. } => Err("not a directory: normal file"),
TreeNode::Directory { children, .. } => match children.get_mut(name) {
None => Err("child not in directory"),
Some(node) => Ok(node),
},
TreeNode::SymbolicLink { .. } => Err("not a directory: symlink"),
TreeNode::Deleted => Err("not a directory: deleted"),
}
}
} }
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)] #[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]


@ -1,4 +1,80 @@
use crate::commands::{fully_integrate_pointer_node, retrieve_tree_node, store_tree_node};
use crate::pile::{Pile, RawPile};
use crate::tree::{differentiate_node_in_place, integrate_node_in_place};
use anyhow::{anyhow, Context};
use log::info;
pub mod checking; pub mod checking;
pub mod cleanup;
pub mod extracting; pub mod extracting;
pub mod pushpull; pub mod legacy_pushpull;
pub mod storing; pub mod storing;
pub fn remove_pointer_safely<P: RawPile>(pile: &Pile<P>, name: &str) -> anyhow::Result<()> {
// retrieve this pointer
let mut this_pointer = pile
.read_pointer(name)?
.ok_or_else(|| anyhow!("Pointer {:?} does not exist so can not be deleted.", name))?;
let mut this_node = retrieve_tree_node(&pile, this_pointer.chunk_ref.clone())
.context("retrieving 'this' node")?;
let new_parent_name = this_pointer.parent_pointer.clone();
fully_integrate_pointer_node(pile, &mut this_node.node, &mut this_pointer)
.context("integrating new parent")?;
let new_parent = if let Some(ref new_parent_name) = new_parent_name {
let mut new_parent_pointer = pile
.read_pointer(new_parent_name.as_str())?
.ok_or_else(|| anyhow!("Parent pointer {:?} does not exist.", name))?;
let mut new_parent_node = retrieve_tree_node(&pile, new_parent_pointer.chunk_ref.clone())?;
fully_integrate_pointer_node(pile, &mut new_parent_node.node, &mut new_parent_pointer)?;
Some((new_parent_pointer, new_parent_node))
} else {
None
};
// now integrate any pointers that rely on this one
// so that they no longer rely on this one.
for pointer in pile.list_pointers()?.iter() {
if pointer == name {
continue;
}
if let Some(mut pointer_data) = pile.read_pointer(pointer.as_str())? {
if let Some(parent_pointer) = pointer_data.parent_pointer.as_ref() {
if parent_pointer == name {
info!("Pointer would be orphaned: {:?}; integrating", pointer);
// need to integrate this node, so retrieve it
let mut node = retrieve_tree_node(&pile, pointer_data.chunk_ref)?;
// integrate it in-place
integrate_node_in_place(&mut node.node, &this_node.node)?;
if let Some((_, ref new_parent_node)) = new_parent {
// then differentiate with respect to the NEW parent
differentiate_node_in_place(&mut node.node, &new_parent_node.node)?;
}
// pass through the parent
pointer_data.parent_pointer = new_parent_name.clone();
// store the updated version of the pointer
let new_chunk_ref = store_tree_node(&pile, &node)?;
// associate the new node with the new version of the pointer
pointer_data.chunk_ref = new_chunk_ref;
// write the pointer back.
pile.write_pointer(pointer.as_str(), &pointer_data)?;
// we must flush chunks before deleting the pointer
pile.flush()
.context("flushing after writing pointer back")?;
}
}
}
}
// then delete the pointer
pile.delete_pointer(name)?;
info!("Deleted pointer: {:?}", name);
Ok(())
}
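The reparenting dance above (integrate the doomed pointer, integrate each dependent pointer, then differentiate it against the new parent) is easier to see with a toy model in which a snapshot is just a map from path to content and a pointer stores only the entries that differ from its parent. This is not yama's real tree format (deletions and metadata are ignored); it only shows why the middle link of a chain can be removed without losing the children's data.

// Toy model only: a "snapshot" is a flat map and a pointer stores just the
// entries that differ from its parent. Deletions are not modelled.
use std::collections::BTreeMap;

type Snapshot = BTreeMap<String, String>;

fn snap(entries: &[(&str, &str)]) -> Snapshot {
    entries.iter().map(|(k, v)| (k.to_string(), v.to_string())).collect()
}

/// Rebuild the full view of a child given its fully-integrated parent.
fn integrate(parent: &Snapshot, delta: &Snapshot) -> Snapshot {
    let mut full = parent.clone();
    full.extend(delta.clone());
    full
}

/// Keep only the entries that differ from the (new) parent.
fn differentiate(full: &Snapshot, parent: &Snapshot) -> Snapshot {
    full.iter()
        .filter(|(k, v)| parent.get(k.as_str()) != Some(*v))
        .map(|(k, v)| (k.clone(), v.clone()))
        .collect()
}

fn main() {
    let grandparent = snap(&[("a", "1"), ("b", "1")]);
    let middle_delta = snap(&[("b", "2")]); // parent: grandparent
    let child_delta = snap(&[("c", "3")]); // parent: middle

    // Fully integrate the chain grandparent <- middle <- child.
    let middle_full = integrate(&grandparent, &middle_delta);
    let child_full = integrate(&middle_full, &child_delta);

    // Removing `middle`: re-express the child relative to the grandparent.
    let reparented_child = differentiate(&child_full, &grandparent);
    assert_eq!(reparented_child, snap(&[("b", "2"), ("c", "3")]));
}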


@ -24,8 +24,10 @@ use crate::pile::{
use anyhow::bail; use anyhow::bail;
use crossbeam_channel::Sender; use crossbeam_channel::Sender;
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use itertools::Itertools;
use log::{error, info, warn}; use log::{error, info, warn};
use std::collections::HashSet; use std::collections::HashSet;
use std::convert::TryInto;
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::sync::Mutex; use std::sync::Mutex;
@ -110,6 +112,10 @@ impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
self.underlying.delete(kind, key) self.underlying.delete(kind, key)
} }
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
self.underlying.delete_many(kind, keys)
}
fn list_keys( fn list_keys(
&self, &self,
kind: Keyspace, kind: Keyspace,
@ -137,6 +143,10 @@ impl<RP: RawPile> RawPile for VacuumRawPile<RP> {
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> { fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
self.underlying.describe_pipeline() self.underlying.describe_pipeline()
} }
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
}
} }
/// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time. /// Runs a full check of a Yama pile. This reads ALL the chunks, which can take a long time.
@ -403,9 +413,21 @@ pub fn check_shallow<RP: RawPile>(
// actually do the vacuum! // actually do the vacuum!
info!("Going to vacuum them up."); info!("Going to vacuum them up.");
for vacuum_id in to_vacuum { for vacuum_ids_chunk in to_vacuum
pile.raw_pile.delete(Keyspace::Chunk, &vacuum_id)?; .into_iter()
pbar.inc(1); .chunks(512)
.into_iter()
.map(|c| c.collect::<Vec<ChunkId>>())
{
pile.raw_pile.delete_many(
Keyspace::Chunk,
vacuum_ids_chunk
.iter()
.map(|ci| ci.as_slice())
.collect::<Vec<&[u8]>>()
.as_slice(),
)?;
pbar.inc(vacuum_ids_chunk.len().try_into().unwrap());
} }
pile.flush()?; pile.flush()?;
pbar.finish_and_clear(); pbar.finish_and_clear();
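The loop above swaps one delete call per chunk for a single delete_many call per batch of up to 512 chunk IDs. The same batching pattern, restated generically with a closure standing in for RawPile::delete_many:

// Generic sketch of the batching used above; delete_many here is any callback
// that accepts one batch of byte-slice keys per call.
use itertools::Itertools;

fn delete_in_batches(
    keys: Vec<Vec<u8>>,
    mut delete_many: impl FnMut(&[&[u8]]) -> anyhow::Result<()>,
) -> anyhow::Result<()> {
    for batch in &keys.iter().chunks(512) {
        let batch: Vec<&[u8]> = batch.map(|key| key.as_slice()).collect();
        delete_many(&batch)?;
    }
    Ok(())
}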


@ -0,0 +1,64 @@
use crate::pile::local_sqlitebloblogs::{CompactionThresholds, SqliteBloblogPile};
use crate::pile::{PileDescriptor, PileStorage};
use anyhow::{bail, Context};
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use log::info;
use std::path::Path;
pub fn compact(
pile_path: &Path,
pile_desc: &PileDescriptor,
actually_run: bool,
make_progress_bar: bool,
thresholds: CompactionThresholds,
) -> anyhow::Result<()> {
let pbar = if make_progress_bar {
ProgressBar::with_draw_target(1000 as u64, ProgressDrawTarget::stdout_with_hz(10))
} else {
ProgressBar::hidden()
};
pbar.set_style(
ProgressStyle::default_bar()
.template("[{elapsed_precise}]/[{eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"),
);
pbar.set_message("compacting");
match pile_desc.storage {
PileStorage::SqliteIndexedBloblog => {
let bloblog_pile = SqliteBloblogPile::open(&pile_path)
.context("Failed to open SQLite-indexed Bloblog Pile")?;
compact_bloblogs(bloblog_pile, pbar, actually_run, thresholds)?;
Ok(())
}
other @ PileStorage::RemoteOnly => {
bail!("Cannot use compaction on this kind of pile: {other:?}!");
}
}
}
fn compact_bloblogs(
bloblog_pile: SqliteBloblogPile,
pbar: ProgressBar,
actually_run: bool,
thresholds: CompactionThresholds,
) -> anyhow::Result<()> {
info!("=== Analysing for compaction ===");
let analysis = bloblog_pile.analyse_for_compaction()?;
let chunks_total: u64 = analysis.values().map(|bs| bs.chunks_total).sum();
let chunks_deleted: u64 = analysis.values().map(|bs| bs.chunks_deleted).sum();
let bytes_total: u64 = analysis.values().map(|bs| bs.bytes_total).sum();
let bytes_deleted: u64 = analysis.values().map(|bs| bs.bytes_deleted).sum();
info!("{} bloblogs in this pile, with {chunks_total} chunks ({bytes_total} B) of which {chunks_deleted} ({bytes_deleted} B) are deleted.", analysis.len());
info!("=== Planning compaction ===");
let plan = bloblog_pile.plan_compaction(&thresholds, analysis)?;
info!("Planned compaction: replace {} bloblogs (of which {} are small), freeing up {} B and rewriting {} B", plan.bloblogs_to_replace.len(), plan.small_bloblogs, plan.reclaimable_space, plan.bytes_to_write);
if actually_run {
info!("=== Compacting ===");
bloblog_pile.perform_compaction(Box::new(pbar), plan)?;
}
Ok(())
}
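A rough sketch of how this entry point might be driven, assuming access to compact and CompactionThresholds as shown above; the threshold values are invented for illustration and are not datman/yama defaults:

    use std::path::Path;

    fn compact_with_example_thresholds(
        pile_path: &Path,
        pile_desc: &PileDescriptor,
    ) -> anyhow::Result<()> {
        let thresholds = CompactionThresholds {
            minimum_to_reclaim: 64 * 1024 * 1024,    // only bother if >= 64 MiB is reclaimable...
            minimum_small_bloblogs_to_merge: 16,     // ...or at least 16 undersized bloblogs exist
            cond_if_more_deallocated_than: 32 * 1024 * 1024,
            cond_if_less_allocated_than: 8 * 1024 * 1024,
        };
        // Passing `false` for `actually_run` would analyse and plan without rewriting anything.
        compact(pile_path, pile_desc, true, true, thresholds)
    }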

View File

@ -2,7 +2,7 @@ use crate::chunking::RecursiveUnchunker;
use crate::commands::fully_load_pointer;
use crate::definitions::{ChunkId, RecursiveChunkRef, TreeNode};
use crate::operations::checking::VacuumRawPile;
-use crate::operations::pushpull::PushWorkerToManagerMessage::{NewTask, TaskDone};
+use crate::operations::legacy_pushpull::PushWorkerToManagerMessage::{NewTask, TaskDone};
use crate::pile::compression::{CompressionSettings, RawPileCompressor};
use crate::pile::integrity::RawPileIntegrityChecker;
use crate::pile::local_sqlitebloblogs::SqliteBloblogPile;

View File

@ -323,7 +323,12 @@ pub fn store_without_pointer_ops<PT: ProgressTracker>(
let chunk_ref = commands::store_tree_node(
&pile,
&RootTreeNode {
-name: root_dir.file_name().unwrap().to_str().unwrap().to_owned(),
+name: root_dir
+.file_name()
+.map(|s| s.to_str())
+.flatten()
+.unwrap_or("_root")
+.to_owned(),
node: root_node,
},
)?;

View File

@ -126,7 +126,7 @@ pub enum ControllerMessage {
},
}
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub enum PipelineDescription {
Store,
Remote,
@ -141,6 +141,7 @@ pub trait RawPile: Send + Sync + Debug + 'static {
fn read(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<Option<Vec<u8>>>;
fn write(&self, kind: Keyspace, key: &[u8], value: &[u8]) -> anyhow::Result<()>;
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()>;
+fn delete_many(&self, kind: Keyspace, key: &[&[u8]]) -> anyhow::Result<()>;
fn list_keys(
&self,
kind: Keyspace,
@ -167,6 +168,10 @@ pub trait RawPile: Send + Sync + Debug + 'static {
) -> anyhow::Result<Sender<(ChunkId, Vec<u8>)>>;
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>>;
+/// Return a u64 order token that indicates the optimum order to read this chunk in
+/// compared to other chunks.
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64>;
}
impl RawPile for Box<dyn RawPile> {
@ -182,6 +187,9 @@ impl RawPile for Box<dyn RawPile> {
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
self.as_ref().delete(kind, key)
}
+fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
+self.as_ref().delete_many(kind, keys)
+}
fn list_keys(
&self,
kind: Keyspace,
@ -210,6 +218,10 @@ impl RawPile for Box<dyn RawPile> {
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
self.as_ref().describe_pipeline()
}
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
+self.as_ref().chunk_id_transfer_ordering_hint(chunk_id)
+}
}
impl<RP: RawPile> RawPile for Arc<RP> {
@ -225,6 +237,9 @@ impl<RP: RawPile> RawPile for Arc<RP> {
fn delete(&self, kind: Keyspace, key: &[u8]) -> anyhow::Result<()> {
self.as_ref().delete(kind, key)
}
+fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
+self.as_ref().delete_many(kind, keys)
+}
fn list_keys(
&self,
kind: Keyspace,
@ -253,6 +268,10 @@ impl<RP: RawPile> RawPile for Arc<RP> {
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
self.as_ref().describe_pipeline()
}
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
+self.as_ref().chunk_id_transfer_ordering_hint(chunk_id)
+}
}
#[derive(Debug)]

View File

@ -83,6 +83,10 @@ impl<R: Clone + RawPile> RawPile for PileGuard<R> {
bail!("Access denied"); bail!("Access denied");
} }
fn delete_many(&self, _kind: Keyspace, _keys: &[&[u8]]) -> anyhow::Result<()> {
bail!("Access denied");
}
fn list_keys( fn list_keys(
&self, &self,
_kind: Keyspace, _kind: Keyspace,
@ -130,4 +134,8 @@ impl<R: Clone + RawPile> RawPile for PileGuard<R> {
// TODO(question) Should we be described in the pipeline?
self.underlying.describe_pipeline()
}
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
+self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
+}
}

View File

@ -278,6 +278,10 @@ impl<R: RawPile> RawPile for RawPileCompressor<R> {
self.underlying.delete(kind, key)
}
+fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
+self.underlying.delete_many(kind, keys)
+}
fn list_keys(
&self,
kind: Keyspace,
@ -348,4 +352,8 @@ impl<R: RawPile> RawPile for RawPileCompressor<R> {
});
Ok(underlying)
}
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
+self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
+}
}

View File

@ -101,6 +101,10 @@ impl<R: RawPile> RawPile for RawPileEncryptor<R> {
self.underlying.delete(kind, key)
}
+fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
+self.underlying.delete_many(kind, keys)
+}
fn list_keys(
&self,
kind: Keyspace,
@ -127,4 +131,8 @@ impl<R: RawPile> RawPile for RawPileEncryptor<R> {
underlying.push(PipelineDescription::Encryption);
Ok(underlying)
}
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
+self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
+}
}

View File

@ -98,6 +98,10 @@ impl<RP: RawPile> RawPile for RawPileIntegrityChecker<RP> {
self.underlying.delete(kind, key)
}
+fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
+self.underlying.delete_many(kind, keys)
+}
fn list_keys(
&self,
kind: Keyspace,
@ -149,4 +153,8 @@ impl<RP: RawPile> RawPile for RawPileIntegrityChecker<RP> {
underlying.push(PipelineDescription::Integrity);
Ok(underlying)
}
+fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
+self.underlying.chunk_id_transfer_ordering_hint(chunk_id)
+}
}

View File

@ -15,20 +15,23 @@ You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
-use std::collections::hash_map::Entry;
-use std::collections::{HashMap, VecDeque};
+use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
use std::convert::{TryFrom, TryInto};
-use std::fs::{read_dir, File, OpenOptions};
+use std::fs::{read_dir, remove_file, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};
+use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Condvar, Mutex};
+use std::time::Duration;
use std::{fs, thread};
-use anyhow::{bail, Context};
+use anyhow::{bail, ensure, Context};
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use crossbeam_channel::{Receiver, Sender};
use log::{info, warn};
use nix::unistd::sync;
-use rusqlite::{params, Error, ErrorCode};
+use rusqlite::ffi::ErrorCode::ConstraintViolation;
+use rusqlite::{params, Error, ErrorCode, Transaction, TransactionBehavior, NO_PARAMS};
use rusqlite::{Connection, OptionalExtension};
use crate::definitions::ChunkId;
@ -36,10 +39,8 @@ use crate::pile::{
ControllerMessage, DebugStatistics, Keyspace, PipelineDescription, RawPile,
StoragePipelineSettings,
};
-use crate::utils::bytes_to_hexstring;
-use crossbeam_channel::{Receiver, Sender};
-use rusqlite::ffi::ErrorCode::ConstraintViolation;
-use std::time::Duration;
+use crate::progress::ProgressTracker;
+use crate::utils::{bytes_to_hexstring, LruMap};
/// Bloblogs will not be reused if they are already 2 GiB large.
pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
@ -47,6 +48,14 @@ pub const MAX_BLOBLOG_REUSE_SIZE: u64 = 2 * 1024 * 1024 * 1024;
/// This many pointers will be batched up for writing.
pub const POINTER_WRITE_BATCHES: usize = 2048;
/// This many bloblogs will be kept open for reading, at maximum.
pub const BLOBLOG_MAX_READING_FILE_COUNT: usize = 128;
/// Size of a blob header within a bloblog.
/// 32 byte Chunk Id
/// 4 byte (u32) Blob size
pub const BLOB_HEADER_SIZE: u64 = 32 + 4;
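In other words, each blob in a bloblog is preceded by a 36-byte header: the 32-byte chunk ID plus a 4-byte length, which is why deleted-blob accounting later adds chunks_deleted * BLOB_HEADER_SIZE. A standalone sketch of that framing (the ID-first ordering and big-endian length are assumptions for illustration; the authoritative layout is whatever Bloblog itself writes):

    use byteorder::{BigEndian, WriteBytesExt};
    use std::io::Write;

    type ChunkId = [u8; 32];

    // Illustrative only: 32-byte chunk ID + u32 length (assumed big-endian), then the payload.
    fn write_blob_sketch(out: &mut Vec<u8>, chunk_id: &ChunkId, blob: &[u8]) -> std::io::Result<()> {
        out.write_all(chunk_id)?;                       // 32 bytes
        out.write_u32::<BigEndian>(blob.len() as u32)?; // 4 bytes -> BLOB_HEADER_SIZE = 36
        out.write_all(blob)?;
        Ok(())
    }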
/// A file storing a log of blobs.
/// Format:
/// Repeated:
@ -136,8 +145,8 @@ pub type BloblogId = u32;
#[derive(Debug)]
pub struct Inner {
next_bloblog_id: BloblogId,
-writer_bloblogs: Vec<BloblogId>,
-open_bloblogs: HashMap<BloblogId, Arc<Mutex<Bloblog>>>, // TODO want an LRU cache with a weak hashmap...?
+writer_bloblogs: Vec<(BloblogId, Arc<Mutex<Bloblog>>)>,
+reader_bloblogs: LruMap<BloblogId, Arc<Mutex<Bloblog>>>,
connection: Connection,
writers_in_progress: u16,
// We batch up pointer writes because sync() performance really hurts us if we do them one by
@ -145,14 +154,13 @@ pub struct Inner {
queued_pointer_writes: HashMap<ChunkId, BloblogPointer>,
}
-impl Inner {
-pub fn raw_put_chunk_pointer(
-&self,
+fn raw_put_chunk_pointer_txn(
+txn: &Transaction,
chunk_id: &ChunkId,
bloblog: BloblogId,
offset_i64: i64,
) -> anyhow::Result<()> {
-match self.connection.execute(
+match txn.execute(
"INSERT INTO chunks (chunk_id, bloblog, offset) VALUES (?1, ?2, ?3)",
params![&chunk_id[..], bloblog, offset_i64],
) {
@ -165,27 +173,41 @@ impl Inner {
);
Ok(())
} else {
-Err(Error::SqliteFailure(e, str))?;
-unreachable!();
+Err(Error::SqliteFailure(e, str).into())
}
}
-other => {
-other?;
-unreachable!();
-}
+Err(other) => Err(other.into()),
}
}
+impl Inner {
+pub fn raw_put_chunk_pointer(
+&mut self,
+chunk_id: &ChunkId,
+bloblog: BloblogId,
+offset_i64: i64,
+) -> anyhow::Result<()> {
+let txn = self.connection.transaction()?;
+raw_put_chunk_pointer_txn(&txn, chunk_id, bloblog, offset_i64)?;
+txn.commit()?;
+Ok(())
}
pub fn flush(&mut self) -> anyhow::Result<()> {
// Create a non-allocated hashmap to satisfy borrow checker, then swap it in and out
let mut queued_pointer_writes = HashMap::with_capacity(0);
std::mem::swap(&mut self.queued_pointer_writes, &mut queued_pointer_writes);
+let txn = self.connection.transaction()?;
for (chunk_id, pointer) in queued_pointer_writes.drain() {
let offset_i64 =
i64::try_from(pointer.offset).expect("ouch! can't turn u64 into i64...");
-self.raw_put_chunk_pointer(&chunk_id, pointer.bloblog, offset_i64)?;
+raw_put_chunk_pointer_txn(&txn, &chunk_id, pointer.bloblog, offset_i64)?;
}
std::mem::swap(&mut self.queued_pointer_writes, &mut queued_pointer_writes);
+txn.commit()?;
Ok(())
}
}
@ -247,11 +269,18 @@ impl SqliteBloblogPile {
)?;
}
// Enable WAL mode for significantly better write performance.
connection.execute_batch(
"
PRAGMA journal_mode=WAL;
",
)?;
Ok(SqliteBloblogPile {
inner: Arc::new(Mutex::new(Inner {
next_bloblog_id: 0,
writer_bloblogs: Vec::new(),
-open_bloblogs: HashMap::new(),
+reader_bloblogs: LruMap::new(BLOBLOG_MAX_READING_FILE_COUNT),
connection,
writers_in_progress: 0,
queued_pointer_writes: Default::default(),
@ -264,23 +293,33 @@ impl SqliteBloblogPile {
fn open_bloblog(&self, bloblog_id: BloblogId) -> anyhow::Result<Arc<Mutex<Bloblog>>> {
let mut inner = self.inner.lock().unwrap();
-Ok(match inner.open_bloblogs.entry(bloblog_id) {
-Entry::Occupied(entry) => entry.get().clone(),
-Entry::Vacant(entry) => {
+match inner.reader_bloblogs.get(&bloblog_id) {
+Some(bloblog) => Ok(bloblog.clone()),
+None => {
let bloblog = Arc::new(Mutex::new(Bloblog::open(
&self.path.join(&bloblog_id.to_string()),
)?));
-entry.insert(bloblog.clone());
-bloblog
+inner.reader_bloblogs.insert(bloblog_id, bloblog.clone());
+Ok(bloblog)
}
-})
+}
}
fn get_writing_bloblog(&self) -> anyhow::Result<(BloblogId, Arc<Mutex<Bloblog>>)> {
let mut inner = self.inner.lock().unwrap();
-let writing_bloblog_id: BloblogId = match inner.writer_bloblogs.pop() {
-None => {
-loop {
+inner.writers_in_progress += 1;
+if let Some(writing_bloblog) = inner.writer_bloblogs.pop() {
+// We already have an open bloblog to give back.
+return Ok(writing_bloblog);
+}
+// No open bloblogs to reuse; create a new one.
+// It's very important to create a fresh one here; we definitely don't want to use a file
+// that someone else is using!
+let writing_bloblog_id = loop {
let pre_inc = inner.next_bloblog_id;
inner.next_bloblog_id += 1;
@ -289,26 +328,18 @@ impl SqliteBloblogPile {
if !bloblog_path.exists() {
break pre_inc;
}
-}
-}
-Some(id) => id,
};
-let result = Ok((
-writing_bloblog_id,
-match inner.open_bloblogs.entry(writing_bloblog_id) {
-Entry::Occupied(entry) => entry.get().clone(),
-Entry::Vacant(entry) => {
let bloblog = Arc::new(Mutex::new(Bloblog::open(
&self.path.join(&writing_bloblog_id.to_string()),
)?));
-entry.insert(bloblog.clone());
-bloblog
-}
-},
-));
-inner.writers_in_progress += 1;
-result
+// MAYBE FUTURE // Insert a weak reference so we can easily get a reader for this if desired.
+// inner.open_bloblogs.insert(writing_bloblog_id, Arc::downgrade(&bloblog));
+// For now, I don't think we actually care about reading a bloblog that we've written
+// (at least not usually?)
+Ok((writing_bloblog_id, bloblog))
}
/// Should be called once the bloblog has been finished writing to for the moment.
@ -321,7 +352,7 @@ impl SqliteBloblogPile {
let size = bloblog.lock().unwrap().filesize()?;
let mut inner = self.inner.lock().unwrap();
if size < MAX_BLOBLOG_REUSE_SIZE {
-inner.writer_bloblogs.push(id);
+inner.writer_bloblogs.push((id, bloblog));
}
inner.writers_in_progress -= 1;
if inner.writers_in_progress == 0 {
@ -347,8 +378,33 @@ impl SqliteBloblogPile {
.optional()?)
}
fn get_chunk_pointers(
&self,
chunk_ids: &[&[u8]],
) -> anyhow::Result<Vec<Option<BloblogPointer>>> {
let mut inner = self.inner.lock().unwrap();
let txn = inner.connection.transaction()?;
let mut result = Vec::with_capacity(chunk_ids.len());
{
let mut stmt = txn.prepare("SELECT bloblog, offset FROM chunks WHERE chunk_id = ?1")?;
for &chunk_id in chunk_ids {
let bloglog_pointer: Option<BloblogPointer> = stmt
.query_row(params![chunk_id], |row| {
Ok(BloblogPointer {
bloblog: row.get(0)?,
offset: row.get::<_, i64>(1)? as u64,
})
})
.optional()?;
result.push(bloglog_pointer);
}
}
txn.commit()?;
Ok(result)
}
fn put_chunk_pointer(&self, chunk_id: &ChunkId, pointer: BloblogPointer) -> anyhow::Result<()> {
-let inner = self.inner.lock().unwrap();
+let mut inner = self.inner.lock().unwrap();
let offset_i64 = i64::try_from(pointer.offset).expect("ouch! can't turn u64 into i64...");
inner.raw_put_chunk_pointer(chunk_id, pointer.bloblog, offset_i64)
}
@ -472,6 +528,341 @@ impl SqliteBloblogPile {
assert!(pointers_buffered.is_empty());
Ok(())
}
/// Look at the bloblogs in this pile and see where space may be reclaimable if we were to
/// compact.
///
/// Next step: plan_compaction
pub fn analyse_for_compaction(&self) -> anyhow::Result<BTreeMap<BloblogId, BloblogStats>> {
let mut inner = self.inner.lock().unwrap();
// Lock the database right away.
let txn = inner
.connection
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
let mut stmt = txn.prepare(
"
SELECT bloblog, COUNT(c.offset), COUNT(d.offset), SUM(COALESCE(d.size, 0))
FROM chunks c LEFT JOIN deleted d USING (bloblog, offset)
GROUP BY bloblog
",
)?;
struct UnpopulatedBloblogStats {
pub bloblog_id: BloblogId,
pub chunks_total: u64,
pub chunks_deleted: u64,
pub bytes_deleted: u64,
}
let unpopul_bloblog_stats = stmt.query_map(NO_PARAMS, |row| {
Ok(UnpopulatedBloblogStats {
bloblog_id: row.get(0)?,
chunks_total: row.get::<_, i64>(1)?.try_into().expect("i64 -> u64"),
chunks_deleted: row.get::<_, i64>(2)?.try_into().expect("i64 -> u64"),
bytes_deleted: row.get::<_, i64>(3)?.try_into().expect("i64 -> u64"),
})
})?;
let mut final_stats = BTreeMap::new();
for unpopul_stat in unpopul_bloblog_stats {
let UnpopulatedBloblogStats {
bloblog_id,
chunks_total,
chunks_deleted,
bytes_deleted,
} = unpopul_stat?;
let bloblog_path = self.path.join(&bloblog_id.to_string());
let bytes_total = std::fs::metadata(&bloblog_path)
.with_context(|| format!("Failed to get metadata for bloblog: {:?}", bloblog_path))?
.size();
final_stats.insert(
bloblog_id,
BloblogStats {
chunks_total,
chunks_deleted,
bytes_total,
// Add a slight correction since we can count the blob headers of deleted blobs
// as deleted.
bytes_deleted: bytes_deleted + chunks_deleted * BLOB_HEADER_SIZE,
},
);
}
Ok(final_stats)
}
/// Look at the analysis of compaction and, using the specified thresholds, come up with a plan
/// to perform compaction.
///
/// May return an empty plan if compaction isn't worthwhile.
///
/// Previous step: analyse_for_compaction
/// Next step: perform_compaction
pub fn plan_compaction(
&self,
thresholds: &CompactionThresholds,
analysis: BTreeMap<BloblogId, BloblogStats>,
) -> anyhow::Result<CompactionPlan> {
let bloblogs_to_replace: BTreeMap<BloblogId, BloblogStats> = analysis
.into_iter()
.filter(|(_id, stats)| thresholds.should_replace_bloblog(stats))
.collect();
let reclaimable_space: u64 = bloblogs_to_replace
.values()
.map(|bs| bs.bytes_deleted)
.sum();
let bytes_to_write: u64 = bloblogs_to_replace
.values()
.map(|bs| bs.bytes_total - bs.bytes_deleted)
.sum();
let small_bloblogs: u32 = bloblogs_to_replace
.values()
.filter(|bs| bs.bytes_total - bs.bytes_deleted < thresholds.cond_if_less_allocated_than)
.count() as u32;
if reclaimable_space < thresholds.minimum_to_reclaim
&& small_bloblogs < thresholds.minimum_small_bloblogs_to_merge
{
// Nothing worth doing: return an empty plan.
return Ok(CompactionPlan {
bloblogs_to_replace: Default::default(),
bytes_to_write: 0,
reclaimable_space: 0,
small_bloblogs: 0,
});
}
Ok(CompactionPlan {
bloblogs_to_replace: bloblogs_to_replace.keys().copied().collect(),
bytes_to_write,
reclaimable_space,
small_bloblogs,
})
}
/// Given a compaction plan, perform the compaction.
/// There shouldn't be any decisions left to be made at this point: just action.
///
/// TODO flock the bloblogs to be removed and make readers and writers also flock them too.
///
/// TODO find a way to deal with bloblogs that are entirely unreferenced from the index
/// (e.g. bloblogs that weren't written properly, e.g. if compaction fails.)
pub fn perform_compaction(
&self,
mut progress: Box<dyn ProgressTracker>,
plan: CompactionPlan,
) -> anyhow::Result<()> {
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
struct ReplacedBlobRow {
pub old_bloblog: BloblogId,
pub old_offset: u64,
pub chunk_id: ChunkId,
}
if plan.bloblogs_to_replace.is_empty() {
info!("No compaction to be done.");
return Ok(());
}
let mut to_preserve = BTreeSet::new();
let mut replacements = BTreeMap::new();
progress.set_max_size(plan.bytes_to_write);
// First find all the blobs we need to replace.
{
let mut inner = self.inner.lock().unwrap();
// Lock the database right away.
let txn = inner
.connection
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
let mut stmt = txn.prepare(
"
SELECT chunk_id, c.offset
FROM chunks c LEFT JOIN deleted d USING (bloblog, offset)
WHERE bloblog = ?1 AND d.offset IS NULL
",
)?;
for bloblog in plan.bloblogs_to_replace.iter().copied() {
to_preserve.extend(
stmt.query_map([bloblog], |row| {
let mut chunk_id = ChunkId::default();
chunk_id.copy_from_slice(row.get::<_, Vec<u8>>(0).unwrap().as_slice());
Ok(ReplacedBlobRow {
old_bloblog: bloblog,
chunk_id,
old_offset: row.get::<_, i64>(1).unwrap().try_into().unwrap(),
})
})?
.collect::<Result<Vec<ReplacedBlobRow>, _>>()?,
);
}
}
// Then make the replacements
info!("Rewriting bloblogs...");
let mut buf = Vec::new();
let mut iterator = to_preserve.into_iter();
loop {
let (new_bloblog_id, bloglog_mutex) = self.get_writing_bloblog()?;
let mut new_bloblog = bloglog_mutex.lock().expect("Failed to lock bloblog?");
let mut is_more = false;
while let Some(preserve) = iterator.next() {
is_more = true;
// Get hold of the old bloblog
let old_bloblog = self.open_bloblog(preserve.old_bloblog)?;
let mut old_bloblog = old_bloblog.lock().unwrap();
// Transfer the blob
buf.clear();
old_bloblog.read_blob(preserve.old_offset, &preserve.chunk_id, &mut buf)?;
let new_offset = new_bloblog.write_blob(&preserve.chunk_id, &buf)?;
// Make a note of the replacement
replacements.insert(
preserve,
BloblogPointer {
bloblog: new_bloblog_id,
offset: new_offset,
},
);
progress.inc_progress(buf.len() as u64);
if new_bloblog.filesize()? > MAX_BLOBLOG_REUSE_SIZE {
// get a new bloblog to write with.
break;
}
}
drop(new_bloblog);
self.return_writing_bloblog(new_bloblog_id, bloglog_mutex)?;
if !is_more {
break;
}
}
info!("Applying replacements...");
{
let mut inner = self.inner.lock().unwrap();
// Lock the database right away.
let txn = inner
.connection
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
let mut stmt = txn.prepare(
"
UPDATE chunks
SET bloblog = ?1, offset = ?2
WHERE chunk_id = ?3
",
)?;
for (replacement_row, new_pos) in replacements {
ensure!(
stmt.execute(params![
new_pos.bloblog,
new_pos.offset as i64,
&replacement_row.chunk_id as &[u8]
])? == 1,
"Wrong number of rows updated for replacement!"
);
}
drop(stmt);
txn.commit().context("committing replacements")?;
}
// TODO fsync new bloblogs
info!("Deleting old bloblogs...");
{
let mut inner = self.inner.lock().unwrap();
// Lock the database right away.
let txn = inner
.connection
.transaction_with_behavior(TransactionBehavior::Exclusive)?;
for bloblog_id in plan.bloblogs_to_replace.iter().copied() {
let deleted_chunks = txn.execute(
"
DELETE FROM chunks WHERE bloblog = ?1
",
params![bloblog_id],
)?;
let deleted_deleted = txn.execute(
"
DELETE FROM deleted WHERE bloblog = ?1
",
params![bloblog_id],
)?;
ensure!(deleted_chunks == deleted_deleted, "Undeleted chunks left in bloblog {bloblog_id}: CHUNKS={deleted_chunks} DELETED={deleted_deleted}");
let bloblog_path = self.path.join(bloblog_id.to_string());
remove_file(&bloblog_path).with_context(|| {
format!("Failed to remove obsolete bloblog: {:?}", bloblog_path)
})?;
}
txn.commit()?;
}
Ok(())
}
}
pub struct BloblogStats {
pub chunks_total: u64,
pub chunks_deleted: u64,
pub bytes_total: u64,
pub bytes_deleted: u64,
}
pub struct CompactionPlan {
pub bloblogs_to_replace: BTreeSet<BloblogId>,
pub bytes_to_write: u64,
pub reclaimable_space: u64,
pub small_bloblogs: u32,
}
pub struct CompactionThresholds {
/// Minimum bytes to be reclaimable overall for compaction to be worthwhile.
pub minimum_to_reclaim: u64,
/// (alternative reason) Minimum number of files to be undersized in order for compaction
/// to be worthwhile.
/// This gives us a way to make compaction run if we have lots of tiny bloblogs.
pub minimum_small_bloblogs_to_merge: u32,
/// A bloblog will be replaced if the deallocated size is greater than this.
pub cond_if_more_deallocated_than: u64,
/// A bloblog will be replaced if the allocated size is less than this.
pub cond_if_less_allocated_than: u64,
}
impl CompactionThresholds {
pub fn should_replace_bloblog(&self, bloblog_stats: &BloblogStats) -> bool {
let allocated = bloblog_stats.bytes_total - bloblog_stats.bytes_deleted;
// Note that this will also trigger for fully-deallocated files if
let is_small = allocated < self.cond_if_less_allocated_than;
let has_large_deallocations =
bloblog_stats.bytes_deleted > self.cond_if_more_deallocated_than;
is_small || has_large_deallocations
}
}
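A quick worked example of the two trigger conditions above, written as a throwaway test sketch with invented numbers (this is not part of the committed test suite):

    #[test]
    fn compaction_threshold_worked_example() {
        let thresholds = CompactionThresholds {
            minimum_to_reclaim: 0,
            minimum_small_bloblogs_to_merge: 0,
            cond_if_more_deallocated_than: 10_000_000, // > 10 MB deleted => replace
            cond_if_less_allocated_than: 1_000_000,    // < 1 MB still live => replace
        };
        // 50 MB bloblog with 20 MB deleted: large deallocation, so it is replaced.
        let mostly_dead = BloblogStats {
            chunks_total: 1000,
            chunks_deleted: 400,
            bytes_total: 50_000_000,
            bytes_deleted: 20_000_000,
        };
        assert!(thresholds.should_replace_bloblog(&mostly_dead));
        // 5 MB bloblog with nothing deleted: neither small nor heavily deallocated.
        let healthy = BloblogStats {
            chunks_total: 100,
            chunks_deleted: 0,
            bytes_total: 5_000_000,
            bytes_deleted: 0,
        };
        assert!(!thresholds.should_replace_bloblog(&healthy));
    }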
pub struct CompactionOutcome {
pub bloblogs_deleted: u32,
pub bloblogs_created: u32,
pub bytes_deleted: u32,
pub bytes_created: u32,
}
impl Drop for SqliteBloblogPile {
@ -616,6 +1007,59 @@ impl RawPile for SqliteBloblogPile {
}
}
}
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
match kind {
Keyspace::Chunk => {
let mut chunk_pointers_by_bloblog: BTreeMap<BloblogId, Vec<(u64, &[u8])>> =
BTreeMap::new();
for (chunk_pointer, chunk_id) in self
.get_chunk_pointers(keys)
.context("failed to get chunk pointers")?
.into_iter()
.zip(keys)
.filter_map(|(pointer, &chunk_id)| match pointer {
Some(pointer) => Some((pointer, chunk_id)),
None => None,
})
{
chunk_pointers_by_bloblog
.entry(chunk_pointer.bloblog)
.or_default()
.push((chunk_pointer.offset, chunk_id));
}
let mut inner = self.inner.lock().unwrap();
let txn = inner.connection.transaction()?;
{
let mut stmt = txn.prepare(
"INSERT OR IGNORE INTO deleted (bloblog, offset, size)
VALUES (?1, ?2, ?3)",
)?;
for (bloblog_id, entries) in chunk_pointers_by_bloblog {
let bloblog_mutex = self.open_bloblog(bloblog_id)?;
let mut bloblog = bloblog_mutex.lock().unwrap();
for (chunk_offset, raw_chunk_id) in entries {
let mut chunk_id: ChunkId = Default::default();
chunk_id.copy_from_slice(raw_chunk_id);
let size = bloblog.blob_len(chunk_offset, &chunk_id)?;
let offset_i64 = i64::try_from(chunk_offset)
.expect("ouch! can't turn u64 into i64...");
stmt.execute(params![bloblog_id, offset_i64, size])?;
}
}
}
txn.commit().context("Failed to commit chunk deletions")?;
}
_ => {
for &key in keys {
self.delete(kind, key)?;
}
}
}
Ok(())
}
fn list_keys(
&self,
kind: Keyspace,
@ -730,6 +1174,16 @@ impl RawPile for SqliteBloblogPile {
fn describe_pipeline(&self) -> anyhow::Result<Vec<PipelineDescription>> {
Ok(vec![PipelineDescription::Store])
}
fn chunk_id_transfer_ordering_hint(&self, chunk_id: &ChunkId) -> anyhow::Result<u64> {
let chunk_pointer = self
.get_chunk_pointer(chunk_id)?
.context("Can't get chunk ID transfer ordering hint for chunk without pointer.")?;
// Scheme: 24-bit bloblog ID
// followed by 40-bit offset
Ok(((chunk_pointer.bloblog as u64) << 40) | (chunk_pointer.offset & 0xFF_FF_FF_FF_FF))
}
}
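To make the packing above concrete: the bloblog ID occupies the top 24 bits and the offset the low 40 bits, so sorting chunks by this hint groups reads by bloblog and then by ascending offset within each bloblog, which is the sequential read order on disk. A small illustration with invented values:

    fn ordering_hint_example() {
        let bloblog: u32 = 3;     // invented bloblog ID
        let offset: u64 = 0x1234; // invented byte offset within that bloblog
        let hint = ((bloblog as u64) << 40) | (offset & 0xFF_FF_FF_FF_FF);
        assert_eq!(hint, 0x0000_0300_0000_1234);
    }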
struct KeyIterator {
@ -775,9 +1229,10 @@ impl Iterator for KeyIterator {
#[cfg(test)]
mod tests {
-use crate::pile::local_sqlitebloblogs::Bloblog;
use temp_dir::TempDir;
+use crate::pile::local_sqlitebloblogs::Bloblog;
#[test]
pub fn bloblog_read_write_test() {
let td = TempDir::new().unwrap();

View File

@ -4,7 +4,7 @@ use std::sync::{Arc, Mutex};
use std::thread;
use std::thread::JoinHandle;
-use anyhow::anyhow;
+use anyhow::{anyhow, bail};
use crossbeam_channel::{Receiver, Sender};
use log::{error, info};
@ -307,6 +307,12 @@ impl RawPile for Requester {
other => Err(anyhow!("Received {:?} for Delete", other)),
}
}
fn delete_many(&self, kind: Keyspace, keys: &[&[u8]]) -> anyhow::Result<()> {
for &key in keys {
self.delete(kind, key)?;
}
Ok(())
}
fn list_keys(
&self,
kind: Keyspace,
@ -438,6 +444,10 @@ impl RawPile for Requester {
other => Err(anyhow!("Received {:?} for Describe", other)),
}
}
fn chunk_id_transfer_ordering_hint(&self, _chunk_id: &ChunkId) -> anyhow::Result<u64> {
bail!("You probably shouldn't be using chunk ID transfer ordering hints with a remote.");
}
}
pub struct ListKeyIterator {

View File

@ -185,7 +185,7 @@ pub fn differentiate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow
/// result is in-place.
///
/// Preconditions:
-/// - `old` must be an integrated pointer.
+/// - `old` must be an integrated pointer. (Otherwise this algorithm is not correct.)
/// - `old` is the parent of `new`
pub fn integrate_node_in_place(new: &mut TreeNode, old: &TreeNode) -> anyhow::Result<()> {
if let TreeNode::Directory { children, .. } = new {

View File

@ -15,6 +15,7 @@ You should have received a copy of the GNU General Public License
along with Yama. If not, see <https://www.gnu.org/licenses/>.
*/
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
pub fn bytes_to_hexstring(chunkid: &[u8]) -> String {
@ -42,3 +43,98 @@ pub fn get_number_of_workers(first_try_env_name: &str) -> u8 {
}
}
}
#[derive(Clone, Debug)]
pub struct LruMap<K, V> {
capacity: usize,
last_access: BTreeSet<(u64, K)>,
items: BTreeMap<K, (V, u64)>,
counter: u64,
}
impl<K: Ord + Clone, V> LruMap<K, V> {
pub fn new(capacity: usize) -> LruMap<K, V> {
LruMap {
capacity,
last_access: BTreeSet::new(),
items: BTreeMap::new(),
counter: 0,
}
}
/// Gets an item from the LRU map.
pub fn get(&mut self, key: &K) -> Option<&V> {
match self.items.get_mut(key) {
Some((value, last_used_instant)) => {
assert!(
self.last_access.remove(&(*last_used_instant, key.clone())),
"Corrupt LRU map: freshen not correct."
);
let new_instant = self.counter;
self.counter += 1;
self.last_access.insert((new_instant, key.clone()));
*last_used_instant = new_instant;
Some(value)
}
None => None,
}
}
pub fn insert(&mut self, key: K, value: V) -> Option<V> {
let new_instant = self.counter;
self.counter += 1;
let retval = match self.items.insert(key.clone(), (value, new_instant)) {
Some((old_entry, old_instant)) => {
assert!(
self.last_access.remove(&(old_instant, key.clone())),
"Corrupt LRU map: insert not correct."
);
Some(old_entry)
}
None => None,
};
self.last_access.insert((new_instant, key));
if retval.is_none() {
// We didn't replace any item, so we have grown by 1.
// Check if we need to evict.
if self.items.len() > self.capacity {
self.evict();
}
}
retval
}
pub fn evict(&mut self) -> Option<(K, V)> {
if let Some(first_entry) = self.last_access.iter().next().cloned() {
self.last_access.remove(&first_entry);
let (_, key) = first_entry;
let (value, _) = self
.items
.remove(&key)
.expect("Corrupt LRU map: last access and items out of sync");
Some((key, value))
} else {
None
}
}
}
#[cfg(test)]
mod test {
use crate::utils::LruMap;
#[test]
fn test_lru_map() {
let mut lmap = LruMap::new(3);
lmap.insert(1, 1);
lmap.insert(2, 1);
lmap.insert(3, 1);
assert_eq!(lmap.get(&1), Some(&1));
lmap.insert(4, 1);
assert_eq!(lmap.get(&2), None);
}
}
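One further usage sketch for the LruMap above (not part of the committed tests): get freshens an entry, so after touching key 1, key 2 becomes the next eviction candidate.

    fn lru_freshen_sketch() {
        let mut lmap = LruMap::new(3);
        lmap.insert(1, "a");
        lmap.insert(2, "b");
        lmap.insert(3, "c");
        assert_eq!(lmap.get(&1), Some(&"a"));     // freshens key 1
        assert_eq!(lmap.evict(), Some((2, "b"))); // key 2 is now the least recently used
    }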