yama/testsuite/yamatests/test_compact.py

176 lines
5.4 KiB
Python

import subprocess
from pathlib import Path
from random import Random
from tempfile import TemporaryDirectory
from unittest import TestCase
from helpers import (
DirectoryDescriptor,
generate_random_dir,
randomly_mutate_directory_in_descriptor,
scan_dir,
)
from helpers.datman_helpers import set_up_simple_datman
from helpers.yama_helpers import list_bloblog_ids, set_up_simple_yama
class TestYamaCompact(TestCase):
def test_compaction_merge_two_small_bloblogs(self):
td = TemporaryDirectory("test_check_fails_after_random_corruption")
tdpath = Path(td.name)
datman_path = tdpath.joinpath("datman")
src_path = datman_path.joinpath("srca")
yama_path = datman_path.joinpath("main")
set_up_simple_datman(datman_path)
set_up_simple_yama(yama_path)
rng = Random()
seed = rng.randint(0, 9001)
print(f"seed: {seed}")
rng.seed(seed)
later_expected_descriptor, _ = generate_random_dir(rng, src_path, 32)
# Back up twice: that way we should get at least two bloblogs!
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
old_bloblog_ids = list_bloblog_ids(yama_path)
self.assertGreater(
len(old_bloblog_ids), 1, "Should be many bloblogs at this point"
)
subprocess.check_call(
(
"yama",
"compact",
"--mergeable",
"2",
"--small",
str(2 * 1024 * 1024 * 1024),
),
cwd=yama_path,
)
new_bloblog_ids = list_bloblog_ids(yama_path)
self.assertEqual(
len(new_bloblog_ids), 1, "Should only be 1 bloblog at this point."
)
self.assertEqual(
list(new_bloblog_ids)[0],
max(old_bloblog_ids) + 1,
"New bloblog ID should be 1 greater than the max old one.",
)
def test_gc_then_compact(self):
td = TemporaryDirectory("test_gc_then_compact")
tdpath = Path(td.name)
datman_path = tdpath.joinpath("datman")
src_path = datman_path.joinpath("srca")
yama_path = datman_path.joinpath("main")
set_up_simple_datman(datman_path)
set_up_simple_yama(yama_path)
rng = Random()
seed = rng.randint(0, 9001)
print(f"seed: {seed}")
rng.seed(seed)
initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
orig_pointer_name = (
subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
.decode()
.split("\n")[0]
)
randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
mutated_descriptor = scan_dir(src_path)
subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)
old_bloblog_ids = list_bloblog_ids(yama_path)
# Try a GC and check that it's a no-op
subprocess.check_call(
("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
)
subprocess.check_call(
(
"yama",
"compact",
"--mergeable",
"2000",
"--reclaim",
"1",
"--max-dealloc",
"1",
),
cwd=yama_path,
)
unchanged_bloblog_ids = list_bloblog_ids(yama_path)
self.assertEqual(
old_bloblog_ids,
unchanged_bloblog_ids,
"No GC: no compaction should have happened.",
)
subprocess.check_call(
("yama", "debug", "rmp", orig_pointer_name), cwd=yama_path
)
# Try a GC and check that it did something
subprocess.check_call(
("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
)
subprocess.check_call(
(
"yama",
"compact",
"--mergeable",
"2000",
"--reclaim",
"1",
"--max-dealloc",
"1",
),
cwd=yama_path,
)
new_bloblog_ids = list_bloblog_ids(yama_path)
self.assertNotEqual(
old_bloblog_ids, new_bloblog_ids, "GC: compaction should have happened."
)
# Check that we can still extract the files!
dest_path = tdpath.joinpath("desta")
subprocess.check_call(
(
"datman",
"extract",
"--skip-metadata",
"--accept-partial",
"main",
"../desta",
),
cwd=datman_path,
)
extracted_dir_descriptor_wrapper = scan_dir(dest_path)
contents = extracted_dir_descriptor_wrapper.contents
self.assertEqual(len(contents), 1)
key, value = next(iter(contents.items()))
self.assertTrue(key.startswith("srca+"))
self.assertIsInstance(value, DirectoryDescriptor)
key, value = next(iter(value.contents.items()))
self.assertEqual(key, "srca")
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
td.cleanup()