diff --git a/testsuite/helpers/yama_helpers.py b/testsuite/helpers/yama_helpers.py
index 895194f..a041e9b 100644
--- a/testsuite/helpers/yama_helpers.py
+++ b/testsuite/helpers/yama_helpers.py
@@ -1,6 +1,7 @@
 import shutil
 import subprocess
 from pathlib import Path
+from typing import Set
 
 
 def set_up_simple_yama(path: Path):
@@ -10,3 +11,13 @@ def set_up_simple_yama(path: Path):
         "example_zstd.dict"
     )
     shutil.copyfile(example_zstd_path, path.joinpath("important_zstd.dict"))
+
+
+def list_bloblog_ids(pile: Path) -> Set[int]:
+    result = set()
+    for p in pile.joinpath("bloblog").iterdir():
+        try:
+            result.add(int(p.name))
+        except ValueError:
+            pass
+    return result
diff --git a/testsuite/yamatests/test_compact.py b/testsuite/yamatests/test_compact.py
new file mode 100644
index 0000000..8a102e8
--- /dev/null
+++ b/testsuite/yamatests/test_compact.py
@@ -0,0 +1,144 @@
+import subprocess
+from pathlib import Path
+from random import Random
+from tempfile import TemporaryDirectory
+from unittest import TestCase
+
+from helpers import (
+    DirectoryDescriptor,
+    generate_random_dir,
+    randomly_mutate_directory_in_descriptor,
+    scan_dir,
+)
+from helpers.datman_helpers import set_up_simple_datman
+from helpers.yama_helpers import list_bloblog_ids, set_up_simple_yama
+
+
+class TestYamaCompact(TestCase):
+    def test_compaction_merge_two_small_bloblogs(self):
+        td = TemporaryDirectory("test_compaction_merge_two_small_bloblogs")
+        tdpath = Path(td.name)
+
+        datman_path = tdpath.joinpath("datman")
+        src_path = datman_path.joinpath("srca")
+        yama_path = datman_path.joinpath("main")
+
+        set_up_simple_datman(datman_path)
+        set_up_simple_yama(yama_path)
+
+        rng = Random()
+        seed = rng.randint(0, 9001)
+        print(f"seed: {seed}")
+        rng.seed(seed)
+        later_expected_descriptor, _ = generate_random_dir(rng, src_path, 32)
+
+        # Back up twice: that way we should get at least two bloblogs!
+ subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path) + subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path) + old_bloblog_ids = list_bloblog_ids(yama_path) + self.assertGreater( + len(old_bloblog_ids), 1, "Should be many bloblogs at this point" + ) + + subprocess.check_call(("yama", "compact", "--mergeable", "2"), cwd=yama_path) + + new_bloblog_ids = list_bloblog_ids(yama_path) + self.assertEqual( + len(new_bloblog_ids), 1, "Should only be 1 bloblog at this point." + ) + self.assertEqual( + list(new_bloblog_ids)[0], + max(old_bloblog_ids) + 1, + "New bloblog ID should be 1 greater than the max old one.", + ) + + def test_gc_then_compact(self): + td = TemporaryDirectory("test_gc_then_compact") + tdpath = Path(td.name) + + datman_path = tdpath.joinpath("datman") + src_path = datman_path.joinpath("srca") + yama_path = datman_path.joinpath("main") + + set_up_simple_datman(datman_path) + set_up_simple_yama(yama_path) + + rng = Random() + seed = rng.randint(0, 9001) + print(f"seed: {seed}") + rng.seed(seed) + later_expected_descriptor, _ = generate_random_dir(rng, src_path, 32) + + subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path) + orig_pointer_name = ( + subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path) + .decode() + .split("\n")[0] + ) + + randomly_mutate_directory_in_descriptor( + later_expected_descriptor, src_path, rng + ) + + subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path) + + old_bloblog_ids = list_bloblog_ids(yama_path) + + # Try a GC and check that it's a no-op + subprocess.check_call(("yama", "check", "--shallow", "--gc"), cwd=yama_path) + subprocess.check_call( + ("yama", "compact", "--mergeable", "2000", "--reclaim", "1"), cwd=yama_path + ) + + unchanged_bloblog_ids = list_bloblog_ids(yama_path) + self.assertEqual( + old_bloblog_ids, + unchanged_bloblog_ids, + "No GC: no compaction should have happened.", + ) + + 
subprocess.check_call( + ("yama", "debug", "rmp", orig_pointer_name), cwd=yama_path + ) + + # Try a GC and check that it did something + subprocess.check_call(("yama", "check", "--shallow", "--gc"), cwd=yama_path) + subprocess.check_call( + ("yama", "compact", "--mergeable", "2000", "--reclaim", "1"), cwd=yama_path + ) + + new_bloblog_ids = list_bloblog_ids(yama_path) + self.assertNotEqual( + old_bloblog_ids, new_bloblog_ids, "GC: compaction should have happened." + ) + + # Check that we can still extract the files! + dest_path = tdpath.joinpath("desta") + subprocess.check_call( + ( + "datman", + "extract", + "--skip-metadata", + "--accept-partial", + "main", + "../desta", + ), + cwd=datman_path, + ) + + extracted_dir_descriptor_wrapper = scan_dir(dest_path) + + contents = extracted_dir_descriptor_wrapper.contents + self.assertEqual(len(contents), 1) + key, value = next(iter(contents.items())) + self.assertTrue(key.startswith("srca+")) + + self.assertIsInstance(value, DirectoryDescriptor) + key, value = next(iter(value.contents.items())) + self.assertEqual(key, "srca") + + self.assertEqual( + value.ignore_metadata(), later_expected_descriptor.ignore_metadata() + ) + + td.cleanup()