# yama/testsuite/yamatests/test_compact.py

import subprocess
from pathlib import Path
from random import Random
from tempfile import TemporaryDirectory
from unittest import TestCase

from helpers import (
    DirectoryDescriptor,
    generate_random_dir,
    randomly_mutate_directory_in_descriptor,
    scan_dir,
)
from helpers.datman_helpers import set_up_simple_datman
from helpers.yama_helpers import list_bloblog_ids, set_up_simple_yama


class TestYamaCompact(TestCase):
    """End-to-end tests for ``yama compact``, driven through the real CLIs.

    Each test creates a throwaway datman/yama setup inside a temporary
    directory, backs up randomly generated data with ``datman backup-one``
    and compares the bloblog IDs reported by ``list_bloblog_ids`` before and
    after running ``yama compact``.
    """

    def _make_workspace(self, name):
        """Create a temporary workspace holding a datman and a yama setup.

        The temporary directory is registered with ``addCleanup`` so it is
        removed even when the test fails part-way through (a failed assertion
        or a non-zero exit from a subprocess).

        :param name: suffix for the temporary directory name; pass the name
            of the calling test so leftovers are easy to attribute.
        :return: tuple ``(tdpath, datman_path, src_path, yama_path)``.
        """
        td = TemporaryDirectory(name)
        self.addCleanup(td.cleanup)
        tdpath = Path(td.name)

        datman_path = tdpath.joinpath("datman")
        src_path = datman_path.joinpath("srca")
        yama_path = datman_path.joinpath("main")

        set_up_simple_datman(datman_path)
        set_up_simple_yama(yama_path)
        return tdpath, datman_path, src_path, yama_path

    @staticmethod
    def _seeded_rng():
        """Return a ``Random`` re-seeded with a small seed that is printed.

        Printing the seed lets a failing run be reproduced by hand.
        """
        rng = Random()
        seed = rng.randint(0, 9001)
        print(f"seed: {seed}")
        rng.seed(seed)
        return rng

    @staticmethod
    def _backup_once(datman_path):
        """Run ``datman backup-one srca main`` from *datman_path*."""
        subprocess.check_call(("datman", "backup-one", "srca", "main"), cwd=datman_path)

    @staticmethod
    def _gc_then_compact(yama_path):
        """Run a shallow check with ``--apply-gc`` followed by ``yama compact``.

        Both commands run with *yama_path* as the working directory and raise
        ``subprocess.CalledProcessError`` on a non-zero exit status.
        """
        subprocess.check_call(
            ("yama", "check", "--shallow", "--apply-gc"), cwd=yama_path
        )
        subprocess.check_call(
            (
                "yama",
                "compact",
                "--mergeable",
                "2000",
                "--reclaim",
                "1",
                "--max-dealloc",
                "1",
            ),
            cwd=yama_path,
        )

    def test_compaction_merge_two_small_bloblogs(self):
        """Two backups' bloblogs are merged into one new bloblog by compaction."""
        _, datman_path, src_path, yama_path = self._make_workspace(
            "test_compaction_merge_two_small_bloblogs"
        )

        rng = self._seeded_rng()
        # Only the generated files matter here; the returned descriptor is
        # not needed because this test never extracts the backup.
        generate_random_dir(rng, src_path, 32)

        # Back up twice: that way we should get at least two bloblogs!
        self._backup_once(datman_path)
        self._backup_once(datman_path)
        old_bloblog_ids = list_bloblog_ids(yama_path)
        self.assertGreater(
            len(old_bloblog_ids), 1, "Should be many bloblogs at this point"
        )

        subprocess.check_call(
            (
                "yama",
                "compact",
                "--mergeable",
                "2",
                "--small",
                # 2 GiB; presumably large enough that every bloblog written
                # above counts as "small" -- confirm against the yama CLI.
                str(2 * 1024 * 1024 * 1024),
            ),
            cwd=yama_path,
        )

        new_bloblog_ids = list_bloblog_ids(yama_path)
        self.assertEqual(
            len(new_bloblog_ids), 1, "Should only be 1 bloblog at this point."
        )
        self.assertEqual(
            list(new_bloblog_ids)[0],
            max(old_bloblog_ids) + 1,
            "New bloblog ID should be 1 greater than the max old one.",
        )

    def test_gc_then_compact(self):
        """Compaction changes bloblogs only after a pointer has been removed.

        Steps: back up, mutate the source and back up again; GC + compact and
        expect unchanged bloblog IDs; remove the first pointer; GC + compact
        and expect different bloblog IDs; finally extract and compare the
        result with the mutated source directory.
        """
        tdpath, datman_path, src_path, yama_path = self._make_workspace(
            "test_gc_then_compact"
        )

        rng = self._seeded_rng()
        initial_descriptor, _ = generate_random_dir(rng, src_path, 32)

        self._backup_once(datman_path)
        # Only one backup exists so far, so the first line of the pointer
        # listing is taken as the name of the pointer it created.
        orig_pointer_name = (
            subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
            .decode()
            .split("\n")[0]
        )
        self.assertTrue(
            orig_pointer_name, "Expected a pointer name from `yama debug lsp`."
        )

        randomly_mutate_directory_in_descriptor(initial_descriptor, src_path, rng)
        mutated_descriptor = scan_dir(src_path)

        self._backup_once(datman_path)

        old_bloblog_ids = list_bloblog_ids(yama_path)

        # Try a GC and check that it's a no-op
        self._gc_then_compact(yama_path)

        unchanged_bloblog_ids = list_bloblog_ids(yama_path)
        self.assertEqual(
            old_bloblog_ids,
            unchanged_bloblog_ids,
            "No GC: no compaction should have happened.",
        )

        subprocess.check_call(
            ("yama", "debug", "rmp", orig_pointer_name), cwd=yama_path
        )

        # Try a GC and check that it did something
        self._gc_then_compact(yama_path)

        new_bloblog_ids = list_bloblog_ids(yama_path)
        self.assertNotEqual(
            old_bloblog_ids, new_bloblog_ids, "GC: compaction should have happened."
        )

        # Check that we can still extract the files!
        dest_path = tdpath.joinpath("desta")
        subprocess.check_call(
            (
                "datman",
                "extract",
                "--skip-metadata",
                "--accept-partial",
                "main",
                # Relative to cwd=datman_path, i.e. the same place as dest_path.
                "../desta",
            ),
            cwd=datman_path,
        )

        extracted_dir_descriptor_wrapper = scan_dir(dest_path)

        # The extraction is expected to hold exactly one entry whose name
        # starts with "srca+", which in turn contains the "srca" directory.
        contents = extracted_dir_descriptor_wrapper.contents
        self.assertEqual(len(contents), 1)
        key, value = next(iter(contents.items()))
        self.assertTrue(key.startswith("srca+"))

        self.assertIsInstance(value, DirectoryDescriptor)
        key, value = next(iter(value.contents.items()))
        self.assertEqual(key, "srca")

        # The original pointer was removed, so what remains must match the
        # source directory as it was after mutation.
        self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())