# Source: yama/testsuite/yamatests/test_check_and_gc.py
#
# Listing metadata: 336 lines, 12 KiB, Python

import re
import subprocess
from pathlib import Path
from random import Random
from tempfile import TemporaryDirectory
from typing import Optional
from unittest import TestCase
from helpers import generate_random_dir, randomly_mutate_directory_in_descriptor
from helpers.datman_helpers import set_up_simple_datman
from helpers.yama_helpers import set_up_simple_yama
class TestYamaCheck(TestCase):
    """End-to-end tests for ``yama check`` and its garbage-collection flags.

    Each test builds a throwaway datman directory containing a yama pile
    named ``main`` and drives the ``yama`` and ``datman`` executables through
    ``subprocess``; both are looked up by bare name, so they must be
    resolvable on ``PATH``.
    """

    # Check depths, in the order the tests exercise them (``--shallow``
    # first, then ``--deep``).
    _DEPTHS = ("shallow", "deep")

    # NOTE(review): the GC-after-removal tests have always run with this fixed
    # seed (it was marked "todo" next to the assignment).  Presumably it was
    # pinned while debugging -- confirm the tests are stable with random seeds
    # before un-pinning it.
    _PINNED_GC_SEED = 3674

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _new_datman_dir(self) -> Path:
        """Create a temporary datman directory with a yama pile in ``main``.

        The temporary directory's suffix is the name of the running test
        method, and the directory is registered for removal via
        ``addCleanup`` so it is deleted whether the test passes or fails.

        Returns:
            Path of the ``datman`` directory.  The pile lives in its ``main``
            child; tests put their source tree in its ``srca`` child.
        """
        test_name = self.id().rsplit(".", 1)[-1]
        td = TemporaryDirectory(suffix=test_name)
        self.addCleanup(td.cleanup)

        datman_path = Path(td.name).joinpath("datman")
        set_up_simple_datman(datman_path)
        set_up_simple_yama(datman_path.joinpath("main"))
        return datman_path

    @staticmethod
    def _seeded_rng(seed: Optional[int] = None) -> Random:
        """Return a ``Random`` seeded with a seed that is printed first.

        Printing the seed makes a failing run reproducible.

        Args:
            seed: Seed to use; when ``None`` one is drawn from ``[0, 9001]``.
        """
        rng = Random()
        if seed is None:
            seed = rng.randint(0, 9001)
        print(f"seed: {seed}")
        rng.seed(seed)
        return rng

    @staticmethod
    def _backup(datman_path: Path) -> None:
        """Run ``datman backup-one srca main``; raise if it exits non-zero."""
        subprocess.check_call(
            ("datman", "backup-one", "srca", "main"), cwd=datman_path
        )

    @staticmethod
    def _check_exit_code(yama_path: Path, depth: str) -> int:
        """Run ``yama check --<depth>`` in the pile and return its exit code."""
        return subprocess.call(("yama", "check", f"--{depth}"), cwd=yama_path)

    @classmethod
    def _require_checks_pass(cls, yama_path: Path) -> None:
        """Run the shallow then the deep check; raise if either exits non-zero."""
        for depth in cls._DEPTHS:
            subprocess.check_call(("yama", "check", f"--{depth}"), cwd=yama_path)

    @staticmethod
    def _gc_report(yama_path: Path, depth: str, gc_flag: str) -> bytes:
        """Run ``yama check --<depth> <gc_flag>`` and return its output.

        stderr is merged into stdout so the report is captured regardless of
        which stream it is written to.  Raises ``CalledProcessError`` if the
        command exits non-zero.
        """
        return subprocess.check_output(
            ("yama", "check", f"--{depth}", gc_flag),
            cwd=yama_path,
            stderr=subprocess.STDOUT,
        )

    @staticmethod
    def _pointer_names(yama_path: Path) -> list:
        """Return the lines printed by ``yama debug lsp``, split on newlines."""
        listing = subprocess.check_output(("yama", "debug", "lsp"), cwd=yama_path)
        return listing.decode().split("\n")

    @staticmethod
    def _remove_pointer(yama_path: Path, pointer_name: str) -> None:
        """Run ``yama debug rmp <pointer_name>``; raise if it exits non-zero."""
        subprocess.check_call(
            ("yama", "debug", "rmp", pointer_name), cwd=yama_path
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_check_on_empty_pile_is_ok(self):
        """A freshly set-up pile passes both checks and GC reports `` 0 chunks``."""
        yama_path = self._new_datman_dir().joinpath("main")

        self._require_checks_pass(yama_path)

        for depth in self._DEPTHS:
            output = self._gc_report(yama_path, depth, "--apply-gc")
            self.assertIn(b" 0 chunks", output)

    def test_check_on_pile_with_store_is_ok(self):
        """After one backup, both checks pass and GC reports `` 0 chunks``."""
        datman_path = self._new_datman_dir()
        yama_path = datman_path.joinpath("main")

        rng = self._seeded_rng()
        generate_random_dir(rng, datman_path.joinpath("srca"), 32)
        self._backup(datman_path)

        self._require_checks_pass(yama_path)

        # Run the checks a second time, asserting on the exit codes directly.
        for depth in self._DEPTHS:
            self.assertEqual(self._check_exit_code(yama_path, depth), 0)

        for depth in self._DEPTHS:
            output = self._gc_report(yama_path, depth, "--apply-gc")
            self.assertIn(b" 0 chunks", output)

    def test_check_fails_after_random_corruption(self):
        """Changing one random byte of bloblog ``0`` makes the deep check exit 1."""
        datman_path = self._new_datman_dir()
        yama_path = datman_path.joinpath("main")

        rng = self._seeded_rng()
        generate_random_dir(rng, datman_path.joinpath("srca"), 32)
        self._backup(datman_path)

        victim_bloblog = yama_path.joinpath("bloblog", "0")
        size_in_bytes = victim_bloblog.stat().st_size
        with victim_bloblog.open("r+b") as fvictim:
            byte_to_eat = rng.randint(0, size_in_bytes - 1)
            fvictim.seek(byte_to_eat)
            existing_byte = fvictim.read(1)
            fvictim.seek(byte_to_eat)
            # Write a value that is guaranteed to differ from what was there.
            fvictim.write(b"\x01" if existing_byte == b"\0" else b"\0")
        print(
            f"Corrupted byte {byte_to_eat} of {size_in_bytes}."
            f" Was {existing_byte!r}."
        )

        ec_shallow = self._check_exit_code(yama_path, "shallow")
        ec_deep = self._check_exit_code(yama_path, "deep")

        # shallow checks won't always raise the issue
        self.assertIn(ec_shallow, (0, 1))
        # deep checks should always raise the issue
        self.assertEqual(ec_deep, 1)

    def test_check_succeeds_after_full_removal(self):
        """Both checks still exit 0 after the pile's pointer is removed."""
        datman_path = self._new_datman_dir()
        yama_path = datman_path.joinpath("main")

        rng = self._seeded_rng()
        generate_random_dir(rng, datman_path.joinpath("srca"), 32)
        self._backup(datman_path)
        self._require_checks_pass(yama_path)

        # Remove the first pointer listed (only one backup has been made).
        pointer_name = self._pointer_names(yama_path)[0]
        self._remove_pointer(yama_path, pointer_name)

        # The repository should still be safe.
        ec_shallow = self._check_exit_code(yama_path, "shallow")
        ec_deep = self._check_exit_code(yama_path, "deep")
        self.assertEqual(ec_shallow, 0)
        self.assertEqual(ec_deep, 0)

    def _test_gc_safely_clears_space_after_removal(
        self, depth: str, is_full: bool, which_to_remove: Optional[int] = None
    ):
        """Remove a pointer, apply GC, and verify the pile still checks out.

        Args:
            depth: ``"shallow"`` or ``"deep"``; selects the ``yama check``
                depth flag used for the ``--apply-gc`` run.
            is_full: When true, a single backup is made and its pointer is
                removed.  When false, the source directory is mutated and
                backed up a second time, and one of the two pointers is
                removed.
            which_to_remove: Required when ``is_full`` is false: ``0`` removes
                the original pointer, ``1`` removes the newer one.

        The GC output must mention `` chunks`` but not `` 0 chunks``, and
        both checks must exit 0 afterwards.
        """
        datman_path = self._new_datman_dir()
        src_path = datman_path.joinpath("srca")
        yama_path = datman_path.joinpath("main")

        rng = self._seeded_rng(self._PINNED_GC_SEED)
        later_expected_descriptor, _ = generate_random_dir(
            rng, src_path, 32, min_files=2
        )
        self._backup(datman_path)
        self._require_checks_pass(yama_path)

        orig_pointer_name = self._pointer_names(yama_path)[0]

        if is_full:
            self._remove_pointer(yama_path, orig_pointer_name)
        else:
            assert which_to_remove is not None, (
                "which_to_remove is required when is_full is False"
            )

            # we want to add a new, incremental, pointer
            randomly_mutate_directory_in_descriptor(
                later_expected_descriptor, src_path, rng
            )
            self._backup(datman_path)
            self._require_checks_pass(yama_path)

            should_be_orig_pointer_name, new_pointer_name = self._pointer_names(
                yama_path
            )[0:2]
            self.assertEqual(should_be_orig_pointer_name, orig_pointer_name)
            self.assertNotEqual(new_pointer_name, should_be_orig_pointer_name)
            self.assertGreater(len(new_pointer_name.strip()), 1)
            self.assertTrue(new_pointer_name.startswith("srca+"))

            victim_pointer_name = [orig_pointer_name, new_pointer_name][
                which_to_remove
            ]
            self._remove_pointer(yama_path, victim_pointer_name)

        self._require_checks_pass(yama_path)

        output = self._gc_report(yama_path, depth, "--apply-gc")
        self.assertNotIn(b" 0 chunks", output)
        self.assertIn(b" chunks", output)

        # The repository should still be safe.
        ec_shallow = self._check_exit_code(yama_path, "shallow")
        ec_deep = self._check_exit_code(yama_path, "deep")
        self.assertEqual(ec_shallow, 0)
        self.assertEqual(ec_deep, 0)

    def test_deep_gc_safely_clears_space_after_full_removal(self):
        """Deep GC after removing the only pointer."""
        self._test_gc_safely_clears_space_after_removal("deep", is_full=True)

    def test_shallow_gc_safely_clears_space_after_full_removal(self):
        """Shallow GC after removing the only pointer."""
        self._test_gc_safely_clears_space_after_removal("shallow", is_full=True)

    def test_deep_gc_safely_clears_space_after_incremental_base_removal(self):
        """Deep GC after removing the original of two pointers."""
        self._test_gc_safely_clears_space_after_removal(
            "deep", is_full=False, which_to_remove=0
        )

    def test_shallow_gc_safely_clears_space_after_incremental_base_removal(self):
        """Shallow GC after removing the original of two pointers."""
        self._test_gc_safely_clears_space_after_removal(
            "shallow", is_full=False, which_to_remove=0
        )

    def test_deep_gc_safely_clears_space_after_incremental_tail_removal(self):
        """Deep GC after removing the newer of two pointers."""
        self._test_gc_safely_clears_space_after_removal(
            "deep", is_full=False, which_to_remove=1
        )

    def test_shallow_gc_safely_clears_space_after_incremental_tail_removal(self):
        """Shallow GC after removing the newer of two pointers."""
        self._test_gc_safely_clears_space_after_removal(
            "shallow", is_full=False, which_to_remove=1
        )

    def test_shallow_and_deep_gc_remove_the_same(self):
        """Shallow and deep ``--dry-run-gc`` report the same chunk count."""
        datman_path = self._new_datman_dir()
        yama_path = datman_path.joinpath("main")

        rng = self._seeded_rng()
        generate_random_dir(rng, datman_path.joinpath("srca"), 32)
        self._backup(datman_path)
        self._require_checks_pass(yama_path)

        # Remove the first pointer listed (only one backup has been made).
        pointer_name = self._pointer_names(yama_path)[0]
        self._remove_pointer(yama_path, pointer_name)
        self._require_checks_pass(yama_path)

        shallow_output = self._gc_report(
            yama_path, "shallow", "--dry-run-gc"
        ).decode()
        self.assertNotIn(" 0 chunks", shallow_output)
        self.assertIn(" chunks", shallow_output)

        deep_output = self._gc_report(yama_path, "deep", "--dry-run-gc").decode()
        self.assertNotIn(" 0 chunks", deep_output)
        self.assertIn(" chunks", deep_output)

        pat = re.compile(" ([0-9]+) chunks")
        shallow_match = pat.search(shallow_output)
        deep_match = pat.search(deep_output)
        # Fail with a readable message rather than an AttributeError on None.
        self.assertIsNotNone(shallow_match, "no chunk count in shallow GC output")
        self.assertIsNotNone(deep_match, "no chunk count in deep GC output")

        shallow_chunks = int(shallow_match.group(1))
        deep_chunks = int(deep_match.group(1))
        self.assertEqual(shallow_chunks, deep_chunks)