From 8e567863ec8cb04f1afe50a9a9b8b16f3f4f3c1a Mon Sep 17 00:00:00 2001
From: Olivier 'reivilibre
Date: Fri, 1 Oct 2021 00:01:15 +0100
Subject: [PATCH] Add tests for labelling

Closes #50.
---
 datman/src/commands/backup.rs                 |   1 +
 .../datmantests/test_backup_and_extract.py    |  81 +++++++++-
 testsuite/helpers/__init__.py                 |  14 ++
 testsuite/helpers/datman_helpers.py           | 139 +++++++++++++++++-
 4 files changed, 228 insertions(+), 7 deletions(-)

diff --git a/datman/src/commands/backup.rs b/datman/src/commands/backup.rs
index ef2221d..e6ee358 100644
--- a/datman/src/commands/backup.rs
+++ b/datman/src/commands/backup.rs
@@ -84,6 +84,7 @@ pub fn label_filter_and_convert(
 
     info!("Filtering.");
     let mut unlabelled_included = false;
+    // filter_inclusive includes directories that must exist for the sake of their children.
     if !tree.filter_inclusive(&mut |node| {
         match node.get_metadata().unwrap() {
             None => {
diff --git a/testsuite/datmantests/test_backup_and_extract.py b/testsuite/datmantests/test_backup_and_extract.py
index ef05bd9..398eb83 100644
--- a/testsuite/datmantests/test_backup_and_extract.py
+++ b/testsuite/datmantests/test_backup_and_extract.py
@@ -11,7 +11,14 @@ from helpers import (
     randomly_mutate_directory_in_descriptor,
     scan_dir,
 )
-from helpers.datman_helpers import get_hostname, set_up_simple_datman
+from helpers.datman_helpers import (
+    MULTI_PILES_SECTION,
+    filter_descriptor_by_label,
+    generate_labels,
+    get_hostname,
+    save_labelling_rules,
+    set_up_simple_datman,
+)
 from helpers.yama_helpers import set_up_simple_yama
 
 
@@ -222,3 +229,75 @@ kind = {{ stdout = "blahblah.txt" }}
         self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
 
         td.cleanup()
+
+    def test_labels_apply(self):
+        """
+        Back up a randomly labelled source once per label, then extract each
+        pile and compare it (ignoring metadata) with the source tree filtered
+        down to that label.
+        """
+        td = TemporaryDirectory("test_labels_apply")
+        tdpath = Path(td.name)
+
+        datman_path = tdpath.joinpath("datman")
+        labelling_path = datman_path.joinpath("labelling")
+        src_path = datman_path.joinpath("srca")
+        yama_precious_path = datman_path.joinpath("precious")
+        yama_pocket_path = datman_path.joinpath("pocket")
+        yama_bulky_path = datman_path.joinpath("bulky")
+
+        set_up_simple_datman(datman_path, piles_section=MULTI_PILES_SECTION)
+        set_up_simple_yama(yama_precious_path)
+        set_up_simple_yama(yama_pocket_path)
+        set_up_simple_yama(yama_bulky_path)
+
+        rng = Random()
+        seed = rng.randint(0, 9001)
+        print(f"seed: {seed}")
+        rng.seed(seed)
+        initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
+
+        labellings = generate_labels(initial_descriptor, rng)
+        save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)
+
+        for label in ["precious", "pocket", "bulky"]:
+            output = subprocess.check_output(
+                ("datman", "backup-one", "srca", label),
+                cwd=datman_path,
+                stderr=subprocess.STDOUT,
+            )
+            self.assertNotIn(b"Unlabelled", output, "Labelling doesn't seem complete.")
+
+        for label in ["precious", "pocket", "bulky"]:
+            dest_path = tdpath.joinpath(f"dest_{label}")
+            subprocess.check_call(
+                (
+                    "datman",
+                    "extract",
+                    "--skip-metadata",
+                    "--accept-partial",
+                    label,
+                    str(dest_path),
+                ),
+                cwd=datman_path,
+            )
+
+            extracted_dir_descriptor_wrapper = scan_dir(dest_path)
+
+            contents = extracted_dir_descriptor_wrapper.contents
+
+            self.assertEqual(len(contents), 1)
+            key, value = next(iter(contents.items()))
+            self.assertTrue(key.startswith("srca+"))
+
+            self.assertIsInstance(value, DirectoryDescriptor)
+            key, value = next(iter(value.contents.items()))
+            self.assertEqual(key, "srca")
+
+            filtered_initial_descriptor = filter_descriptor_by_label(
+                {label}, initial_descriptor, labellings
+            )
+            self.assertEqual(
+                value.ignore_metadata(), filtered_initial_descriptor.ignore_metadata()
+            )
+
+        td.cleanup()
diff --git a/testsuite/helpers/__init__.py b/testsuite/helpers/__init__.py
index d9789d5..c26332f 100644
--- a/testsuite/helpers/__init__.py
+++ b/testsuite/helpers/__init__.py
@@ -49,6 +49,20 @@ class DirectoryDescriptor:
             group=0,
         )
+
+    def print(self, prefix: str = ""):
+        """
+        Print each entry name in this tree on its own line, after `prefix`.
+
+        Directory names are followed by "/" and their contents are printed
+        recursively with an extended prefix.
+        """
+        for key, value in self.contents.items():
+            if isinstance(value, DirectoryDescriptor):
+                print(prefix + key + "/")
+                value.print(prefix + " ")
+            else:
+                print(prefix + key)
 
 
 def generate_random_file(rng: Random, path: Path) -> FileDescriptor:
     """
diff --git a/testsuite/helpers/datman_helpers.py b/testsuite/helpers/datman_helpers.py
index 9f832e9..faef434 100644
--- a/testsuite/helpers/datman_helpers.py
+++ b/testsuite/helpers/datman_helpers.py
@@ -1,13 +1,43 @@
+import random
 import subprocess
 from pathlib import Path
-from typing import Optional
+from random import Random
+from typing import Dict, Optional, Set
+
+import attr
+from helpers import DirectoryDescriptor, FileDescriptor
+from immutabledict import immutabledict
+
+DEFAULT_PILES_SECTION = """
+[piles.main]
+path = "main"
+included_labels = ["precious"]
+"""
+
+MULTI_PILES_SECTION = """
+[piles.pocket]
+path = "pocket"
+included_labels = ["pocket"]
+
+[piles.precious]
+path = "precious"
+included_labels = ["precious"]
+
+[piles.bulky]
+path = "bulky"
+included_labels = ["bulky"]
+"""
 
 
 def get_hostname():
     return subprocess.check_output("hostname").strip().decode()
 
 
-def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
+def set_up_simple_datman(
+    path: Path,
+    custom_extra_test: Optional[str] = None,
+    piles_section: str = DEFAULT_PILES_SECTION,
+):
     path.mkdir(exist_ok=True)
     subprocess.check_call(("datman", "init"), cwd=path)
 
@@ -17,11 +47,108 @@ def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
 [source.srca]
 directory = "{path.joinpath("srca")}"
 hostname = "{get_hostname()}"
-
-[piles.main]
-path = "main"
-included_labels = ["precious"]
 """
+            + piles_section
         )
         if custom_extra_test:
             file.write(custom_extra_test)
+
+
+def save_labelling_rules(path: Path, rules: Dict[str, str]):
+    """
+    Write `rules` to `path` as zstd-compressed text.
+
+    Each rule becomes one line holding its key and value separated by a tab,
+    and a final `---` line is written after the last rule. Compression is
+    done by piping the text through the `zstd` command-line tool.
+
+    Raises ChildProcessError if zstd exits with a non-zero status.
+    """
+    with path.open("wb") as fout:
+        # The context manager closes zstd's stdin and waits for the process,
+        # even if writing a rule fails part-way through.
+        with subprocess.Popen(
+            ["zstd", "-", "--stdout"], stdin=subprocess.PIPE, stdout=fout
+        ) as proc:
+            for rule_k, rule_v in rules.items():
+                proc.stdin.write(f"{rule_k}\t{rule_v}\n".encode())
+            proc.stdin.write(b"---\n")
+
+        if proc.returncode != 0:
+            raise ChildProcessError(f"zstd failed with {proc.returncode}.")
+
+
+def generate_labels(
+    dir_descriptor: DirectoryDescriptor,
+    rng: Random,
+    dict_in_place: Optional[Dict[str, str]] = None,
+    prefix: str = "",
+) -> Dict[str, str]:
+    """
+    Randomly assign a label to every file beneath `dir_descriptor`.
+
+    Directories (including the root, keyed by `prefix`) are mapped to "?"
+    and each file is mapped to one of "bulky", "precious", "pocket" or "!",
+    chosen with `rng`. Keys are built by joining names onto `prefix` with
+    "/". Results are added to `dict_in_place` when one is supplied, which
+    is also the dictionary returned.
+    """
+    # Compare against None: an empty dict supplied by the caller must still
+    # be filled in place rather than silently swapped for a fresh one.
+    if dict_in_place is None:
+        dict_in_place = dict()
+
+    # split on this.
+    dict_in_place[prefix] = "?"
+
+    for name, descriptor in dir_descriptor.contents.items():
+        if isinstance(descriptor, DirectoryDescriptor):
+            generate_labels(descriptor, rng, dict_in_place, prefix + "/" + name)
+        elif isinstance(descriptor, FileDescriptor):
+            dict_in_place[prefix + "/" + name] = rng.choice(
+                ["bulky", "precious", "pocket", "!"]
+            )
+
+    return dict_in_place
+
+
+def filter_descriptor_by_label(
+    labels: Set[str],
+    orig: DirectoryDescriptor,
+    label_map: Dict[str, str],
+    prefix: str = "",
+) -> DirectoryDescriptor:
+    """
+    Build a copy of `orig` restricted to the entries selected by `labels`.
+
+    Each entry's rule is looked up in `label_map` under `prefix`, "/" and
+    the entry name joined together. An entry is skipped unless its rule is
+    "?" or one of `labels`. Kept directories are filtered recursively, and
+    a "?" directory that ends up empty is left out.
+
+    Raises KeyError if an entry has no rule in `label_map`, and ValueError
+    if an entry is neither a directory nor a file descriptor.
+    """
+    new_contents = {}
+
+    for key, value in orig.contents.items():
+        full_name = prefix + "/" + key
+        specified_filter = label_map[full_name]
+        if not (specified_filter == "?" or specified_filter in labels):
+            continue
+
+        if isinstance(value, DirectoryDescriptor):
+            new_dd = filter_descriptor_by_label(labels, value, label_map, full_name)
+            if not new_dd.contents and specified_filter == "?":
+                # don't include splits that are empty.
+                continue
+            new_contents[key] = new_dd
+        elif isinstance(value, FileDescriptor):
+            assert (
+                specified_filter != "?"
+            ), "why is there a split filter on a file descriptor?"
+            new_contents[key] = value
+        else:
+            raise ValueError("what kind of descriptor is value?")
+
+    return attr.evolve(orig, contents=immutabledict(new_contents))