Add tests for labelling
Some checks failed
continuous-integration/drone the build failed

Closes #50.
This commit is contained in:
Olivier 'reivilibre' 2021-10-01 00:01:15 +01:00
parent eacb3114b5
commit 8e567863ec
4 changed files with 187 additions and 7 deletions

View File

@ -84,6 +84,7 @@ pub fn label_filter_and_convert(
info!("Filtering.");
let mut unlabelled_included = false;
// filter_inclusive includes directories that must exist for the sake of their children.
if !tree.filter_inclusive(&mut |node| {
match node.get_metadata().unwrap() {
None => {

View File

@ -11,7 +11,14 @@ from helpers import (
randomly_mutate_directory_in_descriptor,
scan_dir,
)
from helpers.datman_helpers import get_hostname, set_up_simple_datman
from helpers.datman_helpers import (
MULTI_PILES_SECTION,
filter_descriptor_by_label,
generate_labels,
get_hostname,
save_labelling_rules,
set_up_simple_datman,
)
from helpers.yama_helpers import set_up_simple_yama
@ -222,3 +229,71 @@ kind = {{ stdout = "blahblah.txt" }}
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
td.cleanup()
def test_labels_apply(self):
    """
    Back up a randomly labelled source once per pile, then extract each pile
    and check that it holds exactly the entries selected by that pile's label.
    """
    pile_labels = ("precious", "pocket", "bulky")

    # Using the context manager guarantees the temporary directory is removed
    # even when an assertion or one of the subprocesses fails part-way through.
    with TemporaryDirectory("test_labels_apply") as td_name:
        tdpath = Path(td_name)
        datman_path = tdpath.joinpath("datman")
        labelling_path = datman_path.joinpath("labelling")
        src_path = datman_path.joinpath("srca")

        set_up_simple_datman(datman_path, piles_section=MULTI_PILES_SECTION)
        # One yama pile per label, each stored in a directory named after it.
        for label in pile_labels:
            set_up_simple_yama(datman_path.joinpath(label))

        # Pick a seed, print it, then re-seed so a failing run can be
        # reproduced from the test output.
        rng = Random()
        seed = rng.randint(0, 9001)
        print(f"seed: {seed}")
        rng.seed(seed)

        initial_descriptor, _ = generate_random_dir(rng, src_path, 32)
        labellings = generate_labels(initial_descriptor, rng)
        save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)

        for label in pile_labels:
            output = subprocess.check_output(
                ("datman", "backup-one", "srca", label),
                cwd=datman_path,
                stderr=subprocess.STDOUT,
            )
            self.assertNotIn(
                b"Unlabelled", output, "Labelling doesn't seem complete."
            )

        for label in pile_labels:
            dest_path = tdpath.joinpath(f"dest_{label}")
            subprocess.check_call(
                (
                    "datman",
                    "extract",
                    "--skip-metadata",
                    "--accept-partial",
                    label,
                    str(dest_path),
                ),
                cwd=datman_path,
            )

            extracted_dir_descriptor_wrapper = scan_dir(dest_path)
            contents = extracted_dir_descriptor_wrapper.contents
            self.assertEqual(len(contents), 1)

            # The single extracted entry is expected to be a directory whose
            # name starts with "srca+"; its first child must be "srca".
            key, value = next(iter(contents.items()))
            self.assertTrue(key.startswith("srca+"))
            self.assertIsInstance(value, DirectoryDescriptor)
            key, value = next(iter(value.contents.items()))
            self.assertEqual(key, "srca")

            filtered_initial_descriptor = filter_descriptor_by_label(
                {label}, initial_descriptor, labellings
            )
            self.assertEqual(
                value.ignore_metadata(),
                filtered_initial_descriptor.ignore_metadata(),
            )

View File

@ -49,6 +49,14 @@ class DirectoryDescriptor:
group=0,
)
def print(self, prefix: str = ""):
    """
    Write the names of this directory's entries to standard output, one per
    line and each preceded by ``prefix``.

    Sub-directories are shown with a trailing "/" and their own entries are
    printed beneath them with the prefix extended by one space.
    """
    for name, entry in self.contents.items():
        if not isinstance(entry, DirectoryDescriptor):
            print(prefix + name)
            continue
        print(prefix + name + "/")
        entry.print(prefix + " ")
def generate_random_file(rng: Random, path: Path) -> FileDescriptor:
"""

View File

@ -1,13 +1,43 @@
import random
import subprocess
from pathlib import Path
from typing import Optional
from random import Random
from typing import Dict, Optional, Set
import attr
from helpers import DirectoryDescriptor, FileDescriptor
from immutabledict import immutabledict
# Piles section that set_up_simple_datman() appends to the generated config
# when the caller does not pass its own `piles_section`: a single pile named
# "main" that includes the "precious" label.
DEFAULT_PILES_SECTION = """
[piles.main]
path = "main"
included_labels = ["precious"]
"""
# Alternative piles section declaring one pile per label ("pocket",
# "precious" and "bulky"); each pile's path and its single included label
# share the pile's name.
MULTI_PILES_SECTION = """
[piles.pocket]
path = "pocket"
included_labels = ["pocket"]
[piles.precious]
path = "precious"
included_labels = ["precious"]
[piles.bulky]
path = "bulky"
included_labels = ["bulky"]
"""
def get_hostname():
    """Return the output of the ``hostname`` command, stripped and decoded."""
    raw_output = subprocess.check_output("hostname")
    trimmed = raw_output.strip()
    return trimmed.decode()
def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
def set_up_simple_datman(
path: Path,
custom_extra_test: Optional[str] = None,
piles_section: str = DEFAULT_PILES_SECTION,
):
path.mkdir(exist_ok=True)
subprocess.check_call(("datman", "init"), cwd=path)
@ -17,11 +47,77 @@ def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
[source.srca]
directory = "{path.joinpath("srca")}"
hostname = "{get_hostname()}"
[piles.main]
path = "main"
included_labels = ["precious"]
"""
+ piles_section
)
if custom_extra_test:
file.write(custom_extra_test)
def save_labelling_rules(path: Path, rules: Dict[str, str]):
    """
    Write ``rules`` to ``path`` as a zstd-compressed text file.

    Each rule becomes one ``<key>\\t<value>`` line, in the dict's iteration
    order, and the rules are followed by a single ``---`` line.

    :param path: file to create (or overwrite) with the compressed output.
    :param rules: mapping written as the tab-separated lines.
    :raises ChildProcessError: if ``zstd`` exits with a non-zero status.
    """
    lines = [f"{rule_k}\t{rule_v}\n" for rule_k, rule_v in rules.items()]
    lines.append("---\n")
    payload = "".join(lines).encode()

    with path.open("wb") as fout:
        # subprocess.run feeds the input, waits for the child and reaps it
        # even if zstd exits early, so a failing zstd is reported through the
        # return code below rather than as a BrokenPipeError from a stdin
        # write.
        completed = subprocess.run(
            ["zstd", "-", "--stdout"], input=payload, stdout=fout
        )

    if completed.returncode != 0:
        raise ChildProcessError(f"zstd failed with {completed.returncode}.")
def generate_labels(
    dir_descriptor: "DirectoryDescriptor",
    rng: Random,
    dict_in_place: Optional[Dict[str, str]] = None,
    prefix: str = "",
) -> Dict[str, str]:
    """
    Build a random labelling for every entry beneath ``dir_descriptor``.

    Keys are "/"-joined paths relative to the root descriptor, whose own key
    is ``prefix`` (the empty string by default). Every directory is mapped to
    "?" (a split) and every file to one of "bulky", "precious", "pocket" or
    "!", chosen with ``rng``.

    :param dir_descriptor: directory whose contents are walked recursively.
    :param rng: random source used to pick each file's label.
    :param dict_in_place: dict to fill in; a new one is created when None.
    :param prefix: path of ``dir_descriptor`` itself within the labelling.
    :return: the dict that was filled in (``dict_in_place`` when supplied).
    """
    # Compare against None explicitly: a caller-supplied empty dict is falsy
    # but must still be the one that gets populated and returned.
    if dict_in_place is None:
        dict_in_place = {}

    # split on this.
    dict_in_place[prefix] = "?"

    for name, descriptor in dir_descriptor.contents.items():
        child_path = prefix + "/" + name
        if isinstance(descriptor, DirectoryDescriptor):
            generate_labels(descriptor, rng, dict_in_place, child_path)
        elif isinstance(descriptor, FileDescriptor):
            dict_in_place[child_path] = rng.choice(
                ["bulky", "precious", "pocket", "!"]
            )

    return dict_in_place
def filter_descriptor_by_label(
    labels: Set[str],
    orig: DirectoryDescriptor,
    label_map: Dict[str, str],
    prefix: str = "",
) -> DirectoryDescriptor:
    """
    Return a copy of ``orig`` containing only the entries selected by
    ``labels``.

    Each entry's rule is looked up in ``label_map`` under its "/"-joined path
    (starting from ``prefix``). An entry is kept when its rule is one of
    ``labels``; an entry whose rule is "?" (a split) is descended into and
    kept only if something beneath it survives the filtering.

    :raises KeyError: if an entry's path is missing from ``label_map``.
    :raises ValueError: if an entry is neither a directory nor a file
        descriptor.
    """
    kept = {}

    for name, child in orig.contents.items():
        child_path = prefix + "/" + name
        rule = label_map[child_path]
        is_split = rule == "?"

        # Neither a split nor one of the requested labels: drop the entry.
        if not is_split and rule not in labels:
            continue

        if isinstance(child, DirectoryDescriptor):
            filtered_child = filter_descriptor_by_label(
                labels, child, label_map, child_path
            )
            # don't include splits that are empty.
            if is_split and not filtered_child.contents:
                continue
            kept[name] = filtered_child
            continue

        if isinstance(child, FileDescriptor):
            assert (
                not is_split
            ), "why is there a split filter on a file descriptor?"
            kept[name] = child
            continue

        raise ValueError("what kind of descriptor is value?")

    return attr.evolve(orig, contents=immutabledict(kept))