Closes #50.
This commit is contained in:
parent
eacb3114b5
commit
8e567863ec
@ -84,6 +84,7 @@ pub fn label_filter_and_convert(
|
||||
|
||||
info!("Filtering.");
|
||||
let mut unlabelled_included = false;
|
||||
// filter_inclusive includes directories that must exist for the sake of their children.
|
||||
if !tree.filter_inclusive(&mut |node| {
|
||||
match node.get_metadata().unwrap() {
|
||||
None => {
|
||||
|
@ -11,7 +11,14 @@ from helpers import (
|
||||
randomly_mutate_directory_in_descriptor,
|
||||
scan_dir,
|
||||
)
|
||||
from helpers.datman_helpers import get_hostname, set_up_simple_datman
|
||||
from helpers.datman_helpers import (
|
||||
MULTI_PILES_SECTION,
|
||||
filter_descriptor_by_label,
|
||||
generate_labels,
|
||||
get_hostname,
|
||||
save_labelling_rules,
|
||||
set_up_simple_datman,
|
||||
)
|
||||
from helpers.yama_helpers import set_up_simple_yama
|
||||
|
||||
|
||||
@ -222,3 +229,71 @@ kind = {{ stdout = "blahblah.txt" }}
|
||||
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
|
||||
|
||||
td.cleanup()
|
||||
|
||||
def test_labels_apply(self):
    """
    Check that each pile only receives the files whose label it includes.

    A random source directory is generated and labelled at random, then
    `datman backup-one srca <label>` is run for each of the three piles.
    Each pile is extracted again and compared (ignoring metadata) against
    the initial descriptor filtered down to that pile's label.
    """
    td = TemporaryDirectory("test_labels_apply")
    # Registered up front so the directory is also removed when an assertion
    # or a subprocess call below fails part-way through the test.
    self.addCleanup(td.cleanup)
    tdpath = Path(td.name)

    datman_path = tdpath.joinpath("datman")
    labelling_path = datman_path.joinpath("labelling")
    src_path = datman_path.joinpath("srca")
    # Pile names double as label names and as the pile directory names.
    piles = ("precious", "pocket", "bulky")

    set_up_simple_datman(datman_path, piles_section=MULTI_PILES_SECTION)
    for pile in piles:
        set_up_simple_yama(datman_path.joinpath(pile))

    # Draw a seed and print it so that a failing run can be reproduced.
    rng = Random()
    seed = rng.randint(0, 9001)
    print(f"seed: {seed}")
    rng.seed(seed)
    initial_descriptor, _ = generate_random_dir(rng, src_path, 32)

    labellings = generate_labels(initial_descriptor, rng)
    save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)

    for label in piles:
        output = subprocess.check_output(
            ("datman", "backup-one", "srca", label),
            cwd=datman_path,
            stderr=subprocess.STDOUT,
        )
        self.assertNotIn(b"Unlabelled", output, "Labelling doesn't seem complete.")

    for label in piles:
        dest_path = tdpath.joinpath(f"dest_{label}")
        subprocess.check_call(
            (
                "datman",
                "extract",
                "--skip-metadata",
                "--accept-partial",
                label,
                str(dest_path),
            ),
            cwd=datman_path,
        )

        extracted_dir_descriptor_wrapper = scan_dir(dest_path)

        contents = extracted_dir_descriptor_wrapper.contents

        # The extraction is expected to hold exactly one `srca+...` entry.
        self.assertEqual(len(contents), 1)
        key, value = next(iter(contents.items()))
        self.assertTrue(key.startswith("srca+"))

        # ...which in turn wraps the actual `srca` directory.
        self.assertIsInstance(value, DirectoryDescriptor)
        key, value = next(iter(value.contents.items()))
        self.assertEqual(key, "srca")

        filtered_initial_descriptor = filter_descriptor_by_label(
            {label}, initial_descriptor, labellings
        )
        self.assertEqual(
            value.ignore_metadata(), filtered_initial_descriptor.ignore_metadata()
        )
|
||||
|
@ -49,6 +49,14 @@ class DirectoryDescriptor:
|
||||
group=0,
|
||||
)
|
||||
|
||||
def print(self, prefix: str = ""):
    """
    Print the names in this directory, one per line, recursing into
    subdirectories.

    Directory names get a trailing "/" and their children are printed with
    `prefix` extended by one space per nesting level.
    """
    for name, entry in self.contents.items():
        if not isinstance(entry, DirectoryDescriptor):
            print(prefix + name)
            continue

        print(prefix + name + "/")
        entry.print(prefix + " ")
|
||||
|
||||
|
||||
def generate_random_file(rng: Random, path: Path) -> FileDescriptor:
|
||||
"""
|
||||
|
@ -1,13 +1,43 @@
|
||||
import random
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from random import Random
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
import attr
|
||||
from helpers import DirectoryDescriptor, FileDescriptor
|
||||
from immutabledict import immutabledict
|
||||
|
||||
# Default `[piles.*]` configuration fragment: a single pile named `main`,
# stored at path "main", which includes the "precious" label.
# Used as the default value of `set_up_simple_datman`'s `piles_section`.
DEFAULT_PILES_SECTION = """
[piles.main]
path = "main"
included_labels = ["precious"]
"""

# Alternative fragment declaring three piles (`pocket`, `precious`, `bulky`),
# each stored at a path equal to its name and each including only the label
# of the same name.
MULTI_PILES_SECTION = """
[piles.pocket]
path = "pocket"
included_labels = ["pocket"]

[piles.precious]
path = "precious"
included_labels = ["precious"]

[piles.bulky]
path = "bulky"
included_labels = ["bulky"]
"""
|
||||
|
||||
|
||||
def get_hostname():
    """
    Return the output of the external `hostname` command as a string,
    with surrounding whitespace removed.
    """
    raw_output = subprocess.check_output("hostname")
    trimmed = raw_output.strip()
    return trimmed.decode()
|
||||
|
||||
|
||||
def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
|
||||
def set_up_simple_datman(
|
||||
path: Path,
|
||||
custom_extra_test: Optional[str] = None,
|
||||
piles_section: str = DEFAULT_PILES_SECTION,
|
||||
):
|
||||
path.mkdir(exist_ok=True)
|
||||
subprocess.check_call(("datman", "init"), cwd=path)
|
||||
|
||||
@ -17,11 +47,77 @@ def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
|
||||
[source.srca]
|
||||
directory = "{path.joinpath("srca")}"
|
||||
hostname = "{get_hostname()}"
|
||||
|
||||
[piles.main]
|
||||
path = "main"
|
||||
included_labels = ["precious"]
|
||||
"""
|
||||
+ piles_section
|
||||
)
|
||||
if custom_extra_test:
|
||||
file.write(custom_extra_test)
|
||||
|
||||
|
||||
def save_labelling_rules(path: Path, rules: Dict[str, str]):
    """
    Write labelling rules to `path`, compressed by the external `zstd` tool.

    Each rule is serialised as one line holding the key, a tab, and the
    value; the list of rules is terminated by a line containing `---`.

    :param path: file to (over)write with the compressed rules.
    :param rules: mapping of rule key to rule value.
    :raises ChildProcessError: if `zstd` exits with a non-zero status.
    """
    lines = [f"{rule_k}\t{rule_v}\n".encode() for rule_k, rule_v in rules.items()]
    lines.append(b"---\n")

    with path.open("wb") as fout:
        # run() feeds stdin, closes it and waits for the child in every case,
        # so an early zstd exit surfaces through the return code below rather
        # than as a BrokenPipeError with the child left unreaped.
        result = subprocess.run(
            ["zstd", "-", "--stdout"], input=b"".join(lines), stdout=fout
        )

    if result.returncode != 0:
        raise ChildProcessError(f"zstd failed with {result.returncode}.")
|
||||
|
||||
|
||||
def generate_labels(
    dir_descriptor: DirectoryDescriptor,
    rng: Random,
    dict_in_place: Optional[Dict[str, str]] = None,
    prefix: str = "",
) -> Dict[str, str]:
    """
    Generate a random labelling for every entry beneath `dir_descriptor`.

    Every directory (including the root, under the key `prefix`) is mapped
    to "?" and every file to a random choice of "bulky", "precious",
    "pocket" or "!". Keys are built by joining names onto `prefix` with "/".

    :param dir_descriptor: the directory whose contents should be labelled.
    :param rng: source of randomness for the file labels.
    :param dict_in_place: dictionary to add the labels to; a new one is
        created when this is None.
    :param prefix: key of `dir_descriptor` itself; "" for the root.
    :return: the dictionary the labels were added to.
    """
    # Compare against None rather than testing truthiness: an empty dict
    # supplied by the caller must be filled in place, not replaced.
    if dict_in_place is None:
        dict_in_place = {}

    # split on this.
    dict_in_place[prefix] = "?"

    for name, descriptor in dir_descriptor.contents.items():
        child_path = prefix + "/" + name
        if isinstance(descriptor, DirectoryDescriptor):
            generate_labels(descriptor, rng, dict_in_place, child_path)
        elif isinstance(descriptor, FileDescriptor):
            dict_in_place[child_path] = rng.choice(
                ["bulky", "precious", "pocket", "!"]
            )

    return dict_in_place
|
||||
|
||||
|
||||
def filter_descriptor_by_label(
    labels: Set[str],
    orig: DirectoryDescriptor,
    label_map: Dict[str, str],
    prefix: str = "",
) -> DirectoryDescriptor:
    """
    Return a copy of `orig` reduced to the entries selected by `labels`.

    An entry is kept when its rule in `label_map` is one of `labels`, or
    when its rule is the split marker "?". Directories are filtered
    recursively, and a split directory that ends up empty is dropped.

    :param labels: the labels whose entries should be retained.
    :param orig: the directory descriptor to filter.
    :param label_map: rule for every entry, keyed by "/"-joined path.
    :param prefix: key of `orig` itself within `label_map`; "" for the root.
    :return: `orig` with its contents replaced by the filtered contents.
    """
    kept = {}

    for name, child in orig.contents.items():
        child_path = prefix + "/" + name
        rule = label_map[child_path]
        is_split = rule == "?"

        # Neither a split nor one of the wanted labels: drop the entry.
        if not is_split and rule not in labels:
            continue

        if isinstance(child, DirectoryDescriptor):
            filtered_child = filter_descriptor_by_label(
                labels, child, label_map, child_path
            )
            if is_split and not filtered_child.contents:
                # don't include splits that are empty.
                continue
            kept[name] = filtered_child
        elif isinstance(child, FileDescriptor):
            assert not is_split, "why is there a split filter on a file descriptor?"
            kept[name] = child
        else:
            raise ValueError("what kind of descriptor is value?")

    return attr.evolve(orig, contents=immutabledict(kept))
|
||||
|
Loading…
Reference in New Issue
Block a user