Closes #50.
This commit is contained in:
parent
eacb3114b5
commit
8e567863ec
@ -84,6 +84,7 @@ pub fn label_filter_and_convert(
|
|||||||
|
|
||||||
info!("Filtering.");
|
info!("Filtering.");
|
||||||
let mut unlabelled_included = false;
|
let mut unlabelled_included = false;
|
||||||
|
// filter_inclusive includes directories that must exist for the sake of their children.
|
||||||
if !tree.filter_inclusive(&mut |node| {
|
if !tree.filter_inclusive(&mut |node| {
|
||||||
match node.get_metadata().unwrap() {
|
match node.get_metadata().unwrap() {
|
||||||
None => {
|
None => {
|
||||||
|
@ -11,7 +11,14 @@ from helpers import (
|
|||||||
randomly_mutate_directory_in_descriptor,
|
randomly_mutate_directory_in_descriptor,
|
||||||
scan_dir,
|
scan_dir,
|
||||||
)
|
)
|
||||||
from helpers.datman_helpers import get_hostname, set_up_simple_datman
|
from helpers.datman_helpers import (
|
||||||
|
MULTI_PILES_SECTION,
|
||||||
|
filter_descriptor_by_label,
|
||||||
|
generate_labels,
|
||||||
|
get_hostname,
|
||||||
|
save_labelling_rules,
|
||||||
|
set_up_simple_datman,
|
||||||
|
)
|
||||||
from helpers.yama_helpers import set_up_simple_yama
|
from helpers.yama_helpers import set_up_simple_yama
|
||||||
|
|
||||||
|
|
||||||
@ -222,3 +229,71 @@ kind = {{ stdout = "blahblah.txt" }}
|
|||||||
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
|
self.assertEqual(value.ignore_metadata(), mutated_descriptor.ignore_metadata())
|
||||||
|
|
||||||
td.cleanup()
|
td.cleanup()
|
||||||
|
|
||||||
|
def test_labels_apply(self):
    """
    Check that labelled files end up only in the pile that includes their label.

    A random source tree is generated and randomly labelled, ``datman
    backup-one`` is run for each of the ``precious``, ``pocket`` and ``bulky``
    piles, and every pile is then extracted and compared (ignoring metadata)
    with the original tree filtered down to that pile's label.
    """
    td = TemporaryDirectory("test_labels_apply")
    # Registered up front so the directory is removed even when an assertion
    # or a subprocess call further down fails part-way through the test.
    self.addCleanup(td.cleanup)
    tdpath = Path(td.name)

    datman_path = tdpath.joinpath("datman")
    labelling_path = datman_path.joinpath("labelling")
    src_path = datman_path.joinpath("srca")
    yama_precious_path = datman_path.joinpath("precious")
    yama_pocket_path = datman_path.joinpath("pocket")
    yama_bulky_path = datman_path.joinpath("bulky")

    set_up_simple_datman(datman_path, piles_section=MULTI_PILES_SECTION)
    set_up_simple_yama(yama_precious_path)
    set_up_simple_yama(yama_pocket_path)
    set_up_simple_yama(yama_bulky_path)

    # Pick a seed, print it, then reseed with it so that a failing run can be
    # reproduced from the test output.
    rng = Random()
    seed = rng.randint(0, 9001)
    print(f"seed: {seed}")
    rng.seed(seed)
    initial_descriptor, _ = generate_random_dir(rng, src_path, 32)

    labellings = generate_labels(initial_descriptor, rng)
    save_labelling_rules(labelling_path.joinpath("srca.zst"), labellings)

    for label in ["precious", "pocket", "bulky"]:
        output = subprocess.check_output(
            ("datman", "backup-one", "srca", label),
            cwd=datman_path,
            stderr=subprocess.STDOUT,
        )
        self.assertNotIn(b"Unlabelled", output, "Labelling doesn't seem complete.")

    for label in ["precious", "pocket", "bulky"]:
        dest_path = tdpath.joinpath(f"dest_{label}")
        subprocess.check_call(
            (
                "datman",
                "extract",
                "--skip-metadata",
                "--accept-partial",
                label,
                str(dest_path),
            ),
            cwd=datman_path,
        )

        extracted_dir_descriptor_wrapper = scan_dir(dest_path)

        contents = extracted_dir_descriptor_wrapper.contents

        # The extraction is expected to hold exactly one entry, named
        # "srca+<something>", which in turn wraps the "srca" directory.
        self.assertEqual(len(contents), 1)
        key, value = next(iter(contents.items()))
        self.assertTrue(key.startswith("srca+"))

        self.assertIsInstance(value, DirectoryDescriptor)
        key, value = next(iter(value.contents.items()))
        self.assertEqual(key, "srca")

        filtered_initial_descriptor = filter_descriptor_by_label(
            {label}, initial_descriptor, labellings
        )
        self.assertEqual(
            value.ignore_metadata(), filtered_initial_descriptor.ignore_metadata()
        )
|
||||||
|
@ -49,6 +49,14 @@ class DirectoryDescriptor:
|
|||||||
group=0,
|
group=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def print(self, prefix: str = ""):
    """
    Print the names in this directory as an indented listing.

    Sub-directories are printed with a trailing ``/`` and then listed
    recursively with one extra space of ``prefix``; any other entry is
    printed as its bare name.
    """
    nested_prefix = prefix + " "
    for name, entry in self.contents.items():
        if not isinstance(entry, DirectoryDescriptor):
            print(prefix + name)
            continue
        print(prefix + name + "/")
        entry.print(nested_prefix)
|
||||||
|
|
||||||
|
|
||||||
def generate_random_file(rng: Random, path: Path) -> FileDescriptor:
|
def generate_random_file(rng: Random, path: Path) -> FileDescriptor:
|
||||||
"""
|
"""
|
||||||
|
@ -1,13 +1,43 @@
|
|||||||
|
import random
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from random import Random
|
||||||
|
from typing import Dict, Optional, Set
|
||||||
|
|
||||||
|
import attr
|
||||||
|
from helpers import DirectoryDescriptor, FileDescriptor
|
||||||
|
from immutabledict import immutabledict
|
||||||
|
|
||||||
|
# Default value of set_up_simple_datman's `piles_section` parameter: declares a
# single pile, `main`, whose included_labels is ["precious"].
DEFAULT_PILES_SECTION = """
|
||||||
|
[piles.main]
|
||||||
|
path = "main"
|
||||||
|
included_labels = ["precious"]
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Alternative piles section declaring three piles (`pocket`, `precious`,
# `bulky`), each of which includes only the label that shares its name.
MULTI_PILES_SECTION = """
|
||||||
|
[piles.pocket]
|
||||||
|
path = "pocket"
|
||||||
|
included_labels = ["pocket"]
|
||||||
|
|
||||||
|
[piles.precious]
|
||||||
|
path = "precious"
|
||||||
|
included_labels = ["precious"]
|
||||||
|
|
||||||
|
[piles.bulky]
|
||||||
|
path = "bulky"
|
||||||
|
included_labels = ["bulky"]
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def get_hostname():
    """
    Return the output of the ``hostname`` command as a string, with
    surrounding whitespace removed.
    """
    raw_output = subprocess.check_output("hostname")
    trimmed = raw_output.strip()
    return trimmed.decode()
|
||||||
|
|
||||||
|
|
||||||
def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
|
def set_up_simple_datman(
|
||||||
|
path: Path,
|
||||||
|
custom_extra_test: Optional[str] = None,
|
||||||
|
piles_section: str = DEFAULT_PILES_SECTION,
|
||||||
|
):
|
||||||
path.mkdir(exist_ok=True)
|
path.mkdir(exist_ok=True)
|
||||||
subprocess.check_call(("datman", "init"), cwd=path)
|
subprocess.check_call(("datman", "init"), cwd=path)
|
||||||
|
|
||||||
@ -17,11 +47,77 @@ def set_up_simple_datman(path: Path, custom_extra_test: Optional[str] = None):
|
|||||||
[source.srca]
|
[source.srca]
|
||||||
directory = "{path.joinpath("srca")}"
|
directory = "{path.joinpath("srca")}"
|
||||||
hostname = "{get_hostname()}"
|
hostname = "{get_hostname()}"
|
||||||
|
|
||||||
[piles.main]
|
|
||||||
path = "main"
|
|
||||||
included_labels = ["precious"]
|
|
||||||
"""
|
"""
|
||||||
|
+ piles_section
|
||||||
)
|
)
|
||||||
if custom_extra_test:
|
if custom_extra_test:
|
||||||
file.write(custom_extra_test)
|
file.write(custom_extra_test)
|
||||||
|
|
||||||
|
|
||||||
|
def save_labelling_rules(path: Path, rules: Dict[str, str]):
    """
    Write ``rules`` to ``path``, compressed by the external ``zstd`` command.

    The uncompressed payload is one line per rule (key, a tab character, then
    the value) followed by a final ``---`` line.

    :param path: file to create or overwrite with the compressed output.
    :param rules: mapping whose items are written in iteration order.
    :raises ChildProcessError: if ``zstd`` exits with a non-zero status.
    """
    lines = [f"{rule_k}\t{rule_v}\n" for rule_k, rule_v in rules.items()]
    lines.append("---\n")
    payload = "".join(lines).encode()

    with path.open("wb") as fout:
        # subprocess.run feeds the payload, closes the pipe and always waits
        # for the child, so zstd is reaped even if it dies while being written
        # to (a hand-rolled Popen/write loop would leave it un-waited).
        completed = subprocess.run(
            ["zstd", "-", "--stdout"], input=payload, stdout=fout
        )

    if completed.returncode != 0:
        raise ChildProcessError(f"zstd failed with {completed.returncode}.")
|
||||||
|
|
||||||
|
|
||||||
|
def generate_labels(
    dir_descriptor: DirectoryDescriptor,
    rng: Random,
    dict_in_place: Optional[Dict[str, str]] = None,
    prefix: str = "",
) -> Dict[str, str]:
    """
    Build a random labelling for everything beneath ``dir_descriptor``.

    Every directory (including the root, stored under ``prefix`` itself) is
    mapped to ``"?"``; every file is mapped to one of ``"bulky"``,
    ``"precious"``, ``"pocket"`` or ``"!"`` chosen with ``rng``. Paths are
    formed by appending ``"/" + name`` to the parent's path. Entries that are
    neither a directory nor a file descriptor are left unlabelled.

    :param dir_descriptor: directory whose ``contents`` are walked recursively.
    :param rng: random source used to choose each file's label.
    :param dict_in_place: mapping to add the labels to; a fresh dict is
        created only when this is ``None``.
    :param prefix: path under which ``dir_descriptor`` itself is recorded.
    :return: the mapping that was filled in (``dict_in_place`` if supplied).
    """
    # Test for None rather than truthiness: an empty dict handed in by the
    # caller must still be the object that gets populated.
    if dict_in_place is None:
        dict_in_place = {}

    # "?" marks this directory as a split, so its children carry the labels.
    dict_in_place[prefix] = "?"

    for name, descriptor in dir_descriptor.contents.items():
        child_path = prefix + "/" + name
        if isinstance(descriptor, DirectoryDescriptor):
            generate_labels(descriptor, rng, dict_in_place, child_path)
        elif isinstance(descriptor, FileDescriptor):
            dict_in_place[child_path] = rng.choice(
                ["bulky", "precious", "pocket", "!"]
            )

    return dict_in_place
|
||||||
|
|
||||||
|
|
||||||
|
def filter_descriptor_by_label(
    labels: Set[str],
    orig: DirectoryDescriptor,
    label_map: Dict[str, str],
    prefix: str = "",
) -> DirectoryDescriptor:
    """
    Return a copy of ``orig`` reduced to the entries selected by ``labels``.

    Each child is looked up in ``label_map`` under ``prefix + "/" + name``.
    A child is dropped unless its entry is ``"?"`` (a split) or one of
    ``labels``. Surviving directories are filtered recursively, and a split
    directory that ends up empty is dropped as well. Surviving files are kept
    unchanged.

    :raises KeyError: if a child has no entry in ``label_map``.
    :raises ValueError: if a surviving child is neither a directory nor a
        file descriptor.
    """
    kept = {}

    for name, entry in orig.contents.items():
        entry_path = prefix + "/" + name
        rule = label_map[entry_path]
        is_split = rule == "?"

        if not is_split and rule not in labels:
            continue

        if isinstance(entry, DirectoryDescriptor):
            filtered_child = filter_descriptor_by_label(
                labels, entry, label_map, entry_path
            )
            # A split that lost all of its children is left out entirely.
            if is_split and not filtered_child.contents:
                continue
            kept[name] = filtered_child
        elif isinstance(entry, FileDescriptor):
            assert (
                rule != "?"
            ), "why is there a split filter on a file descriptor?"
            kept[name] = entry
        else:
            raise ValueError("what kind of descriptor is value?")

    return attr.evolve(orig, contents=immutabledict(kept))
|
||||||
|
Loading…
Reference in New Issue
Block a user