Merge pull request #1868 from JRMeyer/data-augmentation-cleaning
Add logging and clean up some augmentation code
This commit is contained in:
commit
ac2bbd6a79
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import math
|
import math
|
||||||
@ -10,6 +9,7 @@ from multiprocessing import Queue, Process
|
|||||||
from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS
|
from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS
|
||||||
from .helpers import LimitingPool, int_range, float_range, pick_value_from_range, tf_pick_value_from_range, MEGABYTE
|
from .helpers import LimitingPool, int_range, float_range, pick_value_from_range, tf_pick_value_from_range, MEGABYTE
|
||||||
from .sample_collections import samples_from_source, unpack_maybe
|
from .sample_collections import samples_from_source, unpack_maybe
|
||||||
|
from .logging import log_info
|
||||||
|
|
||||||
BUFFER_SIZE = 1 * MEGABYTE
|
BUFFER_SIZE = 1 * MEGABYTE
|
||||||
SPEC_PARSER = re.compile(r'^(?P<cls>[a-z_]+)(\[(?P<params>.*)\])?$')
|
SPEC_PARSER = re.compile(r'^(?P<cls>[a-z_]+)(\[(?P<params>.*)\])?$')
|
||||||
@ -90,6 +90,7 @@ def parse_augmentation(augmentation_spec):
|
|||||||
kwargs[pair[0]] = pair[1]
|
kwargs[pair[0]] = pair[1]
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unable to parse augmentation value assignment')
|
raise ValueError('Unable to parse augmentation value assignment')
|
||||||
|
log_info('Processed augmentation type: [{}] with parameter settings: {}'.format(augmentation_cls.__name__, kwargs))
|
||||||
return augmentation_cls(*args, **kwargs)
|
return augmentation_cls(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
@ -106,7 +107,7 @@ def parse_augmentations(augmentation_specs):
|
|||||||
-------
|
-------
|
||||||
List of augmentation class instances from util.augmentations.*.
|
List of augmentation class instances from util.augmentations.*.
|
||||||
"""
|
"""
|
||||||
return [] if augmentation_specs is None else list(map(parse_augmentation, augmentation_specs))
|
return list(map(parse_augmentation, augmentation_specs or []))
|
||||||
|
|
||||||
|
|
||||||
def apply_graph_augmentations(domain, tensor, augmentations, transcript=None, clock=0.0):
|
def apply_graph_augmentations(domain, tensor, augmentations, transcript=None, clock=0.0):
|
||||||
|
@ -163,27 +163,41 @@ def remember_exception(iterable, exception_box=None):
|
|||||||
|
|
||||||
|
|
||||||
def get_value_range(value, target_type):
|
def get_value_range(value, target_type):
|
||||||
|
"""
|
||||||
|
This function converts all possible supplied values for augmentation
|
||||||
|
into the [start,end,r] ValueRange type. The expected inputs are of the form:
|
||||||
|
|
||||||
|
<number>
|
||||||
|
<number>~<number>
|
||||||
|
<number>:<number>~<number>
|
||||||
|
|
||||||
|
Any "missing" values are filled so that ValueRange always includes [start,end,r].
|
||||||
|
"""
|
||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
r = target_type(0)
|
if '~' in value:
|
||||||
parts = value.split('~')
|
parts = value.split('~')
|
||||||
if len(parts) == 2:
|
if len(parts) != 2:
|
||||||
|
raise ValueError('Cannot parse value range')
|
||||||
value = parts[0]
|
value = parts[0]
|
||||||
r = target_type(parts[1])
|
r = parts[1]
|
||||||
elif len(parts) > 2:
|
else:
|
||||||
raise ValueError('Cannot parse value range')
|
r = 0 # if no <r> supplied, use 0
|
||||||
parts = value.split(':')
|
parts = value.split(':')
|
||||||
if len(parts) == 1:
|
if len(parts) == 1:
|
||||||
parts.append(parts[0])
|
parts.append(parts[0]) # only one <value> given, so double it
|
||||||
elif len(parts) > 2:
|
if len(parts) != 2:
|
||||||
raise ValueError('Cannot parse value range')
|
raise ValueError('Cannot parse value range')
|
||||||
return ValueRange(target_type(parts[0]), target_type(parts[1]), r)
|
return ValueRange(target_type(parts[0]), target_type(parts[1]), target_type(r))
|
||||||
if isinstance(value, tuple):
|
if isinstance(value, tuple):
|
||||||
if len(value) == 2:
|
if len(value) == 2:
|
||||||
return ValueRange(target_type(value[0]), target_type(value[1]), 0)
|
return ValueRange(target_type(value[0]), target_type(value[1]), target_type(0))
|
||||||
if len(value) == 3:
|
if len(value) == 3:
|
||||||
return ValueRange(target_type(value[0]), target_type(value[1]), target_type(value[2]))
|
return ValueRange(target_type(value[0]), target_type(value[1]), target_type(value[2]))
|
||||||
raise ValueError('Cannot convert to ValueRange: Wrong tuple size')
|
else:
|
||||||
return ValueRange(target_type(value), target_type(value), 0)
|
raise ValueError('Cannot convert to ValueRange: Wrong tuple size')
|
||||||
|
if isinstance(value, int) or isinstance(value, float):
|
||||||
|
return ValueRange(target_type(value), target_type(value), target_type(0))
|
||||||
|
raise ValueError('Cannot convert to ValueRange: Wrong tuple size')
|
||||||
|
|
||||||
|
|
||||||
def int_range(value):
|
def int_range(value):
|
||||||
@ -203,14 +217,20 @@ def pick_value_from_range(value_range, clock=None):
|
|||||||
|
|
||||||
def tf_pick_value_from_range(value_range, clock=None, double_precision=False):
|
def tf_pick_value_from_range(value_range, clock=None, double_precision=False):
|
||||||
import tensorflow as tf # pylint: disable=import-outside-toplevel
|
import tensorflow as tf # pylint: disable=import-outside-toplevel
|
||||||
clock = (tf.random.stateless_uniform([], seed=(-1, 1), dtype=tf.float64) if clock is None
|
if clock is None:
|
||||||
else tf.maximum(tf.constant(0.0, dtype=tf.float64), tf.minimum(tf.constant(1.0, dtype=tf.float64), clock)))
|
clock = tf.random.stateless_uniform([], seed=(-1, 1), dtype=tf.float64)
|
||||||
|
else:
|
||||||
|
clock = tf.maximum(tf.constant(0.0, dtype=tf.float64),
|
||||||
|
tf.minimum(tf.constant(1.0, dtype=tf.float64), clock))
|
||||||
value = value_range.start + clock * (value_range.end - value_range.start)
|
value = value_range.start + clock * (value_range.end - value_range.start)
|
||||||
value = tf.random.stateless_uniform([],
|
if value_range.r:
|
||||||
minval=value - value_range.r,
|
# if the option <r> (<value>~<r>, randomization radius) is supplied,
|
||||||
maxval=value + value_range.r,
|
# sample the value from a uniform distribution with "radius" <r>
|
||||||
seed=(clock * tf.int32.min, clock * tf.int32.max),
|
value = tf.random.stateless_uniform([],
|
||||||
dtype=tf.float64)
|
minval=value - value_range.r,
|
||||||
|
maxval=value + value_range.r,
|
||||||
|
seed=(clock * tf.int32.min, clock * tf.int32.max),
|
||||||
|
dtype=tf.float64)
|
||||||
if isinstance(value_range.start, int):
|
if isinstance(value_range.start, int):
|
||||||
return tf.cast(tf.math.round(value), tf.int64 if double_precision else tf.int32)
|
return tf.cast(tf.math.round(value), tf.int64 if double_precision else tf.int32)
|
||||||
return tf.cast(value, tf.float64 if double_precision else tf.float32)
|
return tf.cast(value, tf.float64 if double_precision else tf.float32)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user