Remove sparse image warp, fix boolean flags type, rebase to master

2019-09-09 12:11:28 +02:00 · 2019-09-09 12:11:28 +02:00 · d051d4fd0e
parent 0e4eed7be3
commit d051d4fd0e
4 changed files with 3 additions and 218 deletions
--- a/util/feeding.py
+++ b/util/feeding.py
@@ -16,7 +16,7 @@ from util.config import Config
 from util.logging import log_error
 from util.text import text_to_char_array
 from util.flags import FLAGS
-from util.spectrogram_augmentations import augment_sparse_deform, augment_freq_time_mask, augment_dropout, augment_pitch_and_tempo, augment_speed_up
+from util.spectrogram_augmentations import augment_freq_time_mask, augment_dropout, augment_pitch_and_tempo, augment_speed_up

 def read_csvs(csv_files):
    source_data = None
@@ -40,11 +40,6 @@ def samples_to_mfccs(samples, sample_rate, train_phase=False):

    # Data Augmentations
    if train_phase:
-        if FLAGS.augmention_sparse_deform:
-            spectrogram = augment_sparse_deform(spectrogram,
-                                                time_warping_para=FLAGS.augmentation_time_warp_max_warping,
-                                                normal_around_warping_std=FLAGS.augmentation_sparse_deform_std_warp)
-
        if FLAGS.augmentation_spec_dropout_keeprate < 1:
            spectrogram = augment_dropout(spectrogram,
                                          keep_prob=FLAGS.augmentation_spec_dropout_keeprate)
--- a/util/flags.py
+++ b/util/flags.py
@@ -27,13 +27,9 @@ def create_flags():
    f.DEFINE_float('data_aug_features_additive', 0, 'std of the Gaussian additive noise')
    f.DEFINE_float('data_aug_features_multiplicative', 0, 'std of normal distribution around 1 for multiplicative noise')

-    f.DEFINE_integer('augmention_sparse_deform', 0, 'whether to use time-warping augmentation')
-    f.DEFINE_integer('augmentation_time_warp_max_warping', 12, 'max value for warping')
-    f.DEFINE_float('augmentation_sparse_deform_std_warp', 0.5, 'std for warping different values to different frequencies')
-
    f.DEFINE_float('augmentation_spec_dropout_keeprate', 1, 'keep rate of dropout augmentation on spectrogram (if 1, no dropout will be performed on spectrogram)')

-    f.DEFINE_integer('augmentation_freq_and_time_masking', 0, 'whether to use frequency and time masking augmentation')
+    f.DEFINE_boolean('augmentation_freq_and_time_masking', False, 'whether to use frequency and time masking augmentation')
    f.DEFINE_integer('augmentation_freq_and_time_masking_freq_mask_range', 5, 'max range of masks in the frequency domain when performing freqtime-mask augmentation')
    f.DEFINE_integer('augmentation_freq_and_time_masking_number_freq_masks', 3, 'number of masks in the frequency domain when performing freqtime-mask augmentation')
    f.DEFINE_integer('augmentation_freq_and_time_masking_time_mask_range', 2, 'max range of masks in the time domain when performing freqtime-mask augmentation')
@@ -41,7 +37,7 @@ def create_flags():

    f.DEFINE_float('augmentation_speed_up_std', 0, 'std for speeding-up tempo. If std is 0, this augmentation is not performed')

-    f.DEFINE_integer('augmentation_pitch_and_tempo_scaling', 0, 'whether to use spectrogram speed and tempo scaling')
+    f.DEFINE_boolean('augmentation_pitch_and_tempo_scaling', False, 'whether to use spectrogram speed and tempo scaling')
    f.DEFINE_float('augmentation_pitch_and_tempo_scaling_min_pitch', 0.95, 'min value of pitch scaling')
    f.DEFINE_float('augmentation_pitch_and_tempo_scaling_max_pitch', 1.2, 'max value of pitch scaling')
    f.DEFINE_float('augmentation_pitch_and_tempo_scaling_max_tempo', 1.2, 'max vlaue of tempo scaling')
--- a/util/sparse_image_warp.py
+++ b/util/sparse_image_warp.py
@@ -1,177 +0,0 @@
-## Implementation of sparse_image_warp that handles dynamic shapes
-from tensorflow.contrib.image.python.ops import dense_image_warp
-from tensorflow.contrib.image.python.ops import interpolate_spline
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-
-
-def _get_grid_locations(image_height, image_width):
-    """Wrapper for array_ops.meshgrid."""
-
-    y_range = math_ops.linspace(0.0, math_ops.to_float(image_height) - 1,
-                                image_height)
-    x_range = math_ops.linspace(0.0, math_ops.to_float(image_width) - 1,
-                                image_width)
-    y_grid, x_grid = array_ops.meshgrid(y_range, x_range, indexing='ij')
-    return array_ops.stack((y_grid, x_grid), -1)
-
-
-def _expand_to_minibatch(array, batch_size):
-    """Tile arbitrarily-sized array to include new batch dimension."""
-    batch_size = array_ops.expand_dims(batch_size, 0)
-    array_ones = array_ops.ones((array_ops.rank(array)), dtype=dtypes.int32)
-    tiles = array_ops.concat([batch_size, array_ones], axis=0)
-    return array_ops.tile(array_ops.expand_dims(array, 0), tiles)
-
-
-def _get_boundary_locations(image_height, image_width, num_points_per_edge):
-    """Compute evenly-spaced indices along edge of image."""
-    image_height = math_ops.to_float(image_height)
-    image_width = math_ops.to_float(image_width)
-    y_range = math_ops.linspace(0.0, image_height - 1, num_points_per_edge + 2)
-    x_range = math_ops.linspace(0.0, image_width - 1, num_points_per_edge + 2)
-    ys, xs = array_ops.meshgrid(y_range, x_range, indexing='ij')
-    is_boundary = math_ops.logical_or(
-                            math_ops.logical_or(math_ops.equal(xs, 0),                  # pylint: disable=bad-continuation
-                                                math_ops.equal(xs, image_width - 1)),
-                            math_ops.logical_or(math_ops.equal(ys, 0),                  # pylint: disable=bad-continuation
-                                                math_ops.equal(ys, image_height - 1)))
-    return array_ops.stack([array_ops.boolean_mask(ys, is_boundary),
-                            array_ops.boolean_mask(xs, is_boundary)], axis=-1)
-
-
-def _add_zero_flow_controls_at_boundary(control_point_locations,
-                                        control_point_flows, image_height,
-                                        image_width, boundary_points_per_edge):
-    """Add control points for zero-flow boundary conditions.
-    Augment the set of control points with extra points on the
-    boundary of the image that have zero flow.
-    Args:
-    control_point_locations: input control points
-    control_point_flows: their flows
-    image_height: image height
-    image_width: image width
-    boundary_points_per_edge: number of points to add in the middle of each
-                            edge (not including the corners).
-                            The total number of points added is
-                            4 + 4*(boundary_points_per_edge).
-    Returns:
-    merged_control_point_locations: augmented set of control point locations
-    merged_control_point_flows: augmented set of control point flows
-    """
-
-    batch_size = tensor_shape.dimension_value(control_point_locations.shape[0])
-
-    boundary_point_locations = _get_boundary_locations(image_height, image_width,
-                                                       boundary_points_per_edge)
-
-    boundary_point_flows = array_ops.zeros([array_ops.shape(boundary_point_locations)[0], 2])
-
-    boundary_point_locations = _expand_to_minibatch(boundary_point_locations,
-                                                    batch_size)
-
-    boundary_point_flows = _expand_to_minibatch(boundary_point_flows, batch_size)
-
-    merged_control_point_locations = array_ops.concat([control_point_locations, boundary_point_locations], 1)
-
-    merged_control_point_flows = array_ops.concat([control_point_flows, boundary_point_flows], 1)
-
-    return merged_control_point_locations, merged_control_point_flows
-
-
-def sparse_image_warp(image,
-                      source_control_point_locations,
-                      dest_control_point_locations,
-                      interpolation_order=2,
-                      regularization_weight=0.0,
-                      num_boundary_points=0,
-                      name='sparse_image_warp'):
-    """Image warping using correspondences between sparse control points.
-    Apply a non-linear warp to the image, where the warp is specified by
-    the source and destination locations of a (potentially small) number of
-    control points. First, we use a polyharmonic spline
-    (`tf.contrib.image.interpolate_spline`) to interpolate the displacements
-    between the corresponding control points to a dense flow field.
-    Then, we warp the image using this dense flow field
-    (`tf.contrib.image.dense_image_warp`).
-    Let t index our control points. For regularization_weight=0, we have:
-    warped_image[b, dest_control_point_locations[b, t, 0],
-                    dest_control_point_locations[b, t, 1], :] =
-    image[b, source_control_point_locations[b, t, 0],
-            source_control_point_locations[b, t, 1], :].
-    For regularization_weight > 0, this condition is met approximately, since
-    regularized interpolation trades off smoothness of the interpolant vs.
-    reconstruction of the interpolant at the control points.
-    See `tf.contrib.image.interpolate_spline` for further documentation of the
-    interpolation_order and regularization_weight arguments.
-    Args:
-    image: `[batch, height, width, channels]` float `Tensor`
-    source_control_point_locations: `[batch, num_control_points, 2]` float
-        `Tensor`
-    dest_control_point_locations: `[batch, num_control_points, 2]` float
-        `Tensor`
-    interpolation_order: polynomial order used by the spline interpolation
-    regularization_weight: weight on smoothness regularizer in interpolation
-    num_boundary_points: How many zero-flow boundary points to include at
-        each image edge.Usage:
-        num_boundary_points=0: don't add zero-flow points
-        num_boundary_points=1: 4 corners of the image
-        num_boundary_points=2: 4 corners and one in the middle of each edge
-            (8 points total)
-        num_boundary_points=n: 4 corners and n-1 along each edge
-    name: A name for the operation (optional).
-    Note that image and offsets can be of type tf.half, tf.float32, or
-    tf.float64, and do not necessarily have to be the same type.
-    Returns:
-    warped_image: `[batch, height, width, channels]` float `Tensor` with same
-        type as input image.
-    flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense
-        flow field produced by the interpolation.
-    """
-
-    image = ops.convert_to_tensor(image)
-    source_control_point_locations = ops.convert_to_tensor(
-        source_control_point_locations)
-    dest_control_point_locations = ops.convert_to_tensor(
-        dest_control_point_locations)
-
-    control_point_flows = (
-        dest_control_point_locations - source_control_point_locations)
-
-    clamp_boundaries = num_boundary_points > 0
-    boundary_points_per_edge = num_boundary_points - 1
-
-    with ops.name_scope(name):
-        batch_size, image_height, image_width = (array_ops.shape(image)[0],
-                                                 array_ops.shape(image)[1],
-                                                 array_ops.shape(image)[2])
-        # This generates the dense locations where the interpolant
-        # will be evaluated.
-        grid_locations = _get_grid_locations(image_height, image_width)
-
-        flattened_grid_locations = array_ops.reshape(grid_locations,
-                                                     [image_height*image_width, 2])
-
-        flattened_grid_locations = _expand_to_minibatch(flattened_grid_locations,
-                                                        batch_size)
-
-        if clamp_boundaries:
-            (dest_control_point_locations,
-             control_point_flows) = _add_zero_flow_controls_at_boundary(dest_control_point_locations,
-                                                                        control_point_flows, image_height,
-                                                                        image_width, boundary_points_per_edge)
-
-        flattened_flows = interpolate_spline.interpolate_spline(dest_control_point_locations, control_point_flows,
-                                                                flattened_grid_locations, interpolation_order,
-                                                                regularization_weight)
-
-        dense_flows = array_ops.reshape(flattened_flows,
-                                        [batch_size, image_height, image_width, 2])
-
-        warped_image = dense_image_warp.dense_image_warp(image, dense_flows)
-
-        return warped_image, dense_flows
--- a/util/spectrogram_augmentations.py
+++ b/util/spectrogram_augmentations.py
@@ -1,33 +1,4 @@
 import tensorflow as tf
-from util.sparse_image_warp import sparse_image_warp
-
-def augment_sparse_deform(mel_spectrogram,
-                          time_warping_para=12,
-                          normal_around_warping_std=0.5):
-    mel_spectrogram = tf.expand_dims(mel_spectrogram, -1)
-    freq_max = tf.shape(mel_spectrogram)[1]
-    time_max = tf.shape(mel_spectrogram)[2]
-    center_freq = tf.cast(freq_max, tf.float32)/2.0
-    random_time_point = tf.random.uniform(shape=(), minval=time_warping_para, maxval=tf.cast(time_max, tf.float32) - time_warping_para)
-    chosen_warping = tf.random.uniform(shape=(), minval=0, maxval=time_warping_para)
-    #add different warping values to different frequencies
-    normal_around_warping = tf.random.normal(mean=chosen_warping, stddev=normal_around_warping_std, shape=(3,))
-
-    control_point_freqs = tf.stack([0.0, center_freq, tf.cast(freq_max, tf.float32)], axis=0)
-    control_point_times_src = tf.stack([random_time_point, random_time_point, random_time_point], axis=0)
-    control_point_times_dst = control_point_times_src+normal_around_warping
-
-    control_src = tf.expand_dims(tf.stack([control_point_freqs, control_point_times_src], axis=-1), 0)
-    control_dst = tf.expand_dims(tf.stack([control_point_freqs, control_point_times_dst], axis=1), 0)
-    warped_mel_spectrogram, _ = sparse_image_warp(mel_spectrogram,
-                                                  source_control_point_locations=control_src,
-                                                  dest_control_point_locations=control_dst,
-                                                  interpolation_order=2,
-                                                  regularization_weight=0,
-                                                  num_boundary_points=1
-                                                  )
-    warped_mel_spectrogram = warped_mel_spectrogram[:, :, :, 0]
-    return warped_mel_spectrogram

 def augment_freq_time_mask(mel_spectrogram,
                           frequency_masking_para=30,