Fix bug in dropout.

PiperOrigin-RevId: 295776879
Change-Id: Ic25abd0fe0e442f37a32c7f68307e43728658b71
Authored by Meghna Natraj on 2020-02-18 11:11:28 -08:00; committed by TensorFlower Gardener
parent a66d4828f3
commit d9c9c92c7c
3 changed files with 45 additions and 47 deletions
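
What the change does: the model-building functions used to create a placeholder named `dropout_prob` that was actually fed the keep probability (1.0 to disable dropout), then converted it at most call sites with `1 - (dropout_prob)`, while one site still passed it to the deprecated `tf.compat.v1.nn.dropout`, whose second argument is `keep_prob`. The diff renames the placeholder to `dropout_rate`, passes it everywhere as `rate=` (the TF 2.x convention: the probability of dropping a unit), and updates the feed values to match. A minimal sketch of the two conventions, assuming TensorFlow 2.x with eager execution; the tensor shape and the 0.5 value are only illustrative:

```python
import tensorflow as tf

# Illustrative activations; the shape and the 0.5 value are made up.
x = tf.ones([2, 4])

keep_prob = 0.5                 # TF1 convention: probability of KEEPING a unit
dropout_rate = 1.0 - keep_prob  # TF2 convention: probability of DROPPING a unit

# Deprecated TF1-style call: the second argument is keep_prob.
y_v1 = tf.compat.v1.nn.dropout(x, keep_prob)

# TF2-style call: dropout strength is passed as rate.
y_v2 = tf.nn.dropout(x, rate=dropout_rate)

# Disabling dropout for evaluation means keep_prob = 1.0, i.e. rate = 0.0,
# which is why the feed values in the training script change from 1.0 to 0.0.
y_eval = tf.nn.dropout(x, rate=0.0)
```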

View File

@@ -187,7 +187,7 @@ def create_single_fc_model(fingerprint_input, model_settings, is_training):
     placeholder.
   """
   if is_training:
-    dropout_prob = tf.compat.v1.placeholder(tf.float32, name='dropout_prob')
+    dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
   fingerprint_size = model_settings['fingerprint_size']
   label_count = model_settings['label_count']
   weights = tf.compat.v1.get_variable(
@@ -199,7 +199,7 @@ def create_single_fc_model(fingerprint_input, model_settings, is_training):
       shape=[label_count])
   logits = tf.matmul(fingerprint_input, weights) + bias
   if is_training:
-    return logits, dropout_prob
+    return logits, dropout_rate
   else:
     return logits
@@ -253,7 +253,7 @@ def create_conv_model(fingerprint_input, model_settings, is_training):
     placeholder.
   """
   if is_training:
-    dropout_prob = tf.compat.v1.placeholder(tf.float32, name='dropout_prob')
+    dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
   input_frequency_size = model_settings['fingerprint_width']
   input_time_size = model_settings['spectrogram_length']
   fingerprint_4d = tf.reshape(fingerprint_input,
@@ -276,7 +276,7 @@ def create_conv_model(fingerprint_input, model_settings, is_training):
                             padding='SAME') + first_bias
   first_relu = tf.nn.relu(first_conv)
   if is_training:
-    first_dropout = tf.nn.dropout(first_relu, 1 - (dropout_prob))
+    first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
   else:
     first_dropout = first_relu
   max_pool = tf.nn.max_pool2d(input=first_dropout,
@@ -303,7 +303,7 @@ def create_conv_model(fingerprint_input, model_settings, is_training):
                              padding='SAME') + second_bias
   second_relu = tf.nn.relu(second_conv)
   if is_training:
-    second_dropout = tf.compat.v1.nn.dropout(second_relu, dropout_prob)
+    second_dropout = tf.nn.dropout(second_relu, rate=dropout_rate)
   else:
     second_dropout = second_relu
   second_conv_shape = second_dropout.get_shape()
@@ -325,7 +325,7 @@ def create_conv_model(fingerprint_input, model_settings, is_training):
       shape=[label_count])
   final_fc = tf.matmul(flattened_second_conv, final_fc_weights) + final_fc_bias
   if is_training:
-    return final_fc, dropout_prob
+    return final_fc, dropout_rate
   else:
     return final_fc
@@ -377,7 +377,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
     placeholder.
   """
   if is_training:
-    dropout_prob = tf.compat.v1.placeholder(tf.float32, name='dropout_prob')
+    dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
   input_frequency_size = model_settings['fingerprint_width']
   input_time_size = model_settings['spectrogram_length']
   fingerprint_4d = tf.reshape(fingerprint_input,
@@ -402,7 +402,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
                             padding='VALID') + first_bias
   first_relu = tf.nn.relu(first_conv)
   if is_training:
-    first_dropout = tf.nn.dropout(first_relu, 1 - (dropout_prob))
+    first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
   else:
     first_dropout = first_relu
   first_conv_output_width = math.floor(
@@ -426,7 +426,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
       shape=[first_fc_output_channels])
   first_fc = tf.matmul(flattened_first_conv, first_fc_weights) + first_fc_bias
   if is_training:
-    second_fc_input = tf.nn.dropout(first_fc, 1 - (dropout_prob))
+    second_fc_input = tf.nn.dropout(first_fc, rate=dropout_rate)
   else:
     second_fc_input = first_fc
   second_fc_output_channels = 128
@@ -440,7 +440,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
       shape=[second_fc_output_channels])
   second_fc = tf.matmul(second_fc_input, second_fc_weights) + second_fc_bias
   if is_training:
-    final_fc_input = tf.nn.dropout(second_fc, 1 - (dropout_prob))
+    final_fc_input = tf.nn.dropout(second_fc, rate=dropout_rate)
   else:
     final_fc_input = second_fc
   label_count = model_settings['label_count']
@@ -454,7 +454,7 @@ def create_low_latency_conv_model(fingerprint_input, model_settings,
       shape=[label_count])
   final_fc = tf.matmul(final_fc_input, final_fc_weights) + final_fc_bias
   if is_training:
-    return final_fc, dropout_prob
+    return final_fc, dropout_rate
   else:
     return final_fc
@@ -515,7 +515,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
     ValueError: If the inputs tensor is incorrectly shaped.
   """
   if is_training:
-    dropout_prob = tf.compat.v1.placeholder(tf.float32, name='dropout_prob')
+    dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
   input_frequency_size = model_settings['fingerprint_width']
   input_time_size = model_settings['spectrogram_length']
@@ -525,12 +525,12 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
   if len(input_shape) != 2:
     raise ValueError('Inputs to `SVDF` should have rank == 2.')
   if input_shape[-1].value is None:
-    raise ValueError('The last dimension of the inputs to `SVDF` '
+    raise ValueError('The last dimension of the input to `SVDF` '
                      'should be defined. Found `None`.')
   if input_shape[-1].value % input_frequency_size != 0:
-    raise ValueError('Inputs feature dimension %d must be a multiple of '
-                     'frame size %d', fingerprint_input.shape[-1].value,
-                     input_frequency_size)
+    raise ValueError('The last dimension of the input to `SVDF` = {0} must be '
+                     'a multiple of the frame size = {1}'.format(
+                         input_shape.shape[-1].value, input_frequency_size))
   # Set number of units (i.e. nodes) and rank.
   rank = 2
@@ -545,9 +545,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
       trainable=False,
       name='runtime-memory')
   first_time_flag = tf.compat.v1.get_variable(
-      name="first_time_flag",
-      dtype=tf.int32,
-      initializer=1)
+      name='first_time_flag', dtype=tf.int32, initializer=1)
   # Determine the number of new frames in the input, such that we only operate
   # on those. For training we do not use the memory, and thus use all frames
   # provided in the input.
@@ -624,7 +622,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
   first_relu = tf.nn.relu(first_bias)
   if is_training:
-    first_dropout = tf.nn.dropout(first_relu, 1 - (dropout_prob))
+    first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
   else:
     first_dropout = first_relu
@@ -639,7 +637,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
       shape=[first_fc_output_channels])
   first_fc = tf.matmul(first_dropout, first_fc_weights) + first_fc_bias
   if is_training:
-    second_fc_input = tf.nn.dropout(first_fc, 1 - (dropout_prob))
+    second_fc_input = tf.nn.dropout(first_fc, rate=dropout_rate)
   else:
     second_fc_input = first_fc
   second_fc_output_channels = 256
@@ -653,7 +651,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
       shape=[second_fc_output_channels])
   second_fc = tf.matmul(second_fc_input, second_fc_weights) + second_fc_bias
   if is_training:
-    final_fc_input = tf.nn.dropout(second_fc, 1 - (dropout_prob))
+    final_fc_input = tf.nn.dropout(second_fc, rate=dropout_rate)
   else:
     final_fc_input = second_fc
   label_count = model_settings['label_count']
@@ -667,7 +665,7 @@ def create_low_latency_svdf_model(fingerprint_input, model_settings,
       shape=[label_count])
   final_fc = tf.matmul(final_fc_input, final_fc_weights) + final_fc_bias
   if is_training:
-    return final_fc, dropout_prob
+    return final_fc, dropout_rate
   else:
     return final_fc
@@ -712,7 +710,7 @@ def create_tiny_conv_model(fingerprint_input, model_settings, is_training):
     placeholder.
   """
   if is_training:
-    dropout_prob = tf.compat.v1.placeholder(tf.float32, name='dropout_prob')
+    dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
   input_frequency_size = model_settings['fingerprint_width']
   input_time_size = model_settings['spectrogram_length']
   fingerprint_4d = tf.reshape(fingerprint_input,
@@ -736,7 +734,7 @@ def create_tiny_conv_model(fingerprint_input, model_settings, is_training):
                             padding='SAME') + first_bias
   first_relu = tf.nn.relu(first_conv)
   if is_training:
-    first_dropout = tf.nn.dropout(first_relu, 1 - (dropout_prob))
+    first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
   else:
     first_dropout = first_relu
   first_dropout_shape = first_dropout.get_shape()
@@ -759,7 +757,7 @@ def create_tiny_conv_model(fingerprint_input, model_settings, is_training):
   final_fc = (
       tf.matmul(flattened_first_dropout, final_fc_weights) + final_fc_bias)
   if is_training:
-    return final_fc, dropout_prob
+    return final_fc, dropout_rate
   else:
     return final_fc
@@ -817,7 +815,7 @@ def create_tiny_embedding_conv_model(fingerprint_input, model_settings,
     placeholder.
   """
   if is_training:
-    dropout_prob = tf.compat.v1.placeholder(tf.float32, name='dropout_prob')
+    dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
   input_frequency_size = model_settings['fingerprint_width']
   input_time_size = model_settings['spectrogram_length']
   fingerprint_4d = tf.reshape(fingerprint_input,
@@ -843,7 +841,7 @@ def create_tiny_embedding_conv_model(fingerprint_input, model_settings,
                             padding='SAME') + first_bias
   first_relu = tf.nn.relu(first_conv)
   if is_training:
-    first_dropout = tf.nn.dropout(first_relu, 1 - (dropout_prob))
+    first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
   else:
     first_dropout = first_relu
@@ -870,7 +868,7 @@ def create_tiny_embedding_conv_model(fingerprint_input, model_settings,
                              padding='SAME') + second_bias
   second_relu = tf.nn.relu(second_conv)
   if is_training:
-    second_dropout = tf.nn.dropout(second_relu, 1 - (dropout_prob))
+    second_dropout = tf.nn.dropout(second_relu, rate=dropout_rate)
   else:
     second_dropout = second_relu
@@ -894,6 +892,6 @@ def create_tiny_embedding_conv_model(fingerprint_input, model_settings,
   final_fc = (
       tf.matmul(flattened_second_dropout, final_fc_weights) + final_fc_bias)
   if is_training:
-    return final_fc, dropout_prob
+    return final_fc, dropout_rate
   else:
     return final_fc
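
Besides the rename, the SVDF model's shape-validation error is also rewritten: the old code handed printf-style arguments straight to `ValueError`, which stores them as an argument tuple rather than interpolating them into the message, so the offending dimensions were never shown. A small sketch of the difference (plain Python; the dimension values 400 and 40 are made up for illustration):

```python
# Old pattern: extra arguments are stored on the exception, not interpolated.
err_old = ValueError('Inputs feature dimension %d must be a multiple of '
                     'frame size %d', 400, 40)
print(err_old)
# -> ('Inputs feature dimension %d must be a multiple of frame size %d', 400, 40)

# New pattern: the message is formatted before the exception is constructed.
err_new = ValueError('The last dimension of the input to `SVDF` = {0} must be '
                     'a multiple of the frame size = {1}'.format(400, 40))
print(err_new)
# -> The last dimension of the input to `SVDF` = 400 must be a multiple of the frame size = 40
```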

View File

@@ -53,12 +53,12 @@ class ModelsTest(test.TestCase):
     model_settings = self._modelSettings()
     with self.cached_session() as sess:
       fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]])
-      logits, dropout_prob = models.create_model(fingerprint_input,
-                                                 model_settings, "conv", True)
+      logits, dropout_rate = models.create_model(
+          fingerprint_input, model_settings, "conv", True)
       self.assertIsNotNone(logits)
-      self.assertIsNotNone(dropout_prob)
+      self.assertIsNotNone(dropout_rate)
       self.assertIsNotNone(sess.graph.get_tensor_by_name(logits.name))
-      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name))
+      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_rate.name))
   @test_util.run_deprecated_v1
   def testCreateModelConvInference(self):
@@ -75,24 +75,24 @@ class ModelsTest(test.TestCase):
     model_settings = self._modelSettings()
     with self.cached_session() as sess:
       fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]])
-      logits, dropout_prob = models.create_model(
+      logits, dropout_rate = models.create_model(
           fingerprint_input, model_settings, "low_latency_conv", True)
       self.assertIsNotNone(logits)
-      self.assertIsNotNone(dropout_prob)
+      self.assertIsNotNone(dropout_rate)
       self.assertIsNotNone(sess.graph.get_tensor_by_name(logits.name))
-      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name))
+      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_rate.name))
   @test_util.run_deprecated_v1
   def testCreateModelFullyConnectedTraining(self):
     model_settings = self._modelSettings()
     with self.cached_session() as sess:
       fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]])
-      logits, dropout_prob = models.create_model(
+      logits, dropout_rate = models.create_model(
          fingerprint_input, model_settings, "single_fc", True)
       self.assertIsNotNone(logits)
-      self.assertIsNotNone(dropout_prob)
+      self.assertIsNotNone(dropout_rate)
       self.assertIsNotNone(sess.graph.get_tensor_by_name(logits.name))
-      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name))
+      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_rate.name))
   def testCreateModelBadArchitecture(self):
     model_settings = self._modelSettings()
@@ -108,12 +108,12 @@ class ModelsTest(test.TestCase):
     model_settings = self._modelSettings()
     with self.cached_session() as sess:
       fingerprint_input = tf.zeros([1, model_settings["fingerprint_size"]])
-      logits, dropout_prob = models.create_model(
+      logits, dropout_rate = models.create_model(
          fingerprint_input, model_settings, "tiny_conv", True)
       self.assertIsNotNone(logits)
-      self.assertIsNotNone(dropout_prob)
+      self.assertIsNotNone(dropout_rate)
       self.assertIsNotNone(sess.graph.get_tensor_by_name(logits.name))
-      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_prob.name))
+      self.assertIsNotNone(sess.graph.get_tensor_by_name(dropout_rate.name))
 if __name__ == "__main__":

View File

@@ -132,7 +132,7 @@ def main(_):
   else:
     fingerprint_input = input_placeholder
-  logits, dropout_prob = models.create_model(
+  logits, dropout_rate = models.create_model(
       fingerprint_input,
       model_settings,
       FLAGS.model_architecture,
@@ -248,7 +248,7 @@ def main(_):
             fingerprint_input: train_fingerprints,
             ground_truth_input: train_ground_truth,
             learning_rate_input: learning_rate_value,
-            dropout_prob: 0.5
+            dropout_rate: 0.5
         })
     train_writer.add_summary(train_summary, training_step)
     tf.compat.v1.logging.info(
@@ -271,7 +271,7 @@ def main(_):
             feed_dict={
                 fingerprint_input: validation_fingerprints,
                 ground_truth_input: validation_ground_truth,
-                dropout_prob: 1.0
+                dropout_rate: 0.0
             })
         validation_writer.add_summary(validation_summary, training_step)
         batch_size = min(FLAGS.batch_size, set_size - i)
@@ -305,7 +305,7 @@ def main(_):
         feed_dict={
             fingerprint_input: test_fingerprints,
             ground_truth_input: test_ground_truth,
-            dropout_prob: 1.0
+            dropout_rate: 0.0
         })
     batch_size = min(FLAGS.batch_size, set_size - i)
     total_accuracy += (test_accuracy * batch_size) / set_size
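
Note on the feed values in the training script: with the old keep_prob-style placeholder, "no dropout" at validation and test time was expressed as 1.0, whereas with a rate-style placeholder it is 0.0; the training value of 0.5 happens to be the same under both conventions. A minimal graph-mode sketch of driving the renamed placeholder, assuming TensorFlow 2.x compat.v1 APIs (the placeholder, tensor shape, and session usage here are a standalone illustration, not the real training graph):

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# Stand-in for the dropout placeholder the model-building functions return;
# the tensor shape and this tiny graph are illustrative only.
dropout_rate = tf.compat.v1.placeholder(tf.float32, name='dropout_rate')
x = tf.ones([1, 8])
y = tf.nn.dropout(x, rate=dropout_rate)  # same call pattern as the fixed models

with tf.compat.v1.Session() as sess:
  train_out = sess.run(y, feed_dict={dropout_rate: 0.5})  # training: drop half the units
  eval_out = sess.run(y, feed_dict={dropout_rate: 0.0})   # eval: dropout disabled
  print(train_out)
  print(eval_out)
```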