Reintroduced feed_dict for context dependent dropout rates

Tilman Kamp 2016-10-12 11:48:55 +02:00
parent f3439b72d5
commit 9fb60a7ebc
1 changed file with 30 additions and 22 deletions


@@ -148,7 +148,16 @@
},
"outputs": [],
"source": [
"dropout_rate = 0.05 # TODO: Validate this is a reasonable value"
"dropout_rate = 0.05 # TODO: Validate this is a reasonable value\n",
"\n",
"# This global placeholder will be used for all dropout definitions\n",
"dropout_rate_placeholder = tf.placeholder(tf.float32)\n",
"\n",
"# The feed_dict used for training employs the given dropout_rate\n",
"feed_dict_train = { dropout_rate_placeholder: dropout_rate }\n",
"\n",
"# While the feed_dict used for validation, test and train progress reporting employs zero dropout\n",
"feed_dict = { dropout_rate_placeholder: 0.0 }"
]
},
{
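The hunk above replaces the plain `dropout_rate` constant with a placeholder that is fed at run time, so a single graph can run with dropout enabled (training) or disabled (validation, test and progress reporting). A minimal runnable sketch of that switching pattern, using one hypothetical dropout layer instead of the notebook's full network:

```python
import tensorflow as tf

dropout_rate = 0.05

# Global placeholder used by every dropout definition
dropout_rate_placeholder = tf.placeholder(tf.float32)

# Training feeds the configured dropout rate, reporting feeds zero
feed_dict_train = {dropout_rate_placeholder: dropout_rate}
feed_dict = {dropout_rate_placeholder: 0.0}

# Hypothetical one-layer graph, just to show the run-time switch
x = tf.placeholder(tf.float32, [None, 4])
y = tf.nn.dropout(x, 1.0 - dropout_rate_placeholder)

with tf.Session() as session:
    data = [[1.0, 2.0, 3.0, 4.0]]

    # Training-style run: some units are zeroed, the rest rescaled
    fd = dict(feed_dict_train)
    fd[x] = data
    print(session.run(y, feed_dict=fd))

    # Reporting-style run: keep probability 1.0, output equals the input
    fd = dict(feed_dict)
    fd[x] = data
    print(session.run(y, feed_dict=fd))
```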
@@ -401,7 +410,7 @@
},
"outputs": [],
"source": [
"def BiRNN(batch_x, n_steps, dropout_rate):\n",
"def BiRNN(batch_x, n_steps):\n",
" # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]\n",
" batch_x = tf.transpose(batch_x, [1, 0, 2]) # Permute n_steps and batch_size\n",
" # Reshape to prepare input for first layer\n",
@@ -411,17 +420,17 @@
" b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())\n",
" h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())\n",
" layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)\n",
" layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate))\n",
" layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate_placeholder))\n",
" #Hidden layer with clipped RELU activation and dropout\n",
" b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())\n",
" h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())\n",
" layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)\n",
" layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate))\n",
" layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate_placeholder))\n",
" #Hidden layer with clipped RELU activation and dropout\n",
" b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())\n",
" h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())\n",
" layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)\n",
" layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate))\n",
" layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate_placeholder))\n",
" \n",
" # Define lstm cells with tensorflow\n",
" # Forward direction cell\n",
@@ -447,7 +456,7 @@
" b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())\n",
" h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())\n",
" layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)\n",
" layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate))\n",
" layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate_placeholder))\n",
" #Hidden layer of logits\n",
" b6 = variable_on_cpu('b6', [n_hidden_6], tf.random_normal_initializer())\n",
" h6 = variable_on_cpu('h6', [n_hidden_5, n_hidden_6], tf.random_normal_initializer())\n",
@@ -468,7 +477,7 @@
"source": [
"The first few lines of the function `BiRNN`\n",
"```python\n",
"def BiRNN(batch_x, n_steps, dropout_rate):\n",
"def BiRNN(batch_x, n_steps):\n",
" # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]\n",
" batch_x = tf.transpose(batch_x, [1, 0, 2]) # Permute n_steps and batch_size\n",
" # Reshape to prepare input for first layer\n",
@@ -483,7 +492,7 @@
" b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())\n",
" h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())\n",
" layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)\n",
" layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate))\n",
" layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate_placeholder))\n",
" ...\n",
"```\n",
"pass `batch_x` through the first layer of the non-recurrent neural network, then applies dropout to the result.\n",
@@ -494,12 +503,12 @@
" b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())\n",
" h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())\n",
" layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip) \n",
" layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate))\n",
" layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate_placeholder))\n",
" #Hidden layer with clipped RELU activation and dropout\n",
" b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())\n",
" h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())\n",
" layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)\n",
" layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate))\n",
" layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate_placeholder))\n",
"```\n",
"\n",
"Next we create the forward and backward LSTM units\n",
@@ -543,7 +552,7 @@
" b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())\n",
" h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())\n",
" layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)\n",
" layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate))\n",
" layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate_placeholder))\n",
"```\n",
"\n",
"The next line of `BiRNN`\n",
@@ -584,7 +593,7 @@
"source": [
"In accord with [Deep Speech: Scaling up end-to-end speech recognition](http://arxiv.org/abs/1412.5567), the loss function used by our network should be the CTC loss function[[2]](http://www.cs.toronto.edu/~graves/preprint.pdf). Conveniently, this loss function is implemented in TensorFlow. Thus, we can simply make use of this implementation to define our loss.\n",
"\n",
"To do so we introduce a utility function `calculate_accuracy_and_loss()` beam search decodes a mini-batch and calculates the average loss and accuracy. Next to loss and accuracy it returns the decoded result and the batch's original Y."
"To do so we introduce a utility function `calculate_accuracy_and_loss()` that beam search decodes a mini-batch and calculates the average loss and accuracy. Next to loss and accuracy it returns the decoded result and the batch's original Y."
]
},
{
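Since the cell above relies on TensorFlow's built-in CTC implementation, a small self-contained toy example of the loss op may help. The argument order shown matches the 2016-era API (`inputs, labels, sequence_length`); later releases reorder it to (`labels, inputs, sequence_length`), so treat the exact call as illustrative:

```python
import numpy as np
import tensorflow as tf

# Toy shapes: logits are time-major [n_steps, batch_size, n_classes],
# with the last class index reserved for the CTC blank.
n_steps, batch_size, n_classes = 5, 1, 4
logits = tf.constant(np.random.randn(n_steps, batch_size, n_classes).astype(np.float32))

# Sparse labels for a single reference sequence "0 1 2"
labels = tf.SparseTensor([[0, 0], [0, 1], [0, 2]], [0, 1, 2], [1, 3])
seq_len = tf.constant([n_steps], dtype=tf.int32)

# Per-sequence CTC loss, averaged over the (toy) batch
total_loss = tf.nn.ctc_loss(logits, labels, seq_len)
avg_loss = tf.reduce_mean(total_loss)

with tf.Session() as session:
    print(session.run(avg_loss))
```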
@@ -595,12 +604,12 @@
},
"outputs": [],
"source": [
"def calculate_accuracy_and_loss(n_steps, batch_set, dropout_rate):\n",
"def calculate_accuracy_and_loss(n_steps, batch_set):\n",
" # Obtain the next batch of data\n",
" batch_x, batch_y, batch_seq_len = batch_set.next_batch(batch_size)\n",
"\n",
" # Calculate the logits of the batch using BiRNN\n",
" logits = BiRNN(batch_x, n_steps, dropout_rate)\n",
" logits = BiRNN(batch_x, n_steps)\n",
" \n",
" # CTC loss requires the logits be time major\n",
" logits = tf.transpose(logits, [1, 0, 2])\n",
@@ -630,7 +639,7 @@
"source": [
"The first lines of `calculate_accuracy_and_loss()`\n",
"```python\n",
"def calculate_accuracy_and_loss(n_steps, batch_set, dropout_rate):\n",
"def calculate_accuracy_and_loss(n_steps, batch_set):\n",
" # Obtain the next batch of data\n",
" batch_x, batch_y, batch_seq_len = batch_set.next_batch(batch_size)\n",
"```\n",
@@ -639,7 +648,7 @@
"The next line\n",
"```python\n",
" # Calculate the logits from the BiRNN\n",
" logits = BiRNN(batch_x, n_steps, dropout_rate)\n",
" logits = BiRNN(batch_x, n_steps)\n",
"```\n",
"calls `BiRNN()` with a batch of data and does inference on the batch.\n",
"\n",
@@ -872,8 +881,7 @@
" # batch along with the original batch's labels (Y) of this tower\n",
" avg_loss, accuracy, decoded, labels = calculate_accuracy_and_loss(\\\n",
" n_steps, \\\n",
" batch_set, \\\n",
" dropout_rate if (optimizer is not None) else 0.0 \\\n",
" batch_set \\\n",
" )\n",
" \n",
" # Allow for variables to be re-used by the next tower\n",
@@ -1121,7 +1129,7 @@
" \n",
" # Iterating over the towers\n",
" for i in range(len(tower_decodings)):\n",
" decoded, labels = session.run([tower_decodings[i], tower_labels[i]])\n",
" decoded, labels = session.run([tower_decodings[i], tower_labels[i]], feed_dict)\n",
" originals.extend(sparse_tensor_value_to_text(labels))\n",
" results.extend(sparse_tensor_value_to_text(decoded))\n",
" \n",
@@ -1233,14 +1241,14 @@
" # Loop over the batches\n",
" for batch in range(total_batch/len(available_devices)):\n",
" # Compute the average loss for the last batch\n",
" _, batch_avg_loss = session.run([apply_gradient_op, tower_loss])\n",
" _, batch_avg_loss = session.run([apply_gradient_op, tower_loss], feed_dict_train)\n",
"\n",
" # Add batch to total_accuracy\n",
" total_accuracy += session.run(accuracy)\n",
" total_accuracy += session.run(accuracy, feed_dict_train)\n",
"\n",
" # Log all variable states in current step\n",
" step = epoch * total_batch + batch * len(available_devices)\n",
" summary_str = session.run(merged)\n",
" summary_str = session.run(merged, feed_dict_train)\n",
" writer.add_summary(summary_str, step)\n",
" writer.flush()\n",
" \n",