Fixed dropout handling and other fixes

parent 2890264b04
commit f3439b72d5

DeepSpeech.ipynb (153 lines changed)
@@ -76,18 +76,19 @@
 },
 "outputs": [],
 "source": [
+"import os\n",
 "import time\n",
-"import os.path\n",
+"import json\n",
+"import datetime\n",
 "import tempfile\n",
+"import subprocess\n",
 "import numpy as np\n",
 "import tensorflow as tf\n",
-"import json\n",
-"import subprocess\n",
-"import datetime\n",
+"from util.log import merge_logs\n",
 "from util.gpu import get_available_gpus\n",
+"from util.importers.ted_lium import read_data_sets\n",
 "from util.text import sparse_tensor_value_to_text, wers\n",
-"from tensorflow.python.ops import ctc_ops\n",
-"from util.importers.ted_lium import read_data_sets"
+"from tensorflow.python.ops import ctc_ops"
 ]
 },
 {
@@ -125,7 +126,7 @@
 "training_iters = 1250 # TODO: Determine a reasonable value for this\n",
 "batch_size = 1 # TODO: Determine a reasonable value for this\n",
 "display_step = 10 # TODO: Determine a reasonable value for this\n",
-"validation_step = 50\n",
+"validation_step = 50 # TODO: Determine a reasonable value for this\n",
 "checkpoint_step = 1000 # TODO: Determine a reasonable value for this\n",
 "checkpoint_dir = tempfile.gettempdir() # TODO: Determine a reasonable value for this"
 ]
@@ -147,7 +148,7 @@
 },
 "outputs": [],
 "source": [
-"dropout_rate = 0.01 # TODO: Validate this is a reasonable value"
+"dropout_rate = 0.05 # TODO: Validate this is a reasonable value"
 ]
 },
 {
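A note for readers of this hunk: `dropout_rate` is the probability of dropping an activation, while `tf.nn.dropout` in the TensorFlow releases this notebook targets takes the keep probability, which is why the layer code below passes `(1.0 - dropout_rate)`. A minimal sketch of that relationship (shapes and values are illustrative only):

```python
import tensorflow as tf

dropout_rate = 0.05             # probability of dropping an activation
keep_prob = 1.0 - dropout_rate  # what tf.nn.dropout actually expects

x = tf.ones([4, 8])
# Kept activations are scaled by 1/keep_prob, so the expected value is unchanged
y = tf.nn.dropout(x, keep_prob)
```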
@@ -400,7 +401,7 @@
 },
 "outputs": [],
 "source": [
-"def BiRNN(batch_x, n_steps, dropout):\n",
+"def BiRNN(batch_x, n_steps, dropout_rate):\n",
 " # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]\n",
 " batch_x = tf.transpose(batch_x, [1, 0, 2]) # Permute n_steps and batch_size\n",
 " # Reshape to prepare input for first layer\n",
@@ -410,20 +411,17 @@
 " b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())\n",
 " h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())\n",
 " layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)\n",
-" if dropout:\n",
-" layer_1 = tf.nn.dropout(layer_1, (1 - dropout_rate))\n",
+" layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate))\n",
 " #Hidden layer with clipped RELU activation and dropout\n",
 " b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())\n",
 " h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())\n",
 " layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)\n",
-" if dropout:\n",
-" layer_2 = tf.nn.dropout(layer_2, (1 - dropout_rate))\n",
+" layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate))\n",
 " #Hidden layer with clipped RELU activation and dropout\n",
 " b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())\n",
 " h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())\n",
 " layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)\n",
-" if dropout:\n",
-" layer_3 = tf.nn.dropout(layer_3, (1 - dropout_rate))\n",
+" layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate))\n",
 " \n",
 " # Define lstm cells with tensorflow\n",
 " # Forward direction cell\n",
@@ -449,8 +447,7 @@
 " b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())\n",
 " h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())\n",
 " layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)\n",
-" if dropout:\n",
-" layer_5 = tf.nn.dropout(layer_5, (1 - dropout_rate))\n",
+" layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate))\n",
 " #Hidden layer of logits\n",
 " b6 = variable_on_cpu('b6', [n_hidden_6], tf.random_normal_initializer())\n",
 " h6 = variable_on_cpu('h6', [n_hidden_5, n_hidden_6], tf.random_normal_initializer())\n",
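The pattern this commit moves to, replacing the boolean `dropout` flag with a numeric `dropout_rate` that is applied unconditionally, works because a rate of 0.0 (keep probability 1.0) turns `tf.nn.dropout` into an identity. A condensed sketch of one such layer under that convention (names follow the notebook, but this is not the notebook's exact code):

```python
import tensorflow as tf

def clipped_relu_layer(x, w, b, relu_clip, dropout_rate):
    # Clipped ReLU as in BiRNN, then dropout applied unconditionally;
    # passing dropout_rate == 0.0 keeps every activation.
    layer = tf.minimum(tf.nn.relu(tf.add(tf.matmul(x, w), b)), relu_clip)
    return tf.nn.dropout(layer, 1.0 - dropout_rate)
```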
@@ -471,7 +468,7 @@
 "source": [
 "The first few lines of the function `BiRNN`\n",
 "```python\n",
-"def BiRNN(batch_x, n_steps, dropout=True):\n",
+"def BiRNN(batch_x, n_steps, dropout_rate):\n",
 " # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]\n",
 " batch_x = tf.transpose(batch_x, [1, 0, 2]) # Permute n_steps and batch_size\n",
 " # Reshape to prepare input for first layer\n",
@@ -486,8 +483,7 @@
 " b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())\n",
 " h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())\n",
 " layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)\n",
-" if dropout:\n",
-" layer_1 = tf.nn.dropout(layer_1, (1 - dropout_rate))\n",
+" layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate))\n",
 " ...\n",
 "```\n",
 "pass `batch_x` through the first layer of the non-recurrent neural network, then applies dropout to the result.\n",
@@ -497,15 +493,13 @@
 " #Hidden layer with clipped RELU activation and dropout\n",
 " b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())\n",
 " h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())\n",
-" layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)\n",
-" if dropout: \n",
-" layer_2 = tf.nn.dropout(layer_2, (1 - dropout_rate))\n",
+" layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip) \n",
+" layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate))\n",
 " #Hidden layer with clipped RELU activation and dropout\n",
 " b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())\n",
 " h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())\n",
 " layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)\n",
-" if dropout:\n",
-" layer_3 = tf.nn.dropout(layer_3, (1 - dropout_rate))\n",
+" layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate))\n",
 "```\n",
 "\n",
 "Next we create the forward and backward LSTM units\n",
@@ -549,8 +543,7 @@
 " b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())\n",
 " h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())\n",
 " layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)\n",
-" if dropout:\n",
-" layer_5 = tf.nn.dropout(layer_5, (1 - dropout_rate))\n",
+" layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate))\n",
 "```\n",
 "\n",
 "The next line of `BiRNN`\n",
@@ -591,7 +584,7 @@
 "source": [
 "In accord with [Deep Speech: Scaling up end-to-end speech recognition](http://arxiv.org/abs/1412.5567), the loss function used by our network should be the CTC loss function[[2]](http://www.cs.toronto.edu/~graves/preprint.pdf). Conveniently, this loss function is implemented in TensorFlow. Thus, we can simply make use of this implementation to define our loss.\n",
 "\n",
-"To do so we introduce a utility function `calculate_accuracy_and_loss()` that calculates the average loss for a mini-batch along with the accuracy"
+"To do so we introduce a utility function `calculate_accuracy_and_loss()` that beam search decodes a mini-batch and calculates its average loss and accuracy. Along with the loss and accuracy it returns the decoded result and the batch's original Y."
 ]
 },
 {
@@ -602,12 +595,12 @@
 },
 "outputs": [],
 "source": [
-"def calculate_accuracy_and_loss(n_steps, batch_set, dropout=False):\n",
+"def calculate_accuracy_and_loss(n_steps, batch_set, dropout_rate):\n",
 " # Obtain the next batch of data\n",
 " batch_x, batch_y, batch_seq_len = batch_set.next_batch(batch_size)\n",
 "\n",
 " # Calculate the logits of the batch using BiRNN\n",
-" logits = BiRNN(batch_x, n_steps, dropout=dropout)\n",
+" logits = BiRNN(batch_x, n_steps, dropout_rate)\n",
 " \n",
 " # CTC loss requires the logits be time major\n",
 " logits = tf.transpose(logits, [1, 0, 2])\n",
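For context when reading this hunk, the cell goes on to feed the time-major logits into TensorFlow's CTC loss. A rough sketch of that step, assuming the 2016-era `ctc_ops.ctc_loss(inputs, labels, sequence_length)` argument order used by the notebook (later TensorFlow releases reordered these arguments, so treat the signature as an assumption):

```python
import tensorflow as tf
from tensorflow.python.ops import ctc_ops

# Placeholders standing in for the notebook's batch; shapes are illustrative
batch_logits = tf.placeholder(tf.float32, [None, None, 29])  # [batch, time, classes]
batch_y = tf.sparse_placeholder(tf.int32)                    # sparse label indices
batch_seq_len = tf.placeholder(tf.int32, [None])             # per-example lengths

# CTC expects time-major inputs: [time, batch, classes]
time_major_logits = tf.transpose(batch_logits, [1, 0, 2])

# Per-example CTC loss, averaged over the mini-batch
total_loss = ctc_ops.ctc_loss(time_major_logits, batch_y, batch_seq_len)
avg_loss = tf.reduce_mean(total_loss)
```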
@@ -618,12 +611,16 @@
 " # Calculate the average loss across the batch\n",
 " avg_loss = tf.reduce_mean(total_loss)\n",
 " \n",
-" # Compute the accuracy\n",
+" # Beam search decode the batch\n",
 " decoded, _ = ctc_ops.ctc_beam_search_decoder(logits, batch_seq_len)\n",
+" \n",
+" # Compute the edit (Levenshtein) distance \n",
 " distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)\n",
+" \n",
+" # Compute the accuracy \n",
 " accuracy = tf.reduce_mean(distance)\n",
 "\n",
-" # Return avg_loss and accuracy\n",
+" # Return results to the caller\n",
 " return avg_loss, accuracy, decoded, batch_y"
 ]
 },
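A side note on the naming here: `tf.edit_distance` normalizes by the length of the ground truth by default, so the tensor called `accuracy` is really a mean normalized Levenshtein distance (a label error rate where lower is better), not an accuracy in the usual sense. A small self-contained illustration of the same computation (the label values are made up):

```python
import tensorflow as tf

# One example: ground truth [1, 2] vs. hypothesis [1, 3], as sparse label indices
truth = tf.SparseTensor([[0, 0], [0, 1]], [1, 2], [1, 2])
hyp = tf.SparseTensor([[0, 0], [0, 1]], [1, 3], [1, 2])

# One substitution over a truth of length 2 gives a normalized distance of 0.5
distance = tf.edit_distance(hyp, truth)   # normalize=True is the default
mean_distance = tf.reduce_mean(distance)  # what the notebook stores as "accuracy"

with tf.Session() as session:
    print(session.run(mean_distance))     # ~0.5
```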
@@ -633,7 +630,7 @@
 "source": [
 "The first lines of `calculate_accuracy_and_loss()`\n",
 "```python\n",
-"def calculate_accuracy_and_loss(n_steps, batch_set):\n",
+"def calculate_accuracy_and_loss(n_steps, batch_set, dropout_rate):\n",
 " # Obtain the next batch of data\n",
 " batch_x, batch_y, batch_seq_len = batch_set.next_batch(batch_size)\n",
 "```\n",
@@ -642,7 +639,7 @@
 "The next line\n",
 "```python\n",
 " # Calculate the logits from the BiRNN\n",
-" logits = BiRNN(batch_x)\n",
+" logits = BiRNN(batch_x, n_steps, dropout_rate)\n",
 "```\n",
 "calls `BiRNN()` with a batch of data and does inference on the batch.\n",
 "\n",
@@ -659,17 +656,22 @@
 "```\n",
 "calculate the average loss using tensor flow's `ctc_loss` operator. \n",
 "\n",
-"The next lines compute the accuracy\n",
+"The next lines first beam search decode the batch and then compute the accuracy based on the Levenshtein distance between the decoded batch and the batch's original Y.\n",
 "```python\n",
-" # Compute the accuracy\n",
+" # Beam search decode the batch\n",
 " decoded, _ = ctc_ops.ctc_beam_search_decoder(logits, batch_seq_len)\n",
-" accuracy = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y))\n",
+" \n",
+" # Compute the edit (Levenshtein) distance \n",
+" distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)\n",
+" \n",
+" # Compute the accuracy \n",
+" accuracy = tf.reduce_mean(distance)\n",
 "```\n",
 "\n",
-"Finally, the `avg_loss`, accuracy and the decoded batch are returned to the caller\n",
+"Finally, the `avg_loss`, accuracy, the decoded batch and the original batch's Y are returned to the caller\n",
 "```python\n",
-" # Return avg_loss and accuracy\n",
-" return avg_loss, accuracy, decoded\n",
+" # Return results to the caller\n",
+" return avg_loss, accuracy, decoded, batch_y\n",
 "```"
 ]
 },
@@ -866,22 +868,25 @@
 " with tf.device(available_devices[i]):\n",
 " # Create a scope for all operations of tower i\n",
 " with tf.name_scope('tower_%d' % i) as scope:\n",
-" # Calculate the avg_loss and accuracy for this tower\n",
+" # Calculate the avg_loss and accuracy and retrieve the decoded \n",
+" # batch along with the original batch's labels (Y) of this tower\n",
 " avg_loss, accuracy, decoded, labels = calculate_accuracy_and_loss(\\\n",
 " n_steps, \\\n",
 " batch_set, \\\n",
-" dropout=(optimizer is not None) \\\n",
+" dropout_rate if (optimizer is not None) else 0.0 \\\n",
 " )\n",
 " \n",
 " # Allow for variables to be re-used by the next tower\n",
 " tf.get_variable_scope().reuse_variables()\n",
 " \n",
-" # Retain tower's gradients\n",
+" # Retain tower's decoded batch\n",
 " tower_decodings.append(decoded)\n",
 " \n",
-" # Retain tower's labels\n",
+" # Retain tower's labels (Y)\n",
 " tower_labels.append(labels)\n",
 " \n",
+" # If we are in training, there will be an optimizer given and \n",
+" # only then will we compute and retain gradients based on the loss\n",
 " if optimizer is not None:\n",
 " # Compute gradients for model parameters using tower's mini-batch\n",
 " gradients = optimizer.compute_gradients(avg_loss)\n",
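The net effect of the changed call is that only training towers apply dropout; validation and test towers are built without an optimizer and therefore pass a rate of 0.0, running the same graph with dropout disabled. A tiny standalone sketch of that selection (a hypothetical helper, not part of the notebook):

```python
def tower_dropout_rate(dropout_rate, optimizer):
    # Training towers (optimizer present) keep the configured rate;
    # validation/test towers reuse the graph with dropout turned off.
    return dropout_rate if optimizer is not None else 0.0

assert tower_dropout_rate(0.05, optimizer=object()) == 0.05
assert tower_dropout_rate(0.05, optimizer=None) == 0.0
```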
@@ -1032,7 +1037,8 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Finally we define the log directory plus some helpers."
+"Finally we define the top-level directory for all logs and the current run's log sub-directory within it.\n",
+"We also add some log helpers."
 ]
 },
 {
@@ -1043,7 +1049,8 @@
 },
 "outputs": [],
 "source": [
-"log_dir = '%s/%s' % (\"logs\", time.strftime(\"%Y%m%d-%H%M%S\"))\n",
+"logs_dir = \"logs\"\n",
+"log_dir = '%s/%s' % (logs_dir, time.strftime(\"%Y%m%d-%H%M%S\"))\n",
 "\n",
 "def get_git_revision_hash():\n",
 " return subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()\n",
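To make the resulting layout concrete, splitting the path into `logs_dir` and a time-stamped `log_dir` yields one sub-directory per run under a fixed top-level directory (the timestamp below is of course illustrative):

```python
import time

logs_dir = "logs"
log_dir = '%s/%s' % (logs_dir, time.strftime("%Y%m%d-%H%M%S"))

# e.g. log_dir == "logs/20161114-153042"; merge_logs(logs_dir) later scans all
# such run directories for their hyper.json dumps.
print(log_dir)
```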
@@ -1074,7 +1081,7 @@
 },
 "outputs": [],
 "source": [
-"def forward(session, data_set):\n",
+"def decode_batch(data_set):\n",
 " # Set n_steps parameter\n",
 " n_steps = data_set.max_batch_seq_len\n",
 "\n",
@@ -1188,7 +1195,7 @@
 " get_tower_results(n_steps, data_sets.train, optimizer)\n",
 " \n",
 " # Validation step preparation\n",
-" validation_tower_decodings, validation_tower_labels = forward(session, data_sets.validation)\n",
+" validation_tower_decodings, validation_tower_labels = decode_batch(data_sets.validation)\n",
 "\n",
 " # Average tower gradients\n",
 " avg_tower_gradients = average_gradients(tower_gradients)\n",
@@ -1218,10 +1225,9 @@
 " # Define total accuracy for the epoch\n",
 " total_accuracy = 0\n",
 " \n",
-" # Validation step to determine the best point in time to stop\n",
+" # Validation step\n",
 " if epoch % validation_step == 0:\n",
 " _, last_validation_wer = print_wer_report(session, \"Validation\", validation_tower_decodings, validation_tower_labels)\n",
-" # TODO: Determine on base of WER, if model starts overfitting\n",
 " print\n",
 "\n",
 " # Loop over the batches\n",
@@ -1307,7 +1313,7 @@
 "outputs": [],
 "source": [
 "# Test network\n",
-"test_decodings, test_labels = forward(session, ted_lium.test)\n",
+"test_decodings, test_labels = decode_batch(ted_lium.test)\n",
 "_, test_wer = print_wer_report(session, \"Test\", test_decodings, test_labels)"
 ]
 },
@@ -1380,41 +1386,6 @@
 "Let's also re-populate a central JS file, that contains all the dumps at once."
 ]
 },
-{
-"cell_type": "code",
-"execution_count": 34,
-"metadata": {
-"collapsed": true
-},
-"outputs": [],
-"source": [
-"written = False\n",
-"logs_dir = \"logs\"\n",
-"\n",
-"# All direct sub directories of the logs directory\n",
-"dirs = [os.path.join(logs_dir, o) for o in os.listdir(logs_dir) if os.path.isdir(os.path.join(logs_dir, o))]\n",
-"\n",
-"# Let's first populate a temporal file and rename it afterwards - guarantees an interruption free web experience\n",
-"nhf = '%s/%s' % (logs_dir, 'new_hyper.js')\n",
-"\n",
-"with open(nhf, 'w') as dump_file:\n",
-" # Assigning a global variable that the report page can pick up after loading the data as a regular script\n",
-" dump_file.write('window.ALL_THE_DATA = [')\n",
-" for d in dirs:\n",
-" hf = os.path.join(d, \"hyper.json\")\n",
-" if os.path.isfile(hf):\n",
-" # Separate by comma if there was already something written\n",
-" if written:\n",
-" dump_file.write(',\\n')\n",
-" written = True\n",
-" # Append the whole file\n",
-" dump_file.write(open(hf, 'r').read())\n",
-" dump_file.write('];')\n",
-" \n",
-"# Finally we rename the file temporal file and overwrite a potentially existing active one\n",
-"os.rename(nhf, '%s/%s' % (logs_dir, 'hyper.js'))"
-]
-},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -1422,7 +1393,9 @@
 "collapsed": true
 },
 "outputs": [],
-"source": []
+"source": [
+"merge_logs(logs_dir)"
+]
 }
 ],
 "metadata": {
util/log/__init__.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+
+import os
+import os.path
+
+def merge_logs(logs_dir):
+
+    written = False
+
+    # All direct sub directories of the logs directory
+    dirs = [os.path.join(logs_dir, o) for o in os.listdir(logs_dir) if os.path.isdir(os.path.join(logs_dir, o))]
+
+    # Let's first populate a temporary file and rename it afterwards - guarantees an interruption free web experience
+    nhf = '%s/%s' % (logs_dir, 'new_hyper.js')
+
+    with open(nhf, 'w') as dump_file:
+        # Assigning a global variable that the report page can pick up after loading the data as a regular script
+        dump_file.write('window.ALL_THE_DATA = [')
+        for d in dirs:
+            hf = os.path.join(d, "hyper.json")
+            if os.path.isfile(hf):
+                # Separate by comma if there was already something written
+                if written:
+                    dump_file.write(',\n')
+                written = True
+                # Append the whole file
+                dump_file.write(open(hf, 'r').read())
+        dump_file.write('];')
+
+    # Finally we rename the temporary file and overwrite a potentially existing active one
+    os.rename(nhf, '%s/%s' % (logs_dir, 'hyper.js'))
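The notebook's final cell now drives this helper via `merge_logs(logs_dir)`; it can be used the same way from any script. A minimal usage sketch:

```python
from util.log import merge_logs

# Scans every sub-directory of "logs" for a hyper.json dump and rewrites
# logs/hyper.js, the single file the report page loads as a regular script.
merge_logs("logs")
```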