diff --git a/DeepSpeech.ipynb b/DeepSpeech.ipynb
index 54d2888a..1f043206 100644
--- a/DeepSpeech.ipynb
+++ b/DeepSpeech.ipynb
@@ -76,18 +76,19 @@
    },
    "outputs": [],
    "source": [
+    "import os\n",
     "import time\n",
-    "import os.path\n",
+    "import json\n",
+    "import datetime\n",
     "import tempfile\n",
+    "import subprocess\n",
     "import numpy as np\n",
     "import tensorflow as tf\n",
-    "import json\n",
-    "import subprocess\n",
-    "import datetime\n",
+    "from util.log import merge_logs\n",
     "from util.gpu import get_available_gpus\n",
+    "from util.importers.ted_lium import read_data_sets\n",
     "from util.text import sparse_tensor_value_to_text, wers\n",
-    "from tensorflow.python.ops import ctc_ops\n",
-    "from util.importers.ted_lium import read_data_sets"
+    "from tensorflow.python.ops import ctc_ops"
    ]
   },
   {
@@ -125,7 +126,7 @@
     "training_iters = 1250 # TODO: Determine a reasonable value for this\n",
     "batch_size = 1 # TODO: Determine a reasonable value for this\n",
     "display_step = 10 # TODO: Determine a reasonable value for this\n",
-    "validation_step = 50\n",
+    "validation_step = 50 # TODO: Determine a reasonable value for this\n",
     "checkpoint_step = 1000 # TODO: Determine a reasonable value for this\n",
     "checkpoint_dir = tempfile.gettempdir() # TODO: Determine a reasonable value for this"
    ]
   },
@@ -147,7 +148,7 @@
    },
    "outputs": [],
    "source": [
-    "dropout_rate = 0.01 # TODO: Validate this is a reasonable value"
+    "dropout_rate = 0.05 # TODO: Validate this is a reasonable value"
    ]
   },
   {
@@ -400,7 +401,7 @@
    },
    "outputs": [],
    "source": [
-    "def BiRNN(batch_x, n_steps, dropout):\n",
+    "def BiRNN(batch_x, n_steps, dropout_rate):\n",
     "    # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]\n",
     "    batch_x = tf.transpose(batch_x, [1, 0, 2]) # Permute n_steps and batch_size\n",
     "    # Reshape to prepare input for first layer\n",
@@ -410,20 +411,17 @@
     "    b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())\n",
     "    h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())\n",
     "    layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)\n",
-    "    if dropout:\n",
-    "        layer_1 = tf.nn.dropout(layer_1, (1 - dropout_rate))\n",
+    "    layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate))\n",
     "    #Hidden layer with clipped RELU activation and dropout\n",
     "    b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())\n",
     "    h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())\n",
     "    layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)\n",
-    "    if dropout:\n",
-    "        layer_2 = tf.nn.dropout(layer_2, (1 - dropout_rate))\n",
+    "    layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate))\n",
     "    #Hidden layer with clipped RELU activation and dropout\n",
     "    b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())\n",
     "    h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())\n",
     "    layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)\n",
-    "    if dropout:\n",
-    "        layer_3 = tf.nn.dropout(layer_3, (1 - dropout_rate))\n",
+    "    layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate))\n",
     "    \n",
     "    # Define lstm cells with tensorflow\n",
     "    # Forward direction cell\n",
@@ -449,8 +447,7 @@
     "    b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())\n",
     "    h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())\n",
relu_clip)\n", - " if dropout:\n", - " layer_5 = tf.nn.dropout(layer_5, (1 - dropout_rate))\n", + " layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate))\n", " #Hidden layer of logits\n", " b6 = variable_on_cpu('b6', [n_hidden_6], tf.random_normal_initializer())\n", " h6 = variable_on_cpu('h6', [n_hidden_5, n_hidden_6], tf.random_normal_initializer())\n", @@ -471,7 +468,7 @@ "source": [ "The first few lines of the function `BiRNN`\n", "```python\n", - "def BiRNN(batch_x, n_steps, dropout=True):\n", + "def BiRNN(batch_x, n_steps, dropout_rate):\n", " # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]\n", " batch_x = tf.transpose(batch_x, [1, 0, 2]) # Permute n_steps and batch_size\n", " # Reshape to prepare input for first layer\n", @@ -486,8 +483,7 @@ " b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())\n", " h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())\n", " layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)\n", - " if dropout:\n", - " layer_1 = tf.nn.dropout(layer_1, (1 - dropout_rate))\n", + " layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate))\n", " ...\n", "```\n", "pass `batch_x` through the first layer of the non-recurrent neural network, then applies dropout to the result.\n", @@ -497,15 +493,13 @@ " #Hidden layer with clipped RELU activation and dropout\n", " b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())\n", " h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())\n", - " layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)\n", - " if dropout: \n", - " layer_2 = tf.nn.dropout(layer_2, (1 - dropout_rate))\n", + " layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip) \n", + " layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate))\n", " #Hidden layer with clipped RELU activation and dropout\n", " b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())\n", " h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())\n", " layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)\n", - " if dropout:\n", - " layer_3 = tf.nn.dropout(layer_3, (1 - dropout_rate))\n", + " layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate))\n", "```\n", "\n", "Next we create the forward and backward LSTM units\n", @@ -549,8 +543,7 @@ " b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())\n", " h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())\n", " layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)\n", - " if dropout:\n", - " layer_5 = tf.nn.dropout(layer_5, (1 - dropout_rate))\n", + " layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate))\n", "```\n", "\n", "The next line of `BiRNN`\n", @@ -591,7 +584,7 @@ "source": [ "In accord with [Deep Speech: Scaling up end-to-end speech recognition](http://arxiv.org/abs/1412.5567), the loss function used by our network should be the CTC loss function[[2]](http://www.cs.toronto.edu/~graves/preprint.pdf). Conveniently, this loss function is implemented in TensorFlow. 
Thus, we can simply make use of this implementation to define our loss.\n", "\n", - "To do so we introduce a utility function `calculate_accuracy_and_loss()` that calculates the average loss for a mini-batch along with the accuracy" + "To do so we introduce a utility function `calculate_accuracy_and_loss()` beam search decodes a mini-batch and calculates the average loss and accuracy. Next to loss and accuracy it returns the decoded result and the batch's original Y." ] }, { @@ -602,12 +595,12 @@ }, "outputs": [], "source": [ - "def calculate_accuracy_and_loss(n_steps, batch_set, dropout=False):\n", + "def calculate_accuracy_and_loss(n_steps, batch_set, dropout_rate):\n", " # Obtain the next batch of data\n", " batch_x, batch_y, batch_seq_len = batch_set.next_batch(batch_size)\n", "\n", " # Calculate the logits of the batch using BiRNN\n", - " logits = BiRNN(batch_x, n_steps, dropout=dropout)\n", + " logits = BiRNN(batch_x, n_steps, dropout_rate)\n", " \n", " # CTC loss requires the logits be time major\n", " logits = tf.transpose(logits, [1, 0, 2])\n", @@ -618,12 +611,16 @@ " # Calculate the average loss across the batch\n", " avg_loss = tf.reduce_mean(total_loss)\n", " \n", - " # Compute the accuracy\n", + " # Beam search decode the batch\n", " decoded, _ = ctc_ops.ctc_beam_search_decoder(logits, batch_seq_len)\n", + " \n", + " # Compute the edit (Levenshtein) distance \n", " distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)\n", + " \n", + " # Compute the accuracy \n", " accuracy = tf.reduce_mean(distance)\n", "\n", - " # Return avg_loss and accuracy\n", + " # Return results to the caller\n", " return avg_loss, accuracy, decoded, batch_y" ] }, @@ -633,7 +630,7 @@ "source": [ "The first lines of `calculate_accuracy_and_loss()`\n", "```python\n", - "def calculate_accuracy_and_loss(n_steps, batch_set):\n", + "def calculate_accuracy_and_loss(n_steps, batch_set, dropout_rate):\n", " # Obtain the next batch of data\n", " batch_x, batch_y, batch_seq_len = batch_set.next_batch(batch_size)\n", "```\n", @@ -642,7 +639,7 @@ "The next line\n", "```python\n", " # Calculate the logits from the BiRNN\n", - " logits = BiRNN(batch_x)\n", + " logits = BiRNN(batch_x, n_steps, dropout_rate)\n", "```\n", "calls `BiRNN()` with a batch of data and does inference on the batch.\n", "\n", @@ -659,17 +656,22 @@ "```\n", "calculate the average loss using tensor flow's `ctc_loss` operator. 
\n", "\n", - "The next lines compute the accuracy\n", + "The next lines first beam decode the batch and then compute the accuracy on base of the Levenshtein distance between the decoded batch and the batch's original Y.\n", "```python\n", - " # Compute the accuracy\n", + " # Beam search decode the batch\n", " decoded, _ = ctc_ops.ctc_beam_search_decoder(logits, batch_seq_len)\n", - " accuracy = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y))\n", + " \n", + " # Compute the edit (Levenshtein) distance \n", + " distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)\n", + " \n", + " # Compute the accuracy \n", + " accuracy = tf.reduce_mean(distance)\n", "```\n", "\n", - "Finally, the `avg_loss`, accuracy and the decoded batch are returned to the caller\n", + "Finally, the `avg_loss`, accuracy, the decoded batch and the original batch's Y are returned to the caller\n", "```python\n", - " # Return avg_loss and accuracy\n", - " return avg_loss, accuracy, decoded\n", + " # Return results to the caller\n", + " return avg_loss, accuracy, decoded, batch_y\n", "```" ] }, @@ -866,22 +868,25 @@ " with tf.device(available_devices[i]):\n", " # Create a scope for all operations of tower i\n", " with tf.name_scope('tower_%d' % i) as scope:\n", - " # Calculate the avg_loss and accuracy for this tower\n", + " # Calculate the avg_loss and accuracy and retrieve the decoded \n", + " # batch along with the original batch's labels (Y) of this tower\n", " avg_loss, accuracy, decoded, labels = calculate_accuracy_and_loss(\\\n", - " n_steps, \\\n", - " batch_set, \\\n", - " dropout=(optimizer is not None) \\\n", - " )\n", + " n_steps, \\\n", + " batch_set, \\\n", + " dropout_rate if (optimizer is not None) else 0.0 \\\n", + " )\n", " \n", " # Allow for variables to be re-used by the next tower\n", " tf.get_variable_scope().reuse_variables()\n", " \n", - " # Retain tower's gradients\n", + " # Retain tower's decoded batch\n", " tower_decodings.append(decoded)\n", " \n", - " # Retain tower's labels\n", + " # Retain tower's labels (Y)\n", " tower_labels.append(labels)\n", " \n", + " # If we are in training, there will be an optimizer given and \n", + " # only then we will compute and retain gradients on base of the loss\n", " if optimizer is not None:\n", " # Compute gradients for model parameters using tower's mini-batch\n", " gradients = optimizer.compute_gradients(avg_loss)\n", @@ -1032,7 +1037,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally we define the log directory plus some helpers." + "Finally we define the top directory for all logs and our current log sub-directory of it.\n", + "We also add some log helpers." 
    ]
   },
   {
@@ -1043,7 +1049,8 @@
    },
    "outputs": [],
    "source": [
-    "log_dir = '%s/%s' % (\"logs\", time.strftime(\"%Y%m%d-%H%M%S\"))\n",
+    "logs_dir = \"logs\"\n",
+    "log_dir = '%s/%s' % (logs_dir, time.strftime(\"%Y%m%d-%H%M%S\"))\n",
     "\n",
     "def get_git_revision_hash():\n",
     "    return subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()\n",
@@ -1074,7 +1081,7 @@
    },
    "outputs": [],
    "source": [
-    "def forward(session, data_set):\n",
+    "def decode_batch(data_set):\n",
     "    # Set n_steps parameter\n",
     "    n_steps = data_set.max_batch_seq_len\n",
     "\n",
@@ -1188,7 +1195,7 @@
     "    get_tower_results(n_steps, data_sets.train, optimizer)\n",
     "\n",
     "# Validation step preparation\n",
-    "validation_tower_decodings, validation_tower_labels = forward(session, data_sets.validation)\n",
+    "validation_tower_decodings, validation_tower_labels = decode_batch(data_sets.validation)\n",
     "\n",
     "# Average tower gradients\n",
     "avg_tower_gradients = average_gradients(tower_gradients)\n",
@@ -1218,10 +1225,9 @@
     "    # Define total accuracy for the epoch\n",
     "    total_accuracy = 0\n",
     "    \n",
-    "    # Validation step to determine the best point in time to stop\n",
+    "    # Validation step\n",
     "    if epoch % validation_step == 0:\n",
     "        _, last_validation_wer = print_wer_report(session, \"Validation\", validation_tower_decodings, validation_tower_labels)\n",
-    "        # TODO: Determine on base of WER, if model starts overfitting\n",
     "        print\n",
     "\n",
     "    # Loop over the batches\n",
@@ -1307,7 +1313,7 @@
    "outputs": [],
    "source": [
     "# Test network\n",
-    "test_decodings, test_labels = forward(session, ted_lium.test)\n",
+    "test_decodings, test_labels = decode_batch(ted_lium.test)\n",
     "_, test_wer = print_wer_report(session, \"Test\", test_decodings, test_labels)"
    ]
   },
@@ -1380,41 +1386,6 @@
     "Let's also re-populate a central JS file, that contains all the dumps at once."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "written = False\n",
-    "logs_dir = \"logs\"\n",
-    "\n",
-    "# All direct sub directories of the logs directory\n",
-    "dirs = [os.path.join(logs_dir, o) for o in os.listdir(logs_dir) if os.path.isdir(os.path.join(logs_dir, o))]\n",
-    "\n",
-    "# Let's first populate a temporal file and rename it afterwards - guarantees an interruption free web experience\n",
-    "nhf = '%s/%s' % (logs_dir, 'new_hyper.js')\n",
-    "\n",
-    "with open(nhf, 'w') as dump_file:\n",
-    "    # Assigning a global variable that the report page can pick up after loading the data as a regular script\n",
-    "    dump_file.write('window.ALL_THE_DATA = [')\n",
-    "    for d in dirs:\n",
-    "        hf = os.path.join(d, \"hyper.json\")\n",
-    "        if os.path.isfile(hf):\n",
-    "            # Separate by comma if there was already something written\n",
-    "            if written:\n",
-    "                dump_file.write(',\\n')\n",
-    "            written = True\n",
-    "            # Append the whole file\n",
-    "            dump_file.write(open(hf, 'r').read())\n",
-    "    dump_file.write('];')\n",
-    "    \n",
-    "# Finally we rename the file temporal file and overwrite a potentially existing active one\n",
-    "os.rename(nhf, '%s/%s' % (logs_dir, 'hyper.js'))"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
@@ -1422,7 +1393,9 @@
     "collapsed": true
    },
    "outputs": [],
-   "source": []
+   "source": [
+    "merge_logs(logs_dir)"
+   ]
   }
  ],
  "metadata": {
diff --git a/util/log/__init__.py b/util/log/__init__.py
new file mode 100644
index 00000000..1762f6f0
--- /dev/null
+++ b/util/log/__init__.py
@@ -0,0 +1,30 @@
+
+import os
+import os.path
+
+def merge_logs(logs_dir):
+
+    written = False
+
+    # All direct sub-directories of the logs directory
+    dirs = [os.path.join(logs_dir, o) for o in os.listdir(logs_dir) if os.path.isdir(os.path.join(logs_dir, o))]
+
+    # Let's first populate a temporary file and rename it afterwards - guarantees an interruption-free web experience
+    nhf = '%s/%s' % (logs_dir, 'new_hyper.js')
+
+    with open(nhf, 'w') as dump_file:
+        # Assigning a global variable that the report page can pick up after loading the data as a regular script
+        dump_file.write('window.ALL_THE_DATA = [')
+        for d in dirs:
+            hf = os.path.join(d, "hyper.json")
+            if os.path.isfile(hf):
+                # Separate by comma if there was already something written
+                if written:
+                    dump_file.write(',\n')
+                written = True
+                # Append the whole file
+                dump_file.write(open(hf, 'r').read())
+        dump_file.write('];')
+
+    # Finally we rename the temporary file and overwrite a potentially existing active one
+    os.rename(nhf, '%s/%s' % (logs_dir, 'hyper.js'))
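Note on the new helper: the `util.log.merge_logs` module added above replaces the notebook cell that previously rebuilt `logs/hyper.js` inline. A minimal usage sketch, assuming (as in the notebook) a `logs` directory whose per-run sub-directories each contain a `hyper.json` dump:

```python
# Minimal sketch of calling the new helper outside the notebook.
# "logs" mirrors the notebook's logs_dir value; any directory containing
# per-run sub-directories with hyper.json files would work the same way.
from util.log import merge_logs

# Writes logs/new_hyper.js first, then renames it to logs/hyper.js so the
# report page never sees a half-written file.
merge_logs("logs")
```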