[tfdbg2] A few fixes and improvements to example debug_mnist_v2
1. Change the default `--dump_tensor_debug_mode` flag value to `FULL_HEALTH`, a mode better suited to debugging the numerical-instability bug in this example than the previous default value, `NO_TENSOR`.
2. Change the default `--dump_circular_buffer_size` value to -1, to accommodate longer runs in which the user would want to see the debug data in its entirety.
3. Rename a few weight variables that were previously named in a confusing way.
4. Change "logits" to "probs": the values are generated by a `softmax` operation and are therefore more accurately described as probability scores.

PiperOrigin-RevId: 316473164
Change-Id: I4eb13f9581a4d4e550b3b3a5cd132eeffc7dd043
This commit is contained in:
parent
1a0909a9f4
commit
06b1b45e42
|
@ -98,16 +98,18 @@ def parse_args():
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--dump_tensor_debug_mode",
|
"--dump_tensor_debug_mode",
|
||||||
type=str,
|
type=str,
|
||||||
default="NO_TENSOR",
|
default="FULL_HEALTH",
|
||||||
help="Mode for dumping tensor values. Options: NO_TENSOR, CURT_HEALTH, "
|
help="Mode for dumping tensor values. Options: NO_TENSOR, CURT_HEALTH, "
|
||||||
"CONCISE_HEALTH, SHAPE, FULL_TENSOR. This is relevant only when "
|
"CONCISE_HEALTH, SHAPE, FULL_HEALTH. This is relevant only when "
|
||||||
"--dump_dir is set.")
|
"--dump_dir is set.")
|
||||||
# TODO(cais): Add more tensor debug mode strings once they are supported.
|
# TODO(cais): Add more tensor debug mode strings once they are supported.
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--dump_circular_buffer_size",
|
"--dump_circular_buffer_size",
|
||||||
type=int,
|
type=int,
|
||||||
default=1000,
|
default=-1,
|
||||||
help="Size of the circular buffer used to dump execution events. "
|
help="Size of the circular buffer used to dump execution events. "
|
||||||
|
"A value <= 0 disables the circular-buffer behavior and causes "
|
||||||
|
"all instrumented tensor values to be dumped. "
|
||||||
"This is relevant only when --dump_dir is set.")
|
"This is relevant only when --dump_dir is set.")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--use_random_config_path",
|
"--use_random_config_path",
|
||||||
|
@ -178,9 +180,9 @@ def main(_):
|
||||||
return activations
|
return activations
|
||||||
|
|
||||||
# init model
|
# init model
|
||||||
hidden = get_dense_weights(IMAGE_SIZE**2, HIDDEN_SIZE)
|
hidden_weights = get_dense_weights(IMAGE_SIZE**2, HIDDEN_SIZE)
|
||||||
logits = get_dense_weights(HIDDEN_SIZE, NUM_LABELS)
|
output_weights = get_dense_weights(HIDDEN_SIZE, NUM_LABELS)
|
||||||
variables = hidden + logits
|
variables = hidden_weights + output_weights
|
||||||
|
|
||||||
@tf.function
|
@tf.function
|
||||||
def model(x):
|
def model(x):
|
||||||
|
@ -193,15 +195,25 @@ def main(_):
|
||||||
Returns:
|
Returns:
|
||||||
A (?, 10) tensor containing the class scores for each example.
|
A (?, 10) tensor containing the class scores for each example.
|
||||||
"""
|
"""
|
||||||
hidden_act = dense_layer(hidden, x)
|
hidden_act = dense_layer(hidden_weights, x)
|
||||||
logits_act = dense_layer(logits, hidden_act, tf.identity)
|
logits_act = dense_layer(output_weights, hidden_act, tf.identity)
|
||||||
y = tf.nn.softmax(logits_act)
|
y = tf.nn.softmax(logits_act)
|
||||||
return y
|
return y
|
||||||
|
|
||||||
@tf.function
|
@tf.function
|
||||||
def loss(logits, labels):
|
def loss(probs, labels):
|
||||||
"""Calculates cross entropy loss."""
|
"""Calculates cross entropy loss.
|
||||||
diff = -(labels * tf.math.log(logits))
|
|
||||||
|
Args:
|
||||||
|
probs: Class probabilities predicted by the model. The shape is expected
|
||||||
|
to be (?, 10).
|
||||||
|
labels: Truth labels for the classes, as one-hot encoded vectors. The
|
||||||
|
shape is expected to be the same as `probs`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A scalar loss tensor.
|
||||||
|
"""
|
||||||
|
diff = -labels * tf.math.log(probs)
|
||||||
loss = tf.reduce_mean(diff)
|
loss = tf.reduce_mean(diff)
|
||||||
return loss
|
return loss
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue