Moving model_pruning library to tf.contrib
PiperOrigin-RevId: 174214419
parent 693325c832
commit 7ece1c0b8e
@@ -413,6 +413,7 @@ filegroup(
         "//tensorflow/contrib/makefile:all_files",
         "//tensorflow/contrib/meta_graph_transform:all_files",
         "//tensorflow/contrib/metrics:all_files",
+        "//tensorflow/contrib/model_pruning:all_files",
         "//tensorflow/contrib/mpi_collectives:all_files",
         "//tensorflow/contrib/ndlstm:all_files",
         "//tensorflow/contrib/nearest_neighbor:all_files",
@@ -57,6 +57,7 @@ py_library(
         "//tensorflow/contrib/memory_stats:memory_stats_py",
         "//tensorflow/contrib/meta_graph_transform",
         "//tensorflow/contrib/metrics:metrics_py",
+        "//tensorflow/contrib/model_pruning",
         "//tensorflow/contrib/nccl:nccl_py",
         "//tensorflow/contrib/ndlstm",
         "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_py",
@@ -51,6 +51,7 @@ from tensorflow.contrib import lookup
 from tensorflow.contrib import losses
 from tensorflow.contrib import memory_stats
 from tensorflow.contrib import metrics
+from tensorflow.contrib import model_pruning
 from tensorflow.contrib import nccl
 from tensorflow.contrib import nn
 from tensorflow.contrib import opt
@@ -518,6 +518,11 @@ add_python_module("tensorflow/contrib/metrics/python")
 add_python_module("tensorflow/contrib/metrics/python/kernel_tests")
 add_python_module("tensorflow/contrib/metrics/python/metrics")
 add_python_module("tensorflow/contrib/metrics/python/ops")
+add_python_module("tensorflow/contrib/model_pruning")
+add_python_module("tensorflow/contrib/model_pruning/examples")
+add_python_module("tensorflow/contrib/model_pruning/examples/cifar10")
+add_python_module("tensorflow/contrib/model_pruning/python")
+add_python_module("tensorflow/contrib/model_pruning/python/layers")
 add_python_module("tensorflow/contrib/ndlstm")
 add_python_module("tensorflow/contrib/ndlstm/python")
 add_python_module("tensorflow/contrib/nn")
tensorflow/contrib/model_pruning/BUILD (new file, 139 lines)
@@ -0,0 +1,139 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
package(default_visibility = ["//tensorflow:__subpackages__"])

licenses(["notice"])  # Apache 2.0

load("//tensorflow:tensorflow.bzl", "py_test")

py_library(
    name = "core_layers",
    srcs = ["python/layers/core_layers.py"],
    srcs_version = "PY2AND3",
    visibility = ["//visibility:public"],
    deps = [
        "//tensorflow/python:layers",
        "//tensorflow/python:ops",
        "//tensorflow/python:platform",
    ],
)

py_library(
    name = "layers",
    srcs = ["python/layers/layers.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":core_layers",
        "//tensorflow/contrib/framework:framework_py",
        "//tensorflow/contrib/layers:layers_py",
        "//third_party/py/numpy",
    ],
)

py_test(
    name = "layers_test",
    size = "small",
    srcs = ["python/layers/layers_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":layers",
        "//tensorflow/python:client_testlib",
    ],
)

py_library(
    name = "learning",
    srcs = ["python/learning.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/contrib/slim",
    ],
)

py_library(
    name = "rnn_cells",
    srcs = ["python/layers/rnn_cells.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":core_layers",
    ],
)

py_library(
    name = "pruning",
    srcs = ["python/pruning.py"],
    srcs_version = "PY2AND3",
    visibility = ["//visibility:public"],
    deps = [
        ":core_layers",
        "//tensorflow/contrib/training:training_py",
        "//tensorflow/python:platform",
        "//third_party/py/numpy",
    ],
)

py_test(
    name = "pruning_test",
    size = "small",
    srcs = ["python/pruning_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":pruning",
        "//tensorflow/python:client_testlib",
    ],
)

py_test(
    name = "rnn_cells_test",
    size = "small",
    srcs = ["python/layers/rnn_cells_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":pruning",
        ":rnn_cells",
        "//tensorflow/python:client_testlib",
    ],
)

py_library(
    name = "init_py",
    srcs = ["__init__.py"],
    srcs_version = "PY2AND3",
)

# Top-level library
py_library(
    name = "model_pruning",
    srcs_version = "PY2AND3",
    deps = [
        ":init_py",
        ":layers",
        ":learning",
        ":pruning",
        ":rnn_cells",
    ],
)

filegroup(
    name = "all_files",
    srcs = glob(
        ["**/*"],
        exclude = [
            "**/METADATA",
            "**/OWNERS",
        ],
    ),
    visibility = ["//tensorflow:__subpackages__"],
)
tensorflow/contrib/model_pruning/README.md (new file, 195 lines)
@@ -0,0 +1,195 @@
# Model pruning: Training tensorflow models to have masked connections

This document describes the API that facilitates magnitude-based pruning of a
neural network's weight tensors. The API helps inject the necessary TensorFlow
ops into the training graph so the model can be pruned while it is being
trained.

### Model creation

The first step involves adding mask and threshold variables to the layers that
need to undergo pruning. The mask variable has the same shape as the layer's
weight tensor and determines which of the weights participate in the forward
execution of the graph. This can be achieved by wrapping the weight tensor of
the layer with the `apply_mask` function provided in
[pruning.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/python/pruning.py).
For example:

```python
conv = tf.nn.conv2d(images, pruning.apply_mask(weights), stride, padding)
```

This creates a convolutional layer with the additional mask and threshold
variables.

Alternatively, the API also provides variants of TensorFlow layers with these
auxiliary variables built in (see
[layers](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/python/layers)).
Layers currently supported (a usage sketch follows this list):

* [layers.masked_conv2d](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/python/layers/layers.py?l=83)

* [layers.masked_fully_connected](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/python/layers/layers.py?l=241)

* [rnn_cells.MaskedLSTMCell](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/python/layers/rnn_cells.py?l=154)
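The sketch below shows how the masked layer variants might be combined; it is
illustrative only, and the input shape and layer sizes are assumptions rather
than recommended settings:

```python
import tensorflow as tf

from tensorflow.contrib import model_pruning

# Input shape and layer sizes here are illustrative assumptions.
images = tf.placeholder(tf.float32, [128, 24, 24, 3])

# Drop-in analogues of tf.contrib.layers.conv2d / fully_connected that also
# create the auxiliary mask and threshold variables updated by the pruning ops.
net = model_pruning.masked_conv2d(images, num_outputs=64, kernel_size=3)
net = tf.contrib.layers.flatten(net)
logits = model_pruning.masked_fully_connected(net, num_outputs=10)
```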
### Pruning-related hyperparameters

The pruning library allows for specification of the following hyperparameters:

| Hyperparameter | Type | Default | Description |
| :--- | :--- | :--- | :--- |
| name | string | model_pruning | Name of the pruning specification. Used for adding summaries and ops under a common TensorFlow name_scope. |
| begin_pruning_step | integer | 0 | The global step at which to begin pruning. |
| end_pruning_step | integer | -1 | The global step at which to terminate pruning. Defaults to -1, implying that pruning continues until training stops. |
| do_not_prune | list of strings | [""] | List of layers that are not pruned. |
| threshold_decay | float | 0.9 | The decay factor to use for exponential decay of the thresholds. |
| pruning_frequency | integer | 10 | How often the masks should be updated (in # of global_steps). |
| nbins | integer | 255 | Number of bins to use for histogram computation. |
| initial_sparsity | float | 0.0 | Initial sparsity value. |
| target_sparsity | float | 0.5 | Target sparsity value. |
| sparsity_function_begin_step | integer | 0 | The global step at which the gradual sparsity function begins to take effect. |
| sparsity_function_end_step | integer | 100 | The global step used as the end point for the gradual sparsity function. |
| sparsity_function_exponent | float | 3.0 | exponent = 1 varies sparsity linearly between initial and final; exponent > 1 varies sparsity more slowly towards the end than the beginning. |
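In practice these hyperparameters are supplied as a single comma-separated
string and parsed into a spec with `pruning.get_pruning_hparams()` (also shown
in the training-graph example further below); a minimal sketch, with
illustrative values:

```python
from tensorflow.contrib.model_pruning.python import pruning

# Illustrative values only; any unspecified hyperparameter keeps the default
# listed in the table above.
hparam_string = ('name=example_pruning,begin_pruning_step=1000,'
                 'end_pruning_step=50000,target_sparsity=0.75')
pruning_hparams = pruning.get_pruning_hparams().parse(hparam_string)
print(pruning_hparams.target_sparsity)  # 0.75
```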
The sparsity $$s_t$$ at global step $$t$$ is given by:

$$ s_{t}=s_{f}+\left(s_{i}-s_{f}\right)\left(1-\frac{t-t_{0}}{n\Delta t}\right)^{3} $$

The interval between sparsity_function_begin_step and sparsity_function_end_step
is divided into $$n$$ intervals of size equal to the pruning_frequency ($$\Delta
t$$). $$s_f$$ is the target_sparsity, $$s_i$$ is the initial_sparsity, $$t_0$$
is the sparsity_function_begin_step. In this equation, the
sparsity_function_exponent is set to 3.
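The schedule is easy to reproduce outside the library as a sanity check; the
function below is a plain-Python restatement of the formula above, not part of
the API:

```python
def gradual_sparsity(step, initial_sparsity=0.0, target_sparsity=0.5,
                     begin_step=0, end_step=100, exponent=3.0):
  """Restates the gradual sparsity formula for a given global step."""
  # Assumes end_step > begin_step; clamp the step to the active range.
  step = max(begin_step, min(step, end_step))
  fraction = (step - begin_step) / float(end_step - begin_step)
  return (target_sparsity +
          (initial_sparsity - target_sparsity) * (1.0 - fraction)**exponent)

# With the defaults, sparsity reaches 0.5 + (0 - 0.5) * 0.5**3 = 0.4375
# halfway through the schedule.
print(gradual_sparsity(50))
```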

### Adding pruning ops to the training graph

The final step involves adding ops to the training graph that monitor the
distribution of the layer's weight magnitudes and determine the layer
threshold, such that masking all the weights below this threshold achieves the
sparsity level desired for the current training step. This can be achieved as
follows:

```python
tf.app.flags.DEFINE_string(
    'pruning_hparams', '',
    """Comma separated list of pruning-related hyperparameters""")

with tf.Graph().as_default():

  # Create global step variable
  global_step = tf.train.get_global_step()

  # Parse pruning hyperparameters
  pruning_hparams = pruning.get_pruning_hparams().parse(FLAGS.pruning_hparams)

  # Create a pruning object using the pruning specification
  p = pruning.Pruning(pruning_hparams, global_step=global_step)

  # Add conditional mask update op. Executing this op will update all
  # the masks in the graph if the current global step is in the range
  # [begin_pruning_step, end_pruning_step] as specified by the pruning spec
  mask_update_op = p.conditional_mask_update_op()

  # Add summaries to keep track of the sparsity in different layers during
  # training
  p.add_pruning_summaries()

  with tf.train.MonitoredTrainingSession(...) as mon_sess:
    # Run the usual training op in the tf session
    mon_sess.run(train_op)

    # Update the masks by running the mask_update_op
    mon_sess.run(mask_update_op)
```

## Example: Pruning and training deep CNNs on the cifar10 dataset

Please see https://www.tensorflow.org/tutorials/deep_cnn for details on the
neural network architecture, setting up inputs, etc. The additional changes
needed to incorporate pruning are captured in the following:

* [cifar10_pruning.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/examples/cifar10/cifar10_pruning.py)
  creates a deep CNN with the same architecture, but adds mask and threshold
  variables for each of the weight tensors in the convolutional and
  locally-connected layers.

* [cifar10_train.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/model_pruning/examples/cifar10/cifar10_train.py)
  adds pruning ops to the training graph as described above.

To train the pruned version of cifar10:

```bash
$ examples_dir=contrib/model_pruning/examples
$ bazel build -c opt $examples_dir/cifar10:cifar10_{train,eval}
$ bazel-bin/$examples_dir/cifar10/cifar10_train --pruning_hparams=name=cifar10_pruning,begin_pruning_step=10000,end_pruning_step=100000,target_sparsity=0.9,sparsity_function_begin_step=10000,sparsity_function_end_step=100000
```

Eval:

```shell
$ bazel-bin/$examples_dir/cifar10/cifar10_eval --run_once
```

TODO(suyoggupta): Add figures showing the sparsity function, sparsity for
different layers etc.
tensorflow/contrib/model_pruning/__init__.py (new file, 46 lines)
@@ -0,0 +1,46 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model pruning implementation in tensorflow."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import
from tensorflow.contrib.model_pruning.python.layers.layers import masked_conv2d
from tensorflow.contrib.model_pruning.python.layers.layers import masked_convolution
from tensorflow.contrib.model_pruning.python.layers.layers import masked_fully_connected
from tensorflow.contrib.model_pruning.python.layers.rnn_cells import MaskedBasicLSTMCell
from tensorflow.contrib.model_pruning.python.layers.rnn_cells import MaskedLSTMCell
from tensorflow.contrib.model_pruning.python.learning import train
from tensorflow.contrib.model_pruning.python.pruning import apply_mask
from tensorflow.contrib.model_pruning.python.pruning import get_masked_weights
from tensorflow.contrib.model_pruning.python.pruning import get_masks
from tensorflow.contrib.model_pruning.python.pruning import get_thresholds
from tensorflow.contrib.model_pruning.python.pruning import get_weight_sparsity
from tensorflow.contrib.model_pruning.python.pruning import get_weights
from tensorflow.contrib.model_pruning.python.pruning import Pruning
# pylint: enable=unused-import

from tensorflow.python.util.all_util import remove_undocumented

_allowed_symbols = [
    'masked_convolution', 'masked_conv2d', 'masked_fully_connected',
    'MaskedBasicLSTMCell', 'MaskedLSTMCell', 'train', 'apply_mask',
    'get_masked_weights', 'get_masks', 'get_thresholds', 'get_weights',
    'get_weight_sparsity', 'Pruning'
]

remove_undocumented(__name__, _allowed_symbols)
tensorflow/contrib/model_pruning/examples/cifar10/BUILD (new file, 77 lines)
@@ -0,0 +1,77 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Description:
#   Example TensorFlow models for CIFAR-10

package(
    default_visibility = [
        "//tensorflow:internal",
    ],
)

licenses(["notice"])  # Apache 2.0

py_library(
    name = "cifar10_input",
    srcs = ["cifar10_input.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow:tensorflow_py",
    ],
)

py_library(
    name = "cifar10_pruning",
    srcs = ["cifar10_pruning.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":cifar10_input",
        "//tensorflow:tensorflow_py",
    ],
)

py_binary(
    name = "cifar10_eval",
    srcs = [
        "cifar10_eval.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        ":cifar10_pruning",
    ],
)

py_binary(
    name = "cifar10_train",
    srcs = [
        "cifar10_train.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        ":cifar10_pruning",
    ],
)

filegroup(
    name = "all_files",
    srcs = glob(
        ["**/*"],
        exclude = [
            "**/METADATA",
            "**/OWNERS",
        ],
    ),
    visibility = ["//tensorflow:__subpackages__"],
)
tensorflow/contrib/model_pruning/examples/cifar10/cifar10_eval.py (new file, 178 lines)
@@ -0,0 +1,178 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluation for CIFAR-10.

Accuracy:
cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs
of data) as judged by cifar10_eval.py.

Speed:
On a single Tesla K40, cifar10_train.py processes a single batch of 128 images
in 0.25-0.35 sec (i.e. 350 - 600 images/sec). The model reaches ~86%
accuracy after 100K steps in 8 hours of training time.

Usage:
Please see the tutorial and website for how to download the CIFAR-10
data set, compile the program and train the model.

http://tensorflow.org/tutorials/deep_cnn/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import datetime
import math
import sys
import time

import numpy as np
import tensorflow as tf

from tensorflow.contrib.model_pruning.examples.cifar10 import cifar10_pruning as cifar10

FLAGS = None


def eval_once(saver, summary_writer, top_k_op, summary_op):
  """Run Eval once.

  Args:
    saver: Saver.
    summary_writer: Summary writer.
    top_k_op: Top K op.
    summary_op: Summary op.
  """
  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
      print('No checkpoint file found')
      return

    # Start the queue runners.
    coord = tf.train.Coordinator()
    try:
      threads = []
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      num_iter = int(math.ceil(FLAGS.num_examples / 128))
      true_count = 0  # Counts the number of correct predictions.
      total_sample_count = num_iter * 128
      step = 0
      while step < num_iter and not coord.should_stop():
        predictions = sess.run([top_k_op])
        true_count += np.sum(predictions)
        step += 1

      # Compute precision @ 1.
      precision = true_count / total_sample_count
      print('%s: precision @ 1 = %.3f' % (datetime.datetime.now(), precision))

      summary = tf.Summary()
      summary.ParseFromString(sess.run(summary_op))
      summary.value.add(tag='Precision @ 1', simple_value=precision)
      summary_writer.add_summary(summary, global_step)
    except Exception as e:  # pylint: disable=broad-except
      coord.request_stop(e)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)


def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)


def main(argv=None):  # pylint: disable=unused-argument
  cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.eval_dir):
    tf.gfile.DeleteRecursively(FLAGS.eval_dir)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  evaluate()


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--eval_dir',
      type=str,
      default='/tmp/cifar10_eval',
      help='Directory where to write event logs.')
  parser.add_argument(
      '--eval_data',
      type=str,
      default='test',
      help="""Either 'test' or 'train_eval'.""")
  parser.add_argument(
      '--checkpoint_dir',
      type=str,
      default='/tmp/cifar10_train',
      help="""Directory where to read model checkpoints.""")
  parser.add_argument(
      '--eval_interval_secs',
      type=int,
      default=60 * 5,
      help='How often to run the eval.')
  parser.add_argument(
      '--num_examples',
      type=int,
      default=10000,
      help='Number of examples to run.')
  parser.add_argument(
      '--run_once',
      type=bool,
      default=False,
      help='Whether to run eval only once.')

  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
tensorflow/contrib/model_pruning/examples/cifar10/cifar10_input.py (new file, 256 lines)
@@ -0,0 +1,256 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Routine for decoding the CIFAR-10 binary file format."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

# Process images of this size. Note that this differs from the original CIFAR
# image size of 32 x 32. If one alters this number, then the entire model
# architecture will change and any model would need to be retrained.
IMAGE_SIZE = 24

# Global constants describing the CIFAR-10 data set.
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000


def read_cifar10(filename_queue):
  """Reads and parses examples from CIFAR10 data files.

  Recommendation: if you want N-way read parallelism, call this function
  N times. This will give you N independent Readers reading different
  files & positions within those files, which will give better mixing of
  examples.

  Args:
    filename_queue: A queue of strings with the filenames to read from.

  Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (32)
      width: number of columns in the result (32)
      depth: number of color channels in the result (3)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label in the range 0..9.
      uint8image: a [height, width, depth] uint8 Tensor with the image data
  """

  class CIFAR10Record(object):
    pass

  result = CIFAR10Record()

  # Dimensions of the images in the CIFAR-10 dataset.
  # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
  # input format.
  label_bytes = 1  # 2 for CIFAR-100
  result.height = 32
  result.width = 32
  result.depth = 3
  image_bytes = result.height * result.width * result.depth
  # Every record consists of a label followed by the image, with a
  # fixed number of bytes for each.
  record_bytes = label_bytes + image_bytes

  # Read a record, getting filenames from the filename_queue. No
  # header or footer in the CIFAR-10 format, so we leave header_bytes
  # and footer_bytes at their default of 0.
  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  result.key, value = reader.read(filename_queue)

  # Convert from a string to a vector of uint8 that is record_bytes long.
  record_bytes = tf.decode_raw(value, tf.uint8)

  # The first bytes represent the label, which we convert from uint8->int32.
  result.label = tf.cast(
      tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

  # The remaining bytes after the label represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(
      tf.strided_slice(record_bytes, [label_bytes],
                       [label_bytes + image_bytes]),
      [result.depth, result.height, result.width])
  # Convert from [depth, height, width] to [height, width, depth].
  result.uint8image = tf.transpose(depth_major, [1, 2, 0])

  return result


def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
  """Construct a queued batch of images and labels.

  Args:
    image: 3-D Tensor of [height, width, 3] of type.float32.
    label: 1-D Tensor of type.int32
    min_queue_examples: int32, minimum number of samples to retain
      in the queue that provides batches of examples.
    batch_size: Number of images per batch.
    shuffle: boolean indicating whether to use a shuffling queue.

  Returns:
    images: Images. 4D tensor of [batch_size, height, width, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  # Create a queue that shuffles the examples, and then
  # read 'batch_size' images + labels from the example queue.
  num_preprocess_threads = 16
  if shuffle:
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
  else:
    images, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size)

  # Display the training images in the visualizer.
  tf.summary.image('images', images)

  return images, tf.reshape(label_batch, [batch_size])


def distorted_inputs(data_dir, batch_size):
  """Construct distorted input for CIFAR training using the Reader ops.

  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
               for i in xrange(1, 6)]
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order of their operation.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(distorted_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print('Filling queue with %d CIFAR images before starting to train. '
        'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)


def inputs(eval_data, data_dir, batch_size):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  if not eval_data:
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                 for i in xrange(1, 6)]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
  else:
    filenames = [os.path.join(data_dir, 'test_batch.bin')]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL

  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  # Image processing for evaluation.
  # Crop the central [height, width] of the image.
  resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                         width, height)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(resized_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(num_examples_per_epoch *
                           min_fraction_of_examples_in_queue)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=False)
@ -0,0 +1,395 @@
|
|||||||
|
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
"""Builds the CIFAR-10 network with additional variables to support pruning.
|
||||||
|
|
||||||
|
Summary of available functions:
|
||||||
|
|
||||||
|
# Compute input images and labels for training. If you would like to run
|
||||||
|
# evaluations, use inputs() instead.
|
||||||
|
inputs, labels = distorted_inputs()
|
||||||
|
|
||||||
|
# Compute inference on the model inputs to make a prediction.
|
||||||
|
predictions = inference(inputs)
|
||||||
|
|
||||||
|
# Compute the total loss of the prediction with respect to the labels.
|
||||||
|
loss = loss(predictions, labels)
|
||||||
|
|
||||||
|
# Create a graph to run one step of training with respect to the loss.
|
||||||
|
train_op = train(loss, global_step)
|
||||||
|
"""
|
||||||
|
# pylint: disable=missing-docstring
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import tarfile
|
||||||
|
|
||||||
|
from six.moves import urllib
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from tensorflow.contrib.model_pruning.examples.cifar10 import cifar10_input
|
||||||
|
from tensorflow.contrib.model_pruning.python import pruning
|
||||||
|
|
||||||
|
# Global constants describing the CIFAR-10 data set.
|
||||||
|
IMAGE_SIZE = cifar10_input.IMAGE_SIZE
|
||||||
|
NUM_CLASSES = cifar10_input.NUM_CLASSES
|
||||||
|
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
|
||||||
|
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
|
||||||
|
BATCH_SIZE = 128
|
||||||
|
DATA_DIR = '/tmp/cifar10_data'
|
||||||
|
|
||||||
|
# Constants describing the training process.
|
||||||
|
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
|
||||||
|
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
|
||||||
|
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
|
||||||
|
INITIAL_LEARNING_RATE = 0.1 # Initial learning rate.
|
||||||
|
|
||||||
|
# If a model is trained with multiple GPUs, prefix all Op names with tower_name
|
||||||
|
# to differentiate the operations. Note that this prefix is removed from the
|
||||||
|
# names of the summaries when visualizing a model.
|
||||||
|
TOWER_NAME = 'tower'
|
||||||
|
|
||||||
|
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
|
||||||
|
|
||||||
|
|
||||||
|
def _activation_summary(x):
|
||||||
|
"""Helper to create summaries for activations.
|
||||||
|
|
||||||
|
Creates a summary that provides a histogram of activations.
|
||||||
|
Creates a summary that measures the sparsity of activations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
x: Tensor
|
||||||
|
Returns:
|
||||||
|
nothing
|
||||||
|
"""
|
||||||
|
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
|
||||||
|
# session. This helps the clarity of presentation on tensorboard.
|
||||||
|
tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
|
||||||
|
tf.summary.histogram(tensor_name + '/activations', x)
|
||||||
|
tf.summary.scalar(tensor_name + '/sparsity',
|
||||||
|
tf.nn.zero_fraction(x))
|
||||||
|
|
||||||
|
|
||||||
|
def _variable_on_cpu(name, shape, initializer):
|
||||||
|
"""Helper to create a Variable stored on CPU memory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
name: name of the variable
|
||||||
|
shape: list of ints
|
||||||
|
initializer: initializer for Variable
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Variable Tensor
|
||||||
|
"""
|
||||||
|
with tf.device('/cpu:0'):
|
||||||
|
dtype = tf.float32
|
||||||
|
var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
|
||||||
|
return var
|
||||||
|
|
||||||
|
|
||||||
|
def _variable_with_weight_decay(name, shape, stddev, wd):
|
||||||
|
"""Helper to create an initialized Variable with weight decay.
|
||||||
|
|
||||||
|
Note that the Variable is initialized with a truncated normal distribution.
|
||||||
|
A weight decay is added only if one is specified.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
name: name of the variable
|
||||||
|
shape: list of ints
|
||||||
|
stddev: standard deviation of a truncated Gaussian
|
||||||
|
wd: add L2Loss weight decay multiplied by this float. If None, weight
|
||||||
|
decay is not added for this Variable.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Variable Tensor
|
||||||
|
"""
|
||||||
|
dtype = tf.float32
|
||||||
|
var = _variable_on_cpu(
|
||||||
|
name,
|
||||||
|
shape,
|
||||||
|
tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
|
||||||
|
if wd is not None:
|
||||||
|
weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
|
||||||
|
tf.add_to_collection('losses', weight_decay)
|
||||||
|
return var
|
||||||
|
|
||||||
|
|
||||||
|
def distorted_inputs():
|
||||||
|
"""Construct distorted input for CIFAR training using the Reader ops.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
|
||||||
|
labels: Labels. 1D tensor of [batch_size] size.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If no data_dir
|
||||||
|
"""
|
||||||
|
if not DATA_DIR:
|
||||||
|
raise ValueError('Please supply a data_dir')
|
||||||
|
data_dir = os.path.join(DATA_DIR, 'cifar-10-batches-bin')
|
||||||
|
images, labels = cifar10_input.distorted_inputs(
|
||||||
|
data_dir=data_dir, batch_size=BATCH_SIZE)
|
||||||
|
return images, labels
|
||||||
|
|
||||||
|
|
||||||
|
def inputs(eval_data):
|
||||||
|
"""Construct input for CIFAR evaluation using the Reader ops.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
eval_data: bool, indicating if one should use the train or eval data set.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
|
||||||
|
labels: Labels. 1D tensor of [batch_size] size.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If no data_dir
|
||||||
|
"""
|
||||||
|
if not DATA_DIR:
|
||||||
|
raise ValueError('Please supply a data_dir')
|
||||||
|
data_dir = os.path.join(DATA_DIR, 'cifar-10-batches-bin')
|
||||||
|
images, labels = cifar10_input.inputs(
|
||||||
|
eval_data=eval_data, data_dir=data_dir, batch_size=BATCH_SIZE)
|
||||||
|
return images, labels
|
||||||
|
|
||||||
|
|
||||||
|
def inference(images):
|
||||||
|
"""Build the CIFAR-10 model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
images: Images returned from distorted_inputs() or inputs().
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Logits.
|
||||||
|
"""
|
||||||
|
# We instantiate all variables using tf.get_variable() instead of
|
||||||
|
# tf.Variable() in order to share variables across multiple GPU training runs.
|
||||||
|
# If we only ran this model on a single GPU, we could simplify this function
|
||||||
|
# by replacing all instances of tf.get_variable() with tf.Variable().
|
||||||
|
#
|
||||||
|
# While instantiating conv and local layers, we add mask and threshold
|
||||||
|
# variables to the layer by calling the pruning.apply_mask() function.
|
||||||
|
# Note that the masks are applied only to the weight tensors
|
||||||
|
# conv1
|
||||||
|
with tf.variable_scope('conv1') as scope:
|
||||||
|
kernel = _variable_with_weight_decay('weights',
|
||||||
|
shape=[5, 5, 3, 64],
|
||||||
|
stddev=5e-2,
|
||||||
|
wd=0.0)
|
||||||
|
|
||||||
|
conv = tf.nn.conv2d(
|
||||||
|
images, pruning.apply_mask(kernel, scope), [1, 1, 1, 1], padding='SAME')
|
||||||
|
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
|
||||||
|
pre_activation = tf.nn.bias_add(conv, biases)
|
||||||
|
conv1 = tf.nn.relu(pre_activation, name=scope.name)
|
||||||
|
_activation_summary(conv1)
|
||||||
|
|
||||||
|
# pool1
|
||||||
|
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
|
||||||
|
padding='SAME', name='pool1')
|
||||||
|
# norm1
|
||||||
|
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
|
||||||
|
name='norm1')
|
||||||
|
|
||||||
|
# conv2
|
||||||
|
with tf.variable_scope('conv2') as scope:
|
||||||
|
kernel = _variable_with_weight_decay('weights',
|
||||||
|
shape=[5, 5, 64, 64],
|
||||||
|
stddev=5e-2,
|
||||||
|
wd=0.0)
|
||||||
|
conv = tf.nn.conv2d(
|
||||||
|
norm1, pruning.apply_mask(kernel, scope), [1, 1, 1, 1], padding='SAME')
|
||||||
|
biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
|
||||||
|
pre_activation = tf.nn.bias_add(conv, biases)
|
||||||
|
conv2 = tf.nn.relu(pre_activation, name=scope.name)
|
||||||
|
_activation_summary(conv2)
|
||||||
|
|
||||||
|
# norm2
|
||||||
|
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
|
||||||
|
name='norm2')
|
||||||
|
# pool2
|
||||||
|
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
|
||||||
|
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
|
||||||
|
|
||||||
|
# local3
|
||||||
|
with tf.variable_scope('local3') as scope:
|
||||||
|
# Move everything into depth so we can perform a single matrix multiply.
|
||||||
|
reshape = tf.reshape(pool2, [BATCH_SIZE, -1])
|
||||||
|
dim = reshape.get_shape()[1].value
|
||||||
|
weights = _variable_with_weight_decay('weights', shape=[dim, 384],
|
||||||
|
stddev=0.04, wd=0.004)
|
||||||
|
biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
|
||||||
|
local3 = tf.nn.relu(
|
||||||
|
tf.matmul(reshape, pruning.apply_mask(weights, scope)) + biases,
|
||||||
|
name=scope.name)
|
||||||
|
_activation_summary(local3)
|
||||||
|
|
||||||
|
# local4
|
||||||
|
with tf.variable_scope('local4') as scope:
|
||||||
|
weights = _variable_with_weight_decay('weights', shape=[384, 192],
|
||||||
|
stddev=0.04, wd=0.004)
|
||||||
|
biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
|
||||||
|
local4 = tf.nn.relu(
|
||||||
|
tf.matmul(local3, pruning.apply_mask(weights, scope)) + biases,
|
||||||
|
name=scope.name)
|
||||||
|
_activation_summary(local4)
|
||||||
|
|
||||||
|
# linear layer(WX + b),
|
||||||
|
# We don't apply softmax here because
|
||||||
|
# tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
|
||||||
|
# and performs the softmax internally for efficiency.
|
||||||
|
with tf.variable_scope('softmax_linear') as scope:
|
||||||
|
weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
|
||||||
|
stddev=1/192.0, wd=0.0)
|
||||||
|
biases = _variable_on_cpu('biases', [NUM_CLASSES],
|
||||||
|
tf.constant_initializer(0.0))
|
||||||
|
softmax_linear = tf.add(
|
||||||
|
tf.matmul(local4, pruning.apply_mask(weights, scope)),
|
||||||
|
biases,
|
||||||
|
name=scope.name)
|
||||||
|
_activation_summary(softmax_linear)
|
||||||
|
|
||||||
|
return softmax_linear
|
||||||
|
|
||||||
|
|
||||||
|
def loss(logits, labels):
|
||||||
|
"""Add L2Loss to all the trainable variables.
|
||||||
|
|
||||||
|
Add summary for "Loss" and "Loss/avg".
|
||||||
|
Args:
|
||||||
|
logits: Logits from inference().
|
||||||
|
labels: Labels from distorted_inputs or inputs(). 1-D tensor
|
||||||
|
of shape [batch_size]
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Loss tensor of type float.
|
||||||
|
"""
|
||||||
|
# Calculate the average cross entropy loss across the batch.
|
||||||
|
labels = tf.cast(labels, tf.int64)
|
||||||
|
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
|
||||||
|
labels=labels, logits=logits, name='cross_entropy_per_example')
|
||||||
|
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
|
||||||
|
tf.add_to_collection('losses', cross_entropy_mean)
|
||||||
|
|
||||||
|
# The total loss is defined as the cross entropy loss plus all of the weight
|
||||||
|
# decay terms (L2 loss).
|
||||||
|
return tf.add_n(tf.get_collection('losses'), name='total_loss')
|
||||||
|
|
||||||
|
|
||||||
|
def _add_loss_summaries(total_loss):
|
||||||
|
"""Add summaries for losses in CIFAR-10 model.
|
||||||
|
|
||||||
|
Generates moving average for all losses and associated summaries for
|
||||||
|
visualizing the performance of the network.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
total_loss: Total loss from loss().
|
||||||
|
Returns:
|
||||||
|
loss_averages_op: op for generating moving averages of losses.
|
||||||
|
"""
|
||||||
|
# Compute the moving average of all individual losses and the total loss.
|
||||||
|
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
|
||||||
|
losses = tf.get_collection('losses')
|
||||||
|
loss_averages_op = loss_averages.apply(losses + [total_loss])
|
||||||
|
|
||||||
|
# Attach a scalar summary to all individual losses and the total loss; do the
|
||||||
|
# same for the averaged version of the losses.
|
||||||
|
for l in losses + [total_loss]:
|
||||||
|
# Name each loss as '(raw)' and name the moving average version of the loss
|
||||||
|
# as the original loss name.
|
||||||
|
tf.summary.scalar(l.op.name + ' (raw)', l)
|
||||||
|
tf.summary.scalar(l.op.name, loss_averages.average(l))
|
||||||
|
|
||||||
|
return loss_averages_op
|
||||||
|
|
||||||
|
|
||||||
|
def train(total_loss, global_step):
|
||||||
|
"""Train CIFAR-10 model.
|
||||||
|
|
||||||
|
Create an optimizer and apply to all trainable variables. Add moving
|
||||||
|
average for all trainable variables.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
total_loss: Total loss from loss().
|
||||||
|
global_step: Integer Variable counting the number of training steps
|
||||||
|
processed.
|
||||||
|
Returns:
|
||||||
|
train_op: op for training.
|
||||||
|
"""
|
||||||
|
# Variables that affect learning rate.
|
||||||
|
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / BATCH_SIZE
|
||||||
|
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
|
||||||
|
|
||||||
|
# Decay the learning rate exponentially based on the number of steps.
|
||||||
|
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
|
||||||
|
global_step,
|
||||||
|
decay_steps,
|
||||||
|
LEARNING_RATE_DECAY_FACTOR,
|
||||||
|
staircase=True)
|
||||||
|
tf.summary.scalar('learning_rate', lr)
  # Generate moving averages of all losses and associated summaries.
  loss_averages_op = _add_loss_summaries(total_loss)

  # Compute gradients.
  with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

  # Apply gradients.
  apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

  # Add histograms for trainable variables.
  for var in tf.trainable_variables():
    tf.summary.histogram(var.op.name, var)

  # Add histograms for gradients.
  for grad, var in grads:
    if grad is not None:
      tf.summary.histogram(var.op.name + '/gradients', grad)

  # Track the moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
      MOVING_AVERAGE_DECAY, global_step)
  variables_averages_op = variable_averages.apply(tf.trainable_variables())

  with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
    train_op = tf.no_op(name='train')

  return train_op


def maybe_download_and_extract():
  """Download and extract the tarball from Alex's website."""
  dest_directory = DATA_DIR
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):

    def _progress(count, block_size, total_size):
      sys.stdout.write(
          '\r>> Downloading %s %.1f%%' %
          (filename, float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
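
The 'losses' collection that loss() sums over is populated elsewhere in the
model: each layer that wants weight decay adds its own L2 term to the same
collection, and tf.add_n folds everything into the total loss. A minimal
standalone sketch of that pattern (the variable name and decay factor are
illustrative, not taken from this commit):

    import tensorflow as tf

    weights = tf.get_variable('weights', shape=[10, 10])
    # Each weight-decay term joins the shared 'losses' collection...
    weight_decay = tf.multiply(tf.nn.l2_loss(weights), 0.004,
                               name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
    # ...so the total loss is cross entropy plus every decay term at once.
    total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')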
@ -0,0 +1,159 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A binary to train pruned CIFAR-10 using a single GPU.

Accuracy:
cifar10_train.py achieves ~86% accuracy after 100K steps (256 epochs of
data) as judged by cifar10_eval.py when the target sparsity in
cifar10_pruning_spec.pbtxt is set to zero.

Results:
Sparsity | Accuracy after 150K steps
-------- | -------------------------
0%       | 86%
50%      | 86%
75%      | TODO(suyoggupta)
90%      | TODO(suyoggupta)
95%      | 77%

Usage:
Please see the tutorial and website for how to download the CIFAR-10
data set, compile the program and train the model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import datetime
import sys
import time

import tensorflow as tf

from tensorflow.contrib.model_pruning.examples.cifar10 import cifar10_pruning as cifar10
from tensorflow.contrib.model_pruning.python import pruning

FLAGS = None


def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate loss.
    loss = cifar10.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = cifar10.train(loss, global_step)

    # Parse pruning hyperparameters.
    pruning_hparams = pruning.get_pruning_hparams().parse(
        FLAGS.pruning_hparams)

    # Create a pruning object using the pruning hyperparameters.
    pruning_obj = pruning.Pruning(pruning_hparams, global_step=global_step)

    # Use the pruning_obj to add ops to the training graph to update the
    # masks. The conditional_mask_update_op will update the masks only when
    # the training step is in [begin_pruning_step, end_pruning_step] specified
    # in the pruning spec proto.
    mask_update_op = pruning_obj.conditional_mask_update_op()

    # Use the pruning_obj to add summaries to the graph to track the sparsity
    # of each of the layers.
    pruning_obj.add_pruning_summaries()

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1

      def before_run(self, run_context):
        self._step += 1
        self._start_time = time.time()
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        duration = time.time() - self._start_time
        loss_value = run_values.results
        if self._step % 10 == 0:
          num_examples_per_step = 128
          examples_per_sec = num_examples_per_step / duration
          sec_per_batch = float(duration)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print(format_str % (datetime.datetime.now(), self._step, loss_value,
                              examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
        # Update the masks.
        mon_sess.run(mask_update_op)


def main(argv=None):  # pylint: disable=unused-argument
  cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.train_dir):
    tf.gfile.DeleteRecursively(FLAGS.train_dir)
  tf.gfile.MakeDirs(FLAGS.train_dir)
  train()


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--train_dir',
      type=str,
      default='/tmp/cifar10_train',
      help='Directory where to write event logs and checkpoint.')
  parser.add_argument(
      '--pruning_hparams',
      type=str,
      default='',
      help='Comma separated list of pruning-related hyperparameters.')
  parser.add_argument(
      '--max_steps',
      type=int,
      default=1000000,
      help='Number of batches to run.')
  parser.add_argument(
      '--log_device_placement',
      type=bool,
      default=False,
      help='Whether to log device placement.')

  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
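
The --pruning_hparams flag above feeds pruning.get_pruning_hparams().parse(),
so the sparsity schedule can be set entirely from the command line. A hedged
sketch of parsing such a string (the hyperparameter names echo the begin/end
pruning step and target sparsity mentioned in the comments above; the
authoritative list lives in the pruning library, not in this file):

    from tensorflow.contrib.model_pruning.python import pruning

    hparams = pruning.get_pruning_hparams().parse(
        'name=cifar10_pruning,begin_pruning_step=10000,'
        'end_pruning_step=100000,target_sparsity=0.9')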
477
tensorflow/contrib/model_pruning/python/layers/core_layers.py
Normal file
@ -0,0 +1,477 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the core layer classes for model pruning and its functional aliases.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import standard_ops

MASK_COLLECTION = 'masks'
THRESHOLD_COLLECTION = 'thresholds'
MASKED_WEIGHT_COLLECTION = 'masked_weights'
WEIGHT_COLLECTION = 'kernel'
# The 'weights' part of the name is needed for the quantization library
# to recognize that the kernel should be quantized.
MASKED_WEIGHT_NAME = 'weights/masked_weight'


class _MaskedConv(base.Layer):
  """Abstract nD convolution layer (private, used as implementation base).

  This layer creates a convolution kernel that is convolved
  (actually cross-correlated) with the layer input to produce a tensor of
  outputs. The weight tensor of this layer is masked.
  If `use_bias` is True (and a `bias_initializer` is provided),
  a bias vector is created and added to the outputs. Finally, if
  `activation` is not `None`, it is applied to the outputs as well.

  Arguments:
    rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.
    filters: Integer, the dimensionality of the output space (i.e. the number
      of filters in the convolution).
    kernel_size: An integer or tuple/list of n integers, specifying the
      length of the convolution window.
    strides: An integer or tuple/list of n integers,
      specifying the stride length of the convolution.
      Specifying any stride value != 1 is incompatible with specifying
      any `dilation_rate` value != 1.
    padding: One of `"valid"` or `"same"` (case-insensitive).
    data_format: A string, one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, ..., channels)` while `channels_first` corresponds to
      inputs with shape `(batch, channels, ...)`.
    dilation_rate: An integer or tuple/list of n integers, specifying
      the dilation rate to use for dilated convolution.
      Currently, specifying any `dilation_rate` value != 1 is
      incompatible with specifying any `strides` value != 1.
    activation: Activation function. Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: An initializer for the convolution kernel.
    bias_initializer: An initializer for the bias vector. If None, no bias will
      be applied.
    kernel_regularizer: Optional regularizer for the convolution kernel.
    bias_regularizer: Optional regularizer for the bias vector.
    activity_regularizer: Regularizer function for the output.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: A string, the name of the layer.
  """

  def __init__(self,
               rank,
               filters,
               kernel_size,
               strides=1,
               padding='valid',
               data_format='channels_last',
               dilation_rate=1,
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=init_ops.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               trainable=True,
               name=None,
               **kwargs):
    super(_MaskedConv, self).__init__(
        trainable=trainable,
        name=name,
        activity_regularizer=activity_regularizer,
        **kwargs)
    self.rank = rank
    self.filters = filters
    self.kernel_size = utils.normalize_tuple(kernel_size, rank, 'kernel_size')
    self.strides = utils.normalize_tuple(strides, rank, 'strides')
    self.padding = utils.normalize_padding(padding)
    self.data_format = utils.normalize_data_format(data_format)
    self.dilation_rate = utils.normalize_tuple(dilation_rate, rank,
                                               'dilation_rate')
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.kernel_regularizer = kernel_regularizer
    self.bias_regularizer = bias_regularizer
    self.input_spec = base.InputSpec(ndim=self.rank + 2)

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    channel_axis = 1 if self.data_format == 'channels_first' else -1
    if input_shape[channel_axis].value is None:
      raise ValueError('The channel dimension of the inputs '
                       'should be defined. Found `None`.')
    input_dim = input_shape[channel_axis].value
    kernel_shape = self.kernel_size + (input_dim, self.filters)
    self.mask = self.add_variable(
        name='mask',
        shape=kernel_shape,
        initializer=init_ops.ones_initializer(),
        trainable=False,
        dtype=self.dtype)

    self.kernel = self.add_variable(
        name='kernel',
        shape=kernel_shape,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        trainable=True,
        dtype=self.dtype)

    self.threshold = self.add_variable(
        name='threshold',
        shape=[],
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        dtype=self.dtype)

    # Add masked_weights in the weights namescope so as to make it easier
    # for the quantization library to add quant ops.
    self.masked_kernel = math_ops.multiply(self.mask, self.kernel,
                                           MASKED_WEIGHT_NAME)

    ops.add_to_collection(MASK_COLLECTION, self.mask)
    ops.add_to_collection(MASKED_WEIGHT_COLLECTION, self.masked_kernel)
    ops.add_to_collection(THRESHOLD_COLLECTION, self.threshold)
    ops.add_to_collection(WEIGHT_COLLECTION, self.kernel)

    if self.use_bias:
      self.bias = self.add_variable(
          name='bias',
          shape=(self.filters,),
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          trainable=True,
          dtype=self.dtype)
    else:
      self.bias = None
    self.input_spec = base.InputSpec(
        ndim=self.rank + 2, axes={channel_axis: input_dim})
    self.built = True

  def call(self, inputs):
    outputs = nn.convolution(
        input=inputs,
        filter=self.masked_kernel,
        dilation_rate=self.dilation_rate,
        strides=self.strides,
        padding=self.padding.upper(),
        data_format=utils.convert_data_format(self.data_format, self.rank + 2))

    if self.bias is not None:
      if self.data_format == 'channels_first':
        if self.rank == 1:
          # nn.bias_add does not accept a 1D input tensor.
          bias = array_ops.reshape(self.bias, (1, self.filters, 1))
          outputs += bias
        if self.rank == 2:
          outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
        if self.rank == 3:
          # As of Mar 2017, direct addition is significantly slower than
          # bias_add when computing gradients. To use bias_add, we collapse Z
          # and Y into a single dimension to obtain a 4D input tensor.
          outputs_shape = outputs.shape.as_list()
          outputs_4d = array_ops.reshape(outputs, [
              outputs_shape[0], outputs_shape[1],
              outputs_shape[2] * outputs_shape[3], outputs_shape[4]
          ])
          outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW')
          outputs = array_ops.reshape(outputs_4d, outputs_shape)
      else:
        outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
      return self.activation(outputs)
    return outputs

  def _compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape).as_list()
    if self.data_format == 'channels_last':
      space = input_shape[1:-1]
      new_space = []
      for i in range(len(space)):
        new_dim = utils.conv_output_length(
            space[i],
            self.kernel_size[i],
            padding=self.padding,
            stride=self.strides[i],
            dilation=self.dilation_rate[i])
        new_space.append(new_dim)
      return tensor_shape.TensorShape([input_shape[0]] + new_space +
                                      [self.filters])
    else:
      space = input_shape[2:]
      new_space = []
      for i in range(len(space)):
        new_dim = utils.conv_output_length(
            space[i],
            self.kernel_size[i],
            padding=self.padding,
            stride=self.strides[i],
            dilation=self.dilation_rate[i])
        new_space.append(new_dim)
      return tensor_shape.TensorShape([input_shape[0], self.filters] +
                                      new_space)


class MaskedConv2D(_MaskedConv):
  """2D convolution layer (e.g. spatial convolution over images).

  This layer creates a convolution kernel that is convolved
  (actually cross-correlated) with the layer input to produce a tensor of
  outputs. If `use_bias` is True (and a `bias_initializer` is provided),
  a bias vector is created and added to the outputs. Finally, if
  `activation` is not `None`, it is applied to the outputs as well.

  Arguments:
    filters: Integer, the dimensionality of the output space (i.e. the number
      of filters in the convolution).
    kernel_size: An integer or tuple/list of 2 integers, specifying the
      height and width of the 2D convolution window.
      Can be a single integer to specify the same value for
      all spatial dimensions.
    strides: An integer or tuple/list of 2 integers,
      specifying the strides of the convolution along the height and width.
      Can be a single integer to specify the same value for
      all spatial dimensions.
      Specifying any stride value != 1 is incompatible with specifying
      any `dilation_rate` value != 1.
    padding: One of `"valid"` or `"same"` (case-insensitive).
    data_format: A string, one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, height, width, channels)` while `channels_first` corresponds to
      inputs with shape `(batch, channels, height, width)`.
    dilation_rate: An integer or tuple/list of 2 integers, specifying
      the dilation rate to use for dilated convolution.
      Can be a single integer to specify the same value for
      all spatial dimensions.
      Currently, specifying any `dilation_rate` value != 1 is
      incompatible with specifying any stride value != 1.
    activation: Activation function. Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: An initializer for the convolution kernel.
    bias_initializer: An initializer for the bias vector. If None, no bias will
      be applied.
    kernel_regularizer: Optional regularizer for the convolution kernel.
    bias_regularizer: Optional regularizer for the bias vector.
    activity_regularizer: Regularizer function for the output.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: A string, the name of the layer.
  """

  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format='channels_last',
               dilation_rate=(1, 1),
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=init_ops.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               trainable=True,
               name=None,
               **kwargs):
    super(MaskedConv2D, self).__init__(
        rank=2,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        trainable=trainable,
        name=name,
        **kwargs)


class MaskedFullyConnected(base.Layer):
  """Fully-connected layer class with masked weights.

  This layer implements the operation:
  `outputs = activation(inputs * kernel + bias)`
  where `activation` is the activation function passed as the `activation`
  argument (if not `None`), `kernel` is a weights matrix created by the layer,
  and `bias` is a bias vector created by the layer
  (only if `use_bias` is `True`).

  Note: if the input to the layer has a rank greater than 2, then it is
  flattened prior to the initial matrix multiply by `kernel`.

  Arguments:
    units: Integer or Long, dimensionality of the output space.
    activation: Activation function (callable). Set it to None to maintain a
      linear activation.
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer function for the weight matrix.
    bias_initializer: Initializer function for the bias.
    kernel_regularizer: Regularizer function for the weight matrix.
    bias_regularizer: Regularizer function for the bias.
    activity_regularizer: Regularizer function for the output.
    trainable: Boolean, if `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require reuse=True in such cases.
    reuse: Boolean, whether to reuse the weights of a previous layer
      by the same name.

  Properties:
    units: Python integer, dimensionality of the output space.
    activation: Activation function (callable).
    use_bias: Boolean, whether the layer uses a bias.
    kernel_initializer: Initializer instance (or name) for the weight matrix.
    bias_initializer: Initializer instance (or name) for the bias.
    kernel_regularizer: Regularizer instance for the weight matrix (callable).
    bias_regularizer: Regularizer instance for the bias (callable).
    activity_regularizer: Regularizer instance for the output (callable).
    kernel: Weight matrix (TensorFlow variable or tensor).
    bias: Bias vector, if applicable (TensorFlow variable or tensor).
  """

  def __init__(self,
               units,
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=init_ops.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               trainable=True,
               name=None,
               **kwargs):
    super(MaskedFullyConnected, self).__init__(
        trainable=trainable,
        name=name,
        activity_regularizer=activity_regularizer,
        **kwargs)
    self.units = units
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.kernel_regularizer = kernel_regularizer
    self.bias_regularizer = bias_regularizer
    self.input_spec = base.InputSpec(min_ndim=2)

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape[-1].value is None:
      raise ValueError('The last dimension of the inputs to `Dense` '
                       'should be defined. Found `None`.')
    self.input_spec = base.InputSpec(
        min_ndim=2, axes={-1: input_shape[-1].value})

    self.kernel = self.add_variable(
        'kernel',
        shape=[input_shape[-1].value, self.units],
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        dtype=self.dtype,
        trainable=True)

    self.mask = self.add_variable(
        name='mask',
        shape=[input_shape[-1].value, self.units],
        initializer=init_ops.ones_initializer(),
        trainable=False,
        dtype=self.dtype)

    self.threshold = self.add_variable(
        name='threshold',
        shape=[],
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        dtype=self.dtype)

    # Add masked_weights in the weights namescope so as to make it easier
    # for the quantization library to add quant ops.
    self.masked_kernel = math_ops.multiply(self.mask, self.kernel,
                                           MASKED_WEIGHT_NAME)

    ops.add_to_collection(MASK_COLLECTION, self.mask)
    ops.add_to_collection(MASKED_WEIGHT_COLLECTION, self.masked_kernel)
    ops.add_to_collection(THRESHOLD_COLLECTION, self.threshold)
    ops.add_to_collection(WEIGHT_COLLECTION, self.kernel)

    if self.use_bias:
      self.bias = self.add_variable(
          'bias',
          shape=[self.units,],
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          dtype=self.dtype,
          trainable=True)
    else:
      self.bias = None
    self.built = True

  def call(self, inputs):
    inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
    shape = inputs.get_shape().as_list()
    output_shape = shape[:-1] + [self.units]
    if len(output_shape) > 2:
      # Broadcasting is required for the inputs.
      outputs = standard_ops.tensordot(inputs, self.masked_kernel,
                                       [[len(shape) - 1], [0]])
      # Reshape the output back to the original ndim of the input.
      outputs.set_shape(output_shape)
    else:
      outputs = standard_ops.matmul(inputs, self.masked_kernel)
    if self.use_bias:
      outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
      return self.activation(outputs)  # pylint: disable=not-callable
    return outputs

  def _compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank_at_least(2)
    if input_shape[-1].value is None:
      raise ValueError(
          'The innermost dimension of input_shape must be defined, but saw: %s'
          % input_shape)
    return input_shape[:-1].concatenate(self.units)
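
Each masked layer above keeps three extra variables per weight tensor: a
non-trainable ones-initialized mask, a non-trainable scalar threshold, and
the product mask * kernel that actually feeds the conv or matmul. A
standalone numpy sketch of the thresholding idea (illustrative only; in the
library the mask updates come from the pruning ops, not from inline code like
this):

    import numpy as np

    weights = np.random.randn(3, 3, 8, 32).astype(np.float32)
    threshold = 0.5  # illustrative; the pruning ops schedule this value
    # Weights whose magnitude falls below the threshold get a zero mask entry,
    mask = (np.abs(weights) > threshold).astype(np.float32)
    # and the layer uses the elementwise product instead of the raw kernel.
    masked_weights = mask * weights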
364
tensorflow/contrib/model_pruning/python/layers/layers.py
Normal file
@ -0,0 +1,364 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow layers with added variables for parameter masking.

Branched from tensorflow/contrib/layers/python/layers/layers.py
"""
# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six

from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.contrib.model_pruning.python.layers import core_layers as core
from tensorflow.python.framework import ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables as tf_variables


def _model_variable_getter(getter,
                           name,
                           shape=None,
                           dtype=None,
                           initializer=None,
                           regularizer=None,
                           trainable=True,
                           collections=None,
                           caching_device=None,
                           partitioner=None,
                           rename=None,
                           use_resource=None,
                           **_):
  """Getter that uses model_variable for compatibility with core layers."""
  short_name = name.split('/')[-1]
  if rename and short_name in rename:
    name_components = name.split('/')
    name_components[-1] = rename[short_name]
    name = '/'.join(name_components)
  return variables.model_variable(
      name,
      shape=shape,
      dtype=dtype,
      initializer=initializer,
      regularizer=regularizer,
      collections=collections,
      trainable=trainable,
      caching_device=caching_device,
      partitioner=partitioner,
      custom_getter=getter,
      use_resource=use_resource)


def _build_variable_getter(rename=None):
  """Build a model variable getter that respects scope getter and renames."""

  # VariableScope will nest the getters.
  def layer_variable_getter(getter, *args, **kwargs):
    kwargs['rename'] = rename
    return _model_variable_getter(getter, *args, **kwargs)

  return layer_variable_getter


def _add_variable_to_collections(variable, collections_set, collections_name):
  """Adds variable (or all its parts) to all collections with that name."""
  collections = utils.get_variable_collections(collections_set,
                                               collections_name) or []
  variables_list = [variable]
  if isinstance(variable, tf_variables.PartitionedVariable):
    variables_list = [v for v in variable]
  for collection in collections:
    for var in variables_list:
      if var not in ops.get_collection(collection):
        ops.add_to_collection(collection, var)


@add_arg_scope
def masked_convolution(inputs,
                       num_outputs,
                       kernel_size,
                       stride=1,
                       padding='SAME',
                       data_format=None,
                       rate=1,
                       activation_fn=nn.relu,
                       normalizer_fn=None,
                       normalizer_params=None,
                       weights_initializer=initializers.xavier_initializer(),
                       weights_regularizer=None,
                       biases_initializer=init_ops.zeros_initializer(),
                       biases_regularizer=None,
                       reuse=None,
                       variables_collections=None,
                       outputs_collections=None,
                       trainable=True,
                       scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  The layer creates a mask variable on top of the weight variable. The input
  to the convolution operation is the elementwise multiplication of the mask
  variable and the weight variable.

  It is required that 1 <= N <= 3.

  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a
  `biases` variable would be created and added to the activations. Finally,
  if `activation_fn` is not `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified. In this case
  `stride` values != 1 are not supported.

  Args:
    inputs: A Tensor of rank N+2 of shape
      `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
      not start with "NC" (default), or
      `[batch_size, in_channels] + input_spatial_shape` if data_format starts
      with "NC".
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of N positive integers specifying the spatial
      dimensions of the filters. Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of N positive integers specifying the stride at which
      to compute output. Can be a single integer to specify the same value for
      all spatial dimensions. Specifying any `stride` value != 1 is
      incompatible with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if
      `data_format` does not start with "NC"), or the second dimension (if
      `data_format` starts with "NC"). For N=1, the valid values are "NWC"
      (default) and "NCW". For N=2, the valid values are "NHWC" (default) and
      "NCHW". For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    rate: A sequence of N positive integers specifying the dilation rate to
      use for atrous convolution. Can be a single integer to specify the same
      value for all spatial dimensions. Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Default is None for no normalizer function.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: Both `rate` and `stride` are not uniformly 1.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })

  with variable_scope.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if input_rank == 3:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)
    elif input_rank == 4:
      layer_class = core.MaskedConv2D
    elif input_rank == 5:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)
    else:
      raise ValueError('Sparse Convolution not supported for input with rank',
                       input_rank)

    if data_format is None or data_format == 'NHWC':
      df = 'channels_last'
    elif data_format == 'NCHW':
      df = 'channels_first'
    else:
      raise ValueError('Unsupported data format', data_format)

    layer = layer_class(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=df,
        dilation_rate=rate,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections,
                                 'weights')
    if layer.use_bias:
      _add_variable_to_collections(layer.bias, variables_collections,
                                   'biases')

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)


masked_conv2d = masked_convolution


@add_arg_scope
def masked_fully_connected(
    inputs,
    num_outputs,
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None):
  """Adds a sparse fully connected layer. The weight matrix is masked.

  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would
  be created and added to the hidden units. Finally, if `activation_fn` is
  not `None`, it is applied to the hidden units as well.

  Note that if `inputs` have a rank greater than 2, then `inputs` is
  flattened prior to the initial matrix multiply by `weights`.

  Args:
    inputs: A tensor of at least rank 2 and static value for the last
      dimension; i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer or long, the number of output units in the layer.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      Default is None for no normalizer function.
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables
      or a dictionary containing a different list of collections per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    The tensor variable representing the result of the series of operations.

  Raises:
    ValueError: If x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, six.integer_types):
    raise ValueError('num_outputs should be int or long, got %s.' %
                     (num_outputs,))

  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })

  with variable_scope.variable_scope(
      scope,
      'fully_connected', [inputs],
      reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    layer = core.MaskedFullyConnected(
        units=num_outputs,
        activation=None,
        use_bias=not normalizer_fn and biases_initializer,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections,
                                 'weights')
    if layer.bias is not None:
      _add_variable_to_collections(layer.bias, variables_collections,
                                   'biases')

    # Apply normalizer function / layer.
    if normalizer_fn is not None:
      if not normalizer_params:
        normalizer_params = {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)

    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
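
These wrappers drop into contrib-layers style model code: only the layer
constructors change, while the pruning schedule itself is driven by the
mask-update ops shown in cifar10_train.py. A hedged usage sketch (shapes and
sizes are illustrative):

    import tensorflow as tf
    from tensorflow.contrib.model_pruning.python.layers import layers

    images = tf.placeholder(tf.float32, [None, 32, 32, 3])
    net = layers.masked_conv2d(images, num_outputs=64, kernel_size=3)
    # Default 'SAME' padding and stride 1 keep the 32x32 spatial shape.
    net = tf.reshape(net, [-1, 32 * 32 * 64])
    logits = layers.masked_fully_connected(net, 10, activation_fn=None)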
139
tensorflow/contrib/model_pruning/python/layers/layers_test.py
Normal file
@ -0,0 +1,139 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.contrib.model_pruning.layers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.model_pruning.python.layers import core_layers
from tensorflow.contrib.model_pruning.python.layers import layers
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test


class MaskedConvolutionLayerTest(test.TestCase):

  def setUp(self):
    super(MaskedConvolutionLayerTest, self).setUp()
    self.height, self.width = 7, 9

  def testInvalidRank3(self):
    input_tensor = array_ops.ones((self.height, self.width, 3))
    with self.assertRaisesRegexp(ValueError, 'rank'):
      layers.masked_conv2d(input_tensor, 32, 3)

  def testInvalidRank5(self):
    input_tensor = array_ops.ones((8, 8, self.height, self.width, 3))
    with self.assertRaisesRegexp(ValueError, 'rank'):
      layers.masked_conv2d(input_tensor, 32, 3)

  def testSingleConvMaskAdded(self):
    kernel_size = 3
    input_depth, output_depth = 8, 32
    input_tensor = array_ops.ones((8, self.height, self.width, input_depth))
    layers.masked_conv2d(input_tensor, output_depth, kernel_size)

    masks = ops.get_collection(core_layers.MASK_COLLECTION)
    self.assertEqual(len(masks), 1)
    self.assertListEqual(masks[0].get_shape().as_list(),
                         [kernel_size, kernel_size, input_depth, output_depth])

    masked_weight = ops.get_collection(core_layers.MASKED_WEIGHT_COLLECTION)
    self.assertEqual(len(masked_weight), 1)
    self.assertListEqual(masked_weight[0].get_shape().as_list(),
                         [kernel_size, kernel_size, input_depth, output_depth])

  def testMultipleConvMaskAdded(self):
    number_of_layers = 5

    kernel_size = 3
    base_depth = 4
    depth_step = 7

    input_tensor = array_ops.ones((8, self.height, self.width, base_depth))

    top_layer = input_tensor

    for ix in range(number_of_layers):
      top_layer = layers.masked_conv2d(
          top_layer, base_depth + (ix + 1) * depth_step, kernel_size)

    masks = ops.get_collection(core_layers.MASK_COLLECTION)
    self.assertEqual(len(masks), number_of_layers)
    for ix in range(number_of_layers):
      self.assertListEqual(masks[ix].get_shape().as_list(), [
          kernel_size, kernel_size, base_depth + ix * depth_step,
          base_depth + (ix + 1) * depth_step
      ])

    masked_weight = ops.get_collection(core_layers.MASKED_WEIGHT_COLLECTION)
    self.assertEqual(len(masked_weight), number_of_layers)
    for ix in range(number_of_layers):
      self.assertListEqual(masked_weight[ix].get_shape().as_list(), [
          kernel_size, kernel_size, base_depth + ix * depth_step,
          base_depth + (ix + 1) * depth_step
      ])


class MaskedFullyConnectedLayerTest(test.TestCase):

  def testSingleFCMaskAdded(self):
    input_depth, output_depth = 8, 32
    input_tensor = array_ops.ones((5, input_depth))
    layers.masked_fully_connected(input_tensor, output_depth)

    masks = ops.get_collection(core_layers.MASK_COLLECTION)
    self.assertEqual(len(masks), 1)
    self.assertListEqual(masks[0].get_shape().as_list(),
                         [input_depth, output_depth])

    masked_weight = ops.get_collection(core_layers.MASKED_WEIGHT_COLLECTION)
    self.assertEqual(len(masked_weight), 1)
    self.assertListEqual(masked_weight[0].get_shape().as_list(),
                         [input_depth, output_depth])

  def testMultipleConvMaskAdded(self):
    number_of_layers = 5

    base_depth = 4
    depth_step = 7

    input_tensor = array_ops.ones((8, base_depth))

    top_layer = input_tensor

    for ix in range(number_of_layers):
      top_layer = layers.masked_fully_connected(
          top_layer, base_depth + (ix + 1) * depth_step)

    masks = ops.get_collection(core_layers.MASK_COLLECTION)
    self.assertEqual(len(masks), number_of_layers)
    for ix in range(number_of_layers):
      self.assertListEqual(masks[ix].get_shape().as_list(), [
          base_depth + ix * depth_step, base_depth + (ix + 1) * depth_step
      ])

    masked_weight = ops.get_collection(core_layers.MASKED_WEIGHT_COLLECTION)
    self.assertEqual(len(masked_weight), number_of_layers)
    for ix in range(number_of_layers):
      self.assertListEqual(masked_weight[ix].get_shape().as_list(), [
          base_depth + ix * depth_step, base_depth + (ix + 1) * depth_step
      ])


if __name__ == '__main__':
  test.main()
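
Because every masked layer registers its mask in MASK_COLLECTION, measured
sparsity can also be read back at runtime, not just asserted on in tests. A
hedged sketch (assumes a default graph that already contains masked layers):

    import tensorflow as tf
    from tensorflow.contrib.model_pruning.python.layers import core_layers
    from tensorflow.python.framework import ops

    masks = ops.get_collection(core_layers.MASK_COLLECTION)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      for mask in masks:
        # Masks hold 1.0 for kept weights and 0.0 for pruned ones.
        sparsity = 1.0 - sess.run(tf.reduce_mean(mask))
        print(mask.op.name, 'sparsity: %.2f' % sparsity)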
340
tensorflow/contrib/model_pruning/python/layers/rnn_cells.py
Normal file
@ -0,0 +1,340 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Module implementing RNN Cells with pruning.

This module implements BasicLSTMCell and LSTMCell with pruning.
Code adapted from third_party/tensorflow/python/ops/rnn_cell_impl.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.model_pruning.python.layers import core_layers
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import rnn_cell as tf_rnn


class MaskedBasicLSTMCell(tf_rnn.BasicLSTMCell):
  """Basic LSTM recurrent network cell with pruning.

  Overrides the call method of tensorflow BasicLSTMCell and injects the
  weight masks.

  The implementation is based on: http://arxiv.org/abs/1409.2329.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training.

  It does not allow cell clipping, a projection layer, and does not
  use peep-hole connections: it is the basic baseline.

  For advanced models, please use the full @{tf.nn.rnn_cell.LSTMCell}
  that follows.
  """

  def __init__(self,
               num_units,
               forget_bias=1.0,
               state_is_tuple=True,
               activation=None,
               reuse=None,
               name=None):
    """Initialize the basic LSTM cell with pruning.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
        Must set to `0.0` manually when restoring from CudnnLSTM-trained
        checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`. If False, they are concatenated
        along the column axis. The latter behavior will soon be deprecated.
      activation: Activation function of the inner states. Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope. If not `True`, and the existing scope already
        has the given variables, an error is raised.
      name: String, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.

      When restoring from CudnnLSTM-trained checkpoints, must use
      CudnnCompatibleLSTMCell instead.
    """
    super(MaskedBasicLSTMCell, self).__init__(
        num_units,
        forget_bias=forget_bias,
        state_is_tuple=state_is_tuple,
        activation=activation,
        reuse=reuse,
        name=name)

  def build(self, inputs_shape):
    # Call the build method of the parent class.
    super(MaskedBasicLSTMCell, self).build(inputs_shape)

    input_depth = inputs_shape[1].value
    h_depth = self._num_units
    self._mask = self.add_variable(
        name="mask",
        shape=[input_depth + h_depth, 4 * h_depth],
        initializer=init_ops.ones_initializer(),
        trainable=False,
        dtype=self.dtype)
|
||||||
|
self._threshold = self.add_variable(
|
||||||
|
name="threshold",
|
||||||
|
shape=[],
|
||||||
|
initializer=init_ops.zeros_initializer(),
|
||||||
|
trainable=False,
|
||||||
|
dtype=self.dtype)
|
||||||
|
# Add masked_weights in the weights namescope so as to make it easier
|
||||||
|
# for the quantization library to add quant ops.
|
||||||
|
self._masked_kernel = math_ops.multiply(self._mask, self._kernel,
|
||||||
|
core_layers.MASKED_WEIGHT_NAME)
|
||||||
|
if self._mask not in ops.get_collection_ref(core_layers.MASK_COLLECTION):
|
||||||
|
ops.add_to_collection(core_layers.MASK_COLLECTION, self._mask)
|
||||||
|
ops.add_to_collection(core_layers.MASKED_WEIGHT_COLLECTION,
|
||||||
|
self._masked_kernel)
|
||||||
|
ops.add_to_collection(core_layers.THRESHOLD_COLLECTION, self._threshold)
|
||||||
|
ops.add_to_collection(core_layers.WEIGHT_COLLECTION, self._kernel)
|
||||||
|
|
||||||
|
def call(self, inputs, state):
|
||||||
|
"""Long short-term memory cell (LSTM) with masks for pruning.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
inputs: `2-D` tensor with shape `[batch_size, input_size]`.
|
||||||
|
state: An `LSTMStateTuple` of state tensors, each shaped
|
||||||
|
`[batch_size, self.state_size]`, if `state_is_tuple` has been set to
|
||||||
|
`True`. Otherwise, a `Tensor` shaped
|
||||||
|
`[batch_size, 2 * self.state_size]`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A pair containing the new hidden state, and the new state (either a
|
||||||
|
`LSTMStateTuple` or a concatenated state, depending on
|
||||||
|
`state_is_tuple`).
|
||||||
|
"""
|
||||||
|
sigmoid = math_ops.sigmoid
|
||||||
|
one = constant_op.constant(1, dtype=dtypes.int32)
|
||||||
|
# Parameters of gates are concatenated into one multiply for efficiency.
|
||||||
|
if self._state_is_tuple:
|
||||||
|
c, h = state
|
||||||
|
else:
|
||||||
|
c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)
|
||||||
|
|
||||||
|
gate_inputs = math_ops.matmul(
|
||||||
|
array_ops.concat([inputs, h], 1), self._masked_kernel)
|
||||||
|
gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
|
||||||
|
|
||||||
|
# i = input_gate, j = new_input, f = forget_gate, o = output_gate
|
||||||
|
i, j, f, o = array_ops.split(
|
||||||
|
value=gate_inputs, num_or_size_splits=4, axis=one)
|
||||||
|
|
||||||
|
forget_bias_tensor = constant_op.constant(self._forget_bias, dtype=f.dtype)
|
||||||
|
# Note that using `add` and `multiply` instead of `+` and `*` gives a
|
||||||
|
# performance improvement. So using those at the cost of readability.
|
||||||
|
add = math_ops.add
|
||||||
|
multiply = math_ops.multiply
|
||||||
|
new_c = add(
|
||||||
|
multiply(c, sigmoid(add(f, forget_bias_tensor))),
|
||||||
|
multiply(sigmoid(i), self._activation(j)))
|
||||||
|
new_h = multiply(self._activation(new_c), sigmoid(o))
|
||||||
|
|
||||||
|
if self._state_is_tuple:
|
||||||
|
new_state = tf_rnn.LSTMStateTuple(new_c, new_h)
|
||||||
|
else:
|
||||||
|
new_state = array_ops.concat([new_c, new_h], 1)
|
||||||
|
return new_h, new_state
|
||||||
|
|
||||||
|
|
||||||
|
class MaskedLSTMCell(tf_rnn.LSTMCell):
|
||||||
|
"""LSTMCell with pruning.
|
||||||
|
|
||||||
|
Overrides the call method of tensorflow LSTMCell and injects the weight masks.
|
||||||
|
Masks are applied to only the weight matrix of the LSTM and not the
|
||||||
|
projection matrix.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
num_units,
|
||||||
|
use_peepholes=False,
|
||||||
|
cell_clip=None,
|
||||||
|
initializer=None,
|
||||||
|
num_proj=None,
|
||||||
|
proj_clip=None,
|
||||||
|
num_unit_shards=None,
|
||||||
|
num_proj_shards=None,
|
||||||
|
forget_bias=1.0,
|
||||||
|
state_is_tuple=True,
|
||||||
|
activation=None,
|
||||||
|
reuse=None):
|
||||||
|
"""Initialize the parameters for an LSTM cell with masks for pruning.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
num_units: int, The number of units in the LSTM cell
|
||||||
|
use_peepholes: bool, set True to enable diagonal/peephole connections.
|
||||||
|
cell_clip: (optional) A float value, if provided the cell state is clipped
|
||||||
|
by this value prior to the cell output activation.
|
||||||
|
initializer: (optional) The initializer to use for the weight and
|
||||||
|
projection matrices.
|
||||||
|
num_proj: (optional) int, The output dimensionality for the projection
|
||||||
|
matrices. If None, no projection is performed.
|
||||||
|
proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is
|
||||||
|
provided, then the projected values are clipped elementwise to within
|
||||||
|
`[-proj_clip, proj_clip]`.
|
||||||
|
num_unit_shards: Deprecated, will be removed by Jan. 2017.
|
||||||
|
Use a variable_scope partitioner instead.
|
||||||
|
num_proj_shards: Deprecated, will be removed by Jan. 2017.
|
||||||
|
Use a variable_scope partitioner instead.
|
||||||
|
forget_bias: Biases of the forget gate are initialized by default to 1
|
||||||
|
in order to reduce the scale of forgetting at the beginning of
|
||||||
|
the training. Must set it manually to `0.0` when restoring from
|
||||||
|
CudnnLSTM trained checkpoints.
|
||||||
|
state_is_tuple: If True, accepted and returned states are 2-tuples of
|
||||||
|
the `c_state` and `m_state`. If False, they are concatenated
|
||||||
|
along the column axis. This latter behavior will soon be deprecated.
|
||||||
|
activation: Activation function of the inner states. Default: `tanh`.
|
||||||
|
reuse: (optional) Python boolean describing whether to reuse variables
|
||||||
|
in an existing scope. If not `True`, and the existing scope already has
|
||||||
|
the given variables, an error is raised.
|
||||||
|
|
||||||
|
When restoring from CudnnLSTM-trained checkpoints, must use
|
||||||
|
CudnnCompatibleLSTMCell instead.
|
||||||
|
"""
|
||||||
|
super(MaskedLSTMCell, self).__init__(
|
||||||
|
num_units,
|
||||||
|
use_peepholes=use_peepholes,
|
||||||
|
cell_clip=cell_clip,
|
||||||
|
initializer=initializer,
|
||||||
|
num_proj=num_proj,
|
||||||
|
proj_clip=proj_clip,
|
||||||
|
num_unit_shards=num_unit_shards,
|
||||||
|
num_proj_shards=num_proj_shards,
|
||||||
|
forget_bias=forget_bias,
|
||||||
|
state_is_tuple=state_is_tuple,
|
||||||
|
activation=activation,
|
||||||
|
reuse=reuse)
|
||||||
|
|
||||||
|
def build(self, inputs_shape):
|
||||||
|
# Call the build method of the parent class.
|
||||||
|
super(MaskedLSTMCell, self).build(inputs_shape)
|
||||||
|
|
||||||
|
input_depth = inputs_shape[1].value
|
||||||
|
h_depth = self._num_units
|
||||||
|
self._mask = self.add_variable(
|
||||||
|
name="mask",
|
||||||
|
shape=[input_depth + h_depth, 4 * h_depth],
|
||||||
|
initializer=init_ops.ones_initializer(),
|
||||||
|
trainable=False,
|
||||||
|
dtype=self.dtype)
|
||||||
|
self._threshold = self.add_variable(
|
||||||
|
name="threshold",
|
||||||
|
shape=[],
|
||||||
|
initializer=init_ops.zeros_initializer(),
|
||||||
|
trainable=False,
|
||||||
|
dtype=self.dtype)
|
||||||
|
# Add masked_weights in the weights namescope so as to make it easier
|
||||||
|
# for the quantization library to add quant ops.
|
||||||
|
self._masked_kernel = math_ops.multiply(self._mask, self._kernel,
|
||||||
|
core_layers.MASKED_WEIGHT_NAME)
|
||||||
|
if self._mask not in ops.get_collection_ref(core_layers.MASK_COLLECTION):
|
||||||
|
ops.add_to_collection(core_layers.MASK_COLLECTION, self._mask)
|
||||||
|
ops.add_to_collection(core_layers.MASKED_WEIGHT_COLLECTION,
|
||||||
|
self._masked_kernel)
|
||||||
|
ops.add_to_collection(core_layers.THRESHOLD_COLLECTION, self._threshold)
|
||||||
|
ops.add_to_collection(core_layers.WEIGHT_COLLECTION, self._kernel)
|
||||||
|
|
||||||
|
def call(self, inputs, state):
|
||||||
|
"""Run one step of LSTM.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
inputs: input Tensor, 2D, `[batch, num_units].
|
||||||
|
state: if `state_is_tuple` is False, this must be a state Tensor,
|
||||||
|
`2-D, [batch, state_size]`. If `state_is_tuple` is True, this must be a
|
||||||
|
tuple of state Tensors, both `2-D`, with column sizes `c_state` and
|
||||||
|
`m_state`.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A tuple containing:
|
||||||
|
|
||||||
|
- A `2-D, [batch, output_dim]`, Tensor representing the output of the
|
||||||
|
LSTM after reading `inputs` when previous state was `state`.
|
||||||
|
Here output_dim is:
|
||||||
|
num_proj if num_proj was set,
|
||||||
|
num_units otherwise.
|
||||||
|
- Tensor(s) representing the new state of LSTM after reading `inputs` when
|
||||||
|
the previous state was `state`. Same type and shape(s) as `state`.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If input size cannot be inferred from inputs via
|
||||||
|
static shape inference.
|
||||||
|
"""
|
||||||
|
num_proj = self._num_units if self._num_proj is None else self._num_proj
|
||||||
|
sigmoid = math_ops.sigmoid
|
||||||
|
|
||||||
|
if self._state_is_tuple:
|
||||||
|
(c_prev, m_prev) = state
|
||||||
|
else:
|
||||||
|
c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
|
||||||
|
m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
|
||||||
|
|
||||||
|
input_size = inputs.get_shape().with_rank(2)[1]
|
||||||
|
if input_size.value is None:
|
||||||
|
raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
|
||||||
|
|
||||||
|
# i = input_gate, j = new_input, f = forget_gate, o = output_gate
|
||||||
|
lstm_matrix = math_ops.matmul(
|
||||||
|
array_ops.concat([inputs, m_prev], 1), self._masked_kernel)
|
||||||
|
lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)
|
||||||
|
|
||||||
|
i, j, f, o = array_ops.split(
|
||||||
|
value=lstm_matrix, num_or_size_splits=4, axis=1)
|
||||||
|
# Diagonal connections
|
||||||
|
if self._use_peepholes:
|
||||||
|
c = (
|
||||||
|
sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
|
||||||
|
sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
|
||||||
|
else:
|
||||||
|
c = (
|
||||||
|
sigmoid(f + self._forget_bias) * c_prev +
|
||||||
|
sigmoid(i) * self._activation(j))
|
||||||
|
|
||||||
|
if self._cell_clip is not None:
|
||||||
|
# pylint: disable=invalid-unary-operand-type
|
||||||
|
c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
|
||||||
|
# pylint: enable=invalid-unary-operand-type
|
||||||
|
if self._use_peepholes:
|
||||||
|
m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
|
||||||
|
else:
|
||||||
|
m = sigmoid(o) * self._activation(c)
|
||||||
|
|
||||||
|
if self._num_proj is not None:
|
||||||
|
m = math_ops.matmul(m, self._proj_kernel)
|
||||||
|
|
||||||
|
if self._proj_clip is not None:
|
||||||
|
# pylint: disable=invalid-unary-operand-type
|
||||||
|
m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
|
||||||
|
# pylint: enable=invalid-unary-operand-type
|
||||||
|
|
||||||
|
new_state = (
|
||||||
|
tf_rnn.LSTMStateTuple(c, m)
|
||||||
|
if self._state_is_tuple else array_ops.concat([c, m], 1))
|
||||||
|
return m, new_state
|
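A minimal usage sketch for the cells above (editorial addition, not part of the commit; graph-mode TF 1.x assumed, all sizes made up). Building the cell once registers exactly one mask/threshold pair in the pruning collections, which the Pruning class further below then updates:

import tensorflow as tf

from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.contrib.model_pruning.python.layers import rnn_cells

batch_size, dim = 4, 16
inputs = tf.random_normal([batch_size, dim])
cell = rnn_cells.MaskedBasicLSTMCell(dim)
state = cell.zero_state(batch_size, tf.float32)
output, new_state = cell(inputs, state)

# build() added one mask over the concatenated input-to-gates and
# hidden-to-gates kernel, hence the [2*dim, 4*dim] shape.
assert len(pruning.get_masks()) == 1
assert pruning.get_masks()[0].shape == (2 * dim, 4 * dim)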
@ -0,0 +1,85 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for creating different numbers of masks in rnn_cells."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.contrib.model_pruning.python.layers import rnn_cells
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import rnn_cell as tf_rnn_cells
from tensorflow.python.ops import variables
from tensorflow.python.platform import test


class RnnCellsTest(test.TestCase):

  def setUp(self):
    super(RnnCellsTest, self).setUp()
    self.batch_size = 8
    self.dim = 10

  def testMaskedBasicLSTMCell(self):
    expected_num_masks = 1
    expected_num_rows = 2 * self.dim
    expected_num_cols = 4 * self.dim
    with self.test_session():
      inputs = variables.Variable(
          random_ops.random_normal([self.batch_size, self.dim]))
      c = variables.Variable(
          random_ops.random_normal([self.batch_size, self.dim]))
      h = variables.Variable(
          random_ops.random_normal([self.batch_size, self.dim]))
      state = tf_rnn_cells.LSTMStateTuple(c, h)
      lstm_cell = rnn_cells.MaskedBasicLSTMCell(self.dim)
      lstm_cell(inputs, state)
      self.assertEqual(len(pruning.get_masks()), expected_num_masks)
      self.assertEqual(len(pruning.get_masked_weights()), expected_num_masks)
      self.assertEqual(len(pruning.get_thresholds()), expected_num_masks)
      self.assertEqual(len(pruning.get_weights()), expected_num_masks)

      for mask in pruning.get_masks():
        self.assertEqual(mask.shape, (expected_num_rows, expected_num_cols))
      for weight in pruning.get_weights():
        self.assertEqual(weight.shape, (expected_num_rows, expected_num_cols))

  def testMaskedLSTMCell(self):
    expected_num_masks = 1
    expected_num_rows = 2 * self.dim
    expected_num_cols = 4 * self.dim
    with self.test_session():
      inputs = variables.Variable(
          random_ops.random_normal([self.batch_size, self.dim]))
      c = variables.Variable(
          random_ops.random_normal([self.batch_size, self.dim]))
      h = variables.Variable(
          random_ops.random_normal([self.batch_size, self.dim]))
      state = tf_rnn_cells.LSTMStateTuple(c, h)
      lstm_cell = rnn_cells.MaskedLSTMCell(self.dim)
      lstm_cell(inputs, state)
      self.assertEqual(len(pruning.get_masks()), expected_num_masks)
      self.assertEqual(len(pruning.get_masked_weights()), expected_num_masks)
      self.assertEqual(len(pruning.get_thresholds()), expected_num_masks)
      self.assertEqual(len(pruning.get_weights()), expected_num_masks)

      for mask in pruning.get_masks():
        self.assertEqual(mask.shape, (expected_num_rows, expected_num_cols))
      for weight in pruning.get_weights():
        self.assertEqual(weight.shape, (expected_num_rows, expected_num_cols))


if __name__ == '__main__':
  test.main()
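# Editorial note (not part of the commit): the expected shapes above follow
# directly from build() in rnn_cells.py. The kernel multiplies the
# concatenation [inputs, h] (dim + dim rows) and emits the four gates
# (4 * dim columns), so the single mask is (2*dim) x (4*dim):
#
#   dim = 10
#   expected_num_rows = 2 * dim  # input_depth + h_depth -> 20
#   expected_num_cols = 4 * dim  # i, j, f, o gates stacked -> 40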
188
tensorflow/contrib/model_pruning/python/learning.py
Normal file
@ -0,0 +1,188 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrapper around tf-slim's training code contrib/slim/python/slim/learning.py
to support training of pruned models.

*******************************************************************
* A simple working training script with support for model pruning *
*******************************************************************

  # Load data and create the model:
  images, labels = LoadData(...)
  predictions = MyModel(images)

  # Define the loss:
  slim.losses.log_loss(predictions, labels)
  total_loss = slim.losses.get_total_loss()

  # Define the optimizer:
  optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)

  # Create the train_op
  train_op = slim.learning.create_train_op(total_loss, optimizer)

  # Set up sparsity
  sparsity = pruning.setup_gradual_sparsity(self.global_step)

  # Create mask update op
  mask_update_op = pruning.add_mask_update_op(sparsity)

  # Run training.
  learning.train(train_op,
                 my_log_dir,
                 mask_update_op)

See contrib/slim/python/slim/learning.py for additional examples.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib import slim as _slim

_USE_DEFAULT = 0
train_step = _slim.learning.train_step


def train(train_op,
          logdir,
          mask_update_op,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          summary_writer=_USE_DEFAULT,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None,
          trace_every_n_steps=None):
  """Wrapper around tf-slim's train function.

  Runs a training loop using a TensorFlow supervisor.
  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written to. If None, model
      checkpoints and summaries will not be written.
    mask_update_op: Operation that upon execution updates the weight masks
      and thresholds.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a
      dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`.
      By default, two `Boolean`, scalar ops called "should_stop" and
      "should_log" are provided.
    log_every_n_steps: The frequency, in terms of global steps, at which the
      loss and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The address of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the
      primary replica during replica training.
    global_step: The `Tensor` representing the global step. If left as
      `None`, then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during
      training, as measured by 'global_step': training will stop if
      global_step is greater than 'number_of_steps'. If the value is left as
      None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling
      `tf.global_variables_initializer()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.local_variables_initializer()` and `tf.tables_initializer()`.
    init_fn: An optional callable to be executed after `init_op` is called.
      The callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no
      summaries should be written. If unset, we create a SummaryWriter.
    startup_delay_steps: The number of steps to wait for before beginning.
      Note that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer, or a list
      of them. If the argument is supplied, gradient updates will be
      synchronous. If left as `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
      and add it to the summaries every `trace_every_n_steps`. If None, no
      trace information will be produced or saved.

  Returns:
    the value of the loss function after training.

  Raises:
    ValueError: if `train_op` is empty or if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, if `number_of_steps` is
      negative, or if `trace_every_n_steps` is not `None` and no `logdir` is
      provided.
  """

  def train_step_with_pruning_fn(sess, train_op, global_step,
                                 train_step_kwargs):
    total_loss, should_stop = train_step_fn(sess, train_op, global_step,
                                            train_step_kwargs)
    sess.run(mask_update_op)
    return total_loss, should_stop

  total_loss, _ = _slim.learning.train(
      train_op,
      logdir,
      train_step_fn=train_step_with_pruning_fn,
      train_step_kwargs=train_step_kwargs,
      log_every_n_steps=log_every_n_steps,
      graph=graph,
      master=master,
      is_chief=is_chief,
      global_step=global_step,
      number_of_steps=number_of_steps,
      init_op=init_op,
      init_feed_dict=init_feed_dict,
      local_init_op=local_init_op,
      init_fn=init_fn,
      ready_op=ready_op,
      summary_op=summary_op,
      save_summaries_secs=save_summaries_secs,
      summary_writer=summary_writer,
      startup_delay_steps=startup_delay_steps,
      saver=saver,
      save_interval_secs=save_interval_secs,
      sync_optimizer=sync_optimizer,
      session_config=session_config,
      trace_every_n_steps=trace_every_n_steps)

  return total_loss
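An end-to-end sketch of how this wrapper is meant to be driven (editorial, not part of the commit). Note the docstring's `setup_gradual_sparsity` and `add_mask_update_op` helpers are illustrative names that do not appear in this commit's pruning.py; the op it actually exposes is `Pruning.conditional_mask_update_op()`, used below. Shapes, paths and step counts are made up:

import tensorflow as tf

from tensorflow.contrib.model_pruning.python import learning
from tensorflow.contrib.model_pruning.python import pruning

# Toy regression problem.
images = tf.random_normal([32, 10])
labels = tf.random_normal([32, 1])
weights = tf.get_variable('weights', shape=[10, 1])
# apply_mask() registers the mask/threshold consumed by Pruning below.
predictions = tf.matmul(images, pruning.apply_mask(weights))
loss = tf.losses.mean_squared_error(labels, predictions)

global_step = tf.train.get_or_create_global_step()
train_op = tf.contrib.slim.learning.create_train_op(
    loss, tf.train.GradientDescentOptimizer(0.01), global_step=global_step)

p = pruning.Pruning(global_step=global_step)
mask_update_op = p.conditional_mask_update_op()

# train() runs one slim train step, then runs mask_update_op after it.
learning.train(train_op, '/tmp/pruning_demo', mask_update_op,
               number_of_steps=100)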
585
tensorflow/contrib/model_pruning/python/pruning.py
Normal file
@ -0,0 +1,585 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to add support for magnitude-based model pruning.

  # Adds variables and ops to the graph to enable
  # elementwise masking of weights
  apply_mask(weights)

  # Returns a list containing the sparsity of each of the weight tensors
  get_weight_sparsity()

  # Returns a list of all the masked weight tensorflow variables
  get_masked_weights()

  # Returns a list of all the mask tensorflow variables
  get_masks()

  # Returns a list of all the thresholds
  get_thresholds()

  # Returns a list of all the weight tensors that have been masked
  get_weights()

  The Pruning class uses a proto (defined in pruning.proto) to set up the
  parameters for a pruning specification. Here's a typical usage:

  # Initialize a pruning spec from a proto
  pruning_spec = '/tmp/pruning.pb'
  p = Pruning(pruning_spec)

  # Add mask update ops to the graph
  mask_update_op = p.conditional_mask_update_op()

  # Add the summaries
  p.add_pruning_summaries()

  # Run the op
  session.run(mask_update_op)

  # A Pruning object also accepts an externally defined sparsity variable:
  sparsity = tf.Variable(0.5, name = "ConstantSparsity")
  pruning_spec = '/tmp/pruning.pb'
  p = Pruning(pruning_spec, sparsity=sparsity)
"""
# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.model_pruning.python.layers import core_layers as core
from tensorflow.contrib.training.python.training import hparam
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_impl
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.summary import summary
from tensorflow.python.training import training_util

_MASK_COLLECTION = core.MASK_COLLECTION
_THRESHOLD_COLLECTION = core.THRESHOLD_COLLECTION
_MASKED_WEIGHT_COLLECTION = core.MASKED_WEIGHT_COLLECTION
_WEIGHT_COLLECTION = core.WEIGHT_COLLECTION
_MASKED_WEIGHT_NAME = core.MASKED_WEIGHT_NAME


def _weight_mask_variable(var, scope):
  """Create a mask for the weights.

  This function adds a variable 'mask' to the graph.

  Args:
    var: the weight variable that needs to be masked
    scope: The variable scope of the variable var

  Returns:
    the mask variable of the same size and shape as var, initialized to all
    1s.
  """
  with variable_scope.variable_scope(scope):
    mask = variable_scope.get_variable(
        'mask',
        var.get_shape(),
        initializer=init_ops.ones_initializer(),
        trainable=False,
        dtype=var.dtype)
  return mask


def _weight_threshold_variable(var, scope):
  """Create a scalar threshold for the weights.

  This function adds a variable 'threshold' to the graph.

  Args:
    var: The weight variable that needs to be masked
    scope: The variable scope of the variable var

  Returns:
    a scalar threshold variable initialized to 0.
  """
  with variable_scope.variable_scope(scope):
    threshold = variable_scope.get_variable(
        'threshold', [],
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        dtype=var.dtype)
  return threshold


def _histogram(values, value_range, nbins=100, dtype=np.int32, name=None):
  """Return histogram of values.

  Given the tensor `values`, this operation returns a rank 1 histogram
  counting the number of entries in `values` that fell into every bin. The
  bins are equal width and determined by the arguments `value_range` and
  `nbins`.

  Args:
    values: Numeric `Tensor`.
    value_range: Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] will be mapped to hist[0],
      values >= value_range[1] will be mapped to hist[-1].
    nbins: Scalar `int32 Tensor`. Number of histogram bins.
    dtype: dtype for returned histogram.
    name: A name for this operation (defaults to 'histogram').

  Returns:
    A 1-D `Tensor` holding histogram of values.
  """
  with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope:
    values = ops.convert_to_tensor(values, name='values')
    values = gen_array_ops.reshape(values, [-1])
    value_range = ops.convert_to_tensor(value_range, name='value_range')
    nbins = ops.convert_to_tensor(nbins, dtype=np.int32, name='nbins')
    nbins_float = math_ops.cast(nbins, values.dtype)

    # Map tensor values that fall within value_range to [0, 1].
    scaled_values = math_ops.truediv(
        values - value_range[0],
        value_range[1] - value_range[0],
        name='scaled_values')

    # Map tensor values within the open interval value_range to
    # {0,.., nbins-1}; values outside the open interval will be zero or
    # less, or nbins or more.
    indices = math_ops.floor(nbins_float * scaled_values, name='indices')

    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
    indices = math_ops.cast(
        clip_ops.clip_by_value(indices, 0, nbins_float - 1), np.int32)

    return math_ops.unsorted_segment_sum(
        array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
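# Editorial sketch (not part of the commit): what _histogram computes on a
# concrete input. With values [0, 1, 2, 3] and value_range [0, 4] over
# nbins=4 bins of width 1, each value lands in its own bin:
#
#   scaled_values = [0.0, 0.25, 0.5, 0.75]
#   indices       = floor(4 * scaled_values) = [0, 1, 2, 3]
#   histogram     = [1, 1, 1, 1]
#
# Values at or above value_range[1] are clipped into the last bin.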

def _determine_partitioned_axis(partitioned_variable):
  partitioned_axis = 0
  concatenated_variable_shape = partitioned_variable.get_shape()
  for partition in partitioned_variable:
    partition_shape = partition.get_shape()
    maybe_partitioned_axis = np.less(partition_shape,
                                     concatenated_variable_shape)
    # Sanity check: make sure the number of partitioned axes == 1
    if np.count_nonzero(maybe_partitioned_axis) != 1:
      raise ValueError('Number of partitioned axes %s not equal to 1' %
                       np.count_nonzero(maybe_partitioned_axis))
    partitioned_axis = np.where(maybe_partitioned_axis)[0][0]
  return partitioned_axis


def _variable_assign(var, new_value):
  return state_ops.assign(var, new_value, name=var.op.name + '_assign')


def _partitioned_variable_assign(partitioned_var, new_value):
  """Assign op for partitioned variables.

  Args:
    partitioned_var: A partitioned tensorflow variable
    new_value: Value to be assigned to the variable var

  Returns:
    A tensorflow op that groups the assign ops for each of the variable
    slices
  """
  # Determine which axis was used to partition the variable. Currently
  # tensorflow allows partitioning a variable only along one axis.
  axis = 0 if len(partitioned_var) == 1 else _determine_partitioned_axis(
      partitioned_var)

  partition_sizes = np.array(
      [partition.get_shape()[axis] for partition in partitioned_var])
  new_partitioned_values = array_ops.split(
      new_value,
      ops.convert_to_tensor(partition_sizes, dtype=np.int32),
      axis=axis)
  op_list = []
  for partition in partitioned_var:
    op_list.append(
        _variable_assign(partition, new_partitioned_values[len(op_list)]))
  return control_flow_ops.group(
      *op_list, name=partitioned_var.name + '_group_assign')


def apply_mask(x, scope=''):
  """Apply mask to a given weight tensor.

  Args:
    x: Input weight tensor
    scope: The current variable scope. Defaults to ""
  Returns:
    Tensor representing masked_weights
  """

  mask = _weight_mask_variable(x, scope)
  threshold = _weight_threshold_variable(x, scope)
  # Add masked_weights in the weights namescope so as to make it easier
  # for the quantization library to add quant ops.
  masked_weights = math_ops.multiply(mask, x, _MASKED_WEIGHT_NAME)

  # Make sure the mask for a given variable is not added multiple times to
  # the collection. This is particularly important when applying masks to
  # RNN weight variables.
  if mask not in ops.get_collection_ref(_MASK_COLLECTION):
    ops.add_to_collection(_THRESHOLD_COLLECTION, threshold)
    ops.add_to_collection(_MASK_COLLECTION, mask)
    ops.add_to_collection(_MASKED_WEIGHT_COLLECTION, masked_weights)
    ops.add_to_collection(_WEIGHT_COLLECTION, x)
  return masked_weights
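# Editorial sketch (not part of the commit): apply_mask() is a no-op on the
# forward value until a Pruning object rewrites the mask. Assuming graph-mode
# TF 1.x:
#
#   weights = tf.get_variable('w', initializer=tf.linspace(1.0, 100.0, 100))
#   masked = pruning.apply_mask(weights)
#   # masked == weights initially, since the mask is all ones; after running
#   # Pruning(...).mask_update_op() with sparsity 0.5, roughly half of the
#   # entries of `masked` are zeroed (see pruning_test.py below).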

def get_masked_weights():
  return ops.get_collection(_MASKED_WEIGHT_COLLECTION)


def get_masks():
  return ops.get_collection(_MASK_COLLECTION)


def get_thresholds():
  return ops.get_collection(_THRESHOLD_COLLECTION)


def get_weights():
  return ops.get_collection(_WEIGHT_COLLECTION)


def get_weight_sparsity():
  """Get sparsity of the weights.

  Args:
    None

  Returns:
    A list containing the sparsity of each of the weight tensors
  """
  masks = get_masks()
  return [nn_impl.zero_fraction(mask) for mask in masks]


def get_pruning_hparams():
  """Get a tf.HParams object with the default values for the hyperparameters.

    name: string
      name of the pruning specification. Used for adding summaries and ops
      under a common tensorflow name_scope
    begin_pruning_step: integer
      the global step at which to begin pruning
    end_pruning_step: integer
      the global step at which to terminate pruning. Defaults to -1, implying
      that pruning continues until training stops
    do_not_prune: list of strings
      list of layers that are not pruned
    threshold_decay: float
      the decay factor to use for exponential decay of the thresholds
    pruning_frequency: integer
      how often the masks should be updated (in number of global_steps)
    nbins: integer
      number of bins to use for histogram computation
    initial_sparsity: float
      initial sparsity value
    target_sparsity: float
      target sparsity value
    sparsity_function_begin_step: integer
      the global step at which the gradual sparsity function begins to
      take effect
    sparsity_function_end_step: integer
      the global step used as the end point for the gradual sparsity function
    sparsity_function_exponent: float
      exponent = 1 gives linearly varying sparsity between initial and final;
      exponent > 1 varies more slowly towards the end than the beginning

    We use the following sparsity function:

    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step)/pruning_frequency
    sparsity(step) = (initial_sparsity - target_sparsity)*
                     [1-step/(num_steps -1)]**exponent + target_sparsity

  Args:
    None

  Returns:
    tf.HParams object initialized to default values

  """
  return hparam.HParams(
      name='model_pruning',
      begin_pruning_step=0,
      end_pruning_step=-1,
      do_not_prune=[''],
      threshold_decay=0.9,
      pruning_frequency=10,
      nbins=255,
      initial_sparsity=0,
      target_sparsity=0.5,
      sparsity_function_begin_step=0,
      sparsity_function_end_step=100,
      sparsity_function_exponent=3)
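# Editorial worked example (not part of the commit) of the gradual sparsity
# schedule documented above, with the default hyperparameters
# (initial_sparsity=0, target_sparsity=0.5, begin=0, end=100, exponent=3):
#
#   p        = clip((step - 0) / (100 - 0), 0.0, 1.0)
#   sparsity = (0 - 0.5) * (1 - p)**3 + 0.5
#
#   step=0   -> sparsity = -0.5 * 1.000 + 0.5 = 0.0
#   step=50  -> sparsity = -0.5 * 0.125 + 0.5 = 0.4375
#   step=100 -> sparsity = -0.5 * 0.000 + 0.5 = 0.5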

class Pruning(object):

  def __init__(self,
               spec=None,
               global_step=None,
               sparsity=None,
               partitioner=None):
    """Set up the specification for model pruning.

    If a spec is provided, the sparsity is set up based on the
    sparsity_function in the spec. The effect of sparsity_function is
    overridden if a sparsity variable is passed to the constructor. This
    enables setting up arbitrary sparsity profiles externally and passing
    them to this pruning function.

    Args:
      spec: Pruning spec as defined in pruning.proto
      global_step: A tensorflow variable that is used while setting up the
        sparsity function
      sparsity: A tensorflow scalar variable storing the sparsity
      partitioner: The tensorflow partitioner function used to distribute
        parameters across shards
    """
    # Pruning specification
    self._spec = spec if spec else get_pruning_hparams()

    # A tensorflow variable that tracks the sparsity function.
    # If not provided as input, the graph must already contain the
    # global_step variable before calling this constructor.
    self._global_step = self._setup_global_step(global_step)

    # Stores the tensorflow sparsity variable.
    # Built using self._setup_sparsity() or provided externally
    self._sparsity = sparsity if sparsity else self._setup_sparsity()

    # Stores the partitioner function used to partition variables across
    # tasks.
    self._partitioner = partitioner

    # List of tensorflow assignment ops for new masks and thresholds
    self._assign_ops = []

    # Tensorflow variable keeping track of the last global step when the
    # masks were updated
    self._last_update_step = self._setup_last_update_step()

  def _setup_global_step(self, global_step):
    graph_global_step = global_step
    if graph_global_step is None:
      graph_global_step = training_util.get_global_step()

    return math_ops.cast(graph_global_step, np.int32)

  def _setup_sparsity(self):
    begin_step = self._spec.sparsity_function_begin_step
    end_step = self._spec.sparsity_function_end_step
    initial_sparsity = self._spec.initial_sparsity
    target_sparsity = self._spec.target_sparsity
    exponent = self._spec.sparsity_function_exponent

    if begin_step >= end_step:
      raise ValueError(
          'Pruning must begin before it can end. begin_step=%d, end_step=%d' %
          (begin_step, end_step))

    with ops.name_scope(self._spec.name):
      p = math_ops.minimum(1.0,
                           math_ops.maximum(
                               0.0,
                               math_ops.div(
                                   math_ops.cast(
                                       self._global_step - begin_step,
                                       np.float32),
                                   end_step - begin_step)))
      sparsity = math_ops.add(
          math_ops.multiply(initial_sparsity - target_sparsity,
                            math_ops.pow(1 - p, exponent)),
          target_sparsity,
          name='sparsity')

    return sparsity

  def _setup_last_update_step(self):
    with variable_scope.variable_scope(self._spec.name) as scope:
      try:
        last_update_step = variable_scope.get_variable(
            'last_mask_update_step', [],
            initializer=init_ops.zeros_initializer(),
            trainable=False,
            dtype=np.int32)
      except ValueError:
        scope.reuse_variables()
        last_update_step = variable_scope.get_variable(
            'last_mask_update_step', dtype=np.int32)
    return last_update_step

  def _exists_in_do_not_prune_list(self, tensor_name):
    do_not_prune_list = self._spec.do_not_prune
    if not do_not_prune_list[0]:
      return False
    for layer_name in do_not_prune_list:
      if tensor_name.find(layer_name) != -1:
        return True

    return False

  def _update_mask(self, weights, threshold):
    """Updates the mask for a given weight tensor.

    This function first computes the cdf of the weight tensor, and estimates
    the threshold value such that 'desired_sparsity' fraction of weights
    have magnitude less than the threshold.

    Args:
      weights: The weight tensor that needs to be masked.
      threshold: The current threshold value. The function will compute a new
        threshold and return the exponential moving average using the current
        value of threshold

    Returns:
      new_threshold: The new value of the threshold based on weights, and
        desired_sparsity
      new_mask: An n-D tensor containing 0 or 1 to indicate which of the
        values in weights fall below the threshold

    Raises:
      ValueError: if sparsity is not defined
    """
    if self._sparsity is None:
      raise ValueError('Sparsity variable undefined')

    with ops.name_scope(weights.op.name + '_pruning_ops'):
      abs_weights = math_ops.abs(weights)
      max_value = math_ops.reduce_max(abs_weights)
      histogram = _histogram(
          abs_weights, [0.0, max_value],
          nbins=self._spec.nbins,
          dtype=np.float32)

      cdf = math_ops.cumsum(histogram)
      norm_cdf = math_ops.div(cdf, math_ops.reduce_sum(histogram))
      current_threshold = math_ops.multiply(
          math_ops.div(
              math_ops.reduce_sum(
                  math_ops.cast(
                      math_ops.less(norm_cdf, self._sparsity), np.float32)),
              float(self._spec.nbins)), max_value)

      smoothed_threshold = math_ops.add_n([
          math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay),
          math_ops.multiply(threshold, self._spec.threshold_decay)
      ])
      new_mask = math_ops.cast(
          math_ops.greater(abs_weights, smoothed_threshold), np.float32)
    return smoothed_threshold, new_mask
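# Editorial sketch (not part of the commit): how _update_mask() picks a
# threshold. With weights linspace(1, 100, 100), sparsity 0.5 and
# threshold_decay 0:
#
#   abs_weights fill the histogram bins up to max_value = 100;
#   norm_cdf(b) is the fraction of weights with magnitude below bin b;
#   the number of bins where norm_cdf < 0.5 is roughly nbins/2, so
#   current_threshold ~= (nbins/2 / nbins) * 100 = 50;
#   new_mask keeps entries with |w| > 50, i.e. about half of them
#   (pruning_test.py below checks that 51 weights survive).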

  def _get_mask_assign_ops(self):
    # Make sure the assignment ops have not already been added to the list
    if self._assign_ops:
      raise ValueError(
          'Assign op list not empty. _get_mask_assign_ops() called twice?')

    masks = get_masks()
    weights = get_weights()
    thresholds = get_thresholds()

    if len(masks) != len(thresholds):
      raise ValueError(
          'Number of masks %s and number of thresholds %s mismatch' %
          (len(masks), len(thresholds)))

    for index, mask in enumerate(masks):
      threshold = thresholds[index]
      weight = weights[index] if self._partitioner is None else weights[
          index].as_tensor()

      if self._spec.do_not_prune:
        if self._exists_in_do_not_prune_list(mask.name):
          continue

      new_threshold, new_mask = self._update_mask(weight, threshold)
      self._assign_ops.append(_variable_assign(threshold, new_threshold))
      self._assign_ops.append(
          _variable_assign(mask, new_mask) if self._partitioner is None else
          _partitioned_variable_assign(mask, new_mask))

  def mask_update_op(self):
    with ops.name_scope(self._spec.name):
      if not self._assign_ops:
        self._get_mask_assign_ops()
      with ops.control_dependencies([
          state_ops.assign(
              self._last_update_step,
              self._global_step,
              name='last_mask_update_step_assign')
      ]):
        with ops.control_dependencies(self._assign_ops):
          logging.info('Updating masks.')
          return control_flow_ops.no_op('mask_update')

  def conditional_mask_update_op(self):

    def maybe_update_masks():
      with ops.name_scope(self._spec.name):
        is_step_within_pruning_range = math_ops.logical_and(
            math_ops.greater_equal(self._global_step,
                                   self._spec.begin_pruning_step),
            # If end_pruning_step is negative, keep pruning forever!
            math_ops.logical_or(
                math_ops.less_equal(self._global_step,
                                    self._spec.end_pruning_step),
                math_ops.less(self._spec.end_pruning_step, 0)))
        is_pruning_step = math_ops.less_equal(
            math_ops.add(self._last_update_step, self._spec.pruning_frequency),
            self._global_step)
        return math_ops.logical_and(is_step_within_pruning_range,
                                    is_pruning_step)

    def mask_update_op():
      return self.mask_update_op()

    def no_update_op():
      return control_flow_ops.no_op()

    return control_flow_ops.cond(maybe_update_masks(), mask_update_op,
                                 no_update_op)

  def add_pruning_summaries(self):
    """Adds summaries for this pruning spec.

    Args: none

    Returns: none
    """
    with ops.name_scope(self._spec.name + '_summaries'):
      summary.scalar('sparsity', self._sparsity)
      summary.scalar('last_mask_update_step', self._last_update_step)
      masks = get_masks()
      thresholds = get_thresholds()
      for index, mask in enumerate(masks):
        if not self._exists_in_do_not_prune_list(mask.name):
          summary.scalar(mask.name + '/sparsity', nn_impl.zero_fraction(mask))
          summary.scalar(thresholds[index].op.name + '/threshold',
                         thresholds[index])

  def print_hparams(self):
    logging.info(self._spec.to_json())
162
tensorflow/contrib/model_pruning/python/pruning_test.py
Normal file
@ -0,0 +1,162 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the key functions in the pruning library."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import training_util


class PruningHParamsTest(test.TestCase):
  PARAM_LIST = [
      "name=test", "threshold_decay=0.9", "pruning_frequency=10",
      "do_not_prune=[conv1,conv2]", "sparsity_function_end_step=100",
      "target_sparsity=0.9"
  ]
  TEST_HPARAMS = ",".join(PARAM_LIST)

  def setUp(self):
    super(PruningHParamsTest, self).setUp()
    # Add global step variable to the graph
    self.global_step = training_util.get_or_create_global_step()
    # Add sparsity
    self.sparsity = variables.Variable(0.5, name="sparsity")
    # Parse hparams
    self.pruning_hparams = pruning.get_pruning_hparams().parse(
        self.TEST_HPARAMS)

  def testInit(self):
    p = pruning.Pruning(self.pruning_hparams)
    self.assertEqual(p._spec.name, "test")
    self.assertAlmostEqual(p._spec.threshold_decay, 0.9)
    self.assertEqual(p._spec.pruning_frequency, 10)
    self.assertAllEqual(p._spec.do_not_prune, ["conv1", "conv2"])
    self.assertEqual(p._spec.sparsity_function_end_step, 100)
    self.assertAlmostEqual(p._spec.target_sparsity, 0.9)

  def testInitWithExternalSparsity(self):
    with self.test_session():
      p = pruning.Pruning(spec=self.pruning_hparams, sparsity=self.sparsity)
      variables.global_variables_initializer().run()
      sparsity = p._sparsity.eval()
      self.assertAlmostEqual(sparsity, 0.5)

  def testInitWithVariableReuse(self):
    with self.test_session():
      p = pruning.Pruning(spec=self.pruning_hparams, sparsity=self.sparsity)
      p_copy = pruning.Pruning(
          spec=self.pruning_hparams, sparsity=self.sparsity)
      variables.global_variables_initializer().run()
      sparsity = p._sparsity.eval()
      self.assertAlmostEqual(sparsity, 0.5)
      self.assertEqual(p._sparsity.eval(), p_copy._sparsity.eval())


class PruningTest(test.TestCase):

  def setUp(self):
    super(PruningTest, self).setUp()
    self.global_step = training_util.get_or_create_global_step()

  def testCreateMask2D(self):
    width = 10
    height = 20
    with self.test_session():
      weights = variables.Variable(
          random_ops.random_normal([width, height], stddev=1), name="weights")
      masked_weights = pruning.apply_mask(weights,
                                          variable_scope.get_variable_scope())
      variables.global_variables_initializer().run()
      weights_val = weights.eval()
      masked_weights_val = masked_weights.eval()
      self.assertAllEqual(weights_val, masked_weights_val)

  def testUpdateSingleMask(self):
    with self.test_session() as session:
      weights = variables.Variable(
          math_ops.linspace(1.0, 100.0, 100), name="weights")
      masked_weights = pruning.apply_mask(weights)
      sparsity = variables.Variable(0.5, name="sparsity")
      p = pruning.Pruning(sparsity=sparsity)
      p._spec.threshold_decay = 0.0
      mask_update_op = p.mask_update_op()
      variables.global_variables_initializer().run()
      masked_weights_val = masked_weights.eval()
      self.assertAllEqual(np.count_nonzero(masked_weights_val), 100)
      session.run(mask_update_op)
      masked_weights_val = masked_weights.eval()
      self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)

  def testPartitionedVariableMasking(self):
    partitioner = partitioned_variables.variable_axis_size_partitioner(40)
    with self.test_session() as session:
      with variable_scope.variable_scope("", partitioner=partitioner):
        sparsity = variables.Variable(0.5, name="Sparsity")
        weights = variable_scope.get_variable(
            "weights", initializer=math_ops.linspace(1.0, 100.0, 100))
        masked_weights = pruning.apply_mask(
            weights, scope=variable_scope.get_variable_scope())
      p = pruning.Pruning(sparsity=sparsity, partitioner=partitioner)
      p._spec.threshold_decay = 0.0
      mask_update_op = p.mask_update_op()
      variables.global_variables_initializer().run()
      masked_weights_val = masked_weights.eval()
      session.run(mask_update_op)
      masked_weights_val = masked_weights.eval()
      self.assertAllEqual(np.count_nonzero(masked_weights_val), 51)

  def testConditionalMaskUpdate(self):
    param_list = [
        "pruning_frequency=2", "begin_pruning_step=1", "end_pruning_step=6"
    ]
    test_spec = ",".join(param_list)
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
    weights = variables.Variable(
        math_ops.linspace(1.0, 100.0, 100), name="weights")
    masked_weights = pruning.apply_mask(weights)
    sparsity = variables.Variable(0.00, name="sparsity")
    # Set up pruning
    p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
    p._spec.threshold_decay = 0.0
    mask_update_op = p.conditional_mask_update_op()
    sparsity_val = math_ops.linspace(0.0, 0.9, 10)
    increment_global_step = state_ops.assign_add(self.global_step, 1)
    non_zero_count = []
    with self.test_session() as session:
      variables.global_variables_initializer().run()
      for i in range(10):
        session.run(state_ops.assign(sparsity, sparsity_val[i]))
        session.run(mask_update_op)
        session.run(increment_global_step)
        non_zero_count.append(np.count_nonzero(masked_weights.eval()))
      # Weights are pruned at global steps 0, 2, 4, and 6.
      expected_non_zero_count = [100, 100, 80, 80, 60, 60, 40, 40, 40, 40]
      self.assertAllEqual(expected_non_zero_count, non_zero_count)


if __name__ == "__main__":
  test.main()