Introducing TensorRT Operator to TF which can run (sub)graphs in highly
optimized TensorRT engines.

This commit is a merged version of many commits by

   benbarsdell    <bbarsdell at nvidia.com>
   deadeyegoodwin <davidg at nvidia.com>
   jjsjann123     <jiej at nvidia.com>
   samikama       <skama at nvidia.com>
Sami Kama 2018-01-19 22:58:50 +00:00
parent e810b107d8
commit 825e7a32e9
35 changed files with 4589 additions and 16 deletions


@ -37,12 +37,14 @@ _TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)),
_TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
'WORKSPACE')
_DEFAULT_CUDA_VERSION = '9.0'
_DEFAULT_TENSORRT_VERSION = '4'
_DEFAULT_CUDNN_VERSION = '7'
_DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
_DEFAULT_CUDA_PATH = '/usr/local/cuda'
_DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
_DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/x86_64-linux-gnu'
_TF_OPENCL_VERSION = '1.2'
_DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
@ -382,13 +384,12 @@ def set_build_var(environ_cp, var_name, query_item, option_name,
var = str(int(get_var(environ_cp, var_name, query_item, enabled_by_default)))
environ_cp[var_name] = var
if var == '1':
write_to_bazelrc('build --define %s=true' % option_name)
elif bazel_config_name is not None:
# TODO(mikecase): Migrate all users of configure.py to use --config Bazel
# options and not to set build configs through environment variables.
write_to_bazelrc('build:%s --define %s=true'
% (bazel_config_name, option_name))
# TODO(mikecase): Migrate all users of configure.py to use --config Bazel
# options and not to set build configs through environment variables.
if var == '1':
setting = 'true'
confname = ':%s' % bazel_config_name if bazel_config_name is not None else ''
write_to_bazelrc('build%s --define %s=%s' % (confname, option_name, setting))
def set_action_env_var(environ_cp,
@ -438,13 +439,12 @@ def convert_version_to_int(version):
for seg in version_segments:
if not seg.isdigit():
return None
version_str = ''.join(['%03d' % int(seg) for seg in version_segments])
return int(version_str)
def check_bazel_version(min_version):
"""Check installed bezel version is at least min_version.
"""Check installed bazel version is at least min_version.
Args:
min_version: string for minimum bazel version.
@ -1056,6 +1056,108 @@ def set_other_cuda_vars(environ_cp):
write_to_bazelrc('test --config=cuda')
def set_tf_trt_version(environ_cp):
"""Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION."""
ask_trt_version = (
'Please specify the TensorRT (libnvinfer) version you want to use. '
'[Leave empty to default to libnvinfer %s]: ') % _DEFAULT_TENSORRT_VERSION
while True:
tf_trt_version = get_from_env_or_user_or_default(
environ_cp, 'TF_TENSORRT_VERSION', ask_trt_version,
_DEFAULT_TENSORRT_VERSION)
# if library version is passed and known
default_trt_path = environ_cp.get('TENSORRT_INSTALL_PATH',_DEFAULT_TENSORRT_PATH_LINUX)
ask_trt_path = (r'Please specify the location where libnvinfer %s library is '
'installed. Refer to README.md for more details. [Default'
' is %s]:') % (tf_trt_version, default_trt_path)
trt_install_path = get_from_env_or_user_or_default(
environ_cp, 'TENSORRT_INSTALL_PATH', ask_trt_path, default_trt_path)
# The result returned from "read" will be used unexpanded. That makes "~"
# unusable. Going through one more level of expansion to handle that.
trt_install_path = os.path.realpath(
os.path.expanduser(trt_install_path))
# Simple helper to search for libnvinfer in the given install path. It finds
# all libnvinfer.so* files in the user-defined install path and its lib64
# subdirectory and returns their absolute paths.
def find_libs(search_path):
fl=set()
if os.path.exists(search_path) and os.path.isdir(search_path):
fl.update([os.path.realpath(os.path.join(search_path,x)) \
for x in os.listdir(search_path) if 'libnvinfer.so' in x])
return fl
possible_files=find_libs(trt_install_path)
possible_files.update(find_libs(os.path.join(trt_install_path,'lib64')))
if is_linux():
cudnnpatt = re.compile(r'.*libcudnn.so\.?(.*) =>.*$')
cudapatt = re.compile(r'.*libcudart.so\.?(.*) =>.*$')
def is_compatible(lib, cudaver, cudnnver):
ldd_bin = which('ldd') or '/usr/bin/ldd'
ldd_out = run_shell([ldd_bin, lib]).split(os.linesep)
# Not every library links both; default to None so the check below fails
# cleanly instead of referencing an unbound variable.
cudnn = None
cudart = None
for l in ldd_out:
if 'libcudnn.so' in l:
cudnn=cudnnpatt.search(l)
elif 'libcudart.so' in l:
cudart=cudapatt.search(l)
if cudnn:
cudnn=convert_version_to_int(cudnn.group(1)) if len(cudnn.group(1)) else 0
if cudart:
cudart=convert_version_to_int(cudart.group(1)) if len(cudart.group(1)) else 0
return (cudnn==cudnnver) and (cudart==cudaver)
cudaver=convert_version_to_int(environ_cp['TF_CUDA_VERSION'])
cudnnver=convert_version_to_int(environ_cp['TF_CUDNN_VERSION'])
valid_libs=[]
vfinder=re.compile('.*libnvinfer.so.?(.*)$')
highest_ver=[0,None,None]
for l in possible_files:
if is_compatible(l,cudaver,cudnnver):
valid_libs.append(l)
vstr=vfinder.search(l).group(1)
currver=convert_version_to_int(vstr) if len(vstr) else 0
if currver > highest_ver[0]:
highest_ver= [currver,vstr,l]
if highest_ver[1] is not None:
trt_install_path=os.path.dirname(highest_ver[2])
tf_trt_version=highest_ver[1]
break
ldconfig_bin = which('ldconfig') or '/sbin/ldconfig'
libnvinfer_path_from_ldconfig = run_shell([ldconfig_bin, '-p'])
libnvinfer_path_from_ldconfig = re.search('.*libnvinfer.so.* => (.*)',
libnvinfer_path_from_ldconfig)
if libnvinfer_path_from_ldconfig:
libnvinfer_path_from_ldconfig = libnvinfer_path_from_ldconfig.group(1)
if os.path.exists('%s.%s' % (libnvinfer_path_from_ldconfig,
tf_trt_version)):
trt_install_path = os.path.dirname(libnvinfer_path_from_ldconfig)
break
# Reset and Retry
if len(possible_files):
print(
'Invalid path to TensorRT %s. The libnvinfer.so* files found are for '
'incompatible CUDA versions:' % tf_trt_version)
print(trt_install_path)
print(os.path.join(trt_install_path,'lib64'))
else:
print(
'Invalid path to TensorRT %s. No libnvinfer.so* files found in:'
% tf_trt_version)
print(trt_install_path)
print(os.path.join(trt_install_path,'lib64'))
if is_linux():
print('%s.%s' % (libnvinfer_path_from_ldconfig, tf_trt_version))
environ_cp['TF_TENSORRT_VERSION'] = ''
# Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION
environ_cp['TENSORRT_INSTALL_PATH'] = trt_install_path
write_action_env_to_bazelrc('TENSORRT_INSTALL_PATH', trt_install_path)
environ_cp['TF_TENSORRT_VERSION'] = tf_trt_version
write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_trt_version)
write_to_bazelrc('build:tensorrt --define using_tensorrt=true')
def set_host_cxx_compiler(environ_cp):
"""Set HOST_CXX_COMPILER."""
default_cxx_host_compiler = which('g++') or ''
@ -1244,9 +1346,11 @@ def main():
environ_cp['TF_NEED_COMPUTECPP'] = '0'
environ_cp['TF_NEED_OPENCL'] = '0'
environ_cp['TF_CUDA_CLANG'] = '0'
environ_cp['TF_NEED_TENSORRT'] = '0'
if is_macos():
environ_cp['TF_NEED_JEMALLOC'] = '0'
environ_cp['TF_NEED_TENSORRT'] = '0'
set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
'with_jemalloc', True)
@ -1301,6 +1405,10 @@ def main():
if not is_windows():
set_gcc_host_compiler_path(environ_cp)
set_other_cuda_vars(environ_cp)
# Enable TensorRT if desired. Disabled on non-Linux platforms.
set_action_env_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', False)
if environ_cp.get('TF_NEED_TENSORRT') == '1':
set_tf_trt_version(environ_cp)
set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
if environ_cp.get('TF_NEED_MPI') == '1':


@ -358,6 +358,14 @@ config_setting(
},
)
config_setting(
name = "using_tensorrt",
define_values = {
"using_tensorrt":"true",
},
visibility = ["//visibility:public"],
)
config_setting(
name = "with_mpi_support",
values = {"define": "with_mpi_support=true"},


@ -7,6 +7,7 @@ package(default_visibility = ["//tensorflow:__subpackages__"])
load("//third_party/mpi:mpi.bzl", "if_mpi")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_tensorrt//:build_defs.bzl", "if_trt")
py_library(
name = "contrib_py",
@ -104,7 +105,9 @@ py_library(
"//tensorflow/contrib/training:training_py",
"//tensorflow/contrib/util:util_py",
"//tensorflow/python:util",
] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]),
] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_ops_py"])
+ if_trt(["//tensorflow/contrib/tensorrt:init_py"]),
)
cc_library(


@ -0,0 +1,266 @@
# -*- python -*-
# Description:
# Provides TensorRT operators and the converter package.
package(default_visibility = ["//tensorflow:__subpackages__"])
licenses(["notice"]) # Apache 2.0
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load(
"//tensorflow:tensorflow.bzl",
"tf_custom_op_library",
"tf_gen_op_libs",
"tf_gen_op_wrapper_py",
"tf_py_wrap_cc",
"tf_cc_test",
"tf_kernel_library",
"tf_custom_op_py_library",
"tf_copts",
)
tf_custom_op_library(
name = "python/ops/_trt_engine_op.so",
srcs = [
"kernels/trt_engine_op.cc",
"ops/trt_engine_op.cc",
"kernels/trt_engine_op.h",
],
gpu_srcs = [],
deps = [
"@local_config_tensorrt//:tensorrt",
":trt_shape_function",
"//tensorflow/core:lib_proto_parsing",
"//tensorflow/core/kernels:bounds_check_lib",
"//tensorflow/core/kernels:ops_util_hdrs",
],
)
cc_library(
name = "trt_shape_function",
srcs=[
"shape_fn/trt_shfn.cc",
],
hdrs=["shape_fn/trt_shfn.h"],
copts=tf_copts(),
deps=[
":trt_logging",
"//third_party/eigen3",
"@local_config_tensorrt//:tensorrt",
"@protobuf_archive//:protobuf",
"@nsync//:nsync_headers",
"//tensorflow/core:framework_headers_lib",
]
)
tf_kernel_library(
name = "trt_engine_op_kernel",
srcs = [
"kernels/trt_engine_op.cc",
],
hdrs=[
"kernels/trt_engine_op.h",
],
gpu_srcs = [
],
deps = [
":trt_logging",
":trt_shape_function",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//third_party/eigen3",
"//tensorflow/core:gpu_headers_lib",
"@local_config_tensorrt//:tensorrt",
"//tensorflow/core:lib_proto_parsing",
],
alwayslink=1,
)
tf_gen_op_libs(
op_lib_names = [
"trt_engine_op",
],
deps=[
"@local_config_tensorrt//:tensorrt",
]
)
cc_library(
name="trt_logging",
srcs = [
"log/trt_logger.cc",
],
hdrs=[
"log/trt_logger.h",
],
deps=[
"@local_config_tensorrt//:tensorrt",
"//tensorflow/core:lib_proto_parsing",
],
visibility = ["//visibility:public"],
)
tf_gen_op_wrapper_py(
name = "trt_engine_op",
deps = [
":trt_engine_op_op_lib",
":trt_shape_function",
],
)
tf_custom_op_py_library(
name = "trt_engine_op_loader",
srcs = ["python/ops/trt_engine_op.py"],
dso = [":python/ops/_trt_engine_op.so",
"@local_config_tensorrt//:tensorrt",
],
srcs_version = "PY2AND3",
deps = [
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:resources",
],
)
py_library(
name = "init_py",
srcs = [
"__init__.py",
"python/__init__.py",
],
srcs_version = "PY2AND3",
deps = [
":trt_ops_py",
":trt_convert_py",
],
)
py_library(
name="trt_ops_py",
srcs_version = "PY2AND3",
deps=[":trt_engine_op",
":trt_engine_op_loader",
],
)
py_library(
name="trt_convert_py",
srcs=["python/trt_convert.py"],
srcs_version = "PY2AND3",
deps=[
":wrap_conversion"
],
)
tf_py_wrap_cc(
name="wrap_conversion",
srcs=["trt_conversion.i"],
deps=[
":trt_conversion",
"//tensorflow/core:framework_lite",
"//util/python:python_headers",
],
)
cc_library(
name= "trt_conversion",
srcs=[
"convert/convert_nodes.cc",
"convert/convert_graph.cc",
"segment/segment.cc",
"convert/inferShapes.cc",
],
hdrs=[
"convert/convert_nodes.h",
"convert/convert_graph.h",
"convert/inferShapes.h",
"segment/segment.h",
"segment/union_find.h",
],
deps=[
"@local_config_tensorrt//:tensorrt",
"@protobuf_archive//:protobuf_headers",
"@nsync//:nsync_headers",
":trt_logging",
"//tensorflow/core:framework_lite",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:framework_headers_lib",
"//tensorflow/core:core_cpu_base",
#"//third_party/eigen3",
],
)
tf_custom_op_library(
name = "tensorrt_ops.so",
srcs = [
"ops/tensorrt_ops.cc",
],
deps = [
"@local_config_tensorrt//:tensorrt",
],
)
# Library for the segmenting portion of TensorRT operation creation
cc_library(
name = "segment",
srcs = [
"segment/segment.cc",
],
hdrs = [
"segment/union_find.h",
"segment/segment.h",
],
deps = [
"@protobuf_archive//:protobuf_headers",
"//tensorflow/core:core_cpu",
"//tensorflow/core:lib_proto_parsing",
"//third_party/eigen3",
],
linkstatic = 1,
)
tf_cc_test(
name = "segment_test",
size = "small",
srcs = ["segment/segment_test.cc"],
deps = [
":segment",
"//tensorflow/c:c_api",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
)
# Library for the node-level conversion portion of TensorRT operation creation
filegroup(
name = "cppfiles",
srcs = glob(["**/*.cc"]),
visibility=["//visibility:private"],
)
filegroup(
name = "headers",
srcs = glob(["**/*.h"]),
visibility=["//visibility:private"],
)
filegroup(
name = "all_files",
srcs = glob(
["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
],
),
visibility = ["//tensorflow:__subpackages__"],
)


@ -0,0 +1,42 @@
Using TensorRT in TensorFlow
============================
This module provides the necessary bindings and introduces the TRT_engine_op
operator that wraps a subgraph in TensorRT.
Compilation
-----------
In order to compile the module, you need a local TensorRT installation
(libnvinfer.so and the corresponding include files). During the
configuration step, TensorRT should be enabled and its installation path
should be set. If TensorRT was installed through a package manager
(deb, rpm), the configure script should find the necessary components on
the system automatically. If it was installed from a tar package, the user
has to set the path to the location where the library is installed during
configuration.
In order to enable TensorRT support, the user has to add `--config=tensorrt`
to the build flags during compilation, for example:
```
bazel build --config=cuda --config=opt --config=tensorrt //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/
```
After the TensorFlow package is installed, the TensorRT transformation
will be available. An example use is shown below.
```python
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
#... create and train or load model
gdef = sess.graph.as_graph_def()
trt_gdef = trt.CreateInferenceGraph(
    gdef,                # original graph_def
    ["output"],          # name of the output node(s)
    max_batch_size,      # maximum batch size to run inference with
    max_workspace_size)  # maximum memory (in bytes) for TensorRT to use
tf.reset_default_graph()
tf.import_graph_def(graph_def=trt_gdef)
#...... run inference
```
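Once the converted graph has been imported, inference runs through the
regular TensorFlow session API. The following is a minimal sketch, assuming
the model has a placeholder named `input` and a single output node named
`output` (imported under the default `import` scope), and that `input_data`
holds a batch no larger than `max_batch_size`; adjust the names for your
model.

```python
# Tensor names below are illustrative assumptions, not part of this commit.
with tf.Session() as sess:
    inp = sess.graph.get_tensor_by_name("import/input:0")
    out = sess.graph.get_tensor_by_name("import/output:0")
    result = sess.run(out, feed_dict={inp: input_data})
```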


@ -0,0 +1,19 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.tensorrt.python import *


@ -0,0 +1,253 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
#include <list>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <map>
#include <utility>
#include "NvInfer.h"
#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
#include "tensorflow/contrib/tensorrt/convert/inferShapes.h"
#include "tensorflow/contrib/tensorrt/segment/segment.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1)
//------------------------------------------------------------------------------
namespace tensorrt {
namespace convert {
namespace {
static std::unordered_set<std::string> output_nodes;
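// Returns true if the node's op is in the TensorRT conversion whitelist and
// the node is not one of the requested graph outputs.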
bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) {
static const std::set<std::string> candidate_ops = {
"Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu",
"Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean"
// TODO(ben,jie): ...
};
if (output_nodes.count(node_def.name())) return false;
return candidate_ops.count(node_def.op());
}
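// Collects the edges that enter the subgraph from the rest of the graph,
// ignoring the special source node.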
void GetSubGraphIncomingEdges(tensorflow::Graph const& graph,
std::set<int> const& subgraph_node_ids,
tensorflow::EdgeSet* incoming_edges) {
for (int node_id : subgraph_node_ids) {
tensorflow::Node const* node = graph.FindNodeId(node_id);
LOG(DEBUG) << node->name() << " has incoming edges: ";
for (tensorflow::Edge const* edge : node->in_edges()) {
if (!subgraph_node_ids.count(edge->src()->id()) &&
!edge->src()->IsSource()) {
LOG(DEBUG) << edge->src()->name() << ", ";
incoming_edges->insert(edge);
}
}
}
}
void GetSubGraphOutgoingEdges(tensorflow::Graph const& graph,
std::set<int> const& subgraph_node_ids,
tensorflow::EdgeSet* outgoing_edges) {
for (int node_id : subgraph_node_ids) {
tensorflow::Node const* node = graph.FindNodeId(node_id);
LOG(DEBUG) << node->name() << " has outgoing edges: ";
for (tensorflow::Edge const* edge : node->out_edges()) {
if (!subgraph_node_ids.count(edge->dst()->id()) &&
!edge->dst()->IsSink()) {
outgoing_edges->insert(edge);
}
}
}
}
std::pair<std::string, int> ParseTensorName(std::string name,
int default_idx = 0) {
int idx = default_idx;
size_t sep = name.find_last_of(':');
if (sep != std::string::npos) {
// Parse the index before truncating the name; otherwise the substring past
// 'sep' would be read from the already-truncated string.
idx = std::stoi(name.substr(sep + 1));
name = name.substr(0, sep);
}
return std::make_pair(name, idx);
}
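// Groups the requested output tensor indices by node name, e.g. "conv:1"
// becomes an entry {"conv": [1]}.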
std::unordered_map<std::string, std::vector<int>> BuildTensorNameMap(
const std::vector<std::string>& tensor_names) {
std::unordered_map<std::string, std::vector<int>> result;
for (std::string const& tensor_name : tensor_names) {
std::string node_name;
int index;
std::tie(node_name, index) = ParseTensorName(tensor_name);
result[node_name].push_back(index);
}
return result;
}
tensorflow::Status ConvertSubGraphToTensorRT(
tensorflow::Graph& graph, const std::vector<std::string>& output_names,
const std::set<int>& subgraph_node_ids, size_t max_batch_size,
size_t max_workspace_size, const ShapeMap& shape_map) {
tensorflow::EdgeSet subgraph_incoming_edges;
GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges);
std::vector<std::pair<int, int>> subgraph_inputs;
// Collect inputs by looking for incoming edges
for (tensorflow::Edge const* edge : subgraph_incoming_edges) {
subgraph_inputs.push_back({edge->src()->id(), edge->src_output()});
}
std::set<std::pair<int, int>> subgraph_outputs_set;
// Collect outputs referenced from output_names
auto output_name_to_index_map = BuildTensorNameMap(output_names);
// for (int node_id : subgraph_node_ids_no_placeholder) {
for (int node_id : subgraph_node_ids) {
tensorflow::Node* node = graph.FindNodeId(node_id);
if (output_name_to_index_map.count(node->name())) {
for (int index : output_name_to_index_map.at(node->name())) {
subgraph_outputs_set.insert({node_id, index});
}
}
}
// Collect outputs referenced from outgoing edges
tensorflow::EdgeSet subgraph_outgoing_edges;
// GetSubGraphOutgoingEdges(graph, subgraph_node_ids_no_placeholder,
// &subgraph_outgoing_edges);
GetSubGraphOutgoingEdges(graph, subgraph_node_ids, &subgraph_outgoing_edges);
for (tensorflow::Edge const* edge : subgraph_outgoing_edges) {
subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()});
}
// Impose an ordering on the outputs
std::vector<std::pair<int, int>> subgraph_outputs(
subgraph_outputs_set.begin(), subgraph_outputs_set.end());
// Build TensorRT node and add it to the graph
tensorflow::NodeDef trt_node_def;
TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(
graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs,
max_batch_size, max_workspace_size, shape_map, &trt_node_def));
tensorflow::Status status;
tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status);
TF_RETURN_IF_ERROR(status);
// Re-map outgoing edges to use the new TRT node instead of the orig subgraph
std::map<std::pair<int, int>, int> subgraph_edge_to_output_map;
for (size_t i = 0; i < subgraph_outputs.size(); ++i) {
subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i});
}
TF_RETURN_IF_ERROR(status);
for (tensorflow::Edge const* edge : subgraph_outgoing_edges) {
std::pair<int, int> old_src = {edge->src()->id(), edge->src_output()};
int new_src_output = subgraph_edge_to_output_map.at(old_src);
graph.UpdateEdge(trt_node, new_src_output, edge->dst(), edge->dst_input());
}
// Remove the original subgraph
for (int node_id : subgraph_node_ids) {
tensorflow::Node* node = graph.FindNodeId(node_id);
// Don't remove the input placeholders
if (node->type_string() == "Placeholder") {
continue;
}
graph.RemoveNode(node);
}
return tensorflow::Status::OK();
}
tensorflow::Status BuildNodeMap(
const tensorflow::Graph& graph,
std::unordered_map<std::string, tensorflow::Node*>* node_map) {
for (auto* node : graph.op_nodes()) {
if (!node_map->insert({node->name(), node}).second) {
return tensorflow::errors::AlreadyExists(
"Node name is not unique in graph: " + node->name());
}
}
return tensorflow::Status::OK();
}
} // namespace
tensorflow::Status ConvertGraphDefToTensorRT(
const tensorflow::GraphDef& graph_def,
const std::vector<std::string>& output_names, size_t max_batch_size,
size_t max_workspace_size, tensorflow::GraphDef* new_graph_def) {
ShapeMap shape_map;
TF_RETURN_IF_ERROR(
tensorflow::trt::inferShapes(graph_def, output_names, shape_map));
std::stringstream oss;
for (auto& n : shape_map) { // nodes
oss << " Node= " << n.first << ", ";
for (auto o : n.second) { // outputs
oss << o.first.DebugString() << " T= " << o.second << ", ";
}
LOG(DEBUG) << oss.str();
oss.str("");
}
// Build full graph
tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(),
graph_def.library());
tensorflow::Graph graph(flib);
TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph(
tensorflow::GraphConstructorOptions(), graph_def, &graph));
// Segment the graph into subgraphs that can be converted to TensorRT
tensorrt::segment::SegmentOptions segment_options;
// TODO(ben,jie,sami): exclude output nodes (DISCUSS IT)
for (auto node : output_names) output_nodes.insert(node);
// TODO(sami): this should be passed as a knob!!!!
segment_options.minimum_segment_size = 2;
tensorrt::segment::SegmentNodesVector segments;
TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph(
graph_def, IsTensorRTCandidate, segment_options, &segments));
if (segments.size() > 1) {
// LOG(WARNING) << "Multiple TensorRT candidate subgraphs were found, "
//<< "but only the first can be converted.";
// segments.erase(++segments.begin(), segments.end());
LOG(INFO) << "MULTIPLE tensorrt candidate conversion: " << segments.size();
}
std::unordered_map<std::string, tensorflow::Node*> node_map;
TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
for (std::set<std::string> const& subgraph_node_names : segments) {
std::set<int> subgraph_node_ids;
for (std::string const& node_name : subgraph_node_names) {
subgraph_node_ids.insert(node_map.at(node_name)->id());
}
TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT(
graph, output_names, subgraph_node_ids, max_batch_size,
max_workspace_size, shape_map));
}
graph.ToGraphDef(new_graph_def);
return tensorflow::Status::OK();
}
} // namespace convert
} // namespace tensorrt


@ -0,0 +1,34 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_GRAPH_H_
#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_GRAPH_H_
#include <string>
#include <vector>
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/lib/core/status.h"
namespace tensorrt {
namespace convert {
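// Runs shape inference on graph_def, segments it into TensorRT-compatible
// subgraphs and replaces each segment with a single TRTEngineOp node in
// new_graph_def.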
tensorflow::Status ConvertGraphDefToTensorRT(
const tensorflow::GraphDef& graph_def,
const std::vector<std::string>& output_names, size_t max_batch_size,
size_t max_workspace_size, tensorflow::GraphDef* new_graph_def);
}
} // namespace tensorrt
#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_GRAPH_H_

File diff suppressed because it is too large.


@ -0,0 +1,42 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_
#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_
#include <set>
#include <vector>
#include <utility>
#include "tensorflow/contrib/tensorrt/convert/inferShapes.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/lib/core/status.h"
namespace tensorrt {
namespace convert {
tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids,
const std::vector<std::pair<int, int>>&
input_inds, // {node_id, output_idx}
const std::vector<std::pair<int, int>>&
output_inds, // {node_id, output_idx}
size_t max_batch_size, size_t max_workspace_size, const ShapeMap& shape_map,
tensorflow::NodeDef* trt_node);
} // namespace convert
} // namespace tensorrt
#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_


@ -0,0 +1,125 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/convert/inferShapes.h"
#include <functional>
#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.pb_text.h"
#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1)
namespace tensorflow {
namespace trt {
std::vector<tensorflow::DataType> getTypes(const tensorflow::OpDef& op,
const tensorflow::NodeDef& nd,
bool inp = true) {
const auto& attrMap = nd.attr();
auto getType = [&attrMap](decltype(
op.input_arg(0)) a) -> std::vector<tensorflow::DataType> {
std::vector<tensorflow::DataType> tvec;
if (!a.type_list_attr().empty()) { // get the list types
const auto& tl = attrMap.at(a.type_list_attr()).list();
int tsize = tl.type_size();
tvec.reserve(tsize);
for (int t = 0; t < tsize; t++) {
tvec.push_back(tl.type(t));
}
return tvec;
}
tensorflow::DataType cType = tensorflow::DT_INVALID;
if (a.type() != tensorflow::DT_INVALID) { // get defined types
cType = a.type();
} else if (!a.type_attr().empty()) {
cType = attrMap.at(a.type_attr()).type();
}
if (!a.number_attr().empty()) { // numbertypes
int64 nTensors = attrMap.at(a.number_attr()).i();
tvec = std::vector<tensorflow::DataType>(nTensors, cType);
return tvec;
}
tvec.push_back(cType);
return tvec;
};
std::vector<tensorflow::DataType> types;
if (inp) {
int n_inputs = op.input_arg_size();
for (int i = 0; i < n_inputs; i++) {
auto tout = getType(op.input_arg(i));
LOG(DEBUG) << "Node= " << nd.name() << " #inputs" << tout.size();
types.insert(types.end(), tout.begin(), tout.end());
}
} else {
int n_outputs = op.output_arg_size();
// types.resize(n_outputs);
for (int i = 0; i < n_outputs; i++) {
auto tout = getType(op.output_arg(i));
LOG(DEBUG) << "Node= " << nd.name() << " #outputs" << tout.size();
types.insert(types.end(), tout.begin(), tout.end());
}
}
return types;
}
tensorflow::Status inferShapes(const tensorflow::GraphDef& graph_def,
const std::vector<std::string>& output_names,
ShapeMap& shapes) {
tensorflow::Graph g(OpRegistry::Global());
TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph(
tensorflow::GraphConstructorOptions(), graph_def, &g));
std::vector<tensorflow::Node*> POnodes;
tensorflow::GetPostOrder(g, &POnodes);
tensorflow::ShapeRefiner refiner(graph_def.versions().producer(),
OpRegistry::Global());
for (auto n = POnodes.rbegin(); n != POnodes.rend(); ++n) {
TF_CHECK_OK(refiner.AddNode(*n));
}
auto shape2PTS = [](tensorflow::shape_inference::InferenceContext* ic,
const tensorflow::shape_inference::ShapeHandle& sh)
-> tensorflow::PartialTensorShape {
std::vector<int64> dims;
int64 rank = ic->Rank(sh);
for (int64 i = 0; i < rank; i++) {
auto dh = ic->Dim(sh, i);
dims.push_back(ic->Value(dh));
}
return tensorflow::PartialTensorShape(dims);
};
for (const auto& n : POnodes) {
auto ic = refiner.GetContext(n);
if (ic) {
int nOuts = ic->num_outputs();
auto types = getTypes(n->op_def(), n->def(), false);
std::vector<
std::pair<tensorflow::PartialTensorShape, tensorflow::DataType>>
SAT;
for (int i = 0; i < nOuts; i++) {
auto PTS = shape2PTS(ic, ic->output(i));
SAT.push_back({PTS, types.at(i)});
}
shapes[n->name()] = SAT;
} else {
LOG(WARNING) << "Node " << n->name() << " doesn't have InferenceContext!";
}
}
return tensorflow::Status::OK();
}
} // namespace trt
} // namespace tensorflow


@ -0,0 +1,39 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_
#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_
#include <string>
#include <unordered_map>
#include <vector>
#include <utility>
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/status.h"
typedef std::unordered_map<std::string,
std::vector<std::pair<tensorflow::PartialTensorShape,
tensorflow::DataType>>>
ShapeMap;
namespace tensorflow {
namespace trt {
tensorflow::Status inferShapes(const tensorflow::GraphDef& graph_def,
const std::vector<std::string>& output_names,
ShapeMap& shapes);
}
} // namespace tensorflow
#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_


@ -0,0 +1,183 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/kernels/trt_engine_op.h"
#include <cuda_runtime_api.h>
#include <sstream>
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor.h"
// Use TF logging for TensorRT information
namespace tensorflow {
static ::tensorflow::tensorrt::Logger gLogger;
using namespace nvinfer1;
namespace tensorrt {
TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) {
// char *gieModelStream{nullptr};
// size_t size{0};
// read serialized_engine
std::string serialized_engine;
OP_REQUIRES_OK(context,
context->GetAttr("serialized_engine", &serialized_engine));
// register input output node name in trt_sub_graph
OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_));
OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_));
// TODO(samikama) runtime should be taken from a resourcemanager as well.
// Only engine should be in the op and context and runtime should be taken
// from resourcemanager
IRuntime* infer = createInferRuntime(gLogger);
trt_engine_ptr_.reset(infer->deserializeCudaEngine(
serialized_engine.c_str(), serialized_engine.size(), nullptr));
trt_context_ptr_.reset(trt_engine_ptr_->createExecutionContext());
// runtime is safe to delete after engine creation
infer->destroy();
std::stringstream oss;
// debug iterate through all binding instances
for (int i = 0; i < trt_engine_ptr_->getNbBindings(); i++) {
LOG(INFO) << "index: " << i
<< ", binding name: " << trt_engine_ptr_->getBindingName(i);
if (trt_engine_ptr_->bindingIsInput(i)) {
LOG(INFO) << "INPUT";
} else {
LOG(INFO) << "OUTPUT";
}
oss << "Dimension: ";
auto dims = trt_engine_ptr_->getBindingDimensions(i);
oss << " nbDims: " << dims.nbDims << " -> ";
for (int j = 0; j < Dims::MAX_DIMS; j++) {
oss << dims.d[j] << ", ";
}
LOG(INFO) << oss.str();
oss.str("");
switch (trt_engine_ptr_->getBindingDataType(i)) {
case nvinfer1::DataType::kFLOAT:
LOG(INFO) << "data type float" << std::endl;
break;
case nvinfer1::DataType::kHALF:
LOG(INFO) << "data type half" << std::endl;
break;
case nvinfer1::DataType::kINT8:
LOG(INFO) << "data type int8" << std::endl;
break;
}
}
// CHECK_NE(cudaStreamCreate(&stream_),0); // logic here is wrong
// cudaStreamCreate(&stream_);
}
void TRTEngineOp::Compute(OpKernelContext* context) {
int nbBindings = context->num_inputs() + context->num_outputs();
// TODO(jjsjann123) multiple input/output
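// TensorRT expects one device pointer per engine binding; each input and
// output tensor below is placed at its binding index in this vector.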
std::vector<void*> buffers(nbBindings);
size_t bindingIndex;
int nbBatch = 0;
bool valid = true;
for (int i = 0; i < context->num_inputs(); i++) {
// Grab the input tensor
bindingIndex = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str());
const Tensor& input_tensor = context->input(i);
const TensorShape& input_shape = input_tensor.shape();
if (i == 0) {
nbBatch = input_shape.dim_size(0);
} else if (nbBatch != input_shape.dim_size(0)) {
valid = false;
break;
}
// int64 input_shape.dim_size(int d)
// int input_shape.dims()
switch (trt_engine_ptr_->getBindingDataType(bindingIndex)) {
case nvinfer1::DataType::kFLOAT:
LOG(INFO) << "float";
buffers[bindingIndex] = (void*)(input_tensor.flat<float>().data());
break;
case nvinfer1::DataType::kHALF:
LOG(INFO) << "half";
// buffers[bindingIndex] = (void*)input_tensor.flat<float16>().data();
break;
case nvinfer1::DataType::kINT8:
LOG(INFO) << "int8";
// buffers[bindingIndex] = (void*)input_tensor.flat<int8>().data();
break;
}
}
if (!valid) LOG(WARNING) << "input data inconsistent batch size";
for (int i = 0; i < static_cast<int>(output_nodes_.size()); i++) {
// It is unfortunate that we have to reallocate the output buffer on every run.
// Create an output tensor
bindingIndex = trt_engine_ptr_->getBindingIndex(output_nodes_[i].c_str());
Tensor* output_tensor = NULL;
TensorShape output_shape;
if (bindingIndex != -1) {
LOG(INFO) << "got binding " << bindingIndex;
auto dims = trt_engine_ptr_->getBindingDimensions(bindingIndex);
std::vector<int> trt_shape(dims.nbDims + 1);
trt_shape[0] = nbBatch;
for (int j = 0; j < dims.nbDims; j++) trt_shape[j + 1] = dims.d[j];
TensorShapeUtils::MakeShape(trt_shape.data(), trt_shape.size(),
&output_shape);
} else {
LOG(INFO) << "no binding ";
break;
}
OP_REQUIRES_OK(context,
context->allocate_output(i, output_shape, &output_tensor));
// buffers[bindingIndex] = (void*)output_tensor->flat<float>();
// buffers[bindingIndex] = output_tensor->flat<float>().data();
switch (trt_engine_ptr_->getBindingDataType(bindingIndex)) {
case nvinfer1::DataType::kFLOAT:
LOG(INFO) << "float";
buffers[bindingIndex] =
reinterpret_cast<void*>(output_tensor->flat<float>().data());
break;
case nvinfer1::DataType::kHALF:
LOG(INFO) << "half";
// buffers[bindingIndex] = (void*)output_tensor->flat<float16>().data();
break;
case nvinfer1::DataType::kINT8:
LOG(INFO) << "int8";
// buffers[bindingIndex] = (void*)output_tensor->flat<int8>().data();
break;
}
}
// copied from cuda_kernel_helper since it seems only valid in *.cu.cc files
const cudaStream_t* stream = CHECK_NOTNULL(
reinterpret_cast<const cudaStream_t*>(context->op_device_context()
->stream()
->implementation()
->CudaStreamMemberHack()));
trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr);
cudaStreamSynchronize(*stream);
}
REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp);
} // namespace tensorrt
} // namespace tensorflow


@ -0,0 +1,55 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_ENGINE_OP_H_
#define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_ENGINE_OP_H_
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <memory>
#include <string>
#include <vector>
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
namespace tensorflow {
namespace tensorrt {
class Logger;
class TRTEngineOp : public OpKernel {
public:
explicit TRTEngineOp(OpKernelConstruction* context);
void Compute(OpKernelContext* context) override;
private:
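// Deleter that calls TensorRT's destroy() instead of delete, so that engine
// and execution context objects can be held in std::unique_ptr.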
template <typename T>
struct Destroyer {
void operator()(T* d) { d->destroy(); }
};
template <typename T>
using destroyed_ptr = std::unique_ptr<T, Destroyer<T>>;
destroyed_ptr<nvinfer1::ICudaEngine> trt_engine_ptr_;
// TODO(samikama) context should go to a resource manager!
destroyed_ptr<nvinfer1::IExecutionContext> trt_context_ptr_;
std::vector<string> input_nodes_;
std::vector<string> output_nodes_;
};
} // namespace tensorrt
} // namespace tensorflow
#endif // TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_ENGINE_OP_H_


@ -0,0 +1,56 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
// Use TF logging for TensorRT information
#include "tensorflow/core/platform/logging.h"
#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1)
//------------------------------------------------------------------------------
namespace tensorflow {
//------------------------------------------------------------------------------
namespace tensorrt {
void Logger::log(Severity severity, const char* msg) {
// suppress info-level messages
switch (severity) {
case Severity::kINFO: { // mark TRT info messages as debug!
LOG(DEBUG) << msg;
break;
}
case Severity::kWARNING: {
LOG(WARNING) << msg;
break;
}
case Severity::kERROR: {
LOG(ERROR) << msg;
break;
}
case Severity::kINTERNAL_ERROR: {
LOG(FATAL) << msg;
break;
}
// This is unused for now, but it would catch new values if the enum changes
// in the future. It is always good to have a default case!
default: {
LOG(FATAL) << name_ << "Got unknown severity level from TRT " << msg;
break;
}
}
}
} // namespace tensorrt
} // namespace tensorflow


@ -0,0 +1,41 @@
// -*- c++ -*-
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_LOG_TRT_LOGGER_H_
#define TENSORFLOW_CONTRIB_TENSORRT_LOG_TRT_LOGGER_H_
// Use TF logging for TensorRT information
#include <NvInfer.h>
#include <string>
//------------------------------------------------------------------------------
namespace tensorflow {
//------------------------------------------------------------------------------
namespace tensorrt {
// Logger for GIE info/warning/errors
class Logger : public nvinfer1::ILogger {
void log(nvinfer1::ILogger::Severity severity, const char* msg) override;
private:
std::string name_;
};
} // namespace tensorrt
} // namespace tensorflow
#endif // TENSORFLOW_CONTRIB_TENSORRT_LOG_TRT_LOGGER_H_


@ -0,0 +1,37 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor_shape.h"
namespace tensorflow {
namespace shape_inference {
extern Status TRTEngineOpShapeInference(InferenceContext* c);
}
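// TRTEngineOp executes a pre-built, serialized TensorRT engine on its input
// tensors; input_nodes and output_nodes name the engine bindings that
// correspond to the op's inputs and outputs.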
REGISTER_OP("TRTEngineOp")
.Attr("serialized_engine: string")
.Attr("input_nodes: list(string)")
.Attr("output_nodes: list(string)")
.Attr("InT: list({int8, float16, float32})")
.Attr("OutT: list({int8, float16, float32})")
.Input("in_tensor: InT")
.Output("out_tensor: OutT")
.SetShapeFn(shape_inference::TRTEngineOpShapeInference);
} // namespace tensorflow


@ -0,0 +1,8 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.tensorrt.python.ops import trt_engine_op
from tensorflow.contrib.tensorrt.python.trt_convert import CreateInferenceGraph
# pylint: enable=unused-import,wildcard-import


@ -0,0 +1,35 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import platform
if platform.system() != "Windows":
# pylint: disable=wildcard-import,unused-import,g-import-not-at-top
from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import *
from tensorflow.contrib.util import loader
from tensorflow.python.platform import resource_loader
# pylint: enable=wildcard-import,unused-import,g-import-not-at-top
_trt_engine_op = loader.load_op_library(
resource_loader.get_path_to_datafile("_trt_engine_op.so"))
else:
raise RuntimeError("Windows platforms are not supported")


@ -0,0 +1,91 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Exposes the Python wrapper conversion to trt_graph."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,wildcard-import, line-too-long
from tensorflow.core.framework import graph_pb2
from tensorflow.python.framework import errors
from tensorflow.python.framework import errors_impl as _impl
from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert
from tensorflow.python.util import compat
import tensorflow as tf
from tensorflow.python.grappler import tf_optimizer
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.framework import meta_graph
from tensorflow.python.framework import ops
def CreateInferenceGraph(input_graph_def, outputs, max_batch_size=1,
max_workspace_size=2 << 20):
"""Python wrapper for the TRT transformation.
Args:
input_graph_def: GraphDef object containing a model to be transformed.
outputs: List of node names for the model outputs.
max_batch_size: max size for the input batch
max_workspace_size: parameter to control memory allocation (in Bytes)
Returns:
New GraphDef with TRTEngineOps placed in graph replacing subgraphs.
"""
# with errors.raise_exception_on_not_ok_status() as status:
# output_graph_def_string = trt_convert(
# input_graph_def_string,outputs,
# max_batch_size,max_workspace_size, status)
g = tf.Graph()
with g.as_default():
tf.import_graph_def(input_graph_def, name="")
rewriter_config = rewriter_config_pb2.RewriterConfig()
rewriter_config.optimizers.append('layout')
rewriter_config.optimizers.append('constfold')
# mark output nodes as fetch
train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
for node_name in outputs:
out_node = g.get_operation_by_name(node_name)
for i in range(len(out_node.outputs)):
train_op.append(out_node.outputs[i])
# constant folding
mg = meta_graph.create_meta_graph_def(graph=g)
meta_graph.add_collection_def(mg, ops.GraphKeys.TRAIN_OP)
optimized_graph_def_str = \
tf_optimizer.OptimizeGraph(rewriter_config, mg).SerializeToString()
# TODO(sami): Fix this when we can return status from C++ library
# There is a problem with the TF internal library setup that doesn't allow us
# to return a status object from C++. Thus we return a pair of strings, where
# the first one is the encoded status and the second one is the transformed
# graph's protobuf string.
out = trt_convert(
optimized_graph_def_str, outputs,
max_batch_size, max_workspace_size)
status = out[0]
output_graph_def_string = out[1]
del optimized_graph_def_str #save some memory
if len(status) < 2:
raise _impl.UnknownError(None,None,status)
if status[:2] != "OK":
msg=status.split(";")
if len(msg) == 1:
raise RuntimeError("Status message is malformed {}".format(status))
raise _impl._make_specific_exception(None,None,";".join(msg[1:]), int(msg[0]))
output_graph_def = graph_pb2.GraphDef()
output_graph_def.ParseFromString(output_graph_def_string)
del output_graph_def_string #save some memory
return output_graph_def


@ -0,0 +1,259 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/segment/segment.h"
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "tensorflow/contrib/tensorrt/segment/union_find.h"
#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
//------------------------------------------------------------------------------
namespace tensorrt {
namespace segment {
//------------------------------------------------------------------------------
namespace {
//------------------------------------------------------------------------------
bool CanContractEdge(const tensorflow::Edge* edge,
const tensorflow::Graph& graph) {
const tensorflow::Node* src = edge->src();
const tensorflow::Node* dst = edge->dst();
// Can't contract edge if doing so would cause a cycle in the
// graph. So, if there is a directed path from 'src' to 'dst', other
// than 'edge' (or any other direct edge from 'src' to 'dst'), then
// combining 'src' and 'dst' will cause a cycle along that path.
//
// In practice, to avoid modifying the graph and to take advantage
// of existing graph functions, we perform an equivalent.
// 1. Get all nodes incoming to 'dst', excluding 'src'
// 2. Reverse DFS from those nodes
// 3. If reverse DFS reaches 'src' then we have a cycle
std::vector<tensorflow::Node*> dfs_start_nodes;
for (tensorflow::Node* node : dst->in_nodes()) {
if (node != src) {
dfs_start_nodes.push_back(node);
}
}
bool is_cycle = false;
if (!dfs_start_nodes.empty()) {
tensorflow::ReverseDFSFrom(graph, dfs_start_nodes, {},
[&is_cycle, src](tensorflow::Node* node) {
if (node == src) {
is_cycle = true;
}
});
}
return !is_cycle;
}
//------------------------------------------------------------------------------
void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph,
std::vector<const tensorflow::Edge*>* remove_edges) {
// Transfer all inputs and outputs of 'dst' to 'src' except edges
// connecting the two.
tensorflow::Node* src = edge->src();
tensorflow::Node* dst = edge->dst();
// We can use '0' for input/output index because we don't need them
// to be accurate for the way we are using the graph.
std::vector<const tensorflow::Edge*> in_edges(dst->in_edges().begin(),
dst->in_edges().end());
for (const tensorflow::Edge* in_edge : in_edges) {
if (in_edge->src() != src) {
tensorflow::Edge* e = const_cast<tensorflow::Edge*>(in_edge);
if (e->src() == graph->source_node()) {
graph->AddEdge(e->src(), e->src_output(), src,
tensorflow::Graph::kControlSlot);
} else {
graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */);
}
}
}
std::vector<const tensorflow::Edge*> out_edges(dst->out_edges().begin(),
dst->out_edges().end());
for (const tensorflow::Edge* out_edge : out_edges) {
tensorflow::Edge* e = const_cast<tensorflow::Edge*>(out_edge);
if (e->dst() == graph->sink_node()) {
graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(),
e->dst_input());
} else {
graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input());
}
}
// Return the edges that must be removed to disconnect 'dst' from
// the graph. We don't actually remove 'dst' since the caller holds
// references to all the nodes.
for (const auto& in_edge : dst->in_edges()) {
remove_edges->push_back(in_edge);
}
for (const auto& out_edge : dst->out_edges()) {
remove_edges->push_back(out_edge);
}
}
} // namespace
//------------------------------------------------------------------------------
tensorflow::Status SegmentGraph(
const tensorflow::GraphDef& gdef,
const std::function<bool(const tensorflow::NodeDef&)>& candidate_fn,
const SegmentOptions& options, SegmentNodesVector* segments) {
// Create a Graph representation of the GraphDef.
tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(),
gdef.library());
tensorflow::Graph graph(flib);
TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph(
tensorflow::GraphConstructorOptions(), gdef, &graph));
// tensorflow::DumpGraph("Pre-Segment", &graph);
// Use a union-find to collect the nodes that belong to the same
// segment. A node value of nullptr indicates that the node is not a
// candidate for TRT.
std::vector<UnionFind<tensorflow::Node*>> node_segments;
for (int i = 0; i < graph.num_node_ids(); ++i) {
tensorflow::Node* node = graph.FindNodeId(i);
if (!candidate_fn(node->def())) {
node = nullptr;
}
node_segments.emplace_back(node);
}
// Visit nodes in reverse topological order and use edge
// contraction to merge candidate nodes.
std::vector<tensorflow::Node*> order;
tensorflow::GetPostOrder(graph, &order);
for (const tensorflow::Node* node : order) {
// All output nodes of 'node' have been visited...
VLOG(2) << "Trying node " << node->name();
// 'node' must be a TRT candidate...
if (node_segments[node->id()].Value() == nullptr) {
VLOG(2) << "... not a TRT candidate";
continue;
}
// Contract output edges to combine 'node' with output
// nodes. Iterate since combining two nodes may unblock other
// combining.
while (true) {
std::set<const tensorflow::Edge*> contract_edges;
for (const tensorflow::Edge* out_edge : node->out_edges()) {
VLOG(2) << "... out node " << out_edge->dst()->name();
// Out node must be TRT candidate...
if (node_segments[out_edge->dst()->id()].Value() == nullptr) {
VLOG(2) << "... ... not a TRT candidate";
continue;
}
if (CanContractEdge(out_edge, graph)) {
VLOG(2) << "... ... can contract";
contract_edges.insert(out_edge);
} else {
VLOG(2) << "... ... cannot contract, would form cycle";
}
}
if (contract_edges.empty()) {
break;
}
// Contract edges and collect the adjacent nodes into the same
// segment/subgraph.
while (!contract_edges.empty()) {
const tensorflow::Edge* contract_edge = *contract_edges.begin();
const tensorflow::Node* src = contract_edge->src();
const tensorflow::Node* dst = contract_edge->dst();
VLOG(2) << "Merge " << src->name() << " <- " << dst->name();
node_segments[src->id()].Merge(&node_segments[dst->id()]);
// Contracting the edge leaves disconnected graph edges.
// Remove these from the graph and from 'contract_edges' so we
// don't visit them again.
tensorflow::Edge* e = const_cast<tensorflow::Edge*>(contract_edge);
std::vector<const tensorflow::Edge*> remove_edges;
ContractEdge(e, &graph, &remove_edges);
for (const tensorflow::Edge* r : remove_edges) {
contract_edges.erase(r);
graph.RemoveEdge(r);
}
}
}
}
// Collect the segments/subgraphs. Each subgraph is represented by a
// set of the names of the nodes in that subgraph.
std::unordered_map<std::string, std::set<std::string>> sg_map;
for (auto& u : node_segments) {
if ((u.Value() != nullptr) && (u.ParentValue() != nullptr)) {
sg_map[u.ParentValue()->name()].insert(u.Value()->name());
}
}
  // Clean up the graph by removing disconnected nodes before dumping it.
if (VLOG_IS_ON(2)) {
for (tensorflow::Node* node : graph.nodes()) {
if ((node->in_edges().size() == 0) && (node->out_edges().size() == 0)) {
graph.RemoveNode(node);
}
}
// tensorflow::DumpGraph("Post-Segment", &graph);
}
// Convert the segments into the expected return format
for (const auto& itr : sg_map) {
const auto& segment_node_names = itr.second;
if (VLOG_IS_ON(1)) {
std::string s;
for (const auto& name : segment_node_names) {
s += " " + name;
}
VLOG(1) << "Segment " << segments->size() << ":" << s;
}
// Don't use small segments.
if (static_cast<int>(segment_node_names.size()) <
options.minimum_segment_size) {
VLOG(1) << "Segment " << segments->size() << " has only "
<< segment_node_names.size() << " nodes, dropping";
continue;
}
segments->emplace_back(segment_node_names);
}
return tensorflow::Status::OK();
}
} // namespace segment
} // namespace tensorrt

View File

@ -0,0 +1,53 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_SEGMENT_SEGMENT_H_
#define TENSORFLOW_CONTRIB_TENSORRT_SEGMENT_SEGMENT_H_
#include <functional>
#include <set>
#include <string>
#include <vector>
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/lib/core/status.h"
namespace tensorrt {
namespace segment {
using SegmentNodesVector = std::vector<std::set<std::string>>;
struct SegmentOptions {
// Segment must contain at least this many nodes.
int minimum_segment_size = 2;
};
// Get the subgraphs of a graph that can be handled by TensorRT.
//
// @param gdef The GraphDef describing the network
// @param candidate_fn A function that returns true for a NodeDef if
// that node can be handled by TensorRT.
// @param segments Returns the TensorRT segments/subgraphs. Each entry
// in the vector describes a subgraph by giving a set of the names of
// all the NodeDefs in that subgraph.
// @return the status.
tensorflow::Status SegmentGraph(
const tensorflow::GraphDef& gdef,
const std::function<bool(const tensorflow::NodeDef&)>& candidate_fn,
const SegmentOptions& options, SegmentNodesVector* segments);
} // namespace segment
} // namespace tensorrt
#endif // TENSORFLOW_CONTRIB_TENSORRT_SEGMENT_SEGMENT_H_
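A minimal usage sketch of the SegmentGraph API declared above. The helper name, the op whitelist, and the minimum segment size chosen here are illustrative assumptions and not part of this commit; in the actual converter the candidate function is supplied by the conversion pass.
// Sketch: collect TensorRT-compatible subgraphs from a GraphDef, assuming a
// hypothetical whitelist of convertible ops.
#include <functional>
#include "tensorflow/contrib/tensorrt/segment/segment.h"
tensorflow::Status FindTrtSegments(const tensorflow::GraphDef& gdef,
                                   tensorrt::segment::SegmentNodesVector* segments) {
  tensorrt::segment::SegmentOptions options;
  options.minimum_segment_size = 3;  // Drop segments smaller than 3 nodes.
  auto candidate_fn = [](const tensorflow::NodeDef& node) {
    // Hypothetical whitelist; the real per-op check lives in the conversion code.
    return node.op() == "Conv2D" || node.op() == "BiasAdd" || node.op() == "Relu";
  };
  return tensorrt::segment::SegmentGraph(gdef, candidate_fn, options, segments);
}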

View File

@ -0,0 +1,363 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/segment/segment.h"
#include "tensorflow/c/c_api.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/test.h"
//------------------------------------------------------------------------------
using namespace tensorflow;
namespace tensorrt {
namespace segment {
namespace test {
class SegmentTest : public ::testing::Test {
public:
bool GetGraphDef(TF_Graph* graph, tensorflow::GraphDef* graph_def);
TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name);
TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
TF_Status* s, const char* name);
std::function<bool(const NodeDef&)> MakeCandidateFn(
const std::set<std::string>& node_names);
protected:
void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name,
TF_Operation** op);
void AddHelper(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
TF_Status* s, const char* name, TF_Operation** op, bool check);
SegmentOptions default_options_;
};
bool SegmentTest::GetGraphDef(TF_Graph* graph,
tensorflow::GraphDef* graph_def) {
TF_Status* s = TF_NewStatus();
TF_Buffer* buffer = TF_NewBuffer();
TF_GraphToGraphDef(graph, buffer, s);
bool ret = TF_GetCode(s) == TF_OK;
EXPECT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
if (ret) ret = graph_def->ParseFromArray(buffer->data, buffer->length);
TF_DeleteBuffer(buffer);
TF_DeleteStatus(s);
return ret;
}
std::function<bool(const NodeDef&)> SegmentTest::MakeCandidateFn(
const std::set<std::string>& node_names) {
return [node_names](const NodeDef& node) -> bool {
return node_names.find(node.name()) != node_names.end();
};
}
void SegmentTest::PlaceholderHelper(TF_Graph* graph, TF_Status* s,
const char* name, TF_Operation** op) {
TF_OperationDescription* desc = TF_NewOperation(graph, "Placeholder", name);
TF_SetAttrType(desc, "dtype", TF_INT32);
*op = TF_FinishOperation(desc, s);
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
ASSERT_NE(*op, nullptr);
}
TF_Operation* SegmentTest::Placeholder(TF_Graph* graph, TF_Status* s,
const char* name) {
TF_Operation* op;
PlaceholderHelper(graph, s, name, &op);
return op;
}
void SegmentTest::AddHelper(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
TF_Status* s, const char* name, TF_Operation** op,
bool check) {
TF_OperationDescription* desc = TF_NewOperation(graph, "AddN", name);
TF_Output add_inputs[2] = {{l, 0}, {r, 0}};
TF_AddInputList(desc, add_inputs, 2);
*op = TF_FinishOperation(desc, s);
if (check) {
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
ASSERT_NE(*op, nullptr);
}
}
TF_Operation* SegmentTest::Add(TF_Operation* l, TF_Operation* r,
TF_Graph* graph, TF_Status* s,
const char* name) {
TF_Operation* op;
AddHelper(l, r, graph, s, name, &op, true);
return op;
}
//------------------------------------------------------------------------------
TEST_F(SegmentTest, Empty) {
TF_Graph* graph = TF_NewGraph();
GraphDef graph_def;
ASSERT_TRUE(GetGraphDef(graph, &graph_def));
SegmentNodesVector segments;
ASSERT_EQ(
SegmentGraph(graph_def, MakeCandidateFn({}), default_options_, &segments),
tensorflow::Status::OK());
// Expect no segments/subgraphs.
EXPECT_TRUE(segments.empty());
}
//------------------------------------------------------------------------------
TEST_F(SegmentTest, Simple) {
TF_Status* s = TF_NewStatus();
TF_Graph* graph = TF_NewGraph();
// feed
// // ||
// add0 add1
// | | /
// | add2
// | / ||
// add3 add4
// | /
// <sink>
//
TF_Operation* feed = Placeholder(graph, s, "feed");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("feed"), string(TF_OperationName(feed)));
TF_Operation* add0 = Add(feed, feed, graph, s, "add0");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add1 = Add(feed, feed, graph, s, "add1");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add2 = Add(add0, add1, graph, s, "add2");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add3 = Add(add0, add2, graph, s, "add3");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add3"), string(TF_OperationName(add3)));
TF_Operation* add4 = Add(add2, add2, graph, s, "add4");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add4"), string(TF_OperationName(add4)));
GraphDef graph_def;
ASSERT_TRUE(GetGraphDef(graph, &graph_def));
SegmentNodesVector segments;
ASSERT_EQ(
SegmentGraph(graph_def,
MakeCandidateFn({"add0", "add1", "add2", "add3", "add4"}),
default_options_, &segments),
tensorflow::Status::OK());
// Expect all Add operations to be collapsed into a single segment
ASSERT_EQ(segments.size(), 1);
std::vector<std::string> expected{"add0", "add1", "add2", "add3", "add4"};
for (const auto& ex : expected) {
EXPECT_TRUE(segments[0].find(ex) != segments[0].end())
<< "Missing expected node " << ex;
}
}
//------------------------------------------------------------------------------
TEST_F(SegmentTest, AvoidCycle) {
TF_Status* s = TF_NewStatus();
TF_Graph* graph = TF_NewGraph();
// add2 is not a TRT candidate so add0/add3 cannot be formed as a
// subgraph
//
// feed
// // ||
// add0 add1
// | | /
// | add2
// | / ||
// add3 add4
// | /
// <sink>
//
TF_Operation* feed = Placeholder(graph, s, "feed");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("feed"), string(TF_OperationName(feed)));
TF_Operation* add0 = Add(feed, feed, graph, s, "add0");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add1 = Add(feed, feed, graph, s, "add1");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add2 = Add(add0, add1, graph, s, "add2");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add3 = Add(add0, add2, graph, s, "add3");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add3"), string(TF_OperationName(add3)));
TF_Operation* add4 = Add(add2, add2, graph, s, "add4");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add4"), string(TF_OperationName(add4)));
GraphDef graph_def;
ASSERT_TRUE(GetGraphDef(graph, &graph_def));
SegmentNodesVector segments;
ASSERT_EQ(
SegmentGraph(graph_def, MakeCandidateFn({"add0", "add1", "add3", "add4"}),
default_options_, &segments),
tensorflow::Status::OK());
// Expect no subgraphs
EXPECT_EQ(segments.size(), 0);
}
//------------------------------------------------------------------------------
TEST_F(SegmentTest, Multiple) {
TF_Status* s = TF_NewStatus();
TF_Graph* graph = TF_NewGraph();
// add5 is not a TRT candidate so two subgraphs should be formed
//
// feed
// // || ||
// add0 add1 add7
// | | / / ||
// | add2-----add5 add8
// | / | | | |
// add3 add4 add6
// | | /
// <sink>
//
TF_Operation* feed = Placeholder(graph, s, "feed");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("feed"), string(TF_OperationName(feed)));
TF_Operation* add0 = Add(feed, feed, graph, s, "add0");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add1 = Add(feed, feed, graph, s, "add1");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add7 = Add(feed, feed, graph, s, "add7");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add2 = Add(add0, add1, graph, s, "add2");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add5 = Add(add2, add7, graph, s, "add5");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add8 = Add(add7, add7, graph, s, "add8");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add3 = Add(add0, add2, graph, s, "add3");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add3"), string(TF_OperationName(add3)));
TF_Operation* add4 = Add(add2, add5, graph, s, "add4");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add4"), string(TF_OperationName(add4)));
TF_Operation* add6 = Add(add5, add8, graph, s, "add6");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add6"), string(TF_OperationName(add6)));
GraphDef graph_def;
ASSERT_TRUE(GetGraphDef(graph, &graph_def));
SegmentNodesVector segments;
ASSERT_EQ(SegmentGraph(graph_def,
MakeCandidateFn({"add0", "add1", "add2", "add3",
"add4", "add6", "add7", "add8"}),
default_options_, &segments),
tensorflow::Status::OK());
// Expect two subgraphs
EXPECT_EQ(segments.size(), 2);
std::vector<std::string> expected0{"add0", "add1", "add2", "add3"};
for (const auto& ex : expected0) {
EXPECT_TRUE(segments[0].find(ex) != segments[0].end())
<< "Missing expected node " << ex;
}
std::vector<std::string> expected1{"add6", "add8"};
for (const auto& ex : expected1) {
EXPECT_TRUE(segments[1].find(ex) != segments[1].end())
<< "Missing expected node " << ex;
}
}
//------------------------------------------------------------------------------
TEST_F(SegmentTest, BigIfElse) {
TF_Status* s = TF_NewStatus();
TF_Graph* graph = TF_NewGraph();
// add2 is not a TRT candidate
//
// feed
// ||
// add0
// // ||
// add1 add4
// || ||
// add2 add5
// || ||
// add3 add6
// || //
// add7
// ||
// <sink>
//
TF_Operation* feed = Placeholder(graph, s, "feed");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("feed"), string(TF_OperationName(feed)));
TF_Operation* add0 = Add(feed, feed, graph, s, "add0");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add1 = Add(add0, add0, graph, s, "add1");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add2 = Add(add1, add1, graph, s, "add2");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add3 = Add(add2, add2, graph, s, "add3");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add4 = Add(add0, add0, graph, s, "add4");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add5 = Add(add4, add4, graph, s, "add5");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add6 = Add(add5, add5, graph, s, "add6");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
TF_Operation* add7 = Add(add3, add6, graph, s, "add7");
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
EXPECT_EQ(string("add7"), string(TF_OperationName(add7)));
GraphDef graph_def;
ASSERT_TRUE(GetGraphDef(graph, &graph_def));
SegmentNodesVector segments;
ASSERT_EQ(SegmentGraph(graph_def,
MakeCandidateFn({"add0", "add1", "add3", "add4",
"add5", "add6", "add7"}),
default_options_, &segments),
tensorflow::Status::OK());
// Expect 2 subgraphs
EXPECT_EQ(segments.size(), 2);
std::vector<std::string> expected0{"add3", "add4", "add5", "add6", "add7"};
for (const auto& ex : expected0) {
EXPECT_TRUE(segments[0].find(ex) != segments[0].end())
<< "Missing expected node " << ex;
}
std::vector<std::string> expected1{"add0", "add1"};
for (const auto& ex : expected1) {
EXPECT_TRUE(segments[1].find(ex) != segments[1].end())
<< "Missing expected node " << ex;
}
}
} // namespace test
} // namespace segment
} // namespace tensorrt

View File

@ -0,0 +1,77 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_SEGMENT_UNION_FIND_H_
#define TENSORFLOW_CONTRIB_TENSORRT_SEGMENT_UNION_FIND_H_
namespace tensorrt {
namespace segment {
// Union-Find data structure.
// Each cluster has an associated value; when merging clusters we can control
// which value becomes the representative of the merged clusters. Values must be
// copyable.
template <typename T>
class UnionFind {
public:
UnionFind() : size_(1), parent_(nullptr) {}
explicit UnionFind(const T& v) : size_(1), parent_(nullptr), value_(v) {}
// Returns the number of elements in a cluster.
int Size() { return FindRoot()->size_; }
// Merges this cluster with 'other'. This cluster's value becomes
// the value of the merged cluster; the value of 'other' is ignored.
void Merge(UnionFind* other);
// Each cluster has an associated value. Retrieves the value associated
// with this cluster.
T& ParentValue() { return FindRoot()->value_; }
// Get the original value of this node.
T& Value() { return value_; }
private:
// Finds the root element of the cluster. Performs path compression.
UnionFind* FindRoot();
int size_;
UnionFind* parent_;
T value_;
};
template <typename T>
void UnionFind<T>::Merge(UnionFind* other) {
UnionFind<T>* a = FindRoot();
UnionFind<T>* b = other->FindRoot();
if (a == b) return;
b->parent_ = a;
a->size_ += b->size_;
}
template <typename T>
UnionFind<T>* UnionFind<T>::FindRoot() {
if (!parent_) return this;
// Path compression: update intermediate nodes to point to the root of the
// equivalence class.
parent_ = parent_->FindRoot();
return parent_;
}
} // namespace segment
} // namespace tensorrt
#endif // TENSORFLOW_CONTRIB_TENSORRT_SEGMENT_UNION_FIND_H_
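For orientation, a small sketch of how this UnionFind behaves; the integer values are arbitrary and only illustrate the Merge/FindRoot semantics (the segmenter stores tensorflow::Node* values instead). The include path is inferred from the header guard.
// Sketch: two singleton clusters merged into one; the caller's value wins.
#include "tensorflow/contrib/tensorrt/segment/union_find.h"
void UnionFindSketch() {
  tensorrt::segment::UnionFind<int> a(1);
  tensorrt::segment::UnionFind<int> b(2);
  a.Merge(&b);
  // Now a.Size() == 2 and b.Size() == 2 (both resolve to the same root),
  // b.ParentValue() == 1 (the representative value taken from 'a'),
  // while b.Value() is still 2 (the original per-element value).
}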

View File

@ -0,0 +1,123 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/shape_fn/trt_shfn.h"
#include <string>
#include <vector>
#include "NvInfer.h"
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
namespace tensorflow {
namespace shape_inference {
tensorflow::Status TRTEngineOpShapeInference(InferenceContext* c) {
tensorflow::tensorrt::Logger gLogger;
string serialized_engine;
c->GetAttr("serialized_engine", &serialized_engine);
nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(gLogger);
nvinfer1::ICudaEngine* trt_engine = infer->deserializeCudaEngine(
serialized_engine.c_str(), serialized_engine.size(), nullptr);
  // Debug print of the engine bindings.
std::stringstream oss;
for (int i = 0; i < trt_engine->getNbBindings(); i++) {
LOG(INFO) << "index: " << i
<< ", binding name: " << trt_engine->getBindingName(i);
bool input_flag = trt_engine->bindingIsInput(i);
oss << "input?: " << (input_flag ? "Y" : "N");
oss << "Dimension: ";
auto dims = trt_engine->getBindingDimensions(i);
oss << " nbDims: " << dims.nbDims << " -> ";
for (int j = 0; j < dims.nbDims; j++) oss << dims.d[j] << ", ";
LOG(INFO) << oss.str();
oss.str("");
switch (trt_engine->getBindingDataType(i)) {
case nvinfer1::DataType::kFLOAT:
LOG(INFO) << "data type: float" << std::endl;
break;
case nvinfer1::DataType::kHALF:
LOG(INFO) << "data type: half" << std::endl;
break;
case nvinfer1::DataType::kINT8:
LOG(INFO) << "data type: int8" << std::endl;
break;
}
}
int nbBatch = -1;
  // Debug print of the input types and shapes.
std::vector<::tensorflow::DataType> input_type;
c->GetAttr("InT", &input_type);
oss.str("");
for (size_t i = 0; i < c->num_inputs(); i++) {
// check if input shape is legit
auto input_shape = c->input(i);
int index = i;
oss << "input:" << i << " type: " << input_type[index] << " shape: ";
for (int j = 0; j < c->Rank(input_shape); j++) {
auto dimHandler = c->Dim(input_shape, j);
if (c->ValueKnown(dimHandler))
oss << c->Value(dimHandler) << ", ";
else
        oss << "?, ";
if (j == 0) {
if (i == 0)
nbBatch = c->Value(dimHandler);
else if (nbBatch != c->Value(dimHandler))
LOG(WARNING) << "!!!!!!nbBatch does not match!!!!!!";
        // assert(nbBatch == c->Value(dimHandler));
}
}
LOG(INFO) << oss.str();
}
  // Log the input node names.
std::vector<string> input_nodes;
c->GetAttr("input_nodes", &input_nodes);
for (size_t i = 0; i < input_nodes.size(); i++) {
int index = i;
LOG(INFO) << "input:" << i << " name: " << input_nodes[index];
}
  // Compute output shapes from the engine bindings and set them on the context.
std::vector<string> output_nodes;
c->GetAttr("output_nodes", &output_nodes);
oss.str("");
for (size_t i = 0; i < output_nodes.size(); i++) {
int index = i;
int binding_index =
trt_engine->getBindingIndex(output_nodes[index].c_str());
oss << "string name " << output_nodes[index];
ShapeHandle output_shape;
std::vector<DimensionHandle> vecDim;
vecDim.emplace_back(c->MakeDim(nbBatch));
if (binding_index != -1) {
oss << "got binding " << binding_index;
auto dims = trt_engine->getBindingDimensions(binding_index);
for (int j = 0; j < dims.nbDims; j++)
vecDim.emplace_back(c->MakeDim(dims.d[j]));
} else {
oss << "no binding ";
}
output_shape = c->MakeShape(vecDim);
c->set_output(i, output_shape);
LOG(INFO) << oss.str();
}
  // Release the deserialized engine and the runtime before returning.
  trt_engine->destroy();
  infer->destroy();
  return Status::OK();
}
} // namespace shape_inference
} // namespace tensorflow

View File

@ -0,0 +1,28 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CONTRIB_TENSORRT_SHAPE_FN_TRT_SHFN_H_
#define TENSORFLOW_CONTRIB_TENSORRT_SHAPE_FN_TRT_SHFN_H_
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/lib/core/status.h"
namespace tensorflow {
namespace shape_inference {
Status TRTEngineOpShapeInference(InferenceContext* c);
} // namespace shape_inference
} // namespace tensorflow
#endif // TENSORFLOW_CONTRIB_TENSORRT_SHAPE_FN_TRT_SHFN_H_
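The shape function above is meant to be attached to the TRT engine op's registration. The snippet below is a hedged sketch of such a registration with an abbreviated attribute list; it is not the actual REGISTER_OP call from this commit, and any attribute beyond serialized_engine, input_nodes, output_nodes, and InT (which the shape function reads) is an assumption.
// Sketch only: attach TRTEngineOpShapeInference via SetShapeFn.
#include "tensorflow/contrib/tensorrt/shape_fn/trt_shfn.h"
#include "tensorflow/core/framework/op.h"
REGISTER_OP("TRTEngineOp")
    .Attr("serialized_engine: string")
    .Attr("input_nodes: list(string)")
    .Attr("output_nodes: list(string)")
    .Attr("InT: list(type)")
    .Attr("OutT: list(type)")
    .Input("in_tensor: InT")
    .Output("out_tensor: OutT")
    .SetShapeFn(tensorflow::shape_inference::TRTEngineOpShapeInference);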

View File

@ -0,0 +1,84 @@
/*
wrap trt_conversion
*/
%{
#define SWIG_FILE_WITH_INIT
%}
%include "std_string.i"
%include "std_pair.i"
%include "tensorflow/python/lib/core/strings.i"
%include "tensorflow/python/platform/base.i"
%template(StringPair) std::pair<string,string>;
%template() std::pair<swig::SwigPtr_PyObject, swig::SwigPtr_PyObject>;
%{
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/util/stat_summarizer.h"
#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
%}
%ignoreall
%unignore tensorflow;
%unignore trt_convert;
%{
std::pair<string,string> trt_convert(string graph_def_string,//const tensorflow::GraphDef&
std::vector<string> output_names,
size_t max_batch_size,
size_t max_workspace_size
                       // Unfortunately we can't use TF_Status here since it
                       // lives in c/c_api and pulls in a lot of other libraries
                       // which in turn declare ops. Those ops are also linked
                       // statically into our library and cause an abort from
                       // double registration when the module is loaded. Until
                       // TensorFlow properly exposes these headers, we work
                       // around this by returning a status string and converting
                       // it to an exception on the Python side.
//,TF_Status* out_status) {
) {
string out_status;
tensorflow::GraphDef graph_def;
if (!graph_def.ParseFromString(graph_def_string)) {
out_status="InvalidArgument;Couldn't interpret input as a GraphDef";
return std::pair<string,string>{out_status,""};
}
if (!output_names.size()) {
out_status="InvalidArgument;Size of the output_names vector is 0";
return std::pair<string,string>{out_status,""};
//return "";
}
tensorflow::GraphDef outGraph;
tensorflow::Status conversion_status =
tensorrt::convert::ConvertGraphDefToTensorRT(graph_def,
output_names,
max_batch_size,
max_workspace_size,
&outGraph);
if (!conversion_status.ok()) {
auto retCode=(int)conversion_status.code();
char buff[2000];
snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str());
out_status=buff;
return std::pair<string,string>{out_status,""};
}
string result;
if (!outGraph.SerializeToString(&result)) {
out_status="InvalidArgument;Couldn't serialize output as a GraphDef";
return std::pair<string,string>{out_status,""};
}
out_status="OK;All good!";
return std::pair<string,string>{out_status,result};
}
%}
std::pair<string,string> trt_convert(string graph_def_string,
std::vector<string> output_names,
size_t max_batch_size,
size_t max_workspace_size);
%unignoreall

View File

@ -279,7 +279,7 @@ def tf_cc_shared_object(
linkopts=[],
framework_so=tf_binary_additional_srcs(),
**kwargs):
native.cc_binary(
native.cc_binary(
name=name,
srcs=srcs + framework_so,
deps=deps,
@ -1281,6 +1281,45 @@ def tf_extension_linkopts():
def tf_extension_copts():
return [] # No extension c opts
# In libraries generated by tf_py_wrap_cc, module init functions are not
# exported unless they contain one of the keywords in the version script,
# which breaks custom Python modules. This rule appends init_<module_name>
# to the list of exported symbols in the version script.
def _append_init_to_versionscript_impl(ctx):
  modName = ctx.attr.module_name
  isVS = ctx.attr.is_version_script
  if isVS:
    ctx.actions.expand_template(
        template=ctx.file.template_file,
        output=ctx.outputs.versionscript,
        substitutions={
            "global:": "global:\n init_%s;" % modName,
        },
        is_executable=False,
    )
  else:
    ctx.actions.expand_template(
        template=ctx.file.template_file,
        output=ctx.outputs.versionscript,
        substitutions={
            "*tensorflow*": "*tensorflow*\ninit_%s" % modName,
        },
        is_executable=False,
    )
_append_init_to_versionscript = rule(
    implementation=_append_init_to_versionscript_impl,
    attrs={
        "module_name": attr.string(mandatory=True),
        "template_file": attr.label(allow_files=True, single_file=True, mandatory=True),
        "is_version_script": attr.bool(
            default=True,
            doc='whether the target is an ld version script or an exported-symbols list',
            mandatory=False),
    },
    outputs={"versionscript": "%{name}.lds"},
)
def tf_py_wrap_cc(name,
srcs,
swig_includes=[],
@ -1302,26 +1341,39 @@ def tf_py_wrap_cc(name,
toolchain_deps=["//tools/defaults:crosstool"],
module_name=module_name,
py_module_name=name)
vscriptname=name+"_versionscript"
_append_init_to_versionscript(
name=vscriptname,
module_name=module_name,
is_version_script=select({
"@local_config_cuda//cuda:darwin":False,
"//conditions:default":True,
}),
template_file=select({
"@local_config_cuda//cuda:darwin":clean_dep("//tensorflow:tf_exported_symbols.lds"),
"//conditions:default":clean_dep("//tensorflow:tf_version_script.lds")
})
)
extra_linkopts = select({
"@local_config_cuda//cuda:darwin": [
"-Wl,-exported_symbols_list",
clean_dep("//tensorflow:tf_exported_symbols.lds")
"%s.lds"%vscriptname,
],
clean_dep("//tensorflow:windows"): [],
clean_dep("//tensorflow:windows_msvc"): [],
"//conditions:default": [
"-Wl,--version-script",
clean_dep("//tensorflow:tf_version_script.lds")
"%s.lds"%vscriptname,
]
})
extra_deps += select({
"@local_config_cuda//cuda:darwin": [
clean_dep("//tensorflow:tf_exported_symbols.lds")
"%s.lds"%vscriptname,
],
clean_dep("//tensorflow:windows"): [],
clean_dep("//tensorflow:windows_msvc"): [],
"//conditions:default": [
clean_dep("//tensorflow:tf_version_script.lds")
"%s.lds"%vscriptname,
]
})

View File

@ -11,6 +11,7 @@ load(
)
load("//third_party/mkl:build_defs.bzl", "if_mkl")
load("//tensorflow:tensorflow.bzl", "if_cuda")
load("@local_config_tensorrt//:build_defs.bzl", "if_trt")
load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_license_deps")
# This returns a list of headers of all public header libraries (e.g.,
@ -201,7 +202,8 @@ sh_binary(
"//tensorflow/python:test_ops",
"//tensorflow/tools/dist_test/server:grpc_tensorflow_server",
],
}) + if_mkl(["//third_party/mkl:intel_binary_blob"]),
}) + if_mkl(["//third_party/mkl:intel_binary_blob"])
+ if_trt(["//tensorflow/contrib/tensorrt:init_py"]),
)
# A genrule for generating a marker file for the pip package on Windows

View File

@ -1,6 +1,7 @@
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
load("//third_party/tensorrt:build_defs.bzl", "trt_repository")
load("//third_party/mkl:build_defs.bzl", "mkl_repository")
load("//third_party/git:git_configure.bzl", "git_configure")
load("//third_party/py:python_configure.bzl", "python_configure")
@ -66,6 +67,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
# version we require here.
check_bazel_version_at_least("0.5.4")
cuda_configure(name="local_config_cuda")
trt_repository(name="local_config_tensorrt")
git_configure(name="local_config_git")
sycl_configure(name="local_config_sycl")
python_configure(name="local_config_python")

0
third_party/tensorrt/BUILD vendored Normal file
View File

42
third_party/tensorrt/BUILD.tpl vendored Normal file
View File

@ -0,0 +1,42 @@
# -*- python -*-
# Description:
#   Provides the TensorRT library and headers.
# TODO(Sami): these need to be defined.
licenses(["notice"])
exports_files(["LICENSE"])
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")
config_setting(
    name = "trt_enabled",
    define_values = {
        "using_tensorrt": "true",
    },
    visibility = ["//visibility:public"],
)
cc_library(
    name = "tensorrt",
    srcs = [%{tensorrt_lib}],
    hdrs = [
        "include/NvInfer.h",
        "include/NvUtils.h",
    ],
    copts = cuda_default_copts(),
    deps = [
        "@local_config_cuda//cuda:cuda",
        "@local_config_cuda//cuda:cudnn",
    ],
    linkstatic = 1,
    # include_prefix = "include/",
    includes = ["include/"],
    visibility = ["//visibility:public"],
)
%{tensorrt_genrules}
# filegroup(
# name = "%{tensorrt_lib}",
# srcs = ["%{tensorrt_lib}"],
# visibility = ["//visibility:public"],
# )

203
third_party/tensorrt/LICENSE vendored Normal file
View File

@ -0,0 +1,203 @@
Copyright 2015 The TensorFlow Authors. All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2015, The TensorFlow Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

85
third_party/tensorrt/build_defs.bzl vendored Normal file
View File

@ -0,0 +1,85 @@
# -*- python -*-
"""
Adds a repository rule for configuring TensorRT (local_config_tensorrt).
"""
_TENSORRT_INSTALLATION_PATH="TENSORRT_INSTALL_PATH"
_TF_TENSORRT_VERSION="TF_TENSORRT_VERSION"
def _is_trt_enabled(repo_ctx):
  if "TF_NEED_TENSORRT" in repo_ctx.os.environ:
    enable_trt = repo_ctx.os.environ["TF_NEED_TENSORRT"].strip()
    return enable_trt == "1"
  return False
def _dummy_repo(repo_ctx):
  repo_ctx.template("BUILD", Label("//third_party/tensorrt:BUILD.tpl"),
                    {"%{tensorrt_lib}": "", "%{tensorrt_genrules}": ""},
                    False)
  repo_ctx.template("build_defs.bzl", Label("//third_party/tensorrt:build_defs.bzl.tpl"),
                    {"%{trt_configured}": "False"}, False)
  repo_ctx.file("include/NvUtils.h", "", False)
  repo_ctx.file("include/NvInfer.h", "", False)
def _trt_repo_impl(repo_ctx):
  """
  Implements local_config_tensorrt
  """
  if not _is_trt_enabled(repo_ctx):
    _dummy_repo(repo_ctx)
    return
  trt_libdir = repo_ctx.os.environ[_TENSORRT_INSTALLATION_PATH]
  trt_ver = repo_ctx.os.environ[_TF_TENSORRT_VERSION]
  # Special case for deb installations; once installation is standardized
  # between the tar and deb packages, this branch can be removed.
  if trt_libdir == '/usr/lib/x86_64-linux-gnu':
    incPath = '/usr/include/x86_64-linux-gnu'
    incname = '/usr/include/x86_64-linux-gnu/NvInfer.h'
  else:
    incPath = str(repo_ctx.path("%s/../include" % trt_libdir).realpath)
    incname = incPath + '/NvInfer.h'
  if len(trt_ver) > 0:
    origLib = "%s/libnvinfer.so.%s" % (trt_libdir, trt_ver)
  else:
    origLib = "%s/libnvinfer.so" % trt_libdir
  objdump = repo_ctx.which("objdump")
  if objdump == None:
    if len(trt_ver) > 0:
      targetlib = "lib/libnvinfer.so.%s" % (trt_ver[0])
    else:
      targetlib = "lib/libnvinfer.so"
  else:
    soname = repo_ctx.execute([objdump, "-p", origLib])
    for l in soname.stdout.splitlines():
      if "SONAME" in l:
        lib = l.strip().split(" ")[-1]
        targetlib = "lib/%s" % (lib)
  repo_ctx.symlink(origLib, targetlib)
  grule = ('genrule(\n name = "trtlinks",\n' +
           ' outs = [\n "%s",\n "include/NvInfer.h",\n "include/NvUtils.h",\n ],\n' % targetlib +
           ' cmd="""ln -sf %s $(@D)/%s ' % (origLib, targetlib) +
           '&&\n ln -sf %s $(@D)/include/NvInfer.h ' % (incname) +
           '&&\n ln -sf %s/NvUtils.h $(@D)/include/NvUtils.h""",\n)\n' % (incPath))
  repo_ctx.template("BUILD", Label("//third_party/tensorrt:BUILD.tpl"),
                    {"%{tensorrt_lib}": '"%s"' % targetlib, "%{tensorrt_genrules}": grule},
                    False)
  repo_ctx.template("build_defs.bzl", Label("//third_party/tensorrt:build_defs.bzl.tpl"),
                    {"%{trt_configured}": "True"}, False)
trt_repository = repository_rule(
    implementation=_trt_repo_impl,
    local=True,
    environ=[
        "TF_NEED_TENSORRT",
        _TF_TENSORRT_VERSION,
        _TENSORRT_INSTALLATION_PATH,
    ],
)

18
third_party/tensorrt/build_defs.bzl.tpl vendored Normal file
View File

@ -0,0 +1,18 @@
# -*- python -*-
"""
Template of TensorRT-related build helper functions.
"""
def is_trt_enabled():
  return %{trt_configured}
def if_trt(if_true, if_false=[]):
  # if is_trt_enabled():
  #   return if_true
  # return if_false
  return select({
      "@local_config_tensorrt//:trt_enabled": if_true,
      "//conditions:default": if_false,
  })