Move contrib/quantization ops to tensorflow/core

Change: 136410307
commit: 66024fd508
parent: 9b8ff3f50c
@@ -108,10 +108,6 @@ filegroup(
        "//tensorflow/contrib/metrics/kernels:all_files",
        "//tensorflow/contrib/ndlstm:all_files",
        "//tensorflow/contrib/opt:all_files",
        "//tensorflow/contrib/quantization:all_files",
        "//tensorflow/contrib/quantization/kernels:all_files",
        "//tensorflow/contrib/quantization/kernels/hexagon:all_files",
        "//tensorflow/contrib/quantization/tools:all_files",
        "//tensorflow/contrib/rnn:all_files",
        "//tensorflow/contrib/session_bundle:all_files",
        "//tensorflow/contrib/session_bundle/example:all_files",
@@ -133,6 +129,7 @@ filegroup(
        "//tensorflow/core/distributed_runtime:all_files",
        "//tensorflow/core/distributed_runtime/rpc:all_files",
        "//tensorflow/core/kernels:all_files",
        "//tensorflow/core/kernels/hexagon:all_files",
        "//tensorflow/core/ops/compat:all_files",
        "//tensorflow/core/platform/cloud:all_files",
        "//tensorflow/core/platform/default/build_config:all_files",
@@ -180,6 +177,7 @@ filegroup(
        "//tensorflow/tools/docs:all_files",
        "//tensorflow/tools/git:all_files",
        "//tensorflow/tools/proto_text:all_files",
        "//tensorflow/tools/quantization:all_files",
        "//tensorflow/tools/test:all_files",
        "//tensorflow/user_ops:all_files",
        "//third_party/hadoop:all_files",
@@ -60,6 +60,7 @@ include(gif)
include(png)
include(jpeg)
include(eigen)
include(gemmlowp)
include(jsoncpp)
include(farmhash)
include(highwayhash)
@@ -88,6 +89,7 @@ include_directories(
    ${png_INCLUDE_DIR}
    ${jpeg_INCLUDE_DIR}
    ${eigen_INCLUDE_DIRS}
    ${gemmlowp_INCLUDE_DIR}
    ${jsoncpp_INCLUDE_DIR}
    ${farmhash_INCLUDE_DIR}
    ${highwayhash_INCLUDE_DIR}
tensorflow/contrib/cmake/external/gemmlowp.cmake (new file, 15 lines, vendored)
@@ -0,0 +1,15 @@
include (ExternalProject)

set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz)
set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882)
set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)

ExternalProject_Add(gemmlowp
    PREFIX gemmlowp
    URL ${gemmlowp_URL}
    URL_HASH ${gemmlowp_HASH}
    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
    BUILD_IN_SOURCE 1
    PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
    INSTALL_COMMAND "")
tensorflow/contrib/cmake/patches/gemmlowp/CMakeLists.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
cmake_minimum_required(VERSION 2.8.3)

project(gemmlowp)
@@ -73,6 +73,7 @@ HOST_INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -146,6 +147,7 @@ INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -240,6 +242,7 @@ ifeq ($(TARGET),ANDROID)
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
@@ -141,6 +141,17 @@ tensorflow/core/kernels/batch_norm_op.cc
tensorflow/core/kernels/avgpooling_op.cc
tensorflow/core/kernels/argmax_op.cc
tensorflow/core/kernels/aggregate_ops.cc
tensorflow/core/kernels/dequantize_op.cc
tensorflow/core/kernels/quantization_utils.cc
tensorflow/core/kernels/quantize_down_and_shrink_range.cc
tensorflow/core/kernels/quantize_op.cc
tensorflow/core/kernels/quantized_activation_ops.cc
tensorflow/core/kernels/quantized_batch_norm_op.cc
tensorflow/core/kernels/quantized_bias_add_op.cc
tensorflow/core/kernels/quantized_concat_op.cc
tensorflow/core/kernels/quantized_conv_ops.cc
tensorflow/core/kernels/quantized_matmul_op.cc
tensorflow/core/kernels/quantized_pooling_ops.cc
tensorflow/core/ops/training_ops.cc
tensorflow/core/ops/string_ops.cc
tensorflow/core/ops/state_ops.cc
@@ -13,53 +13,6 @@ load(
    "tf_custom_op_library",
)

cc_library(
    name = "cc_array_ops",
    srcs = ["ops/array_ops.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cc_math_ops",
    srcs = ["ops/math_ops.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cc_nn_ops",
    srcs = ["ops/nn_ops.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cc_ops",
    linkstatic = 1,
    deps = [
        ":cc_array_ops",
        ":cc_math_ops",
        ":cc_nn_ops",
    ],
    alwayslink = 1,
)

filegroup(
    name = "android_ops",
    srcs = glob(["ops/*.cc"]),
    visibility = ["//visibility:public"],
)

py_library(
    name = "quantization_py",
    srcs = [
@@ -69,8 +22,6 @@ py_library(
    srcs_version = "PY2AND3",
    deps = [
        ":ops",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
    ],
)
@@ -83,52 +34,9 @@ py_library(
    ],
    srcs_version = "PY2AND3",
    deps = [
        ":array_ops",
        ":math_ops",
        ":nn_ops",
    ],
)

tf_gen_op_wrapper_py(
    name = "array_ops",
    deps = ["//tensorflow/contrib/quantization:cc_array_ops"],
)

tf_gen_op_wrapper_py(
    name = "math_ops",
    deps = ["//tensorflow/contrib/quantization:cc_math_ops"],
)

tf_gen_op_wrapper_py(
    name = "nn_ops",
    deps = ["//tensorflow/contrib/quantization:cc_nn_ops"],
)

py_test(
    name = "dequantize_op_test",
    size = "small",
    srcs = ["python/dequantize_op_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":ops",
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:framework_test_lib",
    ],
)

py_test(
    name = "quantized_conv_ops_test",
    size = "small",
    srcs = ["python/quantized_conv_ops_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":ops",
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:framework_test_lib",
        "//tensorflow/python:array_ops",
        "//tensorflow/python:math_ops",
        "//tensorflow/python:nn_ops",
    ],
)
@@ -139,24 +47,6 @@ filegroup(
    ]),
)

tf_custom_op_library(
    name = "_quantized_ops.so",
    srcs = [
        "ops/array_ops.cc",
        "ops/math_ops.cc",
        "ops/nn_ops.cc",
    ],
    deps = [
    ],
)

py_library(
    name = "quantized_ops_py",
    srcs = ["load_quantized_ops_so.py"],
    data = ["_quantized_ops.so"],
    srcs_version = "PY2AND3",
)

filegroup(
    name = "all_files",
    srcs = glob(
@@ -1,69 +0,0 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This sub Makefile compiles libraries under this directory. This is designed to
# be used as a sub Makefile with tensorflow/contrib/makefile/Makefile.
# You can build targets in this file by including this sub makefile like:
# $ make -f tensorflow/contrib/makefile/Makefile TARGET=<target> \
#       SUB_MAKEFILES=$(pwd)/tensorflow/contrib/quantization/Makefile.in \
#       (optional: NDK_ROOT=<ndk_root>) contrib_quantization_tests
# TODO(satok): Support more targets

GTEST_DIR := \
$(MAKEFILE_DIR)/downloads/googletest/googletest

GTEST_HEADERS = \
$(wildcard $(GTEST_DIR)/include/gtest/*.h) \
$(wildcard $(GTEST_DIR)/include/gtest/internal/*.h)

GTEST_SRCS := \
$(wildcard $(GTEST_DIR)/src/*.cc) \
$(wildcard $(GTEST_DIR)/src/*.h) \
$(GTEST_HEADERS)

QUANTIZATION_TEST_SRCS := \
tensorflow/contrib/quantization/ops/math_ops.cc \
tensorflow/contrib/quantization/kernels/quantize_op.cc \
tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc \
tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc \
tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc \
tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc \
tensorflow/contrib/makefile/test/test_main.cc

QUANTIZATION_TEST_OBJS := $(addprefix $(OBJDIR), $(QUANTIZATION_TEST_SRCS:.cc=.o))

QUANTIZATION_TEST_NAME := contrib_quantization_tests
QUANTIZATION_TEST_BIN_PATH := $(BINDIR)$(QUANTIZATION_TEST_NAME)

INCLUDES += \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/googletest/googletest/include

QUANTIZATION_TEST_INCLUDES := $(INCLUDES)

$(OBJDIR)gtest-all.o : $(GTEST_SRCS)
	$(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) -I $(GTEST_DIR) -c \
	$(GTEST_DIR)/src/gtest-all.cc -o $@

$(LIBDIR)gtest.a : $(OBJDIR)gtest-all.o
	$(AR) $(ARFLAGS) $@ $^

$(QUANTIZATION_TEST_BIN_PATH): $(LIB_PATH) $(LIBDIR)gtest.a $(QUANTIZATION_TEST_OBJS)
	@mkdir -p $(dir $@)
	$(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) \
	-o $(QUANTIZATION_TEST_BIN_PATH) $(QUANTIZATION_TEST_OBJS) \
	$(LIBFLAGS) $(LIB_PATH) $(LIBDIR)gtest.a $(LDFLAGS) $(LIBS)

$(QUANTIZATION_TEST_NAME): $(QUANTIZATION_TEST_BIN_PATH)
@@ -24,7 +24,7 @@ from tensorflow.contrib.quantization.python import array_ops as quantized_array_
from tensorflow.contrib.quantization.python.math_ops import *
from tensorflow.contrib.quantization.python.nn_ops import *

from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.python.ops.gen_array_ops import dequantize
from tensorflow.python.ops.gen_array_ops import quantize_v2
from tensorflow.python.ops.gen_array_ops import quantized_concat
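The import swap in the hunk above (the four contrib imports are the old spellings, the four python.ops imports their replacements) is the user-visible core of this change: the generated wrappers now come from tensorflow.python.ops, and the contrib package merely re-exports them. A minimal sketch of what that means for user code, illustrative only and based solely on the import lines in this hunk:

# Both spellings now resolve to the same generated op wrappers; the
# contrib path survives purely as a compatibility re-export.
from tensorflow.python.ops.gen_array_ops import dequantize, quantize_v2
from tensorflow.contrib.quantization import dequantize as contrib_dequantize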
@@ -1,311 +0,0 @@
# Description:
#   quantization-specific OpKernels

package(
    default_visibility = ["//visibility:public"],
    features = ["-parse_headers"],
)

licenses(["notice"])  # Apache 2.0

load(
    "//tensorflow:tensorflow.bzl",
    "tf_cc_test",
    "tf_custom_op_library",
    "tf_kernel_library",
)

filegroup(
    name = "android_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantization_utils.h",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
        "reference_gemm.h",
    ],
    visibility = ["//visibility:public"],
)

filegroup(
    name = "all_files",
    srcs = glob(
        ["**/*"],
        exclude = [
            "**/METADATA",
            "**/OWNERS",
        ],
    ),
    visibility = ["//tensorflow:__subpackages__"],
)

tf_kernel_library(
    name = "quantized_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
    ],
    hdrs = [
        "quantization_utils.h",
        "reference_gemm.h",
    ],
    deps = [
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core/kernels:concat_lib_hdrs",
        "//tensorflow/core/kernels:conv_ops",
        "//tensorflow/core/kernels:eigen_helpers",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/kernels:pooling_ops",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
)

tf_custom_op_library(
    name = "_quantized_kernels.so",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantization_utils.h",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
        "reference_gemm.h",
    ],
    deps = [
        "//tensorflow/core/kernels:concat_lib_hdrs",
        "//tensorflow/core/kernels:ops_util_hdrs",
        "//tensorflow/core/kernels:pooling_ops_hdrs",
        "@gemmlowp//:gemmlowp",
    ],
)

py_library(
    name = "quantized_kernels_py",
    srcs = ["load_quantized_kernels_so.py"],
    data = ["_quantized_kernels.so"],
    srcs_version = "PY2AND3",
)

tf_cc_test(
    name = "quantize_down_and_shrink_range_op_test",
    size = "small",
    srcs = ["quantize_down_and_shrink_range_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantization_utils_test",
    srcs = ["quantization_utils_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//third_party/eigen3",
    ],
)

tf_cc_test(
    name = "quantized_activation_ops_test",
    srcs = ["quantized_activation_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_bias_add_op_test",
    size = "small",
    srcs = ["quantized_bias_add_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_conv_ops_test",
    size = "small",
    srcs = ["quantized_conv_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantize_op_test",
    size = "small",
    srcs = ["quantize_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_matmul_op_test",
    size = "small",
    srcs = ["quantized_matmul_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_pooling_ops_test",
    size = "small",
    srcs = ["quantized_pooling_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_concat_op_test",
    size = "small",
    srcs = ["quantized_concat_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_batch_norm_op_test",
    size = "small",
    srcs = ["quantized_batch_norm_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:batch_norm_op",
        "//tensorflow/core/kernels:ops_testutil",
        "//third_party/eigen3",
    ],
)
@@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for quantized evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import threading

import tensorflow as tf

QUANTIZED_KERNELS_FILE = '_quantized_kernels.so'

_quantized_kernels = None
_kernels_lock = threading.Lock()


# Workaround for the fact that importing tensorflow imports contrib
# (even if a user isn't using this or any other contrib op), but
# there's not yet any guarantee that the shared object exists.
# In which case, "import tensorflow" will always crash, even for users that
# never use contrib.
def Load(library_base_dir=''):
  """Load the quantized ops library and return the loaded module."""
  with _kernels_lock:
    global _quantized_kernels
    if not _quantized_kernels:
      data_files_path = os.path.join(library_base_dir,
                                     tf.resource_loader.get_data_files_path())
      tf.logging.info('data path: %s', data_files_path)
      _quantized_kernels = tf.load_op_library(os.path.join(
          data_files_path, QUANTIZED_KERNELS_FILE))

    assert _quantized_kernels, 'Could not load _quantized_kernels.so'
    return _quantized_kernels
@@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for quantized evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import threading

import tensorflow as tf

QUANTIZED_OPS_FILE = '_quantized_ops.so'

_quantized_ops = None
_ops_lock = threading.Lock()


# Workaround for the fact that importing tensorflow imports contrib
# (even if a user isn't using this or any other contrib op), but
# there's not yet any guarantee that the shared object exists.
# In which case, "import tensorflow" will always crash, even for users that
# never use contrib.
def Load(library_base_dir=''):
  """Load the quantized ops library and return the loaded module."""
  with _ops_lock:
    global _quantized_ops
    if not _quantized_ops:
      data_files_path = os.path.join(library_base_dir,
                                     tf.resource_loader.get_data_files_path())
      tf.logging.info('q:data path: %s', data_files_path)
      _quantized_ops = tf.load_op_library(os.path.join(
          data_files_path, QUANTIZED_OPS_FILE))

    assert _quantized_ops, 'Could not load quantized_ops.so'
    return _quantized_ops
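For context, the two deleted loader modules above were the runtime half of the old contrib packaging: the quantized op definitions and kernels lived in shared objects that had to be registered explicitly before use. A minimal sketch of the pre-change usage, with module paths taken from the deleted BUILD rules (illustrative only):

# Pre-change (now unnecessary): register the contrib ops and kernels at
# runtime before any quantized op can be constructed or executed.
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so

load_quantized_ops_so.Load()      # registers ops from _quantized_ops.so
load_quantized_kernels_so.Load()  # registers kernels from _quantized_kernels.so

After the move, these kernels are linked into the core library (see the tf_op_files.txt and //tensorflow/core/kernels:quantized_ops changes elsewhere in this commit), so no explicit loading step remains.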
@@ -1,195 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

REGISTER_OP("QuantizeV2")
    .Input("input: float")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

Assume the input is type float and has a possible range of [0.0, 6.0] and the
output type is quint8 ([0, 255]). The min_range and max_range values should be
specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
value of the input by 255/6 and cast to quint8.

If the output type was qint8 ([-128, 127]), the operation will additionally
subtract 128 from each value prior to casting, so that the range of values
aligns with the range of qint8.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = number_of_steps / range
quantized = round(input * range_scale) - round(range_min * range_scale) +
            numeric_limits<T>::min()
quantized = max(quantized, numeric_limits<T>::min())
quantized = min(quantized, numeric_limits<T>::max())
```

The biggest difference between this and MIN_COMBINED is that the minimum range
is rounded first, before it's subtracted from the rounded value. With
MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
and dequantizing will introduce a larger and larger error.

One thing to watch out for is that the operator may choose to adjust the
requested minimum and maximum values slightly during the quantization process,
so you should always use the output ports as the range for further calculations.
For example, if the requested minimum and maximum values are close to equal,
they will be separated by a small epsilon value to prevent ill-formed quantized
buffers from being created. Otherwise, you can end up with buffers where all the
quantized values map to the same float value, which causes problems for
operations that have to perform further calculations on them.

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
output: The quantized data produced from the float input.
output_min: The actual minimum scalar value used for the output.
output_max: The actual maximum scalar value used for the output.

)doc");

REGISTER_OP("Dequantize")
    .Input("input: T")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      return Status::OK();
    })
    .Doc(R"doc(
Dequantize the 'input' tensor into a float Tensor.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
if T == qint8, in[i] += (range(T) + 1) / 2.0
out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

If the input comes from a QuantizedRelu6, the output type is
quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
0-6. The min_range and max_range values are therefore 0.0 and 6.0.
Dequantize on quint8 will take each value, cast to float, and multiply
by 6 / 255.
Note that if quantizedtype is qint8, the operation will additionally add
128 to each value prior to casting.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = range / number_of_steps
const double offset_input = static_cast<double>(input) - lowest_quantized;
result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
```

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.

)doc");

REGISTER_OP("QuantizedConcat")
    .Input("concat_dim: int32")
    .Input("values: N * T")
    .Input("input_mins: N * float32")
    .Input("input_maxes: N * float32")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("N: int >= 2")
    .Attr("T: type")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
      ShapeHandle unused;
      for (int i = 2; i < c->num_inputs(); ++i) {
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
      }
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Concatenates quantized tensors along one dimension.

concat_dim: 0-D. The dimension along which to concatenate. Must be in the
  range [0, rank(values)).
values: The `N` Tensors to concatenate. Their ranks and types must match,
  and their sizes must match in all dimensions except `concat_dim`.
input_mins: The minimum scalar values for each of the input tensors.
input_maxes: The maximum scalar values for each of the input tensors.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
output: A `Tensor` with the concatenation of values stacked along the
  `concat_dim` dimension. This tensor's shape matches that of `values` except
  in `concat_dim` where it has the sum of the sizes.
)doc");

}  // namespace tensorflow
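The MIN_COMBINED formulas in the QuantizeV2 and Dequantize docs above are easy to check numerically. A minimal Python sketch for an unsigned 8-bit target (illustrative only; it ignores the kernel's exact rounding behavior and the MIN_FIRST mode):

def quantize_min_combined(x, min_range, max_range, num_bits=8):
  # out = (in - min_range) * range(T) / (max_range - min_range)
  range_t = (1 << num_bits) - 1  # range(quint8) = 255
  return int(round((x - min_range) * range_t / (max_range - min_range)))

def dequantize_min_combined(q, min_range, max_range, num_bits=8):
  range_t = (1 << num_bits) - 1
  return min_range + q * (max_range - min_range) / range_t

q = quantize_min_combined(3.0, 0.0, 6.0)  # 3.0 * 255/6 = 127.5 -> 128
x = dequantize_min_combined(q, 0.0, 6.0)  # 128 * 6/255 ~= 3.012

The round trip lands near, but not exactly on, the original value; that small error is the bias the MIN_FIRST discussion above is about.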
@@ -1,126 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

REGISTER_OP("QuantizedMatMul")
    .Input("a: T1")
    .Input("b: T2")
    .Input("min_a: float")
    .Input("max_a: float")
    .Input("min_b: float")
    .Input("max_b: float")
    .Output("out: Toutput")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("Toutput: quantizedtype = DT_QINT32")
    .Attr("transpose_a: bool = false")
    .Attr("transpose_b: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));

      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Perform a quantized matrix multiplication of `a` by the matrix `b`.

The inputs must be two-dimensional matrices and the inner dimension of
`a` (after being transposed if `transpose_a` is non-zero) must match the
outer dimension of `b` (after being transposed if `transpose_b` is
non-zero).

a: Must be a two-dimensional tensor.
b: Must be a two-dimensional tensor.
transpose_a: If true, `a` is transposed before multiplication.
transpose_b: If true, `b` is transposed before multiplication.
min_a: The float value that the lowest quantized `a` value represents.
max_a: The float value that the highest quantized `a` value represents.
min_b: The float value that the lowest quantized `b` value represents.
max_b: The float value that the highest quantized `b` value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizeDownAndShrinkRange")
    .Input("input: Tinput")
    .Input("input_min: float")
    .Input("input_max: float")
    .Output("output: out_type")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Convert the quantized 'input' tensor into a lower-precision 'output', using the
actual distribution of the values to maximize the usage of the lower bit depth
and adjusting the output min and max ranges accordingly.

[input_min, input_max] are scalar floats that specify the range for the float
interpretation of the 'input' data. For example, if input_min is -1.0f and
input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.

This operator tries to squeeze as much precision as possible into an output with
a lower bit depth by calculating the actual min and max values found in the
data. For example, maybe that quint16 input has no values lower than 16,384 and
none higher than 49,152. That means only half the range is actually needed, all
the float interpretations are between -0.5f and 0.5f, so if we want to compress
the data into a quint8 output, we can use that range rather than the theoretical
-1.0f to 1.0f that is suggested by the input min and max.

In practice, this is most useful for taking output from operations like
QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
may have large potential output ranges, but in practice have a distribution of
input values that only uses a small fraction of the possible range. By feeding
that output into this operator, we can reduce it from 32 bits down to 8 with
minimal loss of accuracy.

input_min: The float value that the minimum quantized input value represents.
input_max: The float value that the maximum quantized input value represents.
Tinput: The type of the input.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
out_type: The type of the output. Should be a lower bit depth than Tinput.

)doc");

}  // namespace tensorflow
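The QuantizeDownAndShrinkRange doc above describes requantization driven by the observed value range. A rough numpy sketch of the idea (illustrative only; the real kernel works on the integer representation directly and also handles degenerate all-equal ranges):

import numpy as np

def quantize_down_and_shrink(floats):
  # 'floats' is the float interpretation of the wide (e.g. qint32) data.
  out_min = float(np.min(floats))
  out_max = float(np.max(floats))
  scale = 255.0 / (out_max - out_min)  # squeeze the used range into 8 bits
  quantized = np.round((floats - out_min) * scale).astype(np.uint8)
  return quantized, out_min, out_max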
@@ -1,348 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/util/padding.h"

namespace tensorflow {

using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

REGISTER_OP("QuantizedAvgPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the average pool of the input tensor for quantized types.

input: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedBiasAdd")
    .Input("input: T1")
    .Input("bias: T2")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_bias: float")
    .Input("max_bias: float")
    .Output("output: out_type")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Adds Tensor 'bias' to Tensor 'input' for Quantized types.

Broadcasts the values of bias on dimensions 0..N-2 of 'input'.

bias: A 1D bias Tensor with size matching the last dimension of 'input'.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_bias: The float value that the lowest quantized bias value represents.
max_bias: The float value that the highest quantized bias value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedConv2D")
    .Input("input: Tinput")
    .Input("filter: Tfilter")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_filter: float")
    .Input("max_filter: float")
    .Output("output: out_type")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("Tinput: quantizedtype")
    .Attr("Tfilter: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QINT32")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes a 2D convolution given quantized 4D input and filter tensors.
The inputs are quantized tensors where the lowest value represents the real
number of the associated minimum, and the highest represents the maximum.
This means that you can only interpret the quantized output in the same way, by
taking the returned minimum and maximum values into account.

filter: filter's input_depth dimension must match input's depth dimensions.
strides: The stride of the sliding window for each dimension of the input
  tensor.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_filter: The float value that the lowest quantized filter value represents.
max_filter: The float value that the highest quantized filter value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedMaxPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the max pool of the input tensor for quantized types.

input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedRelu")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear: `max(features, 0)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedRelu6")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedReluX")
    .Input("features: Tinput")
    .Input("max_value: float")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
    .Input("t: Tinput")
    .Input("t_min: float")
    .Input("t_max: float")
    .Input("m: Tinput")
    .Input("m_min: float")
    .Input("m_max: float")
    .Input("v: Tinput")
    .Input("v_min: float")
    .Input("v_max: float")
    .Input("beta: Tinput")
    .Input("beta_min: float")
    .Input("beta_max: float")
    .Input("gamma: Tinput")
    .Input("gamma_min: float")
    .Input("gamma_max: float")
    .Output("result: out_type")
    .Output("result_min: float")
    .Output("result_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .Attr("variance_epsilon: float")
    .Attr("scale_after_normalization: bool")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));

      DimensionHandle last_dim = c->Dim(input, 3);
      for (int i = 1; i < 5; ++i) {  // covers m, v, beta, gamma
        ShapeHandle vec;
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
        TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
      }

      ShapeHandle out;
      TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
      c->set_output(0, out);
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());

      return Status::OK();
    })
    .Doc(R"doc(
Quantized Batch normalization.

This op is deprecated and will be removed in the future. Prefer
`tf.nn.batch_normalization`.

t: A 4D input Tensor.
t_min: The value represented by the lowest quantized input.
t_max: The value represented by the highest quantized input.
m: A 1D mean Tensor with size matching the last dimension of t.
  This is the first output from tf.nn.moments,
  or a saved moving average thereof.
m_min: The value represented by the lowest quantized mean.
m_max: The value represented by the highest quantized mean.
v: A 1D variance Tensor with size matching the last dimension of t.
  This is the second output from tf.nn.moments,
  or a saved moving average thereof.
v_min: The value represented by the lowest quantized variance.
v_max: The value represented by the highest quantized variance.
beta: A 1D beta Tensor with size matching the last dimension of t.
  An offset to be added to the normalized tensor.
beta_min: The value represented by the lowest quantized offset.
beta_max: The value represented by the highest quantized offset.
gamma: A 1D gamma Tensor with size matching the last dimension of t.
  If "scale_after_normalization" is true, this tensor will be multiplied
  with the normalized tensor.
gamma_min: The value represented by the lowest quantized gamma.
gamma_max: The value represented by the highest quantized gamma.
variance_epsilon: A small float number to avoid dividing by 0.
scale_after_normalization: A bool indicating whether the resulted tensor
  needs to be multiplied with gamma.
)doc");

}  // namespace tensorflow
@ -19,7 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.python.ops.gen_array_ops import dequantize
from tensorflow.python.ops.gen_array_ops import quantize_v2
from tensorflow.python.ops.gen_array_ops import quantized_concat

@ -19,10 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_math_ops
from tensorflow.contrib.quantization.ops.gen_math_ops import *
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops


ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops.gen_math_ops import *

@ -19,17 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_nn_ops
from tensorflow.contrib.quantization.ops.gen_nn_ops import *
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops


ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn)
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops.gen_nn_ops import *
@ -524,6 +524,7 @@ cc_library(
        "//tensorflow/core/kernels:nn",
        "//tensorflow/core/kernels:parameterized_truncated_normal_op",
        "//tensorflow/core/kernels:parsing",
        "//tensorflow/core/kernels:quantized_ops",
        "//tensorflow/core/kernels:random_ops",
        "//tensorflow/core/kernels:required",
        "//tensorflow/core/kernels:sdca_ops",

@ -734,6 +735,7 @@ cc_library(
    deps = [
        ":protos_cc",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
    alwayslink = 1,
)

@ -2228,6 +2228,7 @@ filegroup(
    srcs = [
        ":android_extended_ops_group1",
        ":android_extended_ops_group2",
        ":android_quantized_ops",
    ],
    visibility = ["//visibility:public"],
)
@ -2366,6 +2367,26 @@ filegroup(
    ],
)

filegroup(
    name = "android_quantized_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantization_utils.h",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
        "reference_gemm.h",
    ],
    visibility = ["//visibility:public"],
)

# A file group which contains nearly all available operators which
# may work on Android. This is intended to be used with selective
# registration.

@ -2436,10 +2457,244 @@ cc_library(
        "//tensorflow/core:android_tensorflow_lib_lite",
        "//tensorflow/core:protos_cc",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
    alwayslink = 1,
)

# Quantization-specific OpKernels

tf_kernel_library(
    name = "quantized_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
    ],
    hdrs = [
        "quantization_utils.h",
        "reference_gemm.h",
    ],
    deps = [
        "//tensorflow/core",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core/kernels:concat_lib_hdrs",
        "//tensorflow/core/kernels:conv_ops",
        "//tensorflow/core/kernels:eigen_helpers",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/kernels:pooling_ops",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
)

tf_cc_test(
    name = "quantize_down_and_shrink_range_op_test",
    size = "small",
    srcs = ["quantize_down_and_shrink_range_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantization_utils_test",
    srcs = ["quantization_utils_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//third_party/eigen3",
    ],
)

tf_cc_test(
    name = "quantized_activation_ops_test",
    srcs = ["quantized_activation_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_bias_add_op_test",
    size = "small",
    srcs = ["quantized_bias_add_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_conv_ops_test",
    size = "small",
    srcs = ["quantized_conv_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantize_op_test",
    size = "small",
    srcs = ["quantize_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_matmul_op_test",
    size = "small",
    srcs = ["quantized_matmul_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_pooling_ops_test",
    size = "small",
    srcs = ["quantized_pooling_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_concat_op_test",
    size = "small",
    srcs = ["quantized_concat_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_batch_norm_op_test",
    size = "small",
    srcs = ["quantized_batch_norm_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:batch_norm_op",
        "//tensorflow/core/kernels:ops_testutil",
        "//third_party/eigen3",
    ],
)

# -----------------------------------------------------------------------------
# Google-internal targets. These must be at the end for syncrepo.
@ -17,7 +17,7 @@ limitations under the License.

#define EIGEN_USE_THREADS

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

@ -31,10 +31,6 @@ tf_cc_test(
    size = "small",
    srcs = ["quantized_matmul_op_for_hexagon_test.cc"],
    deps = [
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/contrib/quantization/kernels:quantized_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
@ -42,6 +38,7 @@ tf_cc_test(
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/kernels:quantized_ops",
    ],
)

@ -51,7 +48,6 @@ tf_cc_test(
    srcs = ["graph_transferer_test.cc"],
    deps = [
        "//tensorflow/cc:cc_ops",
        "//tensorflow/contrib/quantization/kernels/hexagon:graph_transferer",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:direct_session",
        "//tensorflow/core:lib",
@ -60,6 +56,7 @@ tf_cc_test(
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels/hexagon:graph_transferer",
    ],
)
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"

namespace tensorflow {
void GraphTransferer::LoadGraphFromProto(

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_LOADER_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_

#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/protobuf.h"
@ -37,4 +37,4 @@ class GraphTransferer {

}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H
#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_

@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/session.h"
@ -14,7 +14,8 @@ limitations under the License.
==============================================================================*/
// Tests in this file are designed to evaluate hexagon DSP operations.

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
@ -26,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"

namespace tensorflow {

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_

#define EIGEN_USE_THREADS

@ -552,4 +552,4 @@ class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase {

}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_

@ -18,7 +18,7 @@ limitations under the License.
#include <limits>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/tensor_testutil.h"

@ -20,7 +20,7 @@ limitations under the License.
#include <math.h>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

@ -17,7 +17,7 @@ limitations under the License.

#define EIGEN_USE_THREADS

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

@ -16,7 +16,7 @@ limitations under the License.
// Implements a quantized version of the Relu6 operation.
#define EIGEN_USE_THREADS

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -16,7 +16,7 @@ limitations under the License.
#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"

@ -16,7 +16,7 @@ limitations under the License.
#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
@ -15,7 +15,7 @@ limitations under the License.

// Implements a quantized eight-bit version of the bias addition operation.

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

@ -15,7 +15,7 @@ limitations under the License.

#include <functional>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -18,7 +18,7 @@ limitations under the License.
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor_types.h"

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"

@ -19,8 +19,8 @@ limitations under the License.
#include <vector>

#include "public/gemmlowp.h"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/reference_gemm.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/ops_util.h"

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -16,8 +16,8 @@ limitations under the License.
// Implements a quantized eight-bit version of the matmul operation.

#include "public/gemmlowp.h"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/reference_gemm.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_

// This is an unoptimized but debuggable implementation of the GEMM matrix
// multiply function, used to compare to faster but more opaque versions, or
@ -87,4 +87,4 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c,
}
}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
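
The reference GEMM above exists so the faster gemmlowp path has something debuggable to compare against. As a rough NumPy sketch of the same idea, offset subtraction with 32-bit accumulation (the function name and arguments here are illustrative, not the header's actual C++ signature):

```
# Rough NumPy sketch of the idea behind reference_gemm.h: multiply
# offset-quantized eight-bit matrices with 32-bit accumulation.
# `reference_quantized_gemm` is an illustrative name, not the C++ API.
import numpy as np

def reference_quantized_gemm(a, b, offset_a, offset_b):
    """a, b: uint8 matrices; offsets: the quantized zero points."""
    # Subtract the zero points in int32 so products can go negative and
    # the accumulation cannot overflow eight-bit arithmetic.
    a32 = a.astype(np.int32) - offset_a
    b32 = b.astype(np.int32) - offset_b
    return a32 @ b32  # int32 accumulator, like QuantizedMatMul's qint32

a = np.array([[1, 2], [3, 4]], dtype=np.uint8)
b = np.array([[5, 6], [7, 8]], dtype=np.uint8)
print(reference_quantized_gemm(a, b, offset_a=0, offset_b=0))
```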
@ -4054,6 +4054,176 @@ debug_urls: List of URLs to debug targets, e.g.,
file:///foo/tfdbg_dump, grpc:://localhost:11011
)doc");

REGISTER_OP("QuantizeV2")
    .Input("input: float")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

Assume the input is type float and has a possible range of [0.0, 6.0] and the
output type is quint8 ([0, 255]). The min_range and max_range values should be
specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
value of the input by 255/6 and cast to quint8.

If the output type was qint8 ([-128, 127]), the operation will additionally
subtract 128 from each value prior to casting, so that the range of values
aligns with the range of qint8.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = number_of_steps / range
quantized = round(input * range_scale) - round(range_min * range_scale) +
  numeric_limits<T>::min()
quantized = max(quantized, numeric_limits<T>::min())
quantized = min(quantized, numeric_limits<T>::max())
```

The biggest difference between this and MIN_COMBINED is that the minimum range
is rounded first, before it's subtracted from the rounded value. With
MIN_COMBINED, a small bias is introduced, and repeated iterations of quantizing
and dequantizing accumulate a larger and larger error.

One thing to watch out for is that the operator may choose to adjust the
requested minimum and maximum values slightly during the quantization process,
so you should always use the output ports as the range for further calculations.
For example, if the requested minimum and maximum values are close to equal,
they will be separated by a small epsilon value to prevent ill-formed quantized
buffers from being created. Otherwise, you can end up with buffers where all the
quantized values map to the same float value, which causes problems for
operations that have to perform further calculations on them.

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
output: The quantized data produced from the float input.
output_min: The actual minimum scalar value used for the output.
output_max: The actual maximum scalar value used for the output.

)doc");

REGISTER_OP("Dequantize")
    .Input("input: T")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      return Status::OK();
    })
    .Doc(R"doc(
Dequantize the 'input' tensor into a float Tensor.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the quantized values to their float equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
if T == qint8, in[i] += (range(T) + 1) / 2.0
out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

If the input comes from a QuantizedRelu6, the output type is
quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
0-6. The min_range and max_range values are therefore 0.0 and 6.0.
Dequantize on quint8 will take each value, cast to float, and multiply
by 6 / 255.
Note that if quantizedtype is qint8, the operation will additionally add
128 to each value prior to casting.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = range / number_of_steps
const double offset_input = static_cast<double>(input) - lowest_quantized;
result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
```

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.

)doc");

REGISTER_OP("QuantizedConcat")
    .Input("concat_dim: int32")
    .Input("values: N * T")
    .Input("input_mins: N * float32")
    .Input("input_maxes: N * float32")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("N: int >= 2")
    .Attr("T: type")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
      ShapeHandle unused;
      for (int i = std::max(0, c->num_inputs() - 2); i < c->num_inputs(); ++i) {
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
      }
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Concatenates quantized tensors along one dimension.

concat_dim: 0-D. The dimension along which to concatenate. Must be in the
  range [0, rank(values)).
values: The `N` Tensors to concatenate. Their ranks and types must match,
  and their sizes must match in all dimensions except `concat_dim`.
input_mins: The minimum scalar values for each of the input tensors.
input_maxes: The maximum scalar values for each of the input tensors.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
output: A `Tensor` with the concatenation of values stacked along the
  `concat_dim` dimension. This tensor's shape matches that of `values` except
  in `concat_dim` where it has the sum of the sizes.
)doc");

// Deprecated op registrations:

// The following can be deleted after 10mar2017.

@ -2058,6 +2058,106 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0]
```
)doc");

REGISTER_OP("QuantizedMatMul")
    .Input("a: T1")
    .Input("b: T2")
    .Input("min_a: float")
    .Input("max_a: float")
    .Input("min_b: float")
    .Input("max_b: float")
    .Output("out: Toutput")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("Toutput: quantizedtype = DT_QINT32")
    .Attr("transpose_a: bool = false")
    .Attr("transpose_b: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));

      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Perform a quantized matrix multiplication of `a` by the matrix `b`.

The inputs must be two-dimensional matrices and the inner dimension of
`a` (after being transposed if `transpose_a` is non-zero) must match the
outer dimension of `b` (after being transposed if `transpose_b` is
non-zero).

a: Must be a two-dimensional tensor.
b: Must be a two-dimensional tensor.
transpose_a: If true, `a` is transposed before multiplication.
transpose_b: If true, `b` is transposed before multiplication.
min_a: The float value that the lowest quantized `a` value represents.
max_a: The float value that the highest quantized `a` value represents.
min_b: The float value that the lowest quantized `b` value represents.
max_b: The float value that the highest quantized `b` value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizeDownAndShrinkRange")
    .Input("input: Tinput")
    .Input("input_min: float")
    .Input("input_max: float")
    .Output("output: out_type")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Convert the quantized 'input' tensor into a lower-precision 'output', using the
actual distribution of the values to maximize the usage of the lower bit depth
and adjusting the output min and max ranges accordingly.

[input_min, input_max] are scalar floats that specify the range for the float
interpretation of the 'input' data. For example, if input_min is -1.0f and
input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.

This operator tries to squeeze as much precision as possible into an output with
a lower bit depth by calculating the actual min and max values found in the
data. For example, suppose that quint16 input has no values lower than 16,384
and none higher than 49,152. That means only half the range is actually needed,
all the float interpretations are between -0.5f and 0.5f, so if we want to
compress the data into a quint8 output, we can use that range rather than the
theoretical -1.0f to 1.0f that is suggested by the input min and max.

In practice, this is most useful for taking output from operations like
QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
may have large potential output ranges, but in practice have a distribution of
input values that only uses a small fraction of the possible range. By feeding
that output into this operator, we can reduce it from 32 bits down to 8 with
minimal loss of accuracy.

input_min: The float value that the minimum quantized input value represents.
input_max: The float value that the maximum quantized input value represents.
Tinput: The type of the input.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
out_type: The type of the output. Should be a lower bit depth than Tinput.

)doc");

// Deprecated ops:
REGISTER_OP("BatchFFT")
    .Input("input: complex64")

@ -1994,4 +1994,324 @@ overlapping: When set to True, it means when pooling, the values at the boundary
output: 4-D. Gradients w.r.t. the input of `fractional_avg_pool`.
)doc");

REGISTER_OP("QuantizedAvgPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the average pool of the input tensor for quantized types.

input: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedBiasAdd")
    .Input("input: T1")
    .Input("bias: T2")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_bias: float")
    .Input("max_bias: float")
    .Output("output: out_type")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Adds Tensor 'bias' to Tensor 'input' for Quantized types.

Broadcasts the values of bias on dimensions 0..N-2 of 'input'.

bias: A 1D bias Tensor with size matching the last dimension of 'input'.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_bias: The float value that the lowest quantized bias value represents.
max_bias: The float value that the highest quantized bias value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedConv2D")
    .Input("input: Tinput")
    .Input("filter: Tfilter")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_filter: float")
    .Input("max_filter: float")
    .Output("output: out_type")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("Tinput: quantizedtype")
    .Attr("Tfilter: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QINT32")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes a 2D convolution given quantized 4D input and filter tensors.
The inputs are quantized tensors where the lowest quantized value represents
the real number given by the associated minimum, and the highest represents
the maximum. The quantized output must be interpreted the same way, by taking
the returned minimum and maximum values into account.

filter: filter's input_depth dimension must match input's depth dimension.
strides: The stride of the sliding window for each dimension of the input
  tensor.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_filter: The float value that the lowest quantized filter value represents.
max_filter: The float value that the highest quantized filter value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedMaxPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the max pool of the input tensor for quantized types.

input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedRelu")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear: `max(features, 0)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedRelu6")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedReluX")
    .Input("features: Tinput")
    .Input("max_value: float")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
    .Input("t: Tinput")
    .Input("t_min: float")
    .Input("t_max: float")
    .Input("m: Tinput")
    .Input("m_min: float")
    .Input("m_max: float")
    .Input("v: Tinput")
    .Input("v_min: float")
    .Input("v_max: float")
    .Input("beta: Tinput")
    .Input("beta_min: float")
    .Input("beta_max: float")
    .Input("gamma: Tinput")
    .Input("gamma_min: float")
    .Input("gamma_max: float")
    .Output("result: out_type")
    .Output("result_min: float")
    .Output("result_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .Attr("variance_epsilon: float")
    .Attr("scale_after_normalization: bool")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));

      DimensionHandle last_dim = c->Dim(input, 3);
      for (int i = 1; i < 5; ++i) {  // covers m, v, beta, gamma
        ShapeHandle vec;
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
        TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
      }

      ShapeHandle out;
      TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
      c->set_output(0, out);
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());

      return Status::OK();
    })
    .Doc(R"doc(
Quantized Batch normalization.

This op is deprecated and will be removed in the future. Prefer
`tf.nn.batch_normalization`.

t: A 4D input Tensor.
t_min: The value represented by the lowest quantized input.
t_max: The value represented by the highest quantized input.
m: A 1D mean Tensor with size matching the last dimension of t.
  This is the first output from tf.nn.moments,
  or a saved moving average thereof.
m_min: The value represented by the lowest quantized mean.
m_max: The value represented by the highest quantized mean.
v: A 1D variance Tensor with size matching the last dimension of t.
  This is the second output from tf.nn.moments,
  or a saved moving average thereof.
v_min: The value represented by the lowest quantized variance.
v_max: The value represented by the highest quantized variance.
beta: A 1D beta Tensor with size matching the last dimension of t.
  An offset to be added to the normalized tensor.
beta_min: The value represented by the lowest quantized offset.
beta_max: The value represented by the highest quantized offset.
gamma: A 1D gamma Tensor with size matching the last dimension of t.
  If "scale_after_normalization" is true, this tensor will be multiplied
  with the normalized tensor.
gamma_min: The value represented by the lowest quantized gamma.
gamma_max: The value represented by the highest quantized gamma.
variance_epsilon: A small float number to avoid dividing by 0.
scale_after_normalization: A bool indicating whether the resulting tensor
  needs to be multiplied with gamma.
)doc");
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -2149,6 +2149,33 @@ py_binary(
|
||||
],
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Quantization
|
||||
|
||||
py_test(
|
||||
name = "dequantize_op_test",
|
||||
size = "small",
|
||||
srcs = ["ops/dequantize_op_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":ops",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "quantized_conv_ops_test",
|
||||
size = "small",
|
||||
srcs = ["ops/quantized_conv_ops_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":ops",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
|
@ -74,6 +74,9 @@ or join multiple tensors together.
|
||||
@@boolean_mask
|
||||
@@one_hot
|
||||
@@sequence_mask
|
||||
@@dequantize
|
||||
@@quantize_v2
|
||||
@@quantized_concat
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
@ -2318,3 +2321,9 @@ def squeeze(input, squeeze_dims=None, name=None):
|
||||
if np.isscalar(squeeze_dims):
|
||||
squeeze_dims = [squeeze_dims]
|
||||
return gen_array_ops._squeeze(input, squeeze_dims, name)
|
||||
|
||||
|
||||
# TODO(cwhipkey): Verify and enable shape functions for these.
|
||||
ops.RegisterShape("QuantizeV2")(None)
|
||||
ops.RegisterShape("QuantizedBatchNormWithGlobalNormalization")(None)
|
||||
ops.RegisterShape("QuantizedConcat")(None)
@ -21,24 +21,16 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf

# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so


class DequantizeOpTest(tf.test.TestCase):

  def __init__(self, method_name="runTest"):
    super(DequantizeOpTest, self).__init__(method_name)
    load_quantized_ops_so.Load()
    load_quantized_kernels_so.Load()

  def _testDequantizeOp(self, inputs, min_range, max_range, dtype):
    with self.test_session():
      input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype)
      dequantized = tf.contrib.quantization.dequantize(
      dequantized = tf.dequantize(
          input_op, min_range, max_range)
      tf_ans = dequantized.eval()
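A sketch of the arithmetic this test checks against, assuming quint8 inputs and the op's default MIN_COMBINED mode (the scale formula below is an assumption based on that mode):

import numpy as np

def dequantize_reference(q, min_range, max_range):
  # Map quint8 values linearly from [0, 255] onto [min_range, max_range].
  scale = (max_range - min_range) / 255.0
  return min_range + np.asarray(q, dtype=np.float32) * scale

print(dequantize_reference([0, 128, 255], 0.0, 6.0))  # ~[0., 3.01, 6.]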
@ -1954,3 +1954,6 @@ def reduced_shape(input_shape, axes):
       axes],  # [1, 2]
      [input_shape,  # [2, 3, 5, 7]
       array_ops.fill(axes_shape, 1)])  # [1, 1]


ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)
@ -277,6 +277,12 @@ classes when using one of the sampled loss functions above.

@@compute_accidental_hits

### Quantization ops

@@quantized_relu_x
@@quantized_max_pool
@@quantized_avg_pool

"""
from __future__ import absolute_import
from __future__ import division
@ -1925,4 +1925,14 @@ def erosion2d(value, kernel, strides, rates, padding, name=None):
                                        padding=padding,
                                        name=name))


ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn)

# pylint: enable=invalid-name
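For reference, a minimal sketch of one of the ops documented above, tf.nn.quantized_relu_x (the input values and ranges are illustrative assumptions, as is the exact keyword spelling):

import tensorflow as tf

with tf.Session() as sess:
  features = tf.constant([0, 50, 200, 255], dtype=tf.quint8)
  # Clamp activations to [0, max_value] in the quantized domain.
  relu, relu_min, relu_max = tf.nn.quantized_relu_x(
      features, max_value=6.0, min_features=0.0, max_features=10.0)
  print(sess.run([relu, relu_min, relu_max]))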
@ -21,19 +21,11 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf

# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so


class Conv2DTest(tf.test.TestCase):

  def __init__(self, method_name="runTest"):
    super(Conv2DTest, self).__init__(method_name)
    load_quantized_ops_so.Load()
    load_quantized_kernels_so.Load()

  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                    expected):
@ -67,16 +59,16 @@ class Conv2DTest(tf.test.TestCase):
    with self.test_session(use_gpu=False) as sess:
      t1 = tf.constant(x1, shape=tensor_in_sizes, dtype=tf.quint8)
      t2 = tf.constant(x2, shape=filter_in_sizes, dtype=tf.quint8)
      conv = tf.contrib.quantization.quantized_conv2d(t1,
                                                      t2,
                                                      out_type=tf.qint32,
                                                      strides=[1, stride,
                                                               stride, 1],
                                                      padding=padding,
                                                      min_input=x1_min,
                                                      max_input=x1_max,
                                                      min_filter=x2_min,
                                                      max_filter=x2_max)
      conv = tf.nn.quantized_conv2d(t1,
                                    t2,
                                    out_type=tf.qint32,
                                    strides=[1, stride,
                                             stride, 1],
                                    padding=padding,
                                    min_input=x1_min,
                                    max_input=x1_max,
                                    min_filter=x2_min,
                                    max_filter=x2_max)
      value = sess.run(conv)
      quantized_output = value[0]
      output_min = value[1]
@ -142,18 +142,21 @@ def if_not_mobile(a):
  })

def tf_copts():
  return (["-fno-exceptions", "-DEIGEN_AVOID_STL_ARRAY"] +
  return (["-fno-exceptions",
           "-DEIGEN_AVOID_STL_ARRAY",
           "-Iexternal/gemmlowp",] +
          if_cuda(["-DGOOGLE_CUDA=1"]) +
          if_android_arm(["-mfpu=neon"]) +
          select({"//tensorflow:android": [
              "-std=c++11",
              "-DMIN_LOG_LEVEL=0",
              "-DTF_LEAN_BINARY",
              "-O2",
          ],
                  "//tensorflow:darwin": [],
                  "//tensorflow:ios": ["-std=c++11",],
                  "//conditions:default": ["-pthread"]}))
          select({
              "//tensorflow:android": [
                  "-std=c++11",
                  "-DMIN_LOG_LEVEL=0",
                  "-DTF_LEAN_BINARY",
                  "-O2",
              ],
              "//tensorflow:darwin": [],
              "//tensorflow:ios": ["-std=c++11",],
              "//conditions:default": ["-pthread"]}))

def tf_opts_nortti_if_android():
  return if_android([
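For context, a sketch of how a kernel target would pick up the new gemmlowp include path through tf_copts() (the target below is hypothetical):

# In a BUILD file (Starlark); "my_quantized_kernel" is a made-up target.
load("//tensorflow:tensorflow.bzl", "tf_copts")

cc_library(
    name = "my_quantized_kernel",
    srcs = ["my_quantized_kernel.cc"],
    copts = tf_copts(),  # now carries -Iexternal/gemmlowp
)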
@ -13,9 +13,6 @@ py_library(
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:ops",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:platform",
    ],
)
@ -26,9 +23,6 @@ py_binary(
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:ops",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:platform",
    ],
)
@ -15,8 +15,8 @@
r"""Transforms a float-trained graph into an equivalent quantized version.

An example of command-line usage is:
bazel build tensorflow/contrib/quantization/tools:quantize_graph \
&& bazel-bin/tensorflow/contrib/quantization/tools/quantize_graph \
bazel build tensorflow/tools/quantization:quantize_graph \
&& bazel-bin/tensorflow/tools/quantization/quantize_graph \
--input=tensorflow_inception_graph.pb \
--output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \
--mode=eightbit --logtostderr
@ -35,12 +35,6 @@ import tensorflow as tf
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import tensor_util

# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so


flags = tf.app.flags
FLAGS = flags.FLAGS
@ -60,8 +54,6 @@ flags.DEFINE_string("test_input_dims", "1,224,224,3",
                    """ graph loaded from a file.""")
flags.DEFINE_boolean("strip_redundant_quantization", True,
                     """Removes redundant dequantize/quantize pairs.""")
flags.DEFINE_boolean("load_quantization_so", True,
                     """Explicitly load the quantization ops library""")


def print_input_nodes(current_node, nodes_map, indent, already_visited):
@ -290,9 +282,6 @@ class GraphRewriter(object):
    self.nodes_map = self.create_nodes_map(input_graph)
    self.output_graph = None
    self.mode = mode
    if FLAGS.load_quantization_so:
      load_quantized_ops_so.Load()
      load_quantized_kernels_so.Load()

  def create_nodes_map(self, graph):
    """Builds a mapping of node names to their defs from the graph."""
@ -20,11 +20,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import numpy as np
import tensorflow as tf
from tensorflow.contrib.quantization.tools import quantize_graph

from tensorflow.python.framework import graph_util
from tensorflow.tools.quantization import quantize_graph

flags = tf.app.flags
FLAGS = flags.FLAGS