diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index e86fa076ef0..4cad55b3993 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -110,6 +110,10 @@ filegroup(
         "//tensorflow/contrib/metrics:all_files",
         "//tensorflow/contrib/metrics/kernels:all_files",
         "//tensorflow/contrib/opt:all_files",
+        "//tensorflow/contrib/quantization:all_files",
+        "//tensorflow/contrib/quantization/kernels:all_files",
+        "//tensorflow/contrib/quantization/kernels/hexagon:all_files",
+        "//tensorflow/contrib/quantization/tools:all_files",
         "//tensorflow/contrib/rnn:all_files",
         "//tensorflow/contrib/session_bundle:all_files",
         "//tensorflow/contrib/session_bundle/example:all_files",
@@ -130,7 +134,6 @@ filegroup(
         "//tensorflow/core/distributed_runtime:all_files",
         "//tensorflow/core/distributed_runtime/rpc:all_files",
         "//tensorflow/core/kernels:all_files",
-        "//tensorflow/core/kernels/hexagon:all_files",
         "//tensorflow/core/ops/compat:all_files",
         "//tensorflow/core/platform/cloud:all_files",
         "//tensorflow/core/platform/default/build_config:all_files",
@@ -178,7 +181,6 @@ filegroup(
         "//tensorflow/tools/docs:all_files",
         "//tensorflow/tools/git:all_files",
         "//tensorflow/tools/proto_text:all_files",
-        "//tensorflow/tools/quantization:all_files",
         "//tensorflow/tools/test:all_files",
         "//tensorflow/user_ops:all_files",
         "//third_party/hadoop:all_files",
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 1aa0b9e0123..a5d3cb49fff 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -73,7 +73,6 @@ HOST_INCLUDES := \
 -I. \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
--I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(HOST_GENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
 	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -147,7 +146,6 @@ INCLUDES := \
 -I. \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
--I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -240,7 +238,6 @@ ifeq ($(TARGET),ANDROID)
 -I. \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
--I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/gen/protobuf/include \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 3d02e19e0a8..39899192e8b 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -130,17 +130,6 @@ tensorflow/core/kernels/batch_norm_op.cc
 tensorflow/core/kernels/avgpooling_op.cc
 tensorflow/core/kernels/argmax_op.cc
 tensorflow/core/kernels/aggregate_ops.cc
-tensorflow/core/kernels/dequantize_op.cc
-tensorflow/core/kernels/quantization_utils.cc
-tensorflow/core/kernels/quantize_down_and_shrink_range.cc
-tensorflow/core/kernels/quantize_op.cc
-tensorflow/core/kernels/quantized_activation_ops.cc
-tensorflow/core/kernels/quantized_batch_norm_op.cc
-tensorflow/core/kernels/quantized_bias_add_op.cc
-tensorflow/core/kernels/quantized_concat_op.cc
-tensorflow/core/kernels/quantized_conv_ops.cc
-tensorflow/core/kernels/quantized_matmul_op.cc
-tensorflow/core/kernels/quantized_pooling_ops.cc
 tensorflow/core/ops/training_ops.cc
 tensorflow/core/ops/string_ops.cc
 tensorflow/core/ops/state_ops.cc
diff --git a/tensorflow/contrib/quantization/BUILD b/tensorflow/contrib/quantization/BUILD
index 5347b32bdb4..881349fda7e 100644
--- a/tensorflow/contrib/quantization/BUILD
+++ b/tensorflow/contrib/quantization/BUILD
@@ -13,6 +13,53 @@ load(
     "tf_custom_op_library",
 )
 
+cc_library(
+    name = "cc_array_ops",
+    srcs = ["ops/array_ops.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "cc_math_ops",
+    srcs = ["ops/math_ops.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "cc_nn_ops",
+    srcs = ["ops/nn_ops.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "cc_ops",
+    linkstatic = 1,
+    deps = [
+        ":cc_array_ops",
+        ":cc_math_ops",
+        ":cc_nn_ops",
+    ],
+    alwayslink = 1,
+)
+
+filegroup(
+    name = "android_ops",
+    srcs = glob(["ops/*.cc"]),
+    visibility = ["//visibility:public"],
+)
+
 py_library(
     name = "quantization_py",
     srcs = [
@@ -22,6 +69,8 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":ops",
+        "//tensorflow/contrib/quantization:quantized_ops_py",
+        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
     ],
 )
 
@@ -34,9 +83,52 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:nn_ops",
+        ":array_ops",
+        ":math_ops",
+        ":nn_ops",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "array_ops",
+    deps = ["//tensorflow/contrib/quantization:cc_array_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "math_ops",
+    deps = ["//tensorflow/contrib/quantization:cc_math_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "nn_ops",
+    deps = ["//tensorflow/contrib/quantization:cc_nn_ops"],
+)
+
+py_test(
+    name = "dequantize_op_test",
+    size = "small",
+    srcs = ["python/dequantize_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ops",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/quantization:quantized_ops_py",
+        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "quantized_conv_ops_test",
+    size = "small",
+    srcs = ["python/quantized_conv_ops_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ops",
+ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", + "//tensorflow/python:framework_test_lib", ], ) @@ -47,6 +139,24 @@ filegroup( ]), ) +tf_custom_op_library( + name = "_quantized_ops.so", + srcs = [ + "ops/array_ops.cc", + "ops/math_ops.cc", + "ops/nn_ops.cc", + ], + deps = [ + ], +) + +py_library( + name = "quantized_ops_py", + srcs = ["load_quantized_ops_so.py"], + data = ["_quantized_ops.so"], + srcs_version = "PY2AND3", +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/quantization/Makefile.in b/tensorflow/contrib/quantization/Makefile.in new file mode 100644 index 00000000000..563639e5d75 --- /dev/null +++ b/tensorflow/contrib/quantization/Makefile.in @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# This sub Makefile compiles libraries under this directory. This is designed to +# be used as a sub Makefile with tensorflow/contrib/makefile/Makefile. +# You can build targets in this file by including this sub makefile like: +# $ make -f tensorflow/contrib/makefile/Makefile TARGET= \ +# SUB_MAKEFILES=$(pwd)/tensorflow/contrib/quantization/Makefile.in \ +# (optional: NDK_ROOT=) contrib_quantization_tests +# TODO(satok): Support more targets + +GTEST_DIR := \ +$(MAKEFILE_DIR)/downloads/googletest/googletest + +GTEST_HEADERS = \ +$(wildcard $(GTEST_DIR)/include/gtest/*.h) \ +$(wildcard $(GTEST_DIR)/include/gtest/internal/*.h) + +GTEST_SRCS := \ +$(wildcard $(GTEST_DIR)/src/*.cc) \ +$(wildcard $(GTEST_DIR)/src/*.h) \ +$(GTEST_HEADERS) + +QUANTIZATION_TEST_SRCS := \ +tensorflow/contrib/quantization/ops/math_ops.cc \ +tensorflow/contrib/quantization/kernels/quantize_op.cc \ +tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc \ +tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc \ +tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc \ +tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc \ +tensorflow/contrib/makefile/test/test_main.cc + +QUANTIZATION_TEST_OBJS := $(addprefix $(OBJDIR), $(QUANTIZATION_TEST_SRCS:.cc=.o)) + +QUANTIZATION_TEST_NAME := contrib_quantization_tests +QUANTIZATION_TEST_BIN_PATH := $(BINDIR)$(QUANTIZATION_TEST_NAME) + +INCLUDES += \ +-I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/googletest/googletest/include + +QUANTIZATION_TEST_INCLUDES := $(INCLUDES) + +$(OBJDIR)gtest-all.o : $(GTEST_SRCS) + $(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) -I $(GTEST_DIR) -c \ + $(GTEST_DIR)/src/gtest-all.cc -o $@ + +$(LIBDIR)gtest.a : $(OBJDIR)gtest-all.o + $(AR) $(ARFLAGS) $@ $^ + +$(QUANTIZATION_TEST_BIN_PATH): $(LIB_PATH) $(LIBDIR)gtest.a $(QUANTIZATION_TEST_OBJS) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) \ + -o $(QUANTIZATION_TEST_BIN_PATH) 
+	$(LIBFLAGS) $(LIB_PATH) $(LIBDIR)gtest.a $(LDFLAGS) $(LIBS)
+
+$(QUANTIZATION_TEST_NAME): $(QUANTIZATION_TEST_BIN_PATH)
diff --git a/tensorflow/contrib/quantization/__init__.py b/tensorflow/contrib/quantization/__init__.py
index dcb73399b31..833dd20b5a3 100644
--- a/tensorflow/contrib/quantization/__init__.py
+++ b/tensorflow/contrib/quantization/__init__.py
@@ -24,7 +24,7 @@ from tensorflow.contrib.quantization.python import array_ops as quantized_array_
 from tensorflow.contrib.quantization.python.math_ops import *
 from tensorflow.contrib.quantization.python.nn_ops import *
 
-from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
-from tensorflow.python.ops.gen_array_ops import dequantize
-from tensorflow.python.ops.gen_array_ops import quantize_v2
-from tensorflow.python.ops.gen_array_ops import quantized_concat
+from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
+from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
+from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
+from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
diff --git a/tensorflow/contrib/quantization/kernels/BUILD b/tensorflow/contrib/quantization/kernels/BUILD
new file mode 100644
index 00000000000..6be2ccaa07f
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/BUILD
@@ -0,0 +1,311 @@
+# Description:
+#   quantization-specific OpKernels
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["-parse_headers"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_test",
+    "tf_custom_op_library",
+    "tf_kernel_library",
+)
+
+filegroup(
+    name = "android_ops",
+    srcs = [
+        "dequantize_op.cc",
+        "quantization_utils.cc",
+        "quantization_utils.h",
+        "quantize_down_and_shrink_range.cc",
+        "quantize_op.cc",
+        "quantized_activation_ops.cc",
+        "quantized_batch_norm_op.cc",
+        "quantized_bias_add_op.cc",
+        "quantized_concat_op.cc",
+        "quantized_conv_ops.cc",
+        "quantized_matmul_op.cc",
+        "quantized_pooling_ops.cc",
+        "reference_gemm.h",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+tf_kernel_library(
+    name = "quantized_ops",
+    srcs = [
+        "dequantize_op.cc",
+        "quantization_utils.cc",
+        "quantize_down_and_shrink_range.cc",
+        "quantize_op.cc",
+        "quantized_activation_ops.cc",
+        "quantized_batch_norm_op.cc",
+        "quantized_bias_add_op.cc",
+        "quantized_concat_op.cc",
+        "quantized_conv_ops.cc",
+        "quantized_matmul_op.cc",
+        "quantized_pooling_ops.cc",
+    ],
+    hdrs = [
+        "quantization_utils.h",
+        "reference_gemm.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/quantization:cc_array_ops",
+        "//tensorflow/contrib/quantization:cc_math_ops",
+        "//tensorflow/contrib/quantization:cc_nn_ops",
+        "//tensorflow/core",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:concat_lib_hdrs",
+        "//tensorflow/core/kernels:conv_ops",
+        "//tensorflow/core/kernels:eigen_helpers",
+        "//tensorflow/core/kernels:ops_util",
+        "//tensorflow/core/kernels:pooling_ops",
+        "//third_party/eigen3",
+        "@gemmlowp//:eight_bit_int_gemm",
+    ],
+)
+
+tf_custom_op_library(
+    name = "_quantized_kernels.so",
+    srcs = [
+        "dequantize_op.cc",
+        "quantization_utils.cc",
+        "quantization_utils.h",
+        "quantize_down_and_shrink_range.cc",
+        "quantize_op.cc",
"quantized_activation_ops.cc", + "quantized_batch_norm_op.cc", + "quantized_bias_add_op.cc", + "quantized_concat_op.cc", + "quantized_conv_ops.cc", + "quantized_matmul_op.cc", + "quantized_pooling_ops.cc", + "reference_gemm.h", + ], + deps = [ + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:ops_util_hdrs", + "//tensorflow/core/kernels:pooling_ops_hdrs", + "@gemmlowp//:eight_bit_int_gemm", + ], +) + +py_library( + name = "quantized_kernels_py", + srcs = ["load_quantized_kernels_so.py"], + data = ["_quantized_kernels.so"], + srcs_version = "PY2AND3", +) + +tf_cc_test( + name = "quantize_down_and_shrink_range_op_test", + size = "small", + srcs = ["quantize_down_and_shrink_range_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantization_utils_test", + srcs = ["quantization_utils_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//third_party/eigen3", + ], +) + +tf_cc_test( + name = "quantized_activation_ops_test", + srcs = ["quantized_activation_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_bias_add_op_test", + size = "small", + srcs = ["quantized_bias_add_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_conv_ops_test", + size = "small", + srcs = ["quantized_conv_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantize_op_test", + size = "small", + srcs = ["quantize_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", 
+ "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_matmul_op_test", + size = "small", + srcs = ["quantized_matmul_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_pooling_ops_test", + size = "small", + srcs = ["quantized_pooling_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_concat_op_test", + size = "small", + srcs = ["quantized_concat_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_batch_norm_op_test", + size = "small", + srcs = ["quantized_batch_norm_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:batch_norm_op", + "//tensorflow/core/kernels:ops_testutil", + "//third_party/eigen3", + ], +) diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/contrib/quantization/kernels/dequantize_op.cc similarity index 98% rename from tensorflow/core/kernels/dequantize_op.cc rename to tensorflow/contrib/quantization/kernels/dequantize_op.cc index 375287000eb..a088954fc21 100644 --- a/tensorflow/core/kernels/dequantize_op.cc +++ b/tensorflow/contrib/quantization/kernels/dequantize_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/contrib/quantization/kernels/hexagon/BUILD
similarity index 79%
rename from tensorflow/core/kernels/hexagon/BUILD
rename to tensorflow/contrib/quantization/kernels/hexagon/BUILD
index c31b5c06d7d..b57a2ac1b59 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/contrib/quantization/kernels/hexagon/BUILD
@@ -30,16 +30,16 @@ tf_cc_test(
     size = "small",
     srcs = ["quantized_matmul_op_for_hexagon_test.cc"],
     deps = [
-        "//tensorflow/core:array_ops_op_lib",
+        "//tensorflow/contrib/quantization:cc_array_ops",
+        "//tensorflow/contrib/quantization:cc_math_ops",
+        "//tensorflow/contrib/quantization:cc_nn_ops",
+        "//tensorflow/contrib/quantization/kernels:quantized_ops",
         "//tensorflow/core:framework",
-        "//tensorflow/core:math_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/kernels:ops_testutil",
         "//tensorflow/core/kernels:ops_util",
-        "//tensorflow/core/kernels:quantized_ops",
     ],
 )
diff --git a/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
similarity index 98%
rename from tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
rename to tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
index 7b3fdd19a54..3d139fbe0a0 100644
--- a/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
+++ b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
@@ -14,8 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 // Tests in this file are designed to evaluate hexagon DSP operations.
-#define EIGEN_USE_THREADS
-
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -27,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
diff --git a/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py b/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py
new file mode 100644
index 00000000000..3b7fd57a93b
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py
@@ -0,0 +1,48 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Kernels for quantized evaluation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+
+import tensorflow as tf
+
+QUANTIZED_KERNELS_FILE = '_quantized_kernels.so'
+
+_quantized_kernels = None
+_kernels_lock = threading.Lock()
+
+
+# Workaround for the fact that importing tensorflow imports contrib
+# (even if a user isn't using this or any other contrib op), but
+# there's not yet any guarantee that the shared object exists. In that
+# case, "import tensorflow" would always crash, even for users that
+# never use contrib.
+def Load(library_base_dir=''):
+  """Load the quantized kernels library and return the loaded module."""
+  with _kernels_lock:
+    global _quantized_kernels
+    if not _quantized_kernels:
+      data_files_path = os.path.join(library_base_dir,
+                                     tf.resource_loader.get_data_files_path())
+      tf.logging.info('data path: %s', data_files_path)
+      _quantized_kernels = tf.load_op_library(os.path.join(
+          data_files_path, QUANTIZED_KERNELS_FILE))
+
+    assert _quantized_kernels, 'Could not load _quantized_kernels.so'
+  return _quantized_kernels
diff --git a/tensorflow/core/kernels/quantization_utils.cc b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
similarity index 96%
rename from tensorflow/core/kernels/quantization_utils.cc
rename to tensorflow/contrib/quantization/kernels/quantization_utils.cc
index 6f36c0d4820..72651f96b0c 100644
--- a/tensorflow/core/kernels/quantization_utils.cc
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
similarity index 98%
rename from tensorflow/core/kernels/quantization_utils.h
rename to tensorflow/contrib/quantization/kernels/quantization_utils.h
index a098179034b..3b6a4901ba0 100644
--- a/tensorflow/core/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
 
 #define EIGEN_USE_THREADS
 
@@ -552,4 +552,4 @@ class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
diff --git a/tensorflow/core/kernels/quantization_utils_test.cc b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantization_utils_test.cc
rename to tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
index 55b5193ce14..d62610b2ca6 100644
--- a/tensorflow/core/kernels/quantization_utils_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
similarity index 98%
rename from tensorflow/core/kernels/quantize_down_and_shrink_range.cc
rename to tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
index aef5f0b6a35..18dffd1dc6a 100644
--- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc
similarity index 100%
rename from tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc
diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/contrib/quantization/kernels/quantize_op.cc
similarity index 98%
rename from tensorflow/core/kernels/quantize_op.cc
rename to tensorflow/contrib/quantization/kernels/quantize_op.cc
index 003654c1b0f..2bab8ad4476 100644
--- a/tensorflow/core/kernels/quantize_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantize_op.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
diff --git a/tensorflow/core/kernels/quantize_op_test.cc b/tensorflow/contrib/quantization/kernels/quantize_op_test.cc
similarity index 100%
rename from tensorflow/core/kernels/quantize_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantize_op_test.cc
diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_activation_ops.cc
rename to tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc
index ea1cf15f7bb..a86b611ad68 100644
--- a/tensorflow/core/kernels/quantized_activation_ops.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc
@@ -16,7 +16,7 @@ limitations under the License.
 
 // Implements a quantized version of the Relu6 operation.
 #define EIGEN_USE_THREADS
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/quantized_activation_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_activation_ops_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc
index 38c7d4ffef8..19efe6093ed 100644
--- a/tensorflow/core/kernels/quantized_activation_ops_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/core/kernels/quantized_batch_norm_op.cc b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_batch_norm_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc
index 18d83b41494..2a684824d37 100644
--- a/tensorflow/core/kernels/quantized_batch_norm_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
diff --git a/tensorflow/core/kernels/quantized_batch_norm_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_batch_norm_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc
index 9880d972cde..ccb6a59ecfb 100644
--- a/tensorflow/core/kernels/quantized_batch_norm_op_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_bias_add_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 0b34bfcad83..c319eb97daa 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the bias addition operation.
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/quantized_bias_add_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_bias_add_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc
index 3fd0eaa9814..56535029b50 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc
@@ -15,7 +15,7 @@ limitations under the License.
 #include
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_concat_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_concat_op.cc
index f929dd61cbc..abe8c9138d5 100644
--- a/tensorflow/core/kernels/quantized_concat_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
diff --git a/tensorflow/core/kernels/quantized_concat_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_concat_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc
index dc1439279e5..1301259fddf 100644
--- a/tensorflow/core/kernels/quantized_concat_op_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include
 #include
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_conv_ops.cc
rename to tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc
index fb69d770c0b..b25bff45a11 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include
 
 #include "public/gemmlowp.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/kernels/reference_gemm.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/ops_util.h"
diff --git a/tensorflow/core/kernels/quantized_conv_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_conv_ops_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc
index 01e55f85939..6a07004a92c 100644
--- a/tensorflow/core/kernels/quantized_conv_ops_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include
 #include
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_matmul_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc
index 0ce9e376423..18de2d1d97f 100644
--- a/tensorflow/core/kernels/quantized_matmul_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc
@@ -16,8 +16,8 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the matmul operation.
#include "public/gemmlowp.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/kernels/reference_gemm.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/reference_gemm.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/kernels/quantized_matmul_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc similarity index 99% rename from tensorflow/core/kernels/quantized_matmul_op_test.cc rename to tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc index e82464d4e72..3eea7518186 100644 --- a/tensorflow/core/kernels/quantized_matmul_op_test.cc +++ b/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc @@ -17,7 +17,7 @@ limitations under the License. #include #include -#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc similarity index 100% rename from tensorflow/core/kernels/quantized_pooling_ops.cc rename to tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc diff --git a/tensorflow/core/kernels/quantized_pooling_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc similarity index 98% rename from tensorflow/core/kernels/quantized_pooling_ops_test.cc rename to tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc index d3247d15d6c..3bc05ed455a 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops_test.cc +++ b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/core/kernels/reference_gemm.h b/tensorflow/contrib/quantization/kernels/reference_gemm.h similarity index 92% rename from tensorflow/core/kernels/reference_gemm.h rename to tensorflow/contrib/quantization/kernels/reference_gemm.h index 5e4cde07d76..5af3a771282 100644 --- a/tensorflow/core/kernels/reference_gemm.h +++ b/tensorflow/contrib/quantization/kernels/reference_gemm.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
 
 // This is an unoptimized but debuggable implementation of the GEMM matrix
 // multiply function, used to compare to faster but more opaque versions, or
@@ -87,4 +87,4 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c,
 }
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
diff --git a/tensorflow/contrib/quantization/load_quantized_ops_so.py b/tensorflow/contrib/quantization/load_quantized_ops_so.py
new file mode 100644
index 00000000000..6eb424e534e
--- /dev/null
+++ b/tensorflow/contrib/quantization/load_quantized_ops_so.py
@@ -0,0 +1,48 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ops for quantized evaluation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+
+import tensorflow as tf
+
+QUANTIZED_OPS_FILE = '_quantized_ops.so'
+
+_quantized_ops = None
+_ops_lock = threading.Lock()
+
+
+# Workaround for the fact that importing tensorflow imports contrib
+# (even if a user isn't using this or any other contrib op), but
+# there's not yet any guarantee that the shared object exists. In that
+# case, "import tensorflow" would always crash, even for users that
+# never use contrib.
+def Load(library_base_dir=''):
+  """Load the quantized ops library and return the loaded module."""
+  with _ops_lock:
+    global _quantized_ops
+    if not _quantized_ops:
+      data_files_path = os.path.join(library_base_dir,
+                                     tf.resource_loader.get_data_files_path())
+      tf.logging.info('data path: %s', data_files_path)
+      _quantized_ops = tf.load_op_library(os.path.join(
+          data_files_path, QUANTIZED_OPS_FILE))
+
+    assert _quantized_ops, 'Could not load _quantized_ops.so'
+  return _quantized_ops
diff --git a/tensorflow/contrib/quantization/ops/array_ops.cc b/tensorflow/contrib/quantization/ops/array_ops.cc
new file mode 100644
index 00000000000..ff636c79578
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/array_ops.cc
@@ -0,0 +1,195 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("QuantizeV2")
+    .Input("input: float")
+    .Input("min_range: float")
+    .Input("max_range: float")
+    .Output("output: T")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("T: quantizedtype")
+    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
+
+[min_range, max_range] are scalar floats that specify the range for
+the 'input' data. The 'mode' attribute controls exactly which calculations are
+used to convert the float values to their quantized equivalents.
+
+In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
+
+```
+out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
+if T == qint8, out[i] -= (range(T) + 1) / 2.0
+```
+here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
+
+*MIN_COMBINED Mode Example*
+
+Assume the input is type float and has a possible range of [0.0, 6.0] and the
+output type is quint8 ([0, 255]). The min_range and max_range values should be
+specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
+value of the input by 255/6 and cast to quint8.
+
+If the output type was qint8 ([-128, 127]), the operation will additionally
+subtract 128 from each value prior to casting, so that the range of values
+aligns with the range of qint8.
+
+If the mode is 'MIN_FIRST', then this approach is used:
+
+```
+number_of_steps = 1 << (# of bits in T)
+range_adjust = number_of_steps / (number_of_steps - 1)
+range = (range_max - range_min) * range_adjust
+range_scale = number_of_steps / range
+quantized = round(input * range_scale) - round(range_min * range_scale) +
+  numeric_limits<T>::min()
+quantized = max(quantized, numeric_limits<T>::min())
+quantized = min(quantized, numeric_limits<T>::max())
+```
+
+The biggest difference between this and MIN_COMBINED is that the minimum range
+is rounded first, before it's subtracted from the rounded value. With
+MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
+and dequantizing will introduce a larger and larger error.
+
+One thing to watch out for is that the operator may choose to adjust the
+requested minimum and maximum values slightly during the quantization process,
+so you should always use the output ports as the range for further calculations.
+For example, if the requested minimum and maximum values are close to equal,
+they will be separated by a small epsilon value to prevent ill-formed quantized
+buffers from being created. Otherwise, you can end up with buffers where all the
+quantized values map to the same float value, which causes problems for
+operations that have to perform further calculations on them.
+
+min_range: The minimum scalar value possibly produced for the input.
+max_range: The maximum scalar value possibly produced for the input.
+output: The quantized data produced from the float input.
+output_min: The actual minimum scalar value used for the output.
+output_max: The actual maximum scalar value used for the output.
+
+)doc");
+
+REGISTER_OP("Dequantize")
+    .Input("input: T")
+    .Input("min_range: float")
+    .Input("max_range: float")
+    .Output("output: float")
+    .Attr("T: quantizedtype")
+    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Dequantize the 'input' tensor into a float Tensor.
+
+[min_range, max_range] are scalar floats that specify the range for
+the 'input' data. The 'mode' attribute controls exactly which calculations are
+used to convert the quantized values to their float equivalents.
+
+In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
+
+```
+if T == qint8, in[i] += (range(T) + 1) / 2.0
+out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
+```
+here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
+
+*MIN_COMBINED Mode Example*
+
+If the input comes from a QuantizedRelu6, the output type is
+quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
+0-6. The min_range and max_range values are therefore 0.0 and 6.0.
+Dequantize on quint8 will take each value, cast to float, and multiply
+by 6 / 255.
+Note that if quantizedtype is qint8, the operation will additionally add
+128 to each value prior to casting.
+
+If the mode is 'MIN_FIRST', then this approach is used:
+
+```
+number_of_steps = 1 << (# of bits in T)
+range_adjust = number_of_steps / (number_of_steps - 1)
+range = (range_max - range_min) * range_adjust
+range_scale = range / number_of_steps
+const double offset_input = static_cast<double>(input) - lowest_quantized;
+result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
+```
+
+min_range: The minimum scalar value possibly produced for the input.
+max_range: The maximum scalar value possibly produced for the input.
+
+)doc");
+
+REGISTER_OP("QuantizedConcat")
+    .Input("concat_dim: int32")
+    .Input("values: N * T")
+    .Input("input_mins: N * float32")
+    .Input("input_maxes: N * float32")
+    .Output("output: T")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("N: int >= 2")
+    .Attr("T: type")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
+      ShapeHandle unused;
+      for (int i = 2; i < c->num_inputs(); ++i) {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
+      }
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Concatenates quantized tensors along one dimension.
+
+concat_dim: 0-D. The dimension along which to concatenate. Must be in the
+  range [0, rank(values)).
+values: The `N` Tensors to concatenate. Their ranks and types must match,
+  and their sizes must match in all dimensions except `concat_dim`.
+input_mins: The minimum scalar values for each of the input tensors.
+input_maxes: The maximum scalar values for each of the input tensors.
+output_min: The float value that the minimum quantized output value represents.
+output_max: The float value that the maximum quantized output value represents.
+output: A `Tensor` with the concatenation of values stacked along the
+  `concat_dim` dimension. This tensor's shape matches that of `values` except
+  in `concat_dim` where it has the sum of the sizes.
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/ops/math_ops.cc b/tensorflow/contrib/quantization/ops/math_ops.cc
new file mode 100644
index 00000000000..93bb2836303
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/math_ops.cc
@@ -0,0 +1,126 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("QuantizedMatMul")
+    .Input("a: T1")
+    .Input("b: T2")
+    .Input("min_a: float")
+    .Input("max_a: float")
+    .Input("min_b: float")
+    .Input("max_b: float")
+    .Output("out: Toutput")
+    .Output("min_out: float")
+    .Output("max_out: float")
+    .Attr("T1: quantizedtype")
+    .Attr("T2: quantizedtype")
+    .Attr("Toutput: quantizedtype = DT_QINT32")
+    .Attr("transpose_a: bool = false")
+    .Attr("transpose_b: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
+
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Perform a quantized matrix multiplication of `a` by the matrix `b`.
+
+The inputs must be two-dimensional matrices and the inner dimension of
+`a` (after being transposed if `transpose_a` is non-zero) must match the
+outer dimension of `b` (after being transposed if `transpose_b` is
+non-zero).
+
+a: Must be a two-dimensional tensor.
+b: Must be a two-dimensional tensor.
+transpose_a: If true, `a` is transposed before multiplication.
+transpose_b: If true, `b` is transposed before multiplication.
+min_a: The float value that the lowest quantized `a` value represents.
+max_a: The float value that the highest quantized `a` value represents.
+min_b: The float value that the lowest quantized `b` value represents.
+max_b: The float value that the highest quantized `b` value represents.
+min_out: The float value that the lowest quantized output value represents.
+max_out: The float value that the highest quantized output value represents.
+
+)doc");
+
+REGISTER_OP("QuantizeDownAndShrinkRange")
+    .Input("input: Tinput")
+    .Input("input_min: float")
+    .Input("input_max: float")
+    .Output("output: out_type")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Convert the quantized 'input' tensor into a lower-precision 'output', using the
+actual distribution of the values to maximize the usage of the lower bit depth
+and adjusting the output min and max ranges accordingly.
+
+[input_min, input_max] are scalar floats that specify the range for the float
+interpretation of the 'input' data. For example, if input_min is -1.0f and
+input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+
+This operator tries to squeeze as much precision as possible into an output with
+a lower bit depth by calculating the actual min and max values found in the
+data. For example, maybe that quint16 input has no values lower than 16,384 and
+none higher than 49,152. That means only half the range is actually needed, all
+the float interpretations are between -0.5f and 0.5f, so if we want to compress
+the data into a quint8 output, we can use that range rather than the theoretical
+-1.0f to 1.0f that is suggested by the input min and max.
+
+In practice, this is most useful for taking output from operations like
+QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
+may have large potential output ranges, but typically have a distribution of
+input values that only uses a small fraction of the possible range. By feeding
+that output into this operator, we can reduce it from 32 bits down to 8 with
+minimal loss of accuracy.
+
+input_min: The float value that the minimum quantized input value represents.
+input_max: The float value that the maximum quantized input value represents.
+Tinput: The type of the input.
+output_min: The float value that the minimum quantized output value represents.
+output_max: The float value that the maximum quantized output value represents.
+out_type: The type of the output. Should be a lower bit depth than Tinput.
+
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/ops/nn_ops.cc b/tensorflow/contrib/quantization/ops/nn_ops.cc
new file mode 100644
index 00000000000..720377043de
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/nn_ops.cc
@@ -0,0 +1,348 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/ops/nn_ops.cc b/tensorflow/contrib/quantization/ops/nn_ops.cc
new file mode 100644
index 00000000000..720377043de
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/nn_ops.cc
@@ -0,0 +1,348 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/util/padding.h"
+
+namespace tensorflow {
+
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("QuantizedAvgPool")
+    .Input("input: T")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Output("output: T")
+    .Output("min_output: float")
+    .Output("max_output: float")
+    .Attr("T: quantizedtype")
+    .Attr("ksize: list(int)")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Produces the average pool of the input tensor for quantized types.
+
+input: 4-D with shape `[batch, height, width, channels]`.
+ksize: The size of the window for each dimension of the input tensor.
+  The length must be 4 to match the number of dimensions of the input.
+strides: The stride of the sliding window for each dimension of the input
+  tensor. The length must be 4 to match the number of dimensions of the input.
+padding: The type of padding algorithm to use.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_output: The float value that the lowest quantized output value represents.
+max_output: The float value that the highest quantized output value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedBiasAdd")
+    .Input("input: T1")
+    .Input("bias: T2")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Input("min_bias: float")
+    .Input("max_bias: float")
+    .Output("output: out_type")
+    .Output("min_out: float")
+    .Output("max_out: float")
+    .Attr("T1: quantizedtype")
+    .Attr("T2: quantizedtype")
+    .Attr("out_type: quantizedtype")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Adds Tensor 'bias' to Tensor 'input' for quantized types.
+
+Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
+
+bias: A 1D bias Tensor with size matching the last dimension of 'input'.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_bias: The float value that the lowest quantized bias value represents.
+max_bias: The float value that the highest quantized bias value represents.
+min_out: The float value that the lowest quantized output value represents.
+max_out: The float value that the highest quantized output value represents.
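+
+Informally, in terms of the float values that the quantized tensors represent
+(a sketch, not an exact description of the rounding behavior):
+
+```
+output_float[..., c] = input_float[..., c] + bias_float[c]
+```
+
+with [min_out, max_out] reported so the quantized sums can be interpreted the
+same way.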
+
+)doc");
+
+REGISTER_OP("QuantizedConv2D")
+    .Input("input: Tinput")
+    .Input("filter: Tfilter")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Input("min_filter: float")
+    .Input("max_filter: float")
+    .Output("output: out_type")
+    .Output("min_output: float")
+    .Output("max_output: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("Tfilter: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QINT32")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes a 2D convolution given quantized 4D input and filter tensors.
+
+The inputs are quantized tensors where the lowest value represents the real
+number of the associated minimum, and the highest represents the maximum.
+This means that you can only interpret the quantized output in the same way, by
+taking the returned minimum and maximum values into account.
+
+filter: filter's input_depth dimension must match input's depth dimension.
+strides: The stride of the sliding window for each dimension of the input
+  tensor.
+padding: The type of padding algorithm to use.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_filter: The float value that the lowest quantized filter value represents.
+max_filter: The float value that the highest quantized filter value represents.
+min_output: The float value that the lowest quantized output value represents.
+max_output: The float value that the highest quantized output value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedMaxPool")
+    .Input("input: T")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Output("output: T")
+    .Output("min_output: float")
+    .Output("max_output: float")
+    .Attr("T: quantizedtype")
+    .Attr("ksize: list(int)")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Produces the max pool of the input tensor for quantized types.
+
+input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
+ksize: The size of the window for each dimension of the input tensor.
+  The length must be 4 to match the number of dimensions of the input.
+strides: The stride of the sliding window for each dimension of the input
+  tensor. The length must be 4 to match the number of dimensions of the input.
+padding: The type of padding algorithm to use.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_output: The float value that the lowest quantized output value represents.
+max_output: The float value that the highest quantized output value represents.
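+
+Because the quantized encoding is monotonic, taking the max of the quantized
+values matches taking the max of the floats they represent, so as a sketch
+(assuming the input range is simply passed through):
+
+```
+output = elementwise max of input over each pooling window
+min_output = min_input
+max_output = max_input
+```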
+
+)doc");
+
+REGISTER_OP("QuantizedRelu")
+    .Input("features: Tinput")
+    .Input("min_features: float")
+    .Input("max_features: float")
+    .Output("activations: out_type")
+    .Output("min_activations: float")
+    .Output("max_activations: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes Quantized Rectified Linear: `max(features, 0)`
+
+activations: Has the same output shape as "features".
+min_features: The float value that the lowest quantized value represents.
+max_features: The float value that the highest quantized value represents.
+min_activations: The float value that the lowest quantized value represents.
+max_activations: The float value that the highest quantized value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedRelu6")
+    .Input("features: Tinput")
+    .Input("min_features: float")
+    .Input("max_features: float")
+    .Output("activations: out_type")
+    .Output("min_activations: float")
+    .Output("max_activations: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
+
+activations: Has the same output shape as "features".
+min_features: The float value that the lowest quantized value represents.
+max_features: The float value that the highest quantized value represents.
+min_activations: The float value that the lowest quantized value represents.
+max_activations: The float value that the highest quantized value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedReluX")
+    .Input("features: Tinput")
+    .Input("max_value: float")
+    .Input("min_features: float")
+    .Input("max_features: float")
+    .Output("activations: out_type")
+    .Output("min_activations: float")
+    .Output("max_activations: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      // Also check that max_features (input 3) is a scalar, for consistency
+      // with the other scalar range inputs checked above.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
+
+activations: Has the same output shape as "features".
+max_value: The float ceiling that activations are clamped to.
+min_features: The float value that the lowest quantized value represents.
+max_features: The float value that the highest quantized value represents.
+min_activations: The float value that the lowest quantized value represents.
+max_activations: The float value that the highest quantized value represents.
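+
+As a sketch in terms of the represented float values:
+
+```
+activations_float = min(max(features_float, 0), max_value)
+```
+
+with min_activations and max_activations giving the float interpretation of
+the quantized activations in the same way as the inputs.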
+
+)doc");
+
+REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
+    .Input("t: Tinput")
+    .Input("t_min: float")
+    .Input("t_max: float")
+    .Input("m: Tinput")
+    .Input("m_min: float")
+    .Input("m_max: float")
+    .Input("v: Tinput")
+    .Input("v_min: float")
+    .Input("v_max: float")
+    .Input("beta: Tinput")
+    .Input("beta_min: float")
+    .Input("beta_max: float")
+    .Input("gamma: Tinput")
+    .Input("gamma_min: float")
+    .Input("gamma_max: float")
+    .Output("result: out_type")
+    .Output("result_min: float")
+    .Output("result_max: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype")
+    .Attr("variance_epsilon: float")
+    .Attr("scale_after_normalization: bool")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+
+      DimensionHandle last_dim = c->Dim(input, 3);
+      for (int i = 1; i < 5; ++i) {  // covers m, v, beta, gamma
+        ShapeHandle vec;
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
+        TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
+      }
+
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
+      c->set_output(0, out);
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Quantized batch normalization.
+
+This op is deprecated and will be removed in the future. Prefer
+`tf.nn.batch_normalization`.
+
+t: A 4D input Tensor.
+t_min: The value represented by the lowest quantized input.
+t_max: The value represented by the highest quantized input.
+m: A 1D mean Tensor with size matching the last dimension of t.
+  This is the first output from tf.nn.moments,
+  or a saved moving average thereof.
+m_min: The value represented by the lowest quantized mean.
+m_max: The value represented by the highest quantized mean.
+v: A 1D variance Tensor with size matching the last dimension of t.
+  This is the second output from tf.nn.moments,
+  or a saved moving average thereof.
+v_min: The value represented by the lowest quantized variance.
+v_max: The value represented by the highest quantized variance.
+beta: A 1D beta Tensor with size matching the last dimension of t.
+  An offset to be added to the normalized tensor.
+beta_min: The value represented by the lowest quantized offset.
+beta_max: The value represented by the highest quantized offset.
+gamma: A 1D gamma Tensor with size matching the last dimension of t.
+  If "scale_after_normalization" is true, this tensor will be multiplied
+  with the normalized tensor.
+gamma_min: The value represented by the lowest quantized gamma.
+gamma_max: The value represented by the highest quantized gamma.
+variance_epsilon: A small float number to avoid dividing by 0.
+scale_after_normalization: A bool indicating whether the resulting tensor
+  needs to be multiplied with gamma.
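+
+Ignoring quantization round-off, the result corresponds to the float
+computation (a sketch; all names refer to the float values the quantized
+tensors represent):
+
+```
+x_norm = (t - m) / sqrt(v + variance_epsilon)
+result = x_norm * gamma + beta   (if scale_after_normalization)
+result = x_norm + beta           (otherwise)
+```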
+)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/python/array_ops.py b/tensorflow/contrib/quantization/python/array_ops.py index b873d4df144..2ab65e903e9 100644 --- a/tensorflow/contrib/quantization/python/array_ops.py +++ b/tensorflow/contrib/quantization/python/array_ops.py @@ -19,7 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import -from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops -from tensorflow.python.ops.gen_array_ops import dequantize -from tensorflow.python.ops.gen_array_ops import quantize_v2 -from tensorflow.python.ops.gen_array_ops import quantized_concat +from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops +from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize +from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2 +from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat diff --git a/tensorflow/python/ops/dequantize_op_test.py b/tensorflow/contrib/quantization/python/dequantize_op_test.py similarity index 87% rename from tensorflow/python/ops/dequantize_op_test.py rename to tensorflow/contrib/quantization/python/dequantize_op_test.py index e60c22de638..b1d47cc4a2e 100644 --- a/tensorflow/python/ops/dequantize_op_test.py +++ b/tensorflow/contrib/quantization/python/dequantize_op_test.py @@ -21,16 +21,24 @@ from __future__ import print_function import numpy as np import tensorflow as tf +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + class DequantizeOpTest(tf.test.TestCase): def __init__(self, method_name="runTest"): super(DequantizeOpTest, self).__init__(method_name) + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() def _testDequantizeOp(self, inputs, min_range, max_range, dtype): with self.test_session(): input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype) - dequantized = tf.dequantize( + dequantized = tf.contrib.quantization.dequantize( input_op, min_range, max_range) tf_ans = dequantized.eval() diff --git a/tensorflow/contrib/quantization/python/math_ops.py b/tensorflow/contrib/quantization/python/math_ops.py index d863cdad26f..d4fabbd36bd 100644 --- a/tensorflow/contrib/quantization/python/math_ops.py +++ b/tensorflow/contrib/quantization/python/math_ops.py @@ -19,7 +19,10 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.quantization.ops import gen_math_ops +from tensorflow.contrib.quantization.ops.gen_math_ops import * from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops.gen_math_ops import * + + +ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/quantization/python/nn_ops.py b/tensorflow/contrib/quantization/python/nn_ops.py index fd28423317a..d31f1d4e686 100644 --- a/tensorflow/contrib/quantization/python/nn_ops.py +++ b/tensorflow/contrib/quantization/python/nn_ops.py @@ -19,7 +19,17 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import +from 
tensorflow.contrib.quantization.ops import gen_nn_ops +from tensorflow.contrib.quantization.ops.gen_nn_ops import * from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_nn_ops -from tensorflow.python.ops.gen_nn_ops import * + + +ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py similarity index 87% rename from tensorflow/python/ops/quantized_conv_ops_test.py rename to tensorflow/contrib/quantization/python/quantized_conv_ops_test.py index f98dd39f091..9b24d4129de 100644 --- a/tensorflow/python/ops/quantized_conv_ops_test.py +++ b/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py @@ -21,11 +21,19 @@ from __future__ import print_function import numpy as np import tensorflow as tf +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + class Conv2DTest(tf.test.TestCase): def __init__(self, method_name="runTest"): super(Conv2DTest, self).__init__(method_name) + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, expected): @@ -59,16 +67,16 @@ class Conv2DTest(tf.test.TestCase): with self.test_session(use_gpu=False) as sess: t1 = tf.constant(x1, shape=tensor_in_sizes, dtype=tf.quint8) t2 = tf.constant(x2, shape=filter_in_sizes, dtype=tf.quint8) - conv = tf.nn.quantized_conv2d(t1, - t2, - out_type=tf.qint32, - strides=[1, stride, - stride, 1], - padding=padding, - min_input=x1_min, - max_input=x1_max, - min_filter=x2_min, - max_filter=x2_max) + conv = tf.contrib.quantization.quantized_conv2d(t1, + t2, + out_type=tf.qint32, + strides=[1, stride, + stride, 1], + padding=padding, + min_input=x1_min, + max_input=x1_max, + min_filter=x2_min, + max_filter=x2_max) value = sess.run(conv) quantized_output = value[0] output_min = value[1] diff --git a/tensorflow/tools/quantization/BUILD b/tensorflow/contrib/quantization/tools/BUILD similarity index 78% rename from tensorflow/tools/quantization/BUILD rename to tensorflow/contrib/quantization/tools/BUILD index 5d8115eefc8..82a13e04d64 100644 --- a/tensorflow/tools/quantization/BUILD +++ b/tensorflow/contrib/quantization/tools/BUILD @@ -13,6 +13,9 @@ py_library( srcs_version = "PY2AND3", deps = [ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:ops", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", "//tensorflow/python:platform", ], ) @@ -23,6 +26,9 @@ py_binary( srcs_version = "PY2AND3", deps = [ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:ops", + 
"//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", "//tensorflow/python:platform", ], ) diff --git a/tensorflow/tools/quantization/graph_to_dot.py b/tensorflow/contrib/quantization/tools/graph_to_dot.py similarity index 100% rename from tensorflow/tools/quantization/graph_to_dot.py rename to tensorflow/contrib/quantization/tools/graph_to_dot.py diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/contrib/quantization/tools/quantize_graph.py similarity index 98% rename from tensorflow/tools/quantization/quantize_graph.py rename to tensorflow/contrib/quantization/tools/quantize_graph.py index 501a907993f..5ded5566915 100644 --- a/tensorflow/tools/quantization/quantize_graph.py +++ b/tensorflow/contrib/quantization/tools/quantize_graph.py @@ -35,6 +35,13 @@ import tensorflow as tf from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_util +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + + flags = tf.app.flags FLAGS = flags.FLAGS @@ -53,6 +60,8 @@ flags.DEFINE_string("test_input_dims", "1,224,224,3", """ graph loaded from a file.""") flags.DEFINE_boolean("strip_redundant_quantization", True, """Removes redundant dequantize/quantize pairs.""") +flags.DEFINE_boolean("load_quantization_so", True, + """Explicitly load the quantization ops library""") def print_input_nodes(current_node, nodes_map, indent, already_visited): @@ -241,7 +250,7 @@ def quantize_weight_eightbit(input_node, quantization_mode): sess = tf.Session() with sess.as_default(): - quantize_op = tf.quantize_v2( + quantize_op = tf.contrib.quantization.python.quantize_v2( float_tensor, min_value, max_value, @@ -281,6 +290,9 @@ class GraphRewriter(object): self.nodes_map = self.create_nodes_map(input_graph) self.output_graph = None self.mode = mode + if FLAGS.load_quantization_so: + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() def create_nodes_map(self, graph): """Builds a mapping of node names to their defs from the graph.""" diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/contrib/quantization/tools/quantize_graph_test.py similarity index 99% rename from tensorflow/tools/quantization/quantize_graph_test.py rename to tensorflow/contrib/quantization/tools/quantize_graph_test.py index a45c68df05a..4826ea26896 100644 --- a/tensorflow/tools/quantization/quantize_graph_test.py +++ b/tensorflow/contrib/quantization/tools/quantize_graph_test.py @@ -20,11 +20,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import numpy as np + import tensorflow as tf +from tensorflow.contrib.quantization.tools import quantize_graph from tensorflow.python.framework import graph_util -from tensorflow.tools.quantization import quantize_graph flags = tf.app.flags FLAGS = flags.FLAGS diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 243601c6e48..e2774069df5 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -487,7 +487,6 @@ cc_library( "//tensorflow/core/kernels:nn", "//tensorflow/core/kernels:parameterized_truncated_normal_op", "//tensorflow/core/kernels:parsing", - "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:random_ops", 
"//tensorflow/core/kernels:required", "//tensorflow/core/kernels:sdca_ops", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 306f61e5ec3..f391aa9e3f5 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -29,7 +29,6 @@ load( "tf_cc_tests", "tf_copts", "tf_cuda_library", - "tf_custom_op_library", "tf_opts_nortti_if_android", "tf_kernel_libraries", "tf_kernel_library", @@ -2163,7 +2162,6 @@ filegroup( srcs = [ ":android_extended_ops_group1", ":android_extended_ops_group2", - ":android_quantized_ops", ], visibility = ["//visibility:public"], ) @@ -2293,26 +2291,6 @@ filegroup( ], ) -filegroup( - name = "android_quantized_ops", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantization_utils.h", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - "reference_gemm.h", - ], - visibility = ["//visibility:public"], -) - # A file group which contains nearly all available operators which # may work on Android. This is intended to be used with selective # registration. @@ -2370,7 +2348,6 @@ cc_library( "//tensorflow:android": [ "//tensorflow/core/kernels:android_core_ops", "//tensorflow/core/kernels:android_extended_ops", - "@gemmlowp//:eight_bit_int_gemm_sources", ], "//conditions:default": [], }), @@ -2388,264 +2365,6 @@ cc_library( alwayslink = 1, ) -# Quantization-specific OpKernels - -tf_kernel_library( - name = "quantized_ops", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - ], - hdrs = [ - "quantization_utils.h", - "reference_gemm.h", - ], - deps = [ - "//tensorflow/core", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:conv_ops", - "//tensorflow/core/kernels:eigen_helpers", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/kernels:pooling_ops", - "//third_party/eigen3", - "@gemmlowp//:eight_bit_int_gemm", - ], -) - -tf_custom_op_library( - name = "_quantized_kernels.so", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantization_utils.h", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - "reference_gemm.h", - ], - deps = [ - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:ops_util_hdrs", - "//tensorflow/core/kernels:pooling_ops_hdrs", - "@gemmlowp//:eight_bit_int_gemm", - ], -) - -tf_cc_test( - name = "quantize_down_and_shrink_range_op_test", - size = "small", - srcs = ["quantize_down_and_shrink_range_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - 
"//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantization_utils_test", - srcs = ["quantization_utils_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//third_party/eigen3", - ], -) - -tf_cc_test( - name = "quantized_activation_ops_test", - srcs = ["quantized_activation_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_bias_add_op_test", - size = "small", - srcs = ["quantized_bias_add_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_conv_ops_test", - size = "small", - srcs = ["quantized_conv_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantize_op_test", - size = "small", - srcs = ["quantize_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_matmul_op_test", - size = "small", - srcs = ["quantized_matmul_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_pooling_ops_test", - size = "small", - srcs = ["quantized_pooling_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - 
"//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_concat_op_test", - size = "small", - srcs = ["quantized_concat_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_batch_norm_op_test", - size = "small", - srcs = ["quantized_batch_norm_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:batch_norm_op", - "//tensorflow/core/kernels:ops_testutil", - "//third_party/eigen3", - ], -) - # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 8156900cdcd..12e50ce84ec 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -4030,176 +4030,6 @@ debug_urls: List of URLs to debug targets, e.g., file:///foo/tfdbg_dump, grpc:://localhost:11011 )doc"); -REGISTER_OP("QuantizeV2") - .Input("input: float") - .Input("min_range: float") - .Input("max_range: float") - .Output("output: T") - .Output("output_min: float") - .Output("output_max: float") - .Attr("T: quantizedtype") - .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. - -[min_range, max_range] are scalar floats that specify the range for -the 'input' data. The 'mode' attribute controls exactly which calculations are -used to convert the float values to their quantized equivalents. - -In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: - -``` -out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -if T == qint8, out[i] -= (range(T) + 1) / 2.0 -``` -here `range(T) = numeric_limits::max() - numeric_limits::min()` - -*MIN_COMBINED Mode Example* - -Assume the input is type float and has a possible range of [0.0, 6.0] and the -output type is quint8 ([0, 255]). The min_range and max_range values should be -specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -value of the input by 255/6 and cast to quint8. 
- -If the output type was qint8 ([-128, 127]), the operation will additionally -subtract each value by 128 prior to casting, so that the range of values aligns -with the range of qint8. - -If the mode is 'MIN_FIRST', then this approach is used: - -``` -number_of_steps = 1 << (# of bits in T) -range_adjust = number_of_steps / (number_of_steps - 1) -range = (range_max - range_min) * range_adjust -range_scale = number_of_steps / range -quantized = round(input * range_scale) - round(range_min * range_scale) + - numeric_limits::min() -quantized = max(quantized, numeric_limits::min()) -quantized = min(quantized, numeric_limits::max()) -``` - -The biggest difference between this and MIN_COMBINED is that the minimum range -is rounded first, before it's subtracted from the rounded value. With -MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -and dequantizing will introduce a larger and larger error. - -One thing to watch out for is that the operator may choose to adjust the -requested minimum and maximum values slightly during the quantization process, -so you should always use the output ports as the range for further calculations. -For example, if the requested minimum and maximum values are close to equal, -they will be separated by a small epsilon value to prevent ill-formed quantized -buffers from being created. Otherwise, you can end up with buffers where all the -quantized values map to the same float value, which causes problems for -operations that have to perform further calculations on them. - -min_range: The minimum scalar value possibly produced for the input. -max_range: The maximum scalar value possibly produced for the input. -output: The quantized data produced from the float input. -output_min: The actual minimum scalar value used for the output. -output_max: The actual maximum scalar value used for the output. - -)doc"); - -REGISTER_OP("Dequantize") - .Input("input: T") - .Input("min_range: float") - .Input("max_range: float") - .Output("output: float") - .Attr("T: quantizedtype") - .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - return Status::OK(); - }) - .Doc(R"doc( -Dequantize the 'input' tensor into a float Tensor. - -[min_range, max_range] are scalar floats that specify the range for -the 'input' data. The 'mode' attribute controls exactly which calculations are -used to convert the float values to their quantized equivalents. - -In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: - -``` -if T == qint8, in[i] += (range(T) + 1)/ 2.0 -out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) -``` -here `range(T) = numeric_limits::max() - numeric_limits::min()` - -*MIN_COMBINED Mode Example* - -If the input comes from a QuantizedRelu6, the output type is -quint8 (range of 0-255) but the possible range of QuantizedRelu6 is -0-6. The min_range and max_range values are therefore 0.0 and 6.0. -Dequantize on quint8 will take each value, cast to float, and multiply -by 6 / 255. -Note that if quantizedtype is qint8, the operation will additionally add -each value by 128 prior to casting. 
- -If the mode is 'MIN_FIRST', then this approach is used: - -``` -number_of_steps = 1 << (# of bits in T) -range_adjust = number_of_steps / (number_of_steps - 1) -range = (range_max - range_min) * range_adjust -range_scale = range / number_of_steps -const double offset_input = static_cast(input) - lowest_quantized; -result = range_min + ((input - numeric_limits::min()) * range_scale) -``` - -min_range: The minimum scalar value possibly produced for the input. -max_range: The maximum scalar value possibly produced for the input. - -)doc"); - -REGISTER_OP("QuantizedConcat") - .Input("concat_dim: int32") - .Input("values: N * T") - .Input("input_mins: N * float32") - .Input("input_maxes: N * float32") - .Output("output: T") - .Output("output_min: float") - .Output("output_max: float") - .Attr("N: int >= 2") - .Attr("T: type") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c)); - ShapeHandle unused; - for (int i = std::max(0, c->num_inputs() - 2); i < c->num_inputs(); ++i) { - TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused)); - } - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Concatenates quantized tensors along one dimension. - -concat_dim: 0-D. The dimension along which to concatenate. Must be in the - range [0, rank(values)). -values: The `N` Tensors to concatenate. Their ranks and types must match, - and their sizes must match in all dimensions except `concat_dim`. -input_mins: The minimum scalar values for each of the input tensors. -input_maxes: The maximum scalar values for each of the input tensors. -output_min: The float value that the minimum quantized output value represents. -output_max: The float value that the maximum quantized output value represents. -output: A `Tensor` with the concatenation of values stacked along the - `concat_dim` dimension. This tensor's shape matches that of `values` except - in `concat_dim` where it has the sum of the sizes. -)doc"); - // Deprecated op registrations: // The following can be deleted after 10mar2017. diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 905a2b6a2d4..0034301690b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -2096,106 +2096,6 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0] ``` )doc"); -REGISTER_OP("QuantizedMatMul") - .Input("a: T1") - .Input("b: T2") - .Input("min_a: float") - .Input("max_a: float") - .Input("min_b: float") - .Input("max_b: float") - .Output("out: Toutput") - .Output("min_out: float") - .Output("max_out: float") - .Attr("T1: quantizedtype") - .Attr("T2: quantizedtype") - .Attr("Toutput: quantizedtype = DT_QINT32") - .Attr("transpose_a: bool = false") - .Attr("transpose_b: bool = false") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Perform a quantized matrix multiplication of `a` by the matrix `b`. 
- -The inputs must be two-dimensional matrices and the inner dimension of -`a` (after being transposed if `transpose_a` is non-zero) must match the -outer dimension of `b` (after being transposed if `transposed_b` is -non-zero). - -a: Must be a two-dimensional tensor. -b: Must be a two-dimensional tensor. -transpose_a: If true, `a` is transposed before multiplication. -transpose_b: If true, `b` is transposed before multiplication. -min_a: The float value that the lowest quantized `a` value represents. -max_a: The float value that the highest quantized `a` value represents. -min_b: The float value that the lowest quantized `b` value represents. -max_b: The float value that the highest quantized `b` value represents. -min_out: The float value that the lowest quantized output value represents. -max_out: The float value that the highest quantized output value represents. - -)doc"); - -REGISTER_OP("QuantizeDownAndShrinkRange") - .Input("input: Tinput") - .Input("input_min: float") - .Input("input_max: float") - .Output("output: out_type") - .Output("output_min: float") - .Output("output_max: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Convert the quantized 'input' tensor into a lower-precision 'output', using the -actual distribution of the values to maximize the usage of the lower bit depth -and adjusting the output min and max ranges accordingly. - -[input_min, input_max] are scalar floats that specify the range for the float -interpretation of the 'input' data. For example, if input_min is -1.0f and -input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. - -This operator tries to squeeze as much precision as possible into an output with -a lower bit depth by calculating the actual min and max values found in the -data. For example, maybe that quint16 input has no values lower than 16,384 and -none higher than 49,152. That means only half the range is actually needed, all -the float interpretations are between -0.5f and 0.5f, so if we want to compress -the data into a quint8 output, we can use that range rather than the theoretical --1.0f to 1.0f that is suggested by the input min and max. - -In practice, this is most useful for taking output from operations like -QuantizedMatMul that can produce higher bit-depth outputs than their inputs and -may have large potential output ranges, but in practice have a distribution of -input values that only uses a small fraction of the possible range. By feeding -that output into this operator, we can reduce it from 32 bits down to 8 with -minimal loss of accuracy. - -input_min: The float value that the minimum quantized input value represents. -input_max: The float value that the maximum quantized input value represents. -Tinput: The type of the input. -output_min: The float value that the minimum quantized output value represents. -output_max: The float value that the maximum quantized output value represents. -out_type: The type of the output. Should be a lower bit depth than Tinput. 
- -)doc"); - // Deprecated ops: REGISTER_OP("BatchFFT") .Input("input: complex64") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index a3f24d39984..0b5d1596675 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1962,324 +1962,4 @@ overlapping: When set to True, it means when pooling, the values at the boundary output: 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. )doc"); -REGISTER_OP("QuantizedAvgPool") - .Input("input: T") - .Input("min_input: float") - .Input("max_input: float") - .Output("output: T") - .Output("min_output: float") - .Output("max_output: float") - .Attr("T: quantizedtype") - .Attr("ksize: list(int)") - .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Produces the average pool of the input tensor for quantized types. - -input: 4-D with shape `[batch, height, width, channels]`. -ksize: The size of the window for each dimension of the input tensor. - The length must be 4 to match the number of dimensions of the input. -strides: The stride of the sliding window for each dimension of the input - tensor. The length must be 4 to match the number of dimensions of the input. -padding: The type of padding algorithm to use. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_output: The float value that the lowest quantized output value represents. -max_output: The float value that the highest quantized output value represents. - -)doc"); - -REGISTER_OP("QuantizedBiasAdd") - .Input("input: T1") - .Input("bias: T2") - .Input("min_input: float") - .Input("max_input: float") - .Input("min_bias: float") - .Input("max_bias: float") - .Output("output: out_type") - .Output("min_out: float") - .Output("max_out: float") - .Attr("T1: quantizedtype") - .Attr("T2: quantizedtype") - .Attr("out_type: quantizedtype") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Adds Tensor 'bias' to Tensor 'input' for Quantized types. - -Broadcasts the values of bias on dimensions 0..N-2 of 'input'. - -bias: A 1D bias Tensor with size matching the last dimension of 'input'. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_bias: The float value that the lowest quantized bias value represents. -max_bias: The float value that the highest quantized bias value represents. -min_out: The float value that the lowest quantized output value represents. -max_out: The float value that the highest quantized output value represents. 
- -)doc"); - -REGISTER_OP("QuantizedConv2D") - .Input("input: Tinput") - .Input("filter: Tfilter") - .Input("min_input: float") - .Input("max_input: float") - .Input("min_filter: float") - .Input("max_filter: float") - .Output("output: out_type") - .Output("min_output: float") - .Output("max_output: float") - .Attr("Tinput: quantizedtype") - .Attr("Tfilter: quantizedtype") - .Attr("out_type: quantizedtype = DT_QINT32") - .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes a 2D convolution given quantized 4D input and filter tensors. -The inputs are quantized tensors where the lowest value represents the real -number of the associated minimum, and the highest represents the maximum. -This means that you can only interpret the quantized output in the same way, by -taking the returned minimum and maximum values into account. - -filter: filter's input_depth dimension must match input's depth dimensions. -strides: The stride of the sliding window for each dimension of the input - tensor. -padding: The type of padding algorithm to use. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_filter: The float value that the lowest quantized filter value represents. -max_filter: The float value that the highest quantized filter value represents. -min_output: The float value that the lowest quantized output value represents. -max_output: The float value that the highest quantized output value represents. - -)doc"); - -REGISTER_OP("QuantizedMaxPool") - .Input("input: T") - .Input("min_input: float") - .Input("max_input: float") - .Output("output: T") - .Output("min_output: float") - .Output("max_output: float") - .Attr("T: quantizedtype") - .Attr("ksize: list(int)") - .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Produces the max pool of the input tensor for quantized types. - -input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -ksize: The size of the window for each dimension of the input tensor. - The length must be 4 to match the number of dimensions of the input. -strides: The stride of the sliding window for each dimension of the input - tensor. The length must be 4 to match the number of dimensions of the input. -padding: The type of padding algorithm to use. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_output: The float value that the lowest quantized output value represents. -max_output: The float value that the highest quantized output value represents. 
- -)doc"); - -REGISTER_OP("QuantizedRelu") - .Input("features: Tinput") - .Input("min_features: float") - .Input("max_features: float") - .Output("activations: out_type") - .Output("min_activations: float") - .Output("max_activations: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype = DT_QUINT8") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes Quantized Rectified Linear: `max(features, 0)` - -activations: Has the same output shape as "features". -min_features: The float value that the lowest quantized value represents. -max_features: The float value that the highest quantized value represents. -min_activations: The float value that the lowest quantized value represents. -max_activations: The float value that the highest quantized value represents. - -)doc"); - -REGISTER_OP("QuantizedRelu6") - .Input("features: Tinput") - .Input("min_features: float") - .Input("max_features: float") - .Output("activations: out_type") - .Output("min_activations: float") - .Output("max_activations: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype = DT_QUINT8") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` - -activations: Has the same output shape as "features". -min_features: The float value that the lowest quantized value represents. -max_features: The float value that the highest quantized value represents. -min_activations: The float value that the lowest quantized value represents. -max_activations: The float value that the highest quantized value represents. - -)doc"); - -REGISTER_OP("QuantizedReluX") - .Input("features: Tinput") - .Input("max_value: float") - .Input("min_features: float") - .Input("max_features: float") - .Output("activations: out_type") - .Output("min_activations: float") - .Output("max_activations: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype = DT_QUINT8") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` - -activations: Has the same output shape as "features". -min_features: The float value that the lowest quantized value represents. -max_features: The float value that the highest quantized value represents. -min_activations: The float value that the lowest quantized value represents. -max_activations: The float value that the highest quantized value represents. 
- -)doc"); - -REGISTER_OP("QuantizedBatchNormWithGlobalNormalization") - .Input("t: Tinput") - .Input("t_min: float") - .Input("t_max: float") - .Input("m: Tinput") - .Input("m_min: float") - .Input("m_max: float") - .Input("v: Tinput") - .Input("v_min: float") - .Input("v_max: float") - .Input("beta: Tinput") - .Input("beta_min: float") - .Input("beta_max: float") - .Input("gamma: Tinput") - .Input("gamma_min: float") - .Input("gamma_max: float") - .Output("result: out_type") - .Output("result_min: float") - .Output("result_max: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype") - .Attr("variance_epsilon: float") - .Attr("scale_after_normalization: bool") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle input; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); - - DimensionHandle last_dim = c->Dim(input, 3); - for (int i = 1; i < 5; ++i) { // covers m, v, beta, gamma - ShapeHandle vec; - TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec)); - TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim)); - } - - ShapeHandle out; - TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out)); - c->set_output(0, out); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - - return Status::OK(); - }) - .Doc(R"doc( -Quantized Batch normalization. - -This op is deprecated and will be removed in the future. Prefer -`tf.nn.batch_normalization`. - -t: A 4D input Tensor. -t_min: The value represented by the lowest quantized input. -t_max: The value represented by the highest quantized input. -m: A 1D mean Tensor with size matching the last dimension of t. - This is the first output from tf.nn.moments, - or a saved moving average thereof. -m_min: The value represented by the lowest quantized mean. -m_max: The value represented by the highest quantized mean. -v: A 1D variance Tensor with size matching the last dimension of t. - This is the second output from tf.nn.moments, - or a saved moving average thereof. -v_min: The value represented by the lowest quantized variance. -v_max: The value represented by the highest quantized variance. -beta: A 1D beta Tensor with size matching the last dimension of t. - An offset to be added to the normalized tensor. -beta_min: The value represented by the lowest quantized offset. -beta_max: The value represented by the highest quantized offset. -gamma: A 1D gamma Tensor with size matching the last dimension of t. - If "scale_after_normalization" is true, this tensor will be multiplied - with the normalized tensor. -gamma_min: The value represented by the lowest quantized gamma. -gamma_max: The value represented by the highest quantized gamma. -variance_epsilon: A small float number to avoid dividing by 0. -scale_after_normalization: A bool indicating whether the resulted tensor - needs to be multiplied with gamma. 
-)doc"); - } // namespace tensorflow diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 7ba6206bc25..fe2d56eec3a 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2100,33 +2100,6 @@ py_binary( ], ) -# ----------------------------------------------------------------------------- -# Quantization - -py_test( - name = "dequantize_op_test", - size = "small", - srcs = ["ops/dequantize_op_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":ops", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - -py_test( - name = "quantized_conv_ops_test", - size = "small", - srcs = ["ops/quantized_conv_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":ops", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 02370ab4ede..8548c78faf4 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -74,9 +74,6 @@ or join multiple tensors together. @@boolean_mask @@one_hot @@sequence_mask -@@dequantize -@@quantize_v2 -@@quantized_concat """ from __future__ import absolute_import @@ -2288,9 +2285,3 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None): return result else: return gen_math_ops.cast(result, dtype) - - -# TODO(cwhipkey): Verify and enable shape functions for these. -ops.RegisterShape("QuantizeV2")(None) -ops.RegisterShape("QuantizedBatchNormWithGlobalNormalization")(None) -ops.RegisterShape("QuantizedConcat")(None) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index ddf707ed83c..d5f51dee71e 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1910,6 +1910,3 @@ def reduced_shape(input_shape, axes): axes], # [1, 2] [input_shape, # [2, 3, 5, 7] array_ops.fill(axes_shape, 1)]) # [1, 1] - - -ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 2467a8ac66d..4eaaa8bf502 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1364,14 +1364,4 @@ def erosion2d(value, kernel, strides, rates, padding, name=None): padding=padding, name=name)) - -ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn) - # pylint: enable=invalid-name diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 84db0002fbc..70fb6455c85 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -136,8 +136,7 @@ def if_not_mobile(a): }) def tf_copts(): - return (["-fno-exceptions", - "-DEIGEN_AVOID_STL_ARRAY"] + + return (["-fno-exceptions", "-DEIGEN_AVOID_STL_ARRAY"] + if_cuda(["-DGOOGLE_CUDA=1"]) + if_android_arm(["-mfpu=neon"]) + select({"//tensorflow:android": [ @@ -145,7 +144,6 @@ def tf_copts(): "-DMIN_LOG_LEVEL=0", "-DTF_LEAN_BINARY", "-O2", - "-Iexternal/gemmlowp", ], "//tensorflow:darwin": [], 
"//tensorflow:ios": ["-std=c++11",],