diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index e86fa076ef0..4cad55b3993 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -110,6 +110,10 @@ filegroup(
         "//tensorflow/contrib/metrics:all_files",
         "//tensorflow/contrib/metrics/kernels:all_files",
         "//tensorflow/contrib/opt:all_files",
+        "//tensorflow/contrib/quantization:all_files",
+        "//tensorflow/contrib/quantization/kernels:all_files",
+        "//tensorflow/contrib/quantization/kernels/hexagon:all_files",
+        "//tensorflow/contrib/quantization/tools:all_files",
         "//tensorflow/contrib/rnn:all_files",
         "//tensorflow/contrib/session_bundle:all_files",
         "//tensorflow/contrib/session_bundle/example:all_files",
@@ -130,7 +134,6 @@ filegroup(
         "//tensorflow/core/distributed_runtime:all_files",
         "//tensorflow/core/distributed_runtime/rpc:all_files",
         "//tensorflow/core/kernels:all_files",
-        "//tensorflow/core/kernels/hexagon:all_files",
         "//tensorflow/core/ops/compat:all_files",
         "//tensorflow/core/platform/cloud:all_files",
         "//tensorflow/core/platform/default/build_config:all_files",
@@ -178,7 +181,6 @@ filegroup(
         "//tensorflow/tools/docs:all_files",
         "//tensorflow/tools/git:all_files",
         "//tensorflow/tools/proto_text:all_files",
-        "//tensorflow/tools/quantization:all_files",
         "//tensorflow/tools/test:all_files",
         "//tensorflow/user_ops:all_files",
         "//third_party/hadoop:all_files",
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 1aa0b9e0123..a5d3cb49fff 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -73,7 +73,6 @@ HOST_INCLUDES := \
 -I. \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
--I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(HOST_GENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
 	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -147,7 +146,6 @@ INCLUDES := \
 -I. \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
--I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
 ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -240,7 +238,6 @@ ifeq ($(TARGET),ANDROID)
 -I. \
 -I$(MAKEFILE_DIR)/downloads/ \
 -I$(MAKEFILE_DIR)/downloads/eigen \
--I$(MAKEFILE_DIR)/downloads/gemmlowp \
 -I$(MAKEFILE_DIR)/gen/protobuf/include \
 -I$(PROTOGENDIR) \
 -I$(PBTGENDIR)
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 3d02e19e0a8..39899192e8b 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -130,17 +130,6 @@ tensorflow/core/kernels/batch_norm_op.cc
 tensorflow/core/kernels/avgpooling_op.cc
 tensorflow/core/kernels/argmax_op.cc
 tensorflow/core/kernels/aggregate_ops.cc
-tensorflow/core/kernels/dequantize_op.cc
-tensorflow/core/kernels/quantization_utils.cc
-tensorflow/core/kernels/quantize_down_and_shrink_range.cc
-tensorflow/core/kernels/quantize_op.cc
-tensorflow/core/kernels/quantized_activation_ops.cc
-tensorflow/core/kernels/quantized_batch_norm_op.cc
-tensorflow/core/kernels/quantized_bias_add_op.cc
-tensorflow/core/kernels/quantized_concat_op.cc
-tensorflow/core/kernels/quantized_conv_ops.cc
-tensorflow/core/kernels/quantized_matmul_op.cc
-tensorflow/core/kernels/quantized_pooling_ops.cc
 tensorflow/core/ops/training_ops.cc
 tensorflow/core/ops/string_ops.cc
 tensorflow/core/ops/state_ops.cc
diff --git a/tensorflow/contrib/quantization/BUILD b/tensorflow/contrib/quantization/BUILD
index 5347b32bdb4..881349fda7e 100644
--- a/tensorflow/contrib/quantization/BUILD
+++ b/tensorflow/contrib/quantization/BUILD
@@ -13,6 +13,53 @@ load(
     "tf_custom_op_library",
 )
 
+cc_library(
+    name = "cc_array_ops",
+    srcs = ["ops/array_ops.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "cc_math_ops",
+    srcs = ["ops/math_ops.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "cc_nn_ops",
+    srcs = ["ops/nn_ops.cc"],
+    linkstatic = 1,
+    deps = [
+        "//tensorflow/core:framework",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "cc_ops",
+    linkstatic = 1,
+    deps = [
+        ":cc_array_ops",
+        ":cc_math_ops",
+        ":cc_nn_ops",
+    ],
+    alwayslink = 1,
+)
+
+filegroup(
+    name = "android_ops",
+    srcs = glob(["ops/*.cc"]),
+    visibility = ["//visibility:public"],
+)
+
 py_library(
     name = "quantization_py",
     srcs = [
@@ -22,6 +69,8 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":ops",
+        "//tensorflow/contrib/quantization:quantized_ops_py",
+        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
     ],
 )
 
@@ -34,9 +83,52 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:nn_ops",
+        ":array_ops",
+        ":math_ops",
+        ":nn_ops",
+    ],
+)
+
+tf_gen_op_wrapper_py(
+    name = "array_ops",
+    deps = ["//tensorflow/contrib/quantization:cc_array_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "math_ops",
+    deps = ["//tensorflow/contrib/quantization:cc_math_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "nn_ops",
+    deps = ["//tensorflow/contrib/quantization:cc_nn_ops"],
+)
+
+py_test(
+    name = "dequantize_op_test",
+    size = "small",
+    srcs = ["python/dequantize_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ops",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/contrib/quantization:quantized_ops_py",
+        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
+py_test(
+    name = "quantized_conv_ops_test",
+    size = "small",
+    srcs = ["python/quantized_conv_ops_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ops",
+ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", + "//tensorflow/python:framework_test_lib", ], ) @@ -47,6 +139,24 @@ filegroup( ]), ) +tf_custom_op_library( + name = "_quantized_ops.so", + srcs = [ + "ops/array_ops.cc", + "ops/math_ops.cc", + "ops/nn_ops.cc", + ], + deps = [ + ], +) + +py_library( + name = "quantized_ops_py", + srcs = ["load_quantized_ops_so.py"], + data = ["_quantized_ops.so"], + srcs_version = "PY2AND3", +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/quantization/Makefile.in b/tensorflow/contrib/quantization/Makefile.in new file mode 100644 index 00000000000..563639e5d75 --- /dev/null +++ b/tensorflow/contrib/quantization/Makefile.in @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# This sub Makefile compiles libraries under this directory. This is designed to +# be used as a sub Makefile with tensorflow/contrib/makefile/Makefile. +# You can build targets in this file by including this sub makefile like: +# $ make -f tensorflow/contrib/makefile/Makefile TARGET= \ +# SUB_MAKEFILES=$(pwd)/tensorflow/contrib/quantization/Makefile.in \ +# (optional: NDK_ROOT=) contrib_quantization_tests +# TODO(satok): Support more targets + +GTEST_DIR := \ +$(MAKEFILE_DIR)/downloads/googletest/googletest + +GTEST_HEADERS = \ +$(wildcard $(GTEST_DIR)/include/gtest/*.h) \ +$(wildcard $(GTEST_DIR)/include/gtest/internal/*.h) + +GTEST_SRCS := \ +$(wildcard $(GTEST_DIR)/src/*.cc) \ +$(wildcard $(GTEST_DIR)/src/*.h) \ +$(GTEST_HEADERS) + +QUANTIZATION_TEST_SRCS := \ +tensorflow/contrib/quantization/ops/math_ops.cc \ +tensorflow/contrib/quantization/kernels/quantize_op.cc \ +tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc \ +tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc \ +tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc \ +tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc \ +tensorflow/contrib/makefile/test/test_main.cc + +QUANTIZATION_TEST_OBJS := $(addprefix $(OBJDIR), $(QUANTIZATION_TEST_SRCS:.cc=.o)) + +QUANTIZATION_TEST_NAME := contrib_quantization_tests +QUANTIZATION_TEST_BIN_PATH := $(BINDIR)$(QUANTIZATION_TEST_NAME) + +INCLUDES += \ +-I$(MAKEFILE_DIR)/downloads/gemmlowp \ +-I$(MAKEFILE_DIR)/downloads/googletest/googletest/include + +QUANTIZATION_TEST_INCLUDES := $(INCLUDES) + +$(OBJDIR)gtest-all.o : $(GTEST_SRCS) + $(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) -I $(GTEST_DIR) -c \ + $(GTEST_DIR)/src/gtest-all.cc -o $@ + +$(LIBDIR)gtest.a : $(OBJDIR)gtest-all.o + $(AR) $(ARFLAGS) $@ $^ + +$(QUANTIZATION_TEST_BIN_PATH): $(LIB_PATH) $(LIBDIR)gtest.a $(QUANTIZATION_TEST_OBJS) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) \ + -o $(QUANTIZATION_TEST_BIN_PATH) 
+	$(LIBFLAGS) $(LIB_PATH) $(LIBDIR)gtest.a $(LDFLAGS) $(LIBS)
+
+$(QUANTIZATION_TEST_NAME): $(QUANTIZATION_TEST_BIN_PATH)
diff --git a/tensorflow/contrib/quantization/__init__.py b/tensorflow/contrib/quantization/__init__.py
index dcb73399b31..833dd20b5a3 100644
--- a/tensorflow/contrib/quantization/__init__.py
+++ b/tensorflow/contrib/quantization/__init__.py
@@ -24,7 +24,7 @@ from tensorflow.contrib.quantization.python import array_ops as quantized_array_
 from tensorflow.contrib.quantization.python.math_ops import *
 from tensorflow.contrib.quantization.python.nn_ops import *
 
-from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
-from tensorflow.python.ops.gen_array_ops import dequantize
-from tensorflow.python.ops.gen_array_ops import quantize_v2
-from tensorflow.python.ops.gen_array_ops import quantized_concat
+from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
+from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
+from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
+from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
diff --git a/tensorflow/contrib/quantization/kernels/BUILD b/tensorflow/contrib/quantization/kernels/BUILD
new file mode 100644
index 00000000000..6be2ccaa07f
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/BUILD
@@ -0,0 +1,311 @@
+# Description:
+#   quantization-specific OpKernels
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["-parse_headers"],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cc_test",
+    "tf_custom_op_library",
+    "tf_kernel_library",
+)
+
+filegroup(
+    name = "android_ops",
+    srcs = [
+        "dequantize_op.cc",
+        "quantization_utils.cc",
+        "quantization_utils.h",
+        "quantize_down_and_shrink_range.cc",
+        "quantize_op.cc",
+        "quantized_activation_ops.cc",
+        "quantized_batch_norm_op.cc",
+        "quantized_bias_add_op.cc",
+        "quantized_concat_op.cc",
+        "quantized_conv_ops.cc",
+        "quantized_matmul_op.cc",
+        "quantized_pooling_ops.cc",
+        "reference_gemm.h",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+tf_kernel_library(
+    name = "quantized_ops",
+    srcs = [
+        "dequantize_op.cc",
+        "quantization_utils.cc",
+        "quantize_down_and_shrink_range.cc",
+        "quantize_op.cc",
+        "quantized_activation_ops.cc",
+        "quantized_batch_norm_op.cc",
+        "quantized_bias_add_op.cc",
+        "quantized_concat_op.cc",
+        "quantized_conv_ops.cc",
+        "quantized_matmul_op.cc",
+        "quantized_pooling_ops.cc",
+    ],
+    hdrs = [
+        "quantization_utils.h",
+        "reference_gemm.h",
+    ],
+    deps = [
+        "//tensorflow/contrib/quantization:cc_array_ops",
+        "//tensorflow/contrib/quantization:cc_math_ops",
+        "//tensorflow/contrib/quantization:cc_nn_ops",
+        "//tensorflow/core",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core/kernels:concat_lib_hdrs",
+        "//tensorflow/core/kernels:conv_ops",
+        "//tensorflow/core/kernels:eigen_helpers",
+        "//tensorflow/core/kernels:ops_util",
+        "//tensorflow/core/kernels:pooling_ops",
+        "//third_party/eigen3",
+        "@gemmlowp//:eight_bit_int_gemm",
+    ],
+)
+
+tf_custom_op_library(
+    name = "_quantized_kernels.so",
+    srcs = [
+        "dequantize_op.cc",
+        "quantization_utils.cc",
+        "quantization_utils.h",
+        "quantize_down_and_shrink_range.cc",
+        "quantize_op.cc",
"quantized_activation_ops.cc", + "quantized_batch_norm_op.cc", + "quantized_bias_add_op.cc", + "quantized_concat_op.cc", + "quantized_conv_ops.cc", + "quantized_matmul_op.cc", + "quantized_pooling_ops.cc", + "reference_gemm.h", + ], + deps = [ + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:ops_util_hdrs", + "//tensorflow/core/kernels:pooling_ops_hdrs", + "@gemmlowp//:eight_bit_int_gemm", + ], +) + +py_library( + name = "quantized_kernels_py", + srcs = ["load_quantized_kernels_so.py"], + data = ["_quantized_kernels.so"], + srcs_version = "PY2AND3", +) + +tf_cc_test( + name = "quantize_down_and_shrink_range_op_test", + size = "small", + srcs = ["quantize_down_and_shrink_range_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantization_utils_test", + srcs = ["quantization_utils_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//third_party/eigen3", + ], +) + +tf_cc_test( + name = "quantized_activation_ops_test", + srcs = ["quantized_activation_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_bias_add_op_test", + size = "small", + srcs = ["quantized_bias_add_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_conv_ops_test", + size = "small", + srcs = ["quantized_conv_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantize_op_test", + size = "small", + srcs = ["quantize_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", 
+ "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_matmul_op_test", + size = "small", + srcs = ["quantized_matmul_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_pooling_ops_test", + size = "small", + srcs = ["quantized_pooling_ops_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_concat_op_test", + size = "small", + srcs = ["quantized_concat_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_cc_test( + name = "quantized_batch_norm_op_test", + size = "small", + srcs = ["quantized_batch_norm_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/contrib/quantization:cc_array_ops", + "//tensorflow/contrib/quantization:cc_math_ops", + "//tensorflow/contrib/quantization:cc_nn_ops", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:batch_norm_op", + "//tensorflow/core/kernels:ops_testutil", + "//third_party/eigen3", + ], +) diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/contrib/quantization/kernels/dequantize_op.cc similarity index 98% rename from tensorflow/core/kernels/dequantize_op.cc rename to tensorflow/contrib/quantization/kernels/dequantize_op.cc index 375287000eb..a088954fc21 100644 --- a/tensorflow/core/kernels/dequantize_op.cc +++ b/tensorflow/contrib/quantization/kernels/dequantize_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/contrib/quantization/kernels/hexagon/BUILD
similarity index 79%
rename from tensorflow/core/kernels/hexagon/BUILD
rename to tensorflow/contrib/quantization/kernels/hexagon/BUILD
index c31b5c06d7d..b57a2ac1b59 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/contrib/quantization/kernels/hexagon/BUILD
@@ -30,16 +30,16 @@ tf_cc_test(
     size = "small",
     srcs = ["quantized_matmul_op_for_hexagon_test.cc"],
     deps = [
-        "//tensorflow/core:array_ops_op_lib",
+        "//tensorflow/contrib/quantization:cc_array_ops",
+        "//tensorflow/contrib/quantization:cc_math_ops",
+        "//tensorflow/contrib/quantization:cc_nn_ops",
+        "//tensorflow/contrib/quantization/kernels:quantized_ops",
         "//tensorflow/core:framework",
-        "//tensorflow/core:math_ops_op_lib",
-        "//tensorflow/core:nn_ops_op_lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/kernels:ops_testutil",
         "//tensorflow/core/kernels:ops_util",
-        "//tensorflow/core/kernels:quantized_ops",
     ],
 )
diff --git a/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
similarity index 98%
rename from tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
rename to tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
index 7b3fdd19a54..3d139fbe0a0 100644
--- a/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
+++ b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
@@ -14,8 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 // Tests in this file are designed to evaluate hexagon DSP operations.
-#define EIGEN_USE_THREADS
-
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
@@ -27,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
 #include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
diff --git a/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py b/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py
new file mode 100644
index 00000000000..3b7fd57a93b
--- /dev/null
+++ b/tensorflow/contrib/quantization/kernels/load_quantized_kernels_so.py
@@ -0,0 +1,48 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Kernels for quantized evaluation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+
+import tensorflow as tf
+
+QUANTIZED_KERNELS_FILE = '_quantized_kernels.so'
+
+_quantized_kernels = None
+_kernels_lock = threading.Lock()
+
+
+# Workaround for the fact that importing tensorflow imports contrib
+# (even if a user isn't using this or any other contrib op), but
+# there's not yet any guarantee that the shared object exists. In that
+# case, "import tensorflow" would always crash, even for users that
+# never use contrib.
+def Load(library_base_dir=''):
+  """Load the quantized kernels library and return the loaded module."""
+  with _kernels_lock:
+    global _quantized_kernels
+    if not _quantized_kernels:
+      data_files_path = os.path.join(library_base_dir,
+                                     tf.resource_loader.get_data_files_path())
+      tf.logging.info('data path: %s', data_files_path)
+      _quantized_kernels = tf.load_op_library(os.path.join(
+          data_files_path, QUANTIZED_KERNELS_FILE))
+
+    assert _quantized_kernels, 'Could not load _quantized_kernels.so'
+  return _quantized_kernels
diff --git a/tensorflow/core/kernels/quantization_utils.cc b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
similarity index 96%
rename from tensorflow/core/kernels/quantization_utils.cc
rename to tensorflow/contrib/quantization/kernels/quantization_utils.cc
index 6f36c0d4820..72651f96b0c 100644
--- a/tensorflow/core/kernels/quantization_utils.cc
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/quantization_utils.h b/tensorflow/contrib/quantization/kernels/quantization_utils.h
similarity index 98%
rename from tensorflow/core/kernels/quantization_utils.h
rename to tensorflow/contrib/quantization/kernels/quantization_utils.h
index a098179034b..3b6a4901ba0 100644
--- a/tensorflow/core/kernels/quantization_utils.h
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
 
 #define EIGEN_USE_THREADS
 
@@ -552,4 +552,4 @@ class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase {
 
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
diff --git a/tensorflow/core/kernels/quantization_utils_test.cc b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantization_utils_test.cc
rename to tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
index 55b5193ce14..d62610b2ca6 100644
--- a/tensorflow/core/kernels/quantization_utils_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantization_utils_test.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/tensor_testutil.h"
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
similarity index 98%
rename from tensorflow/core/kernels/quantize_down_and_shrink_range.cc
rename to tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
index aef5f0b6a35..18dffd1dc6a 100644
--- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc
+++ b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc b/tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc
similarity index 100%
rename from tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantize_down_and_shrink_range_op_test.cc
diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/contrib/quantization/kernels/quantize_op.cc
similarity index 98%
rename from tensorflow/core/kernels/quantize_op.cc
rename to tensorflow/contrib/quantization/kernels/quantize_op.cc
index 003654c1b0f..2bab8ad4476 100644
--- a/tensorflow/core/kernels/quantize_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantize_op.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/type_traits.h"
diff --git a/tensorflow/core/kernels/quantize_op_test.cc b/tensorflow/contrib/quantization/kernels/quantize_op_test.cc
similarity index 100%
rename from tensorflow/core/kernels/quantize_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantize_op_test.cc
diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_activation_ops.cc
rename to tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc
index ea1cf15f7bb..a86b611ad68 100644
--- a/tensorflow/core/kernels/quantized_activation_ops.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_activation_ops.cc
@@ -16,7 +16,7 @@ limitations under the License.
 
 // Implements a quantized version of the Relu6 operation.
 #define EIGEN_USE_THREADS
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/quantized_activation_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_activation_ops_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc
index 38c7d4ffef8..19efe6093ed 100644
--- a/tensorflow/core/kernels/quantized_activation_ops_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_activation_ops_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/core/kernels/quantized_batch_norm_op.cc b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_batch_norm_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc
index 18d83b41494..2a684824d37 100644
--- a/tensorflow/core/kernels/quantized_batch_norm_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
diff --git a/tensorflow/core/kernels/quantized_batch_norm_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_batch_norm_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc
index 9880d972cde..ccb6a59ecfb 100644
--- a/tensorflow/core/kernels/quantized_batch_norm_op_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_batch_norm_op_test.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/common_runtime/eigen_thread_pool.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_bias_add_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
index 0b34bfcad83..c319eb97daa 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the bias addition operation.
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
diff --git a/tensorflow/core/kernels/quantized_bias_add_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_bias_add_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc
index 3fd0eaa9814..56535029b50 100644
--- a/tensorflow/core/kernels/quantized_bias_add_op_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_bias_add_op_test.cc
@@ -15,7 +15,7 @@ limitations under the License.
 #include
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_concat_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_concat_op.cc
index f929dd61cbc..abe8c9138d5 100644
--- a/tensorflow/core/kernels/quantized_concat_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_concat_op.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_types.h"
diff --git a/tensorflow/core/kernels/quantized_concat_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_concat_op_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc
index dc1439279e5..1301259fddf 100644
--- a/tensorflow/core/kernels/quantized_concat_op_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_concat_op_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include
 #include
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_conv_ops.cc
rename to tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc
index fb69d770c0b..b25bff45a11 100644
--- a/tensorflow/core/kernels/quantized_conv_ops.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include
 
 #include "public/gemmlowp.h"
-#include "tensorflow/core/kernels/quantization_utils.h"
-#include "tensorflow/core/kernels/reference_gemm.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/kernels/ops_util.h"
diff --git a/tensorflow/core/kernels/quantized_conv_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc
similarity index 99%
rename from tensorflow/core/kernels/quantized_conv_ops_test.cc
rename to tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc
index 01e55f85939..6a07004a92c 100644
--- a/tensorflow/core/kernels/quantized_conv_ops_test.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_conv_ops_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include
 #include
 
-#include "tensorflow/core/kernels/quantization_utils.h"
+#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
 #include "tensorflow/core/framework/allocator.h"
 #include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/graph.pb.h"
diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc
similarity index 98%
rename from tensorflow/core/kernels/quantized_matmul_op.cc
rename to tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc
index 0ce9e376423..18de2d1d97f 100644
--- a/tensorflow/core/kernels/quantized_matmul_op.cc
+++ b/tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc
@@ -16,8 +16,8 @@ limitations under the License.
 
 // Implements a quantized eight-bit version of the matmul operation.
#include "public/gemmlowp.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/kernels/reference_gemm.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/reference_gemm.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/kernels/quantized_matmul_op_test.cc b/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc similarity index 99% rename from tensorflow/core/kernels/quantized_matmul_op_test.cc rename to tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc index e82464d4e72..3eea7518186 100644 --- a/tensorflow/core/kernels/quantized_matmul_op_test.cc +++ b/tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc @@ -17,7 +17,7 @@ limitations under the License. #include #include -#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc similarity index 100% rename from tensorflow/core/kernels/quantized_pooling_ops.cc rename to tensorflow/contrib/quantization/kernels/quantized_pooling_ops.cc diff --git a/tensorflow/core/kernels/quantized_pooling_ops_test.cc b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc similarity index 98% rename from tensorflow/core/kernels/quantized_pooling_ops_test.cc rename to tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc index d3247d15d6c..3bc05ed455a 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops_test.cc +++ b/tensorflow/contrib/quantization/kernels/quantized_pooling_ops_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/contrib/quantization/kernels/quantization_utils.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/graph.pb.h" diff --git a/tensorflow/core/kernels/reference_gemm.h b/tensorflow/contrib/quantization/kernels/reference_gemm.h similarity index 92% rename from tensorflow/core/kernels/reference_gemm.h rename to tensorflow/contrib/quantization/kernels/reference_gemm.h index 5e4cde07d76..5af3a771282 100644 --- a/tensorflow/core/kernels/reference_gemm.h +++ b/tensorflow/contrib/quantization/kernels/reference_gemm.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
 ==============================================================================*/
 
-#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
-#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
 
 // This is an unoptimized but debuggable implementation of the GEMM matrix
 // multiply function, used to compare to faster but more opaque versions, or
@@ -87,4 +87,4 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c,
 }
 }  // namespace tensorflow
 
-#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
diff --git a/tensorflow/contrib/quantization/load_quantized_ops_so.py b/tensorflow/contrib/quantization/load_quantized_ops_so.py
new file mode 100644
index 00000000000..6eb424e534e
--- /dev/null
+++ b/tensorflow/contrib/quantization/load_quantized_ops_so.py
@@ -0,0 +1,48 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Ops for quantized evaluation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+
+import tensorflow as tf
+
+QUANTIZED_OPS_FILE = '_quantized_ops.so'
+
+_quantized_ops = None
+_ops_lock = threading.Lock()
+
+
+# Workaround for the fact that importing tensorflow imports contrib
+# (even if a user isn't using this or any other contrib op), but
+# there's not yet any guarantee that the shared object exists. In that
+# case, "import tensorflow" would always crash, even for users that
+# never use contrib.
+def Load(library_base_dir=''):
+  """Load the quantized ops library and return the loaded module."""
+  with _ops_lock:
+    global _quantized_ops
+    if not _quantized_ops:
+      data_files_path = os.path.join(library_base_dir,
+                                     tf.resource_loader.get_data_files_path())
+      tf.logging.info('data path: %s', data_files_path)
+      _quantized_ops = tf.load_op_library(os.path.join(
+          data_files_path, QUANTIZED_OPS_FILE))
+
+    assert _quantized_ops, 'Could not load _quantized_ops.so'
+  return _quantized_ops
diff --git a/tensorflow/contrib/quantization/ops/array_ops.cc b/tensorflow/contrib/quantization/ops/array_ops.cc
new file mode 100644
index 00000000000..ff636c79578
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/array_ops.cc
@@ -0,0 +1,195 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("QuantizeV2")
+    .Input("input: float")
+    .Input("min_range: float")
+    .Input("max_range: float")
+    .Output("output: T")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("T: quantizedtype")
+    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
+
+[min_range, max_range] are scalar floats that specify the range for
+the 'input' data. The 'mode' attribute controls exactly which calculations are
+used to convert the float values to their quantized equivalents.
+
+In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
+
+```
+out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
+if T == qint8, out[i] -= (range(T) + 1) / 2.0
+```
+here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
+
+*MIN_COMBINED Mode Example*
+
+Assume the input is type float and has a possible range of [0.0, 6.0] and the
+output type is quint8 ([0, 255]). The min_range and max_range values should be
+specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
+value of the input by 255/6 and cast to quint8.
+
+If the output type was qint8 ([-128, 127]), the operation will additionally
+subtract 128 from each value prior to casting, so that the range of values
+aligns with the range of qint8.
+
+If the mode is 'MIN_FIRST', then this approach is used:
+
+```
+number_of_steps = 1 << (# of bits in T)
+range_adjust = number_of_steps / (number_of_steps - 1)
+range = (range_max - range_min) * range_adjust
+range_scale = number_of_steps / range
+quantized = round(input * range_scale) - round(range_min * range_scale) +
+  numeric_limits<T>::min()
+quantized = max(quantized, numeric_limits<T>::min())
+quantized = min(quantized, numeric_limits<T>::max())
+```
+
+The biggest difference between this and MIN_COMBINED is that the minimum range
+is rounded first, before it's subtracted from the rounded value. With
+MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
+and dequantizing will introduce a larger and larger error.
+
+One thing to watch out for is that the operator may choose to adjust the
+requested minimum and maximum values slightly during the quantization process,
+so you should always use the output ports as the range for further calculations.
+For example, if the requested minimum and maximum values are close to equal,
+they will be separated by a small epsilon value to prevent ill-formed quantized
+buffers from being created. Otherwise, you can end up with buffers where all the
+quantized values map to the same float value, which causes problems for
+operations that have to perform further calculations on them.
+
+min_range: The minimum scalar value possibly produced for the input.
+max_range: The maximum scalar value possibly produced for the input.
+output: The quantized data produced from the float input.
+output_min: The actual minimum scalar value used for the output.
+output_max: The actual maximum scalar value used for the output.
+
+)doc");
+
+REGISTER_OP("Dequantize")
+    .Input("input: T")
+    .Input("min_range: float")
+    .Input("max_range: float")
+    .Output("output: float")
+    .Attr("T: quantizedtype")
+    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Dequantize the 'input' tensor into a float Tensor.
+
+[min_range, max_range] are scalar floats that specify the range for
+the 'input' data. The 'mode' attribute controls exactly which calculations are
+used to convert the quantized values to their float equivalents.
+
+In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
+
+```
+if T == qint8, in[i] += (range(T) + 1) / 2.0
+out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
+```
+here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
+
+*MIN_COMBINED Mode Example*
+
+If the input comes from a QuantizedRelu6, the output type is
+quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
+0-6. The min_range and max_range values are therefore 0.0 and 6.0.
+Dequantize on quint8 will take each value, cast to float, and multiply
+by 6 / 255.
+Note that if quantizedtype is qint8, the operation will additionally add
+128 to each value prior to casting.
+
+If the mode is 'MIN_FIRST', then this approach is used:
+
+```
+number_of_steps = 1 << (# of bits in T)
+range_adjust = number_of_steps / (number_of_steps - 1)
+range = (range_max - range_min) * range_adjust
+range_scale = range / number_of_steps
+const double offset_input = static_cast<double>(input) - lowest_quantized;
+result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
+```
+
+min_range: The minimum scalar value possibly produced for the input.
+max_range: The maximum scalar value possibly produced for the input.
+
+)doc");
+
+REGISTER_OP("QuantizedConcat")
+    .Input("concat_dim: int32")
+    .Input("values: N * T")
+    .Input("input_mins: N * float32")
+    .Input("input_maxes: N * float32")
+    .Output("output: T")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("N: int >= 2")
+    .Attr("T: type")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
+      ShapeHandle unused;
+      for (int i = 2; i < c->num_inputs(); ++i) {
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
+      }
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Concatenates quantized tensors along one dimension.
+
+concat_dim: 0-D. The dimension along which to concatenate. Must be in the
+  range [0, rank(values)).
+values: The `N` Tensors to concatenate. Their ranks and types must match,
+  and their sizes must match in all dimensions except `concat_dim`.
+input_mins: The minimum scalar values for each of the input tensors.
+input_maxes: The maximum scalar values for each of the input tensors.
+output_min: The float value that the minimum quantized output value represents.
+output_max: The float value that the maximum quantized output value represents.
+output: A `Tensor` with the concatenation of values stacked along the
+  `concat_dim` dimension. This tensor's shape matches that of `values` except
+  in `concat_dim` where it has the sum of the sizes.
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/ops/math_ops.cc b/tensorflow/contrib/quantization/ops/math_ops.cc
new file mode 100644
index 00000000000..93bb2836303
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/math_ops.cc
@@ -0,0 +1,126 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("QuantizedMatMul")
+    .Input("a: T1")
+    .Input("b: T2")
+    .Input("min_a: float")
+    .Input("max_a: float")
+    .Input("min_b: float")
+    .Input("max_b: float")
+    .Output("out: Toutput")
+    .Output("min_out: float")
+    .Output("max_out: float")
+    .Attr("T1: quantizedtype")
+    .Attr("T2: quantizedtype")
+    .Attr("Toutput: quantizedtype = DT_QINT32")
+    .Attr("transpose_a: bool = false")
+    .Attr("transpose_b: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
+
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Perform a quantized matrix multiplication of `a` by the matrix `b`.
+
+The inputs must be two-dimensional matrices and the inner dimension of
+`a` (after being transposed if `transpose_a` is non-zero) must match the
+outer dimension of `b` (after being transposed if `transpose_b` is
+non-zero).
+
+a: Must be a two-dimensional tensor.
+b: Must be a two-dimensional tensor.
+transpose_a: If true, `a` is transposed before multiplication.
+transpose_b: If true, `b` is transposed before multiplication.
+min_a: The float value that the lowest quantized `a` value represents.
+max_a: The float value that the highest quantized `a` value represents.
+min_b: The float value that the lowest quantized `b` value represents.
+max_b: The float value that the highest quantized `b` value represents.
+min_out: The float value that the lowest quantized output value represents.
+max_out: The float value that the highest quantized output value represents.
+
+)doc");
+
+REGISTER_OP("QuantizeDownAndShrinkRange")
+    .Input("input: Tinput")
+    .Input("input_min: float")
+    .Input("input_max: float")
+    .Output("output: out_type")
+    .Output("output_min: float")
+    .Output("output_max: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Convert the quantized 'input' tensor into a lower-precision 'output', using the
+actual distribution of the values to maximize the usage of the lower bit depth
+and adjusting the output min and max ranges accordingly.
+
+[input_min, input_max] are scalar floats that specify the range for the float
+interpretation of the 'input' data. For example, if input_min is -1.0f and
+input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
+value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
+
+This operator tries to squeeze as much precision as possible into an output with
+a lower bit depth by calculating the actual min and max values found in the
+data. For example, maybe that quint16 input has no values lower than 16,384 and
+none higher than 49,152. That means only half the range is actually needed, all
+the float interpretations are between -0.5f and 0.5f, so if we want to compress
+the data into a quint8 output, we can use that range rather than the theoretical
+-1.0f to 1.0f that is suggested by the input min and max.
+
+In practice, this is most useful for taking output from operations like
+QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
+may have large potential output ranges, but typically have a distribution of
+input values that only uses a small fraction of the possible range. By feeding
+that output into this operator, we can reduce it from 32 bits down to 8 with
+minimal loss of accuracy.
+
+input_min: The float value that the minimum quantized input value represents.
+input_max: The float value that the maximum quantized input value represents.
+Tinput: The type of the input.
+output_min: The float value that the minimum quantized output value represents.
+output_max: The float value that the maximum quantized output value represents.
+out_type: The type of the output. Should be a lower bit depth than Tinput.
+
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/ops/nn_ops.cc b/tensorflow/contrib/quantization/ops/nn_ops.cc
new file mode 100644
index 00000000000..720377043de
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/nn_ops.cc
@@ -0,0 +1,348 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+
+)doc");
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/quantization/ops/nn_ops.cc b/tensorflow/contrib/quantization/ops/nn_ops.cc
new file mode 100644
index 00000000000..720377043de
--- /dev/null
+++ b/tensorflow/contrib/quantization/ops/nn_ops.cc
@@ -0,0 +1,348 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/util/padding.h"
+
+namespace tensorflow {
+
+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
+REGISTER_OP("QuantizedAvgPool")
+    .Input("input: T")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Output("output: T")
+    .Output("min_output: float")
+    .Output("max_output: float")
+    .Attr("T: quantizedtype")
+    .Attr("ksize: list(int)")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Produces the average pool of the input tensor for quantized types.
+
+input: 4-D with shape `[batch, height, width, channels]`.
+ksize: The size of the window for each dimension of the input tensor.
+  The length must be 4 to match the number of dimensions of the input.
+strides: The stride of the sliding window for each dimension of the input
+  tensor. The length must be 4 to match the number of dimensions of the input.
+padding: The type of padding algorithm to use.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_output: The float value that the lowest quantized output value represents.
+max_output: The float value that the highest quantized output value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedBiasAdd")
+    .Input("input: T1")
+    .Input("bias: T2")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Input("min_bias: float")
+    .Input("max_bias: float")
+    .Output("output: out_type")
+    .Output("min_out: float")
+    .Output("max_out: float")
+    .Attr("T1: quantizedtype")
+    .Attr("T2: quantizedtype")
+    .Attr("out_type: quantizedtype")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Adds Tensor 'bias' to Tensor 'input' for quantized types.
+
+Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
+
+bias: A 1D bias Tensor with size matching the last dimension of 'input'.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_bias: The float value that the lowest quantized bias value represents.
+max_bias: The float value that the highest quantized bias value represents.
+min_out: The float value that the lowest quantized output value represents.
+max_out: The float value that the highest quantized output value represents.
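+
+Informally, in terms of the float values that the quantized tensors represent
+(a sketch, not an exact description of the rounding behavior):
+
+```
+output_float[..., c] = input_float[..., c] + bias_float[c]
+```
+
+with [min_out, max_out] reported so the quantized sums can be interpreted the
+same way.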
+
+)doc");
+
+REGISTER_OP("QuantizedConv2D")
+    .Input("input: Tinput")
+    .Input("filter: Tfilter")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Input("min_filter: float")
+    .Input("max_filter: float")
+    .Output("output: out_type")
+    .Output("min_output: float")
+    .Output("max_output: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("Tfilter: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QINT32")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes a 2D convolution given quantized 4D input and filter tensors.
+
+The inputs are quantized tensors where the lowest value represents the real
+number of the associated minimum, and the highest represents the maximum.
+This means that you can only interpret the quantized output in the same way, by
+taking the returned minimum and maximum values into account.
+
+filter: filter's input_depth dimension must match input's depth dimension.
+strides: The stride of the sliding window for each dimension of the input
+  tensor.
+padding: The type of padding algorithm to use.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_filter: The float value that the lowest quantized filter value represents.
+max_filter: The float value that the highest quantized filter value represents.
+min_output: The float value that the lowest quantized output value represents.
+max_output: The float value that the highest quantized output value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedMaxPool")
+    .Input("input: T")
+    .Input("min_input: float")
+    .Input("max_input: float")
+    .Output("output: T")
+    .Output("min_output: float")
+    .Output("max_output: float")
+    .Attr("T: quantizedtype")
+    .Attr("ksize: list(int)")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Produces the max pool of the input tensor for quantized types.
+
+input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
+ksize: The size of the window for each dimension of the input tensor.
+  The length must be 4 to match the number of dimensions of the input.
+strides: The stride of the sliding window for each dimension of the input
+  tensor. The length must be 4 to match the number of dimensions of the input.
+padding: The type of padding algorithm to use.
+min_input: The float value that the lowest quantized input value represents.
+max_input: The float value that the highest quantized input value represents.
+min_output: The float value that the lowest quantized output value represents.
+max_output: The float value that the highest quantized output value represents.
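+
+Because the quantized encoding is monotonic, taking the max of the quantized
+values matches taking the max of the floats they represent, so as a sketch
+(assuming the input range is simply passed through):
+
+```
+output = elementwise max of input over each pooling window
+min_output = min_input
+max_output = max_input
+```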
+
+)doc");
+
+REGISTER_OP("QuantizedRelu")
+    .Input("features: Tinput")
+    .Input("min_features: float")
+    .Input("max_features: float")
+    .Output("activations: out_type")
+    .Output("min_activations: float")
+    .Output("max_activations: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes Quantized Rectified Linear: `max(features, 0)`
+
+activations: Has the same output shape as "features".
+min_features: The float value that the lowest quantized value represents.
+max_features: The float value that the highest quantized value represents.
+min_activations: The float value that the lowest quantized value represents.
+max_activations: The float value that the highest quantized value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedRelu6")
+    .Input("features: Tinput")
+    .Input("min_features: float")
+    .Input("max_features: float")
+    .Output("activations: out_type")
+    .Output("min_activations: float")
+    .Output("max_activations: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
+
+activations: Has the same output shape as "features".
+min_features: The float value that the lowest quantized value represents.
+max_features: The float value that the highest quantized value represents.
+min_activations: The float value that the lowest quantized value represents.
+max_activations: The float value that the highest quantized value represents.
+
+)doc");
+
+REGISTER_OP("QuantizedReluX")
+    .Input("features: Tinput")
+    .Input("max_value: float")
+    .Input("min_features: float")
+    .Input("max_features: float")
+    .Output("activations: out_type")
+    .Output("min_activations: float")
+    .Output("max_activations: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype = DT_QUINT8")
+    .SetShapeFn([](InferenceContext* c) {
+      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+      // Also check that max_features (input 3) is a scalar, for consistency
+      // with the other scalar range inputs checked above.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
+
+activations: Has the same output shape as "features".
+max_value: The float ceiling that activations are clamped to.
+min_features: The float value that the lowest quantized value represents.
+max_features: The float value that the highest quantized value represents.
+min_activations: The float value that the lowest quantized value represents.
+max_activations: The float value that the highest quantized value represents.
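+
+As a sketch in terms of the represented float values:
+
+```
+activations_float = min(max(features_float, 0), max_value)
+```
+
+with min_activations and max_activations giving the float interpretation of
+the quantized activations in the same way as the inputs.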
+
+)doc");
+
+REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
+    .Input("t: Tinput")
+    .Input("t_min: float")
+    .Input("t_max: float")
+    .Input("m: Tinput")
+    .Input("m_min: float")
+    .Input("m_max: float")
+    .Input("v: Tinput")
+    .Input("v_min: float")
+    .Input("v_max: float")
+    .Input("beta: Tinput")
+    .Input("beta_min: float")
+    .Input("beta_max: float")
+    .Input("gamma: Tinput")
+    .Input("gamma_min: float")
+    .Input("gamma_max: float")
+    .Output("result: out_type")
+    .Output("result_min: float")
+    .Output("result_max: float")
+    .Attr("Tinput: quantizedtype")
+    .Attr("out_type: quantizedtype")
+    .Attr("variance_epsilon: float")
+    .Attr("scale_after_normalization: bool")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle input;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
+
+      DimensionHandle last_dim = c->Dim(input, 3);
+      for (int i = 1; i < 5; ++i) {  // covers m, v, beta, gamma
+        ShapeHandle vec;
+        TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
+        TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
+      }
+
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
+      c->set_output(0, out);
+      c->set_output(1, c->Scalar());
+      c->set_output(2, c->Scalar());
+
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Quantized batch normalization.
+
+This op is deprecated and will be removed in the future. Prefer
+`tf.nn.batch_normalization`.
+
+t: A 4D input Tensor.
+t_min: The value represented by the lowest quantized input.
+t_max: The value represented by the highest quantized input.
+m: A 1D mean Tensor with size matching the last dimension of t.
+  This is the first output from tf.nn.moments,
+  or a saved moving average thereof.
+m_min: The value represented by the lowest quantized mean.
+m_max: The value represented by the highest quantized mean.
+v: A 1D variance Tensor with size matching the last dimension of t.
+  This is the second output from tf.nn.moments,
+  or a saved moving average thereof.
+v_min: The value represented by the lowest quantized variance.
+v_max: The value represented by the highest quantized variance.
+beta: A 1D beta Tensor with size matching the last dimension of t.
+  An offset to be added to the normalized tensor.
+beta_min: The value represented by the lowest quantized offset.
+beta_max: The value represented by the highest quantized offset.
+gamma: A 1D gamma Tensor with size matching the last dimension of t.
+  If "scale_after_normalization" is true, this tensor will be multiplied
+  with the normalized tensor.
+gamma_min: The value represented by the lowest quantized gamma.
+gamma_max: The value represented by the highest quantized gamma.
+variance_epsilon: A small float number to avoid dividing by 0.
+scale_after_normalization: A bool indicating whether the resulting tensor
+  needs to be multiplied with gamma.
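+
+Ignoring quantization round-off, the result corresponds to the float
+computation (a sketch; all names refer to the float values the quantized
+tensors represent):
+
+```
+x_norm = (t - m) / sqrt(v + variance_epsilon)
+result = x_norm * gamma + beta   (if scale_after_normalization)
+result = x_norm + beta           (otherwise)
+```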
+)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/python/array_ops.py b/tensorflow/contrib/quantization/python/array_ops.py index b873d4df144..2ab65e903e9 100644 --- a/tensorflow/contrib/quantization/python/array_ops.py +++ b/tensorflow/contrib/quantization/python/array_ops.py @@ -19,7 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import -from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops -from tensorflow.python.ops.gen_array_ops import dequantize -from tensorflow.python.ops.gen_array_ops import quantize_v2 -from tensorflow.python.ops.gen_array_ops import quantized_concat +from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops +from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize +from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2 +from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat diff --git a/tensorflow/python/ops/dequantize_op_test.py b/tensorflow/contrib/quantization/python/dequantize_op_test.py similarity index 87% rename from tensorflow/python/ops/dequantize_op_test.py rename to tensorflow/contrib/quantization/python/dequantize_op_test.py index e60c22de638..b1d47cc4a2e 100644 --- a/tensorflow/python/ops/dequantize_op_test.py +++ b/tensorflow/contrib/quantization/python/dequantize_op_test.py @@ -21,16 +21,24 @@ from __future__ import print_function import numpy as np import tensorflow as tf +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + class DequantizeOpTest(tf.test.TestCase): def __init__(self, method_name="runTest"): super(DequantizeOpTest, self).__init__(method_name) + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() def _testDequantizeOp(self, inputs, min_range, max_range, dtype): with self.test_session(): input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype) - dequantized = tf.dequantize( + dequantized = tf.contrib.quantization.dequantize( input_op, min_range, max_range) tf_ans = dequantized.eval() diff --git a/tensorflow/contrib/quantization/python/math_ops.py b/tensorflow/contrib/quantization/python/math_ops.py index d863cdad26f..d4fabbd36bd 100644 --- a/tensorflow/contrib/quantization/python/math_ops.py +++ b/tensorflow/contrib/quantization/python/math_ops.py @@ -19,7 +19,10 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.quantization.ops import gen_math_ops +from tensorflow.contrib.quantization.ops.gen_math_ops import * from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops.gen_math_ops import * + + +ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/quantization/python/nn_ops.py b/tensorflow/contrib/quantization/python/nn_ops.py index fd28423317a..d31f1d4e686 100644 --- a/tensorflow/contrib/quantization/python/nn_ops.py +++ b/tensorflow/contrib/quantization/python/nn_ops.py @@ -19,7 +19,17 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import +from 
tensorflow.contrib.quantization.ops import gen_nn_ops +from tensorflow.contrib.quantization.ops.gen_nn_ops import * from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_nn_ops -from tensorflow.python.ops.gen_nn_ops import * + + +ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py similarity index 87% rename from tensorflow/python/ops/quantized_conv_ops_test.py rename to tensorflow/contrib/quantization/python/quantized_conv_ops_test.py index f98dd39f091..9b24d4129de 100644 --- a/tensorflow/python/ops/quantized_conv_ops_test.py +++ b/tensorflow/contrib/quantization/python/quantized_conv_ops_test.py @@ -21,11 +21,19 @@ from __future__ import print_function import numpy as np import tensorflow as tf +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + class Conv2DTest(tf.test.TestCase): def __init__(self, method_name="runTest"): super(Conv2DTest, self).__init__(method_name) + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, expected): @@ -59,16 +67,16 @@ class Conv2DTest(tf.test.TestCase): with self.test_session(use_gpu=False) as sess: t1 = tf.constant(x1, shape=tensor_in_sizes, dtype=tf.quint8) t2 = tf.constant(x2, shape=filter_in_sizes, dtype=tf.quint8) - conv = tf.nn.quantized_conv2d(t1, - t2, - out_type=tf.qint32, - strides=[1, stride, - stride, 1], - padding=padding, - min_input=x1_min, - max_input=x1_max, - min_filter=x2_min, - max_filter=x2_max) + conv = tf.contrib.quantization.quantized_conv2d(t1, + t2, + out_type=tf.qint32, + strides=[1, stride, + stride, 1], + padding=padding, + min_input=x1_min, + max_input=x1_max, + min_filter=x2_min, + max_filter=x2_max) value = sess.run(conv) quantized_output = value[0] output_min = value[1] diff --git a/tensorflow/tools/quantization/BUILD b/tensorflow/contrib/quantization/tools/BUILD similarity index 78% rename from tensorflow/tools/quantization/BUILD rename to tensorflow/contrib/quantization/tools/BUILD index 5d8115eefc8..82a13e04d64 100644 --- a/tensorflow/tools/quantization/BUILD +++ b/tensorflow/contrib/quantization/tools/BUILD @@ -13,6 +13,9 @@ py_library( srcs_version = "PY2AND3", deps = [ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:ops", + "//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", "//tensorflow/python:platform", ], ) @@ -23,6 +26,9 @@ py_binary( srcs_version = "PY2AND3", deps = [ "//tensorflow:tensorflow_py", + "//tensorflow/contrib/quantization:ops", + 
"//tensorflow/contrib/quantization:quantized_ops_py", + "//tensorflow/contrib/quantization/kernels:quantized_kernels_py", "//tensorflow/python:platform", ], ) diff --git a/tensorflow/tools/quantization/graph_to_dot.py b/tensorflow/contrib/quantization/tools/graph_to_dot.py similarity index 100% rename from tensorflow/tools/quantization/graph_to_dot.py rename to tensorflow/contrib/quantization/tools/graph_to_dot.py diff --git a/tensorflow/tools/quantization/quantize_graph.py b/tensorflow/contrib/quantization/tools/quantize_graph.py similarity index 98% rename from tensorflow/tools/quantization/quantize_graph.py rename to tensorflow/contrib/quantization/tools/quantize_graph.py index 501a907993f..5ded5566915 100644 --- a/tensorflow/tools/quantization/quantize_graph.py +++ b/tensorflow/contrib/quantization/tools/quantize_graph.py @@ -35,6 +35,13 @@ import tensorflow as tf from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_util +# TODO(petewarden) - Remove this ugly hack to get around Python linking problems +# with Bazel. +# pylint: disable=g-bad-import-order +from tensorflow.contrib.quantization import load_quantized_ops_so +from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so + + flags = tf.app.flags FLAGS = flags.FLAGS @@ -53,6 +60,8 @@ flags.DEFINE_string("test_input_dims", "1,224,224,3", """ graph loaded from a file.""") flags.DEFINE_boolean("strip_redundant_quantization", True, """Removes redundant dequantize/quantize pairs.""") +flags.DEFINE_boolean("load_quantization_so", True, + """Explicitly load the quantization ops library""") def print_input_nodes(current_node, nodes_map, indent, already_visited): @@ -241,7 +250,7 @@ def quantize_weight_eightbit(input_node, quantization_mode): sess = tf.Session() with sess.as_default(): - quantize_op = tf.quantize_v2( + quantize_op = tf.contrib.quantization.python.quantize_v2( float_tensor, min_value, max_value, @@ -281,6 +290,9 @@ class GraphRewriter(object): self.nodes_map = self.create_nodes_map(input_graph) self.output_graph = None self.mode = mode + if FLAGS.load_quantization_so: + load_quantized_ops_so.Load() + load_quantized_kernels_so.Load() def create_nodes_map(self, graph): """Builds a mapping of node names to their defs from the graph.""" diff --git a/tensorflow/tools/quantization/quantize_graph_test.py b/tensorflow/contrib/quantization/tools/quantize_graph_test.py similarity index 99% rename from tensorflow/tools/quantization/quantize_graph_test.py rename to tensorflow/contrib/quantization/tools/quantize_graph_test.py index a45c68df05a..4826ea26896 100644 --- a/tensorflow/tools/quantization/quantize_graph_test.py +++ b/tensorflow/contrib/quantization/tools/quantize_graph_test.py @@ -20,11 +20,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import numpy as np + import tensorflow as tf +from tensorflow.contrib.quantization.tools import quantize_graph from tensorflow.python.framework import graph_util -from tensorflow.tools.quantization import quantize_graph flags = tf.app.flags FLAGS = flags.FLAGS diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 243601c6e48..e2774069df5 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -487,7 +487,6 @@ cc_library( "//tensorflow/core/kernels:nn", "//tensorflow/core/kernels:parameterized_truncated_normal_op", "//tensorflow/core/kernels:parsing", - "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:random_ops", 
"//tensorflow/core/kernels:required", "//tensorflow/core/kernels:sdca_ops", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 306f61e5ec3..f391aa9e3f5 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -29,7 +29,6 @@ load( "tf_cc_tests", "tf_copts", "tf_cuda_library", - "tf_custom_op_library", "tf_opts_nortti_if_android", "tf_kernel_libraries", "tf_kernel_library", @@ -2163,7 +2162,6 @@ filegroup( srcs = [ ":android_extended_ops_group1", ":android_extended_ops_group2", - ":android_quantized_ops", ], visibility = ["//visibility:public"], ) @@ -2293,26 +2291,6 @@ filegroup( ], ) -filegroup( - name = "android_quantized_ops", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantization_utils.h", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - "reference_gemm.h", - ], - visibility = ["//visibility:public"], -) - # A file group which contains nearly all available operators which # may work on Android. This is intended to be used with selective # registration. @@ -2370,7 +2348,6 @@ cc_library( "//tensorflow:android": [ "//tensorflow/core/kernels:android_core_ops", "//tensorflow/core/kernels:android_extended_ops", - "@gemmlowp//:eight_bit_int_gemm_sources", ], "//conditions:default": [], }), @@ -2388,264 +2365,6 @@ cc_library( alwayslink = 1, ) -# Quantization-specific OpKernels - -tf_kernel_library( - name = "quantized_ops", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - ], - hdrs = [ - "quantization_utils.h", - "reference_gemm.h", - ], - deps = [ - "//tensorflow/core", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:conv_ops", - "//tensorflow/core/kernels:eigen_helpers", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/kernels:pooling_ops", - "//third_party/eigen3", - "@gemmlowp//:eight_bit_int_gemm", - ], -) - -tf_custom_op_library( - name = "_quantized_kernels.so", - srcs = [ - "dequantize_op.cc", - "quantization_utils.cc", - "quantization_utils.h", - "quantize_down_and_shrink_range.cc", - "quantize_op.cc", - "quantized_activation_ops.cc", - "quantized_batch_norm_op.cc", - "quantized_bias_add_op.cc", - "quantized_concat_op.cc", - "quantized_conv_ops.cc", - "quantized_matmul_op.cc", - "quantized_pooling_ops.cc", - "reference_gemm.h", - ], - deps = [ - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:ops_util_hdrs", - "//tensorflow/core/kernels:pooling_ops_hdrs", - "@gemmlowp//:eight_bit_int_gemm", - ], -) - -tf_cc_test( - name = "quantize_down_and_shrink_range_op_test", - size = "small", - srcs = ["quantize_down_and_shrink_range_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - 
"//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantization_utils_test", - srcs = ["quantization_utils_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//third_party/eigen3", - ], -) - -tf_cc_test( - name = "quantized_activation_ops_test", - srcs = ["quantized_activation_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_bias_add_op_test", - size = "small", - srcs = ["quantized_bias_add_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_conv_ops_test", - size = "small", - srcs = ["quantized_conv_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantize_op_test", - size = "small", - srcs = ["quantize_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_matmul_op_test", - size = "small", - srcs = ["quantized_matmul_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_pooling_ops_test", - size = "small", - srcs = ["quantized_pooling_ops_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - 
"//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_concat_op_test", - size = "small", - srcs = ["quantized_concat_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_cc_test( - name = "quantized_batch_norm_op_test", - size = "small", - srcs = ["quantized_batch_norm_op_test.cc"], - deps = [ - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:batch_norm_op", - "//tensorflow/core/kernels:ops_testutil", - "//third_party/eigen3", - ], -) - # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 8156900cdcd..12e50ce84ec 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -4030,176 +4030,6 @@ debug_urls: List of URLs to debug targets, e.g., file:///foo/tfdbg_dump, grpc:://localhost:11011 )doc"); -REGISTER_OP("QuantizeV2") - .Input("input: float") - .Input("min_range: float") - .Input("max_range: float") - .Output("output: T") - .Output("output_min: float") - .Output("output_max: float") - .Attr("T: quantizedtype") - .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. - -[min_range, max_range] are scalar floats that specify the range for -the 'input' data. The 'mode' attribute controls exactly which calculations are -used to convert the float values to their quantized equivalents. - -In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: - -``` -out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -if T == qint8, out[i] -= (range(T) + 1) / 2.0 -``` -here `range(T) = numeric_limits::max() - numeric_limits::min()` - -*MIN_COMBINED Mode Example* - -Assume the input is type float and has a possible range of [0.0, 6.0] and the -output type is quint8 ([0, 255]). The min_range and max_range values should be -specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -value of the input by 255/6 and cast to quint8. 
- -If the output type was qint8 ([-128, 127]), the operation will additionally -subtract each value by 128 prior to casting, so that the range of values aligns -with the range of qint8. - -If the mode is 'MIN_FIRST', then this approach is used: - -``` -number_of_steps = 1 << (# of bits in T) -range_adjust = number_of_steps / (number_of_steps - 1) -range = (range_max - range_min) * range_adjust -range_scale = number_of_steps / range -quantized = round(input * range_scale) - round(range_min * range_scale) + - numeric_limits::min() -quantized = max(quantized, numeric_limits::min()) -quantized = min(quantized, numeric_limits::max()) -``` - -The biggest difference between this and MIN_COMBINED is that the minimum range -is rounded first, before it's subtracted from the rounded value. With -MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -and dequantizing will introduce a larger and larger error. - -One thing to watch out for is that the operator may choose to adjust the -requested minimum and maximum values slightly during the quantization process, -so you should always use the output ports as the range for further calculations. -For example, if the requested minimum and maximum values are close to equal, -they will be separated by a small epsilon value to prevent ill-formed quantized -buffers from being created. Otherwise, you can end up with buffers where all the -quantized values map to the same float value, which causes problems for -operations that have to perform further calculations on them. - -min_range: The minimum scalar value possibly produced for the input. -max_range: The maximum scalar value possibly produced for the input. -output: The quantized data produced from the float input. -output_min: The actual minimum scalar value used for the output. -output_max: The actual maximum scalar value used for the output. - -)doc"); - -REGISTER_OP("Dequantize") - .Input("input: T") - .Input("min_range: float") - .Input("max_range: float") - .Output("output: float") - .Attr("T: quantizedtype") - .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - return Status::OK(); - }) - .Doc(R"doc( -Dequantize the 'input' tensor into a float Tensor. - -[min_range, max_range] are scalar floats that specify the range for -the 'input' data. The 'mode' attribute controls exactly which calculations are -used to convert the float values to their quantized equivalents. - -In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: - -``` -if T == qint8, in[i] += (range(T) + 1)/ 2.0 -out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) -``` -here `range(T) = numeric_limits::max() - numeric_limits::min()` - -*MIN_COMBINED Mode Example* - -If the input comes from a QuantizedRelu6, the output type is -quint8 (range of 0-255) but the possible range of QuantizedRelu6 is -0-6. The min_range and max_range values are therefore 0.0 and 6.0. -Dequantize on quint8 will take each value, cast to float, and multiply -by 6 / 255. -Note that if quantizedtype is qint8, the operation will additionally add -each value by 128 prior to casting. 
- -If the mode is 'MIN_FIRST', then this approach is used: - -``` -number_of_steps = 1 << (# of bits in T) -range_adjust = number_of_steps / (number_of_steps - 1) -range = (range_max - range_min) * range_adjust -range_scale = range / number_of_steps -const double offset_input = static_cast(input) - lowest_quantized; -result = range_min + ((input - numeric_limits::min()) * range_scale) -``` - -min_range: The minimum scalar value possibly produced for the input. -max_range: The maximum scalar value possibly produced for the input. - -)doc"); - -REGISTER_OP("QuantizedConcat") - .Input("concat_dim: int32") - .Input("values: N * T") - .Input("input_mins: N * float32") - .Input("input_maxes: N * float32") - .Output("output: T") - .Output("output_min: float") - .Output("output_max: float") - .Attr("N: int >= 2") - .Attr("T: type") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c)); - ShapeHandle unused; - for (int i = std::max(0, c->num_inputs() - 2); i < c->num_inputs(); ++i) { - TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused)); - } - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Concatenates quantized tensors along one dimension. - -concat_dim: 0-D. The dimension along which to concatenate. Must be in the - range [0, rank(values)). -values: The `N` Tensors to concatenate. Their ranks and types must match, - and their sizes must match in all dimensions except `concat_dim`. -input_mins: The minimum scalar values for each of the input tensors. -input_maxes: The maximum scalar values for each of the input tensors. -output_min: The float value that the minimum quantized output value represents. -output_max: The float value that the maximum quantized output value represents. -output: A `Tensor` with the concatenation of values stacked along the - `concat_dim` dimension. This tensor's shape matches that of `values` except - in `concat_dim` where it has the sum of the sizes. -)doc"); - // Deprecated op registrations: // The following can be deleted after 10mar2017. diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 905a2b6a2d4..0034301690b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -2096,106 +2096,6 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0] ``` )doc"); -REGISTER_OP("QuantizedMatMul") - .Input("a: T1") - .Input("b: T2") - .Input("min_a: float") - .Input("max_a: float") - .Input("min_b: float") - .Input("max_b: float") - .Output("out: Toutput") - .Output("min_out: float") - .Output("max_out: float") - .Attr("T1: quantizedtype") - .Attr("T2: quantizedtype") - .Attr("Toutput: quantizedtype = DT_QINT32") - .Attr("transpose_a: bool = false") - .Attr("transpose_b: bool = false") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Perform a quantized matrix multiplication of `a` by the matrix `b`. 
- -The inputs must be two-dimensional matrices and the inner dimension of -`a` (after being transposed if `transpose_a` is non-zero) must match the -outer dimension of `b` (after being transposed if `transposed_b` is -non-zero). - -a: Must be a two-dimensional tensor. -b: Must be a two-dimensional tensor. -transpose_a: If true, `a` is transposed before multiplication. -transpose_b: If true, `b` is transposed before multiplication. -min_a: The float value that the lowest quantized `a` value represents. -max_a: The float value that the highest quantized `a` value represents. -min_b: The float value that the lowest quantized `b` value represents. -max_b: The float value that the highest quantized `b` value represents. -min_out: The float value that the lowest quantized output value represents. -max_out: The float value that the highest quantized output value represents. - -)doc"); - -REGISTER_OP("QuantizeDownAndShrinkRange") - .Input("input: Tinput") - .Input("input_min: float") - .Input("input_max: float") - .Output("output: out_type") - .Output("output_min: float") - .Output("output_max: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Convert the quantized 'input' tensor into a lower-precision 'output', using the -actual distribution of the values to maximize the usage of the lower bit depth -and adjusting the output min and max ranges accordingly. - -[input_min, input_max] are scalar floats that specify the range for the float -interpretation of the 'input' data. For example, if input_min is -1.0f and -input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. - -This operator tries to squeeze as much precision as possible into an output with -a lower bit depth by calculating the actual min and max values found in the -data. For example, maybe that quint16 input has no values lower than 16,384 and -none higher than 49,152. That means only half the range is actually needed, all -the float interpretations are between -0.5f and 0.5f, so if we want to compress -the data into a quint8 output, we can use that range rather than the theoretical --1.0f to 1.0f that is suggested by the input min and max. - -In practice, this is most useful for taking output from operations like -QuantizedMatMul that can produce higher bit-depth outputs than their inputs and -may have large potential output ranges, but in practice have a distribution of -input values that only uses a small fraction of the possible range. By feeding -that output into this operator, we can reduce it from 32 bits down to 8 with -minimal loss of accuracy. - -input_min: The float value that the minimum quantized input value represents. -input_max: The float value that the maximum quantized input value represents. -Tinput: The type of the input. -output_min: The float value that the minimum quantized output value represents. -output_max: The float value that the maximum quantized output value represents. -out_type: The type of the output. Should be a lower bit depth than Tinput. 
- -)doc"); - // Deprecated ops: REGISTER_OP("BatchFFT") .Input("input: complex64") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index a3f24d39984..0b5d1596675 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1962,324 +1962,4 @@ overlapping: When set to True, it means when pooling, the values at the boundary output: 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. )doc"); -REGISTER_OP("QuantizedAvgPool") - .Input("input: T") - .Input("min_input: float") - .Input("max_input: float") - .Output("output: T") - .Output("min_output: float") - .Output("max_output: float") - .Attr("T: quantizedtype") - .Attr("ksize: list(int)") - .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Produces the average pool of the input tensor for quantized types. - -input: 4-D with shape `[batch, height, width, channels]`. -ksize: The size of the window for each dimension of the input tensor. - The length must be 4 to match the number of dimensions of the input. -strides: The stride of the sliding window for each dimension of the input - tensor. The length must be 4 to match the number of dimensions of the input. -padding: The type of padding algorithm to use. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_output: The float value that the lowest quantized output value represents. -max_output: The float value that the highest quantized output value represents. - -)doc"); - -REGISTER_OP("QuantizedBiasAdd") - .Input("input: T1") - .Input("bias: T2") - .Input("min_input: float") - .Input("max_input: float") - .Input("min_bias: float") - .Input("max_bias: float") - .Output("output: out_type") - .Output("min_out: float") - .Output("max_out: float") - .Attr("T1: quantizedtype") - .Attr("T2: quantizedtype") - .Attr("out_type: quantizedtype") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Adds Tensor 'bias' to Tensor 'input' for Quantized types. - -Broadcasts the values of bias on dimensions 0..N-2 of 'input'. - -bias: A 1D bias Tensor with size matching the last dimension of 'input'. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_bias: The float value that the lowest quantized bias value represents. -max_bias: The float value that the highest quantized bias value represents. -min_out: The float value that the lowest quantized output value represents. -max_out: The float value that the highest quantized output value represents. 
- -)doc"); - -REGISTER_OP("QuantizedConv2D") - .Input("input: Tinput") - .Input("filter: Tfilter") - .Input("min_input: float") - .Input("max_input: float") - .Input("min_filter: float") - .Input("max_filter: float") - .Output("output: out_type") - .Output("min_output: float") - .Output("max_output: float") - .Attr("Tinput: quantizedtype") - .Attr("Tfilter: quantizedtype") - .Attr("out_type: quantizedtype = DT_QINT32") - .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes a 2D convolution given quantized 4D input and filter tensors. -The inputs are quantized tensors where the lowest value represents the real -number of the associated minimum, and the highest represents the maximum. -This means that you can only interpret the quantized output in the same way, by -taking the returned minimum and maximum values into account. - -filter: filter's input_depth dimension must match input's depth dimensions. -strides: The stride of the sliding window for each dimension of the input - tensor. -padding: The type of padding algorithm to use. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_filter: The float value that the lowest quantized filter value represents. -max_filter: The float value that the highest quantized filter value represents. -min_output: The float value that the lowest quantized output value represents. -max_output: The float value that the highest quantized output value represents. - -)doc"); - -REGISTER_OP("QuantizedMaxPool") - .Input("input: T") - .Input("min_input: float") - .Input("max_input: float") - .Output("output: T") - .Output("min_output: float") - .Output("max_output: float") - .Attr("T: quantizedtype") - .Attr("ksize: list(int)") - .Attr("strides: list(int)") - .Attr(GetPaddingAttrString()) - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Produces the max pool of the input tensor for quantized types. - -input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -ksize: The size of the window for each dimension of the input tensor. - The length must be 4 to match the number of dimensions of the input. -strides: The stride of the sliding window for each dimension of the input - tensor. The length must be 4 to match the number of dimensions of the input. -padding: The type of padding algorithm to use. -min_input: The float value that the lowest quantized input value represents. -max_input: The float value that the highest quantized input value represents. -min_output: The float value that the lowest quantized output value represents. -max_output: The float value that the highest quantized output value represents. 
- -)doc"); - -REGISTER_OP("QuantizedRelu") - .Input("features: Tinput") - .Input("min_features: float") - .Input("max_features: float") - .Output("activations: out_type") - .Output("min_activations: float") - .Output("max_activations: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype = DT_QUINT8") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes Quantized Rectified Linear: `max(features, 0)` - -activations: Has the same output shape as "features". -min_features: The float value that the lowest quantized value represents. -max_features: The float value that the highest quantized value represents. -min_activations: The float value that the lowest quantized value represents. -max_activations: The float value that the highest quantized value represents. - -)doc"); - -REGISTER_OP("QuantizedRelu6") - .Input("features: Tinput") - .Input("min_features: float") - .Input("max_features: float") - .Output("activations: out_type") - .Output("min_activations: float") - .Output("max_activations: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype = DT_QUINT8") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` - -activations: Has the same output shape as "features". -min_features: The float value that the lowest quantized value represents. -max_features: The float value that the highest quantized value represents. -min_activations: The float value that the lowest quantized value represents. -max_activations: The float value that the highest quantized value represents. - -)doc"); - -REGISTER_OP("QuantizedReluX") - .Input("features: Tinput") - .Input("max_value: float") - .Input("min_features: float") - .Input("max_features: float") - .Output("activations: out_type") - .Output("min_activations: float") - .Output("max_activations: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype = DT_QUINT8") - .SetShapeFn([](InferenceContext* c) { - TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); - ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - return Status::OK(); - }) - .Doc(R"doc( -Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` - -activations: Has the same output shape as "features". -min_features: The float value that the lowest quantized value represents. -max_features: The float value that the highest quantized value represents. -min_activations: The float value that the lowest quantized value represents. -max_activations: The float value that the highest quantized value represents. 
- -)doc"); - -REGISTER_OP("QuantizedBatchNormWithGlobalNormalization") - .Input("t: Tinput") - .Input("t_min: float") - .Input("t_max: float") - .Input("m: Tinput") - .Input("m_min: float") - .Input("m_max: float") - .Input("v: Tinput") - .Input("v_min: float") - .Input("v_max: float") - .Input("beta: Tinput") - .Input("beta_min: float") - .Input("beta_max: float") - .Input("gamma: Tinput") - .Input("gamma_min: float") - .Input("gamma_max: float") - .Output("result: out_type") - .Output("result_min: float") - .Output("result_max: float") - .Attr("Tinput: quantizedtype") - .Attr("out_type: quantizedtype") - .Attr("variance_epsilon: float") - .Attr("scale_after_normalization: bool") - .SetShapeFn([](InferenceContext* c) { - ShapeHandle input; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); - - DimensionHandle last_dim = c->Dim(input, 3); - for (int i = 1; i < 5; ++i) { // covers m, v, beta, gamma - ShapeHandle vec; - TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec)); - TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim)); - } - - ShapeHandle out; - TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out)); - c->set_output(0, out); - c->set_output(1, c->Scalar()); - c->set_output(2, c->Scalar()); - - return Status::OK(); - }) - .Doc(R"doc( -Quantized Batch normalization. - -This op is deprecated and will be removed in the future. Prefer -`tf.nn.batch_normalization`. - -t: A 4D input Tensor. -t_min: The value represented by the lowest quantized input. -t_max: The value represented by the highest quantized input. -m: A 1D mean Tensor with size matching the last dimension of t. - This is the first output from tf.nn.moments, - or a saved moving average thereof. -m_min: The value represented by the lowest quantized mean. -m_max: The value represented by the highest quantized mean. -v: A 1D variance Tensor with size matching the last dimension of t. - This is the second output from tf.nn.moments, - or a saved moving average thereof. -v_min: The value represented by the lowest quantized variance. -v_max: The value represented by the highest quantized variance. -beta: A 1D beta Tensor with size matching the last dimension of t. - An offset to be added to the normalized tensor. -beta_min: The value represented by the lowest quantized offset. -beta_max: The value represented by the highest quantized offset. -gamma: A 1D gamma Tensor with size matching the last dimension of t. - If "scale_after_normalization" is true, this tensor will be multiplied - with the normalized tensor. -gamma_min: The value represented by the lowest quantized gamma. -gamma_max: The value represented by the highest quantized gamma. -variance_epsilon: A small float number to avoid dividing by 0. -scale_after_normalization: A bool indicating whether the resulted tensor - needs to be multiplied with gamma. 
-)doc"); - } // namespace tensorflow diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 7ba6206bc25..fe2d56eec3a 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2100,33 +2100,6 @@ py_binary( ], ) -# ----------------------------------------------------------------------------- -# Quantization - -py_test( - name = "dequantize_op_test", - size = "small", - srcs = ["ops/dequantize_op_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":ops", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - -py_test( - name = "quantized_conv_ops_test", - size = "small", - srcs = ["ops/quantized_conv_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":ops", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 02370ab4ede..8548c78faf4 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -74,9 +74,6 @@ or join multiple tensors together. @@boolean_mask @@one_hot @@sequence_mask -@@dequantize -@@quantize_v2 -@@quantized_concat """ from __future__ import absolute_import @@ -2288,9 +2285,3 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None): return result else: return gen_math_ops.cast(result, dtype) - - -# TODO(cwhipkey): Verify and enable shape functions for these. -ops.RegisterShape("QuantizeV2")(None) -ops.RegisterShape("QuantizedBatchNormWithGlobalNormalization")(None) -ops.RegisterShape("QuantizedConcat")(None) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index ddf707ed83c..d5f51dee71e 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1910,6 +1910,3 @@ def reduced_shape(input_shape, axes): axes], # [1, 2] [input_shape, # [2, 3, 5, 7] array_ops.fill(axes_shape, 1)]) # [1, 1] - - -ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 2467a8ac66d..4eaaa8bf502 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1364,14 +1364,4 @@ def erosion2d(value, kernel, strides, rates, padding, name=None): padding=padding, name=name)) - -ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn) -ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn) - # pylint: enable=invalid-name diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 84db0002fbc..70fb6455c85 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -136,8 +136,7 @@ def if_not_mobile(a): }) def tf_copts(): - return (["-fno-exceptions", - "-DEIGEN_AVOID_STL_ARRAY"] + + return (["-fno-exceptions", "-DEIGEN_AVOID_STL_ARRAY"] + if_cuda(["-DGOOGLE_CUDA=1"]) + if_android_arm(["-mfpu=neon"]) + select({"//tensorflow:android": [ @@ -145,7 +144,6 @@ def tf_copts(): "-DMIN_LOG_LEVEL=0", "-DTF_LEAN_BINARY", "-O2", - "-Iexternal/gemmlowp", ], "//tensorflow:darwin": [], 
"//tensorflow:ios": ["-std=c++11",],