Move contrib/quantization ops to tensorflow/core

Change: 136410307
commit: 66024fd508
parent: 9b8ff3f50c
@@ -108,10 +108,6 @@ filegroup(
        "//tensorflow/contrib/metrics/kernels:all_files",
        "//tensorflow/contrib/ndlstm:all_files",
        "//tensorflow/contrib/opt:all_files",
        "//tensorflow/contrib/quantization:all_files",
        "//tensorflow/contrib/quantization/kernels:all_files",
        "//tensorflow/contrib/quantization/kernels/hexagon:all_files",
        "//tensorflow/contrib/quantization/tools:all_files",
        "//tensorflow/contrib/rnn:all_files",
        "//tensorflow/contrib/session_bundle:all_files",
        "//tensorflow/contrib/session_bundle/example:all_files",
@@ -133,6 +129,7 @@ filegroup(
        "//tensorflow/core/distributed_runtime:all_files",
        "//tensorflow/core/distributed_runtime/rpc:all_files",
        "//tensorflow/core/kernels:all_files",
        "//tensorflow/core/kernels/hexagon:all_files",
        "//tensorflow/core/ops/compat:all_files",
        "//tensorflow/core/platform/cloud:all_files",
        "//tensorflow/core/platform/default/build_config:all_files",
@@ -180,6 +177,7 @@ filegroup(
        "//tensorflow/tools/docs:all_files",
        "//tensorflow/tools/git:all_files",
        "//tensorflow/tools/proto_text:all_files",
        "//tensorflow/tools/quantization:all_files",
        "//tensorflow/tools/test:all_files",
        "//tensorflow/user_ops:all_files",
        "//third_party/hadoop:all_files",
@@ -60,6 +60,7 @@ include(gif)
include(png)
include(jpeg)
include(eigen)
include(gemmlowp)
include(jsoncpp)
include(farmhash)
include(highwayhash)
@@ -88,6 +89,7 @@ include_directories(
    ${png_INCLUDE_DIR}
    ${jpeg_INCLUDE_DIR}
    ${eigen_INCLUDE_DIRS}
    ${gemmlowp_INCLUDE_DIR}
    ${jsoncpp_INCLUDE_DIR}
    ${farmhash_INCLUDE_DIR}
    ${highwayhash_INCLUDE_DIR}
tensorflow/contrib/cmake/external/gemmlowp.cmake (new file, 15 lines, vendored)
@@ -0,0 +1,15 @@
include (ExternalProject)

set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz)
set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882)
set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)

ExternalProject_Add(gemmlowp
    PREFIX gemmlowp
    URL ${gemmlowp_URL}
    URL_HASH ${gemmlowp_HASH}
    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
    BUILD_IN_SOURCE 1
    PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
    INSTALL_COMMAND "")
tensorflow/contrib/cmake/patches/gemmlowp/CMakeLists.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
cmake_minimum_required(VERSION 2.8.3)

project(gemmlowp)
@@ -73,6 +73,7 @@ HOST_INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -146,6 +147,7 @@ INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -240,6 +242,7 @@ ifeq ($(TARGET),ANDROID)
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
@@ -141,6 +141,17 @@ tensorflow/core/kernels/batch_norm_op.cc
tensorflow/core/kernels/avgpooling_op.cc
tensorflow/core/kernels/argmax_op.cc
tensorflow/core/kernels/aggregate_ops.cc
tensorflow/core/kernels/dequantize_op.cc
tensorflow/core/kernels/quantization_utils.cc
tensorflow/core/kernels/quantize_down_and_shrink_range.cc
tensorflow/core/kernels/quantize_op.cc
tensorflow/core/kernels/quantized_activation_ops.cc
tensorflow/core/kernels/quantized_batch_norm_op.cc
tensorflow/core/kernels/quantized_bias_add_op.cc
tensorflow/core/kernels/quantized_concat_op.cc
tensorflow/core/kernels/quantized_conv_ops.cc
tensorflow/core/kernels/quantized_matmul_op.cc
tensorflow/core/kernels/quantized_pooling_ops.cc
tensorflow/core/ops/training_ops.cc
tensorflow/core/ops/string_ops.cc
tensorflow/core/ops/state_ops.cc
@@ -13,53 +13,6 @@ load(
    "tf_custom_op_library",
)

cc_library(
    name = "cc_array_ops",
    srcs = ["ops/array_ops.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cc_math_ops",
    srcs = ["ops/math_ops.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cc_nn_ops",
    srcs = ["ops/nn_ops.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cc_ops",
    linkstatic = 1,
    deps = [
        ":cc_array_ops",
        ":cc_math_ops",
        ":cc_nn_ops",
    ],
    alwayslink = 1,
)

filegroup(
    name = "android_ops",
    srcs = glob(["ops/*.cc"]),
    visibility = ["//visibility:public"],
)

py_library(
    name = "quantization_py",
    srcs = [
@@ -69,8 +22,6 @@ py_library(
    srcs_version = "PY2AND3",
    deps = [
        ":ops",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
    ],
)
@@ -83,52 +34,9 @@ py_library(
    ],
    srcs_version = "PY2AND3",
    deps = [
        ":array_ops",
        ":math_ops",
        ":nn_ops",
    ],
)

tf_gen_op_wrapper_py(
    name = "array_ops",
    deps = ["//tensorflow/contrib/quantization:cc_array_ops"],
)

tf_gen_op_wrapper_py(
    name = "math_ops",
    deps = ["//tensorflow/contrib/quantization:cc_math_ops"],
)

tf_gen_op_wrapper_py(
    name = "nn_ops",
    deps = ["//tensorflow/contrib/quantization:cc_nn_ops"],
)

py_test(
    name = "dequantize_op_test",
    size = "small",
    srcs = ["python/dequantize_op_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":ops",
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:framework_test_lib",
    ],
)

py_test(
    name = "quantized_conv_ops_test",
    size = "small",
    srcs = ["python/quantized_conv_ops_test.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":ops",
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:framework_test_lib",
        "//tensorflow/python:array_ops",
        "//tensorflow/python:math_ops",
        "//tensorflow/python:nn_ops",
    ],
)
@@ -139,24 +47,6 @@ filegroup(
    ]),
)

tf_custom_op_library(
    name = "_quantized_ops.so",
    srcs = [
        "ops/array_ops.cc",
        "ops/math_ops.cc",
        "ops/nn_ops.cc",
    ],
    deps = [
    ],
)

py_library(
    name = "quantized_ops_py",
    srcs = ["load_quantized_ops_so.py"],
    data = ["_quantized_ops.so"],
    srcs_version = "PY2AND3",
)

filegroup(
    name = "all_files",
    srcs = glob(
@@ -1,69 +0,0 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This sub Makefile compiles libraries under this directory. This is designed to
# be used as a sub Makefile with tensorflow/contrib/makefile/Makefile.
# You can build targets in this file by including this sub makefile like:
# $ make -f tensorflow/contrib/makefile/Makefile TARGET=<target> \
#       SUB_MAKEFILES=$(pwd)/tensorflow/contrib/quantization/Makefile.in \
#       (optional: NDK_ROOT=<ndk_root>) contrib_quantization_tests
# TODO(satok): Support more targets

GTEST_DIR := \
$(MAKEFILE_DIR)/downloads/googletest/googletest

GTEST_HEADERS = \
$(wildcard $(GTEST_DIR)/include/gtest/*.h) \
$(wildcard $(GTEST_DIR)/include/gtest/internal/*.h)

GTEST_SRCS := \
$(wildcard $(GTEST_DIR)/src/*.cc) \
$(wildcard $(GTEST_DIR)/src/*.h) \
$(GTEST_HEADERS)

QUANTIZATION_TEST_SRCS := \
tensorflow/contrib/quantization/ops/math_ops.cc \
tensorflow/contrib/quantization/kernels/quantize_op.cc \
tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc \
tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc \
tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc \
tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc \
tensorflow/contrib/makefile/test/test_main.cc

QUANTIZATION_TEST_OBJS := $(addprefix $(OBJDIR), $(QUANTIZATION_TEST_SRCS:.cc=.o))

QUANTIZATION_TEST_NAME := contrib_quantization_tests
QUANTIZATION_TEST_BIN_PATH := $(BINDIR)$(QUANTIZATION_TEST_NAME)

INCLUDES += \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/googletest/googletest/include

QUANTIZATION_TEST_INCLUDES := $(INCLUDES)

$(OBJDIR)gtest-all.o : $(GTEST_SRCS)
	$(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) -I $(GTEST_DIR) -c \
	$(GTEST_DIR)/src/gtest-all.cc -o $@

$(LIBDIR)gtest.a : $(OBJDIR)gtest-all.o
	$(AR) $(ARFLAGS) $@ $^

$(QUANTIZATION_TEST_BIN_PATH): $(LIB_PATH) $(LIBDIR)gtest.a $(QUANTIZATION_TEST_OBJS)
	@mkdir -p $(dir $@)
	$(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) \
	-o $(QUANTIZATION_TEST_BIN_PATH) $(QUANTIZATION_TEST_OBJS) \
	$(LIBFLAGS) $(LIB_PATH) $(LIBDIR)gtest.a $(LDFLAGS) $(LIBS)

$(QUANTIZATION_TEST_NAME): $(QUANTIZATION_TEST_BIN_PATH)
@@ -24,7 +24,7 @@ from tensorflow.contrib.quantization.python import array_ops as quantized_array_
from tensorflow.contrib.quantization.python.math_ops import *
from tensorflow.contrib.quantization.python.nn_ops import *

from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.python.ops.gen_array_ops import dequantize
from tensorflow.python.ops.gen_array_ops import quantize_v2
from tensorflow.python.ops.gen_array_ops import quantized_concat
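The import swap in the hunk above (the four contrib imports are the old spellings, the four python.ops imports their replacements) is the user-visible core of this change: the generated wrappers now come from tensorflow.python.ops, and the contrib package merely re-exports them. A minimal sketch of what that means for user code, illustrative only and based solely on the import lines in this hunk:

# Both spellings now resolve to the same generated op wrappers; the
# contrib path survives purely as a compatibility re-export.
from tensorflow.python.ops.gen_array_ops import dequantize, quantize_v2
from tensorflow.contrib.quantization import dequantize as contrib_dequantize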
@@ -1,311 +0,0 @@
# Description:
#   quantization-specific OpKernels

package(
    default_visibility = ["//visibility:public"],
    features = ["-parse_headers"],
)

licenses(["notice"])  # Apache 2.0

load(
    "//tensorflow:tensorflow.bzl",
    "tf_cc_test",
    "tf_custom_op_library",
    "tf_kernel_library",
)

filegroup(
    name = "android_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantization_utils.h",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
        "reference_gemm.h",
    ],
    visibility = ["//visibility:public"],
)

filegroup(
    name = "all_files",
    srcs = glob(
        ["**/*"],
        exclude = [
            "**/METADATA",
            "**/OWNERS",
        ],
    ),
    visibility = ["//tensorflow:__subpackages__"],
)

tf_kernel_library(
    name = "quantized_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
    ],
    hdrs = [
        "quantization_utils.h",
        "reference_gemm.h",
    ],
    deps = [
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core/kernels:concat_lib_hdrs",
        "//tensorflow/core/kernels:conv_ops",
        "//tensorflow/core/kernels:eigen_helpers",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/kernels:pooling_ops",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
)

tf_custom_op_library(
    name = "_quantized_kernels.so",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantization_utils.h",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
        "reference_gemm.h",
    ],
    deps = [
        "//tensorflow/core/kernels:concat_lib_hdrs",
        "//tensorflow/core/kernels:ops_util_hdrs",
        "//tensorflow/core/kernels:pooling_ops_hdrs",
        "@gemmlowp//:gemmlowp",
    ],
)

py_library(
    name = "quantized_kernels_py",
    srcs = ["load_quantized_kernels_so.py"],
    data = ["_quantized_kernels.so"],
    srcs_version = "PY2AND3",
)

tf_cc_test(
    name = "quantize_down_and_shrink_range_op_test",
    size = "small",
    srcs = ["quantize_down_and_shrink_range_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantization_utils_test",
    srcs = ["quantization_utils_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//third_party/eigen3",
    ],
)

tf_cc_test(
    name = "quantized_activation_ops_test",
    srcs = ["quantized_activation_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_bias_add_op_test",
    size = "small",
    srcs = ["quantized_bias_add_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_conv_ops_test",
    size = "small",
    srcs = ["quantized_conv_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantize_op_test",
    size = "small",
    srcs = ["quantize_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_matmul_op_test",
    size = "small",
    srcs = ["quantized_matmul_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_pooling_ops_test",
    size = "small",
    srcs = ["quantized_pooling_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_concat_op_test",
    size = "small",
    srcs = ["quantized_concat_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_batch_norm_op_test",
    size = "small",
    srcs = ["quantized_batch_norm_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:batch_norm_op",
        "//tensorflow/core/kernels:ops_testutil",
        "//third_party/eigen3",
    ],
)
@@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for quantized evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import threading

import tensorflow as tf

QUANTIZED_KERNELS_FILE = '_quantized_kernels.so'

_quantized_kernels = None
_kernels_lock = threading.Lock()


# Workaround for the fact that importing tensorflow imports contrib
# (even if a user isn't using this or any other contrib op), but
# there's not yet any guarantee that the shared object exists.
# In which case, "import tensorflow" will always crash, even for users that
# never use contrib.
def Load(library_base_dir=''):
  """Load the quantized ops library and return the loaded module."""
  with _kernels_lock:
    global _quantized_kernels
    if not _quantized_kernels:
      data_files_path = os.path.join(library_base_dir,
                                     tf.resource_loader.get_data_files_path())
      tf.logging.info('data path: %s', data_files_path)
      _quantized_kernels = tf.load_op_library(os.path.join(
          data_files_path, QUANTIZED_KERNELS_FILE))

    assert _quantized_kernels, 'Could not load _quantized_kernels.so'
    return _quantized_kernels
@@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for quantized evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import threading

import tensorflow as tf

QUANTIZED_OPS_FILE = '_quantized_ops.so'

_quantized_ops = None
_ops_lock = threading.Lock()


# Workaround for the fact that importing tensorflow imports contrib
# (even if a user isn't using this or any other contrib op), but
# there's not yet any guarantee that the shared object exists.
# In which case, "import tensorflow" will always crash, even for users that
# never use contrib.
def Load(library_base_dir=''):
  """Load the quantized ops library and return the loaded module."""
  with _ops_lock:
    global _quantized_ops
    if not _quantized_ops:
      data_files_path = os.path.join(library_base_dir,
                                     tf.resource_loader.get_data_files_path())
      tf.logging.info('q:data path: %s', data_files_path)
      _quantized_ops = tf.load_op_library(os.path.join(
          data_files_path, QUANTIZED_OPS_FILE))

    assert _quantized_ops, 'Could not load quantized_ops.so'
    return _quantized_ops
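For context, the two deleted loader modules above were the runtime half of the old contrib packaging: the quantized op definitions and kernels lived in shared objects that had to be registered explicitly before use. A minimal sketch of the pre-change usage, with module paths taken from the deleted BUILD rules (illustrative only):

# Pre-change (now unnecessary): register the contrib ops and kernels at
# runtime before any quantized op can be constructed or executed.
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so

load_quantized_ops_so.Load()      # registers ops from _quantized_ops.so
load_quantized_kernels_so.Load()  # registers kernels from _quantized_kernels.so

After the move, these kernels are linked into the core library (see the tf_op_files.txt and //tensorflow/core/kernels:quantized_ops changes elsewhere in this commit), so no explicit loading step remains.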
@@ -1,195 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

REGISTER_OP("QuantizeV2")
    .Input("input: float")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

Assume the input is type float and has a possible range of [0.0, 6.0] and the
output type is quint8 ([0, 255]). The min_range and max_range values should be
specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
value of the input by 255/6 and cast to quint8.

If the output type was qint8 ([-128, 127]), the operation will additionally
subtract 128 from each value prior to casting, so that the range of values
aligns with the range of qint8.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = number_of_steps / range
quantized = round(input * range_scale) - round(range_min * range_scale) +
            numeric_limits<T>::min()
quantized = max(quantized, numeric_limits<T>::min())
quantized = min(quantized, numeric_limits<T>::max())
```

The biggest difference between this and MIN_COMBINED is that the minimum range
is rounded first, before it's subtracted from the rounded value. With
MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
and dequantizing will introduce a larger and larger error.

One thing to watch out for is that the operator may choose to adjust the
requested minimum and maximum values slightly during the quantization process,
so you should always use the output ports as the range for further calculations.
For example, if the requested minimum and maximum values are close to equal,
they will be separated by a small epsilon value to prevent ill-formed quantized
buffers from being created. Otherwise, you can end up with buffers where all the
quantized values map to the same float value, which causes problems for
operations that have to perform further calculations on them.

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
output: The quantized data produced from the float input.
output_min: The actual minimum scalar value used for the output.
output_max: The actual maximum scalar value used for the output.

)doc");

REGISTER_OP("Dequantize")
    .Input("input: T")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      return Status::OK();
    })
    .Doc(R"doc(
Dequantize the 'input' tensor into a float Tensor.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
if T == qint8, in[i] += (range(T) + 1) / 2.0
out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

If the input comes from a QuantizedRelu6, the output type is
quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
0-6. The min_range and max_range values are therefore 0.0 and 6.0.
Dequantize on quint8 will take each value, cast to float, and multiply
by 6 / 255.
Note that if quantizedtype is qint8, the operation will additionally add
128 to each value prior to casting.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = range / number_of_steps
const double offset_input = static_cast<double>(input) - lowest_quantized;
result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
```

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.

)doc");

REGISTER_OP("QuantizedConcat")
    .Input("concat_dim: int32")
    .Input("values: N * T")
    .Input("input_mins: N * float32")
    .Input("input_maxes: N * float32")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("N: int >= 2")
    .Attr("T: type")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
      ShapeHandle unused;
      for (int i = 2; i < c->num_inputs(); ++i) {
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
      }
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Concatenates quantized tensors along one dimension.

concat_dim: 0-D. The dimension along which to concatenate. Must be in the
  range [0, rank(values)).
values: The `N` Tensors to concatenate. Their ranks and types must match,
  and their sizes must match in all dimensions except `concat_dim`.
input_mins: The minimum scalar values for each of the input tensors.
input_maxes: The maximum scalar values for each of the input tensors.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
output: A `Tensor` with the concatenation of values stacked along the
  `concat_dim` dimension. This tensor's shape matches that of `values` except
  in `concat_dim` where it has the sum of the sizes.
)doc");

}  // namespace tensorflow
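The MIN_COMBINED formulas in the QuantizeV2 and Dequantize docs above are easy to check numerically. A minimal Python sketch for an unsigned 8-bit target (illustrative only; it ignores the kernel's exact rounding behavior and the MIN_FIRST mode):

def quantize_min_combined(x, min_range, max_range, num_bits=8):
  # out = (in - min_range) * range(T) / (max_range - min_range)
  range_t = (1 << num_bits) - 1  # range(quint8) = 255
  return int(round((x - min_range) * range_t / (max_range - min_range)))

def dequantize_min_combined(q, min_range, max_range, num_bits=8):
  range_t = (1 << num_bits) - 1
  return min_range + q * (max_range - min_range) / range_t

q = quantize_min_combined(3.0, 0.0, 6.0)  # 3.0 * 255/6 = 127.5 -> 128
x = dequantize_min_combined(q, 0.0, 6.0)  # 128 * 6/255 ~= 3.012

The round trip lands near, but not exactly on, the original value; that small error is the bias the MIN_FIRST discussion above is about.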
@@ -1,126 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {

using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

REGISTER_OP("QuantizedMatMul")
    .Input("a: T1")
    .Input("b: T2")
    .Input("min_a: float")
    .Input("max_a: float")
    .Input("min_b: float")
    .Input("max_b: float")
    .Output("out: Toutput")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("Toutput: quantizedtype = DT_QINT32")
    .Attr("transpose_a: bool = false")
    .Attr("transpose_b: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));

      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Perform a quantized matrix multiplication of `a` by the matrix `b`.

The inputs must be two-dimensional matrices and the inner dimension of
`a` (after being transposed if `transpose_a` is non-zero) must match the
outer dimension of `b` (after being transposed if `transpose_b` is
non-zero).

a: Must be a two-dimensional tensor.
b: Must be a two-dimensional tensor.
transpose_a: If true, `a` is transposed before multiplication.
transpose_b: If true, `b` is transposed before multiplication.
min_a: The float value that the lowest quantized `a` value represents.
max_a: The float value that the highest quantized `a` value represents.
min_b: The float value that the lowest quantized `b` value represents.
max_b: The float value that the highest quantized `b` value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizeDownAndShrinkRange")
    .Input("input: Tinput")
    .Input("input_min: float")
    .Input("input_max: float")
    .Output("output: out_type")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Convert the quantized 'input' tensor into a lower-precision 'output', using the
actual distribution of the values to maximize the usage of the lower bit depth
and adjusting the output min and max ranges accordingly.

[input_min, input_max] are scalar floats that specify the range for the float
interpretation of the 'input' data. For example, if input_min is -1.0f and
input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.

This operator tries to squeeze as much precision as possible into an output with
a lower bit depth by calculating the actual min and max values found in the
data. For example, maybe that quint16 input has no values lower than 16,384 and
none higher than 49,152. That means only half the range is actually needed, all
the float interpretations are between -0.5f and 0.5f, so if we want to compress
the data into a quint8 output, we can use that range rather than the theoretical
-1.0f to 1.0f that is suggested by the input min and max.

In practice, this is most useful for taking output from operations like
QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
may have large potential output ranges, but in practice have a distribution of
input values that only uses a small fraction of the possible range. By feeding
that output into this operator, we can reduce it from 32 bits down to 8 with
minimal loss of accuracy.

input_min: The float value that the minimum quantized input value represents.
input_max: The float value that the maximum quantized input value represents.
Tinput: The type of the input.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
out_type: The type of the output. Should be a lower bit depth than Tinput.

)doc");

}  // namespace tensorflow
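The QuantizeDownAndShrinkRange doc above describes requantization driven by the observed value range. A rough numpy sketch of the idea (illustrative only; the real kernel works on the integer representation directly and also handles degenerate all-equal ranges):

import numpy as np

def quantize_down_and_shrink(floats):
  # 'floats' is the float interpretation of the wide (e.g. qint32) data.
  out_min = float(np.min(floats))
  out_max = float(np.max(floats))
  scale = 255.0 / (out_max - out_min)  # squeeze the used range into 8 bits
  quantized = np.round((floats - out_min) * scale).astype(np.uint8)
  return quantized, out_min, out_max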
@@ -1,348 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/util/padding.h"

namespace tensorflow {

using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

REGISTER_OP("QuantizedAvgPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the average pool of the input tensor for quantized types.

input: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedBiasAdd")
    .Input("input: T1")
    .Input("bias: T2")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_bias: float")
    .Input("max_bias: float")
    .Output("output: out_type")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Adds Tensor 'bias' to Tensor 'input' for Quantized types.

Broadcasts the values of bias on dimensions 0..N-2 of 'input'.

bias: A 1D bias Tensor with size matching the last dimension of 'input'.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_bias: The float value that the lowest quantized bias value represents.
max_bias: The float value that the highest quantized bias value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedConv2D")
    .Input("input: Tinput")
    .Input("filter: Tfilter")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_filter: float")
    .Input("max_filter: float")
    .Output("output: out_type")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("Tinput: quantizedtype")
    .Attr("Tfilter: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QINT32")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes a 2D convolution given quantized 4D input and filter tensors.
The inputs are quantized tensors where the lowest value represents the real
number of the associated minimum, and the highest represents the maximum.
This means that you can only interpret the quantized output in the same way, by
taking the returned minimum and maximum values into account.

filter: filter's input_depth dimension must match input's depth dimensions.
strides: The stride of the sliding window for each dimension of the input
  tensor.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_filter: The float value that the lowest quantized filter value represents.
max_filter: The float value that the highest quantized filter value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedMaxPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the max pool of the input tensor for quantized types.

input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedRelu")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear: `max(features, 0)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedRelu6")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedReluX")
    .Input("features: Tinput")
    .Input("max_value: float")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
    .Input("t: Tinput")
    .Input("t_min: float")
    .Input("t_max: float")
    .Input("m: Tinput")
    .Input("m_min: float")
    .Input("m_max: float")
    .Input("v: Tinput")
    .Input("v_min: float")
    .Input("v_max: float")
    .Input("beta: Tinput")
    .Input("beta_min: float")
    .Input("beta_max: float")
    .Input("gamma: Tinput")
    .Input("gamma_min: float")
    .Input("gamma_max: float")
    .Output("result: out_type")
    .Output("result_min: float")
    .Output("result_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .Attr("variance_epsilon: float")
    .Attr("scale_after_normalization: bool")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));

      DimensionHandle last_dim = c->Dim(input, 3);
      for (int i = 1; i < 5; ++i) {  // covers m, v, beta, gamma
        ShapeHandle vec;
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
        TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
      }

      ShapeHandle out;
      TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
      c->set_output(0, out);
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());

      return Status::OK();
    })
    .Doc(R"doc(
Quantized Batch normalization.

This op is deprecated and will be removed in the future. Prefer
`tf.nn.batch_normalization`.

t: A 4D input Tensor.
t_min: The value represented by the lowest quantized input.
t_max: The value represented by the highest quantized input.
m: A 1D mean Tensor with size matching the last dimension of t.
  This is the first output from tf.nn.moments,
  or a saved moving average thereof.
m_min: The value represented by the lowest quantized mean.
m_max: The value represented by the highest quantized mean.
v: A 1D variance Tensor with size matching the last dimension of t.
  This is the second output from tf.nn.moments,
  or a saved moving average thereof.
v_min: The value represented by the lowest quantized variance.
v_max: The value represented by the highest quantized variance.
beta: A 1D beta Tensor with size matching the last dimension of t.
  An offset to be added to the normalized tensor.
beta_min: The value represented by the lowest quantized offset.
beta_max: The value represented by the highest quantized offset.
gamma: A 1D gamma Tensor with size matching the last dimension of t.
  If "scale_after_normalization" is true, this tensor will be multiplied
  with the normalized tensor.
gamma_min: The value represented by the lowest quantized gamma.
gamma_max: The value represented by the highest quantized gamma.
variance_epsilon: A small float number to avoid dividing by 0.
scale_after_normalization: A bool indicating whether the resulted tensor
  needs to be multiplied with gamma.
)doc");

}  // namespace tensorflow
@ -19,7 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.python.ops.gen_array_ops import dequantize
from tensorflow.python.ops.gen_array_ops import quantize_v2
from tensorflow.python.ops.gen_array_ops import quantized_concat

@ -19,10 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_math_ops
from tensorflow.contrib.quantization.ops.gen_math_ops import *
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops


ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops.gen_math_ops import *

@ -19,17 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_nn_ops
from tensorflow.contrib.quantization.ops.gen_nn_ops import *
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops


ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn)
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops.gen_nn_ops import *
@ -524,6 +524,7 @@ cc_library(
        "//tensorflow/core/kernels:nn",
        "//tensorflow/core/kernels:parameterized_truncated_normal_op",
        "//tensorflow/core/kernels:parsing",
        "//tensorflow/core/kernels:quantized_ops",
        "//tensorflow/core/kernels:random_ops",
        "//tensorflow/core/kernels:required",
        "//tensorflow/core/kernels:sdca_ops",

@ -734,6 +735,7 @@ cc_library(
    deps = [
        ":protos_cc",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
    alwayslink = 1,
)

@ -2228,6 +2228,7 @@ filegroup(
    srcs = [
        ":android_extended_ops_group1",
        ":android_extended_ops_group2",
        ":android_quantized_ops",
    ],
    visibility = ["//visibility:public"],
)
@ -2366,6 +2367,26 @@ filegroup(
    ],
)

filegroup(
    name = "android_quantized_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantization_utils.h",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
        "reference_gemm.h",
    ],
    visibility = ["//visibility:public"],
)

# A file group which contains nearly all available operators which
# may work on Android. This is intended to be used with selective
# registration.

@ -2436,10 +2457,244 @@ cc_library(
        "//tensorflow/core:android_tensorflow_lib_lite",
        "//tensorflow/core:protos_cc",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
    alwayslink = 1,
)

# Quantization-specific OpKernels

tf_kernel_library(
    name = "quantized_ops",
    srcs = [
        "dequantize_op.cc",
        "quantization_utils.cc",
        "quantize_down_and_shrink_range.cc",
        "quantize_op.cc",
        "quantized_activation_ops.cc",
        "quantized_batch_norm_op.cc",
        "quantized_bias_add_op.cc",
        "quantized_concat_op.cc",
        "quantized_conv_ops.cc",
        "quantized_matmul_op.cc",
        "quantized_pooling_ops.cc",
    ],
    hdrs = [
        "quantization_utils.h",
        "reference_gemm.h",
    ],
    deps = [
        "//tensorflow/core",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core/kernels:concat_lib_hdrs",
        "//tensorflow/core/kernels:conv_ops",
        "//tensorflow/core/kernels:eigen_helpers",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/kernels:pooling_ops",
        "//third_party/eigen3",
        "@gemmlowp//:gemmlowp",
    ],
)

tf_cc_test(
    name = "quantize_down_and_shrink_range_op_test",
    size = "small",
    srcs = ["quantize_down_and_shrink_range_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantization_utils_test",
    srcs = ["quantization_utils_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//third_party/eigen3",
    ],
)

tf_cc_test(
    name = "quantized_activation_ops_test",
    srcs = ["quantized_activation_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_bias_add_op_test",
    size = "small",
    srcs = ["quantized_bias_add_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_conv_ops_test",
    size = "small",
    srcs = ["quantized_conv_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantize_op_test",
    size = "small",
    srcs = ["quantize_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_matmul_op_test",
    size = "small",
    srcs = ["quantized_matmul_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_pooling_ops_test",
    size = "small",
    srcs = ["quantized_pooling_ops_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_concat_op_test",
    size = "small",
    srcs = ["quantized_concat_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
    ],
)

tf_cc_test(
    name = "quantized_batch_norm_op_test",
    size = "small",
    srcs = ["quantized_batch_norm_op_test.cc"],
    deps = [
        ":quantized_ops",
        "//tensorflow/core:array_ops_op_lib",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:math_ops_op_lib",
        "//tensorflow/core:nn_ops_op_lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:batch_norm_op",
        "//tensorflow/core/kernels:ops_testutil",
        "//third_party/eigen3",
    ],
)

# -----------------------------------------------------------------------------
# Google-internal targets. These must be at the end for syncrepo.
@ -17,7 +17,7 @@ limitations under the License.

#define EIGEN_USE_THREADS

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

@ -31,10 +31,6 @@ tf_cc_test(
    size = "small",
    srcs = ["quantized_matmul_op_for_hexagon_test.cc"],
    deps = [
        "//tensorflow/contrib/quantization:cc_array_ops",
        "//tensorflow/contrib/quantization:cc_math_ops",
        "//tensorflow/contrib/quantization:cc_nn_ops",
        "//tensorflow/contrib/quantization/kernels:quantized_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
@ -42,6 +38,7 @@ tf_cc_test(
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels:ops_testutil",
        "//tensorflow/core/kernels:ops_util",
        "//tensorflow/core/kernels:quantized_ops",
    ],
)

@ -51,7 +48,6 @@ tf_cc_test(
    srcs = ["graph_transferer_test.cc"],
    deps = [
        "//tensorflow/cc:cc_ops",
        "//tensorflow/contrib/quantization/kernels/hexagon:graph_transferer",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:direct_session",
        "//tensorflow/core:lib",
@ -60,6 +56,7 @@ tf_cc_test(
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/kernels/hexagon:graph_transferer",
    ],
)
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"

namespace tensorflow {
void GraphTransferer::LoadGraphFromProto(

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_LOADER_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_

#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/protobuf.h"
@ -37,4 +37,4 @@ class GraphTransferer {

}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H
#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_

@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/session.h"
@ -14,7 +14,8 @@ limitations under the License.
==============================================================================*/
// Tests in this file are designed to evaluate hexagon DSP operations.

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
@ -26,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"

namespace tensorflow {

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_

#define EIGEN_USE_THREADS

@ -552,4 +552,4 @@ class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase {

}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_

@ -18,7 +18,7 @@ limitations under the License.
#include <limits>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/tensor_testutil.h"

@ -20,7 +20,7 @@ limitations under the License.
#include <math.h>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

@ -17,7 +17,7 @@ limitations under the License.

#define EIGEN_USE_THREADS

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

@ -16,7 +16,7 @@ limitations under the License.
// Implements a quantized version of the Relu6 operation.
#define EIGEN_USE_THREADS

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -16,7 +16,7 @@ limitations under the License.
#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"

@ -16,7 +16,7 @@ limitations under the License.
#define EIGEN_USE_THREADS

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
@ -15,7 +15,7 @@ limitations under the License.

// Implements a quantized eight-bit version of the bias addition operation.

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

@ -15,7 +15,7 @@ limitations under the License.

#include <functional>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -18,7 +18,7 @@ limitations under the License.
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor_types.h"

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"

@ -19,8 +19,8 @@ limitations under the License.
#include <vector>

#include "public/gemmlowp.h"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/reference_gemm.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/ops_util.h"

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -16,8 +16,8 @@ limitations under the License.
// Implements a quantized eight-bit version of the matmul operation.

#include "public/gemmlowp.h"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/reference_gemm.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_

// This is an unoptimized but debuggable implementation of the GEMM matrix
// multiply function, used to compare to faster but more opaque versions, or
@ -87,4 +87,4 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c,
}
}  // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
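
The reference GEMM above exists so the faster gemmlowp path has something debuggable to compare against. As a rough NumPy sketch of the same idea, offset subtraction with 32-bit accumulation (the function name and arguments here are illustrative, not the header's actual C++ signature):

```
# Rough NumPy sketch of the idea behind reference_gemm.h: multiply
# offset-quantized eight-bit matrices with 32-bit accumulation.
# `reference_quantized_gemm` is an illustrative name, not the C++ API.
import numpy as np

def reference_quantized_gemm(a, b, offset_a, offset_b):
    """a, b: uint8 matrices; offsets: the quantized zero points."""
    # Subtract the zero points in int32 so products can go negative and
    # the accumulation cannot overflow eight-bit arithmetic.
    a32 = a.astype(np.int32) - offset_a
    b32 = b.astype(np.int32) - offset_b
    return a32 @ b32  # int32 accumulator, like QuantizedMatMul's qint32

a = np.array([[1, 2], [3, 4]], dtype=np.uint8)
b = np.array([[5, 6], [7, 8]], dtype=np.uint8)
print(reference_quantized_gemm(a, b, offset_a=0, offset_b=0))
```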
@ -4054,6 +4054,176 @@ debug_urls: List of URLs to debug targets, e.g.,
file:///foo/tfdbg_dump, grpc:://localhost:11011
)doc");

REGISTER_OP("QuantizeV2")
    .Input("input: float")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

Assume the input is type float and has a possible range of [0.0, 6.0] and the
output type is quint8 ([0, 255]). The min_range and max_range values should be
specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
value of the input by 255/6 and cast to quint8.

If the output type was qint8 ([-128, 127]), the operation will additionally
subtract 128 from each value prior to casting, so that the range of values
aligns with the range of qint8.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = number_of_steps / range
quantized = round(input * range_scale) - round(range_min * range_scale) +
  numeric_limits<T>::min()
quantized = max(quantized, numeric_limits<T>::min())
quantized = min(quantized, numeric_limits<T>::max())
```

The biggest difference between this and MIN_COMBINED is that the minimum range
is rounded first, before it's subtracted from the rounded value. With
MIN_COMBINED, a small bias is introduced, and repeated iterations of quantizing
and dequantizing accumulate a larger and larger error.

One thing to watch out for is that the operator may choose to adjust the
requested minimum and maximum values slightly during the quantization process,
so you should always use the output ports as the range for further calculations.
For example, if the requested minimum and maximum values are close to equal,
they will be separated by a small epsilon value to prevent ill-formed quantized
buffers from being created. Otherwise, you can end up with buffers where all the
quantized values map to the same float value, which causes problems for
operations that have to perform further calculations on them.

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
output: The quantized data produced from the float input.
output_min: The actual minimum scalar value used for the output.
output_max: The actual maximum scalar value used for the output.

)doc");

REGISTER_OP("Dequantize")
    .Input("input: T")
    .Input("min_range: float")
    .Input("max_range: float")
    .Output("output: float")
    .Attr("T: quantizedtype")
    .Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      return Status::OK();
    })
    .Doc(R"doc(
Dequantize the 'input' tensor into a float Tensor.

[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the quantized values to their float equivalents.

In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:

```
if T == qint8, in[i] += (range(T) + 1) / 2.0
out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

*MIN_COMBINED Mode Example*

If the input comes from a QuantizedRelu6, the output type is
quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
0-6. The min_range and max_range values are therefore 0.0 and 6.0.
Dequantize on quint8 will take each value, cast to float, and multiply
by 6 / 255.
Note that if quantizedtype is qint8, the operation will additionally add
128 to each value prior to casting.

If the mode is 'MIN_FIRST', then this approach is used:

```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = range / number_of_steps
const double offset_input = static_cast<double>(input) - lowest_quantized;
result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
```

min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.

)doc");

REGISTER_OP("QuantizedConcat")
    .Input("concat_dim: int32")
    .Input("values: N * T")
    .Input("input_mins: N * float32")
    .Input("input_maxes: N * float32")
    .Output("output: T")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("N: int >= 2")
    .Attr("T: type")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
      ShapeHandle unused;
      for (int i = std::max(0, c->num_inputs() - 2); i < c->num_inputs(); ++i) {
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
      }
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Concatenates quantized tensors along one dimension.

concat_dim: 0-D. The dimension along which to concatenate. Must be in the
  range [0, rank(values)).
values: The `N` Tensors to concatenate. Their ranks and types must match,
  and their sizes must match in all dimensions except `concat_dim`.
input_mins: The minimum scalar values for each of the input tensors.
input_maxes: The maximum scalar values for each of the input tensors.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
output: A `Tensor` with the concatenation of values stacked along the
  `concat_dim` dimension. This tensor's shape matches that of `values` except
  in `concat_dim` where it has the sum of the sizes.
)doc");

// Deprecated op registrations:

// The following can be deleted after 10mar2017.

@ -2058,6 +2058,106 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0]
```
)doc");

REGISTER_OP("QuantizedMatMul")
    .Input("a: T1")
    .Input("b: T2")
    .Input("min_a: float")
    .Input("max_a: float")
    .Input("min_b: float")
    .Input("max_b: float")
    .Output("out: Toutput")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("Toutput: quantizedtype = DT_QINT32")
    .Attr("transpose_a: bool = false")
    .Attr("transpose_b: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));

      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Perform a quantized matrix multiplication of `a` by the matrix `b`.

The inputs must be two-dimensional matrices and the inner dimension of
`a` (after being transposed if `transpose_a` is non-zero) must match the
outer dimension of `b` (after being transposed if `transpose_b` is
non-zero).

a: Must be a two-dimensional tensor.
b: Must be a two-dimensional tensor.
transpose_a: If true, `a` is transposed before multiplication.
transpose_b: If true, `b` is transposed before multiplication.
min_a: The float value that the lowest quantized `a` value represents.
max_a: The float value that the highest quantized `a` value represents.
min_b: The float value that the lowest quantized `b` value represents.
max_b: The float value that the highest quantized `b` value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizeDownAndShrinkRange")
    .Input("input: Tinput")
    .Input("input_min: float")
    .Input("input_max: float")
    .Output("output: out_type")
    .Output("output_min: float")
    .Output("output_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Convert the quantized 'input' tensor into a lower-precision 'output', using the
actual distribution of the values to maximize the usage of the lower bit depth
and adjusting the output min and max ranges accordingly.

[input_min, input_max] are scalar floats that specify the range for the float
interpretation of the 'input' data. For example, if input_min is -1.0f and
input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.

This operator tries to squeeze as much precision as possible into an output with
a lower bit depth by calculating the actual min and max values found in the
data. For example, suppose that quint16 input has no values lower than 16,384
and none higher than 49,152. That means only half the range is actually needed,
all the float interpretations are between -0.5f and 0.5f, so if we want to
compress the data into a quint8 output, we can use that range rather than the
theoretical -1.0f to 1.0f that is suggested by the input min and max.

In practice, this is most useful for taking output from operations like
QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
may have large potential output ranges, but in practice have a distribution of
input values that only uses a small fraction of the possible range. By feeding
that output into this operator, we can reduce it from 32 bits down to 8 with
minimal loss of accuracy.

input_min: The float value that the minimum quantized input value represents.
input_max: The float value that the maximum quantized input value represents.
Tinput: The type of the input.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
out_type: The type of the output. Should be a lower bit depth than Tinput.

)doc");

// Deprecated ops:
REGISTER_OP("BatchFFT")
    .Input("input: complex64")

@ -1994,4 +1994,324 @@ overlapping: When set to True, it means when pooling, the values at the boundary
output: 4-D. Gradients w.r.t. the input of `fractional_avg_pool`.
)doc");

REGISTER_OP("QuantizedAvgPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the average pool of the input tensor for quantized types.

input: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedBiasAdd")
    .Input("input: T1")
    .Input("bias: T2")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_bias: float")
    .Input("max_bias: float")
    .Output("output: out_type")
    .Output("min_out: float")
    .Output("max_out: float")
    .Attr("T1: quantizedtype")
    .Attr("T2: quantizedtype")
    .Attr("out_type: quantizedtype")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Adds Tensor 'bias' to Tensor 'input' for Quantized types.

Broadcasts the values of bias on dimensions 0..N-2 of 'input'.

bias: A 1D bias Tensor with size matching the last dimension of 'input'.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_bias: The float value that the lowest quantized bias value represents.
max_bias: The float value that the highest quantized bias value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedConv2D")
    .Input("input: Tinput")
    .Input("filter: Tfilter")
    .Input("min_input: float")
    .Input("max_input: float")
    .Input("min_filter: float")
    .Input("max_filter: float")
    .Output("output: out_type")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("Tinput: quantizedtype")
    .Attr("Tfilter: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QINT32")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes a 2D convolution given quantized 4D input and filter tensors.
The inputs are quantized tensors where the lowest quantized value represents
the real number given by the associated minimum, and the highest represents
the maximum. The quantized output must be interpreted the same way, by taking
the returned minimum and maximum values into account.

filter: filter's input_depth dimension must match input's depth dimension.
strides: The stride of the sliding window for each dimension of the input
  tensor.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_filter: The float value that the lowest quantized filter value represents.
max_filter: The float value that the highest quantized filter value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedMaxPool")
    .Input("input: T")
    .Input("min_input: float")
    .Input("max_input: float")
    .Output("output: T")
    .Output("min_output: float")
    .Output("max_output: float")
    .Attr("T: quantizedtype")
    .Attr("ksize: list(int)")
    .Attr("strides: list(int)")
    .Attr(GetPaddingAttrString())
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Produces the max pool of the input tensor for quantized types.

input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
ksize: The size of the window for each dimension of the input tensor.
  The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
  tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.

)doc");

REGISTER_OP("QuantizedRelu")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear: `max(features, 0)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedRelu6")
    .Input("features: Tinput")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedReluX")
    .Input("features: Tinput")
    .Input("max_value: float")
    .Input("min_features: float")
    .Input("max_features: float")
    .Output("activations: out_type")
    .Output("min_activations: float")
    .Output("max_activations: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype = DT_QUINT8")
    .SetShapeFn([](InferenceContext* c) {
      TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
      ShapeHandle unused;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`

activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.

)doc");

REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
    .Input("t: Tinput")
    .Input("t_min: float")
    .Input("t_max: float")
    .Input("m: Tinput")
    .Input("m_min: float")
    .Input("m_max: float")
    .Input("v: Tinput")
    .Input("v_min: float")
    .Input("v_max: float")
    .Input("beta: Tinput")
    .Input("beta_min: float")
    .Input("beta_max: float")
    .Input("gamma: Tinput")
    .Input("gamma_min: float")
    .Input("gamma_max: float")
    .Output("result: out_type")
    .Output("result_min: float")
    .Output("result_max: float")
    .Attr("Tinput: quantizedtype")
    .Attr("out_type: quantizedtype")
    .Attr("variance_epsilon: float")
    .Attr("scale_after_normalization: bool")
    .SetShapeFn([](InferenceContext* c) {
      ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));

      DimensionHandle last_dim = c->Dim(input, 3);
      for (int i = 1; i < 5; ++i) {  // covers m, v, beta, gamma
        ShapeHandle vec;
        TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
        TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
      }

      ShapeHandle out;
      TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
      c->set_output(0, out);
      c->set_output(1, c->Scalar());
      c->set_output(2, c->Scalar());

      return Status::OK();
    })
    .Doc(R"doc(
Quantized Batch normalization.

This op is deprecated and will be removed in the future. Prefer
`tf.nn.batch_normalization`.

t: A 4D input Tensor.
t_min: The value represented by the lowest quantized input.
t_max: The value represented by the highest quantized input.
m: A 1D mean Tensor with size matching the last dimension of t.
  This is the first output from tf.nn.moments,
  or a saved moving average thereof.
m_min: The value represented by the lowest quantized mean.
m_max: The value represented by the highest quantized mean.
v: A 1D variance Tensor with size matching the last dimension of t.
  This is the second output from tf.nn.moments,
  or a saved moving average thereof.
v_min: The value represented by the lowest quantized variance.
v_max: The value represented by the highest quantized variance.
beta: A 1D beta Tensor with size matching the last dimension of t.
  An offset to be added to the normalized tensor.
beta_min: The value represented by the lowest quantized offset.
beta_max: The value represented by the highest quantized offset.
gamma: A 1D gamma Tensor with size matching the last dimension of t.
  If "scale_after_normalization" is true, this tensor will be multiplied
  with the normalized tensor.
gamma_min: The value represented by the lowest quantized gamma.
gamma_max: The value represented by the highest quantized gamma.
variance_epsilon: A small float number to avoid dividing by 0.
scale_after_normalization: A bool indicating whether the resulting tensor
  needs to be multiplied with gamma.
)doc");
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -2149,6 +2149,33 @@ py_binary(
|
||||
],
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Quantization
|
||||
|
||||
py_test(
|
||||
name = "dequantize_op_test",
|
||||
size = "small",
|
||||
srcs = ["ops/dequantize_op_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":ops",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "quantized_conv_ops_test",
|
||||
size = "small",
|
||||
srcs = ["ops/quantized_conv_ops_test.py"],
|
||||
srcs_version = "PY2AND3",
|
||||
deps = [
|
||||
":ops",
|
||||
"//tensorflow:tensorflow_py",
|
||||
"//tensorflow/python:framework_test_lib",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all_files",
|
||||
srcs = glob(
|
||||
|
@ -74,6 +74,9 @@ or join multiple tensors together.
|
||||
@@boolean_mask
|
||||
@@one_hot
|
||||
@@sequence_mask
|
||||
@@dequantize
|
||||
@@quantize_v2
|
||||
@@quantized_concat
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
@ -2318,3 +2321,9 @@ def squeeze(input, squeeze_dims=None, name=None):
|
||||
if np.isscalar(squeeze_dims):
|
||||
squeeze_dims = [squeeze_dims]
|
||||
return gen_array_ops._squeeze(input, squeeze_dims, name)
|
||||
|
||||
|
||||
# TODO(cwhipkey): Verify and enable shape functions for these.
|
||||
ops.RegisterShape("QuantizeV2")(None)
|
||||
ops.RegisterShape("QuantizedBatchNormWithGlobalNormalization")(None)
|
||||
ops.RegisterShape("QuantizedConcat")(None)
@ -21,24 +21,16 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf

# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so


class DequantizeOpTest(tf.test.TestCase):

  def __init__(self, method_name="runTest"):
    super(DequantizeOpTest, self).__init__(method_name)
    load_quantized_ops_so.Load()
    load_quantized_kernels_so.Load()

  def _testDequantizeOp(self, inputs, min_range, max_range, dtype):
    with self.test_session():
      input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype)
      dequantized = tf.contrib.quantization.dequantize(
      dequantized = tf.dequantize(
          input_op, min_range, max_range)
      tf_ans = dequantized.eval()
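A sketch of the arithmetic this test checks against, assuming quint8 inputs and the op's default MIN_COMBINED mode (the scale formula below is an assumption based on that mode):

import numpy as np

def dequantize_reference(q, min_range, max_range):
  # Map quint8 values linearly from [0, 255] onto [min_range, max_range].
  scale = (max_range - min_range) / 255.0
  return min_range + np.asarray(q, dtype=np.float32) * scale

print(dequantize_reference([0, 128, 255], 0.0, 6.0))  # ~[0., 3.01, 6.]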
@ -1954,3 +1954,6 @@ def reduced_shape(input_shape, axes):
       axes],  # [1, 2]
      [input_shape,  # [2, 3, 5, 7]
       array_ops.fill(axes_shape, 1)])  # [1, 1]


ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)
@ -277,6 +277,12 @@ classes when using one of the sampled loss functions above.

@@compute_accidental_hits

### Quantization ops

@@quantized_relu_x
@@quantized_max_pool
@@quantized_avg_pool

"""
from __future__ import absolute_import
from __future__ import division
@ -1925,4 +1925,14 @@ def erosion2d(value, kernel, strides, rates, padding, name=None):
                                        padding=padding,
                                        name=name))


ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn)

# pylint: enable=invalid-name
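For reference, a minimal sketch of one of the ops documented above, tf.nn.quantized_relu_x (the input values and ranges are illustrative assumptions, as is the exact keyword spelling):

import tensorflow as tf

with tf.Session() as sess:
  features = tf.constant([0, 50, 200, 255], dtype=tf.quint8)
  # Clamp activations to [0, max_value] in the quantized domain.
  relu, relu_min, relu_max = tf.nn.quantized_relu_x(
      features, max_value=6.0, min_features=0.0, max_features=10.0)
  print(sess.run([relu, relu_min, relu_max]))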
@ -21,19 +21,11 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf

# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so


class Conv2DTest(tf.test.TestCase):

  def __init__(self, method_name="runTest"):
    super(Conv2DTest, self).__init__(method_name)
    load_quantized_ops_so.Load()
    load_quantized_kernels_so.Load()

  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                    expected):
@ -67,16 +59,16 @@ class Conv2DTest(tf.test.TestCase):
    with self.test_session(use_gpu=False) as sess:
      t1 = tf.constant(x1, shape=tensor_in_sizes, dtype=tf.quint8)
      t2 = tf.constant(x2, shape=filter_in_sizes, dtype=tf.quint8)
      conv = tf.contrib.quantization.quantized_conv2d(t1,
                                                      t2,
                                                      out_type=tf.qint32,
                                                      strides=[1, stride,
                                                               stride, 1],
                                                      padding=padding,
                                                      min_input=x1_min,
                                                      max_input=x1_max,
                                                      min_filter=x2_min,
                                                      max_filter=x2_max)
      conv = tf.nn.quantized_conv2d(t1,
                                    t2,
                                    out_type=tf.qint32,
                                    strides=[1, stride,
                                             stride, 1],
                                    padding=padding,
                                    min_input=x1_min,
                                    max_input=x1_max,
                                    min_filter=x2_min,
                                    max_filter=x2_max)
      value = sess.run(conv)
      quantized_output = value[0]
      output_min = value[1]
@ -142,18 +142,21 @@ def if_not_mobile(a):
  })

def tf_copts():
  return (["-fno-exceptions", "-DEIGEN_AVOID_STL_ARRAY"] +
  return (["-fno-exceptions",
           "-DEIGEN_AVOID_STL_ARRAY",
           "-Iexternal/gemmlowp",] +
          if_cuda(["-DGOOGLE_CUDA=1"]) +
          if_android_arm(["-mfpu=neon"]) +
          select({"//tensorflow:android": [
              "-std=c++11",
              "-DMIN_LOG_LEVEL=0",
              "-DTF_LEAN_BINARY",
              "-O2",
          ],
                  "//tensorflow:darwin": [],
                  "//tensorflow:ios": ["-std=c++11",],
                  "//conditions:default": ["-pthread"]}))
          select({
              "//tensorflow:android": [
                  "-std=c++11",
                  "-DMIN_LOG_LEVEL=0",
                  "-DTF_LEAN_BINARY",
                  "-O2",
              ],
              "//tensorflow:darwin": [],
              "//tensorflow:ios": ["-std=c++11",],
              "//conditions:default": ["-pthread"]}))

def tf_opts_nortti_if_android():
  return if_android([
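For context, a sketch of how a kernel target would pick up the new gemmlowp include path through tf_copts() (the target below is hypothetical):

# In a BUILD file (Starlark); "my_quantized_kernel" is a made-up target.
load("//tensorflow:tensorflow.bzl", "tf_copts")

cc_library(
    name = "my_quantized_kernel",
    srcs = ["my_quantized_kernel.cc"],
    copts = tf_copts(),  # now carries -Iexternal/gemmlowp
)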
@ -13,9 +13,6 @@ py_library(
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:ops",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:platform",
    ],
)
@ -26,9 +23,6 @@ py_binary(
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow:tensorflow_py",
        "//tensorflow/contrib/quantization:ops",
        "//tensorflow/contrib/quantization:quantized_ops_py",
        "//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
        "//tensorflow/python:platform",
    ],
)
@ -15,8 +15,8 @@
r"""Transforms a float-trained graph into an equivalent quantized version.

An example of command-line usage is:
bazel build tensorflow/contrib/quantization/tools:quantize_graph \
&& bazel-bin/tensorflow/contrib/quantization/tools/quantize_graph \
bazel build tensorflow/tools/quantization:quantize_graph \
&& bazel-bin/tensorflow/tools/quantization/quantize_graph \
--input=tensorflow_inception_graph.pb \
--output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \
--mode=eightbit --logtostderr
@ -35,12 +35,6 @@ import tensorflow as tf
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import tensor_util

# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so


flags = tf.app.flags
FLAGS = flags.FLAGS
@ -60,8 +54,6 @@ flags.DEFINE_string("test_input_dims", "1,224,224,3",
                    """ graph loaded from a file.""")
flags.DEFINE_boolean("strip_redundant_quantization", True,
                     """Removes redundant dequantize/quantize pairs.""")
flags.DEFINE_boolean("load_quantization_so", True,
                     """Explicitly load the quantization ops library""")


def print_input_nodes(current_node, nodes_map, indent, already_visited):
@ -290,9 +282,6 @@ class GraphRewriter(object):
    self.nodes_map = self.create_nodes_map(input_graph)
    self.output_graph = None
    self.mode = mode
    if FLAGS.load_quantization_so:
      load_quantized_ops_so.Load()
      load_quantized_kernels_so.Load()

  def create_nodes_map(self, graph):
    """Builds a mapping of node names to their defs from the graph."""
@ -20,11 +20,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import numpy as np
import tensorflow as tf
from tensorflow.contrib.quantization.tools import quantize_graph

from tensorflow.python.framework import graph_util
from tensorflow.tools.quantization import quantize_graph

flags = tf.app.flags
FLAGS = flags.FLAGS