Move contrib/quantization ops to tensorflow/core

Change: 136410307
Authored by Andrew Harp on 2016-10-17 15:18:33 -08:00; committed by TensorFlower Gardener
parent 9b8ff3f50c
commit 66024fd508
63 changed files with 1018 additions and 1382 deletions


@ -108,10 +108,6 @@ filegroup(
"//tensorflow/contrib/metrics/kernels:all_files",
"//tensorflow/contrib/ndlstm:all_files",
"//tensorflow/contrib/opt:all_files",
"//tensorflow/contrib/quantization:all_files",
"//tensorflow/contrib/quantization/kernels:all_files",
"//tensorflow/contrib/quantization/kernels/hexagon:all_files",
"//tensorflow/contrib/quantization/tools:all_files",
"//tensorflow/contrib/rnn:all_files",
"//tensorflow/contrib/session_bundle:all_files",
"//tensorflow/contrib/session_bundle/example:all_files",
@ -133,6 +129,7 @@ filegroup(
"//tensorflow/core/distributed_runtime:all_files",
"//tensorflow/core/distributed_runtime/rpc:all_files",
"//tensorflow/core/kernels:all_files",
"//tensorflow/core/kernels/hexagon:all_files",
"//tensorflow/core/ops/compat:all_files",
"//tensorflow/core/platform/cloud:all_files",
"//tensorflow/core/platform/default/build_config:all_files",
@ -180,6 +177,7 @@ filegroup(
"//tensorflow/tools/docs:all_files",
"//tensorflow/tools/git:all_files",
"//tensorflow/tools/proto_text:all_files",
"//tensorflow/tools/quantization:all_files",
"//tensorflow/tools/test:all_files",
"//tensorflow/user_ops:all_files",
"//third_party/hadoop:all_files",


@ -60,6 +60,7 @@ include(gif)
include(png)
include(jpeg)
include(eigen)
include(gemmlowp)
include(jsoncpp)
include(farmhash)
include(highwayhash)
@ -88,6 +89,7 @@ include_directories(
${png_INCLUDE_DIR}
${jpeg_INCLUDE_DIR}
${eigen_INCLUDE_DIRS}
${gemmlowp_INCLUDE_DIR}
${jsoncpp_INCLUDE_DIR}
${farmhash_INCLUDE_DIR}
${highwayhash_INCLUDE_DIR}


@ -0,0 +1,15 @@
include (ExternalProject)
set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz)
set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882)
set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
set(gemmlowp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp)
ExternalProject_Add(gemmlowp
PREFIX gemmlowp
URL ${gemmlowp_URL}
URL_HASH ${gemmlowp_HASH}
DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
BUILD_IN_SOURCE 1
PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/gemmlowp/CMakeLists.txt ${gemmlowp_BUILD}
INSTALL_COMMAND "")


@ -0,0 +1,3 @@
cmake_minimum_required(VERSION 2.8.3)
project(gemmlowp)


@ -73,6 +73,7 @@ HOST_INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@ -146,6 +147,7 @@ INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
@ -240,6 +242,7 @@ ifeq ($(TARGET),ANDROID)
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)


@ -141,6 +141,17 @@ tensorflow/core/kernels/batch_norm_op.cc
tensorflow/core/kernels/avgpooling_op.cc
tensorflow/core/kernels/argmax_op.cc
tensorflow/core/kernels/aggregate_ops.cc
tensorflow/core/kernels/dequantize_op.cc
tensorflow/core/kernels/quantization_utils.cc
tensorflow/core/kernels/quantize_down_and_shrink_range.cc
tensorflow/core/kernels/quantize_op.cc
tensorflow/core/kernels/quantized_activation_ops.cc
tensorflow/core/kernels/quantized_batch_norm_op.cc
tensorflow/core/kernels/quantized_bias_add_op.cc
tensorflow/core/kernels/quantized_concat_op.cc
tensorflow/core/kernels/quantized_conv_ops.cc
tensorflow/core/kernels/quantized_matmul_op.cc
tensorflow/core/kernels/quantized_pooling_ops.cc
tensorflow/core/ops/training_ops.cc
tensorflow/core/ops/string_ops.cc
tensorflow/core/ops/state_ops.cc


@ -13,53 +13,6 @@ load(
"tf_custom_op_library",
)
cc_library(
name = "cc_array_ops",
srcs = ["ops/array_ops.cc"],
linkstatic = 1,
deps = [
"//tensorflow/core:framework",
],
alwayslink = 1,
)
cc_library(
name = "cc_math_ops",
srcs = ["ops/math_ops.cc"],
linkstatic = 1,
deps = [
"//tensorflow/core:framework",
],
alwayslink = 1,
)
cc_library(
name = "cc_nn_ops",
srcs = ["ops/nn_ops.cc"],
linkstatic = 1,
deps = [
"//tensorflow/core:framework",
],
alwayslink = 1,
)
cc_library(
name = "cc_ops",
linkstatic = 1,
deps = [
":cc_array_ops",
":cc_math_ops",
":cc_nn_ops",
],
alwayslink = 1,
)
filegroup(
name = "android_ops",
srcs = glob(["ops/*.cc"]),
visibility = ["//visibility:public"],
)
py_library(
name = "quantization_py",
srcs = [
@ -69,8 +22,6 @@ py_library(
srcs_version = "PY2AND3",
deps = [
":ops",
"//tensorflow/contrib/quantization:quantized_ops_py",
"//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
],
)
@ -83,52 +34,9 @@ py_library(
],
srcs_version = "PY2AND3",
deps = [
":array_ops",
":math_ops",
":nn_ops",
],
)
tf_gen_op_wrapper_py(
name = "array_ops",
deps = ["//tensorflow/contrib/quantization:cc_array_ops"],
)
tf_gen_op_wrapper_py(
name = "math_ops",
deps = ["//tensorflow/contrib/quantization:cc_math_ops"],
)
tf_gen_op_wrapper_py(
name = "nn_ops",
deps = ["//tensorflow/contrib/quantization:cc_nn_ops"],
)
py_test(
name = "dequantize_op_test",
size = "small",
srcs = ["python/dequantize_op_test.py"],
srcs_version = "PY2AND3",
deps = [
":ops",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/quantization:quantized_ops_py",
"//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
"//tensorflow/python:framework_test_lib",
],
)
py_test(
name = "quantized_conv_ops_test",
size = "small",
srcs = ["python/quantized_conv_ops_test.py"],
srcs_version = "PY2AND3",
deps = [
":ops",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/quantization:quantized_ops_py",
"//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:array_ops",
"//tensorflow/python:math_ops",
"//tensorflow/python:nn_ops",
],
)
@ -139,24 +47,6 @@ filegroup(
]),
)
tf_custom_op_library(
name = "_quantized_ops.so",
srcs = [
"ops/array_ops.cc",
"ops/math_ops.cc",
"ops/nn_ops.cc",
],
deps = [
],
)
py_library(
name = "quantized_ops_py",
srcs = ["load_quantized_ops_so.py"],
data = ["_quantized_ops.so"],
srcs_version = "PY2AND3",
)
filegroup(
name = "all_files",
srcs = glob(


@ -1,69 +0,0 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This sub Makefile compiles libraries under this directory. This is designed to
# be used as a sub Makefile with tensorflow/contrib/makefile/Makefile.
# You can build targets in this file by including this sub makefile like:
# $ make -f tensorflow/contrib/makefile/Makefile TARGET=<target> \
# SUB_MAKEFILES=$(pwd)/tensorflow/contrib/quantization/Makefile.in \
# (optional: NDK_ROOT=<ndk_root>) contrib_quantization_tests
# TODO(satok): Support more targets
GTEST_DIR := \
$(MAKEFILE_DIR)/downloads/googletest/googletest
GTEST_HEADERS = \
$(wildcard $(GTEST_DIR)/include/gtest/*.h) \
$(wildcard $(GTEST_DIR)/include/gtest/internal/*.h)
GTEST_SRCS := \
$(wildcard $(GTEST_DIR)/src/*.cc) \
$(wildcard $(GTEST_DIR)/src/*.h) \
$(GTEST_HEADERS)
QUANTIZATION_TEST_SRCS := \
tensorflow/contrib/quantization/ops/math_ops.cc \
tensorflow/contrib/quantization/kernels/quantize_op.cc \
tensorflow/contrib/quantization/kernels/quantized_conv_ops.cc \
tensorflow/contrib/quantization/kernels/quantized_matmul_op.cc \
tensorflow/contrib/quantization/kernels/quantized_matmul_op_test.cc \
tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc \
tensorflow/contrib/makefile/test/test_main.cc
QUANTIZATION_TEST_OBJS := $(addprefix $(OBJDIR), $(QUANTIZATION_TEST_SRCS:.cc=.o))
QUANTIZATION_TEST_NAME := contrib_quantization_tests
QUANTIZATION_TEST_BIN_PATH := $(BINDIR)$(QUANTIZATION_TEST_NAME)
INCLUDES += \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/googletest/googletest/include
QUANTIZATION_TEST_INCLUDES := $(INCLUDES)
$(OBJDIR)gtest-all.o : $(GTEST_SRCS)
$(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) -I $(GTEST_DIR) -c \
$(GTEST_DIR)/src/gtest-all.cc -o $@
$(LIBDIR)gtest.a : $(OBJDIR)gtest-all.o
$(AR) $(ARFLAGS) $@ $^
$(QUANTIZATION_TEST_BIN_PATH): $(LIB_PATH) $(LIBDIR)gtest.a $(QUANTIZATION_TEST_OBJS)
@mkdir -p $(dir $@)
$(CXX) $(CXXFLAGS) $(QUANTIZATION_TEST_INCLUDES) \
-o $(QUANTIZATION_TEST_BIN_PATH) $(QUANTIZATION_TEST_OBJS) \
$(LIBFLAGS) $(LIB_PATH) $(LIBDIR)gtest.a $(LDFLAGS) $(LIBS)
$(QUANTIZATION_TEST_NAME): $(QUANTIZATION_TEST_BIN_PATH)


@ -24,7 +24,7 @@ from tensorflow.contrib.quantization.python import array_ops as quantized_array_
from tensorflow.contrib.quantization.python.math_ops import *
from tensorflow.contrib.quantization.python.nn_ops import *
from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.python.ops.gen_array_ops import dequantize
from tensorflow.python.ops.gen_array_ops import quantize_v2
from tensorflow.python.ops.gen_array_ops import quantized_concat
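The public surface is unchanged by this move: `tf.contrib.quantization` still re-exports `quantize_v2`, `dequantize`, and `quantized_concat`, only now from the core `gen_array_ops` wrappers shown above. A minimal usage sketch, assuming a TensorFlow build of this era with the quantized kernels compiled in; the concrete values are illustrative:

```
import tensorflow as tf

inputs = tf.constant([0.0, 1.5, 3.0, 6.0], dtype=tf.float32)
min_range = tf.constant(0.0)
max_range = tf.constant(6.0)

# QuantizeV2 returns the quantized tensor plus the output range actually used.
quantized, out_min, out_max = tf.contrib.quantization.quantize_v2(
    inputs, min_range, max_range, T=tf.quint8, mode="MIN_COMBINED")
restored = tf.contrib.quantization.dequantize(
    quantized, out_min, out_max, mode="MIN_COMBINED")

with tf.Session() as sess:
    print(sess.run([quantized, restored]))
```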


@ -1,311 +0,0 @@
# Description:
# quantization-specific OpKernels
package(
default_visibility = ["//visibility:public"],
features = ["-parse_headers"],
)
licenses(["notice"]) # Apache 2.0
load(
"//tensorflow:tensorflow.bzl",
"tf_cc_test",
"tf_custom_op_library",
"tf_kernel_library",
)
filegroup(
name = "android_ops",
srcs = [
"dequantize_op.cc",
"quantization_utils.cc",
"quantization_utils.h",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
"quantized_activation_ops.cc",
"quantized_batch_norm_op.cc",
"quantized_bias_add_op.cc",
"quantized_concat_op.cc",
"quantized_conv_ops.cc",
"quantized_matmul_op.cc",
"quantized_pooling_ops.cc",
"reference_gemm.h",
],
visibility = ["//visibility:public"],
)
filegroup(
name = "all_files",
srcs = glob(
["**/*"],
exclude = [
"**/METADATA",
"**/OWNERS",
],
),
visibility = ["//tensorflow:__subpackages__"],
)
tf_kernel_library(
name = "quantized_ops",
srcs = [
"dequantize_op.cc",
"quantization_utils.cc",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
"quantized_activation_ops.cc",
"quantized_batch_norm_op.cc",
"quantized_bias_add_op.cc",
"quantized_concat_op.cc",
"quantized_conv_ops.cc",
"quantized_matmul_op.cc",
"quantized_pooling_ops.cc",
],
hdrs = [
"quantization_utils.h",
"reference_gemm.h",
],
deps = [
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core/kernels:concat_lib_hdrs",
"//tensorflow/core/kernels:conv_ops",
"//tensorflow/core/kernels:eigen_helpers",
"//tensorflow/core/kernels:ops_util",
"//tensorflow/core/kernels:pooling_ops",
"//third_party/eigen3",
"@gemmlowp//:gemmlowp",
],
)
tf_custom_op_library(
name = "_quantized_kernels.so",
srcs = [
"dequantize_op.cc",
"quantization_utils.cc",
"quantization_utils.h",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
"quantized_activation_ops.cc",
"quantized_batch_norm_op.cc",
"quantized_bias_add_op.cc",
"quantized_concat_op.cc",
"quantized_conv_ops.cc",
"quantized_matmul_op.cc",
"quantized_pooling_ops.cc",
"reference_gemm.h",
],
deps = [
"//tensorflow/core/kernels:concat_lib_hdrs",
"//tensorflow/core/kernels:ops_util_hdrs",
"//tensorflow/core/kernels:pooling_ops_hdrs",
"@gemmlowp//:gemmlowp",
],
)
py_library(
name = "quantized_kernels_py",
srcs = ["load_quantized_kernels_so.py"],
data = ["_quantized_kernels.so"],
srcs_version = "PY2AND3",
)
tf_cc_test(
name = "quantize_down_and_shrink_range_op_test",
size = "small",
srcs = ["quantize_down_and_shrink_range_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantization_utils_test",
srcs = ["quantization_utils_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:core_cpu",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//third_party/eigen3",
],
)
tf_cc_test(
name = "quantized_activation_ops_test",
srcs = ["quantized_activation_ops_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_bias_add_op_test",
size = "small",
srcs = ["quantized_bias_add_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_conv_ops_test",
size = "small",
srcs = ["quantized_conv_ops_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantize_op_test",
size = "small",
srcs = ["quantize_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_matmul_op_test",
size = "small",
srcs = ["quantized_matmul_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_pooling_ops_test",
size = "small",
srcs = ["quantized_pooling_ops_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_concat_op_test",
size = "small",
srcs = ["quantized_concat_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_batch_norm_op_test",
size = "small",
srcs = ["quantized_batch_norm_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:batch_norm_op",
"//tensorflow/core/kernels:ops_testutil",
"//third_party/eigen3",
],
)


@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for quantized evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import threading
import tensorflow as tf
QUANTIZED_KERNELS_FILE = '_quantized_kernels.so'
_quantized_kernels = None
_kernels_lock = threading.Lock()
# Workaround for the fact that importing tensorflow imports contrib
# (even if a user isn't using this or any other contrib op), but
# there's not yet any guarantee that the shared object exists.
# In which case, "import tensorflow" will always crash, even for users that
# never use contrib.
def Load(library_base_dir=''):
"""Load the quantized ops library and return the loaded module."""
with _kernels_lock:
global _quantized_kernels
if not _quantized_kernels:
data_files_path = os.path.join(library_base_dir,
tf.resource_loader.get_data_files_path())
tf.logging.info('data path: %s', data_files_path)
_quantized_kernels = tf.load_op_library(os.path.join(
data_files_path, QUANTIZED_KERNELS_FILE))
assert _quantized_kernels, 'Could not load _quantized_kernels.so'
return _quantized_kernels


@ -1,48 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops for quantized evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import threading
import tensorflow as tf
QUANTIZED_OPS_FILE = '_quantized_ops.so'
_quantized_ops = None
_ops_lock = threading.Lock()
# Workaround for the fact that importing tensorflow imports contrib
# (even if a user isn't using this or any other contrib op), but
# there's not yet any guarantee that the shared object exists.
# In which case, "import tensorflow" will always crash, even for users that
# never use contrib.
def Load(library_base_dir=''):
"""Load the quantized ops library and return the loaded module."""
with _ops_lock:
global _quantized_ops
if not _quantized_ops:
data_files_path = os.path.join(library_base_dir,
tf.resource_loader.get_data_files_path())
tf.logging.info('q:data path: %s', data_files_path)
_quantized_ops = tf.load_op_library(os.path.join(
data_files_path, QUANTIZED_OPS_FILE))
assert _quantized_ops, 'Could not load quantized_ops.so'
return _quantized_ops


@ -1,195 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
namespace tensorflow {
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
REGISTER_OP("QuantizeV2")
.Input("input: float")
.Input("min_range: float")
.Input("max_range: float")
.Output("output: T")
.Output("output_min: float")
.Output("output_max: float")
.Attr("T: quantizedtype")
.Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.
In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
```
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
*MIN_COMBINED Mode Example*
Assume the input is type float and has a possible range of [0.0, 6.0] and the
output type is quint8 ([0, 255]). The min_range and max_range values should be
specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
value of the input by 255/6 and cast to quint8.
If the output type was qint8 ([-128, 127]), the operation will additionally
subtract each value by 128 prior to casting, so that the range of values aligns
with the range of qint8.
If the mode is 'MIN_FIRST', then this approach is used:
```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = number_of_steps / range
quantized = round(input * range_scale) - round(range_min * range_scale) +
numeric_limits<T>::min()
quantized = max(quantized, numeric_limits<T>::min())
quantized = min(quantized, numeric_limits<T>::max())
```
The biggest difference between this and MIN_COMBINED is that the minimum range
is rounded first, before it's subtracted from the rounded value. With
MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing
and dequantizing will introduce a larger and larger error.
One thing to watch out for is that the operator may choose to adjust the
requested minimum and maximum values slightly during the quantization process,
so you should always use the output ports as the range for further calculations.
For example, if the requested minimum and maximum values are close to equal,
they will be separated by a small epsilon value to prevent ill-formed quantized
buffers from being created. Otherwise, you can end up with buffers where all the
quantized values map to the same float value, which causes problems for
operations that have to perform further calculations on them.
min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
output: The quantized data produced from the float input.
output_min: The actual minimum scalar value used for the output.
output_max: The actual maximum scalar value used for the output.
)doc");
REGISTER_OP("Dequantize")
.Input("input: T")
.Input("min_range: float")
.Input("max_range: float")
.Output("output: float")
.Attr("T: quantizedtype")
.Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
return Status::OK();
})
.Doc(R"doc(
Dequantize the 'input' tensor into a float Tensor.
[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.
In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
```
if T == qint8, in[i] += (range(T) + 1)/ 2.0
out[i] = min_range + (in[i]* (max_range - min_range) / range(T))
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
*MIN_COMBINED Mode Example*
If the input comes from a QuantizedRelu6, the output type is
quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
0-6. The min_range and max_range values are therefore 0.0 and 6.0.
Dequantize on quint8 will take each value, cast to float, and multiply
by 6 / 255.
Note that if quantizedtype is qint8, the operation will additionally add
each value by 128 prior to casting.
If the mode is 'MIN_FIRST', then this approach is used:
```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = range / number_of_steps
const double offset_input = static_cast<double>(input) - lowest_quantized;
result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
```
min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
)doc");
REGISTER_OP("QuantizedConcat")
.Input("concat_dim: int32")
.Input("values: N * T")
.Input("input_mins: N * float32")
.Input("input_maxes: N * float32")
.Output("output: T")
.Output("output_min: float")
.Output("output_max: float")
.Attr("N: int >= 2")
.Attr("T: type")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
ShapeHandle unused;
for (int i = 2; i < c->num_inputs(); ++i) {
TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
}
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Concatenates quantized tensors along one dimension.
concat_dim: 0-D. The dimension along which to concatenate. Must be in the
range [0, rank(values)).
values: The `N` Tensors to concatenate. Their ranks and types must match,
and their sizes must match in all dimensions except `concat_dim`.
input_mins: The minimum scalar values for each of the input tensors.
input_maxes: The maximum scalar values for each of the input tensors.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
output: A `Tensor` with the concatenation of values stacked along the
`concat_dim` dimension. This tensor's shape matches that of `values` except
in `concat_dim` where it has the sum of the sizes.
)doc");
} // namespace tensorflow
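The MIN_COMBINED arithmetic documented for QuantizeV2 and Dequantize above is easy to sanity-check outside TensorFlow. A NumPy sketch of the documented formulas for quint8 (rounding is used here for readability and is not meant to mirror the kernel's exact rounding/casting behaviour):

```
import numpy as np

def quantize_min_combined(x, min_range, max_range):
    # out[i] = (in[i] - min_range) * range(T) / (max_range - min_range); range(quint8) = 255
    scale = 255.0 / (max_range - min_range)
    return np.round((x - min_range) * scale).astype(np.uint8)

def dequantize_min_combined(q, min_range, max_range):
    # out[i] = min_range + in[i] * (max_range - min_range) / range(T)
    return min_range + q.astype(np.float32) * (max_range - min_range) / 255.0

x = np.array([0.0, 1.5, 3.0, 6.0], dtype=np.float32)
q = quantize_min_combined(x, 0.0, 6.0)          # 1.5 * 255/6 = 63.75 -> 64
print(q, dequantize_min_combined(q, 0.0, 6.0))  # ~[0, 64, 128, 255], and floats close to x
```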


@ -1,126 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
namespace tensorflow {
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
REGISTER_OP("QuantizedMatMul")
.Input("a: T1")
.Input("b: T2")
.Input("min_a: float")
.Input("max_a: float")
.Input("min_b: float")
.Input("max_b: float")
.Output("out: Toutput")
.Output("min_out: float")
.Output("max_out: float")
.Attr("T1: quantizedtype")
.Attr("T2: quantizedtype")
.Attr("Toutput: quantizedtype = DT_QINT32")
.Attr("transpose_a: bool = false")
.Attr("transpose_b: bool = false")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Perform a quantized matrix multiplication of `a` by the matrix `b`.
The inputs must be two-dimensional matrices and the inner dimension of
`a` (after being transposed if `transpose_a` is non-zero) must match the
outer dimension of `b` (after being transposed if `transposed_b` is
non-zero).
a: Must be a two-dimensional tensor.
b: Must be a two-dimensional tensor.
transpose_a: If true, `a` is transposed before multiplication.
transpose_b: If true, `b` is transposed before multiplication.
min_a: The float value that the lowest quantized `a` value represents.
max_a: The float value that the highest quantized `a` value represents.
min_b: The float value that the lowest quantized `b` value represents.
max_b: The float value that the highest quantized `b` value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizeDownAndShrinkRange")
.Input("input: Tinput")
.Input("input_min: float")
.Input("input_max: float")
.Output("output: out_type")
.Output("output_min: float")
.Output("output_max: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Convert the quantized 'input' tensor into a lower-precision 'output', using the
actual distribution of the values to maximize the usage of the lower bit depth
and adjusting the output min and max ranges accordingly.
[input_min, input_max] are scalar floats that specify the range for the float
interpretation of the 'input' data. For example, if input_min is -1.0f and
input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
This operator tries to squeeze as much precision as possible into an output with
a lower bit depth by calculating the actual min and max values found in the
data. For example, maybe that quint16 input has no values lower than 16,384 and
none higher than 49,152. That means only half the range is actually needed, all
the float interpretations are between -0.5f and 0.5f, so if we want to compress
the data into a quint8 output, we can use that range rather than the theoretical
-1.0f to 1.0f that is suggested by the input min and max.
In practice, this is most useful for taking output from operations like
QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
may have large potential output ranges, but in practice have a distribution of
input values that only uses a small fraction of the possible range. By feeding
that output into this operator, we can reduce it from 32 bits down to 8 with
minimal loss of accuracy.
input_min: The float value that the minimum quantized input value represents.
input_max: The float value that the maximum quantized input value represents.
Tinput: The type of the input.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
out_type: The type of the output. Should be a lower bit depth than Tinput.
)doc");
} // namespace tensorflow
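The QuantizeDownAndShrinkRange doc above works through a quint16 example: values between 16,384 and 49,152 inside a [-1.0, 1.0] range only cover [-0.5, 0.5], so that narrower interval can be mapped onto 8 bits. A NumPy sketch of that rescaling idea; the helper names and the simple linear remapping are assumptions for illustration, not the kernel's implementation:

```
import numpy as np

def to_float(q, q_min, q_max, range_min, range_max):
    # Linear map from the quantized grid [q_min, q_max] onto [range_min, range_max].
    return range_min + (q.astype(np.float64) - q_min) * (range_max - range_min) / (q_max - q_min)

def shrink_to_uint8(q16, range_min=-1.0, range_max=1.0):
    as_float = to_float(q16, 0, 65535, range_min, range_max)
    # Requantize only the interval the data actually occupies.
    out_min, out_max = float(as_float.min()), float(as_float.max())
    q8 = np.round((as_float - out_min) * 255.0 / (out_max - out_min)).astype(np.uint8)
    return q8, out_min, out_max

q16 = np.array([16384, 32768, 49152], dtype=np.uint16)
print(shrink_to_uint8(q16))   # roughly [0, 128, 255], with out_min/out_max near -0.5 / 0.5
```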


@ -1,348 +0,0 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/util/padding.h"
namespace tensorflow {
using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
REGISTER_OP("QuantizedAvgPool")
.Input("input: T")
.Input("min_input: float")
.Input("max_input: float")
.Output("output: T")
.Output("min_output: float")
.Output("max_output: float")
.Attr("T: quantizedtype")
.Attr("ksize: list(int)")
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Produces the average pool of the input tensor for quantized types.
input: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the window for each dimension of the input tensor.
The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedBiasAdd")
.Input("input: T1")
.Input("bias: T2")
.Input("min_input: float")
.Input("max_input: float")
.Input("min_bias: float")
.Input("max_bias: float")
.Output("output: out_type")
.Output("min_out: float")
.Output("max_out: float")
.Attr("T1: quantizedtype")
.Attr("T2: quantizedtype")
.Attr("out_type: quantizedtype")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Adds Tensor 'bias' to Tensor 'input' for Quantized types.
Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
bias: A 1D bias Tensor with size matching the last dimension of 'input'.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_bias: The float value that the lowest quantized bias value represents.
max_bias: The float value that the highest quantized bias value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedConv2D")
.Input("input: Tinput")
.Input("filter: Tfilter")
.Input("min_input: float")
.Input("max_input: float")
.Input("min_filter: float")
.Input("max_filter: float")
.Output("output: out_type")
.Output("min_output: float")
.Output("max_output: float")
.Attr("Tinput: quantizedtype")
.Attr("Tfilter: quantizedtype")
.Attr("out_type: quantizedtype = DT_QINT32")
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes a 2D convolution given quantized 4D input and filter tensors.
The inputs are quantized tensors where the lowest value represents the real
number of the associated minimum, and the highest represents the maximum.
This means that you can only interpret the quantized output in the same way, by
taking the returned minimum and maximum values into account.
filter: filter's input_depth dimension must match input's depth dimensions.
strides: The stride of the sliding window for each dimension of the input
tensor.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_filter: The float value that the lowest quantized filter value represents.
max_filter: The float value that the highest quantized filter value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedMaxPool")
.Input("input: T")
.Input("min_input: float")
.Input("max_input: float")
.Output("output: T")
.Output("min_output: float")
.Output("max_output: float")
.Attr("T: quantizedtype")
.Attr("ksize: list(int)")
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Produces the max pool of the input tensor for quantized types.
input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
ksize: The size of the window for each dimension of the input tensor.
The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedRelu")
.Input("features: Tinput")
.Input("min_features: float")
.Input("max_features: float")
.Output("activations: out_type")
.Output("min_activations: float")
.Output("max_activations: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype = DT_QUINT8")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes Quantized Rectified Linear: `max(features, 0)`
activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.
)doc");
REGISTER_OP("QuantizedRelu6")
.Input("features: Tinput")
.Input("min_features: float")
.Input("max_features: float")
.Output("activations: out_type")
.Output("min_activations: float")
.Output("max_activations: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype = DT_QUINT8")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.
)doc");
REGISTER_OP("QuantizedReluX")
.Input("features: Tinput")
.Input("max_value: float")
.Input("min_features: float")
.Input("max_features: float")
.Output("activations: out_type")
.Output("min_activations: float")
.Output("max_activations: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype = DT_QUINT8")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.
)doc");
REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
.Input("t: Tinput")
.Input("t_min: float")
.Input("t_max: float")
.Input("m: Tinput")
.Input("m_min: float")
.Input("m_max: float")
.Input("v: Tinput")
.Input("v_min: float")
.Input("v_max: float")
.Input("beta: Tinput")
.Input("beta_min: float")
.Input("beta_max: float")
.Input("gamma: Tinput")
.Input("gamma_min: float")
.Input("gamma_max: float")
.Output("result: out_type")
.Output("result_min: float")
.Output("result_max: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype")
.Attr("variance_epsilon: float")
.Attr("scale_after_normalization: bool")
.SetShapeFn([](InferenceContext* c) {
ShapeHandle input;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
DimensionHandle last_dim = c->Dim(input, 3);
for (int i = 1; i < 5; ++i) { // covers m, v, beta, gamma
ShapeHandle vec;
TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
}
ShapeHandle out;
TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
c->set_output(0, out);
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Quantized Batch normalization.
This op is deprecated and will be removed in the future. Prefer
`tf.nn.batch_normalization`.
t: A 4D input Tensor.
t_min: The value represented by the lowest quantized input.
t_max: The value represented by the highest quantized input.
m: A 1D mean Tensor with size matching the last dimension of t.
This is the first output from tf.nn.moments,
or a saved moving average thereof.
m_min: The value represented by the lowest quantized mean.
m_max: The value represented by the highest quantized mean.
v: A 1D variance Tensor with size matching the last dimension of t.
This is the second output from tf.nn.moments,
or a saved moving average thereof.
v_min: The value represented by the lowest quantized variance.
v_max: The value represented by the highest quantized variance.
beta: A 1D beta Tensor with size matching the last dimension of t.
An offset to be added to the normalized tensor.
beta_min: The value represented by the lowest quantized offset.
beta_max: The value represented by the highest quantized offset.
gamma: A 1D gamma Tensor with size matching the last dimension of t.
If "scale_after_normalization" is true, this tensor will be multiplied
with the normalized tensor.
gamma_min: The value represented by the lowest quantized gamma.
gamma_max: The value represented by the highest quantized gamma.
variance_epsilon: A small float number to avoid dividing by 0.
scale_after_normalization: A bool indicating whether the resulted tensor
needs to be multiplied with gamma.
)doc");
} // namespace tensorflow
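All of the activation, pooling, and batch-norm ops above share the same convention: the computation happens on quantized values, while a float min/max pair describes what those values mean. As a float-reference sketch of the documented QuantizedRelu6 semantics (the real kernel clamps directly in the quantized domain and chooses its own output range; this only restates the math):

```
import numpy as np

def dequantize(q, min_val, max_val):
    # quint8 value -> float interpretation within [min_val, max_val].
    return min_val + q.astype(np.float32) * (max_val - min_val) / 255.0

def quantized_relu6_reference(q_features, min_features, max_features):
    # Documented semantics: min(max(features, 0), 6) on the float interpretation.
    return np.clip(dequantize(q_features, min_features, max_features), 0.0, 6.0)

q = np.array([0, 64, 128, 255], dtype=np.uint8)   # quint8 features
print(quantized_relu6_reference(q, -3.0, 9.0))    # [0.0, ~0.01, ~3.02, 6.0]
```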


@ -19,7 +19,7 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.contrib.quantization.ops.gen_array_ops import dequantize
from tensorflow.contrib.quantization.ops.gen_array_ops import quantize_v2
from tensorflow.contrib.quantization.ops.gen_array_ops import quantized_concat
from tensorflow.python.ops import gen_array_ops as quantized_gen_array_ops
from tensorflow.python.ops.gen_array_ops import dequantize
from tensorflow.python.ops.gen_array_ops import quantize_v2
from tensorflow.python.ops.gen_array_ops import quantized_concat


@ -19,10 +19,7 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_math_ops
from tensorflow.contrib.quantization.ops.gen_math_ops import *
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops
ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops.gen_math_ops import *


@ -19,17 +19,7 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,wildcard-import
from tensorflow.contrib.quantization.ops import gen_nn_ops
from tensorflow.contrib.quantization.ops.gen_nn_ops import *
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import ops
ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn)
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops.gen_nn_ops import *


@ -524,6 +524,7 @@ cc_library(
"//tensorflow/core/kernels:nn",
"//tensorflow/core/kernels:parameterized_truncated_normal_op",
"//tensorflow/core/kernels:parsing",
"//tensorflow/core/kernels:quantized_ops",
"//tensorflow/core/kernels:random_ops",
"//tensorflow/core/kernels:required",
"//tensorflow/core/kernels:sdca_ops",
@ -734,6 +735,7 @@ cc_library(
deps = [
":protos_cc",
"//third_party/eigen3",
"@gemmlowp//:gemmlowp",
],
alwayslink = 1,
)


@ -2228,6 +2228,7 @@ filegroup(
srcs = [
":android_extended_ops_group1",
":android_extended_ops_group2",
":android_quantized_ops",
],
visibility = ["//visibility:public"],
)
@ -2366,6 +2367,26 @@ filegroup(
],
)
filegroup(
name = "android_quantized_ops",
srcs = [
"dequantize_op.cc",
"quantization_utils.cc",
"quantization_utils.h",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
"quantized_activation_ops.cc",
"quantized_batch_norm_op.cc",
"quantized_bias_add_op.cc",
"quantized_concat_op.cc",
"quantized_conv_ops.cc",
"quantized_matmul_op.cc",
"quantized_pooling_ops.cc",
"reference_gemm.h",
],
visibility = ["//visibility:public"],
)
# A file group which contains nearly all available operators which
# may work on Android. This is intended to be used with selective
# registration.
@ -2436,10 +2457,244 @@ cc_library(
"//tensorflow/core:android_tensorflow_lib_lite",
"//tensorflow/core:protos_cc",
"//third_party/eigen3",
"@gemmlowp//:gemmlowp",
],
alwayslink = 1,
)
# Quantization-specific OpKernels
tf_kernel_library(
name = "quantized_ops",
srcs = [
"dequantize_op.cc",
"quantization_utils.cc",
"quantize_down_and_shrink_range.cc",
"quantize_op.cc",
"quantized_activation_ops.cc",
"quantized_batch_norm_op.cc",
"quantized_bias_add_op.cc",
"quantized_concat_op.cc",
"quantized_conv_ops.cc",
"quantized_matmul_op.cc",
"quantized_pooling_ops.cc",
],
hdrs = [
"quantization_utils.h",
"reference_gemm.h",
],
deps = [
"//tensorflow/core",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core/kernels:concat_lib_hdrs",
"//tensorflow/core/kernels:conv_ops",
"//tensorflow/core/kernels:eigen_helpers",
"//tensorflow/core/kernels:ops_util",
"//tensorflow/core/kernels:pooling_ops",
"//third_party/eigen3",
"@gemmlowp//:gemmlowp",
],
)
tf_cc_test(
name = "quantize_down_and_shrink_range_op_test",
size = "small",
srcs = ["quantize_down_and_shrink_range_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantization_utils_test",
srcs = ["quantization_utils_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:core_cpu",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//third_party/eigen3",
],
)
tf_cc_test(
name = "quantized_activation_ops_test",
srcs = ["quantized_activation_ops_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_bias_add_op_test",
size = "small",
srcs = ["quantized_bias_add_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_conv_ops_test",
size = "small",
srcs = ["quantized_conv_ops_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantize_op_test",
size = "small",
srcs = ["quantize_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_matmul_op_test",
size = "small",
srcs = ["quantized_matmul_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_pooling_ops_test",
size = "small",
srcs = ["quantized_pooling_ops_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_concat_op_test",
size = "small",
srcs = ["quantized_concat_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "quantized_batch_norm_op_test",
size = "small",
srcs = ["quantized_batch_norm_op_test.cc"],
deps = [
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:batch_norm_op",
"//tensorflow/core/kernels:ops_testutil",
"//third_party/eigen3",
],
)
# -----------------------------------------------------------------------------
# Google-internal targets. These must be at the end for syncrepo.

View File

@ -17,7 +17,7 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

View File

@ -31,10 +31,6 @@ tf_cc_test(
size = "small",
srcs = ["quantized_matmul_op_for_hexagon_test.cc"],
deps = [
"//tensorflow/contrib/quantization:cc_array_ops",
"//tensorflow/contrib/quantization:cc_math_ops",
"//tensorflow/contrib/quantization:cc_nn_ops",
"//tensorflow/contrib/quantization/kernels:quantized_ops",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
@ -42,6 +38,7 @@ tf_cc_test(
"//tensorflow/core:testlib",
"//tensorflow/core/kernels:ops_testutil",
"//tensorflow/core/kernels:ops_util",
"//tensorflow/core/kernels:quantized_ops",
],
)
@ -51,7 +48,6 @@ tf_cc_test(
srcs = ["graph_transferer_test.cc"],
deps = [
"//tensorflow/cc:cc_ops",
"//tensorflow/contrib/quantization/kernels/hexagon:graph_transferer",
"//tensorflow/core:core_cpu",
"//tensorflow/core:direct_session",
"//tensorflow/core:lib",
@ -60,6 +56,7 @@ tf_cc_test(
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/kernels/hexagon:graph_transferer",
],
)

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"
namespace tensorflow {
void GraphTransferer::LoadGraphFromProto(

View File

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_LOADER_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/protobuf.h"
@ -37,4 +37,4 @@ class GraphTransferer {
} // namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H
#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_GRAPH_LOADER_H_

View File

@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/session.h"

View File

@ -14,7 +14,8 @@ limitations under the License.
==============================================================================*/
// Tests in this file are designed to evaluate hexagon DSP operations.
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#define EIGEN_USE_THREADS
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"
@ -26,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
namespace tensorflow {

View File

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_
#define EIGEN_USE_THREADS
@ -552,4 +552,4 @@ class TensorflowGemmContext : public gemmlowp::MultiThreadGemmContextBase {
} // namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_QUANTIZATION_UTILS_H_
#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_QUANTIZATION_UTILS_H_

View File

@ -18,7 +18,7 @@ limitations under the License.
#include <limits>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/tensor_testutil.h"

View File

@ -20,7 +20,7 @@ limitations under the License.
#include <math.h>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

View File

@ -17,7 +17,7 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"

View File

@ -16,7 +16,7 @@ limitations under the License.
// Implements a quantized version of the Relu6 operation.
#define EIGEN_USE_THREADS
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -16,7 +16,7 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"

View File

@ -16,7 +16,7 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"

View File

@ -15,7 +15,7 @@ limitations under the License.
// Implements a quantized eight-bit version of the bias addition operation.
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"

View File

@ -15,7 +15,7 @@ limitations under the License.
#include <functional>
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -18,7 +18,7 @@ limitations under the License.
#include <vector>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor_types.h"

View File

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"

View File

@ -19,8 +19,8 @@ limitations under the License.
#include <vector>
#include "public/gemmlowp.h"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/reference_gemm.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/ops_util.h"

View File

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -16,8 +16,8 @@ limitations under the License.
// Implements a quantized eight-bit version of the matmul operation.
#include "public/gemmlowp.h"
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/contrib/quantization/kernels/reference_gemm.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/reference_gemm.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"

View File

@ -17,7 +17,7 @@ limitations under the License.
#include <memory>
#include <vector>
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/quantization/kernels/quantization_utils.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
// This is an unoptimized but debuggable implementation of the GEMM matrix
// multiply function, used to compare to faster but more opaque versions, or
@ -87,4 +87,4 @@ void ReferenceGemm(bool transpose_a, bool transpose_b, bool transpose_c,
}
} // namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_REFERENCE_GEMM_H_
#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_REFERENCE_GEMM_H_
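
The header above describes itself as an unoptimized but debuggable GEMM used to compare against the faster, more opaque paths. As a rough illustration of that idea, here is a minimal NumPy sketch of a quantized reference matmul; the function name, the zero-point offsets, and the uint8 inputs are assumptions for the example and do not mirror the actual ReferenceGemm signature.

```
import numpy as np

def reference_quantized_gemm(a_q, b_q, a_offset, b_offset):
  # Debuggable reference for a quantized matmul: subtract the (hypothetical)
  # zero-point offsets, accumulate in 32 bits, using a deliberately naive
  # triple loop that is easy to single-step and compare against.
  a = a_q.astype(np.int32) - a_offset
  b = b_q.astype(np.int32) - b_offset
  m, k = a.shape
  k2, n = b.shape
  assert k == k2
  out = np.zeros((m, n), dtype=np.int32)
  for i in range(m):
    for j in range(n):
      for l in range(k):
        out[i, j] += a[i, l] * b[l, j]
  return out

a_q = np.array([[10, 200], [0, 255]], dtype=np.uint8)
b_q = np.array([[128, 1], [2, 254]], dtype=np.uint8)
print(reference_quantized_gemm(a_q, b_q, a_offset=128, b_offset=128))
```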

View File

@ -4054,6 +4054,176 @@ debug_urls: List of URLs to debug targets, e.g.,
file:///foo/tfdbg_dump, grpc:://localhost:11011
)doc");
REGISTER_OP("QuantizeV2")
.Input("input: float")
.Input("min_range: float")
.Input("max_range: float")
.Output("output: T")
.Output("output_min: float")
.Output("output_max: float")
.Attr("T: quantizedtype")
.Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Quantize the 'input' tensor of type float to 'output' tensor of type 'T'.
[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.
In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
```
out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
if T == qint8, out[i] -= (range(T) + 1) / 2.0
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
*MIN_COMBINED Mode Example*
Assume the input is type float and has a possible range of [0.0, 6.0] and the
output type is quint8 ([0, 255]). The min_range and max_range values should be
specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each
value of the input by 255/6 and cast to quint8.
If the output type were qint8 ([-128, 127]), the operation would additionally
subtract 128 from each value before casting, so that the range of values aligns
with the range of qint8.
If the mode is 'MIN_FIRST', then this approach is used:
```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = number_of_steps / range
quantized = round(input * range_scale) - round(range_min * range_scale) +
numeric_limits<T>::min()
quantized = max(quantized, numeric_limits<T>::min())
quantized = min(quantized, numeric_limits<T>::max())
```
The biggest difference between this and MIN_COMBINED is that the minimum range
is rounded first, before it's subtracted from the rounded value. With
MIN_COMBINED, a small bias is introduced, and repeated iterations of quantizing
and dequantizing accumulate a larger and larger error.
One thing to watch out for is that the operator may choose to adjust the
requested minimum and maximum values slightly during the quantization process,
so you should always use the output ports as the range for further calculations.
For example, if the requested minimum and maximum values are close to equal,
they will be separated by a small epsilon value to prevent ill-formed quantized
buffers from being created. Otherwise, you can end up with buffers where all the
quantized values map to the same float value, which causes problems for
operations that have to perform further calculations on them.
min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
output: The quantized data produced from the float input.
output_min: The actual minimum scalar value used for the output.
output_max: The actual maximum scalar value used for the output.
)doc");
REGISTER_OP("Dequantize")
.Input("input: T")
.Input("min_range: float")
.Input("max_range: float")
.Output("output: float")
.Attr("T: quantizedtype")
.Attr("mode: {'MIN_COMBINED', 'MIN_FIRST'} = 'MIN_COMBINED'")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
return Status::OK();
})
.Doc(R"doc(
Dequantize the 'input' tensor into a float Tensor.
[min_range, max_range] are scalar floats that specify the range for
the 'input' data. The 'mode' attribute controls exactly which calculations are
used to convert the float values to their quantized equivalents.
In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
```
if T == qint8, in[i] += (range(T) + 1) / 2.0
out[i] = min_range + (in[i] * (max_range - min_range) / range(T))
```
here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`
*MIN_COMBINED Mode Example*
If the input comes from a QuantizedRelu6, the output type is
quint8 (range of 0-255) but the possible range of QuantizedRelu6 is
0-6. The min_range and max_range values are therefore 0.0 and 6.0.
Dequantize on quint8 will take each value, cast to float, and multiply
by 6 / 255.
Note that if quantizedtype is qint8, the operation will additionally add 128
to each value before casting.
If the mode is 'MIN_FIRST', then this approach is used:
```
number_of_steps = 1 << (# of bits in T)
range_adjust = number_of_steps / (number_of_steps - 1)
range = (range_max - range_min) * range_adjust
range_scale = range / number_of_steps
const double offset_input = static_cast<double>(input) - lowest_quantized;
result = range_min + ((input - numeric_limits<T>::min()) * range_scale)
```
min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
)doc");
REGISTER_OP("QuantizedConcat")
.Input("concat_dim: int32")
.Input("values: N * T")
.Input("input_mins: N * float32")
.Input("input_maxes: N * float32")
.Output("output: T")
.Output("output_min: float")
.Output("output_max: float")
.Attr("N: int >= 2")
.Attr("T: type")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::ConcatShape(c));
ShapeHandle unused;
for (int i = std::max(0, c->num_inputs() - 2); i < c->num_inputs(); ++i) {
TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused));
}
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Concatenates quantized tensors along one dimension.
concat_dim: 0-D. The dimension along which to concatenate. Must be in the
range [0, rank(values)).
values: The `N` Tensors to concatenate. Their ranks and types must match,
and their sizes must match in all dimensions except `concat_dim`.
input_mins: The minimum scalar values for each of the input tensors.
input_maxes: The maximum scalar values for each of the input tensors.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
output: A `Tensor` with the concatenation of values stacked along the
`concat_dim` dimension. This tensor's shape matches that of `values` except
in `concat_dim` where it has the sum of the sizes.
)doc");
// Deprecated op registrations:
// The following can be deleted after 10mar2017.

View File

@ -2058,6 +2058,106 @@ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0]
```
)doc");
REGISTER_OP("QuantizedMatMul")
.Input("a: T1")
.Input("b: T2")
.Input("min_a: float")
.Input("max_a: float")
.Input("min_b: float")
.Input("max_b: float")
.Output("out: Toutput")
.Output("min_out: float")
.Output("max_out: float")
.Attr("T1: quantizedtype")
.Attr("T2: quantizedtype")
.Attr("Toutput: quantizedtype = DT_QINT32")
.Attr("transpose_a: bool = false")
.Attr("transpose_b: bool = false")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::MatMulShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Perform a quantized matrix multiplication of `a` by the matrix `b`.
The inputs must be two-dimensional matrices and the inner dimension of
`a` (after being transposed if `transpose_a` is non-zero) must match the
outer dimension of `b` (after being transposed if `transpose_b` is
non-zero).
a: Must be a two-dimensional tensor.
b: Must be a two-dimensional tensor.
transpose_a: If true, `a` is transposed before multiplication.
transpose_b: If true, `b` is transposed before multiplication.
min_a: The float value that the lowest quantized `a` value represents.
max_a: The float value that the highest quantized `a` value represents.
min_b: The float value that the lowest quantized `b` value represents.
max_b: The float value that the highest quantized `b` value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizeDownAndShrinkRange")
.Input("input: Tinput")
.Input("input_min: float")
.Input("input_max: float")
.Output("output: out_type")
.Output("output_min: float")
.Output("output_max: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Convert the quantized 'input' tensor into a lower-precision 'output', using the
actual distribution of the values to maximize the usage of the lower bit depth
and adjusting the output min and max ranges accordingly.
[input_min, input_max] are scalar floats that specify the range for the float
interpretation of the 'input' data. For example, if input_min is -1.0f and
input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0
value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f.
This operator tries to squeeze as much precision as possible into an output with
a lower bit depth by calculating the actual min and max values found in the
data. For example, maybe that quint16 input has no values lower than 16,384 and
none higher than 49,152. That means only half the range is actually needed, all
the float interpretations are between -0.5f and 0.5f, so if we want to compress
the data into a quint8 output, we can use that range rather than the theoretical
-1.0f to 1.0f that is suggested by the input min and max.
In practice, this is most useful for taking output from operations like
QuantizedMatMul that can produce higher bit-depth outputs than their inputs and
may have large potential output ranges, but in practice have a distribution of
input values that only uses a small fraction of the possible range. By feeding
that output into this operator, we can reduce it from 32 bits down to 8 with
minimal loss of accuracy.
input_min: The float value that the minimum quantized input value represents.
input_max: The float value that the maximum quantized input value represents.
Tinput: The type of the input.
output_min: The float value that the minimum quantized output value represents.
output_max: The float value that the maximum quantized output value represents.
out_type: The type of the output. Should be a lower bit depth than Tinput.
)doc");
// Deprecated ops:
REGISTER_OP("BatchFFT")
.Input("input: complex64")

View File

@ -1994,4 +1994,324 @@ overlapping: When set to True, it means when pooling, the values at the boundary
output: 4-D. Gradients w.r.t. the input of `fractional_avg_pool`.
)doc");
REGISTER_OP("QuantizedAvgPool")
.Input("input: T")
.Input("min_input: float")
.Input("max_input: float")
.Output("output: T")
.Output("min_output: float")
.Output("max_output: float")
.Attr("T: quantizedtype")
.Attr("ksize: list(int)")
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::AvgPoolShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Produces the average pool of the input tensor for quantized types.
input: 4-D with shape `[batch, height, width, channels]`.
ksize: The size of the window for each dimension of the input tensor.
The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedBiasAdd")
.Input("input: T1")
.Input("bias: T2")
.Input("min_input: float")
.Input("max_input: float")
.Input("min_bias: float")
.Input("max_bias: float")
.Output("output: out_type")
.Output("min_out: float")
.Output("max_out: float")
.Attr("T1: quantizedtype")
.Attr("T2: quantizedtype")
.Attr("out_type: quantizedtype")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::BiasAddShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Adds Tensor 'bias' to Tensor 'input' for Quantized types.
Broadcasts the values of bias on dimensions 0..N-2 of 'input'.
bias: A 1D bias Tensor with size matching the last dimension of 'input'.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_bias: The float value that the lowest quantized bias value represents.
max_bias: The float value that the highest quantized bias value represents.
min_out: The float value that the lowest quantized output value represents.
max_out: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedConv2D")
.Input("input: Tinput")
.Input("filter: Tfilter")
.Input("min_input: float")
.Input("max_input: float")
.Input("min_filter: float")
.Input("max_filter: float")
.Output("output: out_type")
.Output("min_output: float")
.Output("max_output: float")
.Attr("Tinput: quantizedtype")
.Attr("Tfilter: quantizedtype")
.Attr("out_type: quantizedtype = DT_QINT32")
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::Conv2DShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes a 2D convolution given quantized 4D input and filter tensors.
The inputs are quantized tensors where the lowest value represents the real
number of the associated minimum, and the highest represents the maximum.
This means that you can only interpret the quantized output in the same way, by
taking the returned minimum and maximum values into account.
filter: filter's input_depth dimension must match input's depth dimensions.
strides: The stride of the sliding window for each dimension of the input
tensor.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_filter: The float value that the lowest quantized filter value represents.
max_filter: The float value that the highest quantized filter value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedMaxPool")
.Input("input: T")
.Input("min_input: float")
.Input("max_input: float")
.Output("output: T")
.Output("min_output: float")
.Output("max_output: float")
.Attr("T: quantizedtype")
.Attr("ksize: list(int)")
.Attr("strides: list(int)")
.Attr(GetPaddingAttrString())
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::MaxPoolShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Produces the max pool of the input tensor for quantized types.
input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over.
ksize: The size of the window for each dimension of the input tensor.
The length must be 4 to match the number of dimensions of the input.
strides: The stride of the sliding window for each dimension of the input
tensor. The length must be 4 to match the number of dimensions of the input.
padding: The type of padding algorithm to use.
min_input: The float value that the lowest quantized input value represents.
max_input: The float value that the highest quantized input value represents.
min_output: The float value that the lowest quantized output value represents.
max_output: The float value that the highest quantized output value represents.
)doc");
REGISTER_OP("QuantizedRelu")
.Input("features: Tinput")
.Input("min_features: float")
.Input("max_features: float")
.Output("activations: out_type")
.Output("min_activations: float")
.Output("max_activations: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype = DT_QUINT8")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes Quantized Rectified Linear: `max(features, 0)`
activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.
)doc");
REGISTER_OP("QuantizedRelu6")
.Input("features: Tinput")
.Input("min_features: float")
.Input("max_features: float")
.Output("activations: out_type")
.Output("min_activations: float")
.Output("max_activations: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype = DT_QUINT8")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)`
activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.
)doc");
REGISTER_OP("QuantizedReluX")
.Input("features: Tinput")
.Input("max_value: float")
.Input("min_features: float")
.Input("max_features: float")
.Output("activations: out_type")
.Output("min_activations: float")
.Output("max_activations: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype = DT_QUINT8")
.SetShapeFn([](InferenceContext* c) {
TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)`
activations: Has the same output shape as "features".
min_features: The float value that the lowest quantized value represents.
max_features: The float value that the highest quantized value represents.
min_activations: The float value that the lowest quantized value represents.
max_activations: The float value that the highest quantized value represents.
)doc");
REGISTER_OP("QuantizedBatchNormWithGlobalNormalization")
.Input("t: Tinput")
.Input("t_min: float")
.Input("t_max: float")
.Input("m: Tinput")
.Input("m_min: float")
.Input("m_max: float")
.Input("v: Tinput")
.Input("v_min: float")
.Input("v_max: float")
.Input("beta: Tinput")
.Input("beta_min: float")
.Input("beta_max: float")
.Input("gamma: Tinput")
.Input("gamma_min: float")
.Input("gamma_max: float")
.Output("result: out_type")
.Output("result_min: float")
.Output("result_max: float")
.Attr("Tinput: quantizedtype")
.Attr("out_type: quantizedtype")
.Attr("variance_epsilon: float")
.Attr("scale_after_normalization: bool")
.SetShapeFn([](InferenceContext* c) {
ShapeHandle input;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
DimensionHandle last_dim = c->Dim(input, 3);
for (int i = 1; i < 5; ++i) { // covers m, v, beta, gamma
ShapeHandle vec;
TF_RETURN_IF_ERROR(c->WithRank(c->input(i * 3), 1, &vec));
TF_RETURN_IF_ERROR(c->Merge(last_dim, c->Dim(vec, 0), &last_dim));
}
ShapeHandle out;
TF_RETURN_IF_ERROR(c->ReplaceDim(input, 3, last_dim, &out));
c->set_output(0, out);
c->set_output(1, c->Scalar());
c->set_output(2, c->Scalar());
return Status::OK();
})
.Doc(R"doc(
Quantized Batch normalization.
This op is deprecated and will be removed in the future. Prefer
`tf.nn.batch_normalization`.
t: A 4D input Tensor.
t_min: The value represented by the lowest quantized input.
t_max: The value represented by the highest quantized input.
m: A 1D mean Tensor with size matching the last dimension of t.
This is the first output from tf.nn.moments,
or a saved moving average thereof.
m_min: The value represented by the lowest quantized mean.
m_max: The value represented by the highest quantized mean.
v: A 1D variance Tensor with size matching the last dimension of t.
This is the second output from tf.nn.moments,
or a saved moving average thereof.
v_min: The value represented by the lowest quantized variance.
v_max: The value represented by the highest quantized variance.
beta: A 1D beta Tensor with size matching the last dimension of t.
An offset to be added to the normalized tensor.
beta_min: The value represented by the lowest quantized offset.
beta_max: The value represented by the highest quantized offset.
gamma: A 1D gamma Tensor with size matching the last dimension of t.
If "scale_after_normalization" is true, this tensor will be multiplied
with the normalized tensor.
gamma_min: The value represented by the lowest quantized gamma.
gamma_max: The value represented by the highest quantized gamma.
variance_epsilon: A small float number to avoid dividing by 0.
scale_after_normalization: A bool indicating whether the resulted tensor
needs to be multiplied with gamma.
)doc");
} // namespace tensorflow

View File

@ -2149,6 +2149,33 @@ py_binary(
],
)
# -----------------------------------------------------------------------------
# Quantization
py_test(
name = "dequantize_op_test",
size = "small",
srcs = ["ops/dequantize_op_test.py"],
srcs_version = "PY2AND3",
deps = [
":ops",
"//tensorflow:tensorflow_py",
"//tensorflow/python:framework_test_lib",
],
)
py_test(
name = "quantized_conv_ops_test",
size = "small",
srcs = ["ops/quantized_conv_ops_test.py"],
srcs_version = "PY2AND3",
deps = [
":ops",
"//tensorflow:tensorflow_py",
"//tensorflow/python:framework_test_lib",
],
)
filegroup(
name = "all_files",
srcs = glob(

View File

@ -74,6 +74,9 @@ or join multiple tensors together.
@@boolean_mask
@@one_hot
@@sequence_mask
@@dequantize
@@quantize_v2
@@quantized_concat
"""
from __future__ import absolute_import
@ -2318,3 +2321,9 @@ def squeeze(input, squeeze_dims=None, name=None):
if np.isscalar(squeeze_dims):
squeeze_dims = [squeeze_dims]
return gen_array_ops._squeeze(input, squeeze_dims, name)
# TODO(cwhipkey): Verify and enable shape functions for these.
ops.RegisterShape("QuantizeV2")(None)
ops.RegisterShape("QuantizedBatchNormWithGlobalNormalization")(None)
ops.RegisterShape("QuantizedConcat")(None)

View File

@ -21,24 +21,16 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so
class DequantizeOpTest(tf.test.TestCase):
def __init__(self, method_name="runTest"):
super(DequantizeOpTest, self).__init__(method_name)
load_quantized_ops_so.Load()
load_quantized_kernels_so.Load()
def _testDequantizeOp(self, inputs, min_range, max_range, dtype):
with self.test_session():
input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype)
dequantized = tf.contrib.quantization.dequantize(
dequantized = tf.dequantize(
input_op, min_range, max_range)
tf_ans = dequantized.eval()

View File

@ -1954,3 +1954,6 @@ def reduced_shape(input_shape, axes):
axes], # [1, 2]
[input_shape, # [2, 3, 5, 7]
array_ops.fill(axes_shape, 1)]) # [1, 1]
ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn)

View File

@ -277,6 +277,12 @@ classes when using one of the sampled loss functions above.
@@compute_accidental_hits
### Quantization ops
@@quantized_relu_x
@@quantized_max_pool
@@quantized_avg_pool
"""
from __future__ import absolute_import
from __future__ import division

View File

@ -1925,4 +1925,14 @@ def erosion2d(value, kernel, strides, rates, padding, name=None):
padding=padding,
name=name))
ops.RegisterShape("QuantizedAvgPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedBiasAdd")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedConv2D")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedMaxPool")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedRelu6")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizedReluX")(common_shapes.call_cpp_shape_fn)
ops.RegisterShape("QuantizeDownAndShrinkRange")(common_shapes.call_cpp_shape_fn)
# pylint: enable=invalid-name

View File

@ -21,19 +21,11 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so
class Conv2DTest(tf.test.TestCase):
def __init__(self, method_name="runTest"):
super(Conv2DTest, self).__init__(method_name)
load_quantized_ops_so.Load()
load_quantized_kernels_so.Load()
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
expected):
@ -67,16 +59,16 @@ class Conv2DTest(tf.test.TestCase):
with self.test_session(use_gpu=False) as sess:
t1 = tf.constant(x1, shape=tensor_in_sizes, dtype=tf.quint8)
t2 = tf.constant(x2, shape=filter_in_sizes, dtype=tf.quint8)
conv = tf.contrib.quantization.quantized_conv2d(t1,
t2,
out_type=tf.qint32,
strides=[1, stride,
stride, 1],
padding=padding,
min_input=x1_min,
max_input=x1_max,
min_filter=x2_min,
max_filter=x2_max)
conv = tf.nn.quantized_conv2d(t1,
t2,
out_type=tf.qint32,
strides=[1, stride,
stride, 1],
padding=padding,
min_input=x1_min,
max_input=x1_max,
min_filter=x2_min,
max_filter=x2_max)
value = sess.run(conv)
quantized_output = value[0]
output_min = value[1]

View File

@ -142,18 +142,21 @@ def if_not_mobile(a):
})
def tf_copts():
return (["-fno-exceptions", "-DEIGEN_AVOID_STL_ARRAY"] +
return (["-fno-exceptions",
"-DEIGEN_AVOID_STL_ARRAY",
"-Iexternal/gemmlowp",] +
if_cuda(["-DGOOGLE_CUDA=1"]) +
if_android_arm(["-mfpu=neon"]) +
select({"//tensorflow:android": [
"-std=c++11",
"-DMIN_LOG_LEVEL=0",
"-DTF_LEAN_BINARY",
"-O2",
],
"//tensorflow:darwin": [],
"//tensorflow:ios": ["-std=c++11",],
"//conditions:default": ["-pthread"]}))
select({
"//tensorflow:android": [
"-std=c++11",
"-DMIN_LOG_LEVEL=0",
"-DTF_LEAN_BINARY",
"-O2",
],
"//tensorflow:darwin": [],
"//tensorflow:ios": ["-std=c++11",],
"//conditions:default": ["-pthread"]}))
def tf_opts_nortti_if_android():
return if_android([

View File

@ -13,9 +13,6 @@ py_library(
srcs_version = "PY2AND3",
deps = [
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/quantization:ops",
"//tensorflow/contrib/quantization:quantized_ops_py",
"//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
"//tensorflow/python:platform",
],
)
@ -26,9 +23,6 @@ py_binary(
srcs_version = "PY2AND3",
deps = [
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/quantization:ops",
"//tensorflow/contrib/quantization:quantized_ops_py",
"//tensorflow/contrib/quantization/kernels:quantized_kernels_py",
"//tensorflow/python:platform",
],
)

View File

@ -15,8 +15,8 @@
r"""Transforms a float-trained graph into an equivalent quantized version.
An example of command-line usage is:
bazel build tensorflow/contrib/quantization/tools:quantize_graph \
&& bazel-bin/tensorflow/contrib/quantization/tools/quantize_graph \
bazel build tensorflow/tools/quantization:quantize_graph \
&& bazel-bin/tensorflow/tools/quantization/quantize_graph \
--input=tensorflow_inception_graph.pb
--output_node_names="softmax2" --print_nodes --output=/tmp/quantized_graph.pb \
--mode=eightbit --logtostderr
@ -35,12 +35,6 @@ import tensorflow as tf
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import tensor_util
# TODO(petewarden) - Remove this ugly hack to get around Python linking problems
# with Bazel.
# pylint: disable=g-bad-import-order
from tensorflow.contrib.quantization import load_quantized_ops_so
from tensorflow.contrib.quantization.kernels import load_quantized_kernels_so
flags = tf.app.flags
FLAGS = flags.FLAGS
@ -60,8 +54,6 @@ flags.DEFINE_string("test_input_dims", "1,224,224,3",
""" graph loaded from a file.""")
flags.DEFINE_boolean("strip_redundant_quantization", True,
"""Removes redundant dequantize/quantize pairs.""")
flags.DEFINE_boolean("load_quantization_so", True,
"""Explicitly load the quantization ops library""")
def print_input_nodes(current_node, nodes_map, indent, already_visited):
@ -290,9 +282,6 @@ class GraphRewriter(object):
self.nodes_map = self.create_nodes_map(input_graph)
self.output_graph = None
self.mode = mode
if FLAGS.load_quantization_so:
load_quantized_ops_so.Load()
load_quantized_kernels_so.Load()
def create_nodes_map(self, graph):
"""Builds a mapping of node names to their defs from the graph."""

View File

@ -20,11 +20,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib.quantization.tools import quantize_graph
from tensorflow.python.framework import graph_util
from tensorflow.tools.quantization import quantize_graph
flags = tf.app.flags
FLAGS = flags.FLAGS