support for {int8,int8} convolutions and fusions
parent c85e67d2d5
commit e4e8f0c0ff
@@ -152,7 +152,7 @@ static inline bool IsMklLayoutDependentOp(const string& op_name,
   // Restrict quantized ops to QUINT8 and QINT8 for now
   if (kernel.find(kMklQuantizedOpLabelPattern) != string::npos) {
-    return (Tinput == DT_QUINT8 && Tfilter == DT_QINT8);
+    return (Tfilter == DT_QINT8);
   }
   return false;
 }
@@ -24,8 +24,8 @@ limitations under the License.
 #include <map>
 #include <vector>
 
-#include "mkldnn.hpp"
 #include "absl/strings/str_join.h"
+#include "mkldnn.hpp"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -570,17 +570,15 @@ class MklConvOp : public OpKernel {
       OP_REQUIRES(context, dilations_.size() == 5,
                   errors::InvalidArgument("Dilation rates field must "
                                           "specify 5 dimensions"));
-      OP_REQUIRES(context,
-                  (GetTensorDim(dilations_, data_format_, 'N') == 1 &&
-                   GetTensorDim(dilations_, data_format_, 'C') == 1),
+      OP_REQUIRES(context, (GetTensorDim(dilations_, data_format_, 'N') == 1 &&
+                            GetTensorDim(dilations_, data_format_, 'C') == 1),
                   errors::InvalidArgument(
                       "Current implementation does not yet support "
                       "dilations rates in the batch and depth dimensions."));
       OP_REQUIRES(
-          context,
-          (GetTensorDim(dilations_, data_format_, '0') > 0 &&
-           GetTensorDim(dilations_, data_format_, '1') > 0 &&
-           GetTensorDim(dilations_, data_format_, '2') > 0),
+          context, (GetTensorDim(dilations_, data_format_, '0') > 0 &&
+                    GetTensorDim(dilations_, data_format_, '1') > 0 &&
+                    GetTensorDim(dilations_, data_format_, '2') > 0),
           errors::InvalidArgument("Dilated rates should be larger than 0."));
     }
   }
@@ -590,7 +588,6 @@ class MklConvOp : public OpKernel {
       // Input tensors
       const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
       const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);
-
       MklDnnShape src_mkl_shape, filter_mkl_shape;
       GetMklShape(context, kInputIndex_Src, &src_mkl_shape, eager_mode);
       GetMklShape(context, kInputIndex_Filter, &filter_mkl_shape, eager_mode);
@@ -786,7 +783,6 @@ class MklConvOp : public OpKernel {
         src_data = static_cast<Tinput*>(
             const_cast<Tinput*>(src_tensor.flat<Tinput>().data()));
       }
-
       Tfilter* filter_data = nullptr;
       if (IS_FILTER_REORDER_NEEDED(filter_md, conv_fwd_pd, conv_fwd)) {
         bool is_filter_cached = false;
@@ -859,7 +855,6 @@ class MklConvOp : public OpKernel {
                                     cpu_engine_);
         }
       }
-
       // Delete primitive since it is not cached.
       if (do_not_cache) delete conv_fwd;
     } catch (mkldnn::error& e) {
@@ -1417,10 +1412,10 @@ class MklFusedConvOp
 
 // We create new class for each version of Quantized Convolution and inherit
 // from the FP32 version of the base class
-template <typename Device, typename Tbias, typename Toutput,
+template <typename Device, typename Tinput, typename Tbias, typename Toutput,
           typename Ttemp_output, bool bias_enabled, bool is_depthwise>
 class MklQuantizedConv2DOp
-    : public MklConvOp<Device, quint8, qint8, Tbias, Toutput, Ttemp_output,
+    : public MklConvOp<Device, Tinput, qint8, Tbias, Toutput, Ttemp_output,
                        int32, bias_enabled, false, is_depthwise, false> {
  public:
   virtual ~MklQuantizedConv2DOp() {
@@ -1436,7 +1431,7 @@ class MklQuantizedConv2DOp
   }
 
   explicit MklQuantizedConv2DOp(OpKernelConstruction* context)
-      : MklConvOp<Device, quint8, qint8, Tbias, Toutput, Ttemp_output, int32,
+      : MklConvOp<Device, Tinput, qint8, Tbias, Toutput, Ttemp_output, int32,
                   bias_enabled, false, is_depthwise, false>(context) {
     bool is_filter_const;
     OP_REQUIRES_OK(context,
@@ -1453,7 +1448,7 @@ class MklQuantizedConv2DOp
 
   void Compute(OpKernelContext* context) override {
     // Compute int32 output tensor
-    MklConvOp<Device, quint8, qint8, Tbias, Toutput, Ttemp_output, int32,
+    MklConvOp<Device, Tinput, qint8, Tbias, Toutput, Ttemp_output, int32,
               bias_enabled, false, is_depthwise, false>::Compute(context);
 
     // Compute additional outputs: min/max scalars.
@@ -1488,7 +1483,7 @@ class MklQuantizedConv2DOp
     if (min_filter.dims() == 0) {
       float min_output_value;
       float max_output_value;
-      MklQuantizationRangeForMultiplication<quint8, qint8, qint32>(
+      MklQuantizationRangeForMultiplication<Tinput, qint8, qint32>(
           min_input, max_input, min_filter.flat<float>()(0),
           max_filter.flat<float>()(0), &min_output_value, &max_output_value);
       AllocateOutputSetMklShape(context, 1, &output_min, {},
@@ -1505,7 +1500,7 @@ class MklQuantizedConv2DOp
       AllocateOutputSetMklShape(context, 2, &output_max,
                                 {static_cast<ptrdiff_t>(depth)},
                                 output_max_mkl_shape);
-      MklQuantizationRangeForMultiplication<quint8, qint8, qint32>(
+      MklQuantizationRangeForMultiplication<Tinput, qint8, qint32>(
           min_input, max_input, min_filter, max_filter, &output_min,
           &output_max);
     }
@@ -1515,10 +1510,9 @@ class MklQuantizedConv2DOp
  protected:
   void ExtendConvFwdParams(OpKernelContext* context,
                            MklConvFwdParams& params) override {
-    MklConvOp<Device, quint8, qint8, Tbias, Toutput, Ttemp_output, int32,
+    MklConvOp<Device, Tinput, qint8, Tbias, Toutput, Ttemp_output, int32,
               bias_enabled, false, is_depthwise,
               false>::ExtendConvFwdParams(context, params);
-
     // When the output type is quint8, the output data id requantized
     // into quint8. A post_op "output_scale" is added to do the conversion.
     if (std::is_same<Toutput, quint8>::value ||
@@ -1540,22 +1534,26 @@ class MklQuantizedConv2DOp
       const float max_freezed_output =
           context->input(7 + bias_index_offset).flat<float>()(0);
 
-      float factor = std::is_same<Toutput, quint8>::value ? 255.0f : 127.0f;
+      float int_output_limit =
+          std::is_same<Toutput, quint8>::value ? 255.0f : 127.0f;
       size_t depth = min_filter_vector.NumElements();
       const float* min_filter = min_filter_vector.flat<float>().data();
       const float* max_filter = max_filter_vector.flat<float>().data();
       std::vector<float> scales(depth);
-      float input_range = std::max(std::abs(min_input), std::abs(max_input));
-      float output_range =
+      float float_input_range =
+          std::max(std::abs(min_input), std::abs(max_input));
+      float float_output_range =
           std::max(std::abs(min_freezed_output), std::abs(max_freezed_output));
+      const float int_const_scale_limit =
+          (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
       for (size_t i = 0; i < depth; ++i) {
         // For simplicity and symmetry, we set filter range to be outer
         // bounds of min_filter and max_filter.
-        float filter_range =
+        float float_filter_range =
             std::max(std::abs(min_filter[i]), std::abs(max_filter[i]));
         // To understand the scaling, please see mkl_requantize_ops_test.
-        scales[i] = factor * input_range * filter_range /
-                    (255.0f * 127.0f * output_range);
+        scales[i] = int_output_limit * float_input_range * float_filter_range /
+                    (int_const_scale_limit * float_output_range);
       }
       params.post_op_params.push_back(
           {"output_scale", ALGORITHM_UNDEF, scales});
@@ -1584,6 +1582,8 @@ class MklQuantizedConv2DOp
           const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
     }
 
+    const float int_const_scale_limit =
+        (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
     // Re-scale bias if either of following 2 conditions are met:
     // 1. Bias is not const;
     // 2. Bias is const, but bias cache is empty (first iteration).
@@ -1597,7 +1597,7 @@ class MklQuantizedConv2DOp
       std::vector<float> scales(depth);
       for (size_t i = 0; i < depth; ++i) {
         scales[i] =
-            255.0 * 127.0 /
+            int_const_scale_limit /
             (std::max(std::abs(max_input), std::abs(min_input)) *
              std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
       }
@@ -1700,32 +1700,33 @@ class MklQuantizedConv2DOp
   }
 };
 
-template <typename Device, typename Tbias, typename Toutput,
+template <typename Device, typename Tinput, typename Tbias, typename Toutput,
           typename Ttemp_output, bool bias_enabled, bool is_depthwise>
 class MklQuantizedConv2DReluOp
-    : public MklQuantizedConv2DOp<Device, Tbias, Toutput, Ttemp_output,
+    : public MklQuantizedConv2DOp<Device, Tinput, Tbias, Toutput, Ttemp_output,
                                   bias_enabled, is_depthwise> {
  public:
   virtual ~MklQuantizedConv2DReluOp() {}
 
   explicit MklQuantizedConv2DReluOp(OpKernelConstruction* context)
-      : MklQuantizedConv2DOp<Device, Tbias, Toutput, Ttemp_output, bias_enabled,
-                             is_depthwise>(context) {}
+      : MklQuantizedConv2DOp<Device, Tinput, Tbias, Toutput, Ttemp_output,
+                             bias_enabled, is_depthwise>(context) {}
 
  protected:
   void ExtendConvFwdParams(OpKernelContext* context,
                            MklConvFwdParams& params) override {
-    MklQuantizedConv2DOp<Device, Tbias, Toutput, Ttemp_output, bias_enabled,
+    MklQuantizedConv2DOp<Device, Tinput, Tbias, Toutput, Ttemp_output,
+                         bias_enabled,
                          is_depthwise>::ExtendConvFwdParams(context, params);
     params.post_op_params.push_back(
         {"activation", ALGORITHM::eltwise_relu, {1.0, 0.0, 0.0}});
   }
 };
 
-template <typename Device, typename Tbias, typename Toutput,
+template <typename Device, typename Tinput, typename Tbias, typename Toutput,
           typename Ttemp_output, bool bias_enabled, bool is_depthwise>
 class MklQuantizedConv2DSumReluOp
-    : public MklQuantizedConv2DOp<Device, Tbias, Toutput, Ttemp_output,
+    : public MklQuantizedConv2DOp<Device, Tinput, Tbias, Toutput, Ttemp_output,
                                   bias_enabled, is_depthwise> {
  public:
   virtual ~MklQuantizedConv2DSumReluOp() {
@@ -1741,13 +1742,14 @@ class MklQuantizedConv2DSumReluOp
   }
 
   explicit MklQuantizedConv2DSumReluOp(OpKernelConstruction* context)
-      : MklQuantizedConv2DOp<Device, Tbias, Toutput, Ttemp_output, bias_enabled,
-                             is_depthwise>(context) {}
+      : MklQuantizedConv2DOp<Device, Tinput, Tbias, Toutput, Ttemp_output,
+                             bias_enabled, is_depthwise>(context) {}
 
  protected:
   void ExtendConvFwdParams(OpKernelContext* context,
                            MklConvFwdParams& params) override {
-    MklQuantizedConv2DOp<Device, Tbias, Toutput, Ttemp_output, bias_enabled,
+    MklQuantizedConv2DOp<Device, Tinput, Tbias, Toutput, Ttemp_output,
+                         bias_enabled,
                          is_depthwise>::ExtendConvFwdParams(context, params);
     // Calculate the scale (beta in mkldnn api term) for sum
     if (std::is_same<Toutput, quint8>::value) {
@@ -1821,7 +1823,7 @@ class MklQuantizedConv2DSumReluOp
       *output_tensor = const_cast<Tensor*>(&summand);
       return;
     }
-    MklConvOp<Device, quint8, qint8, Tbias, Toutput, Ttemp_output, int32,
+    MklConvOp<Device, Tinput, qint8, Tbias, Toutput, Ttemp_output, int32,
               bias_enabled, false, false,
               false>::AllocateOutputTensor(context, conv_prim_desc,
                                            output_dims_mkl_order,
@@ -1844,6 +1846,8 @@ class MklQuantizedConv2DSumReluOp
     const float* min_filter = min_filter_vector.flat<float>().data();
     const float* max_filter = max_filter_vector.flat<float>().data();
 
+    const float int_const_scale_limit =
+        (std::is_same<Tinput, quint8>::value) ? 255.0 * 127.0 : 127.0 * 127.0;
     size_t depth = min_filter_vector.NumElements();
     std::vector<float> scales(depth);
     for (size_t i = 0; i < depth; ++i) {
@@ -1851,7 +1855,7 @@ class MklQuantizedConv2DSumReluOp
       // done regularly. A Cleaner design to address all mapping in one
      // function needs to be implemented in future which also supports other
       // quantized type mapping in future.
-      scales[i] = 255.0 * 127.0 /
+      scales[i] = int_const_scale_limit /
                   (std::max(std::abs(max_input), std::abs(min_input)) *
                    std::max(std::abs(max_filter[i]), std::abs(min_filter[i])));
     }
@@ -1911,33 +1915,41 @@ REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DPerChannel")
                             .TypeConstraint<qint32>("out_type"),
                         NoOp);
 // Register a templatized implementation of MklQuantizedConv2DPerChannel.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DPerChannel")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, float, qint32, qint32, false, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DPerChannel")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DOp<CPUDevice, quint8, float, qint32,
+                                             qint32, false, false>);
 
 // Register a templatized implementation of MklQuantizedConv2D.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2D")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, float, qint32, qint32, false, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2D")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DOp<CPUDevice, quint8, float, qint32,
+                                             qint32, false, false>);
 
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DAndRequantize")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint8>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, qint32, qint8, qint8, false, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2D")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DOp<CPUDevice, qint8, float, qint32,
+                                             qint32, false, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint8>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DOp<CPUDevice, quint8, qint32, qint8,
+                                             qint8, false, false>);
 
 // Register NoOp kernel for QuantizedConv2DWithBias to get a python interface.
 // This kernel will be replaced by an MKL kernel during graph
@@ -1956,15 +1968,28 @@ REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBiasAndRequantize")
                             .TypeConstraint<qint8>("out_type"),
                         NoOp);
 
+REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBias")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type"),
+                        NoOp);
+
+REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBiasAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint8>("out_type"),
+                        NoOp);
 // Register a templatized implementation MklQuantizedConv2DWithBias.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DWithBias")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, float, qint32, qint32, true, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBias")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DOp<CPUDevice, quint8, float, qint32,
+                                             qint32, true, false>);
 
 REGISTER_KERNEL_BUILDER(
     Name("_MklQuantizedConv2DWithBiasAndRequantize")
@@ -1974,7 +1999,7 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<qint32>("Tbias")
        .TypeConstraint<qint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, qint32, qint8, qint8, true, false>);
+    MklQuantizedConv2DOp<CPUDevice, quint8, qint32, qint8, qint8, true, false>);
 
 REGISTER_KERNEL_BUILDER(
     Name("_MklQuantizedConv2DWithBiasAndRequantize")
@@ -1984,7 +2009,36 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<float>("Tbias")
         .TypeConstraint<qint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, float, qint8, qint8, true, false>);
+    MklQuantizedConv2DOp<CPUDevice, quint8, float, qint8, qint8, true, false>);
+
+REGISTER_KERNEL_BUILDER(
+    Name("_MklQuantizedConv2DWithBias")
+        .Device(DEVICE_CPU)
+        .TypeConstraint<qint8>("Tinput")
+        .TypeConstraint<qint8>("Tfilter")
+        .TypeConstraint<qint32>("out_type")
+        .Label(mkl_op_registry::kMklQuantizedOpLabel),
+    MklQuantizedConv2DOp<CPUDevice, qint8, float, qint32, qint32, true, false>);
+
+REGISTER_KERNEL_BUILDER(
+    Name("_MklQuantizedConv2DWithBiasAndRequantize")
+        .Device(DEVICE_CPU)
+        .TypeConstraint<qint8>("Tinput")
+        .TypeConstraint<qint8>("Tfilter")
+        .TypeConstraint<qint32>("Tbias")
+        .TypeConstraint<qint8>("out_type")
+        .Label(mkl_op_registry::kMklQuantizedOpLabel),
+    MklQuantizedConv2DOp<CPUDevice, qint8, qint32, qint8, qint8, true, false>);
+
+REGISTER_KERNEL_BUILDER(
+    Name("_MklQuantizedConv2DWithBiasAndRequantize")
+        .Device(DEVICE_CPU)
+        .TypeConstraint<qint8>("Tinput")
+        .TypeConstraint<qint8>("Tfilter")
+        .TypeConstraint<float>("Tbias")
+        .TypeConstraint<qint8>("out_type")
+        .Label(mkl_op_registry::kMklQuantizedOpLabel),
+    MklQuantizedConv2DOp<CPUDevice, qint8, float, qint8, qint8, true, false>);
 
 // Register NoOp kernel for QuantizedConv2DAndRelu to get a python interface.
 // This kernel will be replaced by an MKL kernel during graph-optimization pass.
@@ -2003,23 +2057,23 @@ REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DAndReluAndRequantize")
                         NoOp);
 
 // Register a templatized implementation of MklQuantizedConv2DAndRelu.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DAndRelu")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, float, qint32, qint32, false, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DAndRelu")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, quint8, float,
+                                                 qint32, qint32, false, false>);
 
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DAndReluAndRequantize")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<quint8>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, qint32, quint8, quint8, false, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DAndReluAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<quint8>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, quint8, qint32,
+                                                 quint8, quint8, false, false>);
 
 // Register NoOp kernel for QuantizedConv2DWithBiasAndRelu to get a python
 // interface.
@@ -2031,6 +2085,13 @@ REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBiasAndRelu")
                             .TypeConstraint<qint32>("out_type"),
                         NoOp);
 
+REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBiasAndRelu")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type"),
+                        NoOp);
+
 // Register NoOp kernel for QuantizedConv2DWithBiasAndReluAndRequantize
 // to get a python interface.
 // This kernel will be replaced by an MKL kernel during graph-optimization pass.
@@ -2041,37 +2102,71 @@ REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBiasAndReluAndRequantize")
                             .TypeConstraint<quint8>("out_type"),
                         NoOp);
 
+REGISTER_KERNEL_BUILDER(Name("QuantizedConv2DWithBiasAndReluAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<quint8>("out_type"),
+                        NoOp);
 // Register a templatized implementation of MklQuantizedConv2DWithBiasAndRelu.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DWithBiasAndRelu")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, float, qint32, qint32, true, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBiasAndRelu")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, quint8, float,
+                                                 qint32, qint32, true, false>);
 
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBiasAndRelu")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, qint8, float,
+                                                 qint32, qint32, true, false>);
 // Register a templatized implementation of
 // MklQuantizedConv2DWithBiasAndReluAndRequantize.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DWithBiasAndReluAndRequantize")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<float>("Tbias")
-        .TypeConstraint<quint8>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, float, quint8, quint8, true, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBiasAndReluAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<float>("Tbias")
+                            .TypeConstraint<quint8>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, quint8, float,
+                                                 quint8, quint8, true, false>);
 
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedConv2DWithBiasAndReluAndRequantize")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("Tbias")
-        .TypeConstraint<quint8>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, qint32, quint8, quint8, true, false>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBiasAndReluAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("Tbias")
+                            .TypeConstraint<quint8>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, quint8, qint32,
+                                                 quint8, quint8, true, false>);
+
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBiasAndReluAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<float>("Tbias")
+                            .TypeConstraint<quint8>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, qint8, float,
+                                                 quint8, quint8, true, false>);
+
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConv2DWithBiasAndReluAndRequantize")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("Tbias")
+                            .TypeConstraint<quint8>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, qint8, qint32,
+                                                 quint8, quint8, true, false>);
 
 // Register NoOp kernel for QuantizedConv2DWithBiasSumAndRelu to get a python
 // interface.
@@ -2107,7 +2202,8 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<qint8>("Tfilter")
         .TypeConstraint<qint32>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DSumReluOp<CPUDevice, float, qint32, qint32, true, false>);
+    MklQuantizedConv2DSumReluOp<CPUDevice, quint8, float, qint32, qint32, true,
+                                false>);
 
 REGISTER_KERNEL_BUILDER(
     Name("_MklQuantizedConv2DWithBiasSumAndReluAndRequantize")
@@ -2117,7 +2213,7 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<qint32>("Tbias")
         .TypeConstraint<quint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DSumReluOp<CPUDevice, qint32, quint8, quint8, true,
+    MklQuantizedConv2DSumReluOp<CPUDevice, quint8, qint32, quint8, quint8, true,
                                 false>);
 
 REGISTER_KERNEL_BUILDER(
@@ -2128,7 +2224,8 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<qint32>("Tbias")
         .TypeConstraint<quint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DSumReluOp<CPUDevice, qint32, quint8, qint8, true, false>);
+    MklQuantizedConv2DSumReluOp<CPUDevice, quint8, qint32, quint8, qint8, true,
+                                false>);
 
 REGISTER_KERNEL_BUILDER(
     Name("_MklQuantizedConv2DWithBiasSumAndReluAndRequantize")
@@ -2138,7 +2235,8 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<float>("Tbias")
         .TypeConstraint<quint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DSumReluOp<CPUDevice, float, quint8, quint8, true, false>);
+    MklQuantizedConv2DSumReluOp<CPUDevice, quint8, float, quint8, quint8, true,
+                                false>);
 
 REGISTER_KERNEL_BUILDER(
     Name("_MklQuantizedConv2DWithBiasSignedSumAndReluAndRequantize")
@@ -2148,7 +2246,8 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<float>("Tbias")
         .TypeConstraint<quint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DSumReluOp<CPUDevice, float, quint8, qint8, true, false>);
+    MklQuantizedConv2DSumReluOp<CPUDevice, quint8, float, quint8, qint8, true,
+                                false>);
 
 // Register NoOp kernels for non-fused and fused versions of
 // QuantizedDepthwiseConv2D to get a Python interface. These kernels will be
@@ -2184,14 +2283,14 @@ REGISTER_KERNEL_BUILDER(
 
 // Register templatized MKL kernels for non-fused and fused-versions of
 // QuantizedDepthwiseConv2D.
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedDepthwiseConv2D")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, float, qint32, qint32, false, true>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedDepthwiseConv2D")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DOp<CPUDevice, quint8, float, qint32,
+                                             qint32, false, true>);
 
 REGISTER_KERNEL_BUILDER(
     Name("_MklQuantizedDepthwiseConv2DWithBias")
@@ -2200,16 +2299,16 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<qint8>("Tfilter")
         .TypeConstraint<qint32>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DOp<CPUDevice, float, qint32, qint32, true, true>);
+    MklQuantizedConv2DOp<CPUDevice, quint8, float, qint32, qint32, true, true>);
 
-REGISTER_KERNEL_BUILDER(
-    Name("_MklQuantizedDepthwiseConv2DWithBiasAndRelu")
-        .Device(DEVICE_CPU)
-        .TypeConstraint<quint8>("Tinput")
-        .TypeConstraint<qint8>("Tfilter")
-        .TypeConstraint<qint32>("out_type")
-        .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, float, qint32, qint32, true, true>);
+REGISTER_KERNEL_BUILDER(Name("_MklQuantizedDepthwiseConv2DWithBiasAndRelu")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<quint8>("Tinput")
+                            .TypeConstraint<qint8>("Tfilter")
+                            .TypeConstraint<qint32>("out_type")
+                            .Label(mkl_op_registry::kMklQuantizedOpLabel),
+                        MklQuantizedConv2DReluOp<CPUDevice, quint8, float,
+                                                 qint32, qint32, true, true>);
 
 // Tbias -> float
 REGISTER_KERNEL_BUILDER(
@@ -2220,7 +2319,8 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<float>("Tbias")
         .TypeConstraint<quint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, float, quint8, quint8, true, true>);
+    MklQuantizedConv2DReluOp<CPUDevice, quint8, float, quint8, quint8, true,
+                             true>);
 
 // Tbias -> qint32
 REGISTER_KERNEL_BUILDER(
@@ -2231,7 +2331,8 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<qint32>("Tbias")
         .TypeConstraint<quint8>("out_type")
         .Label(mkl_op_registry::kMklQuantizedOpLabel),
-    MklQuantizedConv2DReluOp<CPUDevice, qint32, quint8, quint8, true, true>);
+    MklQuantizedConv2DReluOp<CPUDevice, quint8, qint32, quint8, quint8, true,
+                             true>);
 
 // Register 2D operations
 #define REGISTER_MKL_CPU_2D(T) \
@@ -285,6 +285,111 @@ TEST_F(QuantizedConv2DTest, Small) {
   test::ExpectTensorNear<float>(expected_float, output_float, 1.0);
 }
 
+TEST_F(QuantizedConv2DTest, SmallS8) {
+  const int stride = 1;
+
+  const int depth = 1;
+  const int image_width = 3;
+  const int image_height = 3;
+  const int image_batch_count = 1;
+
+  // Image -> uint8
+  const float image_min = -127.0f;
+  const float image_max = 127.0f;
+
+  TF_ASSERT_OK(NodeDefBuilder("quantized_conv_op", "_MklQuantizedConv2D")
+                   .Input(FakeInput(DT_QINT8))  // Input
+                   .Input(FakeInput(DT_QINT8))  // Filter
+                   .Input(FakeInput(DT_FLOAT))  // Min input
+                   .Input(FakeInput(DT_FLOAT))  // Max input
+                   .Input(FakeInput(DT_FLOAT))  // Min filter
+                   .Input(FakeInput(DT_FLOAT))  // Max filter
+                   // MKL metadata tensors //
+                   .Input(FakeInput(DT_UINT8))
+                   .Input(FakeInput(DT_UINT8))
+                   .Input(FakeInput(DT_UINT8))
+                   .Input(FakeInput(DT_UINT8))
+                   .Input(FakeInput(DT_UINT8))
+                   .Input(FakeInput(DT_UINT8))
+                   ///////////////////////////
+                   .Attr("Tinput", DataTypeToEnum<qint8>::v())
+                   .Attr("Tfilter", DataTypeToEnum<qint8>::v())
+                   .Attr("T", DataTypeToEnum<quint8>::v())
+                   .Attr("padding", "VALID")
+                   .Attr("out_type", DataTypeToEnum<qint32>::v())
+                   .Attr("strides", {1, stride, stride, 1})
+                   .Attr("_kernel", "QuantizedMklOp")
+                   .Finalize(node_def()));
+  TF_ASSERT_OK(InitOp());
+  // The image matrix is:
+  // | 2 |  3 |  4 |
+  // | 6 | -4 | -2 |
+  // | 3 |  0 |  4 |
+  Tensor image_float(DT_FLOAT,
+                     {image_batch_count, image_height, image_width, depth});
+  test::FillValues<float>(&image_float, {2, 3, 4, 6, -4, -2, 3, 0, 4});
+  Tensor image_quantized =
+      FloatTensorToQuantized<qint8>(image_float, image_min, image_max);
+
+  const int filter_size = 3;
+  const int filter_count = 1;
+
+  // Filter -> int8 with symmetric range
+  const float filter_min = -127.0f;
+  const float filter_max = 127.0f;
+
+  // The filter matrix is:
+  // | 1 | 4 | 2 |
+  // | 0 | 5 |-1 |
+  // | 3 |-1 |-3 |
+
+  Tensor filter_float(DT_FLOAT,
+                      {filter_size, filter_size, depth, filter_count});
+  test::FillValues<float>(&filter_float, {1, 4, 2, 0, 5, -1, 3, -1, -3});
+  Tensor filter_quantized =
+      FloatTensorToQuantized<qint8>(filter_float, filter_min, filter_max);
+
+  AddInputFromArray<qint8>(image_quantized.shape(),
+                           image_quantized.flat<qint8>());
+  AddInputFromArray<qint8>(filter_quantized.shape(),
+                           filter_quantized.flat<qint8>());
+  AddInputFromArray<float>(TensorShape({1}), {image_min});
+  AddInputFromArray<float>(TensorShape({1}), {image_max});
+  AddInputFromArray<float>(TensorShape({1}), {filter_min});
+  AddInputFromArray<float>(TensorShape({1}), {filter_max});
+
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+  AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Output -> float
+  const int expected_width = 1;
+  const int expected_height = 1;
+  Tensor expected_float(
+      DT_FLOAT, TensorShape({image_batch_count, expected_height, expected_width,
+                             filter_count}));
+  test::FillValues<float>(&expected_float, {1});
+
+  const Tensor& output = *GetOutput(0);
+  const Tensor& output_mkl_metadata = *GetOutput(3);
+
+  ConvMklToTF conv_comp;
+  Tensor output_quantized;
+  conv_comp.ConvertMklToTF<qint32>(DT_QINT32, output, output_mkl_metadata,
+                                   output_quantized);
+
+  const float output_min = GetOutput(1)->flat<float>()(0);
+  const float output_max = GetOutput(2)->flat<float>()(0);
+  Tensor output_float =
+      QuantizedTensorToFloat<qint32>(output_quantized, output_min, output_max);
+
+  test::ExpectTensorNear<float>(expected_float, output_float, 1.0);
+}
 // Output -> qint32
 TEST_F(QuantizedConv2DTest, Small32Bit) {
   const int stride = 1;
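For reference, a minimal standalone C++ sketch (not part of the commit) of the per-channel output_scale formula that ExtendConvFwdParams builds above; all min/max range values below are hypothetical examples. It only illustrates why int_const_scale_limit switches between 255 * 127 for a quint8 input and 127 * 127 for the new qint8 input path.

// Standalone sketch of the requantization scale from ExtendConvFwdParams.
// The ranges here are made-up example values, not values from the commit.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // qint8 input and qint8 requantized output, i.e. the {int8,int8} path.
  const bool input_is_unsigned = false;
  const bool output_is_unsigned = false;

  const float min_input = -127.0f, max_input = 127.0f;
  const float min_freezed_output = -6.0f, max_freezed_output = 6.0f;
  // Hypothetical per-channel filter ranges for a 2-channel convolution.
  const std::vector<float> min_filter = {-1.0f, -2.0f};
  const std::vector<float> max_filter = {1.0f, 2.0f};

  const float int_output_limit = output_is_unsigned ? 255.0f : 127.0f;
  const float int_const_scale_limit =
      input_is_unsigned ? 255.0f * 127.0f : 127.0f * 127.0f;
  const float float_input_range =
      std::max(std::abs(min_input), std::abs(max_input));
  const float float_output_range =
      std::max(std::abs(min_freezed_output), std::abs(max_freezed_output));

  for (std::size_t i = 0; i < min_filter.size(); ++i) {
    const float float_filter_range =
        std::max(std::abs(min_filter[i]), std::abs(max_filter[i]));
    // scales[i] = int_output_limit * float_input_range * float_filter_range /
    //             (int_const_scale_limit * float_output_range)
    const float scale = int_output_limit * float_input_range *
                        float_filter_range /
                        (int_const_scale_limit * float_output_range);
    std::printf("scales[%zu] = %f\n", i, scale);
  }
  return 0;
}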