Add support for variable number of bits to FakeQuant* ops.
Change: 154377062
@@ -48,6 +48,10 @@ namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

+namespace {
+bool IsNumBitsValid(int num_bits) { return num_bits >= 2 && num_bits <= 8; }
+} // namespace
+
// -----------------------------------------------------------------------------
// Implementation of FakeQuantWithMinMaxArgsOp, see its documentation in
// core/ops/array_ops.cc.
@@ -60,19 +64,25 @@ class FakeQuantWithMinMaxArgsOp
      : Base::UnaryElementWiseOp(context) {
    OP_REQUIRES_OK(context, context->GetAttr("min", &min_));
    OP_REQUIRES_OK(context, context->GetAttr("max", &max_));
+    int num_bits;
+    OP_REQUIRES_OK(context, context->GetAttr("num_bits", &num_bits));
    OP_REQUIRES(context, min_ < max_,
                InvalidArgument("min has to be smaller than max, was: ", min_,
                                " >= ", max_));
+    OP_REQUIRES(context, IsNumBitsValid(num_bits),
+                InvalidArgument("num_bits must be between 2 and 8, inclusive"));
+    steps_ = (1 << num_bits) - 1;
  }

  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
    FakeQuantWithMinMaxArgsFunctor<Device> functor;
    functor(context->eigen_device<Device>(), input.flat<float>(), min_, max_,
-            output->flat<float>());
+            steps_, output->flat<float>());
  }
 private:
  float min_;
  float max_;
+  int steps_;
};

// Implementation of FakeQuantWithMinMaxArgsGradientOp, see its documentation in
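For reference, a minimal standalone sketch (not part of the commit) of what the validated num_bits attribute turns into: the kernels above compute steps_ = (1 << num_bits) - 1, so num_bits = 8 reproduces the previous hard-coded behaviour of 255 steps (256 quantized values).

#include <cstdio>

// Illustration only: map each allowed num_bits value to the number of
// quantization steps the kernels derive from it.
int main() {
  for (int num_bits = 2; num_bits <= 8; ++num_bits) {
    const int steps = (1 << num_bits) - 1;
    std::printf("num_bits=%d -> steps=%d (%d quantized values)\n", num_bits,
                steps, steps + 1);
  }
  return 0;
}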
@@ -88,9 +98,14 @@ class FakeQuantWithMinMaxArgsGradientOp
      : Base::BinaryElementWiseOp(context) {
    OP_REQUIRES_OK(context, context->GetAttr("min", &min_));
    OP_REQUIRES_OK(context, context->GetAttr("max", &max_));
+    int num_bits;
+    OP_REQUIRES_OK(context, context->GetAttr("num_bits", &num_bits));
    OP_REQUIRES(context, min_ < max_,
                InvalidArgument("min has to be smaller than max, was: ", min_,
                                " >= ", max_));
+    OP_REQUIRES(context, IsNumBitsValid(num_bits),
+                InvalidArgument("num_bits must be between 2 and 8, inclusive"));
+    steps_ = (1 << num_bits) - 1;
  }

  template <int NDIMS>
@@ -105,11 +120,12 @@ class FakeQuantWithMinMaxArgsGradientOp
                InvalidArgument("gradient and input must be the same size"));
    FakeQuantWithMinMaxArgsGradientFunctor<Device> functor;
    functor(context->eigen_device<Device>(), gradient.flat<float>(),
-            input.flat<float>(), min_, max_, output->flat<float>());
+            input.flat<float>(), min_, max_, steps_, output->flat<float>());
  }
 private:
  float min_;
  float max_;
+  int steps_;
};

REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxArgs").Device(DEVICE_CPU),
@@ -124,20 +140,16 @@ typedef Eigen::GpuDevice GPUDevice;
// Forward declarations for functor specializations for GPU.
template <>
void FakeQuantWithMinMaxArgsFunctor<GPUDevice>::operator()(
-    const GPUDevice& d,
-    typename TTypes<float>::ConstFlat inputs,
-    const float min, const float max,
-    typename TTypes<float>::Flat outputs);
+    const GPUDevice& d, typename TTypes<float>::ConstFlat inputs, float min,
+    float max, int steps, typename TTypes<float>::Flat outputs);
extern template struct FakeQuantWithMinMaxArgsFunctor<GPUDevice>;
REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxArgs").Device(DEVICE_GPU),
                        FakeQuantWithMinMaxArgsOp<GPUDevice>);

template <>
void FakeQuantWithMinMaxArgsGradientFunctor<GPUDevice>::operator()(
-    const GPUDevice& d,
-    typename TTypes<float>::ConstFlat gradients,
-    typename TTypes<float>::ConstFlat inputs,
-    const float min, const float max,
+    const GPUDevice& d, typename TTypes<float>::ConstFlat gradients,
+    typename TTypes<float>::ConstFlat inputs, float min, float max, int steps,
    typename TTypes<float>::Flat backprops);
REGISTER_KERNEL_BUILDER(
    Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_GPU),
@@ -152,6 +164,11 @@ class FakeQuantWithMinMaxVarsOp : public OpKernel {
 public:
  explicit FakeQuantWithMinMaxVarsOp(OpKernelConstruction* context)
      : OpKernel::OpKernel(context) {
+    int num_bits;
+    OP_REQUIRES_OK(context, context->GetAttr("num_bits", &num_bits));
+    OP_REQUIRES(context, IsNumBitsValid(num_bits),
+                InvalidArgument("num_bits must be between 2 and 8, inclusive"));
+    steps_ = (1 << num_bits) - 1;
#ifndef FAKE_QUANT_NO_DEBUG
    OP_REQUIRES_OK(context,
                   context->allocate_persistent(DT_BOOL, {},
@@ -175,7 +192,7 @@ class FakeQuantWithMinMaxVarsOp : public OpKernel {

    FakeQuantWithMinMaxVarsFunctor<Device> functor;
    functor(context->eigen_device<Device>(), input.flat<float>(),
-            min.scalar<float>(), max.scalar<float>(),
+            min.scalar<float>(), max.scalar<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
            check_min_max->scalar<bool>(),
#endif
@@ -183,6 +200,7 @@ class FakeQuantWithMinMaxVarsOp : public OpKernel {
  }

 private:
+  int steps_;
#ifndef FAKE_QUANT_NO_DEBUG
  PersistentTensor check_min_max_handle_;
#endif
@@ -195,6 +213,11 @@ class FakeQuantWithMinMaxVarsGradientOp : public OpKernel {
 public:
  explicit FakeQuantWithMinMaxVarsGradientOp(OpKernelConstruction* context)
      : OpKernel::OpKernel(context) {
+    int num_bits;
+    OP_REQUIRES_OK(context, context->GetAttr("num_bits", &num_bits));
+    OP_REQUIRES(context, IsNumBitsValid(num_bits),
+                InvalidArgument("num_bits must be between 2 and 8, inclusive"));
+    steps_ = (1 << num_bits) - 1;
#ifndef FAKE_QUANT_NO_DEBUG
    OP_REQUIRES_OK(context,
                   context->allocate_persistent(DT_BOOL, {},
@@ -231,6 +254,7 @@ class FakeQuantWithMinMaxVarsGradientOp : public OpKernel {
    FakeQuantWithMinMaxVarsGradientFunctor<Device> functor;
    functor(context->eigen_device<Device>(), gradient.flat<float>(),
            input.flat<float>(), min.scalar<float>(), max.scalar<float>(),
+            steps_,
#ifndef FAKE_QUANT_NO_DEBUG
            check_min_max->scalar<bool>(),
#endif
@@ -239,6 +263,7 @@ class FakeQuantWithMinMaxVarsGradientOp : public OpKernel {
  }

 private:
+  int steps_;
#ifndef FAKE_QUANT_NO_DEBUG
  PersistentTensor check_min_max_handle_;
#endif
@@ -253,10 +278,9 @@ REGISTER_KERNEL_BUILDER(
#if GOOGLE_CUDA
template <>
void FakeQuantWithMinMaxVarsFunctor<GPUDevice>::operator()(
-    const GPUDevice& d,
-    typename TTypes<float>::ConstFlat inputs,
+    const GPUDevice& d, typename TTypes<float>::ConstFlat inputs,
    typename TTypes<float>::ConstScalar min,
-    typename TTypes<float>::ConstScalar max,
+    typename TTypes<float>::ConstScalar max, int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -270,11 +294,10 @@ REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVars")

template <>
void FakeQuantWithMinMaxVarsGradientFunctor<GPUDevice>::operator()(
-    const GPUDevice& d,
-    typename TTypes<float>::ConstFlat gradients,
+    const GPUDevice& d, typename TTypes<float>::ConstFlat gradients,
    typename TTypes<float>::ConstFlat inputs,
    typename TTypes<float>::ConstScalar min,
-    typename TTypes<float>::ConstScalar max,
+    typename TTypes<float>::ConstScalar max, int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -297,6 +320,11 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel {
 public:
  explicit FakeQuantWithMinMaxVarsPerChannelOp(OpKernelConstruction* context)
      : OpKernel::OpKernel(context) {
+    int num_bits;
+    OP_REQUIRES_OK(context, context->GetAttr("num_bits", &num_bits));
+    OP_REQUIRES(context, IsNumBitsValid(num_bits),
+                InvalidArgument("num_bits must be between 2 and 8, inclusive"));
+    steps_ = (1 << num_bits) - 1;
#ifndef FAKE_QUANT_NO_DEBUG
    OP_REQUIRES_OK(context,
                   context->allocate_persistent(DT_BOOL, {},
@@ -330,7 +358,7 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel {
        FakeQuant4WithMinMaxVarsPerChannelFunctor<Device> functor;
        functor(context->eigen_device<Device>(), input.dim_size(0),
                input.dim_size(1), input.dim_size(2), input.dim_size(3),
-                input.flat<float>(), min.vec<float>(), max.vec<float>(),
+                input.flat<float>(), min.vec<float>(), max.vec<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
                check_min_max->scalar<bool>(),
#endif
@@ -339,9 +367,9 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel {
      }
      case 2: {
        FakeQuant2WithMinMaxVarsPerChannelFunctor<Device> functor;
-        functor(context->eigen_device<Device>(),
-                input.dim_size(0), input.dim_size(1),
-                input.flat<float>(), min.vec<float>(), max.vec<float>(),
+        functor(context->eigen_device<Device>(), input.dim_size(0),
+                input.dim_size(1), input.flat<float>(), min.vec<float>(),
+                max.vec<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
                check_min_max->scalar<bool>(),
#endif
@@ -350,8 +378,8 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel {
      }
      case 1: {
        FakeQuant1WithMinMaxVarsPerChannelFunctor<Device> functor;
-        functor(context->eigen_device<Device>(),
-                input.vec<float>(), min.vec<float>(), max.vec<float>(),
+        functor(context->eigen_device<Device>(), input.vec<float>(),
+                min.vec<float>(), max.vec<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
                check_min_max->scalar<bool>(),
#endif
@@ -366,6 +394,7 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel {
  }

 private:
+  int steps_;
#ifndef FAKE_QUANT_NO_DEBUG
  PersistentTensor check_min_max_handle_;
#endif
@@ -378,6 +407,11 @@ class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel {
 public:
  explicit FakeQuantWithMinMaxVarsPerChannelGradientOp(
      OpKernelConstruction* context) : OpKernel::OpKernel(context) {
+    int num_bits;
+    OP_REQUIRES_OK(context, context->GetAttr("num_bits", &num_bits));
+    OP_REQUIRES(context, IsNumBitsValid(num_bits),
+                InvalidArgument("num_bits must be between 2 and 8, inclusive"));
+    steps_ = (1 << num_bits) - 1;
#ifndef FAKE_QUANT_NO_DEBUG
    OP_REQUIRES_OK(context,
                   context->allocate_persistent(DT_BOOL, {},
@@ -423,38 +457,36 @@ class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel {
        FakeQuant4WithMinMaxVarsPerChannelGradientFunctor<Device> functor;
        functor(context->eigen_device<Device>(), input.dim_size(0),
                input.dim_size(1), input.dim_size(2), input.dim_size(3),
-                gradient.flat<float>(), input.flat<float>(),
-                min.vec<float>(), max.vec<float>(),
+                gradient.flat<float>(), input.flat<float>(), min.vec<float>(),
+                max.vec<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
                check_min_max->scalar<bool>(),
#endif
-                grad_wrt_input->flat<float>(),
-                grad_wrt_min->vec<float>(), grad_wrt_max->vec<float>());
+                grad_wrt_input->flat<float>(), grad_wrt_min->vec<float>(),
+                grad_wrt_max->vec<float>());
        break;
      }
      case 2: {
        FakeQuant2WithMinMaxVarsPerChannelGradientFunctor<Device> functor;
-        functor(context->eigen_device<Device>(),
-                input.dim_size(0), input.dim_size(1),
-                gradient.flat<float>(), input.flat<float>(),
-                min.vec<float>(), max.vec<float>(),
+        functor(context->eigen_device<Device>(), input.dim_size(0),
+                input.dim_size(1), gradient.flat<float>(), input.flat<float>(),
+                min.vec<float>(), max.vec<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
                check_min_max->scalar<bool>(),
#endif
-                grad_wrt_input->flat<float>(),
-                grad_wrt_min->vec<float>(), grad_wrt_max->vec<float>());
+                grad_wrt_input->flat<float>(), grad_wrt_min->vec<float>(),
+                grad_wrt_max->vec<float>());
        break;
      }
      case 1: {
        FakeQuant1WithMinMaxVarsPerChannelGradientFunctor<Device> functor;
-        functor(context->eigen_device<Device>(),
-                gradient.vec<float>(), input.vec<float>(),
-                min.vec<float>(), max.vec<float>(),
+        functor(context->eigen_device<Device>(), gradient.vec<float>(),
+                input.vec<float>(), min.vec<float>(), max.vec<float>(), steps_,
#ifndef FAKE_QUANT_NO_DEBUG
                check_min_max->scalar<bool>(),
#endif
-                grad_wrt_input->vec<float>(),
-                grad_wrt_min->vec<float>(), grad_wrt_max->vec<float>());
+                grad_wrt_input->vec<float>(), grad_wrt_min->vec<float>(),
+                grad_wrt_max->vec<float>());
        break;
      }
      default:
@@ -465,6 +497,7 @@ class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel {
  }

 private:
+  int steps_;
#ifndef FAKE_QUANT_NO_DEBUG
  PersistentTensor check_min_max_handle_;
#endif
@@ -480,10 +513,9 @@ REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannelGradient")
#if GOOGLE_CUDA
template <>
void FakeQuant1WithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
-    const GPUDevice& d,
-    typename TTypes<float>::ConstVec inputs,
-    typename TTypes<float>::ConstVec min,
-    typename TTypes<float>::ConstVec max,
+    const GPUDevice& d, typename TTypes<float>::ConstVec inputs,
+    typename TTypes<float>::ConstVec min, typename TTypes<float>::ConstVec max,
+    int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -495,7 +527,7 @@ void FakeQuant2WithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
    const GPUDevice& d, const Index batch_size, const Index depth,
    typename TTypes<float>::ConstFlat inputs,
    typename TTypes<float>::ConstFlat min,
-    typename TTypes<float>::ConstFlat max,
+    typename TTypes<float>::ConstFlat max, int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -508,7 +540,7 @@ void FakeQuant4WithMinMaxVarsPerChannelFunctor<GPUDevice>::operator()(
    const Index width, const Index depth,
    typename TTypes<float>::ConstFlat inputs,
    typename TTypes<float>::ConstFlat min,
-    typename TTypes<float>::ConstFlat max,
+    typename TTypes<float>::ConstFlat max, int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -523,11 +555,10 @@ REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannel")

template <>
void FakeQuant1WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>::operator()(
-    const GPUDevice& d,
-    typename TTypes<float>::ConstVec gradients,
+    const GPUDevice& d, typename TTypes<float>::ConstVec gradients,
    typename TTypes<float>::ConstVec inputs,
-    typename TTypes<float>::ConstVec min,
-    typename TTypes<float>::ConstVec max,
+    typename TTypes<float>::ConstVec min, typename TTypes<float>::ConstVec max,
+    int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -542,8 +573,8 @@ void FakeQuant2WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>::operator()(
    const GPUDevice& d, const Index batch_size, const Index depth,
    typename TTypes<float>::ConstFlat gradients,
    typename TTypes<float>::ConstFlat inputs,
-    typename TTypes<float>::ConstVec min,
-    typename TTypes<float>::ConstVec max,
+    typename TTypes<float>::ConstVec min, typename TTypes<float>::ConstVec max,
+    int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -559,8 +590,8 @@ void FakeQuant4WithMinMaxVarsPerChannelGradientFunctor<GPUDevice>::operator()(
    const Index width, const Index depth,
    typename TTypes<float>::ConstFlat gradients,
    typename TTypes<float>::ConstFlat inputs,
-    typename TTypes<float>::ConstVec min,
-    typename TTypes<float>::ConstVec max,
+    typename TTypes<float>::ConstVec min, typename TTypes<float>::ConstVec max,
+    int steps,
#ifndef FAKE_QUANT_NO_DEBUG
    typename TTypes<bool>::Scalar check_min_max,
#endif
@@ -35,31 +35,27 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float StdRound(float input) {

namespace tensorflow {

-static constexpr int kSteps = 255;
-static constexpr float kStepsFloat = static_cast<float>(kSteps);
-
// Gymnastics with nudged zero point is to ensure that real zero maps to
// an integer, which is required for e.g. zero-padding in convolutional layers.
// Returns (nudged_min, nudged_max, nudged_scale).
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(const float min,
-                                                 const float max,
-                                                 float* nudged_min,
-                                                 float* nudged_max,
-                                                 float* scale) {
-  *scale = (max - min) / (kStepsFloat - 0.0f);
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(
+    const float min, const float max, const int steps, float* nudged_min,
+    float* nudged_max, float* scale) {
+  const float steps_float = static_cast<float>(steps);
+  *scale = (max - min) / (steps_float - 0.0f);
  const float zero_point_from_min = 0.0f - min / *scale;
-  const uint8 nudged_zero_point = [zero_point_from_min] {
+  const uint8 nudged_zero_point = [zero_point_from_min, steps, steps_float] {
    if (zero_point_from_min < 0.0f) {
      return static_cast<uint8>(0);
-    } else if (zero_point_from_min > kStepsFloat) {
-      return static_cast<uint8>(kSteps);
-    } else {
-      return static_cast<uint8>(StdRound(zero_point_from_min));
    }
+    if (zero_point_from_min > steps_float) {
+      return static_cast<uint8>(steps);
+    }
+    return static_cast<uint8>(StdRound(zero_point_from_min));
  }();

  *nudged_min = (0.0f - nudged_zero_point) * (*scale);
-  *nudged_max = (kStepsFloat - nudged_zero_point) * (*scale);
+  *nudged_max = (steps_float - nudged_zero_point) * (*scale);
}

template <typename T>
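For intuition about the nudging above, here is a small standalone sketch (mirroring the logic, not reusing the TF code) applied to a concrete range; the numbers in the comments assume steps = 255 and ordinary round-half-away rounding, which may differ slightly from StdRound.

#include <cmath>
#include <cstdio>

// Standalone sketch of the nudge: pick a zero point on the integer step grid,
// then shift [min, max] so that real 0.0 maps exactly onto a quantized value.
void NudgeSketch(float min, float max, int steps, float* nudged_min,
                 float* nudged_max, float* scale) {
  const float steps_float = static_cast<float>(steps);
  *scale = (max - min) / steps_float;
  float zero_point = std::round(0.0f - min / *scale);
  if (zero_point < 0.0f) zero_point = 0.0f;
  if (zero_point > steps_float) zero_point = steps_float;
  *nudged_min = (0.0f - zero_point) * (*scale);
  *nudged_max = (steps_float - zero_point) * (*scale);
}

int main() {
  float nudged_min, nudged_max, scale;
  // [-0.1, 0.9] with 255 steps: the raw zero point is 25.5, which rounds to 26,
  // so the range shifts to roughly [-0.102, 0.898] while keeping its width.
  NudgeSketch(-0.1f, 0.9f, 255, &nudged_min, &nudged_max, &scale);
  std::printf("nudged_min=%f nudged_max=%f scale=%f\n", nudged_min, nudged_max,
              scale);
  return 0;
}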
@@ -80,13 +76,13 @@ using Flat = typename tensorflow::TTypes<T>::Flat;
template <typename Device>
struct FakeQuantWithMinMaxArgsFunctor {
  void operator()(const Device& d, ConstFlat<float> inputs, const float min,
-                  const float max, Flat<float> outputs) {
+                  const float max, const int steps, Flat<float> outputs) {
    eigen_assert(min <= 0.0f && "min should be <= 0.0");
    eigen_assert(max >= 0.0f && "max should be >= 0.0");
    eigen_assert(min < max && "min should be < max");

    float nudged_min, nudged_max, nudged_scale;
-    Nudge(min, max, &nudged_min, &nudged_max, &nudged_scale);
+    Nudge(min, max, steps, &nudged_min, &nudged_max, &nudged_scale);
    const float inv_nudged_scale = 1.0f / nudged_scale;

    auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
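The rest of this functor is truncated in the diff; assuming it follows the usual fake-quant recipe (clamp, snap to the nearest step, map back to float), a scalar sketch of the forward computation looks like this:

#include <algorithm>
#include <cmath>

// Scalar sketch of the fake-quant forward pass (an assumed reading of the
// elided body above, not a copy of it).
float FakeQuantScalar(float x, float nudged_min, float nudged_max,
                      float nudged_scale) {
  const float clamped = std::min(std::max(x, nudged_min), nudged_max);
  const float clamped_shifted = clamped - nudged_min;
  return std::round(clamped_shifted / nudged_scale) * nudged_scale + nudged_min;
}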
@@ -103,13 +99,13 @@ template <typename Device>
struct FakeQuantWithMinMaxArgsGradientFunctor {
  void operator()(const Device& d, ConstFlat<float> gradients,
                  ConstFlat<float> inputs, const float min, const float max,
-                  Flat<float> backprops) {
+                  const int steps, Flat<float> backprops) {
    eigen_assert(min <= 0.0f && "min should be <= 0.0");
    eigen_assert(max >= 0.0f && "max should be >= 0.0");
    eigen_assert(min < max && "min should be < max");

    float nudged_min, nudged_max, nudged_scale;
-    Nudge(min, max, &nudged_min, &nudged_max, &nudged_scale);
+    Nudge(min, max, steps, &nudged_min, &nudged_max, &nudged_scale);

    auto between_nudged_min_max =
        (inputs >= nudged_min && inputs <= nudged_max)
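This gradient functor is a straight-through estimator: the incoming gradient passes through unchanged where the input lies inside the nudged range and is zeroed outside it. A scalar illustration of that rule (not the Eigen expression used above):

// Straight-through gradient for fake quantization: the rounding itself is
// treated as identity inside the nudged range and as constant outside it.
float FakeQuantGradScalar(float gradient, float input, float nudged_min,
                          float nudged_max) {
  const bool between = input >= nudged_min && input <= nudged_max;
  return between ? gradient : 0.0f;
}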
@@ -124,6 +120,7 @@ template <typename Device>
struct FakeQuantWithMinMaxVarsFunctor {
  void operator()(const Device& d, ConstFlat<float> inputs,
                  ConstScalar<float> min, ConstScalar<float> max,
+                  const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -138,7 +135,7 @@ struct FakeQuantWithMinMaxVarsFunctor {
#endif

    float nudged_min, nudged_max, nudged_scale;
-    Nudge(min(), max(), &nudged_min, &nudged_max, &nudged_scale);
+    Nudge(min(), max(), steps, &nudged_min, &nudged_max, &nudged_scale);
    const auto nudged_scale_repl = inputs.constant(nudged_scale);

    const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
@@ -155,7 +152,7 @@ template <typename Device>
struct FakeQuantWithMinMaxVarsGradientFunctor {
  void operator()(const Device& d, ConstFlat<float> gradients,
                  ConstFlat<float> inputs, ConstScalar<float> min,
-                  ConstScalar<float> max,
+                  ConstScalar<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -172,7 +169,7 @@ struct FakeQuantWithMinMaxVarsGradientFunctor {
#endif

    float nudged_min, nudged_max, nudged_scale;
-    Nudge(min(), max(), &nudged_min, &nudged_max, &nudged_scale);
+    Nudge(min(), max(), steps, &nudged_min, &nudged_max, &nudged_scale);

    const auto between_min_max =
        (inputs >= nudged_min && inputs <= nudged_max)
@@ -200,7 +197,7 @@ using Index = typename tensorflow::TTypes<float>::ConstTensor::Index;
template <typename Device>
struct FakeQuant1WithMinMaxVarsPerChannelFunctor {
  void operator()(const Device& d, ConstVec<float> inputs, ConstVec<float> min,
-                  ConstVec<float> max,
+                  ConstVec<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -216,7 +213,7 @@ struct FakeQuant1WithMinMaxVarsPerChannelFunctor {

    for (Index i = 0; i < min.size(); ++i) {
      float nudged_min, nudged_max, nudged_scale;
-      Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
+      Nudge(min(i), max(i), steps, &nudged_min, &nudged_max, &nudged_scale);
      const float clamped =
          std::max(std::min(inputs(i), nudged_max), nudged_min);
      const float clamped_shifted = clamped - nudged_min;
@@ -233,7 +230,7 @@ template <typename Device>
struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
  void operator()(const Device& d, const Index batch_size, const Index depth,
                  ConstFlat<float> inputs, ConstVec<float> min,
-                  ConstVec<float> max,
+                  ConstVec<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -251,7 +248,7 @@ struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
    const auto inputs_restored = inputs.reshape(restored);
    for (Index i = 0; i < min.size(); ++i) {
      float nudged_min, nudged_max, nudged_scale;
-      Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
+      Nudge(min(i), max(i), steps, &nudged_min, &nudged_max, &nudged_scale);
      const auto clamped =
          inputs_restored.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
      const auto clamped_shifted = clamped - nudged_min;
@@ -269,7 +266,7 @@ template <typename Device>
struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
  void operator()(const Device& d, const Index batch_size, const Index height,
                  const Index width, const Index depth, ConstFlat<float> inputs,
-                  ConstVec<float> min, ConstVec<float> max,
+                  ConstVec<float> min, ConstVec<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -287,7 +284,7 @@ struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
    const auto inputs_restored = inputs.reshape(restored);
    for (Index i = 0; i < min.size(); ++i) {
      float nudged_min, nudged_max, nudged_scale;
-      Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
+      Nudge(min(i), max(i), steps, &nudged_min, &nudged_max, &nudged_scale);
      const auto clamped =
          inputs_restored.chip<3>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
      const auto clamped_shifted = clamped - nudged_min;
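The per-channel functors above repeat the same computation once per slice of the last dimension, each slice with its own min/max. A minimal sketch of that idea for a [batch, depth] array, written with plain loops instead of Eigen chips (the function name, buffer layout, and precomputed nudged arrays are illustrative assumptions):

#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative per-channel fake quantization over a row-major [batch, depth]
// buffer: channel d uses its own nudged range and scale.
void FakeQuantPerChannel2D(const std::vector<float>& inputs,
                           const std::vector<float>& nudged_min,
                           const std::vector<float>& nudged_max,
                           const std::vector<float>& nudged_scale, int batch,
                           int depth, std::vector<float>* outputs) {
  outputs->resize(inputs.size());
  for (int b = 0; b < batch; ++b) {
    for (int d = 0; d < depth; ++d) {
      const float x = inputs[b * depth + d];
      const float clamped =
          std::min(std::max(x, nudged_min[d]), nudged_max[d]);
      const float shifted = clamped - nudged_min[d];
      (*outputs)[b * depth + d] =
          std::round(shifted / nudged_scale[d]) * nudged_scale[d] +
          nudged_min[d];
    }
  }
}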
@@ -308,7 +305,7 @@ template <typename Device>
struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor {
  void operator()(const Device& d, ConstVec<float> gradients,
                  ConstVec<float> inputs, ConstVec<float> min,
-                  ConstVec<float> max,
+                  ConstVec<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -325,7 +322,7 @@ struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor {

    for (Index i = 0; i < min.size(); ++i) {
      float nudged_min, nudged_max, nudged_scale;
-      Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
+      Nudge(min(i), max(i), steps, &nudged_min, &nudged_max, &nudged_scale);

      const bool between_min_max =
          inputs(i) >= nudged_min && inputs(i) <= nudged_max;
@@ -346,7 +343,7 @@ template <typename Device>
struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor {
  void operator()(const Device& d, const Index batch_size, const Index depth,
                  ConstFlat<float> gradients, ConstFlat<float> inputs,
-                  ConstVec<float> min, ConstVec<float> max,
+                  ConstVec<float> min, ConstVec<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -366,7 +363,7 @@ struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor {
    const auto inputs_restored = inputs.reshape(restored);
    for (Index i = 0; i < min.size(); ++i) {
      float nudged_min, nudged_max, nudged_scale;
-      Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
+      Nudge(min(i), max(i), steps, &nudged_min, &nudged_max, &nudged_scale);
      const auto gradients_chip = gradients_restored.chip<1>(i);
      const auto inputs_chip = inputs_restored.chip<1>(i);

@@ -399,7 +396,7 @@ struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor {
  void operator()(const Device& d, const Index batch_size, const Index height,
                  const Index width, const Index depth,
                  ConstFlat<float> gradients, ConstFlat<float> inputs,
-                  ConstVec<float> min, ConstVec<float> max,
+                  ConstVec<float> min, ConstVec<float> max, const int steps,
#ifndef FAKE_QUANT_NO_DEBUG
                  Scalar<bool> check_min_max,
#endif
@@ -419,7 +416,7 @@ struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor {
    const auto inputs_restored = inputs.reshape(restored);
    for (Index i = 0; i < min.size(); ++i) {
      float nudged_min, nudged_max, nudged_scale;
-      Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
+      Nudge(min(i), max(i), steps, &nudged_min, &nudged_max, &nudged_scale);
      const auto gradients_chip = gradients_restored.chip<3>(i);
      const auto inputs_chip = inputs_restored.chip<3>(i);

(File diff suppressed because it is too large.)
@@ -4839,6 +4839,7 @@ output: A new tensor with the given shape and updates applied according
REGISTER_OP("FakeQuantWithMinMaxArgs")
    .Attr("min: float = -6.0")
    .Attr("max: float = 6.0")
+    .Attr("num_bits: int = 8")
    .Input("inputs: float")
    .Output("outputs: float")
    .SetShapeFn(shape_inference::UnchangedShape)
@@ -4848,6 +4849,7 @@ Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type.
Attributes [min; max] define the clamping range for the 'inputs' data. Op
divides this range into 255 steps (total of 256 values), then replaces each
'inputs' value with the closest of the quantized step values.
+'num_bits' is the bitwidth of the quantization; between 2 and 8, inclusive.

Quantization is called fake since the output is still in floating point.
)doc");
@@ -4855,6 +4857,7 @@ Quantization is called fake since the output is still in floating point.
REGISTER_OP("FakeQuantWithMinMaxArgsGradient")
    .Attr("min: float = -6.0")
    .Attr("max: float = 6.0")
+    .Attr("num_bits: int = 8")
    .Input("gradients: float")
    .Input("inputs: float")
    .Output("backprops: float")
@@ -4869,6 +4872,7 @@ backprops: Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:
)doc");

REGISTER_OP("FakeQuantWithMinMaxVars")
+    .Attr("num_bits: int = 8")
    .Input("inputs: float")
    .Input("min: float")
    .Input("max: float")
@@ -4887,11 +4891,13 @@ and `max` to 'outputs' tensor of same shape as `inputs`.
[min; max] is the clamping range for the 'inputs' data. Op divides this range
into 255 steps (total of 256 values), then replaces each 'inputs' value with the
closest of the quantized step values.
+'num_bits' is the bitwidth of the quantization; between 2 and 8, inclusive.

This operation has a gradient and thus allows for training `min` and `max` values.
)doc");

REGISTER_OP("FakeQuantWithMinMaxVarsGradient")
+    .Attr("num_bits: int = 8")
    .Input("gradients: float")
    .Input("inputs: float")
    .Input("min: float")
@@ -4920,6 +4926,7 @@ Compute gradients for a FakeQuantWithMinMaxVars operation.
gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation.
inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation.
min, max: Quantization interval, scalar floats.
+num_bits: The bitwidth of the quantization; between 2 and 8, inclusive.
backprops_wrt_input: Backpropagated gradients w.r.t. inputs:
  `gradients * (inputs >= min && inputs <= max)`.
backprop_wrt_min: Backpropagated gradients w.r.t. min parameter:
@@ -4929,6 +4936,7 @@ backprop_wrt_max: Backpropagated gradients w.r.t. max parameter:
)doc");

REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel")
+    .Attr("num_bits: int = 8")
    .Input("inputs: float")
    .Input("min: float")
    .Input("max: float")
@@ -4955,11 +4963,13 @@ to 'outputs' tensor of same shape as `inputs`.
[min; max] is the clamping range for the 'inputs' data in the corresponding
depth channel. Op divides this range into 255 steps (total of 256 values), then
replaces each 'inputs' value with the closest of the quantized step values.
+'num_bits' is the bitwidth of the quantization; between 2 and 8, inclusive.

This operation has a gradient and thus allows for training `min` and `max` values.
)doc");

REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient")
+    .Attr("num_bits: int = 8")
    .Input("gradients: float")
    .Input("inputs: float")
    .Input("min: float")
@@ -4993,6 +5003,7 @@ gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation,
inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape
  same as `gradients`.
min, max: Quantization interval, floats of shape `[d]`.
+num_bits: The bitwidth of the quantization; between 2 and 8, inclusive.
backprops_wrt_input: Backpropagated gradients w.r.t. inputs, shape same as
  `inputs`:
  `gradients * (inputs >= min && inputs <= max)`.
@@ -890,27 +890,27 @@ tf_module {
  }
  member_method {
    name: "fake_quant_with_min_max_args"
-    argspec: "args=[\'inputs\', \'min\', \'max\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'inputs\', \'min\', \'max\', \'num_bits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "fake_quant_with_min_max_args_gradient"
-    argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "fake_quant_with_min_max_vars"
-    argspec: "args=[\'inputs\', \'min\', \'max\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'inputs\', \'min\', \'max\', \'num_bits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
  }
  member_method {
    name: "fake_quant_with_min_max_vars_gradient"
-    argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
  }
  member_method {
    name: "fake_quant_with_min_max_vars_per_channel"
-    argspec: "args=[\'inputs\', \'min\', \'max\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'inputs\', \'min\', \'max\', \'num_bits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
  }
  member_method {
    name: "fake_quant_with_min_max_vars_per_channel_gradient"
-    argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'gradients\', \'inputs\', \'min\', \'max\', \'num_bits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
  }
  member_method {
    name: "fft"