Merge branch 'master' into ENH/better_leaky_relu
commit 9d517206ed
@@ -890,21 +890,26 @@ void ProcessFunctionLibraryRuntime::RunMultiDevice(
    VLOG(1) << "Running component function on device " << target
            << " with handle " << handle;
    VLOG(4) << " with " << opts_copy.DebugString();
    flr->Run(
        opts_copy, handle, comp_args, comp_rets,
        [comp_rets, rets, comp_data, refcounted_done](const Status& status) {
          if (!status.ok()) {
            VLOG(2) << "Component function execution failed: " << status;
            refcounted_done->UpdateStatus(status);
          } else {
            for (int i = 0; i < comp_rets->size(); ++i) {
              (*rets)[comp_data.ret_indices_[i]] = (*comp_rets)[i];
            }
          }
          delete comp_rets;
          // refcounted_done is thread-safe
          refcounted_done->Unref();
        });

    flr->Run(opts_copy, handle, comp_args, comp_rets,
             [comp_rets, rets, comp_data, refcounted_done,
              data](const Status& status) {
               if (!status.ok()) {
                 VLOG(2) << "Component function execution failed: " << status;
                 const string function_and_msg = strings::StrCat(
                     errors::FormatFunctionForError(data->function_name_),
                     " ", status.error_message());
                 refcounted_done->UpdateStatus(
                     Status(status.code(), function_and_msg));
               } else {
                 for (int i = 0; i < comp_rets->size(); ++i) {
                   (*rets)[comp_data.ret_indices_[i]] = (*comp_rets)[i];
                 }
               }
               delete comp_rets;
               // refcounted_done is thread-safe
               refcounted_done->Unref();
             });
  } else {
    opts_copy.remote_execution = true;

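Review note: the second `flr->Run` call above is the replacement; on failure it now prefixes the component function's name to the error before propagating it. A minimal, self-contained sketch of that pattern (the `Status` struct and the helper below are stand-ins, not TensorFlow APIs; the `{{function_node ...}}` tag format is an assumption based on the `function_node` tag type parsed in the Python hunk later in this diff):

```cpp
#include <string>

// Stand-in for TensorFlow's Status; code 0 means OK.
struct Status {
  int code = 0;
  std::string message;
  bool ok() const { return code == 0; }
};

// Mirrors what the new callback builds with errors::FormatFunctionForError
// and strings::StrCat: a status whose message names the failed function.
Status AnnotateWithFunctionName(const Status& status,
                                const std::string& function_name) {
  if (status.ok()) return status;
  return {status.code,
          "{{function_node " + function_name + "}} " + status.message};
}
```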
@@ -1264,7 +1264,9 @@ tf_kernel_library(
tf_kernel_library(
    name = "unique_op",
    prefix = "unique_op",
    deps = ARRAY_DEPS,
    deps = ARRAY_DEPS + [
        "@com_google_absl//absl/container:flat_hash_map",
    ],
)

tf_kernel_library(

@@ -37,6 +37,21 @@ limitations under the License.

namespace tensorflow {

namespace {
template <typename T>
struct RawType {
  using type = T;
};

template <>
struct RawType<qint8> {
  // spacetodepth_op_gpu.cu.cc does not instantiate SpaceToDepthOpFunctor for
  // int8, so we map qint8 to uint8. Instantiating int8 could slow down
  // compilation and the code generated is almost the same as for uint8.
  using type = uint8;
};
}  // namespace

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

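Review note: `RawType` is a small type-mapping trait with an identity default and one redirecting specialization. The same pattern in a self-contained form (the `qint8` struct here is a stand-in for TensorFlow's type):

```cpp
#include <cstdint>
#include <type_traits>

struct qint8 { int8_t value; };  // stand-in for tensorflow::qint8

// Identity mapping by default...
template <typename T>
struct RawType { using type = T; };

// ...with one specialization that redirects qint8 to the type the GPU
// kernels actually instantiate (uint8, per the comment in the diff).
template <>
struct RawType<qint8> { using type = uint8_t; };

static_assert(std::is_same<RawType<float>::type, float>::value, "identity");
static_assert(std::is_same<RawType<qint8>::type, uint8_t>::value, "mapped");
```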
@@ -66,17 +81,17 @@ class SpaceToDepthOp : public OpKernel {
    const Tensor& input = context->input(0);
    const int dims = input.dims();

    // Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here.
    constexpr bool is_int8x4 = std::is_same<T, qint8>::value;
    OP_REQUIRES(context, (is_int8x4 == (data_format_ == FORMAT_NCHW_VECT_C)),
                errors::InvalidArgument(
                    "qint8 should be used with data_format NCHW_VECT_C."));

    constexpr int kVect = is_int8x4 ? 4 : 1;
    constexpr int kDims = is_int8x4 ? 5 : 4;
    OP_REQUIRES(context, kDims == dims,
                errors::InvalidArgument("Input rank should be: ", kDims,
                                        " instead of: ", dims));
    const bool is_int8x4 = (data_format_ == FORMAT_NCHW_VECT_C);
    const int vect = is_int8x4 ? 4 : 1;
    if (is_int8x4) {
      OP_REQUIRES(
          context, dims == 5,
          errors::InvalidArgument("Input rank should be 5 instead of ", dims));
    } else {
      OP_REQUIRES(
          context, dims == 4,
          errors::InvalidArgument("Input rank should be 4 instead of ", dims));
    }

    constexpr int kNumSpatialDims = 2;
    const int batch_size =
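Review note: the rank-5 requirement in the `is_int8x4` branch comes from the NCHW_VECT_C layout, which packs four int8 values into a trailing dimension. A toy illustration of the shape bookkeeping (the concrete numbers are made-up examples):

```cpp
#include <array>
#include <cstdio>

int main() {
  // An NCHW tensor of depth 8 stored as NCHW_VECT_C becomes
  // N x (C/4) x H x W x 4, hence rank 5 instead of rank 4.
  const std::array<int, 4> nchw = {2, 8, 6, 6};
  const std::array<int, 5> nchw_vect_c = {nchw[0], nchw[1] / 4, nchw[2],
                                          nchw[3], 4};
  std::printf("rank %zu -> rank %zu\n", nchw.size(), nchw_vect_c.size());
  return 0;
}
```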
@@ -87,7 +102,7 @@ class SpaceToDepthOp : public OpKernel {
        input.dim_size(GetTensorDimIndex<kNumSpatialDims>(data_format_, 'W'));
    const int input_depth =
        input.dim_size(GetTensorDimIndex<kNumSpatialDims>(data_format_, 'C')) *
        kVect;
        vect;

    // Both width and height must be divisible by block_size.
    OP_REQUIRES(context,
@@ -111,32 +126,32 @@ class SpaceToDepthOp : public OpKernel {
                                output_width, output_depth),
                    &outputs_tensor));

    auto Tinput = input.tensor<T, kDims>();
    auto Toutput = outputs_tensor->tensor<T, kDims>();

    if (std::is_same<Device, GPUDevice>::value) {
      if (is_int8x4) {
        using RT = typename RawType<T>::type;
      if (data_format_ == FORMAT_NCHW_VECT_C) {
        // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
        auto Tinput_v = input.template reinterpret_last_dimension<int32, 4>();
        auto Toutput_v = outputs_tensor->reinterpret_last_dimension<int32, 4>();
        functor::SpaceToDepthOpFunctor<GPUDevice, int32, FORMAT_NCHW> functor;
        functor(context->eigen_device<GPUDevice>(), Tinput_v, block_size_,
                Toutput_v);
        return;
      } else if (data_format_ == FORMAT_NCHW) {
        functor::SpaceToDepthOpFunctor<GPUDevice, T, FORMAT_NCHW> functor;
        functor(context->eigen_device<GPUDevice>(), Tinput, block_size_,
                Toutput);
        return;
        CHECK((std::is_same<T, RT>::value));
        functor::SpaceToDepthOpFunctor<GPUDevice, RT, FORMAT_NCHW> functor;
        functor(context->eigen_device<GPUDevice>(), input.tensor<RT, 4>(),
                block_size_, outputs_tensor->tensor<RT, 4>());
      } else {
        CHECK((std::is_same<T, RT>::value));
        functor::SpaceToDepthOpFunctor<GPUDevice, RT, FORMAT_NHWC> functor;
        functor(context->eigen_device<GPUDevice>(), input.tensor<RT, 4>(),
                block_size_, outputs_tensor->tensor<RT, 4>());
      }
    }

    // NOTE: Assumes data_format_ == FORMAT_NHWC here, since we have rejected
    // (CPU && data_format_ != FORMAT_NHWC) in the constructor.

    if (!is_int8x4) {
    } else {
      // NOTE: Assumes data_format_ == FORMAT_NHWC here, since we have rejected
      // (CPU && data_format_ != FORMAT_NHWC) in the constructor.
      functor::SpaceToDepthOpFunctor<Device, T, FORMAT_NHWC> functor;
      functor(context->eigen_device<Device>(), Tinput, block_size_, Toutput);
      functor(context->eigen_device<Device>(), input.tensor<T, 4>(),
              block_size_, outputs_tensor->tensor<T, 4>());
    }
  };

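Review note on "NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32": four packed 8-bit lanes can be moved as one 32-bit word, which is why the kernel reinterprets the last dimension instead of instantiating a qint8 functor. A scalar sketch of that reinterpretation (endianness and alignment caveats apply; TensorFlow's `reinterpret_last_dimension` does the real work):

```cpp
#include <cstdint>
#include <cstring>

// Pack four int8 lanes into one int32 word, bit for bit.
int32_t PackFourInt8(const int8_t lanes[4]) {
  int32_t word;
  static_assert(sizeof(word) == 4 * sizeof(int8_t), "4 lanes per word");
  std::memcpy(&word, lanes, sizeof(word));  // well-defined type pun
  return word;
}
```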
@@ -181,6 +196,7 @@ struct SpaceToDepthOpFunctor<CPUDevice, T, FORMAT_NHWC> {
                          SpaceToDepthOp<CPUDevice, type>);

TF_CALL_ALL_TYPES(REGISTER);
TF_CALL_qint8(REGISTER);
#undef REGISTER

#if GOOGLE_CUDA
@@ -14,9 +14,9 @@ limitations under the License.
==============================================================================*/

#include <functional>
#include <unordered_map>
#include <utility>

#include "absl/container/flat_hash_map.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
@@ -106,7 +106,7 @@ class UniqueOp : public OpKernel {
    auto Tin = input.flat<T>();
    const int64 N = static_cast<int64>(Tin.size());

    std::unordered_map<T, TIndex> uniq;
    absl::flat_hash_map<T, TIndex> uniq;
    uniq.reserve(2 * N);
    for (Eigen::Index i = 0, j = 0; i < N; ++i) {
      auto it = uniq.insert(std::make_pair(Tin(i), j));
@@ -153,7 +153,8 @@ class UniqueOp : public OpKernel {
      return true;
    };

    std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
    absl::flat_hash_map<int64, int64, decltype(hash_fn),
                        decltype(equal_to_fn)>
        uniq(0, hash_fn, equal_to_fn);

    uniq.reserve(2 * Tin.dimension(1));

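Review note: both hunks swap `std::unordered_map` for `absl::flat_hash_map`, which is API-compatible for this usage (`insert` returning an `<iterator, bool>` pair) but stores entries in a flat array for better cache behavior. A minimal sketch of the id-assignment loop at the core of `UniqueOp`, assuming the Abseil dependency added in the BUILD hunk above:

```cpp
#include <cstdint>

#include "absl/container/flat_hash_map.h"

// Assigns a dense id to each distinct value; writes one id per input.
int64_t AssignUniqueIds(const int64_t* vals, int64_t n, int64_t* out_idx) {
  absl::flat_hash_map<int64_t, int64_t> uniq;
  uniq.reserve(2 * n);  // same sizing heuristic as the diff
  int64_t next_id = 0;
  for (int64_t i = 0; i < n; ++i) {
    const auto result = uniq.insert({vals[i], next_id});
    if (result.second) ++next_id;  // first time we see this value
    out_idx[i] = result.first->second;
  }
  return next_id;  // number of unique values
}
```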
@@ -206,20 +206,49 @@ void CompareRoundingResults(int flat_size, const int depth_multiplier,
}
#endif

void TryTestOneDepthwiseConv3x3Filter() {
bool GenerateValidShapeConfigurations(
    int filter_width, int filter_height, int depth_multiplier,
    int dilation_width_factor, int dilation_height_factor,
    RuntimeShape* input_shape_inference, RuntimeShape* filter_shape_inference,
    RuntimeShape* output_shape_inference, int* pad_width, int* pad_height,
    int* stride) {
  const int batch = UniformRandomInt(1, 3);
  const int input_depth = 8 * ExponentialRandomPositiveInt(0.9f, 10, 50);
  int input_width = UniformRandomInt(5, 50);
  int input_height = UniformRandomInt(5, 50);
  const int input_width = UniformRandomInt(5, 50);
  const int input_height = UniformRandomInt(5, 50);
  *stride = UniformRandomInt(1, 2);
  const bool test_pad = UniformRandomInt(0, 1);
  const auto padding_type = test_pad ? PaddingType::kValid : PaddingType::kSame;

  const int output_depth = input_depth * depth_multiplier;

  input_shape_inference->BuildFrom(
      {batch, input_height, input_width, input_depth});

  filter_shape_inference->BuildFrom(
      {1, filter_height, filter_width, output_depth});

  EXPECT_TRUE(ComputeConvSizes(
      *input_shape_inference, output_depth, filter_width, filter_height,
      *stride, dilation_width_factor, dilation_height_factor, padding_type,
      output_shape_inference, pad_width, pad_height));

  // We just care about whether the shape is suitable so we use non-per-channel
  // case.
  return optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported<
      optimized_ops::depthwise_conv::QuantizationType::kNonPerChannelUint8>(
      *input_shape_inference, *filter_shape_inference, *stride, *stride,
      dilation_width_factor, dilation_height_factor, *pad_width, *pad_height,
      depth_multiplier, *output_shape_inference, 0);
}

void TryTestOneDepthwiseConv3x3Filter() {
  const int filter_width = 3;
  const int filter_height = 3;
  const int depth_multiplier = 1;
  const int stride = UniformRandomInt(1, 2);
  // We don't support dilations in the 3x3 filter.
  const int dilation_width_factor = 1;
  const int dilation_height_factor = 1;
  // Currently only support valid for per-channel fast kernel.
  const auto padding_type = PaddingType::kValid;

  const int output_activation_min = -128;
  const int output_activation_max = 127;
@@ -227,19 +256,25 @@ void TryTestOneDepthwiseConv3x3Filter() {
  const std::int32_t input_offset = UniformRandomInt(-25, 25);
  const std::int32_t output_offset = UniformRandomInt(-25, 25);

  const int output_depth = input_depth * depth_multiplier;

  RuntimeShape input_shape_inference(
      {batch, input_height, input_width, input_depth});
  RuntimeShape input_shape_inference;
  RuntimeShape filter_shape_inference;
  RuntimeShape output_shape_inference;
  int pad_width, pad_height;
  EXPECT_TRUE(ComputeConvSizes(
      input_shape_inference, output_depth, filter_width, filter_height, stride,
      dilation_width_factor, dilation_height_factor, padding_type,
      &output_shape_inference, &pad_width, &pad_height));
  int stride;

  // Keeps trying until we get valid shape/configurations for 3x3 filter case.
  bool generated_valid_configurations_for_3x3_kernel = false;
  while (!generated_valid_configurations_for_3x3_kernel) {
    generated_valid_configurations_for_3x3_kernel =
        GenerateValidShapeConfigurations(
            filter_width, filter_height, depth_multiplier,
            dilation_width_factor, dilation_height_factor,
            &input_shape_inference, &filter_shape_inference,
            &output_shape_inference, &pad_width, &pad_height, &stride);
  }

  const int output_depth = output_shape_inference.Dims(3);

  RuntimeShape filter_shape_inference(
      {1, filter_height, filter_width, output_depth});
  RuntimeShape bias_shape_inference({1, 1, 1, output_depth});
  const int input_buffer_size = input_shape_inference.FlatSize();
  const int filter_buffer_size = filter_shape_inference.FlatSize();
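Review note: the restructuring above turns one-shot shape generation into rejection sampling; `TryTestOneDepthwiseConv3x3Filter` now loops until `GenerateValidShapeConfigurations` returns a configuration the fast 3x3 kernel accepts. The pattern in isolation (the names below are illustrative, not from the test):

```cpp
#include <random>

// Keep drawing random configurations until one satisfies the predicate.
// Assumes, as the test does, that the predicate is satisfiable.
template <typename Config, typename Gen, typename Pred>
Config SampleUntilValid(Gen generate, Pred is_valid) {
  Config config = generate();
  while (!is_valid(config)) {
    config = generate();
  }
  return config;
}

int main() {
  std::mt19937 rng(42);
  // Toy usage: sample an even number in [0, 9].
  const int even = SampleUntilValid<int>(
      [&] { return static_cast<int>(rng() % 10); },
      [](int v) { return v % 2 == 0; });
  (void)even;
  return 0;
}
```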
@@ -422,7 +422,11 @@ inline bool Fast3x3FilterKernelSupported(

  // TODO(b/132878669): Support padding.
  if (pad_height == 1) {
    return false;
    for (int i = 0; i < output_depth; ++i) {
      if (output_shift_ptr[i] != output_shift_ptr[0]) {
        return false;
      }
    }
  }

  for (int i = 0; i < output_depth; ++i) {
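Review note: instead of rejecting every padded configuration outright, the gate above now accepts `pad_height == 1` as long as all channels share one output shift, i.e. the per-channel quantization is effectively uniform. That check in isolation:

```cpp
#include <cstdint>

// True when every channel uses the same output shift, so the padded path
// can treat the per-channel parameters as uniform.
bool AllOutputShiftsEqual(const int32_t* output_shift_ptr, int output_depth) {
  for (int i = 0; i < output_depth; ++i) {
    if (output_shift_ptr[i] != output_shift_ptr[0]) {
      return false;
    }
  }
  return true;
}
```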
@@ -1902,8 +1902,8 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 2,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
                                      EdgeType::kCenter, 1, 1> {
  static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                         const int32* bias_ptr, uint8* output_ptr,
  static inline void Run(const int8* input_ptr, const int8* filter_ptr,
                         const int32* bias_ptr, int8* output_ptr,
                         const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -1931,9 +1931,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "dup v25.8h, w9\n"

        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v0.8h, v25.8h, v0.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"

        "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"

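Review note: the `uaddw` to `saddw` swaps track the uint8 to int8 retyping of these kernels. Both instructions widen 8-bit lanes to 16 bits before adding to the offset vector; `uaddw` zero-extends, `saddw` sign-extends. A single-lane scalar model (the NEON instructions do this across 8 lanes at once):

```cpp
#include <cstdint>

// uaddw: widen an unsigned 8-bit lane, then add to a 16-bit accumulator.
int16_t UaddwLane(int16_t acc, uint8_t lane) {
  return static_cast<int16_t>(acc + static_cast<int16_t>(lane));
}

// saddw: widen a *signed* 8-bit lane, preserving its sign, then add.
int16_t SaddwLane(int16_t acc, int8_t lane) {
  return static_cast<int16_t>(acc + static_cast<int16_t>(lane));
}
```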
@@ -1953,13 +1953,13 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "st1 {v16.8b}, [%[output_ptr]], #8\n"
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v0.8h, v25.8h, v0.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"

        "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"
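Review note: the store path changes the same way; `sqxtun` saturates a signed 16-bit lane into the *unsigned* 8-bit range while `sqxtn` targets the *signed* range, and the clamps switch from `umax`/`umin` to `smax`/`smin` to match. Scalar model of one lane:

```cpp
#include <algorithm>
#include <cstdint>

// sqxtun: saturating narrow, signed 16-bit -> unsigned 8-bit [0, 255].
uint8_t SqxtunLane(int16_t v) {
  return static_cast<uint8_t>(std::min<int16_t>(std::max<int16_t>(v, 0), 255));
}

// sqxtn: saturating narrow, signed 16-bit -> signed 8-bit [-128, 127].
int8_t SqxtnLane(int16_t v) {
  return static_cast<int8_t>(
      std::min<int16_t>(std::max<int16_t>(v, -128), 127));
}
```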
@@ -1976,9 +1976,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "st1 {v16.8b}, [%[output_ptr]]\n"
        :
        // Outputs.
@@ -2003,8 +2003,8 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
                                      EdgeType::kCorner, 1, 1> {
  static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                         const int32* bias_ptr, uint8* output_ptr,
  static inline void Run(const int8* input_ptr, const int8* filter_ptr,
                         const int32* bias_ptr, int8* output_ptr,
                         const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -2052,17 +2052,17 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "dup v25.8h, w6\n"

        // Add input and filter offsets.
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v9.8h, v26.8h, v9.8b\n"
        "saddw v9.8h, v26.8h, v9.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v10.8h, v26.8h, v10.8b\n"
        "uaddw v11.8h, v26.8h, v11.8b\n"
        "saddw v10.8h, v26.8h, v10.8b\n"
        "saddw v11.8h, v26.8h, v11.8b\n"

        "uaddw v0.8h, v25.8h, v0.8b\n"
        "uaddw v1.8h, v25.8h, v1.8b\n"
        "uaddw v2.8h, v25.8h, v2.8b\n"
        "uaddw v3.8h, v25.8h, v3.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "saddw v1.8h, v25.8h, v1.8b\n"
        "saddw v2.8h, v25.8h, v2.8b\n"
        "saddw v3.8h, v25.8h, v3.8b\n"

        "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"

@@ -2094,20 +2094,20 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "st1 {v16.8b}, [%[output_ptr]], #8\n"
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v9.8h, v26.8h, v9.8b\n"
        "saddw v9.8h, v26.8h, v9.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v10.8h, v26.8h, v10.8b\n"
        "uaddw v11.8h, v26.8h, v11.8b\n"
        "uaddw v0.8h, v25.8h, v0.8b\n"
        "uaddw v1.8h, v25.8h, v1.8b\n"
        "uaddw v2.8h, v25.8h, v2.8b\n"
        "uaddw v3.8h, v25.8h, v3.8b\n"
        "saddw v10.8h, v26.8h, v10.8b\n"
        "saddw v11.8h, v26.8h, v11.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "saddw v1.8h, v25.8h, v1.8b\n"
        "saddw v2.8h, v25.8h, v2.8b\n"
        "saddw v3.8h, v25.8h, v3.8b\n"

        "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"

@@ -2129,9 +2129,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "st1 {v16.8b}, [%[output_ptr]]\n"
        :
        // Outputs.
@@ -2156,8 +2156,8 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
                                      EdgeType::kHorizontal, 1, 1> {
  static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                         const int32* bias_ptr, uint8* output_ptr,
  static inline void Run(const int8* input_ptr, const int8* filter_ptr,
                         const int32* bias_ptr, int8* output_ptr,
                         const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -2211,21 +2211,21 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "dup v25.8h, w12\n"

        // Add input and filter offsets.
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v9.8h, v26.8h, v9.8b\n"
        "saddw v9.8h, v26.8h, v9.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v10.8h, v26.8h, v10.8b\n"
        "uaddw v11.8h, v26.8h, v11.8b\n"
        "uaddw v12.8h, v26.8h, v12.8b\n"
        "uaddw v13.8h, v26.8h, v13.8b\n"
        "saddw v10.8h, v26.8h, v10.8b\n"
        "saddw v11.8h, v26.8h, v11.8b\n"
        "saddw v12.8h, v26.8h, v12.8b\n"
        "saddw v13.8h, v26.8h, v13.8b\n"

        "uaddw v0.8h, v25.8h, v0.8b\n"
        "uaddw v1.8h, v25.8h, v1.8b\n"
        "uaddw v2.8h, v25.8h, v2.8b\n"
        "uaddw v3.8h, v25.8h, v3.8b\n"
        "uaddw v4.8h, v25.8h, v4.8b\n"
        "uaddw v5.8h, v25.8h, v5.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "saddw v1.8h, v25.8h, v1.8b\n"
        "saddw v2.8h, v25.8h, v2.8b\n"
        "saddw v3.8h, v25.8h, v3.8b\n"
        "saddw v4.8h, v25.8h, v4.8b\n"
        "saddw v5.8h, v25.8h, v5.8b\n"

        "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"

@@ -2272,25 +2272,25 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "st1 {v16.8b}, [%[output_ptr]], #8\n"
        "uaddw v9.8h, v26.8h, v9.8b\n"
        "uaddw v10.8h, v26.8h, v10.8b\n"
        "uaddw v11.8h, v26.8h, v11.8b\n"
        "uaddw v12.8h, v26.8h, v12.8b\n"
        "uaddw v13.8h, v26.8h, v13.8b\n"
        "saddw v9.8h, v26.8h, v9.8b\n"
        "saddw v10.8h, v26.8h, v10.8b\n"
        "saddw v11.8h, v26.8h, v11.8b\n"
        "saddw v12.8h, v26.8h, v12.8b\n"
        "saddw v13.8h, v26.8h, v13.8b\n"

        "uaddw v0.8h, v25.8h, v0.8b\n"
        "uaddw v1.8h, v25.8h, v1.8b\n"
        "uaddw v2.8h, v25.8h, v2.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "saddw v1.8h, v25.8h, v1.8b\n"
        "saddw v2.8h, v25.8h, v2.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v3.8h, v25.8h, v3.8b\n"
        "saddw v3.8h, v25.8h, v3.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v4.8h, v25.8h, v4.8b\n"
        "uaddw v5.8h, v25.8h, v5.8b\n"
        "saddw v4.8h, v25.8h, v4.8b\n"
        "saddw v5.8h, v25.8h, v5.8b\n"

        "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"

@@ -2315,9 +2315,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "st1 {v16.8b}, [%[output_ptr]]\n"
        :
        // Outputs.
@@ -2342,8 +2342,8 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
                                      EdgeType::kVertical, 1, 1> {
  static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                         const int32* bias_ptr, uint8* output_ptr,
  static inline void Run(const int8* input_ptr, const int8* filter_ptr,
                         const int32* bias_ptr, int8* output_ptr,
                         const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -2399,21 +2399,21 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "dup v25.8h, w12\n"

        // Add input and filter offsets.
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v9.8h, v26.8h, v9.8b\n"
        "saddw v9.8h, v26.8h, v9.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v10.8h, v26.8h, v10.8b\n"
        "uaddw v11.8h, v26.8h, v11.8b\n"
        "uaddw v12.8h, v26.8h, v12.8b\n"
        "uaddw v13.8h, v26.8h, v13.8b\n"
        "saddw v10.8h, v26.8h, v10.8b\n"
        "saddw v11.8h, v26.8h, v11.8b\n"
        "saddw v12.8h, v26.8h, v12.8b\n"
        "saddw v13.8h, v26.8h, v13.8b\n"

        "uaddw v0.8h, v25.8h, v0.8b\n"
        "uaddw v1.8h, v25.8h, v1.8b\n"
        "uaddw v2.8h, v25.8h, v2.8b\n"
        "uaddw v3.8h, v25.8h, v3.8b\n"
        "uaddw v4.8h, v25.8h, v4.8b\n"
        "uaddw v5.8h, v25.8h, v5.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "saddw v1.8h, v25.8h, v1.8b\n"
        "saddw v2.8h, v25.8h, v2.8b\n"
        "saddw v3.8h, v25.8h, v3.8b\n"
        "saddw v4.8h, v25.8h, v4.8b\n"
        "saddw v5.8h, v25.8h, v5.8b\n"

        "blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"

@@ -2462,25 +2462,25 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "uaddw v8.8h, v26.8h, v8.8b\n"
        "sqxtn v16.8b, v16.8h\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "saddw v8.8h, v26.8h, v8.8b\n"
        "st1 {v16.8b}, [%[output_ptr]], #8\n"
        "uaddw v9.8h, v26.8h, v9.8b\n"
        "uaddw v10.8h, v26.8h, v10.8b\n"
        "uaddw v11.8h, v26.8h, v11.8b\n"
        "uaddw v12.8h, v26.8h, v12.8b\n"
        "uaddw v13.8h, v26.8h, v13.8b\n"
        "saddw v9.8h, v26.8h, v9.8b\n"
        "saddw v10.8h, v26.8h, v10.8b\n"
        "saddw v11.8h, v26.8h, v11.8b\n"
        "saddw v12.8h, v26.8h, v12.8b\n"
        "saddw v13.8h, v26.8h, v13.8b\n"

        "uaddw v0.8h, v25.8h, v0.8b\n"
        "uaddw v1.8h, v25.8h, v1.8b\n"
        "uaddw v2.8h, v25.8h, v2.8b\n"
        "saddw v0.8h, v25.8h, v0.8b\n"
        "saddw v1.8h, v25.8h, v1.8b\n"
        "saddw v2.8h, v25.8h, v2.8b\n"
        "ld1 {v16.4s}, [%[bias_ptr]], #16\n"
        "uaddw v3.8h, v25.8h, v3.8b\n"
        "saddw v3.8h, v25.8h, v3.8b\n"
        "ld1 {v17.4s}, [%[bias_ptr]], #16\n"
        "uaddw v4.8h, v25.8h, v4.8b\n"
        "uaddw v5.8h, v25.8h, v5.8b\n"
        "saddw v4.8h, v25.8h, v4.8b\n"
        "saddw v5.8h, v25.8h, v5.8b\n"

        "bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"

@@ -2505,10 +2505,10 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
        "sqxtn v16.4h, v16.4s\n"
        "sqxtn2 v16.8h, v17.4s\n"
        "sqadd v16.8h, v16.8h, v28.8h\n"
        "sqxtun v16.8b, v16.8h\n"
        "sqxtn v16.8b, v16.8h\n"
        // TODO(b/129852264): Improve testing coverage.
        "umax v16.8b, v16.8b, v30.8b\n"
        "umin v16.8b, v16.8b, v31.8b\n"
        "smax v16.8b, v16.8b, v30.8b\n"
        "smin v16.8b, v16.8b, v31.8b\n"
        "st1 {v16.8b}, [%[output_ptr]]\n"
        :
        // Outputs.
@@ -2690,10 +2690,10 @@ struct DepthwiseConvMultiRowPerChannel {
// * Vertical edges.
template <DepthwiseConvOutputRounding output_rounding>
inline void DepthwiseConvHandlePaddingPerChannel(
    const uint8* input_data, const uint8* filter_data, const int32* bias_data,
    uint8* output_data, const DepthwiseConvParams& params) {
    const int8* input_data, const int8* filter_data, const int32* bias_data,
    int8* output_data, const DepthwiseConvParams& params) {
  if (params.input_width == 1 && params.input_height == 1) {
    const uint8* filter_ptr =
    const int8* filter_ptr =
        filter_data + params.filter_row_size + params.output_depth;
    DepthwiseConvPartialPerChannel<output_rounding, EdgeType::kCenter, 1,
                                   1>::Run(input_data, filter_ptr, bias_data,
@@ -2707,10 +2707,10 @@ inline void DepthwiseConvHandlePaddingPerChannel(
  const int32 out_y_end_corner = params.output_height - 1;

  // Handle top row.
  const uint8* input_ptr = input_data;
  const uint8* filter_ptr =
  const int8* input_ptr = input_data;
  const int8* filter_ptr =
      filter_data + params.filter_row_size + params.output_depth;
  uint8* output_ptr = output_data;
  int8* output_ptr = output_data;

  DepthwiseConvPartialPerChannel<output_rounding, EdgeType::kCorner, 1, 1>::Run(
      input_ptr, filter_ptr, bias_data, output_ptr, &params);
@@ -2911,16 +2911,16 @@ inline void DepthwiseConv3x3FilterPerChannel(
  int32 end_y = row_end;

  // TODO(b/132878669): Support padding.
  // if (pad_width == 1 && pad_height == 1) {
  //   DepthwiseConvHandlePaddingPerChannel<output_rounding>(
  //       input_ptr, filter_data, bias_data, output_ptr, params);
  //
  //   // Update extents now that the edges have been handled.
  //   out_x = 1;
  //   end_x = params.output_width - 1;
  //   out_y = std::max(1, out_y);
  //   end_y = std::min(params.output_height - 1, end_y);
  // }
  if (pad_width == 1 && pad_height == 1) {
    DepthwiseConvHandlePaddingPerChannel<output_rounding>(
        input_ptr, filter_data, bias_data, output_ptr, params);

    // Update extents now that the edges have been handled.
    out_x = 1;
    end_x = params.output_width - 1;
    out_y = std::max(1, out_y);
    end_y = std::min(params.output_height - 1, end_y);
  }

  // pad_width and pad_height can both be 0 or 1, depending on padding option,
  // such as Padding_VALID / Padding_SAME.

@@ -213,9 +213,10 @@ int Main(int argc, char* argv[]) {
  evaluator->EvaluateModel();

  if (!proto_output_file_path.empty()) {
    std::ofstream proto_out_file(proto_output_file_path, std::ios::out);
    std::ofstream proto_out_file(proto_output_file_path,
                                 std::ios::out | std::ios::binary);
    TopkAccuracyEvalMetrics metrics = results_writer.AggregatedMetrics();
    proto_out_file << metrics.DebugString();
    proto_out_file << metrics.SerializeAsString();
    proto_out_file.close();
  }

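Review note: the two changes above go together; once the stream is opened with `std::ios::binary`, it should receive the protobuf wire format (`SerializeAsString`) rather than the human-readable `DebugString()`. A sketch of the corrected write, with the message type left generic so the snippet stands alone:

```cpp
#include <fstream>
#include <string>

// Works for any protobuf message type, e.g. TopkAccuracyEvalMetrics.
template <typename Proto>
void WriteBinaryProto(const std::string& path, const Proto& message) {
  std::ofstream out(path, std::ios::out | std::ios::binary);
  out << message.SerializeAsString();  // wire format, not DebugString()
}
```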
@@ -256,21 +256,20 @@ class _InterpolateFunctionError(object):
    _, tags = error_interpolation.parse_message(message)
    g = None
    func_stack = []
    # pylint: disable=protected-access
    for t in tags:
      if t.type == "function_node":
        # TODO(mdan): Tests should cover this.
        if t.name == compat.as_str(self._func.name):
          g = self._func._graph
          g = self._func.graph
        elif g:
          next_func = g._get_function(t.name)
          if next_func is not None and isinstance(next_func,
                                                  _EagerDefinedFunction):
            g = next_func._graph
            g = next_func.graph
        if g:
          func_stack.append(g.name)
        else:
          func_stack.append("<unknown>")
    # pylint: enable=protected-access
    if g:
      message = error_interpolation.interpolate(message, g)
      message += "\n\nFunction call stack:\n"
@@ -18,12 +18,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import graph_pb2
from tensorflow.core.framework import variable_pb2
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import meta_graph_pb2
from tensorflow.python.eager import wrap_function
from tensorflow.python.framework import tensor_util
from tensorflow.python.grappler import tf_optimizer
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training.saver import export_meta_graph

@@ -43,6 +46,19 @@ def _run_inline_graph_optimization(func):
  meta_graph = export_meta_graph(
      graph_def=func.graph.as_graph_def(), graph=func.graph)

  # Clear the initializer_name for the variables collections, since they are not
  # needed after saved to saved_model.
  for name in [
      "variables", "model_variables", "trainable_variables", "local_variables"
  ]:
    raw_list = []
    for raw in meta_graph.collection_def["variables"].bytes_list.value:
      variable = variable_pb2.VariableDef()
      variable.ParseFromString(raw)
      variable.ClearField("initializer_name")
      raw_list.append(variable.SerializeToString())
    meta_graph.collection_def[name].bytes_list.value[:] = raw_list

  # Add a collection 'train_op' so that Grappler knows the outputs.
  fetch_collection = meta_graph_pb2.CollectionDef()
  for array in func.inputs + func.outputs:
@@ -123,6 +139,7 @@ def convert_variables_to_constants_v2(func):
  resource_identities = {}
  placeholders = {}
  converted_input_indices = set()
  reference_variables = []
  for node in graph_def.node:
    if node.name in map_name_to_value:
      # Get the dtype and data for the Placeholders whose values are stored as
@@ -134,6 +151,9 @@ def convert_variables_to_constants_v2(func):
      }
      converted_input_indices.add(
          func.captured_inputs.index(map_name_to_value[node.name]))
    # Collect the reference variables that cannot be lifted.
    if node.op == "VariableV2":
      reference_variables.append(node)
    if node.op == "ReadVariableOp":
      # Get name of Placeholder op associated with ReadVariableOp. There can be
      # an Identity in between the ReadVariableOp and Placeholder. Store the
@@ -158,7 +178,35 @@ def convert_variables_to_constants_v2(func):
  output_graph_def = graph_pb2.GraphDef()
  how_many_converted = 0

  # Add identity node after the reference variable and get the tensor values
  # for them.
  if reference_variables:
    reference_variable_tensors = []
    with func.graph.as_default():
      for node in reference_variables:
        identity_node = array_ops.identity(
            func.graph.as_graph_element(node.name + ":0"))
        reference_variable_tensors.append(identity_node.name)

    reference_variable_values = func.prune([], reference_variable_tensors)()

    # Add values of reference variables as constant nodes.
    for node, value in zip(reference_variables, reference_variable_values):
      output_node = output_graph_def.node.add()
      dtype = attr_value_pb2.AttrValue()
      dtype.type = value.dtype.as_datatype_enum

      output_node.op = "Const"
      output_node.name = node.name
      output_node.attr["dtype"].CopyFrom(dtype)
      output_node.attr["value"].tensor.CopyFrom(
          tensor_util.make_tensor_proto(value))
      how_many_converted += 1

  for input_node in graph_def.node:
    # Skip VariableV2 node, since their values are added by the identity nodes.
    if input_node.op == "VariableV2":
      continue
    output_node = output_graph_def.node.add()
    # Convert Placeholder ops to Const ops.
    if input_node.name in placeholders:
@@ -21,12 +21,17 @@ from __future__ import print_function
import os

from tensorflow.python import keras
from tensorflow.python.client import session as session_lib
from tensorflow.python.eager import def_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import convert_to_constants
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.saved_model import simple_save
from tensorflow.python.saved_model.load import load
from tensorflow.python.saved_model.save import save
from tensorflow.python.training.tracking import tracking
@@ -51,9 +56,9 @@ class VariablesToConstantsTest(test.TestCase):
                             input_data):
    # Check that the converted ConcreteFunction produces the same result as the
    # original Function.
    expected_value = func(input_data)
    expected_value = nest.flatten(func(input_data))
    actual_value = nest.flatten(converted_concrete_func(input_data))
    self.assertEqual(expected_value.numpy(), actual_value)
    self.assertEqual(expected_value[0].numpy(), actual_value)

    # Ensure the shape is retained.
    self.assertEqual(converted_concrete_func.inputs[0].shape, input_data.shape)
@@ -65,7 +70,7 @@ class VariablesToConstantsTest(test.TestCase):
    # Load it back and make sure it works.
    loaded_obj = load(save_dir)
    actual_value = nest.flatten(loaded_obj.signatures["mykey"](input_data))
    self.assertEqual(expected_value.numpy(), actual_value)
    self.assertEqual(expected_value[0].numpy(), actual_value)

  @test_util.run_v2_only
  def testConstSavedModel(self):
@@ -231,6 +236,44 @@ class VariablesToConstantsTest(test.TestCase):
    actual_value = nest.flatten(output_func(input_data))
    self.assertEqual(expected_value.numpy(), actual_value)

  def _v1_single_metagraph_saved_model(self):
    export_graph = ops.Graph()
    with export_graph.as_default():
      start = array_ops.placeholder(
          shape=[1, 1], dtype=dtypes.float32, name="start")
      distractor = variables.RefVariable(-1., name="distractor")
      v = variables.RefVariable(3., name="v")
      local_variable = variables.VariableV1(
          1.,
          collections=[ops.GraphKeys.LOCAL_VARIABLES],
          trainable=False,
          use_resource=True)
      output = array_ops.identity(start * v * local_variable, name="output")
      with session_lib.Session() as session:
        session.run([v.initializer, distractor.initializer,
                     local_variable.initializer])
        path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid()))
        simple_save.simple_save(
            session,
            path,
            inputs={"start": start},
            outputs={"output": output},
            legacy_init_op=local_variable.initializer)
    return path

  @test_util.run_v2_only
  def test_ref_variable_import(self):
    saved = self._v1_single_metagraph_saved_model()
    imported = load(saved)
    fn = imported.signatures["serving_default"]
    output_func = convert_to_constants.convert_variables_to_constants_v2(fn)
    constant_graph_def = output_func.graph.as_graph_def()
    self.assertEqual(0, self._getNumVariables(constant_graph_def))
    self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def))

    input_data = constant_op.constant(1., shape=[1, 1])
    root = tracking.AutoTrackable()
    self._testConvertedFunction(root, fn, output_func, input_data)

if __name__ == "__main__":
  test.main()

@@ -235,6 +235,11 @@ class SpaceToDepthTest(test.TestCase):

  def spaceToDepthUsingTranspose(self, tensor, block_size, data_format):
    block_size_sq = block_size * block_size

    dtype = tensor.dtype
    if dtype == dtypes.qint8:
      tensor = array_ops.bitcast(tensor, dtypes.int8)

    if data_format == "NHWC":
      b, ih, iw, ic = tensor.shape.as_list()
      assert ih % block_size == 0, (ih, block_size)
@@ -253,56 +258,87 @@ class SpaceToDepthTest(test.TestCase):
                                 [b, ic, oh, block_size, ow, block_size])
      tensor = array_ops.transpose(tensor, [0, 3, 5, 1, 2, 4])
      tensor = array_ops.reshape(tensor, [b, oc, oh, ow])

    if dtype == dtypes.qint8:
      tensor = array_ops.bitcast(tensor, dtype)
    return tensor

  def compareToTranspose(self, batch_size, out_height, out_width, in_channels,
                         block_size, data_format, use_gpu):
                         block_size, data_format, data_type, use_gpu):
    in_height = out_height * block_size
    in_width = out_width * block_size
    nhwc_input_shape = [batch_size, in_height, in_width, in_channels]
    nchw_input_shape = [batch_size, in_channels, in_height, in_width]
    total_size = np.prod(nhwc_input_shape)

    if data_format == "NCHW_VECT_C":
      # Initialize the input tensor with qint8 values that circle -127..127.
      x = [((f + 128) % 255) - 127 for f in range(total_size)]
      t = constant_op.constant(x, shape=nhwc_input_shape, dtype=dtypes.float32)
      expected = self.spaceToDepthUsingTranspose(t, block_size, "NHWC")
      t = test_util.NHWCToNCHW_VECT_C(t)
      t, _, _ = gen_array_ops.quantize_v2(t, -128.0, 127.0, dtypes.qint8)
      t = array_ops.space_to_depth(t, block_size, data_format="NCHW_VECT_C")
      t = gen_array_ops.dequantize(t, -128, 127)
      actual = test_util.NCHW_VECT_CToNHWC(t)
    else:
      # Initialize the input tensor with ascending whole numbers as floats.
      x = [f * 1.0 for f in range(total_size)]
      shape = nchw_input_shape if data_format == "NCHW" else nhwc_input_shape
      t = constant_op.constant(x, shape=shape, dtype=dtypes.float32)
      expected = self.spaceToDepthUsingTranspose(t, block_size, data_format)
      actual = array_ops.space_to_depth(t, block_size, data_format=data_format)
    # Construct the input tensor in data_type and NHWC.
    # force_cpu is needed because quantize_v2 runs on only CPU.
    with test_util.force_cpu():
      if data_type == dtypes.qint8:
        # Initialize the input tensor with qint8 values that circle -127..127.
        x = [((f + 128) % 255) - 127 for f in range(total_size)]
        t = constant_op.constant(
            x, shape=nhwc_input_shape, dtype=dtypes.float32)
        t, _, _ = gen_array_ops.quantize_v2(t, -128.0, 127.0, dtypes.qint8)
      else:
        assert data_type == dtypes.float32
        # Initialize the input tensor with ascending whole numbers as floats.
        x = [f * 1.0 for f in range(total_size)]
        shape = nchw_input_shape if data_format == "NCHW" else nhwc_input_shape
        t = constant_op.constant(x, shape=shape, dtype=dtypes.float32)

    with test_util.device(use_gpu):
      if data_format == "NCHW_VECT_C":
        assert data_type == dtypes.qint8

        # Convert to int8, then NHWCToNCHW_VECT_C, and then back to qint8.
        actual = array_ops.bitcast(t, dtypes.int8)
        actual = test_util.NHWCToNCHW_VECT_C(actual)
        actual = array_ops.bitcast(actual, dtypes.qint8)
        actual = array_ops.space_to_depth(
            actual, block_size, data_format=data_format)
        actual = array_ops.bitcast(actual, dtypes.int8)
        actual = test_util.NCHW_VECT_CToNHWC(actual)
        actual = array_ops.bitcast(actual, dtypes.qint8)

        expected = array_ops.bitcast(t, dtypes.int8)
        expected = math_ops.cast(expected, dtypes.float32)
        expected = self.spaceToDepthUsingTranspose(expected, block_size, "NHWC")
        expected = math_ops.cast(expected, dtypes.int8)
        expected = array_ops.bitcast(expected, dtypes.qint8)
      else:
        # Initialize the input tensor with ascending whole numbers as floats.
        actual = array_ops.space_to_depth(
            t, block_size, data_format=data_format)
        expected = self.spaceToDepthUsingTranspose(t, block_size, data_format)

    with self.cached_session(use_gpu=use_gpu) as sess:
      actual_vals, expected_vals = self.evaluate([actual, expected])
      self.assertTrue(np.array_equal(actual_vals, expected_vals))

  # TODO(jingyue): figure out why this test failed in eager mode.
  @test_util.run_deprecated_v1
  def testAgainstTranspose(self):
    self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", False)
    self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", False)
    self.compareToTranspose(1, 2, 3, 2, 3, "NHWC", False)
    self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.float32, False)
    self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", dtypes.float32, False)
    self.compareToTranspose(1, 2, 3, 2, 3, "NHWC", dtypes.float32, False)

    self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.qint8, False)
    self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", dtypes.qint8, False)
    self.compareToTranspose(1, 2, 3, 2, 3, "NHWC", dtypes.qint8, False)

    if not test.is_gpu_available():
      tf_logging.info("skipping gpu tests since gpu not available")
      return

    self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", True)
    self.compareToTranspose(3, 2, 3, 2, 2, "NHWC", True)
    self.compareToTranspose(3, 2, 3, 1, 2, "NCHW", True)
    self.compareToTranspose(3, 2, 3, 2, 3, "NCHW", True)
    self.compareToTranspose(5, 7, 11, 3, 2, "NCHW", True)
    self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.float32, True)
    self.compareToTranspose(3, 2, 3, 2, 2, "NHWC", dtypes.float32, True)
    self.compareToTranspose(3, 2, 3, 1, 2, "NCHW", dtypes.float32, True)
    self.compareToTranspose(3, 2, 3, 2, 3, "NCHW", dtypes.float32, True)
    self.compareToTranspose(5, 7, 11, 3, 2, "NCHW", dtypes.float32, True)

    self.compareToTranspose(3, 2, 3, 4, 2, "NCHW_VECT_C", True)
    self.compareToTranspose(3, 2, 3, 8, 3, "NCHW_VECT_C", True)
    self.compareToTranspose(5, 7, 11, 12, 2, "NCHW_VECT_C", True)
    self.compareToTranspose(3, 2, 3, 4, 2, "NCHW_VECT_C", dtypes.qint8, True)
    self.compareToTranspose(3, 2, 3, 8, 3, "NCHW_VECT_C", dtypes.qint8, True)
    self.compareToTranspose(5, 7, 11, 12, 2, "NCHW_VECT_C", dtypes.qint8, True)


class SpaceToDepthGradientTest(test.TestCase):

@@ -1971,6 +1971,7 @@ def tf_py_wrap_cc(
# //third_party/tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
def py_test(deps = [], data = [], kernels = [], **kwargs):
    # Python version placeholder
    native.py_test(
        # TODO(jlebar): Ideally we'd use tcmalloc here.,
        deps = select({
@@ -1999,6 +2000,8 @@ def py_binary(name, deps = [], **kwargs):
        name = name + "_deps",
        deps = deps,
    )

    # Python version placeholder
    native.py_binary(
        name = name,
        deps = select({

|
@ -34,10 +34,6 @@ tf_class {
|
||||
name: "is_alive"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "join"
|
||||
argspec: "args=[\'self\', \'timeout\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "loop"
|
||||
argspec: "args=[\'coord\', \'timer_interval_secs\', \'target\', \'args\', \'kwargs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
|
||||
|
@@ -41,6 +41,9 @@ _CORNER_CASES = {
    'estimator.NanLossDuringTrainingError': {
        'message': {}
    },
    'train.LooperThread': {
        'join': {}
    }
}

# Python 2 vs. 3 differences