Merge branch 'master' into ENH/better_leaky_relu

Yan Facai (颜发才) 2019-05-23 16:36:34 +08:00
commit 9d517206ed
15 changed files with 415 additions and 223 deletions

View File

@@ -890,21 +890,26 @@ void ProcessFunctionLibraryRuntime::RunMultiDevice(
VLOG(1) << "Running component function on device " << target
<< " with handle " << handle;
VLOG(4) << " with " << opts_copy.DebugString();
flr->Run(
opts_copy, handle, comp_args, comp_rets,
[comp_rets, rets, comp_data, refcounted_done](const Status& status) {
if (!status.ok()) {
VLOG(2) << "Component function execution failed: " << status;
refcounted_done->UpdateStatus(status);
} else {
for (int i = 0; i < comp_rets->size(); ++i) {
(*rets)[comp_data.ret_indices_[i]] = (*comp_rets)[i];
}
}
delete comp_rets;
// refcounted_done is thread-safe
refcounted_done->Unref();
});
flr->Run(opts_copy, handle, comp_args, comp_rets,
[comp_rets, rets, comp_data, refcounted_done,
data](const Status& status) {
if (!status.ok()) {
VLOG(2) << "Component function execution failed: " << status;
const string function_and_msg = strings::StrCat(
errors::FormatFunctionForError(data->function_name_),
" ", status.error_message());
refcounted_done->UpdateStatus(
Status(status.code(), function_and_msg));
} else {
for (int i = 0; i < comp_rets->size(); ++i) {
(*rets)[comp_data.ret_indices_[i]] = (*comp_rets)[i];
}
}
delete comp_rets;
// refcounted_done is thread-safe
refcounted_done->Unref();
});
} else {
opts_copy.remote_execution = true;
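
The change above preserves the failing status code but prefixes the component
function's name to the message, so multi-device failures identify which
component function failed. A minimal self-contained sketch of that
composition, using absl in place of TF's strings/errors helpers (the
"{{function_node ...}}" tag format mirrors what errors::FormatFunctionForError
produces and what the Python error-interpolation code parses):

#include <string>
#include "absl/strings/str_cat.h"

// Sketch only: prepend a function-node tag to an error message, as the new
// callback does with FormatFunctionForError + StrCat.
std::string AnnotateComponentError(const std::string& function_name,
                                   const std::string& error_message) {
  return absl::StrCat("{{function_node ", function_name, "}} ", error_message);
}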

View File

@@ -1264,7 +1264,9 @@ tf_kernel_library(
tf_kernel_library(
name = "unique_op",
prefix = "unique_op",
deps = ARRAY_DEPS,
deps = ARRAY_DEPS + [
"@com_google_absl//absl/container:flat_hash_map",
],
)
tf_kernel_library(

View File

@@ -37,6 +37,21 @@ limitations under the License.
namespace tensorflow {
namespace {
template <typename T>
struct RawType {
using type = T;
};
template <>
struct RawType<qint8> {
// spacetodepth_op_gpu.cu.cc does not instantiate SpaceToDepthOpFunctor for
// int8, so we map qint8 to uint8. Instantiating for int8 would slow down
// compilation, and the generated code would be almost identical to uint8's.
using type = uint8;
};
} // namespace
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
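
The RawType helper above lets the qint8 instantiation reuse the uint8 GPU
kernels. A standalone sketch of the idiom (qint8_t is a stand-in for
tensorflow::qint8, not the real type):

#include <cstdint>
#include <type_traits>

struct qint8_t { int8_t value; };  // stand-in for tensorflow::qint8

// Map each type to the raw type whose kernel instantiations it can share.
template <typename T> struct RawTypeSketch { using type = T; };
template <> struct RawTypeSketch<qint8_t> { using type = uint8_t; };

static_assert(std::is_same<RawTypeSketch<float>::type, float>::value,
              "non-quantized types map to themselves");
static_assert(std::is_same<RawTypeSketch<qint8_t>::type, uint8_t>::value,
              "qint8 reuses the uint8 instantiation");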
@@ -66,17 +81,17 @@ class SpaceToDepthOp : public OpKernel {
const Tensor& input = context->input(0);
const int dims = input.dims();
// Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here.
constexpr bool is_int8x4 = std::is_same<T, qint8>::value;
OP_REQUIRES(context, (is_int8x4 == (data_format_ == FORMAT_NCHW_VECT_C)),
errors::InvalidArgument(
"qint8 should be used with data_format NCHW_VECT_C."));
constexpr int kVect = is_int8x4 ? 4 : 1;
constexpr int kDims = is_int8x4 ? 5 : 4;
OP_REQUIRES(context, kDims == dims,
errors::InvalidArgument("Input rank should be: ", kDims,
" instead of: ", dims));
const bool is_int8x4 = (data_format_ == FORMAT_NCHW_VECT_C);
const int vect = is_int8x4 ? 4 : 1;
if (is_int8x4) {
OP_REQUIRES(
context, dims == 5,
errors::InvalidArgument("Input rank should be 5 instead of ", dims));
} else {
OP_REQUIRES(
context, dims == 4,
errors::InvalidArgument("Input rank should be 4 instead of ", dims));
}
constexpr int kNumSpatialDims = 2;
const int batch_size =
@@ -87,7 +102,7 @@ class SpaceToDepthOp : public OpKernel {
input.dim_size(GetTensorDimIndex<kNumSpatialDims>(data_format_, 'W'));
const int input_depth =
input.dim_size(GetTensorDimIndex<kNumSpatialDims>(data_format_, 'C')) *
kVect;
vect;
// Both width and height must be divisible by block_size.
OP_REQUIRES(context,
@@ -111,32 +126,32 @@ class SpaceToDepthOp : public OpKernel {
output_width, output_depth),
&outputs_tensor));
auto Tinput = input.tensor<T, kDims>();
auto Toutput = outputs_tensor->tensor<T, kDims>();
if (std::is_same<Device, GPUDevice>::value) {
if (is_int8x4) {
using RT = typename RawType<T>::type;
if (data_format_ == FORMAT_NCHW_VECT_C) {
// NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
auto Tinput_v = input.template reinterpret_last_dimension<int32, 4>();
auto Toutput_v = outputs_tensor->reinterpret_last_dimension<int32, 4>();
functor::SpaceToDepthOpFunctor<GPUDevice, int32, FORMAT_NCHW> functor;
functor(context->eigen_device<GPUDevice>(), Tinput_v, block_size_,
Toutput_v);
return;
} else if (data_format_ == FORMAT_NCHW) {
functor::SpaceToDepthOpFunctor<GPUDevice, T, FORMAT_NCHW> functor;
functor(context->eigen_device<GPUDevice>(), Tinput, block_size_,
Toutput);
return;
CHECK((std::is_same<T, RT>::value));
functor::SpaceToDepthOpFunctor<GPUDevice, RT, FORMAT_NCHW> functor;
functor(context->eigen_device<GPUDevice>(), input.tensor<RT, 4>(),
block_size_, outputs_tensor->tensor<RT, 4>());
} else {
CHECK((std::is_same<T, RT>::value));
functor::SpaceToDepthOpFunctor<GPUDevice, RT, FORMAT_NHWC> functor;
functor(context->eigen_device<GPUDevice>(), input.tensor<RT, 4>(),
block_size_, outputs_tensor->tensor<RT, 4>());
}
}
// NOTE: Assumes data_format_ == FORMAT_NHWC here, since we have rejected
// (CPU && data_format_ != FORMAT_NHWC) in the constructor.
if (!is_int8x4) {
} else {
// NOTE: Assumes data_format_ == FORMAT_NHWC here, since we have rejected
// (CPU && data_format_ != FORMAT_NHWC) in the constructor.
functor::SpaceToDepthOpFunctor<Device, T, FORMAT_NHWC> functor;
functor(context->eigen_device<Device>(), Tinput, block_size_, Toutput);
functor(context->eigen_device<Device>(), input.tensor<T, 4>(),
block_size_, outputs_tensor->tensor<T, 4>());
}
};
@@ -181,6 +196,7 @@ struct SpaceToDepthOpFunctor<CPUDevice, T, FORMAT_NHWC> {
SpaceToDepthOp<CPUDevice, type>);
TF_CALL_ALL_TYPES(REGISTER);
TF_CALL_qint8(REGISTER);
#undef REGISTER
#if GOOGLE_CUDA

View File

@@ -14,9 +14,9 @@ limitations under the License.
==============================================================================*/
#include <functional>
#include <unordered_map>
#include <utility>
#include "absl/container/flat_hash_map.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
@@ -106,7 +106,7 @@ class UniqueOp : public OpKernel {
auto Tin = input.flat<T>();
const int64 N = static_cast<int64>(Tin.size());
std::unordered_map<T, TIndex> uniq;
absl::flat_hash_map<T, TIndex> uniq;
uniq.reserve(2 * N);
for (Eigen::Index i = 0, j = 0; i < N; ++i) {
auto it = uniq.insert(std::make_pair(Tin(i), j));
@@ -153,7 +153,8 @@ class UniqueOp : public OpKernel {
return true;
};
std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
absl::flat_hash_map<int64, int64, decltype(hash_fn),
decltype(equal_to_fn)>
uniq(0, hash_fn, equal_to_fn);
uniq.reserve(2 * Tin.dimension(1));
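
The hash-map swap above keeps UniqueOp's algorithm unchanged: insert each
element keyed by value, and the stored index identifies its first occurrence.
A self-contained sketch of that pattern with absl::flat_hash_map (simplified
to int64 keys; not the op itself):

#include <cstdint>
#include <vector>
#include "absl/container/flat_hash_map.h"

// Returns, for each input element, the id of its unique value, assigning ids
// in order of first occurrence (the same insert-and-reuse pattern as the op).
std::vector<int64_t> UniqueIds(const std::vector<int64_t>& in) {
  absl::flat_hash_map<int64_t, int64_t> uniq;
  uniq.reserve(2 * in.size());  // mirrors the 2 * N headroom used by the op
  std::vector<int64_t> ids(in.size());
  int64_t next_id = 0;
  for (size_t i = 0; i < in.size(); ++i) {
    auto it = uniq.insert({in[i], next_id});
    ids[i] = it.first->second;
    if (it.second) ++next_id;  // a new unique value was inserted
  }
  return ids;
}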

View File

@@ -206,20 +206,49 @@ void CompareRoundingResults(int flat_size, const int depth_multiplier,
}
#endif
void TryTestOneDepthwiseConv3x3Filter() {
bool GenerateValidShapeConfigurations(
int filter_width, int filter_height, int depth_multiplier,
int dilation_width_factor, int dilation_height_factor,
RuntimeShape* input_shape_inference, RuntimeShape* filter_shape_inference,
RuntimeShape* output_shape_inference, int* pad_width, int* pad_height,
int* stride) {
const int batch = UniformRandomInt(1, 3);
const int input_depth = 8 * ExponentialRandomPositiveInt(0.9f, 10, 50);
int input_width = UniformRandomInt(5, 50);
int input_height = UniformRandomInt(5, 50);
const int input_width = UniformRandomInt(5, 50);
const int input_height = UniformRandomInt(5, 50);
*stride = UniformRandomInt(1, 2);
const bool test_pad = UniformRandomInt(0, 1);
const auto padding_type = test_pad ? PaddingType::kValid : PaddingType::kSame;
const int output_depth = input_depth * depth_multiplier;
input_shape_inference->BuildFrom(
{batch, input_height, input_width, input_depth});
filter_shape_inference->BuildFrom(
{1, filter_height, filter_width, output_depth});
EXPECT_TRUE(ComputeConvSizes(
*input_shape_inference, output_depth, filter_width, filter_height,
*stride, dilation_width_factor, dilation_height_factor, padding_type,
output_shape_inference, pad_width, pad_height));
// We only care about whether the shape is suitable, so we use the
// non-per-channel case.
return optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported<
optimized_ops::depthwise_conv::QuantizationType::kNonPerChannelUint8>(
*input_shape_inference, *filter_shape_inference, *stride, *stride,
dilation_width_factor, dilation_height_factor, *pad_width, *pad_height,
depth_multiplier, *output_shape_inference, 0);
}
void TryTestOneDepthwiseConv3x3Filter() {
const int filter_width = 3;
const int filter_height = 3;
const int depth_multiplier = 1;
const int stride = UniformRandomInt(1, 2);
// We don't support dilations in the 3x3 filter.
const int dilation_width_factor = 1;
const int dilation_height_factor = 1;
// Currently only VALID padding is supported by the per-channel fast kernel.
const auto padding_type = PaddingType::kValid;
const int output_activation_min = -128;
const int output_activation_max = 127;
@@ -227,19 +256,25 @@ void TryTestOneDepthwiseConv3x3Filter() {
const std::int32_t input_offset = UniformRandomInt(-25, 25);
const std::int32_t output_offset = UniformRandomInt(-25, 25);
const int output_depth = input_depth * depth_multiplier;
RuntimeShape input_shape_inference(
{batch, input_height, input_width, input_depth});
RuntimeShape input_shape_inference;
RuntimeShape filter_shape_inference;
RuntimeShape output_shape_inference;
int pad_width, pad_height;
EXPECT_TRUE(ComputeConvSizes(
input_shape_inference, output_depth, filter_width, filter_height, stride,
dilation_width_factor, dilation_height_factor, padding_type,
&output_shape_inference, &pad_width, &pad_height));
int stride;
// Keep trying until we get a valid shape configuration for the 3x3 filter case.
bool generated_valid_configurations_for_3x3_kernel = false;
while (!generated_valid_configurations_for_3x3_kernel) {
generated_valid_configurations_for_3x3_kernel =
GenerateValidShapeConfigurations(
filter_width, filter_height, depth_multiplier,
dilation_width_factor, dilation_height_factor,
&input_shape_inference, &filter_shape_inference,
&output_shape_inference, &pad_width, &pad_height, &stride);
}
const int output_depth = output_shape_inference.Dims(3);
RuntimeShape filter_shape_inference(
{1, filter_height, filter_width, output_depth});
RuntimeShape bias_shape_inference({1, 1, 1, output_depth});
const int input_buffer_size = input_shape_inference.FlatSize();
const int filter_buffer_size = filter_shape_inference.FlatSize();

View File

@@ -422,7 +422,11 @@ inline bool Fast3x3FilterKernelSupported(
// TODO(b/132878669): Support padding.
if (pad_height == 1) {
return false;
for (int i = 0; i < output_depth; ++i) {
if (output_shift_ptr[i] != output_shift_ptr[0]) {
return false;
}
}
}
for (int i = 0; i < output_depth; ++i) {

View File

@@ -1902,8 +1902,8 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 2,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
EdgeType::kCenter, 1, 1> {
static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
const int32* bias_ptr, uint8* output_ptr,
static inline void Run(const int8* input_ptr, const int8* filter_ptr,
const int32* bias_ptr, int8* output_ptr,
const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -1931,9 +1931,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"dup v25.8h, w9\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v8.8h, v26.8h, v8.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"
@@ -1953,13 +1953,13 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"st1 {v16.8b}, [%[output_ptr]], #8\n"
"uaddw v8.8h, v26.8h, v8.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"
@@ -1976,9 +1976,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"st1 {v16.8b}, [%[output_ptr]]\n"
:
// Outputs.
@@ -2003,8 +2003,8 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
EdgeType::kCorner, 1, 1> {
static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
const int32* bias_ptr, uint8* output_ptr,
static inline void Run(const int8* input_ptr, const int8* filter_ptr,
const int32* bias_ptr, int8* output_ptr,
const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -2052,17 +2052,17 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"dup v25.8h, w6\n"
// Add input and filter offsets.
"uaddw v8.8h, v26.8h, v8.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v9.8h, v26.8h, v9.8b\n"
"saddw v9.8h, v26.8h, v9.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v10.8h, v26.8h, v10.8b\n"
"uaddw v11.8h, v26.8h, v11.8b\n"
"saddw v10.8h, v26.8h, v10.8b\n"
"saddw v11.8h, v26.8h, v11.8b\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"uaddw v1.8h, v25.8h, v1.8b\n"
"uaddw v2.8h, v25.8h, v2.8b\n"
"uaddw v3.8h, v25.8h, v3.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"saddw v1.8h, v25.8h, v1.8b\n"
"saddw v2.8h, v25.8h, v2.8b\n"
"saddw v3.8h, v25.8h, v3.8b\n"
"blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"
@@ -2094,20 +2094,20 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"st1 {v16.8b}, [%[output_ptr]], #8\n"
"uaddw v8.8h, v26.8h, v8.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v9.8h, v26.8h, v9.8b\n"
"saddw v9.8h, v26.8h, v9.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v10.8h, v26.8h, v10.8b\n"
"uaddw v11.8h, v26.8h, v11.8b\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"uaddw v1.8h, v25.8h, v1.8b\n"
"uaddw v2.8h, v25.8h, v2.8b\n"
"uaddw v3.8h, v25.8h, v3.8b\n"
"saddw v10.8h, v26.8h, v10.8b\n"
"saddw v11.8h, v26.8h, v11.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"saddw v1.8h, v25.8h, v1.8b\n"
"saddw v2.8h, v25.8h, v2.8b\n"
"saddw v3.8h, v25.8h, v3.8b\n"
"bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"
@@ -2129,9 +2129,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"st1 {v16.8b}, [%[output_ptr]]\n"
:
// Outputs.
@@ -2156,8 +2156,8 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
EdgeType::kHorizontal, 1, 1> {
static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
const int32* bias_ptr, uint8* output_ptr,
static inline void Run(const int8* input_ptr, const int8* filter_ptr,
const int32* bias_ptr, int8* output_ptr,
const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -2211,21 +2211,21 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"dup v25.8h, w12\n"
// Add input and filter offsets.
"uaddw v8.8h, v26.8h, v8.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v9.8h, v26.8h, v9.8b\n"
"saddw v9.8h, v26.8h, v9.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v10.8h, v26.8h, v10.8b\n"
"uaddw v11.8h, v26.8h, v11.8b\n"
"uaddw v12.8h, v26.8h, v12.8b\n"
"uaddw v13.8h, v26.8h, v13.8b\n"
"saddw v10.8h, v26.8h, v10.8b\n"
"saddw v11.8h, v26.8h, v11.8b\n"
"saddw v12.8h, v26.8h, v12.8b\n"
"saddw v13.8h, v26.8h, v13.8b\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"uaddw v1.8h, v25.8h, v1.8b\n"
"uaddw v2.8h, v25.8h, v2.8b\n"
"uaddw v3.8h, v25.8h, v3.8b\n"
"uaddw v4.8h, v25.8h, v4.8b\n"
"uaddw v5.8h, v25.8h, v5.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"saddw v1.8h, v25.8h, v1.8b\n"
"saddw v2.8h, v25.8h, v2.8b\n"
"saddw v3.8h, v25.8h, v3.8b\n"
"saddw v4.8h, v25.8h, v4.8b\n"
"saddw v5.8h, v25.8h, v5.8b\n"
"blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"
@@ -2272,25 +2272,25 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"uaddw v8.8h, v26.8h, v8.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"st1 {v16.8b}, [%[output_ptr]], #8\n"
"uaddw v9.8h, v26.8h, v9.8b\n"
"uaddw v10.8h, v26.8h, v10.8b\n"
"uaddw v11.8h, v26.8h, v11.8b\n"
"uaddw v12.8h, v26.8h, v12.8b\n"
"uaddw v13.8h, v26.8h, v13.8b\n"
"saddw v9.8h, v26.8h, v9.8b\n"
"saddw v10.8h, v26.8h, v10.8b\n"
"saddw v11.8h, v26.8h, v11.8b\n"
"saddw v12.8h, v26.8h, v12.8b\n"
"saddw v13.8h, v26.8h, v13.8b\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"uaddw v1.8h, v25.8h, v1.8b\n"
"uaddw v2.8h, v25.8h, v2.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"saddw v1.8h, v25.8h, v1.8b\n"
"saddw v2.8h, v25.8h, v2.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v3.8h, v25.8h, v3.8b\n"
"saddw v3.8h, v25.8h, v3.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v4.8h, v25.8h, v4.8b\n"
"uaddw v5.8h, v25.8h, v5.8b\n"
"saddw v4.8h, v25.8h, v4.8b\n"
"saddw v5.8h, v25.8h, v5.8b\n"
"bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"
@@ -2315,9 +2315,9 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"st1 {v16.8b}, [%[output_ptr]]\n"
:
// Outputs.
@@ -2342,8 +2342,8 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
template <>
struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
EdgeType::kVertical, 1, 1> {
static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
const int32* bias_ptr, uint8* output_ptr,
static inline void Run(const int8* input_ptr, const int8* filter_ptr,
const int32* bias_ptr, int8* output_ptr,
const DepthwiseConvParams* params_ptr) {
#define DEPTHWISECONV_LABEL_DEPTH_8_LOOP "1"
#define DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "2"
@@ -2399,21 +2399,21 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"dup v25.8h, w12\n"
// Add input and filter offsets.
"uaddw v8.8h, v26.8h, v8.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v9.8h, v26.8h, v9.8b\n"
"saddw v9.8h, v26.8h, v9.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v10.8h, v26.8h, v10.8b\n"
"uaddw v11.8h, v26.8h, v11.8b\n"
"uaddw v12.8h, v26.8h, v12.8b\n"
"uaddw v13.8h, v26.8h, v13.8b\n"
"saddw v10.8h, v26.8h, v10.8b\n"
"saddw v11.8h, v26.8h, v11.8b\n"
"saddw v12.8h, v26.8h, v12.8b\n"
"saddw v13.8h, v26.8h, v13.8b\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"uaddw v1.8h, v25.8h, v1.8b\n"
"uaddw v2.8h, v25.8h, v2.8b\n"
"uaddw v3.8h, v25.8h, v3.8b\n"
"uaddw v4.8h, v25.8h, v4.8b\n"
"uaddw v5.8h, v25.8h, v5.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"saddw v1.8h, v25.8h, v1.8b\n"
"saddw v2.8h, v25.8h, v2.8b\n"
"saddw v3.8h, v25.8h, v3.8b\n"
"saddw v4.8h, v25.8h, v4.8b\n"
"saddw v5.8h, v25.8h, v5.8b\n"
"blt " DEPTHWISECONV_LABEL_DEPTH_8_AFTER_LOOP "f\n"
@@ -2462,25 +2462,25 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"uaddw v8.8h, v26.8h, v8.8b\n"
"sqxtn v16.8b, v16.8h\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"saddw v8.8h, v26.8h, v8.8b\n"
"st1 {v16.8b}, [%[output_ptr]], #8\n"
"uaddw v9.8h, v26.8h, v9.8b\n"
"uaddw v10.8h, v26.8h, v10.8b\n"
"uaddw v11.8h, v26.8h, v11.8b\n"
"uaddw v12.8h, v26.8h, v12.8b\n"
"uaddw v13.8h, v26.8h, v13.8b\n"
"saddw v9.8h, v26.8h, v9.8b\n"
"saddw v10.8h, v26.8h, v10.8b\n"
"saddw v11.8h, v26.8h, v11.8b\n"
"saddw v12.8h, v26.8h, v12.8b\n"
"saddw v13.8h, v26.8h, v13.8b\n"
"uaddw v0.8h, v25.8h, v0.8b\n"
"uaddw v1.8h, v25.8h, v1.8b\n"
"uaddw v2.8h, v25.8h, v2.8b\n"
"saddw v0.8h, v25.8h, v0.8b\n"
"saddw v1.8h, v25.8h, v1.8b\n"
"saddw v2.8h, v25.8h, v2.8b\n"
"ld1 {v16.4s}, [%[bias_ptr]], #16\n"
"uaddw v3.8h, v25.8h, v3.8b\n"
"saddw v3.8h, v25.8h, v3.8b\n"
"ld1 {v17.4s}, [%[bias_ptr]], #16\n"
"uaddw v4.8h, v25.8h, v4.8b\n"
"uaddw v5.8h, v25.8h, v5.8b\n"
"saddw v4.8h, v25.8h, v4.8b\n"
"saddw v5.8h, v25.8h, v5.8b\n"
"bge " DEPTHWISECONV_LABEL_DEPTH_8_LOOP "b\n"
@@ -2505,10 +2505,10 @@ struct DepthwiseConvPartialPerChannel<DepthwiseConvOutputRounding::kUpward,
"sqxtn v16.4h, v16.4s\n"
"sqxtn2 v16.8h, v17.4s\n"
"sqadd v16.8h, v16.8h, v28.8h\n"
"sqxtun v16.8b, v16.8h\n"
"sqxtn v16.8b, v16.8h\n"
// TODO(b/129852264): Improve testing coverage.
"umax v16.8b, v16.8b, v30.8b\n"
"umin v16.8b, v16.8b, v31.8b\n"
"smax v16.8b, v16.8b, v30.8b\n"
"smin v16.8b, v16.8b, v31.8b\n"
"st1 {v16.8b}, [%[output_ptr]]\n"
:
// Outputs.
@@ -2690,10 +2690,10 @@ struct DepthwiseConvMultiRowPerChannel {
// * Vertical edges.
template <DepthwiseConvOutputRounding output_rounding>
inline void DepthwiseConvHandlePaddingPerChannel(
const uint8* input_data, const uint8* filter_data, const int32* bias_data,
uint8* output_data, const DepthwiseConvParams& params) {
const int8* input_data, const int8* filter_data, const int32* bias_data,
int8* output_data, const DepthwiseConvParams& params) {
if (params.input_width == 1 && params.input_height == 1) {
const uint8* filter_ptr =
const int8* filter_ptr =
filter_data + params.filter_row_size + params.output_depth;
DepthwiseConvPartialPerChannel<output_rounding, EdgeType::kCenter, 1,
1>::Run(input_data, filter_ptr, bias_data,
@@ -2707,10 +2707,10 @@ inline void DepthwiseConvHandlePaddingPerChannel(
const int32 out_y_end_corner = params.output_height - 1;
// Handle top row.
const uint8* input_ptr = input_data;
const uint8* filter_ptr =
const int8* input_ptr = input_data;
const int8* filter_ptr =
filter_data + params.filter_row_size + params.output_depth;
uint8* output_ptr = output_data;
int8* output_ptr = output_data;
DepthwiseConvPartialPerChannel<output_rounding, EdgeType::kCorner, 1, 1>::Run(
input_ptr, filter_ptr, bias_data, output_ptr, &params);
@@ -2911,16 +2911,16 @@ inline void DepthwiseConv3x3FilterPerChannel(
int32 end_y = row_end;
// TODO(b/132878669): Support padding.
// if (pad_width == 1 && pad_height == 1) {
// DepthwiseConvHandlePaddingPerChannel<output_rounding>(
// input_ptr, filter_data, bias_data, output_ptr, params);
//
// // Update extents now that the edges have been handled.
// out_x = 1;
// end_x = params.output_width - 1;
// out_y = std::max(1, out_y);
// end_y = std::min(params.output_height - 1, end_y);
// }
if (pad_width == 1 && pad_height == 1) {
DepthwiseConvHandlePaddingPerChannel<output_rounding>(
input_ptr, filter_data, bias_data, output_ptr, params);
// Update extents now that the edges have been handled.
out_x = 1;
end_x = params.output_width - 1;
out_y = std::max(1, out_y);
end_y = std::min(params.output_height - 1, end_y);
}
// pad_width and pad_height can each be 0 or 1, depending on the padding
// option, e.g. Padding_VALID / Padding_SAME.
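
The assembly edits in this file are mechanical: unsigned widening adds
(uaddw) become signed ones (saddw), the saturating narrow to unsigned bytes
(sqxtun) becomes a signed narrow (sqxtn), and the clamps switch from
umax/umin to smax/smin, because the per-channel kernels now consume and
produce int8 rather than uint8. A hedged NEON-intrinsics sketch of the two
requantization tails (assumes an AArch64 toolchain; not the kernel itself):

#include <arm_neon.h>

// uint8 path (old code): sqxtun + umax/umin on the 16-bit accumulator.
uint8x8_t NarrowClampU8(int16x8_t acc, uint8x8_t act_min, uint8x8_t act_max) {
  uint8x8_t out = vqmovun_s16(acc);            // sqxtun: s16 -> u8, saturating
  return vmin_u8(vmax_u8(out, act_min), act_max);  // umax / umin
}

// int8 path (this change): sqxtn + smax/smin instead.
int8x8_t NarrowClampS8(int16x8_t acc, int8x8_t act_min, int8x8_t act_max) {
  int8x8_t out = vqmovn_s16(acc);              // sqxtn: s16 -> s8, saturating
  return vmin_s8(vmax_s8(out, act_min), act_max);  // smax / smin
}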

View File

@@ -213,9 +213,10 @@ int Main(int argc, char* argv[]) {
evaluator->EvaluateModel();
if (!proto_output_file_path.empty()) {
std::ofstream proto_out_file(proto_output_file_path, std::ios::out);
std::ofstream proto_out_file(proto_output_file_path,
std::ios::out | std::ios::binary);
TopkAccuracyEvalMetrics metrics = results_writer.AggregatedMetrics();
proto_out_file << metrics.DebugString();
proto_out_file << metrics.SerializeAsString();
proto_out_file.close();
}
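
Opening the stream in binary mode and writing SerializeAsString() (the wire
format) instead of DebugString() (human-readable text) makes the output file
machine-parseable and platform-safe. A minimal sketch of the pattern for any
generated protobuf message type (the template parameter stands in for a class
such as TopkAccuracyEvalMetrics):

#include <fstream>
#include <string>

// Write a protobuf message in wire format. std::ios::binary prevents
// newline translation from corrupting the serialized bytes on platforms
// that distinguish text and binary streams.
template <typename ProtoT>
void WriteBinaryProto(const ProtoT& message, const std::string& path) {
  std::ofstream out(path, std::ios::out | std::ios::binary);
  out << message.SerializeAsString();
}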

View File

@@ -256,21 +256,20 @@ class _InterpolateFunctionError(object):
_, tags = error_interpolation.parse_message(message)
g = None
func_stack = []
# pylint: disable=protected-access
for t in tags:
if t.type == "function_node":
# TODO(mdan): Tests should cover this.
if t.name == compat.as_str(self._func.name):
g = self._func._graph
g = self._func.graph
elif g:
next_func = g._get_function(t.name)
if next_func is not None and isinstance(next_func,
_EagerDefinedFunction):
g = next_func._graph
g = next_func.graph
if g:
func_stack.append(g.name)
else:
func_stack.append("<unknown>")
# pylint: enable=protected-access
if g:
message = error_interpolation.interpolate(message, g)
message += "\n\nFunction call stack:\n"

View File

@@ -18,12 +18,15 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import graph_pb2
from tensorflow.core.framework import variable_pb2
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import meta_graph_pb2
from tensorflow.python.eager import wrap_function
from tensorflow.python.framework import tensor_util
from tensorflow.python.grappler import tf_optimizer
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training.saver import export_meta_graph
@@ -43,6 +46,19 @@ def _run_inline_graph_optimization(func):
meta_graph = export_meta_graph(
graph_def=func.graph.as_graph_def(), graph=func.graph)
# Clear the initializer_name for the variable collections, since it is not
# needed once the model has been written to a SavedModel.
for name in [
"variables", "model_variables", "trainable_variables", "local_variables"
]:
raw_list = []
for raw in meta_graph.collection_def[name].bytes_list.value:
variable = variable_pb2.VariableDef()
variable.ParseFromString(raw)
variable.ClearField("initializer_name")
raw_list.append(variable.SerializeToString())
meta_graph.collection_def[name].bytes_list.value[:] = raw_list
# Add a collection 'train_op' so that Grappler knows the outputs.
fetch_collection = meta_graph_pb2.CollectionDef()
for array in func.inputs + func.outputs:
@@ -123,6 +139,7 @@ def convert_variables_to_constants_v2(func):
resource_identities = {}
placeholders = {}
converted_input_indices = set()
reference_variables = []
for node in graph_def.node:
if node.name in map_name_to_value:
# Get the dtype and data for the Placeholders whose values are stored as
@@ -134,6 +151,9 @@ def convert_variables_to_constants_v2(func):
}
converted_input_indices.add(
func.captured_inputs.index(map_name_to_value[node.name]))
# Collect the reference variables that cannot be lifted.
if node.op == "VariableV2":
reference_variables.append(node)
if node.op == "ReadVariableOp":
# Get name of Placeholder op associated with ReadVariableOp. There can be
# an Identity in between the ReadVariableOp and Placeholder. Store the
@@ -158,7 +178,35 @@ def convert_variables_to_constants_v2(func):
output_graph_def = graph_pb2.GraphDef()
how_many_converted = 0
# Add an identity node after each reference variable and get their tensor
# values.
if reference_variables:
reference_variable_tensors = []
with func.graph.as_default():
for node in reference_variables:
identity_node = array_ops.identity(
func.graph.as_graph_element(node.name + ":0"))
reference_variable_tensors.append(identity_node.name)
reference_variable_values = func.prune([], reference_variable_tensors)()
# Add values of reference variables as constant nodes.
for node, value in zip(reference_variables, reference_variable_values):
output_node = output_graph_def.node.add()
dtype = attr_value_pb2.AttrValue()
dtype.type = value.dtype.as_datatype_enum
output_node.op = "Const"
output_node.name = node.name
output_node.attr["dtype"].CopyFrom(dtype)
output_node.attr["value"].tensor.CopyFrom(
tensor_util.make_tensor_proto(value))
how_many_converted += 1
for input_node in graph_def.node:
# Skip VariableV2 nodes, since their values were already added via the
# identity nodes.
if input_node.op == "VariableV2":
continue
output_node = output_graph_def.node.add()
# Convert Placeholder ops to Const ops.
if input_node.name in placeholders:

View File

@@ -21,12 +21,17 @@ from __future__ import print_function
import os
from tensorflow.python import keras
from tensorflow.python.client import session as session_lib
from tensorflow.python.eager import def_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import convert_to_constants
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.saved_model import simple_save
from tensorflow.python.saved_model.load import load
from tensorflow.python.saved_model.save import save
from tensorflow.python.training.tracking import tracking
@@ -51,9 +56,9 @@ class VariablesToConstantsTest(test.TestCase):
input_data):
# Check that the converted ConcreteFunction produces the same result as the
# original Function.
expected_value = func(input_data)
expected_value = nest.flatten(func(input_data))
actual_value = nest.flatten(converted_concrete_func(input_data))
self.assertEqual(expected_value.numpy(), actual_value)
self.assertEqual(expected_value[0].numpy(), actual_value)
# Ensure the shape is retained.
self.assertEqual(converted_concrete_func.inputs[0].shape, input_data.shape)
@@ -65,7 +70,7 @@ class VariablesToConstantsTest(test.TestCase):
# Load it back and make sure it works.
loaded_obj = load(save_dir)
actual_value = nest.flatten(loaded_obj.signatures["mykey"](input_data))
self.assertEqual(expected_value.numpy(), actual_value)
self.assertEqual(expected_value[0].numpy(), actual_value)
@test_util.run_v2_only
def testConstSavedModel(self):
@@ -231,6 +236,44 @@ class VariablesToConstantsTest(test.TestCase):
actual_value = nest.flatten(output_func(input_data))
self.assertEqual(expected_value.numpy(), actual_value)
def _v1_single_metagraph_saved_model(self):
export_graph = ops.Graph()
with export_graph.as_default():
start = array_ops.placeholder(
shape=[1, 1], dtype=dtypes.float32, name="start")
distractor = variables.RefVariable(-1., name="distractor")
v = variables.RefVariable(3., name="v")
local_variable = variables.VariableV1(
1.,
collections=[ops.GraphKeys.LOCAL_VARIABLES],
trainable=False,
use_resource=True)
output = array_ops.identity(start * v * local_variable, name="output")
with session_lib.Session() as session:
session.run([v.initializer, distractor.initializer,
local_variable.initializer])
path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid()))
simple_save.simple_save(
session,
path,
inputs={"start": start},
outputs={"output": output},
legacy_init_op=local_variable.initializer)
return path
@test_util.run_v2_only
def test_ref_variable_import(self):
saved = self._v1_single_metagraph_saved_model()
imported = load(saved)
fn = imported.signatures["serving_default"]
output_func = convert_to_constants.convert_variables_to_constants_v2(fn)
constant_graph_def = output_func.graph.as_graph_def()
self.assertEqual(0, self._getNumVariables(constant_graph_def))
self.assertFalse(self._hasStatefulPartitionedCallOp(constant_graph_def))
input_data = constant_op.constant(1., shape=[1, 1])
root = tracking.AutoTrackable()
self._testConvertedFunction(root, fn, output_func, input_data)
if __name__ == "__main__":
test.main()

View File

@@ -235,6 +235,11 @@ class SpaceToDepthTest(test.TestCase):
def spaceToDepthUsingTranspose(self, tensor, block_size, data_format):
block_size_sq = block_size * block_size
dtype = tensor.dtype
if dtype == dtypes.qint8:
tensor = array_ops.bitcast(tensor, dtypes.int8)
if data_format == "NHWC":
b, ih, iw, ic = tensor.shape.as_list()
assert ih % block_size == 0, (ih, block_size)
@@ -253,56 +258,87 @@ class SpaceToDepthTest(test.TestCase):
[b, ic, oh, block_size, ow, block_size])
tensor = array_ops.transpose(tensor, [0, 3, 5, 1, 2, 4])
tensor = array_ops.reshape(tensor, [b, oc, oh, ow])
if dtype == dtypes.qint8:
tensor = array_ops.bitcast(tensor, dtype)
return tensor
def compareToTranspose(self, batch_size, out_height, out_width, in_channels,
block_size, data_format, use_gpu):
block_size, data_format, data_type, use_gpu):
in_height = out_height * block_size
in_width = out_width * block_size
nhwc_input_shape = [batch_size, in_height, in_width, in_channels]
nchw_input_shape = [batch_size, in_channels, in_height, in_width]
total_size = np.prod(nhwc_input_shape)
if data_format == "NCHW_VECT_C":
# Initialize the input tensor with qint8 values that cycle through -127..127.
x = [((f + 128) % 255) - 127 for f in range(total_size)]
t = constant_op.constant(x, shape=nhwc_input_shape, dtype=dtypes.float32)
expected = self.spaceToDepthUsingTranspose(t, block_size, "NHWC")
t = test_util.NHWCToNCHW_VECT_C(t)
t, _, _ = gen_array_ops.quantize_v2(t, -128.0, 127.0, dtypes.qint8)
t = array_ops.space_to_depth(t, block_size, data_format="NCHW_VECT_C")
t = gen_array_ops.dequantize(t, -128, 127)
actual = test_util.NCHW_VECT_CToNHWC(t)
else:
# Initialize the input tensor with ascending whole numbers as floats.
x = [f * 1.0 for f in range(total_size)]
shape = nchw_input_shape if data_format == "NCHW" else nhwc_input_shape
t = constant_op.constant(x, shape=shape, dtype=dtypes.float32)
expected = self.spaceToDepthUsingTranspose(t, block_size, data_format)
actual = array_ops.space_to_depth(t, block_size, data_format=data_format)
# Construct the input tensor in data_type and NHWC layout.
# force_cpu is needed because quantize_v2 runs only on the CPU.
with test_util.force_cpu():
if data_type == dtypes.qint8:
# Initialize the input tensor with qint8 values that cycle through -127..127.
x = [((f + 128) % 255) - 127 for f in range(total_size)]
t = constant_op.constant(
x, shape=nhwc_input_shape, dtype=dtypes.float32)
t, _, _ = gen_array_ops.quantize_v2(t, -128.0, 127.0, dtypes.qint8)
else:
assert data_type == dtypes.float32
# Initialize the input tensor with ascending whole numbers as floats.
x = [f * 1.0 for f in range(total_size)]
shape = nchw_input_shape if data_format == "NCHW" else nhwc_input_shape
t = constant_op.constant(x, shape=shape, dtype=dtypes.float32)
with test_util.device(use_gpu):
if data_format == "NCHW_VECT_C":
assert data_type == dtypes.qint8
# Convert to int8, then NHWCToNCHW_VECT_C, and then back to qint8.
actual = array_ops.bitcast(t, dtypes.int8)
actual = test_util.NHWCToNCHW_VECT_C(actual)
actual = array_ops.bitcast(actual, dtypes.qint8)
actual = array_ops.space_to_depth(
actual, block_size, data_format=data_format)
actual = array_ops.bitcast(actual, dtypes.int8)
actual = test_util.NCHW_VECT_CToNHWC(actual)
actual = array_ops.bitcast(actual, dtypes.qint8)
expected = array_ops.bitcast(t, dtypes.int8)
expected = math_ops.cast(expected, dtypes.float32)
expected = self.spaceToDepthUsingTranspose(expected, block_size, "NHWC")
expected = math_ops.cast(expected, dtypes.int8)
expected = array_ops.bitcast(expected, dtypes.qint8)
else:
# Float path: run space_to_depth directly on the input constructed above.
actual = array_ops.space_to_depth(
t, block_size, data_format=data_format)
expected = self.spaceToDepthUsingTranspose(t, block_size, data_format)
with self.cached_session(use_gpu=use_gpu) as sess:
actual_vals, expected_vals = self.evaluate([actual, expected])
self.assertTrue(np.array_equal(actual_vals, expected_vals))
# TODO(jingyue): figure out why this test failed in eager mode.
@test_util.run_deprecated_v1
def testAgainstTranspose(self):
self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", False)
self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", False)
self.compareToTranspose(1, 2, 3, 2, 3, "NHWC", False)
self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.float32, False)
self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", dtypes.float32, False)
self.compareToTranspose(1, 2, 3, 2, 3, "NHWC", dtypes.float32, False)
self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.qint8, False)
self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", dtypes.qint8, False)
self.compareToTranspose(1, 2, 3, 2, 3, "NHWC", dtypes.qint8, False)
if not test.is_gpu_available():
tf_logging.info("skipping gpu tests since gpu not available")
return
self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", True)
self.compareToTranspose(3, 2, 3, 2, 2, "NHWC", True)
self.compareToTranspose(3, 2, 3, 1, 2, "NCHW", True)
self.compareToTranspose(3, 2, 3, 2, 3, "NCHW", True)
self.compareToTranspose(5, 7, 11, 3, 2, "NCHW", True)
self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.float32, True)
self.compareToTranspose(3, 2, 3, 2, 2, "NHWC", dtypes.float32, True)
self.compareToTranspose(3, 2, 3, 1, 2, "NCHW", dtypes.float32, True)
self.compareToTranspose(3, 2, 3, 2, 3, "NCHW", dtypes.float32, True)
self.compareToTranspose(5, 7, 11, 3, 2, "NCHW", dtypes.float32, True)
self.compareToTranspose(3, 2, 3, 4, 2, "NCHW_VECT_C", True)
self.compareToTranspose(3, 2, 3, 8, 3, "NCHW_VECT_C", True)
self.compareToTranspose(5, 7, 11, 12, 2, "NCHW_VECT_C", True)
self.compareToTranspose(3, 2, 3, 4, 2, "NCHW_VECT_C", dtypes.qint8, True)
self.compareToTranspose(3, 2, 3, 8, 3, "NCHW_VECT_C", dtypes.qint8, True)
self.compareToTranspose(5, 7, 11, 12, 2, "NCHW_VECT_C", dtypes.qint8, True)
class SpaceToDepthGradientTest(test.TestCase):

View File

@@ -1971,6 +1971,7 @@ def tf_py_wrap_cc(
# //third_party/tensorflow/tools/pip_package:win_pip_package_marker for specific reasons.
# 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test.
def py_test(deps = [], data = [], kernels = [], **kwargs):
# Python version placeholder
native.py_test(
# TODO(jlebar): Ideally we'd use tcmalloc here.
deps = select({
@@ -1999,6 +2000,8 @@ def py_binary(name, deps = [], **kwargs):
name = name + "_deps",
deps = deps,
)
# Python version placeholder
native.py_binary(
name = name,
deps = select({

View File

@@ -34,10 +34,6 @@ tf_class {
name: "is_alive"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "join"
argspec: "args=[\'self\', \'timeout\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "loop"
argspec: "args=[\'coord\', \'timer_interval_secs\', \'target\', \'args\', \'kwargs\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "

View File

@@ -41,6 +41,9 @@ _CORNER_CASES = {
'estimator.NanLossDuringTrainingError': {
'message': {}
},
'train.LooperThread': {
'join': {}
}
}
# Python 2 vs. 3 differences