836 lines
34 KiB
C++
836 lines
34 KiB
C++
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==============================================================================*/
|
|
|
|
// Tests that multi-dimensional arrays can be reduced among various
|
|
// user-provided dimensions.
|
|
//
|
|
// Note that comments for these tests are white-box in that they talk about the
|
|
// default data layout.
|
|
//
|
|
// The test space for reductions is the cartesian product of:
|
|
//
|
|
// <possible ranks> x
|
|
// <possible layouts for chosen rank> x
|
|
// <possible subsets of dimensions in chosen rank>
|
|
|
|
#include <stdlib.h>
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "tensorflow/compiler/xla/array2d.h"
|
|
#include "tensorflow/compiler/xla/array4d.h"
|
|
#include "tensorflow/compiler/xla/client/computation.h"
|
|
#include "tensorflow/compiler/xla/client/computation_builder.h"
|
|
#include "tensorflow/compiler/xla/client/global_data.h"
|
|
#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
|
|
#include "tensorflow/compiler/xla/client/local_client.h"
|
|
#include "tensorflow/compiler/xla/layout_util.h"
|
|
#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
|
|
#include "tensorflow/compiler/xla/literal_util.h"
|
|
#include "tensorflow/compiler/xla/reference_util.h"
|
|
#include "tensorflow/compiler/xla/shape_util.h"
|
|
#include "tensorflow/compiler/xla/status_macros.h"
|
|
#include "tensorflow/compiler/xla/statusor.h"
|
|
#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
|
|
#include "tensorflow/compiler/xla/tests/literal_test_util.h"
|
|
#include "tensorflow/compiler/xla/tests/test_macros.h"
|
|
#include "tensorflow/compiler/xla/util.h"
|
|
#include "tensorflow/compiler/xla/xla_data.pb.h"
|
|
#include "tensorflow/core/lib/gtl/array_slice.h"
|
|
#include "tensorflow/core/platform/test.h"
|
|
#include "tensorflow/core/platform/types.h"
|
|
|
|
namespace xla {
|
|
namespace {
|
|
|
|
class ReduceTest : public ClientLibraryTestBase {
|
|
protected:
|
|
ReduceTest() {
|
|
// Implementation note: laid out z >> y >> x by default.
|
|
// clang-format off
|
|
literal_2d_ = Literal::CreateR2<float>({
|
|
// x0 x1 x2
|
|
{ 1.f, 2.f, 3.f}, // y0
|
|
{ 4.f, 5.f, 6.f}, // y1
|
|
});
|
|
literal_3d_ = Literal::CreateR3Projected<float>({
|
|
// x0 x1 x2
|
|
{ 1.f, 2.f, 3.f}, // y0
|
|
{ 4.f, 5.f, 6.f}, // y1
|
|
}, 4);
|
|
// clang-format on
|
|
CHECK(ShapeUtil::Equal(
|
|
literal_3d_->shape(),
|
|
ShapeUtil::MakeShape(F32, {/*z=*/4, /*y=*/2, /*x=*/3})))
|
|
<< literal_3d_->shape().ShortDebugString();
|
|
}
|
|
|
|
// Runs an R1 => R0 reduction test with the given number of elements.
|
|
void RunR1ToR0Test(int64 element_count) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {element_count});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<float>(0.0);
|
|
builder.Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
std::vector<float> input_data(element_count);
|
|
for (int64 i = 0; i < element_count; ++i) {
|
|
input_data[i] = rand_r(&seed_) % 3;
|
|
if (rand_r(&seed_) % 2 == 0) {
|
|
input_data[i] *= -1;
|
|
}
|
|
}
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR1(AsSlice(input_data));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
float expected = 0.0;
|
|
for (float item : input_data) {
|
|
expected += item;
|
|
}
|
|
ComputeAndCompareR0<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.001));
|
|
}
|
|
|
|
void RunR1ToR0PredTest(bool and_reduce,
|
|
tensorflow::gtl::ArraySlice<int> input_data) {
|
|
const int element_count = input_data.size();
|
|
ComputationBuilder builder(client_, TestName());
|
|
const Shape input_shape = ShapeUtil::MakeShape(S32, {element_count});
|
|
auto input_par = builder.Parameter(0, input_shape, "input");
|
|
auto pred_values =
|
|
builder.Eq(input_par, builder.ConstantR1<int>(element_count, 1));
|
|
ComputationDataHandle init_value;
|
|
Computation reduce;
|
|
if (and_reduce) {
|
|
init_value = builder.ConstantR0<bool>(true);
|
|
reduce = CreateScalarLogicalAndComputation(&builder);
|
|
} else {
|
|
init_value = builder.ConstantR0<bool>(false);
|
|
reduce = CreateScalarLogicalOrComputation(&builder);
|
|
}
|
|
builder.Reduce(pred_values, init_value, reduce,
|
|
/*dimensions_to_reduce=*/{0});
|
|
|
|
std::unique_ptr<Literal> input_literal = Literal::CreateR1(input_data);
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
bool expected = and_reduce;
|
|
for (bool item : input_data) {
|
|
if (and_reduce) {
|
|
expected = expected && item;
|
|
} else {
|
|
expected = expected || item;
|
|
}
|
|
}
|
|
ComputeAndCompareR0<bool>(&builder, expected, {input_global_data.get()});
|
|
}
|
|
|
|
// Runs an R2 => R0 reduction test with the given number of (rows, cols).
|
|
void RunR2ToR0Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<float>(0.0);
|
|
builder.Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0, 1});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal->Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
float expected = 0.0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
expected += input_data(rowno, colno);
|
|
}
|
|
}
|
|
ComputeAndCompareR0<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
// Runs an R2 => R1 reduction test with the given number of (rows, cols).
|
|
void RunR2ToR1Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<float>(0.0);
|
|
builder.Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal->Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += input_data(rowno, colno);
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
template <typename NativeT>
|
|
void ComputeAndCompareGeneric(
|
|
typename std::enable_if<std::is_floating_point<NativeT>::value,
|
|
ComputationBuilder>::type* builder,
|
|
tensorflow::gtl::ArraySlice<NativeT> expected,
|
|
tensorflow::gtl::ArraySlice<GlobalData*> arguments) {
|
|
ComputeAndCompareR1<NativeT>(builder, expected, arguments,
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
template <typename NativeT>
|
|
void ComputeAndCompareGeneric(
|
|
typename std::enable_if<std::is_integral<NativeT>::value,
|
|
ComputationBuilder>::type* builder,
|
|
tensorflow::gtl::ArraySlice<NativeT> expected,
|
|
tensorflow::gtl::ArraySlice<GlobalData*> arguments) {
|
|
ComputeAndCompareR1<NativeT>(builder, expected, arguments);
|
|
}
|
|
|
|
template <typename NativeT>
|
|
void RunVectorizedReduceTestForType(
|
|
const std::function<Computation(ComputationBuilder*)>&
|
|
reduction_function_generator,
|
|
const std::function<NativeT(NativeT, NativeT)>&
|
|
reference_reduction_function,
|
|
const NativeT& initial_value) {
|
|
const int rows = 64, cols = 128;
|
|
const int minor = 1, major = 0;
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation reduction_function = reduction_function_generator(&builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(
|
|
xla::primitive_util::NativeToPrimitiveType<NativeT>(), {rows, cols});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<NativeT>(initial_value);
|
|
builder.Reduce(input, zero, reduction_function,
|
|
/*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<NativeT> input_data(rows, cols);
|
|
input_data.FillUnique(initial_value);
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal->Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
// NativeT can be bool, and std::vector<bool> does not convert to
|
|
// ArraySlice.
|
|
std::unique_ptr<NativeT[]> expected(new NativeT[cols]);
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
NativeT column_result = initial_value;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_result = reference_reduction_function(column_result,
|
|
input_data(rowno, colno));
|
|
}
|
|
expected[colno] = column_result;
|
|
}
|
|
|
|
ComputeAndCompareGeneric<NativeT>(
|
|
&builder, tensorflow::gtl::ArraySlice<NativeT>(expected.get(), cols),
|
|
{input_global_data.get()});
|
|
}
|
|
|
|
void RunVectorizedReduceTest(
|
|
const std::function<Computation(PrimitiveType, ComputationBuilder*)>&
|
|
reduction_function_generator_for_type,
|
|
const std::function<float(float, float)>&
|
|
reference_reduction_function_for_floats,
|
|
const std::function<int32(int32, int32)>&
|
|
reference_reduction_function_for_ints,
|
|
const std::function<uint32(uint32, uint32)>&
|
|
reference_reduction_function_for_uints,
|
|
float floating_point_identity, int32 signed_int_identity,
|
|
uint32 unsigned_int_identity) {
|
|
// Float version
|
|
RunVectorizedReduceTestForType<float>(
|
|
[&](ComputationBuilder* builder) {
|
|
return reduction_function_generator_for_type(F32, builder);
|
|
},
|
|
reference_reduction_function_for_floats, floating_point_identity);
|
|
|
|
// Signed int version
|
|
RunVectorizedReduceTestForType<int32>(
|
|
[&](ComputationBuilder* builder) {
|
|
return reduction_function_generator_for_type(S32, builder);
|
|
},
|
|
reference_reduction_function_for_ints, signed_int_identity);
|
|
|
|
// Unsigned int version
|
|
RunVectorizedReduceTestForType<uint32>(
|
|
[&](ComputationBuilder* builder) {
|
|
return reduction_function_generator_for_type(U32, builder);
|
|
},
|
|
reference_reduction_function_for_uints, unsigned_int_identity);
|
|
}
|
|
|
|
std::unique_ptr<Literal> literal_2d_;
|
|
std::unique_ptr<Literal> literal_3d_;
|
|
uint32 seed_ = 0xdeadbeef;
|
|
};
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR1_0_F32_To_R0) { RunR1ToR0Test(0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_1_F32_To_R0) { RunR1ToR0Test(1); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_2_F32_To_R0) { RunR1ToR0Test(2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16_F32_To_R0) { RunR1ToR0Test(16); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_128_F32_To_R0) { RunR1ToR0Test(128); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_129_F32_To_R0) { RunR1ToR0Test(129); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_240_F32_To_R0) { RunR1ToR0Test(240); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_256_F32_To_R0) { RunR1ToR0Test(256); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_1024_F32_To_R0) { RunR1ToR0Test(1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_2048_F32_To_R0) { RunR1ToR0Test(2048); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16K_F32_To_R0) { RunR1ToR0Test(16 * 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16KP1_F32_To_R0) {
|
|
RunR1ToR0Test(16 * 1024 + 1);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceR1_64K_F32_To_R0) { RunR1ToR0Test(64 * 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_1M_F32_To_R0) { RunR1ToR0Test(1024 * 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16M_F32_To_R0) { RunR1ToR0Test(4096 * 4096); }
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR2_0x0_To_R0) { RunR2ToR0Test(0, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_0x2_To_R0) { RunR2ToR0Test(0, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1x1_To_R0) { RunR2ToR0Test(1, 1); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_2x0_To_R0) { RunR2ToR0Test(2, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_2x2_To_R0) { RunR2ToR0Test(2, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_8x8_To_R0) { RunR2ToR0Test(8, 8); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_9x9_To_R0) { RunR2ToR0Test(9, 9); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_50x111_To_R0) { RunR2ToR0Test(50, 111); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_To_R0) { RunR2ToR0Test(111, 50); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R0) {
|
|
RunR2ToR0Test(111, 50, 0, 1);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R0) { RunR2ToR0Test(1024, 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R0) { RunR2ToR0Test(1000, 1500); }
|
|
|
|
// Disabled due to b/33245142. Failed on 2016-11-30.
|
|
// XLA_TEST_F(ReduceTest, ReduceR2_0x0_To_R1) { RunR2ToR1Test(0, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_0x2_To_R1) { RunR2ToR1Test(0, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1x1_To_R1) { RunR2ToR1Test(1, 1); }
|
|
// Disabled due to b/33245142. Failed on 2016-11-30.
|
|
// XLA_TEST_F(ReduceTest, ReduceR2_2x0_To_R1) { RunR2ToR1Test(2, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_2x2_To_R1) { RunR2ToR1Test(2, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_8x8_To_R1) { RunR2ToR1Test(8, 8); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_9x9_To_R1) { RunR2ToR1Test(9, 9); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_50x111_To_R1) { RunR2ToR1Test(50, 111); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_To_R1) { RunR2ToR1Test(111, 50); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R1) {
|
|
RunR2ToR1Test(111, 50, 0, 1);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R1) { RunR2ToR1Test(1024, 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R1) { RunR2ToR1Test(1000, 1500); }
|
|
|
|
// TODO(b/34969189): Invalid CAS generated on GPU.
|
|
XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceAllOnesR1_10_Pred)) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count, 1);
|
|
RunR1ToR0PredTest(/*and_reduce=*/true, input);
|
|
}
|
|
|
|
// TODO(b/34969189): Invalid CAS generated on GPU.
|
|
XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(AndReduceOnesAndZerosR1_10_Pred)) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count);
|
|
for (int i = 0; i < element_count; ++i) {
|
|
input[i] = i % 2;
|
|
}
|
|
RunR1ToR0PredTest(/*and_reduce=*/true, input);
|
|
}
|
|
|
|
// TODO(b/34969189): Invalid CAS generated on GPU.
|
|
XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OrReduceAllOnesR1_10_Pred)) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count, 1);
|
|
RunR1ToR0PredTest(/*and_reduce=*/false, input);
|
|
}
|
|
|
|
// TODO(b/34969189): Invalid CAS generated on GPU.
|
|
XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OrReduceOnesAndZerosR1_10_Pred)) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count);
|
|
for (int i = 0; i < element_count; ++i) {
|
|
input[i] = i % 2;
|
|
}
|
|
RunR1ToR0PredTest(/*and_reduce=*/false, input);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceElementwiseR2_111x50_To_R1) {
|
|
const int64 rows = 111, cols = 50;
|
|
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<float>(0.0);
|
|
auto log_ = builder.Log(input);
|
|
builder.Reduce(log_, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR2FromArray2D(input_data);
|
|
input_literal = input_literal->Relayout(LayoutUtil::MakeLayout({0, 1}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += log(input_data(rowno, colno));
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, TransposeAndReduceElementwiseR2_111x50_To_R1) {
|
|
const int64 rows = 111, cols = 50;
|
|
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<float>(0.0);
|
|
auto log_ = builder.Log(input);
|
|
auto transpose = builder.Transpose(log_, {1, 0});
|
|
builder.Reduce(transpose, zero, add_f32, /*dimensions_to_reduce=*/{1});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR2FromArray2D(input_data);
|
|
input_literal = input_literal->Relayout(LayoutUtil::MakeLayout({0, 1}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += log(input_data(rowno, colno));
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, Reshape_111x2x25Reduce_111x50_To_R1) {
|
|
const int64 rows = 111, cols = 50;
|
|
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, 2, cols / 2});
|
|
auto input = builder.Parameter(0, input_shape, "input");
|
|
auto zero = builder.ConstantR0<float>(0.0);
|
|
auto log_ = builder.Log(input);
|
|
auto reshape = builder.Reshape(log_, {rows, cols});
|
|
builder.Reduce(reshape, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array3D<float> input_data(rows, 2, cols / 2);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
std::unique_ptr<Literal> input_literal =
|
|
Literal::CreateR3FromArray3D(input_data);
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 major = 0; major < 2; ++major) {
|
|
for (int64 colno = 0; colno < cols / 2; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += log(input_data(rowno, major, colno));
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
struct BoundsLayout {
|
|
std::vector<int64> bounds;
|
|
std::vector<int64> layout;
|
|
std::vector<int64> reduce_dims;
|
|
};
|
|
|
|
void PrintTo(const BoundsLayout& spec, std::ostream* os) {
|
|
*os << tensorflow::strings::Printf(
|
|
"R%luToR%lu%s_%s_Reduce%s", spec.bounds.size(),
|
|
spec.bounds.size() - spec.reduce_dims.size(),
|
|
tensorflow::str_util::Join(spec.bounds, "x").c_str(),
|
|
tensorflow::str_util::Join(spec.layout, "").c_str(),
|
|
tensorflow::str_util::Join(spec.reduce_dims, "").c_str());
|
|
}
|
|
|
|
// Add-reduces a broadcasted scalar matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, AddReduce2DScalarToR0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
auto scalar = builder.ConstantR0<float>(42.0);
|
|
auto broacasted = builder.Broadcast(scalar, {500, 500});
|
|
builder.Reduce(broacasted, builder.ConstantR0<float>(0.0f), add, {0, 1});
|
|
|
|
float expected = 42.0f * static_cast<float>(500 * 500);
|
|
ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
// Max-reduces a broadcasted scalar matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, MaxReduce2DScalarToR0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto max = CreateScalarMaxComputation(F32, &builder);
|
|
auto scalar = builder.ConstantR0<float>(42.0);
|
|
auto broacasted = builder.Broadcast(scalar, {500, 500});
|
|
builder.Reduce(broacasted, builder.ConstantR0<float>(0.0f), max, {0, 1});
|
|
|
|
float expected = 42.0f;
|
|
ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
// Max-reduces a matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, MaxReduce2DToR0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto max = CreateScalarMaxComputation(F32, &builder);
|
|
Array2D<float> input(300, 250);
|
|
input.FillRandom(214.0f);
|
|
auto input_literal = Literal::CreateR2FromArray2D(input);
|
|
builder.Reduce(builder.ConstantLiteral(*input_literal),
|
|
builder.ConstantR0<float>(FLT_MIN), max, {0, 1});
|
|
auto input_max = FLT_MIN;
|
|
input.Each(
|
|
[&](int64, int64, float* v) { input_max = std::max(input_max, *v); });
|
|
ComputeAndCompareR0<float>(&builder, input_max, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
// Min-reduces matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, MinReduce2DToR0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto min = CreateScalarMinComputation(F32, &builder);
|
|
Array2D<float> input(150, 130);
|
|
input.FillRandom(214.0f);
|
|
auto input_literal = Literal::CreateR2FromArray2D(input);
|
|
builder.Reduce(builder.ConstantLiteral(*input_literal),
|
|
builder.ConstantR0<float>(FLT_MAX), min, {0, 1});
|
|
|
|
auto input_min = FLT_MAX;
|
|
input.Each(
|
|
[&](int64, int64, float* v) { input_min = std::min(input_min, *v); });
|
|
ComputeAndCompareR0<float>(&builder, input_min, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, UnsignedInt_MinReduce) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
Array2D<uint32> input({{1}, {2}});
|
|
auto min = CreateScalarMinComputation(U32, &builder);
|
|
auto input_literal = Literal::CreateR2FromArray2D(input);
|
|
auto initial_value =
|
|
builder.ConstantR0<uint32>(std::numeric_limits<uint32>::max());
|
|
|
|
builder.Reduce(builder.ConstantLiteral(*input_literal), initial_value, min,
|
|
{0, 1});
|
|
ComputeAndCompareR0<uint32>(&builder, 1, {});
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, UnsignedInt_MaxReduce) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
Array2D<uint32> input({{1}, {2}});
|
|
auto max = CreateScalarMaxComputation(U32, &builder);
|
|
auto input_literal = Literal::CreateR2FromArray2D(input);
|
|
auto initial_value =
|
|
builder.ConstantR0<uint32>(std::numeric_limits<uint32>::min());
|
|
|
|
builder.Reduce(builder.ConstantLiteral(*input_literal), initial_value, max,
|
|
{0, 1});
|
|
ComputeAndCompareR0<uint32>(&builder, 2, {});
|
|
}
|
|
|
|
// Reduces a matrix among dimension 1.
|
|
XLA_TEST_F(ReduceTest, Reduce2DAmong1) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_2d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {1});
|
|
|
|
std::vector<float> expected = {6.f, 15.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, Reduce2DAmong0and1) {
|
|
// Reduce a matrix among dimensions 0 and 1 (sum it up to a scalar).
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_2d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0, 1});
|
|
|
|
ComputeAndCompareR0<float>(&builder, 21.0f, {}, ErrorSpec(0.0001, 1e-4));
|
|
}
|
|
|
|
// Tests 2D matrix ReduceToRow operation.
|
|
XLA_TEST_F(ReduceTest, Reduce2DAmongY) {
|
|
ComputationBuilder builder(client_, "reduce_among_y");
|
|
auto m = builder.ConstantLiteral(*literal_2d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0});
|
|
|
|
std::vector<float> expected = {5.f, 7.f, 9.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDims_1_2) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {1, 2});
|
|
|
|
std::vector<float> expected = {21.f, 21.f, 21.f, 21.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDims_0_1) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0, 1});
|
|
|
|
std::vector<float> expected = {20.f, 28.f, 36.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3ToR0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0, 1, 2});
|
|
|
|
float expected = 21.0f * 4.0;
|
|
ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDim0) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {0});
|
|
|
|
// clang-format off
|
|
Array2D<float> expected({
|
|
{4.f, 8.f, 12.f},
|
|
{16.f, 20.f, 24.f},
|
|
});
|
|
// clang-format on
|
|
ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDim1) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {1});
|
|
|
|
// clang-format off
|
|
Array2D<float> expected({
|
|
{5.f, 7.f, 9.f},
|
|
{5.f, 7.f, 9.f},
|
|
{5.f, 7.f, 9.f},
|
|
{5.f, 7.f, 9.f},
|
|
});
|
|
// clang-format on
|
|
ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDim2) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
auto m = builder.ConstantLiteral(*literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
builder.Reduce(m, builder.ConstantR0<float>(0.0f), add, {2});
|
|
|
|
// clang-format off
|
|
Array2D<float> expected({
|
|
{6.f, 15.f},
|
|
{6.f, 15.f},
|
|
{6.f, 15.f},
|
|
{6.f, 15.f},
|
|
});
|
|
// clang-format on
|
|
ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Add) {
|
|
RunVectorizedReduceTest(CreateScalarAddComputation,
|
|
[](float a, float b) { return a + b; },
|
|
[](int32 a, int32 b) {
|
|
return static_cast<int32>(static_cast<uint32>(a) +
|
|
static_cast<uint32>(b));
|
|
},
|
|
[](uint32 a, uint32 b) { return a + b; }, 0.0, 0, 0);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Multiply) {
|
|
RunVectorizedReduceTest(CreateScalarMultiplyComputation,
|
|
[](float a, float b) { return a * b; },
|
|
[](int32 a, int32 b) {
|
|
return static_cast<int32>(static_cast<uint32>(a) *
|
|
static_cast<uint32>(b));
|
|
},
|
|
[](uint32 a, uint32 b) { return a * b; }, 1.0, 1, 1);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Max) {
|
|
RunVectorizedReduceTest(CreateScalarMaxComputation,
|
|
[](float a, float b) { return std::max(a, b); },
|
|
[](int32 a, int32 b) { return std::max(a, b); },
|
|
[](uint32 a, uint32 b) { return std::max(a, b); },
|
|
std::numeric_limits<float>::min(),
|
|
std::numeric_limits<int32>::min(),
|
|
std::numeric_limits<uint32>::min());
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Min) {
|
|
RunVectorizedReduceTest(CreateScalarMinComputation,
|
|
[](float a, float b) { return std::min(a, b); },
|
|
[](int32 a, int32 b) { return std::min(a, b); },
|
|
[](uint32 a, uint32 b) { return std::min(a, b); },
|
|
std::numeric_limits<float>::max(),
|
|
std::numeric_limits<int32>::max(),
|
|
std::numeric_limits<uint32>::max());
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_LogicalAnd) {
|
|
RunVectorizedReduceTestForType<bool>(CreateScalarLogicalAndComputation,
|
|
[](bool a, bool b) { return a && b; },
|
|
true);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_LogicalOr) {
|
|
RunVectorizedReduceTestForType<bool>(CreateScalarLogicalOrComputation,
|
|
[](bool a, bool b) { return a || b; },
|
|
false);
|
|
}
|
|
|
|
class ReduceR3ToR2Test : public ReduceTest,
|
|
public ::testing::WithParamInterface<BoundsLayout> {};
|
|
|
|
XLA_TEST_P(ReduceR3ToR2Test, ReduceR3ToR2) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
const auto& bounds = GetParam().bounds;
|
|
Array3D<float> input_array(bounds[0], bounds[1], bounds[2]);
|
|
input_array.FillRandom(3.14f, 0.05);
|
|
|
|
auto input_literal = Literal::CreateR3FromArray3D(input_array);
|
|
input_literal =
|
|
input_literal->Relayout(LayoutUtil::MakeLayout(GetParam().layout));
|
|
std::unique_ptr<GlobalData> input_data =
|
|
client_->TransferToServer(*input_literal).ConsumeValueOrDie();
|
|
|
|
auto input_activations =
|
|
builder.Parameter(0, input_literal->shape(), "input");
|
|
Computation add = CreateScalarAddComputation(F32, &builder);
|
|
auto sum = builder.Reduce(input_activations, builder.ConstantR0<float>(0.0f),
|
|
add, GetParam().reduce_dims);
|
|
|
|
auto expected =
|
|
ReferenceUtil::Reduce3DTo2D(input_array, 0.0f, GetParam().reduce_dims,
|
|
[](float a, float b) { return a + b; });
|
|
|
|
ComputeAndCompareR2<float>(&builder, *expected, {input_data.get()},
|
|
ErrorSpec(1e-3, 1e-3));
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
ReduceR3ToR2Test_Instantiation, ReduceR3ToR2Test,
|
|
// Specifies (shape, layout, reduction dimensions).
|
|
::testing::Values(BoundsLayout{{4, 8, 128}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{4, 8, 128}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{4, 8, 128}, {2, 1, 0}, {2}},
|
|
// These should be simplified into a reshape.
|
|
BoundsLayout{{1, 21, 43}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{1, 1, 1}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{1, 1, 1}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{1, 1, 1}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{8, 16, 24}, {0, 1, 2}, {0}},
|
|
BoundsLayout{{8, 16, 24}, {0, 1, 2}, {1}},
|
|
BoundsLayout{{8, 16, 24}, {0, 1, 2}, {2}},
|
|
BoundsLayout{{5, 10, 250}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{5, 10, 250}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{5, 10, 250}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{8, 16, 256}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{8, 16, 256}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{8, 16, 256}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{2, 300, 784}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{2, 300, 784}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{2, 300, 784}, {2, 1, 0}, {0}}));
|
|
|
|
// TODO(b/64093391) Disabled on GPU due to an assertion failure when running
|
|
// IrEmitterUnnested::EmitInitializer() for the Reduce operator. Failed on
|
|
// 2017-07-26.
|
|
XLA_TEST_F(ReduceTest, DISABLED_ON_GPU(OperationOnConstantAsInitValue)) {
|
|
ComputationBuilder builder(client_, TestName());
|
|
Computation max_f32 = CreateScalarMaxComputation(F32, &builder);
|
|
|
|
auto a = builder.ConstantR0<float>(2.0f);
|
|
auto a2 = builder.Abs(a);
|
|
|
|
std::unique_ptr<Literal> b_literal = Literal::CreateR1<float>({1.0f, 4.0f});
|
|
std::unique_ptr<GlobalData> b_data =
|
|
client_->TransferToServer(*b_literal).ConsumeValueOrDie();
|
|
auto b = builder.Parameter(0, b_literal->shape(), "b");
|
|
auto max = builder.Reduce(b, a2, max_f32, {0});
|
|
|
|
ComputeAndCompareR0<float>(&builder, 4.0f, {b_data.get()});
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace xla
|
|
|
|
int main(int argc, char** argv) {
|
|
std::vector<tensorflow::Flag> flag_list;
|
|
xla::legacy_flags::AppendDebugOptionsFlags(&flag_list);
|
|
xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
|
|
const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
|
|
if (!parse_result) {
|
|
LOG(ERROR) << "\n" << usage;
|
|
return 2;
|
|
}
|
|
testing::InitGoogleTest(&argc, argv);
|
|
if (argc > 1) {
|
|
LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
|
|
return 2;
|
|
}
|
|
return RUN_ALL_TESTS();
|
|
}
|