1196 lines
45 KiB
C++
1196 lines
45 KiB
C++
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==============================================================================*/
|
|
|
|
// Tests that multi-dimensional arrays can be reduced among various
|
|
// user-provided dimensions.
|
|
//
|
|
// Note that comments for these tests are white-box in that they talk about the
|
|
// default data layout.
|
|
//
|
|
// The test space for reductions is the cartesian product of:
|
|
//
|
|
// <possible ranks> x
|
|
// <possible layouts for chosen rank> x
|
|
// <possible subsets of dimensions in chosen rank>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
#include <memory>
|
|
#include <random>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "absl/algorithm/container.h"
|
|
#include "absl/strings/str_format.h"
|
|
#include "absl/strings/str_join.h"
|
|
#include "absl/types/span.h"
|
|
#include "tensorflow/compiler/xla/array2d.h"
|
|
#include "tensorflow/compiler/xla/array4d.h"
|
|
#include "tensorflow/compiler/xla/client/global_data.h"
|
|
#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
|
|
#include "tensorflow/compiler/xla/client/local_client.h"
|
|
#include "tensorflow/compiler/xla/client/xla_builder.h"
|
|
#include "tensorflow/compiler/xla/client/xla_computation.h"
|
|
#include "tensorflow/compiler/xla/layout_util.h"
|
|
#include "tensorflow/compiler/xla/literal_util.h"
|
|
#include "tensorflow/compiler/xla/reference_util.h"
|
|
#include "tensorflow/compiler/xla/shape_util.h"
|
|
#include "tensorflow/compiler/xla/status_macros.h"
|
|
#include "tensorflow/compiler/xla/statusor.h"
|
|
#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
|
|
#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
|
|
#include "tensorflow/compiler/xla/tests/literal_test_util.h"
|
|
#include "tensorflow/compiler/xla/tests/test_macros.h"
|
|
#include "tensorflow/compiler/xla/util.h"
|
|
#include "tensorflow/compiler/xla/xla_data.pb.h"
|
|
#include "tensorflow/core/lib/core/status_test_util.h"
|
|
#include "tensorflow/core/platform/test.h"
|
|
#include "tensorflow/core/platform/types.h"
|
|
|
|
namespace xla {
|
|
namespace {
|
|
|
|
using FuncGeneratorForType = XlaComputation (*)(PrimitiveType, XlaBuilder*);
|
|
|
|
using FuncGenerator = XlaComputation (*)(XlaBuilder*);
|
|
|
|
class ReduceTest : public ClientLibraryTestBase {
|
|
protected:
|
|
ReduceTest() {
|
|
// Implementation note: laid out z >> y >> x by default.
|
|
// clang-format off
|
|
literal_2d_ = LiteralUtil::CreateR2<float>({
|
|
// x0 x1 x2
|
|
{ 1.f, 2.f, 3.f}, // y0
|
|
{ 4.f, 5.f, 6.f}, // y1
|
|
});
|
|
literal_3d_ = LiteralUtil::CreateR3Projected<float>({
|
|
// x0 x1 x2
|
|
{ 1.f, 2.f, 3.f}, // y0
|
|
{ 4.f, 5.f, 6.f}, // y1
|
|
}, 4);
|
|
// clang-format on
|
|
CHECK(ShapeUtil::Equal(
|
|
literal_3d_.shape(),
|
|
ShapeUtil::MakeShape(F32, {/*z=*/4, /*y=*/2, /*x=*/3})))
|
|
<< literal_3d_.shape().ShortDebugString();
|
|
}
|
|
|
|
// Runs an R1 => R0 reduction test with the given number of elements.
|
|
void RunR1ToR0Test(int64 element_count) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {element_count});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
std::minstd_rand rng(seed_);
|
|
|
|
std::vector<float> input_data(element_count);
|
|
for (int64 i = 0; i < element_count; ++i) {
|
|
input_data[i] = rng() % 3;
|
|
if (rng() % 2 == 0) {
|
|
input_data[i] *= -1;
|
|
}
|
|
}
|
|
Literal input_literal = LiteralUtil::CreateR1(AsSlice(input_data));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
float expected = absl::c_accumulate(input_data, 0.0f);
|
|
ComputeAndCompareR0<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.001));
|
|
}
|
|
|
|
void RunR1ToR0PredTest(bool and_reduce, absl::Span<const int> input_data) {
|
|
const int element_count = input_data.size();
|
|
XlaBuilder builder(TestName());
|
|
const Shape input_shape = ShapeUtil::MakeShape(S32, {element_count});
|
|
auto input_par = Parameter(&builder, 0, input_shape, "input");
|
|
auto pred_values =
|
|
Eq(input_par, ConstantR1<int>(&builder, element_count, 1));
|
|
XlaOp init_value;
|
|
XlaComputation reduce;
|
|
if (and_reduce) {
|
|
init_value = ConstantR0<bool>(&builder, true);
|
|
reduce = CreateScalarAndComputation(PRED, &builder);
|
|
} else {
|
|
init_value = ConstantR0<bool>(&builder, false);
|
|
reduce = CreateScalarOrComputation(PRED, &builder);
|
|
}
|
|
Reduce(pred_values, init_value, reduce,
|
|
/*dimensions_to_reduce=*/{0});
|
|
|
|
Literal input_literal = LiteralUtil::CreateR1(input_data);
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
bool expected = and_reduce;
|
|
for (bool item : input_data) {
|
|
if (and_reduce) {
|
|
expected = expected && item;
|
|
} else {
|
|
expected = expected || item;
|
|
}
|
|
}
|
|
ComputeAndCompareR0<bool>(&builder, expected, {input_global_data.get()});
|
|
}
|
|
|
|
// Reduce predicate tensor with dimension rows * cols to dimension cols, to
|
|
// test the implementation of atomic operations on misaligned small data
|
|
// types.
|
|
template <int64 cols>
|
|
void RunR2ToR1PredTest(bool and_reduce, int64 rows, int64 minor = 1,
|
|
int64 major = 0) {
|
|
XlaBuilder builder(TestName());
|
|
const Shape input_shape = ShapeUtil::MakeShape(U8, {rows, cols});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto input_pred = Eq(input, ConstantR0<uint8>(&builder, 1));
|
|
|
|
XlaOp init_value;
|
|
XlaComputation reduce_op;
|
|
if (and_reduce) {
|
|
init_value = ConstantR0<bool>(&builder, true);
|
|
reduce_op = CreateScalarAndComputation(PRED, &builder);
|
|
} else {
|
|
init_value = ConstantR0<bool>(&builder, false);
|
|
reduce_op = CreateScalarOrComputation(PRED, &builder);
|
|
}
|
|
|
|
Reduce(input_pred, init_value, reduce_op,
|
|
/*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<uint8> input_data(rows, cols);
|
|
input_data.FillRandom(0, 1);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal.Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
std::array<bool, cols> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
bool column_sum = and_reduce ? true : false;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
if (and_reduce) {
|
|
column_sum = column_sum && input_data(rowno, colno);
|
|
} else {
|
|
column_sum = column_sum || input_data(rowno, colno);
|
|
}
|
|
}
|
|
expected[colno] = column_sum;
|
|
}
|
|
|
|
ComputeAndCompareR1<bool>(&builder, expected, {input_global_data.get()});
|
|
}
|
|
|
|
// Runs an R2 => R0 reduction test with the given number of (rows, cols).
|
|
void RunR2ToR0Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0, 1});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal.Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
float expected = 0.0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
expected += input_data(rowno, colno);
|
|
}
|
|
}
|
|
ComputeAndCompareR0<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
// Runs an R2 => R1 reduction test with the given number of (rows, cols).
|
|
void RunR2ToR1Test(int64 rows, int64 cols, int64 minor = 1, int64 major = 0) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal.Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += input_data(rowno, colno);
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
template <typename NativeT>
|
|
void ComputeAndCompareGeneric(
|
|
typename std::enable_if<std::is_floating_point<NativeT>::value,
|
|
XlaBuilder>::type* builder,
|
|
absl::Span<const NativeT> expected,
|
|
absl::Span<GlobalData* const> arguments) {
|
|
ComputeAndCompareR1<NativeT>(builder, expected, arguments,
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
template <typename NativeT>
|
|
void ComputeAndCompareGeneric(
|
|
typename std::enable_if<std::is_integral<NativeT>::value,
|
|
XlaBuilder>::type* builder,
|
|
absl::Span<const NativeT> expected,
|
|
absl::Span<GlobalData* const> arguments) {
|
|
ComputeAndCompareR1<NativeT>(builder, expected, arguments);
|
|
}
|
|
|
|
template <typename NativeT>
|
|
void RunVectorizedReduceTestForType(
|
|
const std::function<XlaComputation(XlaBuilder*)>&
|
|
reduction_function_generator,
|
|
const std::function<NativeT(NativeT, NativeT)>&
|
|
reference_reduction_function,
|
|
const NativeT& initial_value) {
|
|
const int rows = 64, cols = 128;
|
|
const int minor = 1, major = 0;
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation reduction_function = reduction_function_generator(&builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(
|
|
xla::primitive_util::NativeToPrimitiveType<NativeT>(), {rows, cols});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<NativeT>(&builder, initial_value);
|
|
Reduce(input, zero, reduction_function,
|
|
/*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<NativeT> input_data(rows, cols);
|
|
input_data.FillUnique(initial_value);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
input_literal =
|
|
input_literal.Relayout(LayoutUtil::MakeLayout({minor, major}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
// NativeT can be bool, and std::vector<bool> does not convert to
|
|
// Span.
|
|
std::unique_ptr<NativeT[]> expected(new NativeT[cols]);
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
NativeT column_result = initial_value;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_result = reference_reduction_function(column_result,
|
|
input_data(rowno, colno));
|
|
}
|
|
expected[colno] = column_result;
|
|
}
|
|
|
|
ComputeAndCompareGeneric<NativeT>(
|
|
&builder, absl::Span<const NativeT>(expected.get(), cols),
|
|
{input_global_data.get()});
|
|
}
|
|
|
|
void RunVectorizedReduceTest(
|
|
const std::function<XlaComputation(PrimitiveType, XlaBuilder*)>&
|
|
reduction_function_generator_for_type,
|
|
const std::function<float(float, float)>&
|
|
reference_reduction_function_for_floats,
|
|
const std::function<int32(int32, int32)>&
|
|
reference_reduction_function_for_ints,
|
|
const std::function<uint32(uint32, uint32)>&
|
|
reference_reduction_function_for_uints,
|
|
float floating_point_identity, int32 signed_int_identity,
|
|
uint32 unsigned_int_identity) {
|
|
// Float version
|
|
RunVectorizedReduceTestForType<float>(
|
|
[&](XlaBuilder* builder) {
|
|
return reduction_function_generator_for_type(F32, builder);
|
|
},
|
|
reference_reduction_function_for_floats, floating_point_identity);
|
|
|
|
// Signed int version
|
|
RunVectorizedReduceTestForType<int32>(
|
|
[&](XlaBuilder* builder) {
|
|
return reduction_function_generator_for_type(S32, builder);
|
|
},
|
|
reference_reduction_function_for_ints, signed_int_identity);
|
|
|
|
// Unsigned int version
|
|
RunVectorizedReduceTestForType<uint32>(
|
|
[&](XlaBuilder* builder) {
|
|
return reduction_function_generator_for_type(U32, builder);
|
|
},
|
|
reference_reduction_function_for_uints, unsigned_int_identity);
|
|
}
|
|
|
|
Literal literal_2d_;
|
|
Literal literal_3d_;
|
|
uint32 seed_ = 0xdeadbeef;
|
|
};
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR1_0_F32_To_R0) { RunR1ToR0Test(0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_1_F32_To_R0) { RunR1ToR0Test(1); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_2_F32_To_R0) { RunR1ToR0Test(2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16_F32_To_R0) { RunR1ToR0Test(16); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_128_F32_To_R0) { RunR1ToR0Test(128); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_129_F32_To_R0) { RunR1ToR0Test(129); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_240_F32_To_R0) { RunR1ToR0Test(240); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_256_F32_To_R0) { RunR1ToR0Test(256); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_1024_F32_To_R0) { RunR1ToR0Test(1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_2048_F32_To_R0) { RunR1ToR0Test(2048); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16K_F32_To_R0) { RunR1ToR0Test(16 * 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16KP1_F32_To_R0) {
|
|
RunR1ToR0Test(16 * 1024 + 1);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceR1_64K_F32_To_R0) { RunR1ToR0Test(64 * 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_1M_F32_To_R0) { RunR1ToR0Test(1024 * 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR1_16M_F32_To_R0) { RunR1ToR0Test(4096 * 4096); }
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR2_0x0_To_R0) { RunR2ToR0Test(0, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_0x2_To_R0) { RunR2ToR0Test(0, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1x1_To_R0) { RunR2ToR0Test(1, 1); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_2x0_To_R0) { RunR2ToR0Test(2, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_2x2_To_R0) { RunR2ToR0Test(2, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_8x8_To_R0) { RunR2ToR0Test(8, 8); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_9x9_To_R0) { RunR2ToR0Test(9, 9); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_50x111_To_R0) { RunR2ToR0Test(50, 111); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_To_R0) { RunR2ToR0Test(111, 50); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R0) {
|
|
RunR2ToR0Test(111, 50, 0, 1);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R0) { RunR2ToR0Test(1024, 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R0) { RunR2ToR0Test(1000, 1500); }
|
|
|
|
// Disabled due to b/33245142. Failed on 2016-11-30.
|
|
// XLA_TEST_F(ReduceTest, ReduceR2_0x0_To_R1) { RunR2ToR1Test(0, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_0x2_To_R1) { RunR2ToR1Test(0, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1x1_To_R1) { RunR2ToR1Test(1, 1); }
|
|
// Disabled due to b/33245142. Failed on 2016-11-30.
|
|
// XLA_TEST_F(ReduceTest, ReduceR2_2x0_To_R1) { RunR2ToR1Test(2, 0); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_2x2_To_R1) { RunR2ToR1Test(2, 2); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_8x8_To_R1) { RunR2ToR1Test(8, 8); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_9x9_To_R1) { RunR2ToR1Test(9, 9); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_50x111_To_R1) { RunR2ToR1Test(50, 111); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_To_R1) { RunR2ToR1Test(111, 50); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_111x50_01_To_R1) {
|
|
RunR2ToR1Test(111, 50, 0, 1);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1024x1024_To_R1) { RunR2ToR1Test(1024, 1024); }
|
|
XLA_TEST_F(ReduceTest, ReduceR2_1000x1500_To_R1) { RunR2ToR1Test(1000, 1500); }
|
|
|
|
XLA_TEST_F(ReduceTest, AndReduceAllOnesR1_10_Pred) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count, 1);
|
|
RunR1ToR0PredTest(/*and_reduce=*/true, input);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, AndReduceOnesAndZerosR1_10_Pred) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count);
|
|
for (int i = 0; i < element_count; ++i) {
|
|
input[i] = i % 2;
|
|
}
|
|
RunR1ToR0PredTest(/*and_reduce=*/true, input);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, OrReduceAllOnesR1_10_Pred) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count, 1);
|
|
RunR1ToR0PredTest(/*and_reduce=*/false, input);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, OrReduceOnesAndZerosR1_10_Pred) {
|
|
constexpr int element_count = 10;
|
|
std::vector<int> input(element_count);
|
|
for (int i = 0; i < element_count; ++i) {
|
|
input[i] = i % 2;
|
|
}
|
|
RunR1ToR0PredTest(/*and_reduce=*/false, input);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceElementwiseR2_111x50_To_R1) {
|
|
const int64 rows = 111, cols = 50;
|
|
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
auto log_ = Log(input);
|
|
Reduce(log_, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
input_literal = input_literal.Relayout(LayoutUtil::MakeLayout({0, 1}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += std::log(input_data(rowno, colno));
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, TransposeAndReduceElementwiseR2_111x50_To_R1) {
|
|
const int64 rows = 111, cols = 50;
|
|
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, cols});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
auto log_ = Log(input);
|
|
auto transpose = Transpose(log_, {1, 0});
|
|
Reduce(transpose, zero, add_f32, /*dimensions_to_reduce=*/{1});
|
|
|
|
Array2D<float> input_data(rows, cols);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
input_literal = input_literal.Relayout(LayoutUtil::MakeLayout({0, 1}));
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 colno = 0; colno < cols; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += std::log(input_data(rowno, colno));
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
// Test that algebraic simplifier does not incorrectly fold a transpose into a
|
|
// reduction operation.
|
|
XLA_TEST_F(ReduceTest, TransposeAndReduceR3_12x111x50_To_R2) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {12, 111, 50});
|
|
XlaOp input = Parameter(&builder, 0, input_shape, "input");
|
|
XlaOp zero = ConstantR0<float>(&builder, 0.0);
|
|
XlaOp transpose = Transpose(input, /*permutation=*/{1, 0, 2});
|
|
Reduce(transpose, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
TF_ASSERT_OK_AND_ASSIGN(Literal input_data, MakeFakeLiteral(input_shape));
|
|
|
|
ComputeAndCompare(&builder, {std::move(input_data)}, ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, Reshape_111x2x25Reduce_111x50_To_R1) {
|
|
const int64 rows = 111, cols = 50;
|
|
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {rows, 2, cols / 2});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
auto log_ = Tanh(input);
|
|
auto reshape = Reshape(log_, {rows, cols});
|
|
Reduce(reshape, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array3D<float> input_data(rows, 2, cols / 2);
|
|
input_data.FillRandom(3.14f, 0.04);
|
|
Literal input_literal = LiteralUtil::CreateR3FromArray3D(input_data);
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
std::vector<float> expected;
|
|
for (int64 major = 0; major < 2; ++major) {
|
|
for (int64 colno = 0; colno < cols / 2; ++colno) {
|
|
float column_sum = 0;
|
|
for (int64 rowno = 0; rowno < rows; ++rowno) {
|
|
column_sum += std::tanh(input_data(rowno, major, colno));
|
|
}
|
|
expected.push_back(column_sum);
|
|
}
|
|
}
|
|
ComputeAndCompareR1<float>(&builder, expected, {input_global_data.get()},
|
|
ErrorSpec(0.01, 1e-4));
|
|
}
|
|
|
|
struct BoundsLayout {
|
|
std::vector<int64> bounds;
|
|
std::vector<int64> layout;
|
|
std::vector<int64> reduce_dims;
|
|
};
|
|
|
|
void PrintTo(const BoundsLayout& spec, std::ostream* os) {
|
|
*os << absl::StrFormat("R%uToR%u%s_%s_Reduce%s", spec.bounds.size(),
|
|
spec.bounds.size() - spec.reduce_dims.size(),
|
|
absl::StrJoin(spec.bounds, "x"),
|
|
absl::StrJoin(spec.layout, ""),
|
|
absl::StrJoin(spec.reduce_dims, ""));
|
|
}
|
|
|
|
// Add-reduces a broadcasted scalar matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, AddReduce2DScalarToR0) {
|
|
XlaBuilder builder(TestName());
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
auto scalar = ConstantR0<float>(&builder, 42.0);
|
|
auto broadcasted = Broadcast(scalar, {500, 500});
|
|
Reduce(broadcasted, ConstantR0<float>(&builder, 0.0f), add, {0, 1});
|
|
|
|
float expected = 42.0f * static_cast<float>(500 * 500);
|
|
ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
// Max-reduces a broadcasted scalar matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, MaxReduce2DScalarToR0) {
|
|
XlaBuilder builder(TestName());
|
|
auto max = CreateScalarMaxComputation(F32, &builder);
|
|
auto scalar = ConstantR0<float>(&builder, 42.0);
|
|
auto broadcasted = Broadcast(scalar, {500, 500});
|
|
Reduce(broadcasted, ConstantR0<float>(&builder, 0.0f), max, {0, 1});
|
|
|
|
float expected = 42.0f;
|
|
ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
// Max-reduces a matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, MaxReduce2DToR0) {
|
|
XlaBuilder builder(TestName());
|
|
auto max = CreateScalarMaxComputation(F32, &builder);
|
|
Array2D<float> input(300, 250);
|
|
input.FillRandom(214.0f);
|
|
auto input_literal = LiteralUtil::CreateR2FromArray2D(input);
|
|
Reduce(ConstantLiteral(&builder, input_literal),
|
|
ConstantR0<float>(&builder, FLT_MIN), max, {0, 1});
|
|
auto input_max = FLT_MIN;
|
|
input.Each(
|
|
[&](int64, int64, float* v) { input_max = std::max(input_max, *v); });
|
|
ComputeAndCompareR0<float>(&builder, input_max, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
// Min-reduces matrix among dimension 1 and 0.
|
|
XLA_TEST_F(ReduceTest, MinReduce2DToR0) {
|
|
XlaBuilder builder(TestName());
|
|
auto min = CreateScalarMinComputation(F32, &builder);
|
|
Array2D<float> input(150, 130);
|
|
input.FillRandom(214.0f);
|
|
auto input_literal = LiteralUtil::CreateR2FromArray2D(input);
|
|
Reduce(ConstantLiteral(&builder, input_literal),
|
|
ConstantR0<float>(&builder, FLT_MAX), min, {0, 1});
|
|
|
|
auto input_min = FLT_MAX;
|
|
input.Each(
|
|
[&](int64, int64, float* v) { input_min = std::min(input_min, *v); });
|
|
ComputeAndCompareR0<float>(&builder, input_min, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, UnsignedInt_MinReduce) {
|
|
XlaBuilder builder(TestName());
|
|
Array2D<uint32> input({{1}, {2}});
|
|
auto min = CreateScalarMinComputation(U32, &builder);
|
|
auto input_literal = LiteralUtil::CreateR2FromArray2D(input);
|
|
auto initial_value =
|
|
ConstantR0<uint32>(&builder, std::numeric_limits<uint32>::max());
|
|
|
|
Reduce(ConstantLiteral(&builder, input_literal), initial_value, min, {0, 1});
|
|
ComputeAndCompareR0<uint32>(&builder, 1, {});
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, UnsignedInt_MaxReduce) {
|
|
XlaBuilder builder(TestName());
|
|
Array2D<uint32> input({{1}, {2}});
|
|
auto max = CreateScalarMaxComputation(U32, &builder);
|
|
auto input_literal = LiteralUtil::CreateR2FromArray2D(input);
|
|
auto initial_value =
|
|
ConstantR0<uint32>(&builder, std::numeric_limits<uint32>::min());
|
|
|
|
Reduce(ConstantLiteral(&builder, input_literal), initial_value, max, {0, 1});
|
|
ComputeAndCompareR0<uint32>(&builder, 2, {});
|
|
}
|
|
|
|
// Reduces a matrix among dimension 1.
|
|
XLA_TEST_F(ReduceTest, Reduce2DAmong1) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_2d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {1});
|
|
|
|
std::vector<float> expected = {6.f, 15.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, Reduce2DAmong0and1) {
|
|
// Reduce a matrix among dimensions 0 and 1 (sum it up to a scalar).
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_2d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {0, 1});
|
|
|
|
ComputeAndCompareR0<float>(&builder, 21.0f, {}, ErrorSpec(0.0001, 1e-4));
|
|
}
|
|
|
|
// Tests 2D matrix ReduceToRow operation.
|
|
XLA_TEST_F(ReduceTest, Reduce2DAmongY) {
|
|
XlaBuilder builder("reduce_among_y");
|
|
auto m = ConstantLiteral(&builder, literal_2d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {0});
|
|
|
|
std::vector<float> expected = {5.f, 7.f, 9.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDims_1_2) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {1, 2});
|
|
|
|
std::vector<float> expected = {21.f, 21.f, 21.f, 21.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDims_0_1) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {0, 1});
|
|
|
|
std::vector<float> expected = {20.f, 28.f, 36.f};
|
|
ComputeAndCompareR1<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3ToR0) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {0, 1, 2});
|
|
|
|
float expected = 21.0f * 4.0;
|
|
ComputeAndCompareR0<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDim0) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {0});
|
|
|
|
// clang-format off
|
|
Array2D<float> expected({
|
|
{4.f, 8.f, 12.f},
|
|
{16.f, 20.f, 24.f},
|
|
});
|
|
// clang-format on
|
|
ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDim1) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {1});
|
|
|
|
// clang-format off
|
|
Array2D<float> expected({
|
|
{5.f, 7.f, 9.f},
|
|
{5.f, 7.f, 9.f},
|
|
{5.f, 7.f, 9.f},
|
|
{5.f, 7.f, 9.f},
|
|
});
|
|
// clang-format on
|
|
ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceR3AmongDim2) {
|
|
XlaBuilder builder(TestName());
|
|
auto m = ConstantLiteral(&builder, literal_3d_);
|
|
auto add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(m, ConstantR0<float>(&builder, 0.0f), add, {2});
|
|
|
|
// clang-format off
|
|
Array2D<float> expected({
|
|
{6.f, 15.f},
|
|
{6.f, 15.f},
|
|
{6.f, 15.f},
|
|
{6.f, 15.f},
|
|
});
|
|
// clang-format on
|
|
ComputeAndCompareR2<float>(&builder, expected, {}, ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Add) {
|
|
RunVectorizedReduceTest(
|
|
static_cast<FuncGeneratorForType>(CreateScalarAddComputation),
|
|
[](float a, float b) { return a + b; },
|
|
[](int32 a, int32 b) {
|
|
return static_cast<int32>(static_cast<uint32>(a) +
|
|
static_cast<uint32>(b));
|
|
},
|
|
[](uint32 a, uint32 b) { return a + b; }, 0.0, 0, 0);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Multiply) {
|
|
RunVectorizedReduceTest(
|
|
static_cast<FuncGeneratorForType>(CreateScalarMultiplyComputation),
|
|
[](float a, float b) { return a * b; },
|
|
[](int32 a, int32 b) {
|
|
return static_cast<int32>(static_cast<uint32>(a) *
|
|
static_cast<uint32>(b));
|
|
},
|
|
[](uint32 a, uint32 b) { return a * b; }, 1.0, 1, 1);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Max) {
|
|
RunVectorizedReduceTest(
|
|
static_cast<FuncGeneratorForType>(CreateScalarMaxComputation),
|
|
[](float a, float b) { return std::max(a, b); },
|
|
[](int32 a, int32 b) { return std::max(a, b); },
|
|
[](uint32 a, uint32 b) { return std::max(a, b); },
|
|
std::numeric_limits<float>::min(), std::numeric_limits<int32>::min(),
|
|
std::numeric_limits<uint32>::min());
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_Min) {
|
|
RunVectorizedReduceTest(
|
|
static_cast<FuncGeneratorForType>(CreateScalarMinComputation),
|
|
[](float a, float b) { return std::min(a, b); },
|
|
[](int32 a, int32 b) { return std::min(a, b); },
|
|
[](uint32 a, uint32 b) { return std::min(a, b); },
|
|
std::numeric_limits<float>::max(), std::numeric_limits<int32>::max(),
|
|
std::numeric_limits<uint32>::max());
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_BooleanAnd) {
|
|
RunVectorizedReduceTestForType<bool>(
|
|
static_cast<FuncGenerator>([](XlaBuilder* builder) {
|
|
return CreateScalarAndComputation(PRED, builder);
|
|
}),
|
|
[](bool a, bool b) { return a && b; }, true);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, VectorizedReduce_BooleanOr) {
|
|
RunVectorizedReduceTestForType<bool>(
|
|
static_cast<FuncGenerator>([](XlaBuilder* builder) {
|
|
return CreateScalarOrComputation(PRED, builder);
|
|
}),
|
|
[](bool a, bool b) { return a || b; }, false);
|
|
}
|
|
|
|
class ReduceR3ToR2Test : public ReduceTest,
|
|
public ::testing::WithParamInterface<BoundsLayout> {};
|
|
|
|
XLA_TEST_P(ReduceR3ToR2Test, ReduceR3ToR2) {
|
|
XlaBuilder builder(TestName());
|
|
const auto& bounds = GetParam().bounds;
|
|
Array3D<float> input_array(bounds[0], bounds[1], bounds[2]);
|
|
// input_array.FillRandom(3.14f, 0.05);
|
|
input_array.Fill(1.0f);
|
|
|
|
auto input_literal = LiteralUtil::CreateR3FromArray3D(input_array);
|
|
input_literal =
|
|
input_literal.Relayout(LayoutUtil::MakeLayout(GetParam().layout));
|
|
std::unique_ptr<GlobalData> input_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
auto input_activations =
|
|
Parameter(&builder, 0, input_literal.shape(), "input");
|
|
XlaComputation add = CreateScalarAddComputation(F32, &builder);
|
|
Reduce(input_activations, ConstantR0<float>(&builder, 0.0f), add,
|
|
GetParam().reduce_dims);
|
|
|
|
auto expected =
|
|
ReferenceUtil::Reduce3DTo2D(input_array, 0.0f, GetParam().reduce_dims,
|
|
[](float a, float b) { return a + b; });
|
|
|
|
ComputeAndCompareR2<float>(&builder, *expected, {input_data.get()},
|
|
ErrorSpec(1e-3, 1e-3));
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
ReduceR3ToR2Test_Instantiation, ReduceR3ToR2Test,
|
|
// Specifies (shape, layout, reduction dimensions).
|
|
::testing::Values(BoundsLayout{{4, 8, 128}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{4, 8, 128}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{4, 8, 128}, {2, 1, 0}, {2}},
|
|
// These should be simplified into a reshape.
|
|
BoundsLayout{{1, 21, 43}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{1, 1, 1}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{1, 1, 1}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{1, 1, 1}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{8, 16, 24}, {0, 1, 2}, {0}},
|
|
BoundsLayout{{8, 16, 24}, {0, 1, 2}, {1}},
|
|
BoundsLayout{{8, 16, 24}, {0, 1, 2}, {2}},
|
|
BoundsLayout{{5, 10, 250}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{5, 10, 250}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{5, 10, 250}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{8, 16, 256}, {2, 1, 0}, {0}},
|
|
BoundsLayout{{8, 16, 256}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{8, 16, 256}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{2, 300, 784}, {2, 1, 0}, {2}},
|
|
BoundsLayout{{2, 300, 784}, {2, 1, 0}, {1}},
|
|
BoundsLayout{{2, 300, 784}, {2, 1, 0}, {0}}));
|
|
|
|
XLA_TEST_F(ReduceTest, OperationOnConstantAsInitValue) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation max_f32 = CreateScalarMaxComputation(F32, &builder);
|
|
|
|
auto a = ConstantR0<float>(&builder, 2.0f);
|
|
auto a2 = Abs(a);
|
|
|
|
Literal b_literal = LiteralUtil::CreateR1<float>({1.0f, 4.0f});
|
|
std::unique_ptr<GlobalData> b_data =
|
|
client_->TransferToServer(b_literal).ConsumeValueOrDie();
|
|
auto b = Parameter(&builder, 0, b_literal.shape(), "b");
|
|
Reduce(b, a2, max_f32, {0});
|
|
|
|
ComputeAndCompareR0<float>(&builder, 4.0f, {b_data.get()});
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, ReduceAndPredR2_128x64_To_R1) {
|
|
RunR2ToR1PredTest</*cols=64*/ 64>(/*and_reduce=true*/ true, /*rows=128*/ 128);
|
|
}
|
|
XLA_TEST_F(ReduceTest, ReduceOrPredR2_64x32_To_R1) {
|
|
RunR2ToR1PredTest</*cols=32*/ 32>(/*and_reduce=false*/ false, /*rows=64*/ 64);
|
|
}
|
|
|
|
// Tests reductions with different initial values. There's no test macro that
|
|
// combines TYPED_TEST and TYPED_P, so we have to do it manually.
|
|
class ReduceInitializerTest : public ReduceTest {
|
|
protected:
|
|
template <typename T>
|
|
void DoTest(T initializer, int num_elems) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation max_fn = CreateScalarMaxComputation(
|
|
primitive_util::NativeToPrimitiveType<T>(), &builder);
|
|
|
|
auto init = ConstantR0<T>(&builder, initializer);
|
|
std::vector<T> input_arr(num_elems, std::numeric_limits<T>::lowest());
|
|
auto input_literal = LiteralUtil::CreateR1<T>(input_arr);
|
|
auto input_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
Reduce(Parameter(&builder, 0, input_literal.shape(), "input"), init, max_fn,
|
|
{0});
|
|
|
|
ComputeAndCompareR0<T>(&builder, initializer, {input_data.get()});
|
|
}
|
|
};
|
|
|
|
XLA_TEST_F(ReduceInitializerTest, U8Small) { DoTest<uint8>(42, 2); }
|
|
|
|
XLA_TEST_F(ReduceInitializerTest, U8BigPowerOf2) { DoTest<uint8>(42, 4096); }
|
|
|
|
XLA_TEST_F(ReduceInitializerTest, U8InitializerBigNonPowerOf2) {
|
|
DoTest<uint8>(42, 4095);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceInitializerTest, U64InitializerZero) {
|
|
DoTest<uint64>(0, 1024);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceInitializerTest, U64InitializerOne) {
|
|
DoTest<uint64>(1, 1024);
|
|
}
|
|
|
|
XLA_TEST_F(ReduceInitializerTest, U64InitializerBigValue) {
|
|
DoTest<uint64>(1234556789123, 1024);
|
|
}
|
|
|
|
// Test the operational semantic that the init value is passed on the lhs for
|
|
// reduces. Can be tested by performing an "identity" reduce (that simply
|
|
// returns one of the parameters). In this case, we return the rhs, which for
|
|
// a 1D array with one element, should not be the init value.
|
|
XLA_TEST_F(ReduceTest, ReduceIdentity) {
|
|
XlaBuilder builder(TestName());
|
|
Shape single_float = ShapeUtil::MakeShape(F32, {});
|
|
Parameter(&builder, 0, single_float, "lhs-unused");
|
|
Parameter(&builder, 1, single_float, "rhs-used");
|
|
auto computation_status = builder.Build();
|
|
TF_ASSERT_OK(computation_status.status());
|
|
|
|
Shape operand_shape = ShapeUtil::MakeShape(F32, {1});
|
|
Reduce(Parameter(&builder, 0, operand_shape, "operand"),
|
|
Parameter(&builder, 1, single_float, "init"),
|
|
computation_status.ValueOrDie(), {0});
|
|
|
|
float operand[] = {42.0f};
|
|
float init = 58.5f;
|
|
float expected = 42.0f;
|
|
Literal input_literal = LiteralUtil::CreateR1<float>(operand);
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
Literal input_literal2 = LiteralUtil::CreateR0<float>(init);
|
|
std::unique_ptr<GlobalData> input_global_data2 =
|
|
client_->TransferToServer(input_literal2).ConsumeValueOrDie();
|
|
ComputeAndCompareR0<float>(
|
|
&builder, expected, {input_global_data.get(), input_global_data2.get()},
|
|
ErrorSpec(0.0001));
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, AndReduceU64) {
|
|
XlaBuilder builder(TestName());
|
|
Array2D<uint64> initializer = {{0x123456789ABCDEF0ULL, 0x3BCDEF12A4567890ULL},
|
|
{0XFFFFFFFFFFFFFFD6ULL, 101},
|
|
{1, 0XFFFFFFFFFFFFFFFFULL}};
|
|
auto reducer = CreateScalarAndComputation(U64, &builder);
|
|
auto m = ConstantR2FromArray2D(&builder, initializer);
|
|
Reduce(m, ConstantR0<uint64>(&builder, 0xFFFFFFFFFFFFFFFFLL), reducer, {1});
|
|
|
|
std::vector<uint64> expected = {0x1204461080145890LL, 68, 1};
|
|
ComputeAndCompareR1<uint64>(&builder, expected, {});
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, OrReduceU64) {
|
|
XlaBuilder builder(TestName());
|
|
Array2D<uint64> initializer = {{0x123456789ABCDEF0ULL, 0x3BCDEF12A4567890ULL},
|
|
{0xFFFFFFFFFFFFFFD6ULL, 101},
|
|
{1, 0xCAFEBEEFABABABABULL}};
|
|
auto reducer = CreateScalarOrComputation(U64, &builder);
|
|
auto m = ConstantR2FromArray2D(&builder, initializer);
|
|
Reduce(m, ConstantR0<uint64>(&builder, 0), reducer, {1});
|
|
|
|
std::vector<uint64> expected = {0X3BFDFF7ABEFEFEF0ULL, 0XFFFFFFFFFFFFFFF7ULL,
|
|
0xCAFEBEEFABABABABULL};
|
|
ComputeAndCompareR1<uint64>(&builder, expected, {});
|
|
}
|
|
|
|
XLA_TEST_F(ReduceTest, R0ReduceInDisguise) {
|
|
XlaBuilder builder(TestName());
|
|
XlaComputation add_f32 = CreateScalarAddComputation(F32, &builder);
|
|
constexpr int element_count = 127;
|
|
const Shape input_shape = ShapeUtil::MakeShape(F32, {element_count, 1});
|
|
auto input = Parameter(&builder, 0, input_shape, "input");
|
|
auto zero = ConstantR0<float>(&builder, 0.0);
|
|
Reduce(input, zero, add_f32, /*dimensions_to_reduce=*/{0});
|
|
|
|
Array2D<float> input_data(element_count, 1);
|
|
input_data.FillRandom(3.0f);
|
|
Literal input_literal = LiteralUtil::CreateR2FromArray2D(input_data);
|
|
std::unique_ptr<GlobalData> input_global_data =
|
|
client_->TransferToServer(input_literal).ConsumeValueOrDie();
|
|
|
|
float expected = absl::c_accumulate(input_data, 0.0f);
|
|
ComputeAndCompareR1<float>(&builder, {expected}, {input_global_data.get()},
|
|
ErrorSpec(0.001));
|
|
}
|
|
|
|
class ReduceHloTest : public HloTestBase {};
|
|
|
|
XLA_TEST_F(ReduceHloTest, HandleReductionToVectorAndOtherReduction) {
|
|
absl::string_view hlo_string = R"(
|
|
HloModule HandleReductionToVectorAndOtherReduction
|
|
|
|
add {
|
|
acc = f32[] parameter(1)
|
|
op = f32[] parameter(0)
|
|
ROOT out = f32[] add(acc, op)
|
|
}
|
|
|
|
ENTRY main {
|
|
iota.3 = s32[2,2]{1,0} iota(), iota_dimension=0
|
|
iota.2 = s32[2,2]{1,0} iota(), iota_dimension=1
|
|
compare.0 = pred[2,2]{1,0} compare(iota.3, iota.2), direction=EQ
|
|
broadcast = pred[2,2,2,2]{3,2,1,0} broadcast(compare.0), dimensions={2,3}
|
|
param_0.16 = f32[2,2,2,2]{3,2,1,0} parameter(0)
|
|
constant_4 = f32[] constant(0)
|
|
broadcast.9 = f32[2,2,2,2]{3,2,1,0} broadcast(constant_4), dimensions={}
|
|
select.0 = f32[2,2,2,2]{3,2,1,0} select(broadcast, param_0.16, broadcast.9)
|
|
reduce.1 = f32[2,2,2]{2,1,0} reduce(select.0, constant_4), dimensions={2},
|
|
to_apply=add
|
|
abs.0 = f32[2,2,2]{2,1,0} abs(reduce.1)
|
|
log.0 = f32[2,2,2]{2,1,0} log(abs.0)
|
|
reduce.0 = f32[2,2]{1,0} reduce(log.0, constant_4), dimensions={2},
|
|
to_apply=add
|
|
ROOT tuple = (f32[2,2]{1,0}, f64[2,2,2]{2,1,0}) tuple(reduce.0, reduce.1)
|
|
}
|
|
)";
|
|
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{1e-5, 1e-5}));
|
|
}
|
|
|
|
class VariadicReduceTest : public HloTestBase {};
|
|
|
|
XLA_TEST_F(VariadicReduceTest, Reduce_R3x2_to_R2x2_simple) {
|
|
absl::string_view hlo_string = R"(
|
|
HloModule Reduce_R3x2_to_R1x2_simple
|
|
|
|
add {
|
|
op1 = f32[] parameter(0)
|
|
op2 = f32[] parameter(1)
|
|
acc1 = f32[] parameter(2)
|
|
acc2 = f32[] parameter(3)
|
|
out1 = f32[] add(acc1, op1)
|
|
out2 = f32[] add(acc2, op2)
|
|
ROOT result = (f32[], f32[]) tuple(out1, out2)
|
|
}
|
|
|
|
ENTRY main {
|
|
inp1 = f32[3,4,5] parameter(0)
|
|
inp2 = f32[3,4,5] parameter(1)
|
|
zero = f32[] constant(0)
|
|
|
|
ROOT out = (f32[3,5], f32[3,5]) reduce(inp1, inp2, zero, zero),
|
|
dimensions={1},
|
|
to_apply=add
|
|
}
|
|
)";
|
|
|
|
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{1e-5, 1e-5}));
|
|
}
|
|
|
|
XLA_TEST_F(VariadicReduceTest, Reduce_R3x2_to_R1x2_simple) {
|
|
absl::string_view hlo_string = R"(
|
|
HloModule Reduce_R3x2_to_R1x2_simple
|
|
|
|
add {
|
|
op1 = f32[] parameter(0)
|
|
op2 = f32[] parameter(1)
|
|
acc1 = f32[] parameter(2)
|
|
acc2 = f32[] parameter(3)
|
|
out1 = f32[] add(acc1, op1)
|
|
out2 = f32[] add(acc2, op2)
|
|
ROOT result = (f32[], f32[]) tuple(out1, out2)
|
|
}
|
|
|
|
ENTRY main {
|
|
inp1 = f32[10,20,3] parameter(0)
|
|
inp2 = f32[10,20,3] parameter(1)
|
|
zero = f32[] constant(0)
|
|
|
|
ROOT out = (f32[10], f32[10]) reduce(inp1, inp2, zero, zero),
|
|
dimensions={1,2},
|
|
to_apply=add
|
|
}
|
|
)";
|
|
|
|
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{1e-5, 1e-5}));
|
|
}
|
|
|
|
XLA_TEST_F(VariadicReduceTest, Reduce_R1x2_to_R0x2_simple) {
|
|
absl::string_view hlo_string = R"(
|
|
HloModule Reduce_R1x2_to_R0x2_simple
|
|
|
|
add {
|
|
op1 = f32[] parameter(0)
|
|
op2 = f32[] parameter(1)
|
|
acc1 = f32[] parameter(2)
|
|
acc2 = f32[] parameter(3)
|
|
out1 = f32[] add(acc1, op1)
|
|
out2 = f32[] add(acc2, op2)
|
|
ROOT result = (f32[], f32[]) tuple(out1, out2)
|
|
}
|
|
|
|
ENTRY main {
|
|
inp1 = f32[100] parameter(0)
|
|
inp2 = f32[100] parameter(1)
|
|
zero = f32[] constant(0)
|
|
|
|
ROOT out = (f32[], f32[]) reduce(inp1, inp2, zero, zero),
|
|
dimensions={0},
|
|
to_apply=add
|
|
}
|
|
)";
|
|
|
|
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{1e-5, 1e-5}));
|
|
}
|
|
|
|
XLA_TEST_F(VariadicReduceTest, Reduce_R1x2_to_R0x2_argmax) {
|
|
absl::string_view hlo_string = R"(
|
|
HloModule Reduce_R1x2_to_R0x2_argmax
|
|
|
|
argmax {
|
|
running_max = f32[] parameter(0)
|
|
running_max_idx = u32[] parameter(1)
|
|
current_value = f32[] parameter(2)
|
|
current_value_idx = u32[] parameter(3)
|
|
|
|
current = (f32[], u32[]) tuple(running_max, running_max_idx)
|
|
potential = (f32[], u32[]) tuple(current_value, current_value_idx)
|
|
|
|
cmp_code = pred[] compare(current_value, running_max), direction=GT
|
|
|
|
new_max = f32[] select(cmp_code, current_value, running_max)
|
|
new_idx = u32[] select(cmp_code, current_value_idx, running_max_idx)
|
|
|
|
ROOT out = (f32[], u32[]) tuple(new_max, new_idx)
|
|
}
|
|
|
|
ENTRY main {
|
|
input = f32[100] parameter(0)
|
|
idxs = u32[100]{0} iota(), iota_dimension=0
|
|
zero = f32[] constant(0)
|
|
zero_idx = u32[] constant(0)
|
|
|
|
ROOT out = (f32[], u32[]) reduce(
|
|
input, idxs, zero, zero_idx),
|
|
dimensions={0},
|
|
to_apply=%argmax
|
|
}
|
|
)";
|
|
|
|
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{1e-5, 1e-5}));
|
|
}
|
|
|
|
XLA_TEST_F(VariadicReduceTest, ReduceMultiOutputVariadicAnd) {
|
|
absl::string_view hlo_string = R"(
|
|
HloModule VariadicReduceMultiOutput
|
|
|
|
VariadicAnd {
|
|
value = pred[] parameter(0)
|
|
value_idx = u32[] parameter(1)
|
|
current_value = pred[] parameter(2)
|
|
current_value_idx = u32[] parameter(3)
|
|
ROOT out = (pred[], u32[]) tuple(value, value_idx)
|
|
}
|
|
|
|
ENTRY CheckBuffer {
|
|
test_value = f32[] parameter(0)
|
|
buffer = f32[100] parameter(1)
|
|
value_broadcast = f32[100] broadcast(test_value), dimensions={}
|
|
comparison_result = pred[100] compare(buffer, value_broadcast), direction=EQ
|
|
true_constant = pred[] constant(true)
|
|
|
|
zero_idx = u32[] constant(0)
|
|
idxs = u32[100]{0} iota(), iota_dimension=0
|
|
out = (pred[], u32[]) reduce(
|
|
comparison_result, idxs, true_constant, zero_idx
|
|
), dimensions={0}, to_apply=VariadicAnd
|
|
|
|
ROOT returned = u32[] get-tuple-element(out), index=1
|
|
}
|
|
)";
|
|
|
|
EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{1e-5, 1e-5}));
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace xla
|