Internal tests cleanup.
PiperOrigin-RevId: 339762896
Change-Id: I2e06cf0b409a1e621cd567060ba8670ce70d34c9
Parent: b6aa9f3368
Commit: 04f62ae3b1
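Every hunk below applies the same mechanical migration: benchmark functions that used to take an explicit `int iters` and drive measurement through the free functions in `tensorflow::testing` (StopTiming/StartTiming, ItemsProcessed, BytesProcessed, UseRealTime, SetLabel) now take a `::testing::benchmark::State&`, read their arguments from `state.range(...)`, pass `/*old_benchmark_api*/ false` to `test::Benchmark` so that `Run(state)` owns the timed loop, and report counters on `state` after the run, with `UseRealTime()` moving onto the `BENCHMARK(...)` registration. The sketch below is illustrative only and not part of the commit: `BM_Example`, `MakeExampleGraph`, and `kItemsPerIter` are hypothetical names, and it assumes the usual TensorFlow test utilities (`test::Benchmark`, the `BENCHMARK` macro, and the `int64` alias) are available.

// Illustrative sketch only -- not part of this commit. BM_Example,
// MakeExampleGraph, and kItemsPerIter are hypothetical names.
//
// Old style:
//   static void BM_Example(int iters) {
//     testing::StopTiming();
//     testing::ItemsProcessed(static_cast<int64>(iters) * kItemsPerIter);
//     testing::UseRealTime();
//     Graph* g = MakeExampleGraph(/*size=*/100);
//     testing::StartTiming();
//     test::Benchmark("cpu", g).Run(iters);
//   }
//   BENCHMARK(BM_Example)->Arg(100);
//
// New style:
constexpr int64 kItemsPerIter = 1024;  // hypothetical amount of work per iteration

static void BM_Example(::testing::benchmark::State& state) {
  const int size = state.range(0);    // benchmark arguments now come from state
  Graph* g = MakeExampleGraph(size);  // hypothetical graph builder
  // Passing old_benchmark_api = false lets Run(state) drive the timed loop.
  test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
  // Counters are reported after the run from the measured iteration count.
  state.SetItemsProcessed(static_cast<int64>(state.iterations()) * kItemsPerIter);
}
BENCHMARK(BM_Example)->UseRealTime()->Arg(100);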
@@ -44,38 +44,34 @@ static Graph* MakeGraph(int split_dim, int num_split,
 }

 #define BM_SPLIT_1D(num_split, chunk_size) \
-  static void BM_Split_1d_##num_split##_##chunk_size(int iters) { \
-    testing::StopTiming(); \
-    testing::ItemsProcessed(static_cast<int64>(iters) * num_split * \
-                            chunk_size); \
+  static void BM_Split_1d_##num_split##_##chunk_size( \
+      ::testing::benchmark::State& state) { \
     auto label = \
         strings::Printf("1-D %d chunks of %d each", num_split, chunk_size); \
-    testing::SetLabel(label); \
-    testing::UseRealTime(); \
+    state.SetLabel(label); \
     auto g = MakeGraph(/* split_dim = */ 0, num_split, {chunk_size}); \
-    testing::StartTiming(); \
-    test::Benchmark("cpu", g).Run(iters); \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
+                            num_split * chunk_size); \
   } \
-  BENCHMARK(BM_Split_1d_##num_split##_##chunk_size);
+  BENCHMARK(BM_Split_1d_##num_split##_##chunk_size)->UseRealTime();

 #define BM_SPLIT_2D(split_dim, num_split, chunk_size0, chunk_size1) \
   static void \
   BM_Split_2d_##split_dim##_##num_split##_##chunk_size0##_##chunk_size1( \
-      int iters) { \
-    testing::StopTiming(); \
-    testing::ItemsProcessed(static_cast<int64>(iters) * num_split * \
-                            chunk_size0 * chunk_size1); \
+      ::testing::benchmark::State& state) { \
     auto label = \
         strings::Printf("2-D %d chunks in dim %d of (%d * %d) each", \
                         num_split, split_dim, chunk_size0, chunk_size1); \
-    testing::SetLabel(label); \
-    testing::UseRealTime(); \
+    state.SetLabel(label); \
     auto g = MakeGraph(split_dim, num_split, {chunk_size0, chunk_size1}); \
-    testing::StartTiming(); \
-    test::Benchmark("cpu", g).Run(iters); \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
+                            num_split * chunk_size0 * chunk_size1); \
   } \
   BENCHMARK( \
-      BM_Split_2d_##split_dim##_##num_split##_##chunk_size0##_##chunk_size1);
+      BM_Split_2d_##split_dim##_##num_split##_##chunk_size0##_##chunk_size1) \
+      ->UseRealTime();

 BM_SPLIT_1D(5, 1);
 BM_SPLIT_1D(262144, 1);

@@ -73,43 +73,40 @@ static Graph* MakeGraph(int split_dim, const std::vector<int64>& size_splits,
 }

 #define BM_SPLITV_1D(num_split, total_size) \
-  static void BM_SplitV_1d_##num_split##_##total_size(int iters) { \
-    testing::StopTiming(); \
-    testing::ItemsProcessed(static_cast<int64>(iters) * total_size); \
+  static void BM_SplitV_1d_##num_split##_##total_size( \
+      ::testing::benchmark::State& state) { \
     auto label = \
         strings::Printf("1-D %d chunks totaling %d", num_split, total_size); \
-    testing::SetLabel(label); \
-    testing::UseRealTime(); \
+    state.SetLabel(label); \
     auto g = MakeGraph(/* split_dim = */ 0, \
                        GenerateRandomIntsWithSum(total_size, num_split), \
                        {total_size}); \
-    testing::StartTiming(); \
-    test::Benchmark("cpu", g).Run(iters); \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
+                            total_size); \
   } \
-  BENCHMARK(BM_SplitV_1d_##num_split##_##total_size);
+  BENCHMARK(BM_SplitV_1d_##num_split##_##total_size)->UseRealTime();

 #define BM_SPLITV_2D(split_dim, num_split, total_size0, total_size1) \
   static void \
   BM_SplitV_2d_##split_dim##_##num_split##_##total_size0##_##total_size1( \
-      int iters) { \
-    testing::StopTiming(); \
+      ::testing::benchmark::State& state) { \
     std::vector<int64> total_size_vec{total_size0, total_size1}; \
-    testing::ItemsProcessed(static_cast<int64>(iters) * total_size0 * \
-                            total_size1); \
     auto label = \
         strings::Printf("2-D %d chunks in dim %d totaling (%d * %d)", \
                         num_split, split_dim, total_size0, total_size1); \
-    testing::SetLabel(label); \
-    testing::UseRealTime(); \
+    state.SetLabel(label); \
     auto g = MakeGraph( \
         split_dim, \
         GenerateRandomIntsWithSum(total_size_vec[split_dim], num_split), \
         {total_size0, total_size1}); \
-    testing::StartTiming(); \
-    test::Benchmark("cpu", g).Run(iters); \
+    test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
+                            total_size0 * total_size1); \
   } \
   BENCHMARK( \
-      BM_SplitV_2d_##split_dim##_##num_split##_##total_size0##_##total_size1);
+      BM_SplitV_2d_##split_dim##_##num_split##_##total_size0##_##total_size1) \
+      ->UseRealTime();

 BM_SPLITV_1D(5, 20);
 BM_SPLITV_1D(262144, 1000000);
@@ -38,8 +38,8 @@ namespace {
 // For the benchmark, we set up two 2-dimensional tensors, each kDim1 x 'dim'
 // in size, and concat them together along "concat_dimension"
 template <typename T>
-static void SliceHelper(int iters, int size) {
-  testing::StopTiming();
+static void SliceHelper(::testing::benchmark::State& state) {
+  const int size = state.range(0);
   Graph* g = new Graph(OpRegistry::Global());
   DataType dt = DataTypeToEnum<T>::v();
   int kDim = 100;

@@ -70,32 +70,30 @@ static void SliceHelper(int iters, int size) {
           .Attr("T", dt)
           .Finalize(g, &node));

-  testing::BytesProcessed(static_cast<int64>(iters) * kDim * size * sizeof(T));
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
-  testing::UseRealTime();
+  test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
+  state.SetBytesProcessed(static_cast<int64>(state.iterations()) * kDim * size *
+                          sizeof(T));
 }

-static void BM_SliceFloat(int iters, int dim2) {
-  SliceHelper<float>(iters, dim2);
+void BM_SliceFloat(::testing::benchmark::State& state) {
+  SliceHelper<float>(state);
 }

-BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000);
+BENCHMARK(BM_SliceFloat)->UseRealTime()->Arg(100)->Arg(1000)->Arg(10000);

-static void BM_SliceComplex64(int iters, int dim2) {
-  SliceHelper<std::complex<float>>(iters, dim2);
+void BM_SliceComplex64(::testing::benchmark::State& state) {
+  SliceHelper<std::complex<float>>(state);
 }

-BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000);
+BENCHMARK(BM_SliceComplex64)->UseRealTime()->Arg(100)->Arg(1000)->Arg(10000);

-static void BM_SliceBFloat16(int iters, int dim2) {
-  SliceHelper<bfloat16>(iters, dim2);
+void BM_SliceBFloat16(::testing::benchmark::State& state) {
+  SliceHelper<bfloat16>(state);
 }

-BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000);
+BENCHMARK(BM_SliceBFloat16)->UseRealTime()->Arg(100)->Arg(1000)->Arg(10000);

-static void BM_ValidateStridedSliceOp(int iters) {
-  testing::StopTiming();
+void BM_ValidateStridedSliceOp(::testing::benchmark::State& state) {
   int kDim = 100;
   int kMaxSize = 15000;
   int size = 100;

@@ -104,8 +102,7 @@ static void BM_ValidateStridedSliceOp(int iters) {
   Tensor strides = test::AsTensor<int32>({1, 1});
   TensorShape input_shape({2 * kDim, kMaxSize});

-  testing::StartTiming();
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     TensorShape processing_shape, final_shape;
     bool is_identity = true, slice_dim0 = true, is_simple_slice = true;
     gtl::InlinedVector<int64, 4> begin_out, end_out, strides_out;
@@ -76,17 +76,17 @@ Graph* SetupStringSplitGraph(const Tensor& input) {
   return g;
 }

-void BM_StringSplit(int iters, int batch_size) {
-  testing::StopTiming();
-  testing::ItemsProcessed(static_cast<int64>(iters));
-  testing::UseRealTime();
+static void BM_StringSplit(::testing::benchmark::State& state) {
+  const int batch_size = state.range(0);
+
   Tensor input = GetTestTensor(batch_size);
   Graph* g = SetupStringSplitGraph(input);
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
+  state.SetItemsProcessed(static_cast<int64>(state.iterations()));
 }

 BENCHMARK(BM_StringSplit)
+    ->UseRealTime()
     ->Arg(1)
     ->Arg(8)
     ->Arg(16)

@@ -107,17 +107,17 @@ Graph* SetupStringSplitV2Graph(const Tensor& input) {
   return g;
 }

-void BM_StringSplitV2(int iters, int batch_size) {
-  testing::StopTiming();
-  testing::ItemsProcessed(static_cast<int64>(iters));
-  testing::UseRealTime();
+static void BM_StringSplitV2(::testing::benchmark::State& state) {
+  const int batch_size = state.range(0);
+
   Tensor input = GetTestTensor(batch_size);
   Graph* g = SetupStringSplitV2Graph(input);
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
+  state.SetItemsProcessed(static_cast<int64>(state.iterations()));
 }

 BENCHMARK(BM_StringSplitV2)
+    ->UseRealTime()
     ->Arg(1)
     ->Arg(8)
     ->Arg(16)
@@ -149,27 +149,26 @@ Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len,
   return g;
 }

-void BM_SubstrByte(int iters, int batch_size) {
-  testing::StopTiming();
-  testing::ItemsProcessed(static_cast<int64>(iters));
-  testing::UseRealTime();
+static void BM_SubstrByte(::testing::benchmark::State& state) {
+  const int batch_size = state.range(0);
+
   Tensor input = GetTestTensor(batch_size);
   Graph* g = SetupSubstrGraph(input, 3, 30, kByteUnit);
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
+  state.SetItemsProcessed(state.iterations());
 }

-void BM_SubstrUTF8(int iters, int batch_size) {
-  testing::StopTiming();
-  testing::ItemsProcessed(static_cast<int64>(iters));
-  testing::UseRealTime();
+static void BM_SubstrUTF8(::testing::benchmark::State& state) {
+  const int batch_size = state.range(0);
+
   Tensor input = GetTestUTF8Tensor(batch_size);
   Graph* g = SetupSubstrGraph(input, 3, 30, kUTF8Unit);
-  testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
+  state.SetItemsProcessed(state.iterations());
 }

 BENCHMARK(BM_SubstrByte)
+    ->UseRealTime()
     ->Arg(1)
     ->Arg(8)
     ->Arg(16)

@@ -178,6 +177,7 @@ BENCHMARK(BM_SubstrByte)
     ->Arg(128)
     ->Arg(256);
 BENCHMARK(BM_SubstrUTF8)
+    ->UseRealTime()
     ->Arg(1)
     ->Arg(8)
     ->Arg(16)
@@ -103,14 +103,18 @@ static void SGD(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_SGD(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_SGD(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+
   Graph* init;
   Graph* train;
   SGD(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_SGD)->Arg(128 << 10)->Arg(256 << 10);

@@ -135,14 +139,18 @@ static void Adagrad(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_Adagrad(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_Adagrad(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+
   Graph* init;
   Graph* train;
   Adagrad(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_Adagrad)->Arg(128 << 10)->Arg(256 << 10);

@@ -168,17 +176,22 @@ static void SparseAdagrad(int32 m, int32 n, Graph** init_g, Graph** train_g) {
     *train_g = g;
   }
 }
-static void BM_SparseAdagrad(int iters, int m, int n) {
-  const int64 tot = static_cast<int64>(iters) * m * n;
-  testing::UseRealTime();
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_SparseAdagrad(::testing::benchmark::State& state) {
+  const int m = state.range(0);
+  const int n = state.range(1);
+
   Graph* init;
   Graph* train;
   SparseAdagrad(m, n, &init, &train);
-  test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetMultiThreadedOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+  const int64 tot = static_cast<int64>(state.iterations()) * m * n;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_SparseAdagrad)
+    ->UseRealTime()
     ->ArgPair(128, 1 << 10)
     ->ArgPair(128, 4 << 10)
     ->ArgPair(128, 8 << 10)

@@ -208,14 +221,18 @@ static void Momentum(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_Momentum(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_Momentum(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+
   Graph* init;
   Graph* train;
   Momentum(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_Momentum)->Arg(128 << 10)->Arg(256 << 10);
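In the training-op benchmarks above, the Benchmark object is built with an init graph and session options, so the trailing old_benchmark_api flag can only be reached by also spelling out the intermediate constructor arguments that previously used their defaults (a null pointer and an empty string). A sketch of that call shape follows; "train", "init", and GetOptions() stand in for helpers built elsewhere in the test file, and the parameter names in the comments are my reading of test::Benchmark's constructor rather than something shown in this diff.

// Sketch only; names in comments are assumed, not confirmed by this diff.
test::Benchmark("cpu", train, GetOptions(), init,
                /*rendezvous (assumed name)=*/nullptr,
                /*executor_type (assumed name)=*/"",
                /*old_benchmark_api*/ false)
    .Run(state);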
@@ -251,19 +268,26 @@ static void Adam(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_Adam(int iters, int params, int is_multi_threaded) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_Adam(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+  const int is_multi_threaded = state.range(1);
+
   Graph* init;
   Graph* train;
   Adam(params, &init, &train);
   if (is_multi_threaded) {
     // Use max thread number if test performance.
-    test::Benchmark("cpu", train, nullptr, init).Run(iters);
+    test::Benchmark("cpu", train, nullptr, init, nullptr, "",
+                    /*old_benchmark_api*/ false)
+        .Run(state);
   } else {
-    test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+    test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                    /*old_benchmark_api*/ false)
+        .Run(state);
   }
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_Adam)->ArgPair(128 << 10, 0)->ArgPair(256 << 10, 0);
 BENCHMARK(BM_Adam)->ArgPair(256 << 5, 1)->ArgPair(256 << 16, 1);
@@ -297,14 +321,18 @@ static void RMSProp(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_RMSProp(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_RMSProp(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+
   Graph* init;
   Graph* train;
   RMSProp(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10);

@@ -334,14 +362,18 @@ static void AddSign(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_AddSign(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_AddSign(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+
   Graph* init;
   Graph* train;
   AddSign(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10);

@@ -371,14 +403,19 @@ static void PowerSign(int32 n, Graph** init_g, Graph** train_g) {
   }
 }

-static void BM_PowerSign(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
+static void BM_PowerSign(::testing::benchmark::State& state) {
+  const int params = state.range(0);
+
   Graph* init;
   Graph* train;
   PowerSign(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
+  test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
+                  /*old_benchmark_api*/ false)
+      .Run(state);
+
+  const int64 tot = static_cast<int64>(state.iterations()) * params;
+  state.SetItemsProcessed(tot);
+  state.SetBytesProcessed(tot * sizeof(float));
 }
 BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10);
@@ -108,11 +108,15 @@ static Graph* UnaryOpsChain(int tensor_size, int repeat_graph,
   return g;
 }

-#define BM_UnaryOpsChain(N, R, F, type) \
-  static void BM_UnaryOpsChain##_##type##_##N##_##R##_##F(int iters) { \
-    testing::ItemsProcessed(static_cast<int64>(iters) * N * R * F); \
-    test::Benchmark(#type, UnaryOpsChain(N, R, F)).Run(iters); \
-  } \
+#define BM_UnaryOpsChain(N, R, F, type) \
+  static void BM_UnaryOpsChain##_##type##_##N##_##R##_##F( \
+      ::testing::benchmark::State& state) { \
+    test::Benchmark(#type, UnaryOpsChain(N, R, F), \
+                    /*old_benchmark_api*/ false) \
+        .Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * N * R * \
+                            F); \
+  } \
   BENCHMARK(BM_UnaryOpsChain##_##type##_##N##_##R##_##F);

 // Unary ops fused together.

@@ -140,11 +144,15 @@ static Graph* UnaryOpsCompo(int tensor_size, int repeat_graph,
   return g;
 }

-#define BM_UnaryOpsCompo(N, R, F, type) \
-  static void BM_UnaryOpsCompo##_##type##_##N##_##R##_##F(int iters) { \
-    testing::ItemsProcessed(static_cast<int64>(iters) * N * R * F); \
-    test::Benchmark(#type, UnaryOpsCompo(N, R, F)).Run(iters); \
-  } \
+#define BM_UnaryOpsCompo(N, R, F, type) \
+  static void BM_UnaryOpsCompo##_##type##_##N##_##R##_##F( \
+      ::testing::benchmark::State& state) { \
+    test::Benchmark(#type, UnaryOpsCompo(N, R, F), \
+                    /*old_benchmark_api*/ false) \
+        .Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * N * R * \
+                            F); \
+  } \
   BENCHMARK(BM_UnaryOpsCompo##_##type##_##N##_##R##_##F);

 // BenchmarkName(tensor_size, repeat_graph, num_ops, type)
@@ -64,8 +64,10 @@ TensorProto GetRandomInt32TensorProtoWithRepeat(int dim, int repeat,
   return tensor_proto;
 }

-static void BM_Unique_INT32(int iters, int dim, int max_int) {
-  testing::StopTiming();
+void BM_Unique_INT32(::testing::benchmark::State& state) {
+  const int dim = state.range(0);
+  const int max_int = state.range(1);
+
   Graph* g = new Graph(OpRegistry::Global());

   Tensor input(DT_INT32, TensorShape({dim}));

@@ -78,16 +80,17 @@ static void BM_Unique_INT32(int iters, int dim, int max_int) {
           .Finalize(g, &node));
   FixupSourceAndSinkEdges(g);

-  testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(int32));
-  testing::UseRealTime();
-  testing::StartTiming();
   test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
-                  "SINGLE_THREADED_EXECUTOR")
-      .Run(iters);
+                  "SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false)
+      .Run(state);
+  state.SetBytesProcessed(static_cast<int64>(state.iterations()) * dim *
+                          sizeof(int32));
 }

-static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
-  testing::StopTiming();
+void BM_Unique_INT32_Repeat(::testing::benchmark::State& state) {
+  const int dim = state.range(0);
+  const int max_int = state.range(1);
+
   Graph* g = new Graph(OpRegistry::Global());

   Tensor input(DT_INT32, TensorShape({dim * 200}));

@@ -101,13 +104,11 @@ static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
           .Finalize(g, &node));
   FixupSourceAndSinkEdges(g);

-  testing::BytesProcessed(static_cast<int64>(iters) * dim * 200 *
-                          sizeof(int32));
-  testing::UseRealTime();
-  testing::StartTiming();
   test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
-                  "SINGLE_THREADED_EXECUTOR")
-      .Run(iters);
+                  "SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false)
+      .Run(state);
+  state.SetBytesProcessed(static_cast<int64>(state.iterations()) * dim * 200 *
+                          sizeof(int32));
 }

 TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) {

@@ -127,8 +128,9 @@ TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) {
   return tensor_proto;
 }

-static void BM_Unique_STRING(int iters, int dim) {
-  testing::StopTiming();
+void BM_Unique_STRING(::testing::benchmark::State& state) {
+  const int dim = state.range(0);
+
   Graph* g = new Graph(OpRegistry::Global());

   Tensor input(DT_STRING, TensorShape({dim}));

@@ -140,16 +142,15 @@ static void BM_Unique_STRING(int iters, int dim) {
           .Attr("T", DT_STRING)
           .Finalize(g, &node));
   FixupSourceAndSinkEdges(g);

-  testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(tstring));
-  testing::UseRealTime();
-  testing::StartTiming();
   test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
-                  "SINGLE_THREADED_EXECUTOR")
-      .Run(iters);
+                  "SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false)
+      .Run(state);
+  state.SetBytesProcessed(static_cast<int64>(state.iterations()) * dim *
+                          sizeof(tstring));
 }

 BENCHMARK(BM_Unique_INT32)
+    ->UseRealTime()
     ->ArgPair(32, 1024 * 1024)
     ->ArgPair(256, 1024 * 1024)
     ->ArgPair(1024, 1024 * 1024)

@@ -168,6 +169,7 @@ BENCHMARK(BM_Unique_INT32)
     ->ArgPair(4 * 1024 * 1024, 64 * 1024 * 1024);

 BENCHMARK(BM_Unique_INT32_Repeat)
+    ->UseRealTime()
     ->ArgPair(32, 1024 * 1024)
     ->ArgPair(256, 1024 * 1024)
     ->ArgPair(1024, 1024 * 1024)

@@ -192,6 +194,7 @@ BENCHMARK(BM_Unique_INT32_Repeat)
     ->ArgPair(1024 * 1024, 64 * 1024 * 1024);

 BENCHMARK(BM_Unique_STRING)
+    ->UseRealTime()
     ->Arg(32)
     ->Arg(256)
     ->Arg(1024)
@@ -28,8 +28,8 @@ namespace {
 // Benchmark to simulate the overhead in training and serving workloads from too
 // many threads grabbing the ResourceMgr lock at the same time because of the
 // variable and queue ops.
-void ManyManyVariablesHelper(int threads, int variables, int iters) {
-  testing::StopTiming();
+void ManyManyVariablesHelper(int threads, int variables,
+                             ::testing::benchmark::State& state) {
   Graph g(OpRegistry::Global());
   std::vector<string> targets;
   for (int i = 0; i < variables; ++i) {

@@ -50,16 +50,16 @@ void ManyManyVariablesHelper(int threads, int variables, int iters) {
   Session* sess = NewSession(opts);
   TF_CHECK_OK(sess->Create(gd));
   TF_CHECK_OK(sess->Run({}, {}, targets, nullptr));
-  testing::StartTiming();
-  for (int i = 0; i < iters; ++i) {
+  for (auto s : state) {
     TF_CHECK_OK(sess->Run({}, {}, targets, nullptr));
   }
-  testing::StopTiming();
   delete sess;
 }

-void BM_ManyManyVariablesManyThreads(int iters, int threads) {
-  ManyManyVariablesHelper(threads, 1000, iters);
+void BM_ManyManyVariablesManyThreads(::testing::benchmark::State& state) {
+  const int threads = state.range(0);
+
+  ManyManyVariablesHelper(threads, 1000, state);
 }

 BENCHMARK(BM_ManyManyVariablesManyThreads)->Arg(50);
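Benchmarks that drive the work themselves, such as BM_ValidateStridedSliceOp and ManyManyVariablesHelper above, replace the explicit StartTiming / counted loop / StopTiming sequence with a range-for over the State object, which starts and stops the timer around the loop body automatically. A minimal sketch of that sub-pattern, using the session call from ManyManyVariablesHelper as the per-iteration work:

// Old:
//   testing::StartTiming();
//   for (int i = 0; i < iters; ++i) {
//     TF_CHECK_OK(sess->Run({}, {}, targets, nullptr));
//   }
//   testing::StopTiming();
// New: iterating the state times exactly the body of the loop.
for (auto s : state) {
  TF_CHECK_OK(sess->Run({}, {}, targets, nullptr));
}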
@@ -33,11 +33,14 @@ static Graph* Xent(int batch_size, int num_classes) {
   return g;
 }

-#define BM_XentDev(BATCH, CLASS, DEVICE) \
-  static void BM_Xent##_##BATCH##_##CLASS##_##DEVICE(int iters) { \
-    testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * CLASS); \
-    test::Benchmark(#DEVICE, Xent(BATCH, CLASS)).Run(iters); \
-  } \
+#define BM_XentDev(BATCH, CLASS, DEVICE) \
+  static void BM_Xent##_##BATCH##_##CLASS##_##DEVICE( \
+      ::testing::benchmark::State& state) { \
+    test::Benchmark(#DEVICE, Xent(BATCH, CLASS), /*old_benchmark_api*/ false) \
+        .Run(state); \
+    state.SetItemsProcessed(static_cast<int64>(state.iterations()) * BATCH * \
+                            CLASS); \
+  } \
   BENCHMARK(BM_Xent##_##BATCH##_##CLASS##_##DEVICE);

 /// The representative tests for ptb_word on GPU