Internal tests cleanup.

PiperOrigin-RevId: 339762896
Change-Id: I2e06cf0b409a1e621cd567060ba8670ce70d34c9
This commit is contained in:
A. Unique TensorFlower 2020-10-29 15:49:24 -07:00 committed by TensorFlower Gardener
parent b6aa9f3368
commit 04f62ae3b1
10 changed files with 206 additions and 165 deletions

View File

@ -44,38 +44,34 @@ static Graph* MakeGraph(int split_dim, int num_split,
}
#define BM_SPLIT_1D(num_split, chunk_size) \
static void BM_Split_1d_##num_split##_##chunk_size(int iters) { \
testing::StopTiming(); \
testing::ItemsProcessed(static_cast<int64>(iters) * num_split * \
chunk_size); \
static void BM_Split_1d_##num_split##_##chunk_size( \
::testing::benchmark::State& state) { \
auto label = \
strings::Printf("1-D %d chunks of %d each", num_split, chunk_size); \
testing::SetLabel(label); \
testing::UseRealTime(); \
state.SetLabel(label); \
auto g = MakeGraph(/* split_dim = */ 0, num_split, {chunk_size}); \
testing::StartTiming(); \
test::Benchmark("cpu", g).Run(iters); \
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
num_split * chunk_size); \
} \
BENCHMARK(BM_Split_1d_##num_split##_##chunk_size);
BENCHMARK(BM_Split_1d_##num_split##_##chunk_size)->UseRealTime();
#define BM_SPLIT_2D(split_dim, num_split, chunk_size0, chunk_size1) \
static void \
BM_Split_2d_##split_dim##_##num_split##_##chunk_size0##_##chunk_size1( \
int iters) { \
testing::StopTiming(); \
testing::ItemsProcessed(static_cast<int64>(iters) * num_split * \
chunk_size0 * chunk_size1); \
::testing::benchmark::State& state) { \
auto label = \
strings::Printf("2-D %d chunks in dim %d of (%d * %d) each", \
num_split, split_dim, chunk_size0, chunk_size1); \
testing::SetLabel(label); \
testing::UseRealTime(); \
state.SetLabel(label); \
auto g = MakeGraph(split_dim, num_split, {chunk_size0, chunk_size1}); \
testing::StartTiming(); \
test::Benchmark("cpu", g).Run(iters); \
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
num_split * chunk_size0 * chunk_size1); \
} \
BENCHMARK( \
BM_Split_2d_##split_dim##_##num_split##_##chunk_size0##_##chunk_size1);
BM_Split_2d_##split_dim##_##num_split##_##chunk_size0##_##chunk_size1) \
->UseRealTime();
BM_SPLIT_1D(5, 1);
BM_SPLIT_1D(262144, 1);

View File

@ -73,43 +73,40 @@ static Graph* MakeGraph(int split_dim, const std::vector<int64>& size_splits,
}
#define BM_SPLITV_1D(num_split, total_size) \
static void BM_SplitV_1d_##num_split##_##total_size(int iters) { \
testing::StopTiming(); \
testing::ItemsProcessed(static_cast<int64>(iters) * total_size); \
static void BM_SplitV_1d_##num_split##_##total_size( \
::testing::benchmark::State& state) { \
auto label = \
strings::Printf("1-D %d chunks totaling %d", num_split, total_size); \
testing::SetLabel(label); \
testing::UseRealTime(); \
state.SetLabel(label); \
auto g = MakeGraph(/* split_dim = */ 0, \
GenerateRandomIntsWithSum(total_size, num_split), \
{total_size}); \
testing::StartTiming(); \
test::Benchmark("cpu", g).Run(iters); \
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
total_size); \
} \
BENCHMARK(BM_SplitV_1d_##num_split##_##total_size);
BENCHMARK(BM_SplitV_1d_##num_split##_##total_size)->UseRealTime();
#define BM_SPLITV_2D(split_dim, num_split, total_size0, total_size1) \
static void \
BM_SplitV_2d_##split_dim##_##num_split##_##total_size0##_##total_size1( \
int iters) { \
testing::StopTiming(); \
::testing::benchmark::State& state) { \
std::vector<int64> total_size_vec{total_size0, total_size1}; \
testing::ItemsProcessed(static_cast<int64>(iters) * total_size0 * \
total_size1); \
auto label = \
strings::Printf("2-D %d chunks in dim %d totaling (%d * %d)", \
num_split, split_dim, total_size0, total_size1); \
testing::SetLabel(label); \
testing::UseRealTime(); \
state.SetLabel(label); \
auto g = MakeGraph( \
split_dim, \
GenerateRandomIntsWithSum(total_size_vec[split_dim], num_split), \
{total_size0, total_size1}); \
testing::StartTiming(); \
test::Benchmark("cpu", g).Run(iters); \
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * \
total_size0 * total_size1); \
} \
BENCHMARK( \
BM_SplitV_2d_##split_dim##_##num_split##_##total_size0##_##total_size1);
BM_SplitV_2d_##split_dim##_##num_split##_##total_size0##_##total_size1) \
->UseRealTime();
BM_SPLITV_1D(5, 20);
BM_SPLITV_1D(262144, 1000000);

View File

@ -38,8 +38,8 @@ namespace {
// For the benchmark, we set up two 2-dimensional tensors, each kDim1 x 'dim'
// in size, and concat them together along "concat_dimension"
template <typename T>
static void SliceHelper(int iters, int size) {
testing::StopTiming();
static void SliceHelper(::testing::benchmark::State& state) {
const int size = state.range(0);
Graph* g = new Graph(OpRegistry::Global());
DataType dt = DataTypeToEnum<T>::v();
int kDim = 100;
@ -70,32 +70,30 @@ static void SliceHelper(int iters, int size) {
.Attr("T", dt)
.Finalize(g, &node));
testing::BytesProcessed(static_cast<int64>(iters) * kDim * size * sizeof(T));
testing::StartTiming();
test::Benchmark("cpu", g).Run(iters);
testing::UseRealTime();
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
state.SetBytesProcessed(static_cast<int64>(state.iterations()) * kDim * size *
sizeof(T));
}
static void BM_SliceFloat(int iters, int dim2) {
SliceHelper<float>(iters, dim2);
void BM_SliceFloat(::testing::benchmark::State& state) {
SliceHelper<float>(state);
}
BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000);
BENCHMARK(BM_SliceFloat)->UseRealTime()->Arg(100)->Arg(1000)->Arg(10000);
static void BM_SliceComplex64(int iters, int dim2) {
SliceHelper<std::complex<float>>(iters, dim2);
void BM_SliceComplex64(::testing::benchmark::State& state) {
SliceHelper<std::complex<float>>(state);
}
BENCHMARK(BM_SliceComplex64)->Arg(100)->Arg(1000)->Arg(10000);
BENCHMARK(BM_SliceComplex64)->UseRealTime()->Arg(100)->Arg(1000)->Arg(10000);
static void BM_SliceBFloat16(int iters, int dim2) {
SliceHelper<bfloat16>(iters, dim2);
void BM_SliceBFloat16(::testing::benchmark::State& state) {
SliceHelper<bfloat16>(state);
}
BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000);
BENCHMARK(BM_SliceBFloat16)->UseRealTime()->Arg(100)->Arg(1000)->Arg(10000);
static void BM_ValidateStridedSliceOp(int iters) {
testing::StopTiming();
void BM_ValidateStridedSliceOp(::testing::benchmark::State& state) {
int kDim = 100;
int kMaxSize = 15000;
int size = 100;
@ -104,8 +102,7 @@ static void BM_ValidateStridedSliceOp(int iters) {
Tensor strides = test::AsTensor<int32>({1, 1});
TensorShape input_shape({2 * kDim, kMaxSize});
testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
TensorShape processing_shape, final_shape;
bool is_identity = true, slice_dim0 = true, is_simple_slice = true;
gtl::InlinedVector<int64, 4> begin_out, end_out, strides_out;

View File

@ -76,17 +76,17 @@ Graph* SetupStringSplitGraph(const Tensor& input) {
return g;
}
void BM_StringSplit(int iters, int batch_size) {
testing::StopTiming();
testing::ItemsProcessed(static_cast<int64>(iters));
testing::UseRealTime();
static void BM_StringSplit(::testing::benchmark::State& state) {
const int batch_size = state.range(0);
Tensor input = GetTestTensor(batch_size);
Graph* g = SetupStringSplitGraph(input);
testing::StartTiming();
test::Benchmark("cpu", g).Run(iters);
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
state.SetItemsProcessed(static_cast<int64>(state.iterations()));
}
BENCHMARK(BM_StringSplit)
->UseRealTime()
->Arg(1)
->Arg(8)
->Arg(16)
@ -107,17 +107,17 @@ Graph* SetupStringSplitV2Graph(const Tensor& input) {
return g;
}
void BM_StringSplitV2(int iters, int batch_size) {
testing::StopTiming();
testing::ItemsProcessed(static_cast<int64>(iters));
testing::UseRealTime();
static void BM_StringSplitV2(::testing::benchmark::State& state) {
const int batch_size = state.range(0);
Tensor input = GetTestTensor(batch_size);
Graph* g = SetupStringSplitV2Graph(input);
testing::StartTiming();
test::Benchmark("cpu", g).Run(iters);
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
state.SetItemsProcessed(static_cast<int64>(state.iterations()));
}
BENCHMARK(BM_StringSplitV2)
->UseRealTime()
->Arg(1)
->Arg(8)
->Arg(16)

View File

@ -149,27 +149,26 @@ Graph* SetupSubstrGraph(const Tensor& input, const int32 pos, const int32 len,
return g;
}
void BM_SubstrByte(int iters, int batch_size) {
testing::StopTiming();
testing::ItemsProcessed(static_cast<int64>(iters));
testing::UseRealTime();
static void BM_SubstrByte(::testing::benchmark::State& state) {
const int batch_size = state.range(0);
Tensor input = GetTestTensor(batch_size);
Graph* g = SetupSubstrGraph(input, 3, 30, kByteUnit);
testing::StartTiming();
test::Benchmark("cpu", g).Run(iters);
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
state.SetItemsProcessed(state.iterations());
}
void BM_SubstrUTF8(int iters, int batch_size) {
testing::StopTiming();
testing::ItemsProcessed(static_cast<int64>(iters));
testing::UseRealTime();
static void BM_SubstrUTF8(::testing::benchmark::State& state) {
const int batch_size = state.range(0);
Tensor input = GetTestUTF8Tensor(batch_size);
Graph* g = SetupSubstrGraph(input, 3, 30, kUTF8Unit);
testing::StartTiming();
test::Benchmark("cpu", g).Run(iters);
test::Benchmark("cpu", g, /*old_benchmark_api*/ false).Run(state);
state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_SubstrByte)
->UseRealTime()
->Arg(1)
->Arg(8)
->Arg(16)
@ -178,6 +177,7 @@ BENCHMARK(BM_SubstrByte)
->Arg(128)
->Arg(256);
BENCHMARK(BM_SubstrUTF8)
->UseRealTime()
->Arg(1)
->Arg(8)
->Arg(16)

View File

@ -103,14 +103,18 @@ static void SGD(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_SGD(int iters, int params) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_SGD(::testing::benchmark::State& state) {
const int params = state.range(0);
Graph* init;
Graph* train;
SGD(params, &init, &train);
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_SGD)->Arg(128 << 10)->Arg(256 << 10);
@ -135,14 +139,18 @@ static void Adagrad(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_Adagrad(int iters, int params) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_Adagrad(::testing::benchmark::State& state) {
const int params = state.range(0);
Graph* init;
Graph* train;
Adagrad(params, &init, &train);
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_Adagrad)->Arg(128 << 10)->Arg(256 << 10);
@ -168,17 +176,22 @@ static void SparseAdagrad(int32 m, int32 n, Graph** init_g, Graph** train_g) {
*train_g = g;
}
}
static void BM_SparseAdagrad(int iters, int m, int n) {
const int64 tot = static_cast<int64>(iters) * m * n;
testing::UseRealTime();
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_SparseAdagrad(::testing::benchmark::State& state) {
const int m = state.range(0);
const int n = state.range(1);
Graph* init;
Graph* train;
SparseAdagrad(m, n, &init, &train);
test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetMultiThreadedOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * m * n;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_SparseAdagrad)
->UseRealTime()
->ArgPair(128, 1 << 10)
->ArgPair(128, 4 << 10)
->ArgPair(128, 8 << 10)
@ -208,14 +221,18 @@ static void Momentum(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_Momentum(int iters, int params) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_Momentum(::testing::benchmark::State& state) {
const int params = state.range(0);
Graph* init;
Graph* train;
Momentum(params, &init, &train);
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_Momentum)->Arg(128 << 10)->Arg(256 << 10);
@ -251,19 +268,26 @@ static void Adam(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_Adam(int iters, int params, int is_multi_threaded) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_Adam(::testing::benchmark::State& state) {
const int params = state.range(0);
const int is_multi_threaded = state.range(1);
Graph* init;
Graph* train;
Adam(params, &init, &train);
if (is_multi_threaded) {
// Use the maximum number of threads when testing performance.
test::Benchmark("cpu", train, nullptr, init).Run(iters);
test::Benchmark("cpu", train, nullptr, init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
} else {
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
}
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_Adam)->ArgPair(128 << 10, 0)->ArgPair(256 << 10, 0);
BENCHMARK(BM_Adam)->ArgPair(256 << 5, 1)->ArgPair(256 << 16, 1);
@ -297,14 +321,18 @@ static void RMSProp(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_RMSProp(int iters, int params) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_RMSProp(::testing::benchmark::State& state) {
const int params = state.range(0);
Graph* init;
Graph* train;
RMSProp(params, &init, &train);
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10);
@ -334,14 +362,18 @@ static void AddSign(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_AddSign(int iters, int params) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_AddSign(::testing::benchmark::State& state) {
const int params = state.range(0);
Graph* init;
Graph* train;
AddSign(params, &init, &train);
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10);
@ -371,14 +403,19 @@ static void PowerSign(int32 n, Graph** init_g, Graph** train_g) {
}
}
static void BM_PowerSign(int iters, int params) {
const int64 tot = static_cast<int64>(iters) * params;
testing::ItemsProcessed(tot);
testing::BytesProcessed(tot * sizeof(float));
static void BM_PowerSign(::testing::benchmark::State& state) {
const int params = state.range(0);
Graph* init;
Graph* train;
PowerSign(params, &init, &train);
test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
test::Benchmark("cpu", train, GetOptions(), init, nullptr, "",
/*old_benchmark_api*/ false)
.Run(state);
const int64 tot = static_cast<int64>(state.iterations()) * params;
state.SetItemsProcessed(tot);
state.SetBytesProcessed(tot * sizeof(float));
}
BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10);

View File

@ -108,11 +108,15 @@ static Graph* UnaryOpsChain(int tensor_size, int repeat_graph,
return g;
}
#define BM_UnaryOpsChain(N, R, F, type) \
static void BM_UnaryOpsChain##_##type##_##N##_##R##_##F(int iters) { \
testing::ItemsProcessed(static_cast<int64>(iters) * N * R * F); \
test::Benchmark(#type, UnaryOpsChain(N, R, F)).Run(iters); \
} \
#define BM_UnaryOpsChain(N, R, F, type) \
static void BM_UnaryOpsChain##_##type##_##N##_##R##_##F( \
::testing::benchmark::State& state) { \
test::Benchmark(#type, UnaryOpsChain(N, R, F), \
/*old_benchmark_api*/ false) \
.Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * N * R * \
F); \
} \
BENCHMARK(BM_UnaryOpsChain##_##type##_##N##_##R##_##F);
// Unary ops fused together.
@ -140,11 +144,15 @@ static Graph* UnaryOpsCompo(int tensor_size, int repeat_graph,
return g;
}
#define BM_UnaryOpsCompo(N, R, F, type) \
static void BM_UnaryOpsCompo##_##type##_##N##_##R##_##F(int iters) { \
testing::ItemsProcessed(static_cast<int64>(iters) * N * R * F); \
test::Benchmark(#type, UnaryOpsCompo(N, R, F)).Run(iters); \
} \
#define BM_UnaryOpsCompo(N, R, F, type) \
static void BM_UnaryOpsCompo##_##type##_##N##_##R##_##F( \
::testing::benchmark::State& state) { \
test::Benchmark(#type, UnaryOpsCompo(N, R, F), \
/*old_benchmark_api*/ false) \
.Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * N * R * \
F); \
} \
BENCHMARK(BM_UnaryOpsCompo##_##type##_##N##_##R##_##F);
// BenchmarkName(tensor_size, repeat_graph, num_ops, type)

View File

@ -64,8 +64,10 @@ TensorProto GetRandomInt32TensorProtoWithRepeat(int dim, int repeat,
return tensor_proto;
}
static void BM_Unique_INT32(int iters, int dim, int max_int) {
testing::StopTiming();
void BM_Unique_INT32(::testing::benchmark::State& state) {
const int dim = state.range(0);
const int max_int = state.range(1);
Graph* g = new Graph(OpRegistry::Global());
Tensor input(DT_INT32, TensorShape({dim}));
@ -78,16 +80,17 @@ static void BM_Unique_INT32(int iters, int dim, int max_int) {
.Finalize(g, &node));
FixupSourceAndSinkEdges(g);
testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(int32));
testing::UseRealTime();
testing::StartTiming();
test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
"SINGLE_THREADED_EXECUTOR")
.Run(iters);
"SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false)
.Run(state);
state.SetBytesProcessed(static_cast<int64>(state.iterations()) * dim *
sizeof(int32));
}
static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
testing::StopTiming();
void BM_Unique_INT32_Repeat(::testing::benchmark::State& state) {
const int dim = state.range(0);
const int max_int = state.range(1);
Graph* g = new Graph(OpRegistry::Global());
Tensor input(DT_INT32, TensorShape({dim * 200}));
@ -101,13 +104,11 @@ static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
.Finalize(g, &node));
FixupSourceAndSinkEdges(g);
testing::BytesProcessed(static_cast<int64>(iters) * dim * 200 *
sizeof(int32));
testing::UseRealTime();
testing::StartTiming();
test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
"SINGLE_THREADED_EXECUTOR")
.Run(iters);
"SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false)
.Run(state);
state.SetBytesProcessed(static_cast<int64>(state.iterations()) * dim * 200 *
sizeof(int32));
}
TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) {
@ -127,8 +128,9 @@ TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) {
return tensor_proto;
}
static void BM_Unique_STRING(int iters, int dim) {
testing::StopTiming();
void BM_Unique_STRING(::testing::benchmark::State& state) {
const int dim = state.range(0);
Graph* g = new Graph(OpRegistry::Global());
Tensor input(DT_STRING, TensorShape({dim}));
@ -140,16 +142,15 @@ static void BM_Unique_STRING(int iters, int dim) {
.Attr("T", DT_STRING)
.Finalize(g, &node));
FixupSourceAndSinkEdges(g);
testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(tstring));
testing::UseRealTime();
testing::StartTiming();
test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
"SINGLE_THREADED_EXECUTOR")
.Run(iters);
"SINGLE_THREADED_EXECUTOR", /*old_benchmark_api*/ false)
.Run(state);
state.SetBytesProcessed(static_cast<int64>(state.iterations()) * dim *
sizeof(tstring));
}
BENCHMARK(BM_Unique_INT32)
->UseRealTime()
->ArgPair(32, 1024 * 1024)
->ArgPair(256, 1024 * 1024)
->ArgPair(1024, 1024 * 1024)
@ -168,6 +169,7 @@ BENCHMARK(BM_Unique_INT32)
->ArgPair(4 * 1024 * 1024, 64 * 1024 * 1024);
BENCHMARK(BM_Unique_INT32_Repeat)
->UseRealTime()
->ArgPair(32, 1024 * 1024)
->ArgPair(256, 1024 * 1024)
->ArgPair(1024, 1024 * 1024)
@ -192,6 +194,7 @@ BENCHMARK(BM_Unique_INT32_Repeat)
->ArgPair(1024 * 1024, 64 * 1024 * 1024);
BENCHMARK(BM_Unique_STRING)
->UseRealTime()
->Arg(32)
->Arg(256)
->Arg(1024)

View File

@ -28,8 +28,8 @@ namespace {
// Benchmark to simulate the overhead in training and serving workloads from too
// many threads grabbing the ResourceMgr lock at the same time because of the
// variable and queue ops.
void ManyManyVariablesHelper(int threads, int variables, int iters) {
testing::StopTiming();
void ManyManyVariablesHelper(int threads, int variables,
::testing::benchmark::State& state) {
Graph g(OpRegistry::Global());
std::vector<string> targets;
for (int i = 0; i < variables; ++i) {
@ -50,16 +50,16 @@ void ManyManyVariablesHelper(int threads, int variables, int iters) {
Session* sess = NewSession(opts);
TF_CHECK_OK(sess->Create(gd));
TF_CHECK_OK(sess->Run({}, {}, targets, nullptr));
testing::StartTiming();
for (int i = 0; i < iters; ++i) {
for (auto s : state) {
TF_CHECK_OK(sess->Run({}, {}, targets, nullptr));
}
testing::StopTiming();
delete sess;
}
void BM_ManyManyVariablesManyThreads(int iters, int threads) {
ManyManyVariablesHelper(threads, 1000, iters);
void BM_ManyManyVariablesManyThreads(::testing::benchmark::State& state) {
const int threads = state.range(0);
ManyManyVariablesHelper(threads, 1000, state);
}
BENCHMARK(BM_ManyManyVariablesManyThreads)->Arg(50);

View File

@ -33,11 +33,14 @@ static Graph* Xent(int batch_size, int num_classes) {
return g;
}
#define BM_XentDev(BATCH, CLASS, DEVICE) \
static void BM_Xent##_##BATCH##_##CLASS##_##DEVICE(int iters) { \
testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * CLASS); \
test::Benchmark(#DEVICE, Xent(BATCH, CLASS)).Run(iters); \
} \
#define BM_XentDev(BATCH, CLASS, DEVICE) \
static void BM_Xent##_##BATCH##_##CLASS##_##DEVICE( \
::testing::benchmark::State& state) { \
test::Benchmark(#DEVICE, Xent(BATCH, CLASS), /*old_benchmark_api*/ false) \
.Run(state); \
state.SetItemsProcessed(static_cast<int64>(state.iterations()) * BATCH * \
CLASS); \
} \
BENCHMARK(BM_Xent##_##BATCH##_##CLASS##_##DEVICE);
/// The representative tests for ptb_word on GPU