Open source auto-clustering integration tests

PiperOrigin-RevId: 250616023
This commit is contained in:
Sanjoy Das 2019-05-29 19:48:02 -07:00 committed by TensorFlower Gardener
parent 6959f065f5
commit cd4872bd01
8 changed files with 131384 additions and 0 deletions

View File

@ -0,0 +1,45 @@
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
# Shared helper library for the auto-clustering integration tests and
# benchmarks. It is test-only but publicly visible, unlike the rest of this
# package (whose default visibility is private).
cc_library(
name = "auto_clustering_test_helper",
testonly = True,
srcs = ["auto_clustering_test_helper.cc"],
hdrs = ["auto_clustering_test_helper.h"],
visibility = ["//visibility:public"],
deps = [
"//tensorflow/compiler/jit:compilation_passes",
"//tensorflow/compiler/jit:jit_compilation_passes",
"//tensorflow/compiler/jit:xla_cluster_util",
"//tensorflow/compiler/jit:xla_cpu_jit",
"//tensorflow/compiler/jit:xla_gpu_jit",
"//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor",
"//tensorflow/core:framework",
"//tensorflow/core:graph",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/tools/optimization:optimization_pass_runner_lib",
"@com_google_absl//absl/strings",
],
)
# Golden-file tests for the auto-clustering pass pipeline. Every graph under
# test needs both its .pbtxt input and its .golden_summary listed in `data`.
tf_cc_test(
name = "auto_clustering_test",
srcs = ["auto_clustering_test.cc"],
data = [
"keras_imagenet_main.golden_summary",
"keras_imagenet_main.pbtxt",
"keras_imagenet_main_graph_mode.golden_summary",
"keras_imagenet_main_graph_mode.pbtxt",
],
# NOTE(review): presumably restricts this target to CUDA-configured builds;
# the test helper itself returns early when not built with --config=cuda.
tags = ["config-cuda-only"],
deps = [
":auto_clustering_test_helper",
"//tensorflow/core:test",
"@com_google_absl//absl/strings",
],
)

View File

@ -0,0 +1,69 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "absl/strings/str_cat.h"
#include "tensorflow/compiler/jit/tests/auto_clustering_test_helper.h"
#include "tensorflow/core/lib/core/status_test_util.h"
namespace tensorflow {
namespace {
// Fixture for the golden-file tests in this directory.  Test cases name a
// graph by `key`; the fixture expands that key into the paths of the input
// graph and of its golden summary.
class AutoClusteringTestImpl : public AutoClusteringTest {
 protected:
  // Runs the auto-clustering test on <src root>/compiler/jit/tests/<key>.pbtxt
  // and compares against <src root>/compiler/jit/tests/<key>.golden_summary.
  Status RunAutoClusteringTest(absl::string_view key) {
    const string path_stem =
        absl::StrCat(testing::TensorFlowSrcRoot(), "/compiler/jit/tests/", key);
    const string graph_def_path = absl::StrCat(path_stem, ".pbtxt");
    const string golden_summary_path =
        absl::StrCat(path_stem, ".golden_summary");
    return AutoClusteringTest::RunAutoClusteringTest(graph_def_path,
                                                     golden_summary_path);
  }
};
// Benchmarks MarkForCompilationPass on the graph stored at
// <src root>/compiler/jit/tests/<key>.pbtxt and forwards the resulting status.
Status BenchmarkHelper(absl::string_view key, benchmark::State& state) {
  const string graph_def_path = absl::StrCat(
      testing::TensorFlowSrcRoot(), "/compiler/jit/tests/", key, ".pbtxt");
  return BenchmarkMarkForCompilation(graph_def_path, state);
}
// Runs the auto-clustering test on keras_imagenet_main.pbtxt and compares the
// result against keras_imagenet_main.golden_summary.
TEST_F(AutoClusteringTestImpl, KerasImagenetMain) {
// The input graph was generated from
//
// bazel run -c opt --config=cuda \
// tensorflow_models/official/resnet/keras:keras_imagenet_main \
// -- --skip_eval --num_gpus=1 --dtype=fp16 --batch_size=192 \
// --train_steps=210 --enable_xla --enable_eager=true
//
// At CL 245846452
TF_ASSERT_OK(RunAutoClusteringTest("keras_imagenet_main"));
}
// Runs the auto-clustering test on keras_imagenet_main_graph_mode.pbtxt and
// compares the result against keras_imagenet_main_graph_mode.golden_summary.
TEST_F(AutoClusteringTestImpl, KerasImagenetMainGraphMode) {
// The input graph was generated from
//
// bazel run -c opt --config=cuda \
// tensorflow_models/official/resnet/keras:keras_imagenet_main \
// -- --use_synthetic_data --num_gpus=1 --batch_size=117 --train_steps=600 \
// --skip_eval=True --logtostderr --enable_xla
TF_ASSERT_OK(RunAutoClusteringTest("keras_imagenet_main_graph_mode"));
}
// Benchmark of MarkForCompilationPass on the keras_imagenet_main graph.  Any
// error reported by the helper is fatal (TF_CHECK_OK).
void BM_MarkForCompilationPass_KerasImagenetMain(benchmark::State& state) {
TF_CHECK_OK(BenchmarkHelper("keras_imagenet_main", state));
}
BENCHMARK(BM_MarkForCompilationPass_KerasImagenetMain);
} // namespace
} // namespace tensorflow

View File

@ -0,0 +1,184 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/jit/tests/auto_clustering_test_helper.h"
#include "absl/strings/numbers.h"
#include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
#include "tensorflow/compiler/jit/xla_cluster_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/util/port.h"
#include "tensorflow/tools/optimization/optimization_pass_runner.h"
namespace tensorflow {
namespace {
// Builds a human-readable summary of the clustering decisions recorded in
// `auto_clustered_graph_def`.
//
// The summary starts with the number of clustered nodes, the number of
// unclustered nodes and the number of clusters.  It then lists, for the
// unclustered nodes and for each cluster in increasing cluster id order, the
// bucket size followed by a per-op-type node count.
//
// Returns an error if the GraphDef cannot be converted to a Graph or if a
// node's cluster name is not of the form "cluster_<integer>".
xla::StatusOr<string> SummarizeClustering(
    const GraphDef& auto_clustered_graph_def) {
  testing::ResetClusterSequenceNumber();
  Graph graph(OpRegistry::Global());
  GraphConstructorOptions graph_opts;
  graph_opts.expect_device_spec = true;
  graph_opts.allow_internal_ops = true;
  TF_RETURN_IF_ERROR(
      ConvertGraphDefToGraph(graph_opts, auto_clustered_graph_def, &graph));

  // Pseudo cluster id under which all unclustered nodes are collected.
  const int kNoCluster = -1;
  const absl::string_view kClusterPrefix = "cluster_";

  // cluster_id -> (operation name -> # of operations)
  std::map<int, std::map<string, int>> clusters;
  std::map<int, int> cluster_size;
  int clustered_nodes = 0;
  for (Node* n : graph.op_nodes()) {
    int cluster = kNoCluster;
    if (absl::optional<absl::string_view> maybe_cluster =
            GetXlaClusterForNode(*n)) {
      // remove_prefix() is undefined for names shorter than the prefix, so
      // validate the name before stripping it.
      TF_RET_CHECK(maybe_cluster->size() >= kClusterPrefix.size() &&
                   maybe_cluster->substr(0, kClusterPrefix.size()) ==
                       kClusterPrefix)
          << "Unexpected cluster name: " << *maybe_cluster;
      maybe_cluster->remove_prefix(kClusterPrefix.size());
      TF_RET_CHECK(absl::SimpleAtoi(*maybe_cluster, &cluster));
      clustered_nodes++;
    }
    clusters[cluster][n->type_string()]++;
    cluster_size[cluster]++;
  }

  // Only discount the unclustered bucket when it actually exists; a graph in
  // which every node is clustered (or an empty graph) has no such bucket.
  const size_t num_clusters = clusters.size() - clusters.count(kNoCluster);
  const int unclustered_nodes = cluster_size[kNoCluster];

  string result =
      absl::StrCat("Clustered nodes: ", clustered_nodes,
                   "\nUnclustered nodes: ", unclustered_nodes,
                   "\nNumber of clusters: ", num_clusters, "\n\n");
  for (const auto& pair : clusters) {
    if (pair.first == kNoCluster) {
      absl::StrAppend(&result, "unclustered");
    } else {
      absl::StrAppend(&result, "cluster ", pair.first);
    }
    absl::StrAppend(&result, " size ", cluster_size[pair.first], "\n");
    for (const auto& ops_and_counts : pair.second) {
      absl::StrAppend(&result, " ", ops_and_counts.first, " ",
                      ops_and_counts.second, "\n");
    }
  }
  return result;
}
// Returns InvalidArgument if any node in `graphdef` already carries one of
// the XLA clustering attributes, and OK otherwise.
Status AssertGraphDefIsUnclustered(const GraphDef& graphdef) {
  // Attributes whose presence means the graph has already been clustered.
  static const char* const kClusteringAttrs[] = {"_XlaCluster",
                                                 "_XlaAlreadyClustered"};
  for (const NodeDef& node : graphdef.node()) {
    const auto& node_attrs = node.attr();
    for (const char* clustering_attr : kClusteringAttrs) {
      if (node_attrs.count(clustering_attr) == 0) {
        continue;
      }
      return errors::InvalidArgument(
          "Input files are already clustered, you probably copied in "
          "mark_for_compilation_<n>.pbtxt when you should have copied in "
          "before_mark_for_compilation_<n>.pbtxt");
    }
  }
  return Status::OK();
}
} // namespace
// Runs the auto-clustering passes on the text-format GraphDef stored at
// `graph_def_file_path` and checks that a summary of the resulting clustering
// equals the contents of `golden_summary_file_path`.
//
// Returns a non-OK status if a file cannot be read or written, if the input
// graph is already clustered, or if a pass or the summarization fails.  A
// summary mismatch is reported through EXPECT_EQ, not through the returned
// status.  When the test was not built with --config=cuda nothing is run and
// OK is returned.
Status AutoClusteringTest::RunAutoClusteringTest(
    absl::string_view graph_def_file_path,
    absl::string_view golden_summary_file_path) {
  const bool built_with_cuda = IsGoogleCudaEnabled();
  if (!built_with_cuda) {
    // Clustering decisions differ slightly between builds with and without
    // --config=cuda.  This has not been investigated closely; most likely
    // some of the partial declustering passes behave differently because of
    // different HostMemory.  The comparison is therefore only performed in
    // --config=cuda builds and is skipped (reported as passing) otherwise.
    LOG(INFO) << "Not running "
              << ::testing::UnitTest::GetInstance()->current_test_info()->name()
              << " since test was not built with --config=cuda";
    return Status::OK();
  }

  Env* const env = Env::Default();
  const string graph_def_path(graph_def_file_path);
  const string golden_path(golden_summary_file_path);

  GraphDef graphdef;
  TF_RETURN_IF_ERROR(ReadTextProto(env, graph_def_path, &graphdef));
  TF_RETURN_IF_ERROR(AssertGraphDefIsUnclustered(graphdef));

  OptimizationPassRunner runner;
  TF_RETURN_IF_ERROR(
      runner.SetJitLevel(tensorflow::OptimizerOptions::GlobalJitLevel::
                             OptimizerOptions_GlobalJitLevel_ON_2));
  TF_RETURN_IF_ERROR(runner.AddCpus(32));
  TF_RETURN_IF_ERROR(runner.AddGpus(8));

  // The passes run in this order, each one consuming the output of the
  // previous one.
  const absl::string_view kAutoClusteringPasses[] = {
      "CloneConstantsForBetterClusteringPass", "MarkForCompilationPass",
      "IncreaseDynamismForAutoJitPass", "PartiallyDeclusterPass"};
  for (const absl::string_view pass_name : kAutoClusteringPasses) {
    GraphDef pass_output;
    TF_RETURN_IF_ERROR(
        runner.Run(pass_name, std::move(graphdef), &pass_output));
    graphdef = std::move(pass_output);
  }

  TF_ASSIGN_OR_RETURN(string clustering_summary, SummarizeClustering(graphdef));

  // To update golden files flip this to true and run
  //
  // bazel test --test_strategy=local \
  //   tensorflow/compiler/jit/tests:auto_clustering_test
  bool update_golden = false;
  if (update_golden) {
    TF_RETURN_IF_ERROR(
        WriteStringToFile(env, golden_path, clustering_summary));
  }

  string golden_file_contents;
  TF_RETURN_IF_ERROR(
      ReadFileToString(env, golden_path, &golden_file_contents));
  EXPECT_EQ(golden_file_contents, clustering_summary);
  return Status::OK();
}
// Reads the text-format GraphDef stored at `graph_def_path` and runs
// MarkForCompilationPass on a fresh copy of it once per benchmark iteration.
//
// Returns a non-OK status if the file cannot be read, if the pass runner
// cannot be configured, or if the pass itself fails.
Status BenchmarkMarkForCompilation(absl::string_view graph_def_path,
                                   benchmark::State& state) {
  GraphDef graph_def;
  TF_RETURN_IF_ERROR(
      ReadTextProto(Env::Default(), string(graph_def_path), &graph_def));

  OptimizationPassRunner runner;
  TF_RETURN_IF_ERROR(
      runner.SetJitLevel(tensorflow::OptimizerOptions::GlobalJitLevel::
                             OptimizerOptions_GlobalJitLevel_ON_2));
  TF_RETURN_IF_ERROR(runner.AddCpus(32));
  TF_RETURN_IF_ERROR(runner.AddGpus(8));

  for (auto _ : state) {
    // Copying the input graph is per-iteration setup rather than measured
    // work, so the timer is paused around it.  The timer is controlled
    // through the State object that drives this loop instead of the legacy
    // free functions.
    state.PauseTiming();
    GraphDef result;
    GraphDef graph_def_copy = graph_def;
    state.ResumeTiming();

    TF_RETURN_IF_ERROR(runner.Run("MarkForCompilationPass",
                                  std::move(graph_def_copy), &result));
  }
  return Status::OK();
}
} // namespace tensorflow

View File

@ -0,0 +1,59 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_JIT_TESTS_AUTO_CLUSTERING_TEST_HELPER_H_
#define TENSORFLOW_COMPILER_JIT_TESTS_AUTO_CLUSTERING_TEST_HELPER_H_
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
namespace tensorflow {
// Helper to write integration tests and benchmarks for the auto-clustering pass
// pipeline. These tests run auto-clustering on a graphdef and compare a
// summary of the auto-clustering decisions with a "golden" summary.
//
// To create a new test from a TF workload, first run the workload with the
// following environment variables set:
//
// TF_DUMP_GRAPH_PREFIX=<some temporary directory>
// TF_XLA_FLAGS="--tf_xla_clustering_debug"
//
// If auto-clustering is enabled this should produce files named
// before_mark_for_compilation_<N>.pbtxt in the temporary directory. As the
// file name suggests, these are graphdefs that have been dumped right before
// the mark_for_compilation pass. There should be one
// before_mark_for_compilation_<N>.pbtxt for every TF graph that was
// auto-clustered, out of which usually only one is the "main" graph that's
// running training/inference.
//
// Copy the pbtxt for that "main" graph to tensorflow/compiler/jit/tests/
// (i.e. this directory) and create a corresponding empty .golden_summary file.
// Add the .pbtxt and .golden_summary files to the "data" section of the cc_test
// rule for :auto_clustering_test and then see the comment on update_golden on
// how to auto-generate the .golden_summary file.
class AutoClusteringTest : public ::testing::Test {
protected:
// Runs the auto-clustering passes on the text-format GraphDef stored at
// `graph_def_file_path` and compares a summary of the resulting clustering
// with the contents of `golden_summary_file_path`.
//
// Returns a non-OK status if a file cannot be read, if the input graph is
// already clustered, or if a pass fails.  A summary mismatch is reported as a
// test failure (EXPECT_EQ) rather than through the returned status.  If the
// test was not built with --config=cuda, nothing is run and OK is returned.
Status RunAutoClusteringTest(absl::string_view graph_def_file_path,
absl::string_view golden_summary_file_path);
};
// Reads the GraphDef stored in graph_def_path (which must be a pbtxt file) and
// benchmarks MarkForCompilationPass on this graphdef.  Each benchmark
// iteration runs the pass on a fresh copy of the graph.
//
// Returns a non-OK status if the file cannot be read, if the pass runner
// cannot be set up, or if the pass fails.
Status BenchmarkMarkForCompilation(absl::string_view graph_def_path,
benchmark::State& state);
} // namespace tensorflow
#endif // TENSORFLOW_COMPILER_JIT_TESTS_AUTO_CLUSTERING_TEST_HELPER_H_

View File

@ -0,0 +1,60 @@
Clustered nodes: 2236
Unclustered nodes: 618
Number of clusters: 2
unclustered size 618
AssignAddVariableOp 1
Const 120
DivNoNan 1
Identity 2
Merge 53
PlaceholderWithDefault 1
ReadVariableOp 2
Switch 1
_Arg 435
_Retval 2
cluster 0 size 1637
Add 16
AddN 71
ArgMax 1
AssignAddVariableOp 1
BiasAdd 1
BiasAddGrad 1
Cast 115
Const 134
Conv2D 53
Conv2DBackpropFilter 53
Conv2DBackpropInput 52
Equal 1
FusedBatchNormGradV2 53
FusedBatchNormV2 53
MatMul 3
MaxPool 1
MaxPoolGrad 1
Mean 1
Mul 218
Pad 2
ReadVariableOp 538
Relu 49
ReluGrad 49
Reshape 2
ResourceApplyKerasMomentum 161
Slice 1
Softmax 1
SparseSoftmaxCrossEntropyWithLogits 1
Squeeze 1
Sum 1
Tile 1
Transpose 1
cluster 1 size 599
AddN 1
AssignAddVariableOp 1
AssignSubVariableOp 106
Const 4
DivNoNan 1
Identity 1
Mul 161
ReadVariableOp 106
Square 55
Sub 106
Sum 57

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
Clustered nodes: 1908
Unclustered nodes: 452
Number of clusters: 2
unclustered size 452
Const 11
DivNoNan 1
Identity 1
ReadVariableOp 2
VarHandleOp 435
_Retval 2
cluster 0 size 546
Add 1
AddN 1
AssignAddVariableOp 2
AssignSubVariableOp 106
Const 4
DivNoNan 1
Mul 107
NoOp 1
ReadVariableOp 106
Square 55
Sub 106
Sum 56
cluster 1 size 1362
Add 16
AddN 71
ArgMax 1
AssignAddVariableOp 1
BiasAdd 1
BiasAddGrad 1
Cast 3
Const 80
Conv2D 53
Conv2DBackpropFilter 53
Conv2DBackpropInput 52
Equal 1
FusedBatchNorm 53
FusedBatchNormGrad 53
Identity 2
MatMul 3
MaxPool 1
MaxPoolGrad 1
Mean 1
Mul 57
Pad 1
ReadVariableOp 540
Relu 49
ReluGrad 49
Reshape 2
ResourceApplyKerasMomentum 161
ShapeN 50
Softmax 1
SparseSoftmaxCrossEntropyWithLogits 1
Squeeze 1
Sum 1
Tile 1
Transpose 1

File diff suppressed because it is too large Load Diff