Rename _XlaAutoJitScope to _XlaInternalScope.

- Make it clear that the attribute is generated and used internally by
  auto_jit, distinguishing it from the user-provided _XlaScope.
- Also revise some other naming.
Author: Trent Lo
Date:   2019-08-16 10:46:41 -07:00
parent 14bb933f42
commit 230cbd8568
6 changed files with 52 additions and 52 deletions

tensorflow/compiler/jit/cluster_scoping_pass.cc

@@ -42,9 +42,9 @@ class ClusterScopingPassImpl {
size_t GetUniqueScopeId() { return unique_scope_id_++; }
- void AddScopeToAllPredecessors(Node* start);
+ void AddScopeToAllTransitivePredecessors(Node* start);
- void AddScopeToAllSuccessors(Node* start);
+ void AddScopeToAllTransitiveSuccessors(Node* start);
private:
Graph* graph_;
@@ -52,50 +52,50 @@ class ClusterScopingPassImpl {
size_t unique_scope_id_;
};
- absl::optional<string> GetXlaAutoJitScope(Node* node) {
+ absl::optional<string> GetXlaInternalScope(Node* node) {
string scope;
- if (GetNodeAttr(node->attrs(), kXlaAutoJitScopeAttr, &scope).ok()) {
+ if (GetNodeAttr(node->attrs(), kXlaInternalScopeAttr, &scope).ok()) {
return scope;
}
return absl::nullopt;
}
- void SetXlaAutoJitScope(Node* node, StringPiece scope) {
- node->AddAttr(kXlaAutoJitScopeAttr, scope);
+ void SetXlaInternalScope(Node* node, StringPiece scope) {
+ node->AddAttr(kXlaInternalScopeAttr, scope);
}
- // NB! We append a new scope as suffix to the XlaAutoJitScope attribute instead
- // of overriding the old value. In this way, we respect the original scopes.
- // In other words, appending X to Y creates the conjunction of the scopes X
- // and Y (i.e, X & Y in effect).
- void AddOrAppendXlaAutoJitScope(Node* node, absl::string_view suffix) {
+ // NB! We append a new scope as suffix to the _XlaInternalScope attribute
+ // instead of overriding the old value. In this way, we respect the original
+ // scopes. In other words, appending X to Y creates the conjunction of the
+ // scopes X and Y (i.e., X & Y in effect).
+ void AddOrAppendXlaInternalScope(Node* node, absl::string_view suffix) {
string updated_scope;
- absl::optional<string> cur_scope = GetXlaAutoJitScope(node);
+ absl::optional<string> cur_scope = GetXlaInternalScope(node);
if (cur_scope == absl::nullopt) {
updated_scope = std::string(suffix);
} else {
updated_scope = absl::StrCat(cur_scope.value(), "&", suffix);
}
- SetXlaAutoJitScope(node, updated_scope);
+ SetXlaInternalScope(node, updated_scope);
}
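// A minimal sketch of the append semantics above, assuming a hypothetical
// node n that initially carries no _XlaInternalScope attribute:
//
//   AddOrAppendXlaInternalScope(n, "_0");  // scope becomes "_0"
//   AddOrAppendXlaInternalScope(n, "_1");  // scope becomes "_0&_1"
//
// Because clustering compares the full scope strings for equality, each
// appended suffix can only narrow the set of nodes allowed to cluster
// together.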
- void ClusterScopingPassImpl::AddScopeToAllPredecessors(Node* start) {
+ void ClusterScopingPassImpl::AddScopeToAllTransitivePredecessors(Node* start) {
const string unique_suffix = absl::StrCat("_", GetUniqueScopeId());
std::vector<Node*> starts;
starts.push_back(start);
- auto enter = [&](Node* n) { AddOrAppendXlaAutoJitScope(n, unique_suffix); };
+ auto enter = [&](Node* n) { AddOrAppendXlaInternalScope(n, unique_suffix); };
ReverseDFSFrom(*graph_, starts, enter, /*leave=*/nullptr,
/*stable_comparator=*/NodeComparatorName());
}
- void ClusterScopingPassImpl::AddScopeToAllSuccessors(Node* start) {
+ void ClusterScopingPassImpl::AddScopeToAllTransitiveSuccessors(Node* start) {
const string unique_suffix = absl::StrCat("_", GetUniqueScopeId());
std::vector<Node*> starts;
starts.push_back(start);
- auto enter = [&](Node* n) { AddOrAppendXlaAutoJitScope(n, unique_suffix); };
+ auto enter = [&](Node* n) { AddOrAppendXlaInternalScope(n, unique_suffix); };
auto not_back_edge = [](const Edge& edge) -> bool {
return !edge.src()->IsNextIteration();
};
@@ -104,14 +104,26 @@ void ClusterScopingPassImpl::AddScopeToAllSuccessors(Node* start) {
/*edge_filter=*/not_back_edge);
}
+ // This preserves the parallelism between pipeline stages. For example, below
+ // is a typical pattern of input pipelining in TensorFlow, and this heuristic
+ // ensures that Node_X and Node_Y are put into different clusters. Without
+ // the heuristic, they may be put into the same cluster, which can introduce
+ // artificial dependencies and incur significant performance loss. In this
+ // example, Node_Y becomes dependent on IteratorGetNext and the latencies add
+ // up if Node_X and Node_Y are in the same cluster.
+ //
+ //   IteratorGetNext -> Node_X -> Stage
+ //
+ //   Unstage -> Node_Y
+ //
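// Walking through the example (the suffix ids below are illustrative):
// AddScopeToAllTransitivePredecessors(Stage) stamps Stage, Node_X, and
// IteratorGetNext with a suffix such as "_0", while
// AddScopeToAllTransitiveSuccessors(Unstage) stamps Unstage and Node_Y with
// "_1". The differing _XlaInternalScope values then keep Node_X and Node_Y
// out of the same cluster.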
Status ClusterScopingPassImpl::ScopingForPipelineStages() {
for (Node* n : graph_->nodes()) {
DCHECK(n);
if (n->type_string() == "Unstage") {
- AddScopeToAllSuccessors(n);
+ AddScopeToAllTransitiveSuccessors(n);
}
if (n->type_string() == "Stage") {
- AddScopeToAllPredecessors(n);
+ AddScopeToAllTransitivePredecessors(n);
}
}
@@ -123,18 +135,6 @@ Status ClusterScopingPassImpl::Run() {
return Status::OK();
}
- // This preserves the parallelism between pipeline stages. For example,
- // below is a typical pattern of input pipelining in Tensorflow and this
- // heuristic ensures Node_X and Node_Y are put into different clusters.
- // Without the heuristic, they may be put into the same cluster and it
- // can introduce artificial dependencies and incur great performance loss.
- // In this example, Node_Y becomes dependent on IteratorGetNext and the
- // latencies add up if Node_X and Node_Y are in the same cluster.
- //
- //   IteratorGetNext -> Node_X -> Stage
- //
- //   Unstage -> Node_Y
- //
return ScopingForPipelineStages();
}
} // namespace

tensorflow/compiler/jit/cluster_scoping_pass.h

@@ -20,7 +20,7 @@ limitations under the License.
namespace tensorflow {
- // This pass adds scopes to nodes in the _XlaAutoJitScope attribute to guide
+ // This pass adds scopes to nodes in the _XlaInternalScope attribute to guide
// the later clustering passes. A major reason to do this is to prevent the
// clustering from losing critical parallelism in the Tensorflow graph, which
// can incur great performance degradation.
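// A minimal usage sketch, mirroring the ClusterScoping() helper in the test
// file below (the remaining options wiring is assumed, not shown here):
//
//   GraphOptimizationPassOptions opt_options;
//   opt_options.graph = &graph;  // graph is a std::unique_ptr<Graph>
//   ClusterScopingPass pass;
//   TF_RETURN_IF_ERROR(pass.Run(opt_options));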

tensorflow/compiler/jit/cluster_scoping_pass_test.cc

@@ -49,17 +49,17 @@ Status ClusterScoping(std::unique_ptr<Graph>* graph) {
return pass.Run(opt_options);
}
- absl::flat_hash_map<string, string> GetXlaAutoJitScopes(const Graph& graph) {
+ absl::flat_hash_map<string, string> GetXlaInternalScopes(const Graph& graph) {
absl::flat_hash_map<string, string> scopes;
for (Node* node : graph.nodes()) {
string scope;
- if (GetNodeAttr(node->attrs(), kXlaAutoJitScopeAttr, &scope).ok()) {
+ if (GetNodeAttr(node->attrs(), kXlaInternalScopeAttr, &scope).ok()) {
scopes[node->name()] = scope;
}
}
if (VLOG_IS_ON(2)) {
VLOG(2) << "_XlaScopes:";
VLOG(2) << "_XlaInternalScopes:";
for (const auto& p : scopes) {
VLOG(2) << " " << p.first << " -> " << p.second;
}
@@ -120,7 +120,7 @@ TEST(XlaCompilationTest, StagePipelinePreserved) {
TF_ASSERT_OK(ClusterScoping(&graph));
- auto scopes = GetXlaAutoJitScopes(*graph);
+ auto scopes = GetXlaInternalScopes(*graph);
EXPECT_NE(scopes["add0"], scopes["add1"]);
EXPECT_EQ(scopes["add0"], scopes["relu0"]);
EXPECT_EQ(scopes["add1"], scopes["relu1"]);
@@ -156,15 +156,15 @@ TEST(XlaCompilationTest, StagePipelinePreservedAndInitialScopesRespected) {
// be separated by the ClusterScopingPass.
Node* add0 =
ops::BinaryOp("Add", a, b, builder.opts().WithName("add0").WithAttr(
kXlaAutoJitScopeAttr, "ClusterA"));
kXlaInternalScopeAttr, "ClusterA"));
Node* add1 = ops::BinaryOp("Add", unstage, b,
builder.opts().WithName("add1").WithAttr(
kXlaAutoJitScopeAttr, "ClusterA"));
kXlaInternalScopeAttr, "ClusterA"));
Node* relu0 =
ops::UnaryOp("Relu", add0, builder.opts().WithName("relu0").WithAttr(
kXlaAutoJitScopeAttr, "ClusterB"));
kXlaInternalScopeAttr, "ClusterB"));
ops::UnaryOp("Relu", add1, builder.opts().WithName("relu1").WithAttr(
kXlaAutoJitScopeAttr, "ClusterD"));
kXlaInternalScopeAttr, "ClusterD"));
BuildStageNode(builder, "stage", {DT_FLOAT}, {relu0});
TF_EXPECT_OK(GraphDefBuilderToGraph(builder, graph.get()));
@@ -172,7 +172,7 @@ TEST(XlaCompilationTest, StagePipelinePreservedAndInitialScopesRespected) {
TF_ASSERT_OK(ClusterScoping(&graph));
- auto scopes = GetXlaAutoJitScopes(*graph);
+ auto scopes = GetXlaInternalScopes(*graph);
EXPECT_NE(scopes["add0"], scopes["add1"]);
EXPECT_NE(scopes["add0"], scopes["relu0"]);
EXPECT_NE(scopes["add1"], scopes["relu1"]);

tensorflow/compiler/jit/defs.cc

@@ -19,11 +19,11 @@ namespace tensorflow {
const char* const kXlaCompileAttr = "_XlaCompile";
- // User-provided through jit_scope. Effective only when auto_jit is OFF.
+ // User-provided through jit_scope APIs. Effective only when auto_jit is OFF.
const char* const kXlaScopeAttr = "_XlaScope";
// Automatically inserted by auto_jit to guide clustering results. Effective
// only when auto_jit is ON.
- const char* const kXlaAutoJitScopeAttr = "_XlaAutoJitScope";
+ const char* const kXlaInternalScopeAttr = "_XlaInternalScope";
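// Illustrative attribute values (assumed, not prescribed): with auto_jit
// OFF, the jit_scope APIs might stamp a node with _XlaScope="jit_scope_0";
// with auto_jit ON, the ClusterScopingPass might stamp the same node with
// _XlaInternalScope="_0&_1". Only the attribute matching the auto_jit mode
// is consulted during clustering.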
} // namespace tensorflow

tensorflow/compiler/jit/defs.h

@@ -24,7 +24,7 @@ namespace tensorflow {
// Name of attribute used to tag operators for compilation with XLA
extern const char* const kXlaCompileAttr; // "_XlaCompile"
extern const char* const kXlaScopeAttr; // "_XlaScope"
- extern const char* const kXlaAutoJitScopeAttr;  // "_XlaAutoJitScope"
+ extern const char* const kXlaInternalScopeAttr;  // "_XlaInternalScope"
} // namespace tensorflow

tensorflow/compiler/jit/mark_for_compilation_pass.cc

@@ -923,31 +923,31 @@ MarkForCompilationPassImpl::ClusteringWillIntroduceInterDeviceDependency(
}
absl::optional<string> MarkForCompilationPassImpl::GetXlaScope(Node* node) {
- // Look for either _XlaScope or _XlaAutoJitScope on both nodes to guide
+ // Look for either _XlaScope or _XlaInternalScope on both nodes to guide
// clustering. If both nodes have a scope and the scopes do not match, do
// not cluster along this edge. If even one of the nodes lacks a scope
// attribute, then it is treated as a "bridge" and a cluster may be created
// along it.
//
- // The difference between _XlaScope and _XlaAutoJitScope is that _XlaScope is
- // provided by users through jit_scope APIs, while _XlaAutoJitScope is
+ // The difference between _XlaScope and _XlaInternalScope is that _XlaScope is
+ // provided by users through jit_scope APIs, while _XlaInternalScope is
// automatically generated by the ClusterScopingPass when auto_jit is on. As
- // such, we respect _kXlaScope only when auto_jit is off, while respecting
- // _kXlaAutoJitScope only when auto_jit is on.
+ // such, we respect _XlaScope only when auto_jit is off, while respecting
+ // _XlaInternalScope only when auto_jit is on.
//
// We may want to restrict the _XlaScope behavior to require all nodes marked
// with _XlaCompile=true to also have a _XlaScope property set (and raise an
// error otherwise); but for now we don't do this.
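// For example (scope values hypothetical): two nodes whose effective scopes
// are both "_0" may be merged; nodes with scopes "_0" and "_1" may not; and
// a node with no scope attribute places no constraint on the merge.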
if (global_jit_level_ != OptimizerOptions::OFF) {
- // If global_jit_level_ is ON, respect only kXlaAutoJitScope.
+ // If global_jit_level_ is ON, respect only _XlaInternalScope.
const string& scope =
- GetNodeAttrString(node->attrs(), kXlaAutoJitScopeAttr);
+ GetNodeAttrString(node->attrs(), kXlaInternalScopeAttr);
if (!scope.empty()) {
return scope;
}
} else {
- // If global_jit_level_ is OFF, respect only kXlaScope.
+ // If global_jit_level_ is OFF, respect only _XlaScope.
const string& scope = GetNodeAttrString(node->attrs(), kXlaScopeAttr);
if (!scope.empty()) {
return scope;