Rename _XlaAutoJitScope to _XlaInternalScope.
- Make it clear that the attribute is generated and used internally by
  auto_jit, and distinguish it from the user-provided _XlaScope.
- Also revise some related naming.
commit 230cbd8568 (parent 14bb933f42)
@@ -42,9 +42,9 @@ class ClusterScopingPassImpl {
   size_t GetUniqueScopeId() { return unique_scope_id_++; }
 
-  void AddScopeToAllPredecessors(Node* start);
+  void AddScopeToAllTransitivePredecessors(Node* start);
 
-  void AddScopeToAllSuccessors(Node* start);
+  void AddScopeToAllTransitiveSuccessors(Node* start);
 
  private:
   Graph* graph_;
@@ -52,50 +52,50 @@ class ClusterScopingPassImpl {
   size_t unique_scope_id_;
 };
 
-absl::optional<string> GetXlaAutoJitScope(Node* node) {
+absl::optional<string> GetXlaInternalScope(Node* node) {
   string scope;
-  if (GetNodeAttr(node->attrs(), kXlaAutoJitScopeAttr, &scope).ok()) {
+  if (GetNodeAttr(node->attrs(), kXlaInternalScopeAttr, &scope).ok()) {
     return scope;
   }
 
   return absl::nullopt;
 }
 
-void SetXlaAutoJitScope(Node* node, StringPiece scope) {
-  node->AddAttr(kXlaAutoJitScopeAttr, scope);
+void SetXlaInternalScope(Node* node, StringPiece scope) {
+  node->AddAttr(kXlaInternalScopeAttr, scope);
 }
 
-// NB! We append a new scope as suffix to the XlaAutoJitScope attribute instead
-// of overriding the old value. In this way, we respect the original scopes.
-// In other words, appending X to Y creates the conjunction of the scopes X
-// and Y (i.e, X & Y in effect).
-void AddOrAppendXlaAutoJitScope(Node* node, absl::string_view suffix) {
+// NB! We append a new scope as suffix to the _XlaInternalScope attribute
+// instead of overriding the old value. In this way, we respect the original
+// scopes. In other words, appending X to Y creates the conjunction of the
+// scopes X and Y (i.e, X & Y in effect).
+void AddOrAppendXlaInternalScope(Node* node, absl::string_view suffix) {
   string updated_scope;
-  absl::optional<string> cur_scope = GetXlaAutoJitScope(node);
+  absl::optional<string> cur_scope = GetXlaInternalScope(node);
   if (cur_scope == absl::nullopt) {
     updated_scope = std::string(suffix);
   } else {
     updated_scope = absl::StrCat(cur_scope.value(), "&", suffix);
   }
-  SetXlaAutoJitScope(node, updated_scope);
+  SetXlaInternalScope(node, updated_scope);
 }
 
-void ClusterScopingPassImpl::AddScopeToAllPredecessors(Node* start) {
+void ClusterScopingPassImpl::AddScopeToAllTransitivePredecessors(Node* start) {
   const string unique_suffix = absl::StrCat("_", GetUniqueScopeId());
 
   std::vector<Node*> starts;
   starts.push_back(start);
-  auto enter = [&](Node* n) { AddOrAppendXlaAutoJitScope(n, unique_suffix); };
+  auto enter = [&](Node* n) { AddOrAppendXlaInternalScope(n, unique_suffix); };
   ReverseDFSFrom(*graph_, starts, enter, /*leave=*/nullptr,
                  /*stable_comparator=*/NodeComparatorName());
 }
 
-void ClusterScopingPassImpl::AddScopeToAllSuccessors(Node* start) {
+void ClusterScopingPassImpl::AddScopeToAllTransitiveSuccessors(Node* start) {
   const string unique_suffix = absl::StrCat("_", GetUniqueScopeId());
 
   std::vector<Node*> starts;
   starts.push_back(start);
-  auto enter = [&](Node* n) { AddOrAppendXlaAutoJitScope(n, unique_suffix); };
+  auto enter = [&](Node* n) { AddOrAppendXlaInternalScope(n, unique_suffix); };
   auto not_back_edge = [](const Edge& edge) -> bool {
     return !edge.src()->IsNextIteration();
   };
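Note: the append-as-conjunction rule above is easy to check in isolation. Below is a minimal standalone sketch of the same logic, using std::optional in place of absl::optional and a free function in place of the node-attribute plumbing; AddOrAppendScope and the values shown are illustrative, not TF APIs.

    #include <iostream>
    #include <optional>
    #include <string>

    std::string AddOrAppendScope(const std::optional<std::string>& cur_scope,
                                 const std::string& suffix) {
      if (!cur_scope.has_value()) {
        return suffix;  // no existing scope: adopt the suffix as-is
      }
      return cur_scope.value() + "&" + suffix;  // existing scope: conjunction
    }

    int main() {
      std::optional<std::string> unscoped;  // node with no scope yet
      std::cout << AddOrAppendScope(unscoped, "_0") << "\n";  // prints: _0

      std::optional<std::string> user_scoped = "ClusterA";  // user-set scope
      std::cout << AddOrAppendScope(user_scoped, "_1") << "\n";  // prints: ClusterA&_1
    }

Appending never discards an existing scope, so a user-provided scope survives as a prefix of the internal one.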
@@ -104,14 +104,26 @@ void ClusterScopingPassImpl::AddScopeToAllSuccessors(Node* start) {
                  /*edge_filter=*/not_back_edge);
 }
 
+// This preserves the parallelism between pipeline stages. For example, below
+// is a typical pattern of input pipelining in Tensorflow and this heuristic
+// ensures Node_X and Node_Y are put into different clusters. Without the
+// heuristic, they may be put into the same cluster and it can introduce
+// artificial dependencies and incur great performance loss. In this example,
+// Node_Y becomes dependent on IteratorGetNext and the latencies add up if
+// Node_X and Node_Y are in the same cluster.
+//
+//   IteratorGetNext -> Node_X -> Stage
+//
+//   Unstage -> Node_Y
+//
 Status ClusterScopingPassImpl::ScopingForPipelineStages() {
   for (Node* n : graph_->nodes()) {
     DCHECK(n);
     if (n->type_string() == "Unstage") {
-      AddScopeToAllSuccessors(n);
+      AddScopeToAllTransitiveSuccessors(n);
     }
     if (n->type_string() == "Stage") {
-      AddScopeToAllPredecessors(n);
+      AddScopeToAllTransitivePredecessors(n);
     }
   }
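Note: the heuristic itself is just two transitive reachability walks. Here is a toy sketch of the Unstage-side walk on the exact graph from the comment, using a plain adjacency map instead of tensorflow::Graph; ToyGraph and MarkTransitiveSuccessors are hypothetical names for illustration.

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using ToyGraph = std::map<std::string, std::vector<std::string>>;  // node -> outputs

    void MarkTransitiveSuccessors(const ToyGraph& g, const std::string& start,
                                  std::set<std::string>* marked) {
      for (const std::string& succ : g.at(start)) {
        if (marked->insert(succ).second) {  // newly reached: keep walking
          MarkTransitiveSuccessors(g, succ, marked);
        }
      }
    }

    int main() {
      ToyGraph g = {{"IteratorGetNext", {"Node_X"}},
                    {"Node_X", {"Stage"}},
                    {"Stage", {}},
                    {"Unstage", {"Node_Y"}},
                    {"Node_Y", {}}};
      std::set<std::string> unstage_scope;
      MarkTransitiveSuccessors(g, "Unstage", &unstage_scope);
      // Node_Y gets Unstage's unique suffix; Node_X does not, so the two
      // pipeline stages land in different clusters and can run in parallel.
      std::cout << unstage_scope.count("Node_Y") << "\n";  // 1
      std::cout << unstage_scope.count("Node_X") << "\n";  // 0
    }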
@@ -123,18 +135,6 @@ Status ClusterScopingPassImpl::Run() {
     return Status::OK();
   }
 
-  // This preserves the parallelism between pipeline stages. For example,
-  // below is a typical pattern of input pipelining in Tensorflow and this
-  // heuristic ensures Node_X and Node_Y are put into different clusters.
-  // Without the heuristic, they may be put into the same cluster and it
-  // can introduce artificial dependencies and incur great performance loss.
-  // In this example, Node_Y becomes dependent on IteratorGetNext and the
-  // latencies add up if Node_X and Node_Y are in the same cluster.
-  //
-  //   IteratorGetNext -> Node_X -> Stage
-  //
-  //   Unstage -> Node_Y
-  //
   return ScopingForPipelineStages();
 }
 }  // namespace
@@ -20,7 +20,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-// This pass adds scopes to nodes in the _XlaAutoJitScope attribute to guide
+// This pass adds scopes to nodes in the _XlaInternalScope attribute to guide
 // the later clustering passes. A major reason to do this is to prevent the
 // clustering from losing critical parallelism in the Tensorflow graph, which
 // can incur great performance degradation.
@@ -49,17 +49,17 @@ Status ClusterScoping(std::unique_ptr<Graph>* graph) {
   return pass.Run(opt_options);
 }
 
-absl::flat_hash_map<string, string> GetXlaAutoJitScopes(const Graph& graph) {
+absl::flat_hash_map<string, string> GetXlaInternalScopes(const Graph& graph) {
   absl::flat_hash_map<string, string> scopes;
   for (Node* node : graph.nodes()) {
     string scope;
-    if (GetNodeAttr(node->attrs(), kXlaAutoJitScopeAttr, &scope).ok()) {
+    if (GetNodeAttr(node->attrs(), kXlaInternalScopeAttr, &scope).ok()) {
       scopes[node->name()] = scope;
     }
   }
 
   if (VLOG_IS_ON(2)) {
-    VLOG(2) << "_XlaScopes:";
+    VLOG(2) << "_XlaInternalScopes:";
     for (const auto& p : scopes) {
       VLOG(2) << " " << p.first << " -> " << p.second;
     }
@@ -120,7 +120,7 @@ TEST(XlaCompilationTest, StagePipelinePreserved) {
 
   TF_ASSERT_OK(ClusterScoping(&graph));
 
-  auto scopes = GetXlaAutoJitScopes(*graph);
+  auto scopes = GetXlaInternalScopes(*graph);
   EXPECT_NE(scopes["add0"], scopes["add1"]);
   EXPECT_EQ(scopes["add0"], scopes["relu0"]);
   EXPECT_EQ(scopes["add1"], scopes["relu1"]);
@@ -156,15 +156,15 @@ TEST(XlaCompilationTest, StagePipelinePreservedAndInitialScopesRespected) {
   // be separated by the ClusterScopingPass.
   Node* add0 =
       ops::BinaryOp("Add", a, b, builder.opts().WithName("add0").WithAttr(
-                                     kXlaAutoJitScopeAttr, "ClusterA"));
+                                     kXlaInternalScopeAttr, "ClusterA"));
   Node* add1 = ops::BinaryOp("Add", unstage, b,
                              builder.opts().WithName("add1").WithAttr(
-                                 kXlaAutoJitScopeAttr, "ClusterA"));
+                                 kXlaInternalScopeAttr, "ClusterA"));
   Node* relu0 =
       ops::UnaryOp("Relu", add0, builder.opts().WithName("relu0").WithAttr(
-                                     kXlaAutoJitScopeAttr, "ClusterB"));
+                                     kXlaInternalScopeAttr, "ClusterB"));
   ops::UnaryOp("Relu", add1, builder.opts().WithName("relu1").WithAttr(
-                                 kXlaAutoJitScopeAttr, "ClusterD"));
+                                 kXlaInternalScopeAttr, "ClusterD"));
   BuildStageNode(builder, "stage", {DT_FLOAT}, {relu0});
 
   TF_EXPECT_OK(GraphDefBuilderToGraph(builder, graph.get()));
@@ -172,7 +172,7 @@ TEST(XlaCompilationTest, StagePipelinePreservedAndInitialScopesRespected) {
 
   TF_ASSERT_OK(ClusterScoping(&graph));
 
-  auto scopes = GetXlaAutoJitScopes(*graph);
+  auto scopes = GetXlaInternalScopes(*graph);
   EXPECT_NE(scopes["add0"], scopes["add1"]);
   EXPECT_NE(scopes["add0"], scopes["relu0"]);
   EXPECT_NE(scopes["add1"], scopes["relu1"]);
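Note: to see why all three assertions flip to EXPECT_NE here, while StagePipelinePreserved had EXPECT_EQ for nodes within a stage, it helps to write out plausible post-pass scope strings. The suffix ids below are assumptions for illustration; only the conjunction structure comes from the pass.

    #include <iostream>
    #include <string>

    int main() {
      // Hypothetical post-pass scopes, assuming the Unstage walk hands out "_0"
      // and the Stage walk "_1" (the actual unique ids are pass-internal and
      // may differ).
      std::string add0 = "ClusterA&_1";   // user scope ClusterA, Stage-side suffix
      std::string add1 = "ClusterA&_0";   // user scope ClusterA, Unstage-side suffix
      std::string relu0 = "ClusterB&_1";  // user scope ClusterB, Stage-side suffix
      std::string relu1 = "ClusterD&_0";  // user scope ClusterD, Unstage-side suffix

      // The appended suffixes separate the two pipeline stages (add0 vs add1),
      // and the preserved user prefixes keep nodes within a stage apart too.
      std::cout << (add0 != add1) << (add0 != relu0) << (add1 != relu1) << "\n";  // 111
    }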
@@ -19,11 +19,11 @@ namespace tensorflow {
 
 const char* const kXlaCompileAttr = "_XlaCompile";
 
-// User-provided through jit_scope. Effective only when auto_jit is OFF.
+// User-provided through jit_scope APIs. Effective only when auto_jit is OFF.
 const char* const kXlaScopeAttr = "_XlaScope";
 
 // Automatically inserted by auto_jit to guide clustering results. Effective
 // only when auto_jit is ON.
-const char* const kXlaAutoJitScopeAttr = "_XlaAutoJitScope";
+const char* const kXlaInternalScopeAttr = "_XlaInternalScope";
 
 }  // namespace tensorflow
@@ -24,7 +24,7 @@ namespace tensorflow {
 // Name of attribute used to tag operators for compilation with XLA
 extern const char* const kXlaCompileAttr;  // "_XlaCompile"
 extern const char* const kXlaScopeAttr;    // "_XlaScope"
-extern const char* const kXlaAutoJitScopeAttr;  // "_XlaAutoJitScope"
+extern const char* const kXlaInternalScopeAttr;  // "_XlaInternalScope"
 
 }  // namespace tensorflow
@@ -923,31 +923,31 @@ MarkForCompilationPassImpl::ClusteringWillIntroduceInterDeviceDependency(
 }
 
 absl::optional<string> MarkForCompilationPassImpl::GetXlaScope(Node* node) {
-  // Look for either _XlaScope or _XlaAutoJitScope on both nodes to guide
+  // Look for either _XlaScope or _XlaInternalScope on both nodes to guide
   // clustering. If both nodes have a scope and the scopes do not match, do
   // not cluster along this edge. If even one of the nodes lacks a scope
   // attribute, then it is treated as a "bridge" and a cluster may be created
   // along it.
   //
-  // The difference between _XlaScope and _XlaAutoJitScope is that _XlaScope is
-  // provided by users through jit_scope APIs, while _XlaAutoJitScope is
+  // The difference between _XlaScope and _XlaInternalScope is that _XlaScope is
+  // provided by users through jit_scope APIs, while _XlaInternalScope is
   // automatically generated by the ClusterScopingPass when auto_jit is on. As
-  // such, we respect _kXlaScope only when auto_jit is off, while respecting
-  // _kXlaAutoJitScope only when auto_jit is on.
+  // such, we respect _XlaScope only when auto_jit is off, while respecting
+  // _XlaInternalScope only when auto_jit is on.
   //
   // We may want to restrict the _XlaScope behavior to require all nodes marked
   // with _XlaCompile=true to also have a _XlaScope property set (and raise an
   // error otherwise); but for now we don't do this.
 
   if (global_jit_level_ != OptimizerOptions::OFF) {
-    // If global_jit_level_ is ON, respect only kXlaAutoJitScope.
+    // If global_jit_level_ is ON, respect only _XlaInternalScope.
     const string& scope =
-        GetNodeAttrString(node->attrs(), kXlaAutoJitScopeAttr);
+        GetNodeAttrString(node->attrs(), kXlaInternalScopeAttr);
     if (!scope.empty()) {
       return scope;
     }
   } else {
-    // If global_jit_level_ is OFF, respect only kXlaScope.
+    // If global_jit_level_ is OFF, respect only _XlaScope.
     const string& scope = GetNodeAttrString(node->attrs(), kXlaScopeAttr);
     if (!scope.empty()) {
       return scope;
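Note: the selection rule in the comment can be restated in a few lines. Below is a hedged standalone sketch with a plain map standing in for the node's attribute set; SelectScope is an illustrative name, not part of the TF codebase.

    #include <map>
    #include <optional>
    #include <string>

    std::optional<std::string> SelectScope(
        bool auto_jit_on, const std::map<std::string, std::string>& attrs) {
      // auto_jit ON: only the pass-generated _XlaInternalScope is consulted;
      // auto_jit OFF: only the user-provided _XlaScope is consulted.
      const char* key = auto_jit_on ? "_XlaInternalScope" : "_XlaScope";
      auto it = attrs.find(key);
      if (it != attrs.end() && !it->second.empty()) {
        return it->second;
      }
      return std::nullopt;  // no scope: node acts as a "bridge" for clustering
    }

Because exactly one attribute is consulted per mode, a user-provided _XlaScope cannot interfere with auto_jit clustering, and vice versa.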