Include side effect analysis for clustering.

With side effects, there is the possibility of introducing improper clustering and cyclic dependencies between clusters. This is handled here by closing an outside compiled cluster to additional ops once a side effect with successors is encountered. (Note: The ops are traversed in reverse order.)

PiperOrigin-RevId: 332300167
Change-Id: Ieecc9cac3bfe056599a29e98bd24d66bf1a66b08
This commit is contained in:
Ken Franko 2020-09-17 13:13:47 -07:00 committed by TensorFlower Gardener
parent d20ce07ed4
commit 69662b03be
3 changed files with 58 additions and 21 deletions

View File

@ -135,6 +135,27 @@ func @two_clusters_with_two_ops_each() {
return
}
// Test with resource reads and writes on %arg0 and %arg1 inside a single
// tf_device.cluster. Every op in the cluster body is tagged with
// _xla_outside_compilation except the first "tf.AssignVariableOp", which
// writes %arg1.
//
// The expectations below require the first ReadVariableOp and the Identity
// that follows it to carry the same cluster name, and require that the
// attribute with that same name does not match again after the
// AssignVariableOp has been matched.
// CHECK-LABEL: func @resource_side_effect_cycle
func @resource_side_effect_cycle(%arg0: tensor<!tf.resource<tensor<f32>>>, %arg1: tensor<!tf.resource<tensor<f32>>>) {
// CHECK: "tf.ReadVariableOp"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.Identity"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
// CHECK-NEXT: "tf.AssignVariableOp"
// CHECK-NOT: {_xla_outside_compilation = "[[CLUSTER1]]"
"tf_device.cluster"() ( {
%read0 = "tf.ReadVariableOp"(%arg0) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>) -> tensor<f32>
%idet0 = "tf.Identity"(%read0) {_xla_outside_compilation = "0"} : (tensor<f32>) -> tensor<f32>
// Not tagged for outside compilation; it writes %arg1, which is read again by
// the tagged ReadVariableOp on the next line.
"tf.AssignVariableOp"(%arg1, %idet0) : (tensor<!tf.resource<tensor<f32>>>, tensor<f32>) -> ()
%read1 = "tf.ReadVariableOp"(%arg1) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>) -> tensor<f32>
%idet1 = "tf.Identity"(%read1) {_xla_outside_compilation = "0"} : (tensor<f32>) -> tensor<f32>
// Consumes values produced both before (%idet0) and after (%idet1) the
// untagged write above.
%add0 = "tf.AddV2"(%idet0, %idet1) {_xla_outside_compilation = "0"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tf.AssignVariableOp"(%arg0, %add0) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>, tensor<f32>) -> ()
tf_device.return
}) {cluster_attr = "cluster_attr"} : () -> ()
return
}
// CHECK-LABEL: func @two_clusters_with_same_parent
func @two_clusters_with_same_parent() {
// CHECK: "tf.opA"
@ -172,7 +193,7 @@ func @two_clusters_with_same_outside_compiled_parent() {
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.opD"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER14:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.opE"
// CHECK-NEXT: "tf.Identity"
// CHECK-NEXT: "tf.opF"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13]]"
// CHECK-NEXT: "tf.opG"
@ -182,7 +203,7 @@ func @two_clusters_with_same_outside_compiled_parent() {
%b = "tf.opB"(%a) : (tensor<i32>) -> tensor<i32>
%c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
%d = "tf.opD"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
%e = "tf.opE"(%d) : (tensor<i32>) -> tensor<i32>
%e = "tf.Identity"(%d) : (tensor<i32>) -> tensor<i32>
%f = "tf.opF"(%e) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
%g = "tf.opG"(%c, %f) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
tf_device.return
@ -213,7 +234,8 @@ func @outside_compile_with_block() {
// CHECK-NEXT: "tf.opB"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15]]"
// CHECK: "tf.opC"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15]]"
// CHECK-NOT: _xla_outside_compilation = "[[CLUSTER14]]"
// CHECK-SAME: _xla_outside_compilation = "{{[a-zA-Z_0-9]+}}"
"tf_device.cluster"() ( {
%a = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
%b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
@ -254,16 +276,16 @@ func @check_ops_with_data_dependency_added_as_host_cluster() {
// CHECK: "tf.opA"
// CHECK-NEXT: "tf.opB"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.opC"
// CHECK-NEXT: "tf.opD"
// CHECK-NEXT: "tf.Identity"
// CHECK-NEXT: "tf.Identity"
// CHECK-NEXT: "tf.opE"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16]]"
// CHECK-NEXT: "tf.opF"
"tf_device.cluster"() ( {
%a = "tf.opA"() : () -> tensor<i32>
%b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
%c = "tf.opC"(%b) : (tensor<i32>) -> tensor<i32>
%d = "tf.opD"(%c) : (tensor<i32>) -> tensor<i32>
%c = "tf.Identity"(%b) : (tensor<i32>) -> tensor<i32>
%d = "tf.Identity"(%c) : (tensor<i32>) -> tensor<i32>
%e = "tf.opE"(%d, %b, %c) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<i32>
"tf.opF"(%e) : (tensor<i32>) -> ()
tf_device.return

View File

@ -331,7 +331,8 @@ std::unique_ptr<OperationPass<ModuleOp>> CreateTPUVariableReformattingPass();
// Creates a pass that groups outside compiled operations (CPU ops inside TPU
// cluster) into clusters that can be extracted and run on the CPU.
std::unique_ptr<OperationPass<FuncOp>> CreateTPUOutsideCompilationClusterPass();
std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUOutsideCompilationClusterPass();
// Creates a pass that extracts outside compilation (CPU ops inside TPU cluster)
// at head/tail of TPU cluster to run before/after TPU computation.

View File

@ -23,6 +23,7 @@ limitations under the License.
#include "mlir/IR/Types.h" // from @llvm-project
#include "mlir/Support/LLVM.h" // from @llvm-project
#include "mlir/Support/LogicalResult.h" // from @llvm-project
#include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h"
#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
@ -34,8 +35,10 @@ namespace {
constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation";
struct TPUOutsideCompilationCluster
: public PassWrapper<TPUOutsideCompilationCluster, FunctionPass> {
void runOnFunction() override;
: public TF::PerFunctionAggregateAnalysisConsumerPass<
TPUOutsideCompilationCluster, TF::SideEffectAnalysis> {
void runOnFunction(FuncOp func,
const TF::SideEffectAnalysis::Info& side_effect_analysis);
};
// Represents an outside compiled cluster. All ops that are added to the same
@ -47,11 +50,10 @@ class OutsideCompiledCluster {
// Attempts to add an op to this cluster. Ops can be grouped into the same
// cluster if they have a data dependency and are inside the same block.
// TODO(kfranko): Ensure that side effecting ops are checked before being
// grouped to a same cluster.
bool AddOp(Operation* op) {
bool AddOp(Operation* op,
const TF::SideEffectAnalysis::Info& side_effect_analysis) {
// Check if the op is safe to add before adding it.
if (IsSafeToAdd(op)) {
if (IsSafeToAdd(op, side_effect_analysis)) {
op->setAttr(kXlaOutsideCompilationAttr,
StringAttr::get(cluster_name_, op->getContext()));
host_cluster_ops_.insert(op);
@ -62,11 +64,21 @@ class OutsideCompiledCluster {
private:
// Checks if it is safe for an op to be merged into this cluster.
bool IsSafeToAdd(Operation* op) {
bool IsSafeToAdd(Operation* op,
const TF::SideEffectAnalysis::Info& side_effect_analysis) {
if (closed_) return false;
// If the op is not marked for outside compilation it doesn't belong in a
// cluster.
if (!op->getAttrOfType<StringAttr>(kXlaOutsideCompilationAttr))
if (!op->getAttrOfType<StringAttr>(kXlaOutsideCompilationAttr)) {
auto successors = side_effect_analysis.DirectControlSuccessors(op);
// If a non-outside-compiled op with side-effect successors is encountered,
// close this cluster to additions so that no cyclic dependencies between
// clusters can be created.
if (!successors.empty()) {
closed_ = true;
}
return false;
}
if (host_cluster_ops_.empty()) return true;
@ -90,13 +102,15 @@ class OutsideCompiledCluster {
// cluster.
llvm::SmallPtrSet<Operation*, 8> host_cluster_ops_;
std::string cluster_name_;
bool closed_ = false; // Cluster is closed to further additions.
};
void TPUOutsideCompilationCluster::runOnFunction() {
void TPUOutsideCompilationCluster::runOnFunction(
FuncOp func, const TF::SideEffectAnalysis::Info& side_effect_analysis) {
llvm::SmallVector<OutsideCompiledCluster, 8> clusters;
int cluster_counter = 0;
getFunction().walk([&](tf_device::ClusterOp tpu_cluster) {
func.walk([&](tf_device::ClusterOp tpu_cluster) {
llvm::SmallVector<Operation*, 4> tpu_cluster_ops;
tpu_cluster_ops.reserve(tpu_cluster.getBody()->getOperations().size());
@ -108,12 +122,12 @@ void TPUOutsideCompilationCluster::runOnFunction() {
// Try to add the op to existing clusters.
bool added = false;
for (auto& cluster : clusters)
if ((added = cluster.AddOp(op))) break;
if ((added = cluster.AddOp(op, side_effect_analysis))) break;
// If the op cannot be added to existing clusters, create a new cluster.
if (!added) {
OutsideCompiledCluster new_cluster(cluster_counter++);
new_cluster.AddOp(op);
new_cluster.AddOp(op, side_effect_analysis);
clusters.push_back(new_cluster);
}
}
@ -122,7 +136,7 @@ void TPUOutsideCompilationCluster::runOnFunction() {
} // anonymous namespace
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUOutsideCompilationClusterPass() {
return std::make_unique<TPUOutsideCompilationCluster>();
}