Include side effect analysis for clustering.

With side effects, there is the possibility of introducing improper clustering and cyclical dependencies between clusters. This is handled here by closing an outside compiled cluster to additional ops once a side effect with successors is encountered. (Note: the ops are traversed in reverse order.)

PiperOrigin-RevId: 332300167
Change-Id: Ieecc9cac3bfe056599a29e98bd24d66bf1a66b08
This commit is contained in:
Ken Franko 2020-09-17 13:13:47 -07:00 committed by TensorFlower Gardener
parent d20ce07ed4
commit 69662b03be
3 changed files with 58 additions and 21 deletions

View File

@ -135,6 +135,27 @@ func @two_clusters_with_two_ops_each() {
return return
} }
// CHECK-LABEL: func @resource_side_effect_cycle
func @resource_side_effect_cycle(%arg0: tensor<!tf.resource<tensor<f32>>>, %arg1: tensor<!tf.resource<tensor<f32>>>) {
// CHECK: "tf.ReadVariableOp"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.Identity"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
// CHECK-NEXT: "tf.AssignVariableOp"
// CHECK-NOT: {_xla_outside_compilation = "[[CLUSTER1]]"
"tf_device.cluster"() ( {
%read0 = "tf.ReadVariableOp"(%arg0) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>) -> tensor<f32>
%idet0 = "tf.Identity"(%read0) {_xla_outside_compilation = "0"} : (tensor<f32>) -> tensor<f32>
"tf.AssignVariableOp"(%arg1, %idet0) : (tensor<!tf.resource<tensor<f32>>>, tensor<f32>) -> ()
%read1 = "tf.ReadVariableOp"(%arg1) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>) -> tensor<f32>
%idet1 = "tf.Identity"(%read1) {_xla_outside_compilation = "0"} : (tensor<f32>) -> tensor<f32>
%add0 = "tf.AddV2"(%idet0, %idet1) {_xla_outside_compilation = "0"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tf.AssignVariableOp"(%arg0, %add0) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>, tensor<f32>) -> ()
tf_device.return
}) {cluster_attr = "cluster_attr"} : () -> ()
return
}
// CHECK-LABEL: func @two_clusters_with_same_parent // CHECK-LABEL: func @two_clusters_with_same_parent
func @two_clusters_with_same_parent() { func @two_clusters_with_same_parent() {
// CHECK: "tf.opA" // CHECK: "tf.opA"
@ -172,7 +193,7 @@ func @two_clusters_with_same_outside_compiled_parent() {
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13:[a-zA-Z_0-9]+]]" // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.opD" // CHECK-NEXT: "tf.opD"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER14:[a-zA-Z_0-9]+]]" // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER14:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.opE" // CHECK-NEXT: "tf.Identity"
// CHECK-NEXT: "tf.opF" // CHECK-NEXT: "tf.opF"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13]]" // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13]]"
// CHECK-NEXT: "tf.opG" // CHECK-NEXT: "tf.opG"
@ -182,7 +203,7 @@ func @two_clusters_with_same_outside_compiled_parent() {
%b = "tf.opB"(%a) : (tensor<i32>) -> tensor<i32> %b = "tf.opB"(%a) : (tensor<i32>) -> tensor<i32>
%c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32> %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
%d = "tf.opD"() {_xla_outside_compilation = "0"} : () -> tensor<i32> %d = "tf.opD"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
%e = "tf.opE"(%d) : (tensor<i32>) -> tensor<i32> %e = "tf.Identity"(%d) : (tensor<i32>) -> tensor<i32>
%f = "tf.opF"(%e) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32> %f = "tf.opF"(%e) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
%g = "tf.opG"(%c, %f) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>) -> tensor<i32> %g = "tf.opG"(%c, %f) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
tf_device.return tf_device.return
@ -213,7 +234,8 @@ func @outside_compile_with_block() {
// CHECK-NEXT: "tf.opB" // CHECK-NEXT: "tf.opB"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15]]" // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15]]"
// CHECK: "tf.opC" // CHECK: "tf.opC"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15]]" // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER14]]"
// CHECK-SAME: _xla_outside_compilation = "{{[a-zA-Z_0-9]+}}"
"tf_device.cluster"() ( { "tf_device.cluster"() ( {
%a = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<i32> %a = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
%b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32> %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
@ -254,16 +276,16 @@ func @check_ops_with_data_dependency_added_as_host_cluster() {
// CHECK: "tf.opA" // CHECK: "tf.opA"
// CHECK-NEXT: "tf.opB" // CHECK-NEXT: "tf.opB"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16:[a-zA-Z_0-9]+]]" // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16:[a-zA-Z_0-9]+]]"
// CHECK-NEXT: "tf.opC" // CHECK-NEXT: "tf.Identity"
// CHECK-NEXT: "tf.opD" // CHECK-NEXT: "tf.Identity"
// CHECK-NEXT: "tf.opE" // CHECK-NEXT: "tf.opE"
// CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16]]" // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16]]"
// CHECK-NEXT: "tf.opF" // CHECK-NEXT: "tf.opF"
"tf_device.cluster"() ( { "tf_device.cluster"() ( {
%a = "tf.opA"() : () -> tensor<i32> %a = "tf.opA"() : () -> tensor<i32>
%b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32> %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
%c = "tf.opC"(%b) : (tensor<i32>) -> tensor<i32> %c = "tf.Identity"(%b) : (tensor<i32>) -> tensor<i32>
%d = "tf.opD"(%c) : (tensor<i32>) -> tensor<i32> %d = "tf.Identity"(%c) : (tensor<i32>) -> tensor<i32>
%e = "tf.opE"(%d, %b, %c) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<i32> %e = "tf.opE"(%d, %b, %c) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<i32>
"tf.opF"(%e) : (tensor<i32>) -> () "tf.opF"(%e) : (tensor<i32>) -> ()
tf_device.return tf_device.return

View File

@ -331,7 +331,8 @@ std::unique_ptr<OperationPass<ModuleOp>> CreateTPUVariableReformattingPass();
// Creates a pass that groups outside compiled operations (CPU ops inside TPU // Creates a pass that groups outside compiled operations (CPU ops inside TPU
// cluster) into clusters that can be extracted and run on the CPU. // cluster) into clusters that can be extracted and run on the CPU.
std::unique_ptr<OperationPass<FuncOp>> CreateTPUOutsideCompilationClusterPass(); std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUOutsideCompilationClusterPass();
// Creates a pass that extracts outside compilation (CPU ops inside TPU cluster) // Creates a pass that extracts outside compilation (CPU ops inside TPU cluster)
// at head/tail of TPU cluster to run before/after TPU computation. // at head/tail of TPU cluster to run before/after TPU computation.

View File

@ -23,6 +23,7 @@ limitations under the License.
#include "mlir/IR/Types.h" // from @llvm-project #include "mlir/IR/Types.h" // from @llvm-project
#include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project
#include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project
#include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h"
#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
@ -34,8 +35,10 @@ namespace {
constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation";
struct TPUOutsideCompilationCluster struct TPUOutsideCompilationCluster
: public PassWrapper<TPUOutsideCompilationCluster, FunctionPass> { : public TF::PerFunctionAggregateAnalysisConsumerPass<
void runOnFunction() override; TPUOutsideCompilationCluster, TF::SideEffectAnalysis> {
void runOnFunction(FuncOp func,
const TF::SideEffectAnalysis::Info& side_effect_analysis);
}; };
// Represents an outside compiled cluster. All ops that are added to the same // Represents an outside compiled cluster. All ops that are added to the same
@ -47,11 +50,10 @@ class OutsideCompiledCluster {
// Attempts to add an op to this cluster. Ops can be grouped to the same // Attempts to add an op to this cluster. Ops can be grouped to the same
// cluster if they have data dependency and are inside the same block. // cluster if they have data dependency and are inside the same block.
// TODO(kfranko): Ensure that side effecting ops are checked before being bool AddOp(Operation* op,
// grouped to a same cluster. const TF::SideEffectAnalysis::Info& side_effect_analysis) {
bool AddOp(Operation* op) {
// Check if the op is safe to add before adding it. // Check if the op is safe to add before adding it.
if (IsSafeToAdd(op)) { if (IsSafeToAdd(op, side_effect_analysis)) {
op->setAttr(kXlaOutsideCompilationAttr, op->setAttr(kXlaOutsideCompilationAttr,
StringAttr::get(cluster_name_, op->getContext())); StringAttr::get(cluster_name_, op->getContext()));
host_cluster_ops_.insert(op); host_cluster_ops_.insert(op);
@ -62,11 +64,21 @@ class OutsideCompiledCluster {
private: private:
// Checks if it is safe for an op to be merged into this cluster. // Checks if it is safe for an op to be merged into this cluster.
bool IsSafeToAdd(Operation* op) { bool IsSafeToAdd(Operation* op,
const TF::SideEffectAnalysis::Info& side_effect_analysis) {
if (closed_) return false;
// If the op is not marked for outside compilation it doesn't belong in a // If the op is not marked for outside compilation it doesn't belong in a
// cluster. // cluster.
if (!op->getAttrOfType<StringAttr>(kXlaOutsideCompilationAttr)) if (!op->getAttrOfType<StringAttr>(kXlaOutsideCompilationAttr)) {
auto successors = side_effect_analysis.DirectControlSuccessors(op);
// If non outside compiled op with side effect successors is encountered,
// close this cluster to additions so that no cluster cyclic dependencies
// can be created.
if (!successors.empty()) {
closed_ = true;
}
return false; return false;
}
if (host_cluster_ops_.empty()) return true; if (host_cluster_ops_.empty()) return true;
@ -90,13 +102,15 @@ class OutsideCompiledCluster {
// cluster. // cluster.
llvm::SmallPtrSet<Operation*, 8> host_cluster_ops_; llvm::SmallPtrSet<Operation*, 8> host_cluster_ops_;
std::string cluster_name_; std::string cluster_name_;
bool closed_ = false; // Cluster is closed to further additions.
}; };
void TPUOutsideCompilationCluster::runOnFunction() { void TPUOutsideCompilationCluster::runOnFunction(
FuncOp func, const TF::SideEffectAnalysis::Info& side_effect_analysis) {
llvm::SmallVector<OutsideCompiledCluster, 8> clusters; llvm::SmallVector<OutsideCompiledCluster, 8> clusters;
int cluster_counter = 0; int cluster_counter = 0;
getFunction().walk([&](tf_device::ClusterOp tpu_cluster) { func.walk([&](tf_device::ClusterOp tpu_cluster) {
llvm::SmallVector<Operation*, 4> tpu_cluster_ops; llvm::SmallVector<Operation*, 4> tpu_cluster_ops;
tpu_cluster_ops.reserve(tpu_cluster.getBody()->getOperations().size()); tpu_cluster_ops.reserve(tpu_cluster.getBody()->getOperations().size());
@ -108,12 +122,12 @@ void TPUOutsideCompilationCluster::runOnFunction() {
// Try to add the op to existing clusters. // Try to add the op to existing clusters.
bool added = false; bool added = false;
for (auto& cluster : clusters) for (auto& cluster : clusters)
if ((added = cluster.AddOp(op))) break; if ((added = cluster.AddOp(op, side_effect_analysis))) break;
// If the op cannot be added to existing clusters, create a new cluster. // If the op cannot be added to existing clusters, create a new cluster.
if (!added) { if (!added) {
OutsideCompiledCluster new_cluster(cluster_counter++); OutsideCompiledCluster new_cluster(cluster_counter++);
new_cluster.AddOp(op); new_cluster.AddOp(op, side_effect_analysis);
clusters.push_back(new_cluster); clusters.push_back(new_cluster);
} }
} }
@ -122,7 +136,7 @@ void TPUOutsideCompilationCluster::runOnFunction() {
} // anonymous namespace } // anonymous namespace
std::unique_ptr<OperationPass<FuncOp>> std::unique_ptr<OperationPass<ModuleOp>>
CreateTPUOutsideCompilationClusterPass() { CreateTPUOutsideCompilationClusterPass() {
return std::make_unique<TPUOutsideCompilationCluster>(); return std::make_unique<TPUOutsideCompilationCluster>();
} }