Update tf.DataFormatVecPermute, tf.Mean, tf.StringToHashBucketFast, and tf._FusedBatchNormEx to be generated ops in TensorFlow MLIR ODS (NFC).

- FusedBatchNormEx C++ op has been renamed to _FusedBatchNormEx
- DataFormatVecPermute description has been updated to match across TensorFlow MLIR ODS and TensorFlow op registry

PiperOrigin-RevId: 327691258
Change-Id: Ic813a0f0d80db770d285f6b32695f4bb3488676b
2020-08-20 13:51:06 -07:00 · 2020-08-20 13:51:06 -07:00 · 0dc35b4d7d
commit 0dc35b4d7d
parent 91e5ad0fad
5 changed files with 166 additions and 129 deletions
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
@ -2236,6 +2236,48 @@ the source data format.
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
 }
 // ODS record for the TensorFlow `DataFormatVecPermute` op.
 // The op carries the NoSideEffect and SameOperandsAndResultType traits; both
 // the operand `x` and the result `y` are constrained to TF_I32OrI64Tensor.
 def TF_DataFormatVecPermuteOp : TF_Op<"DataFormatVecPermute", [NoSideEffect, SameOperandsAndResultType]> {
  let summary = "Permute input tensor from `src_format` to `dst_format`.";
  let description = [{
 Input tensor must be a vector of size 4, or a 4x2 tensor.
 For example, with `src_format` of `NHWC`, `dst_format` of `NCHW`, and inputs:
 ```
 [1, 2, 3, 4]
 ```
 and
 ```
 [[1, 2, 3, 4],
 [5, 6, 7, 8]]
 ```
 , the outputs will be (respectively):
 ```
 [1, 4, 2, 3]
 ```
 and
 ```
 [[1, 4, 2, 3],
 [5, 8, 6, 7]]
 ```
  }];
  // `src_format` and `dst_format` are string attributes that default to
  // "NHWC" and "NCHW" respectively when not specified.
  let arguments = (ins
    TF_I32OrI64Tensor:$x,
    DefaultValuedAttr<StrAttr, "NHWC">:$src_format,
    DefaultValuedAttr<StrAttr, "NCHW">:$dst_format
  );
  let results = (outs
    TF_I32OrI64Tensor:$y
  );
  // `T` is a derived attribute declared against operand index 0 (`x`).
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  // Verification is delegated to a `Verify` function that is not defined in
  // this record; it must be provided by the accompanying C++ sources.
  let verifier = [{ return Verify(*this); }];
 }
 def TF_DebugIdentityV2Op : TF_Op<"DebugIdentityV2", []> {
  let summary = "Debug Identity V2 Op.";
@ -6303,6 +6345,38 @@ def TF_MaximumOp : TF_Op<"Maximum", [NoSideEffect, ResultsBroadcastableShape, TF
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
 }
 // ODS record for the TensorFlow `Mean` reduction op.
 // The op carries the NoSideEffect trait and TF_FoldOperandsTransposeInterface.
 // NOTE(review): the description text refers to `axis`, while the operand
 // declared below is named `reduction_indices` -- presumably the same thing;
 // confirm against the TensorFlow op registry.
 def TF_MeanOp : TF_Op<"Mean", [NoSideEffect, TF_FoldOperandsTransposeInterface]> {
  let summary = "Computes the mean of elements across dimensions of a tensor.";
  let description = [{
 Reduces `input` along the dimensions given in `axis`. Unless
 `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
 `axis`. If `keep_dims` is true, the reduced dimensions are
 retained with length 1.
  }];
  // `input` and `output` accept the same explicit list of element types;
  // `keep_dims` is a boolean attribute defaulting to false.
  let arguments = (ins
    TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$input,
    TF_I32OrI64Tensor:$reduction_indices,
    DefaultValuedAttr<BoolAttr, "false">:$keep_dims
  );
  let results = (outs
    TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$output
  );
  // Derived attributes: `T` is declared against operand index 0 (`input`) and
  // `Tidx` against operand index 1 (`reduction_indices`).
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>;
  // Extra C++ members for the op class. GetLayoutDependentArgs returns {0},
  // GetLayoutDependentResults returns an empty list, and
  // FoldOperandsPermutation is only declared here (its definition is not part
  // of this record).
  let extraClassDeclaration = [{
    // TF_FoldOperandsTransposeInterface:
    SmallVector<unsigned, 4> GetLayoutDependentArgs() { return {0}; }
    SmallVector<unsigned, 4> GetLayoutDependentResults() { return {}; }
    LogicalResult FoldOperandsPermutation(ArrayRef<int64_t> permutation);
  }];
 }
 def TF_MergeSummaryOp : TF_Op<"MergeSummary", [NoSideEffect, SameOperandsAndResultType]> {
  let summary = "Merges summaries.";
@ -10466,6 +10540,36 @@ Examples:
  TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<0>;
 }
 // ODS record for the TensorFlow `StringToHashBucketFast` op.
 // Takes a string tensor operand and produces an I64 tensor result; the op is
 // marked NoSideEffect.
 def TF_StringToHashBucketFastOp : TF_Op<"StringToHashBucketFast", [NoSideEffect]> {
  let summary = [{
 Converts each string in the input Tensor to its hash mod by a number of buckets.
  }];
  let description = [{
 The hash function is deterministic on the content of the string within the
 process and will never change. However, it is not suitable for cryptography.
 This function may be used when CPU time is scarce and inputs are trusted or
 unimportant. There is a risk of adversaries constructing inputs that all hash
 to the same bucket. To prevent this problem, use a strong hash function with
 `tf.string_to_hash_bucket_strong`.
 Examples:
 >>> tf.strings.to_hash_bucket_fast(["Hello", "TensorFlow", "2.x"], 3).numpy()
 array([0, 2, 2])
  }];
  // `num_buckets` is a required 64-bit integer attribute constrained by
  // IntMinValue<1>; it has no default value.
  let arguments = (ins
    TF_StrTensor:$input,
    Confined<I64Attr, [IntMinValue<1>]>:$num_buckets
  );
  let results = (outs
    I64Tensor:$output
  );
 }
 def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape, TF_CwiseBinary, TF_SameOperandsAndResultElementTypeResolveRef]>,
               WithBroadcastableBinOpBuilder {
  let summary = "Returns x - y element-wise.";
@ -12715,6 +12819,43 @@ def TF_ZerosLikeOp : TF_Op<"ZerosLike", [NoSideEffect, SameOperandsAndResultType
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
 }
 // ODS record for the TensorFlow `_FusedBatchNormEx` op.
 // The record name keeps the op's leading underscore (`TF__FusedBatchNormExOp`).
 // The op is marked NoSideEffect and has five fixed operands followed by a
 // variadic `side_input` operand, and six results.
 def TF__FusedBatchNormExOp : TF_Op<"_FusedBatchNormEx", [NoSideEffect]> {
  let summary = "Internal FusedBatchNorm operation: reserved for internal use.";
  let description = [{
 Do not invoke this operator directly in Python. A fusion optimization is
 expected to create these operators.
  }];
  // `x` and `side_input` accept F16 or F32 tensors; `scale`, `offset`, `mean`
  // and `variance` are F32 tensors. All attributes have default values.
  let arguments = (ins
    TensorOf<[F16, F32]>:$x,
    F32Tensor:$scale,
    F32Tensor:$offset,
    F32Tensor:$mean,
    F32Tensor:$variance,
    Variadic<TensorOf<[F16, F32]>>:$side_input,
    DefaultValuedAttr<F32Attr, "0.0001f">:$epsilon,
    DefaultValuedAttr<F32Attr, "1.0f">:$exponential_avg_factor,
    DefaultValuedAttr<StrAttr, "Identity">:$activation_mode,
    DefaultValuedAttr<TF_ConvnetDataFormatAttr, "NHWC">:$data_format,
    DefaultValuedAttr<BoolAttr, "true">:$is_training
  );
  let results = (outs
    TensorOf<[F16, F32]>:$y,
    F32Tensor:$batch_mean,
    F32Tensor:$batch_variance,
    F32Tensor:$reserve_space_1,
    F32Tensor:$reserve_space_2,
    F32Tensor:$reserve_space_3
  );
  // Derived attributes: `T` is declared against operand index 0 (`x`), `U`
  // against operand index 1 (`scale`), and `num_side_inputs` against the size
  // of operand index 5 (the variadic `side_input`).
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>;
  TF_DerivedOperandSizeAttr num_side_inputs = TF_DerivedOperandSizeAttr<5>;
 }
 def TF__FusedConv2DOp : TF_Op<"_FusedConv2D", [NoSideEffect]> {
  let summary = [{
 Performs a convolution followed by a specified series of operations.
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
@ -217,30 +217,6 @@ source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs:
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
 }
 // Hand-written ODS record for the TensorFlow `DataFormatVecPermute` op.
 // Operand `x` and result `y` are both TF_I32OrI64Tensor; the op carries the
 // NoSideEffect and SameOperandsAndResultType traits.
 def TF_DataFormatVecPermuteOp : TF_Op<"DataFormatVecPermute", [NoSideEffect, SameOperandsAndResultType]> {
  let summary = "Permute input tensor from `src_format` to `dst_format`";
  let description = [{
 Input tensor must be a vector of size 4, or a 4x2 tensor.
  }];
  // The two format attributes are strings defaulting to "NHWC" and "NCHW".
  let arguments = (ins
    TF_I32OrI64Tensor:$x,
    DefaultValuedAttr<StrAttr, "NHWC">:$src_format,
    DefaultValuedAttr<StrAttr, "NCHW">:$dst_format
  );
  let results = (outs
    TF_I32OrI64Tensor:$y
  );
  // `T` is a derived attribute declared against operand index 0 (`x`).
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  // Verification is delegated to a `Verify` function defined outside this
  // record.
  let verifier = [{ return Verify(*this); }];
 }
 def TF_EmptyTensorListOp : TF_TensorListInitOp<"EmptyTensorList"> {
  let summary = "Creates and returns an empty tensor list.";
@ -394,38 +370,6 @@ else_branch: A region that computes the outputs of the op if cond = false.
  }];
 }
 // Hand-written ODS record for the TensorFlow `Mean` reduction op.
 // Uses the TF_NumberTensor constraint for both `input` and `output`.
 // NOTE(review): the description refers to `axis`, while the operand declared
 // below is named `reduction_indices` -- presumably the same thing; confirm.
 def TF_MeanOp : TF_Op<"Mean", [NoSideEffect, TF_FoldOperandsTransposeInterface]> {
  let summary = "Computes the mean of elements across dimensions of a tensor.";
  let description = [{
 Reduces `input` along the dimensions given in `axis`. Unless
 `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
 `axis`. If `keep_dims` is true, the reduced dimensions are
 retained with length 1.
  }];
  // `keep_dims` is a boolean attribute defaulting to false.
  let arguments = (ins
    TF_NumberTensor:$input,
    TF_I32OrI64Tensor:$reduction_indices,
    DefaultValuedAttr<BoolAttr, "false">:$keep_dims
  );
  let results = (outs
    TF_NumberTensor:$output
  );
  // Derived attributes: `T` is declared against operand index 0 (`input`) and
  // `Tidx` against operand index 1 (`reduction_indices`).
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>;
  // Extra C++ members for the op class; FoldOperandsPermutation is only
  // declared here, so its definition must live elsewhere.
  let extraClassDeclaration = [{
    // TF_FoldOperandsTransposeInterface:
    SmallVector<unsigned, 4> GetLayoutDependentArgs() { return {0}; }
    SmallVector<unsigned, 4> GetLayoutDependentResults() { return {}; }
    LogicalResult FoldOperandsPermutation(ArrayRef<int64_t> permutation);
  }];
 }
 def TF_LegacyCallOp : TF_Op<"LegacyCall",
                            [CallOpInterface, NoSideEffect]> {
  let summary =
@ -884,45 +828,6 @@ Example:
    TF_DerivedOperandOrResultHandleShapeAttr<"resource">;
 }
 // Not generated because it begins with an underscore, which isn't allowed by
 // the C++ standard.
 // Hand-written ODS record for the TensorFlow `_FusedBatchNormEx` op. The
 // record name drops the leading underscore (`TF_FusedBatchNormExOp`) while the
 // op mnemonic passed to TF_Op keeps it.
 def TF_FusedBatchNormExOp : TF_Op<"_FusedBatchNormEx", [NoSideEffect]> {
  let summary = "Internal FusedBatchNorm operation: reserved for internal use";
  let description = [{
 Do not invoke this operator directly in Python. A fusion optimization is
 expected to create these operators.
  }];
  // Five fixed operands followed by the variadic `side_input`; every attribute
  // has a default value.
  let arguments = (ins
    TensorOf<[F16, F32]>:$x,
    F32Tensor:$scale,
    F32Tensor:$offset,
    F32Tensor:$mean,
    F32Tensor:$variance,
    Variadic<TensorOf<[F16, F32]>>:$side_input,
    DefaultValuedAttr<F32Attr, "0.0001f">:$epsilon,
    DefaultValuedAttr<F32Attr, "1.0f">:$exponential_avg_factor,
    DefaultValuedAttr<StrAttr, "Identity">:$activation_mode,
    DefaultValuedAttr<TF_ConvnetDataFormatAttr, "NHWC">:$data_format,
    DefaultValuedAttr<BoolAttr, "true">:$is_training
  );
  let results = (outs
    TensorOf<[F16, F32]>:$y,
    F32Tensor:$batch_mean,
    F32Tensor:$batch_variance,
    F32Tensor:$reserve_space_1,
    F32Tensor:$reserve_space_2,
    F32Tensor:$reserve_space_3
  );
  // Derived attributes: `T` from operand index 0 (`x`), `U` from operand
  // index 1 (`scale`), `num_side_inputs` from the size of operand index 5
  // (`side_input`).
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>;
  TF_DerivedOperandSizeAttr num_side_inputs = TF_DerivedOperandSizeAttr<5>;
 }
 // Multiple variadic operands with different sizes are not supported by the
 // dialect generator, so we manually added the op.
 def TF_SendTPUEmbeddingGradientsOp : TF_Op<"SendTPUEmbeddingGradients", [AttrSizedOperandSegments]> {
@ -1272,36 +1177,6 @@ This function is faster and numerically stabler than `bessel_i1(x)`.
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
 }
 // Hand-written ODS record for the TensorFlow `StringToHashBucketFast` op.
 // Takes a string tensor operand and produces an I64 tensor result; the op is
 // marked NoSideEffect.
 def TF_StringToHashBucketFastOp : TF_Op<"StringToHashBucketFast", [NoSideEffect]> {
  let summary = [{
 Converts each string in the input Tensor to its hash mod by a number of buckets.
  }];
  let description = [{
 The hash function is deterministic on the content of the string within the
 process and will never change. However, it is not suitable for cryptography.
 This function may be used when CPU time is scarce and inputs are trusted or
 unimportant. There is a risk of adversaries constructing inputs that all hash
 to the same bucket. To prevent this problem, use a strong hash function with
 `tf.string_to_hash_bucket_strong`.
 Examples:
 >>> tf.strings.to_hash_bucket_fast(["Hello", "TensorFlow", "2.x"], 3).numpy()
 array([0, 2, 2])
  }];
  // `num_buckets` is a required 64-bit integer attribute constrained by
  // IntMinValue<1>.
  let arguments = (ins
    TF_StrTensor:$input,
    Confined<I64Attr, [IntMinValue<1>]>:$num_buckets
  );
  let results = (outs
    I64Tensor:$output
  );
 }
 def TF_TPUPartitionedCallOp : TF_Op<"TPUPartitionedCall", [CallOpInterface]> {
  let summary = "Calls a function placed on a specified TPU device.";
--- a/tensorflow/compiler/mlir/tensorflow/transforms/gpu_fusion.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/gpu_fusion.cc
@ -91,7 +91,7 @@ struct ReluToFusedBatchNorm : public OpRewritePattern<ReluOp> {
    // Build the newly fused operation to replace the batch norm
    OperationState state(batch_norm.getLoc(),
-                         FusedBatchNormExOp::getOperationName());
+                         _FusedBatchNormExOp::getOperationName());
    state.addOperands(batch_norm.getOperands());
    if (side_input) state.operands.push_back(side_input);
    state.addTypes(batch_norm.getResultTypes());
--- a/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DataFormatVecPermute.pbtxt
@ -24,8 +24,27 @@ END
 destination data format.
 END
  }
-  summary: "Returns the permuted vector/tensor in the destination data format given the"
+  summary: "Permute input tensor from `src_format` to `dst_format`."
  description: <<END
-one in the source data format.
+Input tensor must be a vector of size 4, or a 4x2 tensor.
 For example, with `src_format` of `NHWC`, `dst_format` of `NCHW`, and inputs:
 ```
 [1, 2, 3, 4]
 ```
 and
 ```
 [[1, 2, 3, 4],
 [5, 6, 7, 8]]
 ```
 , the outputs will be (respectively):
 ```
 [1, 4, 2, 3]
 ```
 and
 ```
 [[1, 4, 2, 3],
 [5, 8, 6, 7]]
 ```
 END
 }
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@ -252,7 +252,9 @@ REGISTER_OP("_FusedBatchNormEx")
    .Attr("is_training: bool = true")
    .SetShapeFn(shape_inference::FusedBatchNormExShape)
    .Doc(R"doc(
-*NOTE*: Do not invoke this operator directly in Python. Grappler is
+Internal FusedBatchNorm operation: reserved for internal use.
 Do not invoke this operator directly in Python. A fusion optimization is
 expected to create these operators.
 )doc");