Move tf.FusedBatchNormV2, tf.FusedBatchNormV3, and tf.SparseMatMul from tf_ops.td to tf_generated_ops.td (NFC).
- tf.FusedBatchNormV2 and tf.FusedBatchNormV3 are now defined separately.
- tf.SparseMatMul now matches the op in the TensorFlow op registry (attribute `transpose_a` is set to false by default).

PiperOrigin-RevId: 328800784
Change-Id: I012b7a4a02de3b1138534fdbd269cd1d60f09924
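The registry default mentioned in the second bullet can be inspected from Python. A minimal sketch, not part of this change; it relies on the internal op_def_registry module, whose location and API may differ across TensorFlow versions:

import tensorflow as tf  # importing TF registers the op definitions
from tensorflow.python.framework import op_def_registry

op_def = op_def_registry.get("SparseMatMul")
for attr in op_def.attr:
    if attr.type == "bool":
        print(attr.name, attr.default_value.b)
# Expected: transpose_a False, transpose_b False,
#           a_is_sparse False, b_is_sparse False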
@@ -3853,6 +3853,95 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors.
  }];
}

def TF_FusedBatchNormV2Op : TF_Op<"FusedBatchNormV2", [NoSideEffect, TF_FoldOperandsTransposeInterface, TF_LayoutSensitiveInterface]> {
  let summary = "Batch normalization.";

  let description = [{
Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
The size of 1D Tensors matches the dimension C of the 4D Tensors.
  }];

  let arguments = (ins
    TensorOf<[BF16, F16, F32]>:$x,
    F32Tensor:$scale,
    F32Tensor:$offset,
    F32Tensor:$mean,
    F32Tensor:$variance,

    DefaultValuedAttr<F32Attr, "0.0001f">:$epsilon,
    DefaultValuedAttr<F32Attr, "1.0f">:$exponential_avg_factor,
    DefaultValuedAttr<TF_ConvnetDataFormatAttr, "NHWC">:$data_format,
    DefaultValuedAttr<BoolAttr, "true">:$is_training
  );

  let results = (outs
    TensorOf<[BF16, F16, F32]>:$y,
    F32Tensor:$batch_mean,
    F32Tensor:$batch_variance,
    F32Tensor:$reserve_space_1,
    F32Tensor:$reserve_space_2
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>;

  let extraClassDeclaration = [{
    // TF_FoldOperandsTransposeInterface:
    SmallVector<unsigned, 4> GetLayoutDependentArgs() { return {0}; }
    SmallVector<unsigned, 4> GetLayoutDependentResults() { return {0}; }
    LogicalResult FoldOperandsPermutation(ArrayRef<int64_t> permutation);

    // TF_LayoutSensitiveInterface:
    StringRef GetOptimalLayout(const RuntimeDevices& devices);
    LogicalResult UpdateDataFormat(StringRef data_format);
  }];
}

def TF_FusedBatchNormV3Op : TF_Op<"FusedBatchNormV3", [NoSideEffect, TF_FoldOperandsTransposeInterface, TF_LayoutSensitiveInterface]> {
  let summary = "Batch normalization.";

  let description = [{
Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
The size of 1D Tensors matches the dimension C of the 4D Tensors.
  }];

  let arguments = (ins
    TensorOf<[BF16, F16, F32]>:$x,
    F32Tensor:$scale,
    F32Tensor:$offset,
    F32Tensor:$mean,
    F32Tensor:$variance,

    DefaultValuedAttr<F32Attr, "0.0001f">:$epsilon,
    DefaultValuedAttr<F32Attr, "1.0f">:$exponential_avg_factor,
    DefaultValuedAttr<TF_ConvnetDataFormatAttr, "NHWC">:$data_format,
    DefaultValuedAttr<BoolAttr, "true">:$is_training
  );

  let results = (outs
    TensorOf<[BF16, F16, F32]>:$y,
    F32Tensor:$batch_mean,
    F32Tensor:$batch_variance,
    F32Tensor:$reserve_space_1,
    F32Tensor:$reserve_space_2,
    F32Tensor:$reserve_space_3
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>;

  let extraClassDeclaration = [{
    // TF_FoldOperandsTransposeInterface:
    SmallVector<unsigned, 4> GetLayoutDependentArgs() { return {0}; }
    SmallVector<unsigned, 4> GetLayoutDependentResults() { return {0}; }
    LogicalResult FoldOperandsPermutation(ArrayRef<int64_t> permutation);

    // TF_LayoutSensitiveInterface:
    StringRef GetOptimalLayout(const RuntimeDevices& devices);
    LogicalResult UpdateDataFormat(StringRef data_format);
  }];
}

def TF_GatherOp : TF_Op<"Gather", [NoSideEffect]> {
  let summary = "Gather slices from `params` according to `indices`.";
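For reference, a minimal Python sketch (not part of this diff) exercising the op that the TF_FusedBatchNormV3Op definition above models; the call-site defaults mirror the ODS attributes (epsilon = 0.0001, exponential_avg_factor = 1.0, data_format = "NHWC", is_training = true), and V3 returns one more reserve tensor than V2:

import tensorflow as tf

x = tf.random.normal([8, 4, 4, 16])   # NHWC input with C = 16
scale = tf.ones([16])
offset = tf.zeros([16])
mean = tf.zeros([16])                  # ignored while is_training=True
variance = tf.ones([16])

# V3 produces six results; reserve_space_3 is the extra one compared to V2.
y, batch_mean, batch_variance, r1, r2, r3 = tf.raw_ops.FusedBatchNormV3(
    x=x, scale=scale, offset=offset, mean=mean, variance=variance,
    epsilon=0.0001, exponential_avg_factor=1.0,
    data_format="NHWC", is_training=True)

print(y.shape, batch_mean.shape)       # (8, 4, 4, 16) (16,)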
@@ -9834,6 +9923,41 @@ backpropagation,
  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>;
}

def TF_SparseMatMulOp : TF_Op<"SparseMatMul", [NoSideEffect]> {
  let summary = [{
Multiply matrix "a" by matrix "b".
  }];

  let description = [{
The inputs must be two-dimensional matrices and the inner dimension of "a" must
match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not
`SparseTensor`s. This op is optimized for the case where at least one of "a" or
"b" is sparse, in the sense that they have a large proportion of zero values.
The breakeven for using this versus a dense matrix multiply on one platform was
30% zero values in the sparse matrix.

The gradient computation of this operation will only take advantage of sparsity
in the input gradient when that gradient comes from a Relu.
  }];

  let arguments = (ins
    TensorOf<[BF16, F32]>:$a,
    TensorOf<[BF16, F32]>:$b,

    DefaultValuedAttr<BoolAttr, "false">:$transpose_a,
    DefaultValuedAttr<BoolAttr, "false">:$transpose_b,
    DefaultValuedAttr<BoolAttr, "false">:$a_is_sparse,
    DefaultValuedAttr<BoolAttr, "false">:$b_is_sparse
  );

  let results = (outs
    F32Tensor:$product
  );

  TF_DerivedOperandTypeAttr Ta = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr Tb = TF_DerivedOperandTypeAttr<1>;
}

def TF_SparseReshapeOp : TF_Op<"SparseReshape", [NoSideEffect]> {
  let summary = [{
Reshapes a SparseTensor to represent values in a new dense shape.
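Again for reference only, a small Python sketch (not part of this change) calling the op defined above through tf.raw_ops; with all four boolean attributes left at their registry defaults the call is a plain dense matmul, and the product is always f32 even for bfloat16 inputs, matching the F32Tensor:$product result:

import tensorflow as tf

a = tf.cast(tf.random.normal([128, 256]), tf.bfloat16)
b = tf.cast(tf.random.normal([256, 64]), tf.bfloat16)

# transpose_a, transpose_b, a_is_sparse, b_is_sparse all default to False.
product = tf.raw_ops.SparseMatMul(a=a, b=b)
print(product.shape, product.dtype)    # (128, 64) <dtype: 'float32'>

# The sparseness hints only select a different kernel path;
# the result is still a dense f32 tensor.
hinted = tf.raw_ops.SparseMatMul(a=a, b=b, a_is_sparse=True)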
@@ -570,36 +570,6 @@ def TF_PlaceholderWithDefaultOp : TF_Op<"PlaceholderWithDefault", [NoSideEffect]
  DerivedAttr shape = TF_DerivedResultShapeAttr;
}

def TF_SparseMatMulOp : TF_Op<"SparseMatMul", [NoSideEffect]> {
  let summary = [{
SparseMatMul is MatMul with hints on the sparseness of the matrices.
  }];

  let description = [{
Similar to MatMul, with a_is_sparse and b_is_sparse indicating whether a and b
are sparse matrices.
  }];

  let arguments = (ins
    TensorOf<[BF16, F32]>:$a,
    TensorOf<[BF16, F32]>:$b,

    DefaultValuedAttr<BoolAttr, "true">:$a_is_sparse,
    DefaultValuedAttr<BoolAttr, "false">:$b_is_sparse,

    DefaultValuedAttr<BoolAttr, "false">:$transpose_a,
    DefaultValuedAttr<BoolAttr, "false">:$transpose_b
  );

  let results = (outs
    TensorOf<[F32]>:$product
  );

  TF_DerivedOperandTypeAttr Ta = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr Tb = TF_DerivedOperandTypeAttr<1>;
}


def TF_StatefulPartitionedCallOp : TF_Op<"StatefulPartitionedCall",
                                         [CallOpInterface]> {
  let summary =
@@ -1213,63 +1183,6 @@ def TF_TPUPartitionedCallOp : TF_Op<"TPUPartitionedCall", [CallOpInterface]> {
  let verifier = [{ return VerifyPartitionedCall(*this); }];
}

class TF_FusedBatchNormOpBase<string Name> : TF_Op<Name, [NoSideEffect, TF_FoldOperandsTransposeInterface, TF_LayoutSensitiveInterface]> {
  let summary = "Batch normalization.";

  let description = [{
Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
The size of 1D Tensors matches the dimension C of the 4D Tensors.
  }];

  let arguments = (ins
    TensorOf<[BF16, F16, F32]>:$x,
    F32Tensor:$scale,
    F32Tensor:$offset,
    F32Tensor:$mean,
    F32Tensor:$variance,

    DefaultValuedAttr<F32Attr, "0.0001f">:$epsilon,
    DefaultValuedAttr<F32Attr, "1.0f">:$exponential_avg_factor,
    DefaultValuedAttr<TF_ConvnetDataFormatAttr, "NHWC">:$data_format,
    DefaultValuedAttr<BoolAttr, "true">:$is_training
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
  TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>;

  let extraClassDeclaration = [{
    // TF_FoldOperandsTransposeInterface:
    SmallVector<unsigned, 4> GetLayoutDependentArgs() { return {0}; }
    SmallVector<unsigned, 4> GetLayoutDependentResults() { return {0}; }
    LogicalResult FoldOperandsPermutation(ArrayRef<int64_t> permutation);

    // TF_LayoutSensitiveInterface:
    StringRef GetOptimalLayout(const RuntimeDevices& devices);
    LogicalResult UpdateDataFormat(StringRef data_format);
  }];
}

def TF_FusedBatchNormV2Op : TF_FusedBatchNormOpBase<"FusedBatchNormV2"> {
  let results = (outs
    TensorOf<[BF16, F16, F32]>:$y,
    F32Tensor:$batch_mean,
    F32Tensor:$batch_variance,
    F32Tensor:$reserve_space_1,
    F32Tensor:$reserve_space_2
  );
}

def TF_FusedBatchNormV3Op : TF_FusedBatchNormOpBase<"FusedBatchNormV3"> {
  let results = (outs
    TensorOf<[BF16, F16, F32]>:$y,
    F32Tensor:$batch_mean,
    F32Tensor:$batch_variance,
    F32Tensor:$reserve_space_1,
    F32Tensor:$reserve_space_2,
    F32Tensor:$reserve_space_3
  );
}

def TF_BatchFunctionOp : TF_Op<"BatchFunction", [AttrSizedOperandSegments]> {
  let summary = [{
Batches all the inputs tensors to the computation done by the function.