Rollback of rollback of "Extend basic binary operations to more integral types"

Rollback of rollback of "Extend basic binary operations to more integral types" Fix failing tests. PiperOrigin-RevId: 354331434 Change-Id: I172aed649e4a3682659157377a79846d8f2e3a9d
2021-01-28 09:45:33 -08:00 · 2021-01-28 09:45:33 -08:00 · 6e1b85454d
commit 6e1b85454d
parent 77e6a080e5
20 changed files with 103 additions and 68 deletions
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
@ -8080,12 +8080,12 @@ def TF_MulOp : TF_Op<"Mul", [Commutative, NoSideEffect, ResultsBroadcastableShap
  }];

  let arguments = (ins
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint8]>:$x,
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint8]>:$y
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint64, TF_Uint32, TF_Uint16, TF_Uint8]>:$x,
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint64, TF_Uint32, TF_Uint16, TF_Uint8]>:$y
  );

  let results = (outs
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint8]>:$z
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint64, TF_Uint32, TF_Uint16, TF_Uint8]>:$z
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
@ -9042,12 +9042,12 @@ tf.pow(x, y) ==> [[256, 65536], [9, 27]]
  }];

  let arguments = (ins
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int32, TF_Int64]>:$x,
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int32, TF_Int64]>:$y
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int8, TF_Int16, TF_Int32, TF_Int64]>:$x,
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int8, TF_Int16, TF_Int32, TF_Int64]>:$y
  );

  let results = (outs
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int32, TF_Int64]>:$z
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int8, TF_Int16, TF_Int32, TF_Int64]>:$z
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
@ -14355,12 +14355,12 @@ def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape, TF_CwiseBi
  }];

  let arguments = (ins
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint32, TF_Uint8]>:$x,
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint32, TF_Uint8]>:$y
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint64, TF_Uint32, TF_Uint8]>:$x,
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint64, TF_Uint32, TF_Uint8]>:$y
  );

  let results = (outs
-    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint32, TF_Uint8]>:$z
+    TensorOf<[TF_Bfloat16, TF_Complex128, TF_Complex64, TF_Float16, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Uint16, TF_Uint64, TF_Uint32, TF_Uint8]>:$z
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td
@ -1381,12 +1381,12 @@ def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastable
  }];

  let arguments = (ins
-    TensorOf<[TF_Float, TF_SInt, TF_Complex, TF_Uint8, TF_Uint32]>:$x,
-    TensorOf<[TF_Float, TF_SInt, TF_Complex, TF_Uint8, TF_Uint32]>:$y
+    TensorOf<[TF_Float, TF_SInt, TF_Complex, TF_Uint8, TF_Uint16, TF_Uint32, TF_Uint64]>:$x,
+    TensorOf<[TF_Float, TF_SInt, TF_Complex, TF_Uint8, TF_Uint16, TF_Uint32, TF_Uint64]>:$y
  );

  let results = (outs
-    TensorOf<[TF_Float, TF_SInt, TF_Complex, TF_Uint8, TF_Uint32]>:$z
+    TensorOf<[TF_Float, TF_SInt, TF_Complex, TF_Uint8, TF_Uint16, TF_Uint32, TF_Uint64]>:$z
  );

  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
--- a/tensorflow/core/kernels/cwise_op_add_2.cc
+++ b/tensorflow/core/kernels/cwise_op_add_2.cc
@ -26,20 +26,20 @@ REGISTER6(BinaryOp, CPU, "Add", functor::add, int8, int16, complex64, uint8,
          complex128, tstring);
 // Notice: String is excluded to allow marking AddV2 is_commutative and
 // is_aggregate.
-REGISTER6(BinaryOp, CPU, "AddV2", functor::add, int8, int16, uint32, complex64,
-          uint8, complex128);
+REGISTER8(BinaryOp, CPU, "AddV2", functor::add, int8, int16, complex64, uint8,
+          uint16, uint32, uint64, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-REGISTER4(BinaryOp, GPU, "Add", functor::add, uint8, int64, complex64,
-          complex128);
+REGISTER6(BinaryOp, GPU, "Add", functor::add, uint8, uint16, uint64, int64,
+          complex64, complex128);

 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER5(BinaryOp, GPU, "AddV2", functor::add, uint8, uint32, int64, complex64,
-          complex128);
+REGISTER7(BinaryOp, GPU, "AddV2", functor::add, uint8, uint16, uint32, uint64,
+          int64, complex64, complex128);
 #else
 // There is an MLIR generated kernel for int64
-REGISTER4(BinaryOp, GPU, "AddV2", functor::add, uint8, uint32, complex64,
-          complex128);
+REGISTER6(BinaryOp, GPU, "AddV2", functor::add, uint8, uint16, uint32, uint64,
+          complex64, complex128);
 #endif

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/tensorflow/core/kernels/cwise_op_gpu_add.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_add.cu.cc
@ -19,8 +19,8 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY8(add, Eigen::half, float, double, uint8, uint32, int64, complex64,
-               complex128);
+DEFINE_BINARY10(add, Eigen::half, float, double, uint8, uint16, uint32, uint64,
+                int64, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/cwise_op_gpu_greater.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_greater.cu.cc
@ -19,7 +19,8 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY7(greater, Eigen::half, float, double, int64, uint8, int8, int16);
+DEFINE_BINARY10(greater, Eigen::half, float, double, int64, uint8, uint16,
+                uint32, uint64, int8, int16);
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/cwise_op_gpu_greater_equal.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_greater_equal.cu.cc
@ -19,8 +19,8 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY7(greater_equal, Eigen::half, float, double, int64, uint8, int8,
-               int16);
+DEFINE_BINARY10(greater_equal, Eigen::half, float, double, int64, uint8, uint16,
+                uint32, uint64, int8, int16);
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/cwise_op_gpu_less.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_less.cu.cc
@ -19,7 +19,8 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY7(less, Eigen::half, float, double, int64, uint8, int8, int16);
+DEFINE_BINARY10(less, Eigen::half, float, double, int64, uint8, uint16, uint32,
+                uint64, int8, int16);
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/cwise_op_gpu_less_equal.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_less_equal.cu.cc
@ -19,8 +19,8 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY7(less_equal, Eigen::half, float, double, int64, uint8, int8,
-               int16);
+DEFINE_BINARY10(less_equal, Eigen::half, float, double, int64, uint8, uint16,
+                uint32, uint64, int8, int16);
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/cwise_op_gpu_mul.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_mul.cu.cc
@ -19,8 +19,9 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY11(mul, Eigen::half, float, double, uint8, int8, uint16, int16,
-                int32, int64, complex64, complex128);
+DEFINE_BINARY11(mul, Eigen::half, float, double, uint8, uint32, int8, uint16,
+                int16, int32, int64, complex64);
+DEFINE_BINARY2(mul, uint64, complex128);
 DEFINE_BINARY5(mul_no_nan, Eigen::half, float, double, complex64, complex128);
 }  // namespace functor
 }  // namespace tensorflow
--- a/tensorflow/core/kernels/cwise_op_gpu_sub.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_sub.cu.cc
@ -19,8 +19,9 @@ limitations under the License.

 namespace tensorflow {
 namespace functor {
-DEFINE_BINARY7(sub, Eigen::half, float, double, int64, complex64, complex128,
-               uint32);
+DEFINE_BINARY9(sub, Eigen::half, float, double, int64, complex64, complex128,
+               uint8, uint16, uint32);
+DEFINE_BINARY1(sub, uint64);
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/cwise_op_greater.cc
+++ b/tensorflow/core/kernels/cwise_op_greater.cc
@ -17,15 +17,19 @@ limitations under the License.

 namespace tensorflow {
 REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half,
-          double, int32, int64, uint8, int8, int16, bfloat16);
+          double, int32, int64, uint8, uint16, uint32, uint64);
+REGISTER3(BinaryOp, CPU, "Greater", functor::greater, int8, int16, bfloat16);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER7(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half,
-          double, int64, uint8, int8, int16);
+REGISTER6(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half,
+          double, int8, int16, int64);
+REGISTER4(BinaryOp, GPU, "Greater", functor::greater, uint8, uint16, uint32,
+          uint64);
 #else
 // TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
-REGISTER(BinaryOp, GPU, "Greater", functor::greater, uint8);
+REGISTER4(BinaryOp, GPU, "Greater", functor::greater, uint8, uint16, uint32,
+          uint64);
 #endif

 // A special GPU kernel for int32.
--- a/tensorflow/core/kernels/cwise_op_greater_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc
@ -17,15 +17,19 @@ limitations under the License.

 namespace tensorflow {
 REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float,
-          Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16);
+          Eigen::half, double, int32, int64, uint8, uint16, uint32, uint64);
+REGISTER3(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, int8, int16,
+          bfloat16);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER7(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float,
-          Eigen::half, double, int64, uint8, int8, int16);
+REGISTER9(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float,
+          Eigen::half, double, int64, uint8, uint16, uint32, uint64, int8);
+REGISTER(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, int16);
 #else
 // TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
-REGISTER(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, uint8);
+REGISTER4(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, uint8, uint16,
+          uint32, uint64);
 #endif

 // A special GPU kernel for int32.
--- a/tensorflow/core/kernels/cwise_op_less.cc
+++ b/tensorflow/core/kernels/cwise_op_less.cc
@ -18,16 +18,18 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double,
          bfloat16, int32);
-REGISTER4(BinaryOp, CPU, "Less", functor::less, int64, uint8, int8, int16);
+REGISTER7(BinaryOp, CPU, "Less", functor::less, uint8, uint16, uint32, uint64,
+          int8, int16, int64);

 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double,
-          int64, uint8, int8, int16);
+REGISTER9(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double,
+          int64, uint8, uint16, uint32, uint64, int8);
+REGISTER(BinaryOp, GPU, "Less", functor::less, int16);
 #else
 // TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
-REGISTER(BinaryOp, GPU, "Less", functor::less, uint8);
+REGISTER4(BinaryOp, GPU, "Less", functor::less, uint8, uint16, uint32, uint64);
 #endif

 // A special GPU kernel for int32.
--- a/tensorflow/core/kernels/cwise_op_less_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_less_equal.cc
@ -18,17 +18,19 @@ limitations under the License.
 namespace tensorflow {
 REGISTER5(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half,
          bfloat16, double, int32);
-REGISTER4(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, int8,
-          int16);
+REGISTER7(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, uint16,
+          uint32, uint64, int8, int16);

 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half,
-          double, int64, uint8, int8, int16);
+REGISTER9(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half,
+          double, int64, uint8, uint16, uint32, uint64, int8);
+REGISTER(BinaryOp, GPU, "LessEqual", functor::less_equal, int16);
 #else
 // TODO(b/172804967): We do not generate unsigned kernels for GPU via mlir.
-REGISTER(BinaryOp, GPU, "LessEqual", functor::less_equal, uint8);
+REGISTER4(BinaryOp, GPU, "LessEqual", functor::less_equal, uint8, uint16,
+          uint32, uint64);
 #endif

 // A special GPU kernel for int32.
--- a/tensorflow/core/kernels/cwise_op_mul_2.cc
+++ b/tensorflow/core/kernels/cwise_op_mul_2.cc
@ -22,15 +22,16 @@ namespace tensorflow {
 // sharded files, only make its register calls when not __ANDROID_TYPES_SLIM__.
 #if !defined(__ANDROID_TYPES_SLIM__)

-REGISTER6(BinaryOp, CPU, "Mul", functor::mul, int8, uint16, int16, int64,
-          complex64, complex128);
+REGISTER8(BinaryOp, CPU, "Mul", functor::mul, int8, uint16, uint32, uint64,
+          int16, int64, complex64, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER6(BinaryOp, GPU, "Mul", functor::mul, int8, uint16, int16, int64,
-          complex64, complex128);
+REGISTER8(BinaryOp, GPU, "Mul", functor::mul, int8, uint16, uint32, uint64,
+          int16, int64, complex64, complex128);
 #else
-REGISTER3(BinaryOp, GPU, "Mul", functor::mul, uint16, complex64, complex128);
+REGISTER5(BinaryOp, GPU, "Mul", functor::mul, uint16, uint32, uint64, complex64,
+          complex128);
 #endif

 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/tensorflow/core/kernels/cwise_op_pow.cc
+++ b/tensorflow/core/kernels/cwise_op_pow.cc
@ -18,7 +18,7 @@ limitations under the License.
 namespace tensorflow {
 REGISTER6(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, bfloat16,
          double, complex64, complex128);
-REGISTER2(BinaryOp, CPU, "Pow", functor::safe_pow, int32, int64);
+REGISTER4(BinaryOp, CPU, "Pow", functor::safe_pow, int8, int16, int32, int64);

 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@ -20,8 +20,8 @@ REGISTER8(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32,
          int64, bfloat16, complex64, complex128);
 #if !defined(__ANDROID_TYPES_SLIM__)
 // Sub op for int8, uint8, int16, uint16
-REGISTER5(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16,
-          uint32);
+REGISTER6(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16,
+          uint32, uint64);
 #else
 // We only register the first type when we have multi-argument calls in the
 // case where we're trying to reduce executable size, but it turns out that the
@ -32,10 +32,11 @@ REGISTER(BinaryOp, CPU, "Sub", functor::sub, int32);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
-REGISTER7(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
-          complex64, complex128, uint32);
+REGISTER8(BinaryOp, GPU, "Sub", functor::sub, float, Eigen::half, double, int64,
+          complex64, complex128, uint32, uint64);
 #else
-REGISTER3(BinaryOp, GPU, "Sub", functor::sub, complex64, complex128, uint32);
+REGISTER4(BinaryOp, GPU, "Sub", functor::sub, complex64, complex128, uint32,
+          uint64);
 #endif

 // A special GPU kernel for int32.
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@ -361,7 +361,7 @@ REGISTER_OP("Rint")
 #define BINARY_MORE()                                                          \
  Input("x: T").Input("y: T").Output("z: T").Attr(                             \
      "T: {bfloat16, half, float, double, uint8, int8, uint16, int16, int32, " \
-      "int64, complex64, complex128}")
+      "uint32, uint64, int64, complex64, complex128}")

 #define BINARY_FEWER()                                               \
  Input("x: T").Input("y: T").Output("z: T").Attr(                   \
@ -382,8 +382,8 @@ REGISTER_OP("AddV2")
    .Input("y: T")
    .Output("z: T")
    .Attr(
-        "T: {bfloat16, half, float, double, uint8, int8, int16, uint32, int32, "
-        "int64, complex64, complex128}")
+        "T: {bfloat16, half, float, double, uint8, uint16, uint32, uint64, "
+        "int8, int16, int32, int64, complex64, complex128}")
    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
    .SetIsAggregate()
    .SetIsCommutative();
@ -433,7 +433,7 @@ REGISTER_OP("Sub")
    .Output("z: T")
    .Attr(
        "T: {bfloat16, half, float, double, uint8, int8, uint16, int16, int32, "
-        "int64, complex64, complex128, uint32}")
+        "int64, complex64, complex128, uint32, uint64}")
    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);

 REGISTER_OP("_MklSub")
@ -542,7 +542,9 @@ REGISTER_OP("Maximum")
    .Input("x: T")
    .Input("y: T")
    .Output("z: T")
-    .Attr("T: {bfloat16, half, float, double, uint8, int16, int32, int64}")
+    .Attr(
+        "T: {bfloat16, half, float, double, int8, int16, int32, int64, "
+        "uint8, uint16, uint32, uint64}")
    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);

 // Note: This op is not commutative w.r.t. to all its inputs.
@ -595,8 +597,8 @@ REGISTER_OP("Pow")
    .Input("y: T")
    .Output("z: T")
    .Attr(
-        "T: {bfloat16, float, half, double, int32, int64, complex64, "
-        "complex128}")
+        "T: {bfloat16, float, half, double, int8, int16, int32, int64, "
+        "complex64, complex128}")
    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn);

 REGISTER_OP("Igammac")
--- a/tensorflow/python/framework/python_api_info_test.py
+++ b/tensorflow/python/framework/python_api_info_test.py
@ -96,7 +96,8 @@ class PythonAPIInfoTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       "    {index=1, name=y, is_list=0},]\n"
       "  inputs_with_type_attr=[\n"
       "    {type_attr=T, tensor_params=[0, 1], ok_dtypes=[DT_BFLOAT16, "
-       "DT_HALF, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8, DT_INT16, DT_UINT32, "
+       "DT_HALF, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_UINT16, DT_UINT32, "
+       "DT_UINT64, DT_INT8, DT_INT16, "
       "DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128]},]\n"
       "  inferred_type_attrs=[T]\n"),
      # An op with an int attribute.
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@ -871,6 +871,11 @@ class MathOpsOverloadTest(test.TestCase):
        dtypes_lib.float32,
        dtypes_lib.float64,
        dtypes_lib.bfloat16,
+        dtypes_lib.uint16,
+        dtypes_lib.uint32,
+        dtypes_lib.uint64,
+        dtypes_lib.int8,
+        dtypes_lib.int16,
        dtypes_lib.int32,
        dtypes_lib.int64,
        dtypes_lib.complex64,
@ -890,6 +895,10 @@ class MathOpsOverloadTest(test.TestCase):
          if dtype in (dtypes_lib.complex64,
                       dtypes_lib.complex128) and tf_func == _FLOORDIV:
            continue  # floordiv makes no sense for complex
+          if dtype in (dtypes_lib.uint16, dtypes_lib.uint32,
+                       dtypes_lib.uint64) and tf_func in (_POW, _FLOORDIV,
+                                                          _TRUEDIV):
+            continue  # power and div not supported for unsigned types
          self._compareBinary(10, 5, dtype, np_func, tf_func)
    # Mod only works for int32 and int64.
    for dtype in [dtypes_lib.int32, dtypes_lib.int64]:
@ -901,6 +910,12 @@ class MathOpsOverloadTest(test.TestCase):
        dtypes_lib.float16,
        dtypes_lib.float32,
        dtypes_lib.float64,
+        dtypes_lib.uint8,
+        dtypes_lib.uint16,
+        dtypes_lib.uint32,
+        dtypes_lib.uint64,
+        dtypes_lib.int8,
+        dtypes_lib.int16,
        dtypes_lib.int32,
        dtypes_lib.int64,
    ]