From 13ce8851cb96a848c49f2050b3b98ee1762a5ad0 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 12 May 2020 02:30:36 -0700
Subject: [PATCH] Use uint8_t and uint32_t.

Also delete unused build_defs.bzl.

PiperOrigin-RevId: 311087719
Change-Id: Iaa27b214e5d2e5227c4a5d454cb244ee70311086
---
 .../mlir/tools/kernel_gen/build_defs.bzl      | 96 -------------------
 .../mlir/tools/kernel_gen/cubin_creator.cc    | 30 +++---
 .../mlir/tools/kernel_gen/cubin_creator.h     | 11 ++-
 .../mlir/tools/kernel_gen/tf_to_cubin.cc      |  2 +-
 4 files changed, 22 insertions(+), 117 deletions(-)
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl

diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl b/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
deleted file mode 100644
index cec9968e65b..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
+++ /dev/null
@@ -1,96 +0,0 @@
-load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
-
-def _lookup_file(filegroup, path):
-    """Extracts file at (relative) path in filegroup."""
-    for file in filegroup.files.to_list():
-        if file.path.endswith(path):
-            return file
-    return None
-
-def _gen_kernel_image_hdr_impl(ctx):
-    if not ctx.attr.gpu_archs:
-        fail("No GPU architecture specified, use --config=cuda or similar.")
-
-    name = ctx.attr.name
-    tile_sizes = ctx.attr.tile_size.replace("x", ",")
-    same_shape = []
-    if ctx.attr.same_shape:
-        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)
-
-    cubins = []
-    images = []
-    for arch in ctx.attr.gpu_archs:
-        filename = "%s.%s.cubin" % (name, arch)
-        cubin = ctx.actions.declare_file(filename)
-        ctx.actions.run(
-            outputs = [cubin],
-            executable = ctx.executable._tool,
-            arguments = same_shape + [
-                "--tile_sizes=%s" % tile_sizes,
-                "--arch=%s" % arch.split("_")[1],
-                "--output=%s" % cubin.path,
-                ctx.attr.op,
-            ],
-            mnemonic = "compile",
-        )
-        cubins.append(cubin)
-        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
-
-    # Generate fatbin file from all cubins.
-    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
-    ctx.actions.run(
-        outputs = [fatbin],
-        inputs = cubins,
-        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
-        arguments = [
-            "--64",
-            "--cmdline=--compile-only",
-            "--link",
-            "--compress-all",
-            "--create=%s" % fatbin.path,
-        ] + images,
-        mnemonic = "fatbinary",
-    )
-
-    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
-    ctx.actions.run_shell(
-        outputs = [ctx.outputs.out],
-        inputs = [fatbin],
-        tools = [bin2c],
-        command = "%s --static --const --type=int --name=%s %s 1> %s" %
-                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
-        mnemonic = "bin2c",
-    )
-
-_gen_kernel_image_hdr = rule(
-    implementation = _gen_kernel_image_hdr_impl,
-    output_to_genfiles = True,
-    attrs = {
-        "op": attr.string(mandatory = True),
-        "tile_size": attr.string(mandatory = True),
-        "same_shape": attr.string(),
-        "out": attr.output(mandatory = True),
-        "symbol": attr.string(mandatory = True),
-        "gpu_archs": attr.string_list(mandatory = True),
-        "_cuda_root": attr.label(
-            default = Label("//third_party/gpus/cuda:cuda_root"),
-        ),
-        "_tool": attr.label(
-            executable = True,
-            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
-            cfg = "host",
-        ),
-    },
-)
-
-def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
-    """Generates a C header with fatbin data from a Tensorflow op."""
-    _gen_kernel_image_hdr(
-        name = name,
-        op = op,
-        tile_size = tile_size,
-        same_shape = same_shape,
-        out = "include/tfrt/gpu/ops/tf/%s.h" % name,
-        symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
-        gpu_archs = cuda_gpu_select_list("sm_{}"),
-    )
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
index 45d10214a42..b1c4b1beae1 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
@@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
     : public mlir::PassWrapper<PropagateStaticKnowledge,
                                mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
   explicit PropagateStaticKnowledge(mlir::FunctionType type,
-                                    llvm::ArrayRef<unsigned> same_shape_)
+                                    llvm::ArrayRef<uint32_t> same_shape_)
       : func_type(type), same_shape(same_shape_) {}

   void runOnOperation() override {
@@ -152,8 +152,8 @@ struct PropagateStaticKnowledge
         func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
     mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
         func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
-    unsigned arg_pos = 0;
-    std::vector<unsigned> positions;
+    uint32_t arg_pos = 0;
+    std::vector<uint32_t> positions;
     for (mlir::Type arg_type : func_type.getInputs()) {
       positions.push_back(arg_pos);
       func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
@@ -165,13 +165,13 @@ struct PropagateStaticKnowledge
     // can use that here. Simply replace usages of the shape parameters within
     // the function body to a single shape parameter.
     if (!same_shape.empty()) {
-      int first = same_shape.front();
-      int first_offset = positions.at(first);
+      auto first = same_shape.front();
+      auto first_offset = positions.at(first);
       mlir::ShapedType first_type =
           func_type.getInput(first).cast<mlir::ShapedType>();
-      unsigned rank = first_type.getRank();
-      for (int same : same_shape.drop_front(1)) {
-        unsigned same_offset = positions.at(same);
+      uint32_t rank = first_type.getRank();
+      for (auto same : same_shape.drop_front(1)) {
+        uint32_t same_offset = positions.at(same);
         auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
         if (same_type.getRank() != rank) {
           func.emitOpError() << "same shape constraints on arguments with "
@@ -180,7 +180,7 @@ struct PropagateStaticKnowledge
           signalPassFailure();
         }

-        for (int i = 0; i < 2 * rank; ++i) {
+        for (uint32_t i = 0; i < 2 * rank; ++i) {
           // Replace uses for second arg data with first arg.
           auto same_arg = func.getArgument(same_offset + 3 + i);
           auto first_arg = func.getArgument(first_offset + 3 + i);
@@ -191,11 +191,11 @@ struct PropagateStaticKnowledge
   }

   mlir::FunctionType func_type;
-  llvm::ArrayRef<unsigned> same_shape;
+  llvm::ArrayRef<uint32_t> same_shape;
 };

 Status PropagateStaticShapeKnowledgeToKernel(
-    mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) {
+    mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
   // Grab the original signature from the single function.
   auto func = *module.getBody()->op_begin<mlir::LLVM::LLVMFuncOp>();

@@ -218,10 +218,10 @@ Status PropagateStaticShapeKnowledgeToKernel(
 }
 }  // namespace

-StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability,
-    llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape,
-    llvm::ArrayRef<unsigned> unroll_factors) {
+StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
+    llvm::StringRef tf_code, std::pair<uint32_t, uint32_t> compute_capability,
+    llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
+    llvm::ArrayRef<uint32_t> unroll_factors) {
   mlir::MLIRContext context;
   context.allowUnregisteredDialects();  // TODO(b/152572127)
   mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
index c8746330c49..47626ba9d0d 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
@@ -30,11 +30,12 @@ limitations under the License.
 namespace tensorflow {
 namespace kernel_gen {

-xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5},
-    llvm::ArrayRef<unsigned> tile_sizes = {16, 64},
-    llvm::ArrayRef<unsigned> same_shape = {},
-    llvm::ArrayRef<unsigned> unroll_factors = {});
+xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
+    llvm::StringRef tf_code,
+    std::pair<uint32_t, uint32_t> compute_capability = {7, 5},
+    llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
+    llvm::ArrayRef<uint32_t> same_shape = {},
+    llvm::ArrayRef<uint32_t> unroll_factors = {});

 }  // namespace kernel_gen
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
index c9b447f5cad..8edc567e777 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
@@ -102,7 +102,7 @@ int main(int argc, char** argv) {
     return 1;
   }

-  std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie();
+  std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();

   auto status = tensorflow::WriteStringToFile(
       tensorflow::Env::Default(), output_file,
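
A note on the argument arithmetic in PropagateStaticKnowledge above: under
MLIR's memref-to-LLVM lowering, each ranked memref argument of the kernel is
expanded into 3 + 2 * rank scalar arguments (allocated pointer, aligned
pointer, offset, then rank sizes and rank strides), which is where
`arg_pos + 2` (the offset, forced to zero) and `same_offset + 3 + i` (the
2 * rank size/stride slots) come from. Below is a minimal standalone sketch
of that layout, assuming this lowering; the `MemrefArgLayout` struct and its
names are hypothetical illustrations, not code from the patch.

    #include <cstdint>
    #include <iostream>

    // Hypothetical helper (not from the patch): maps the fields of one
    // memref argument to indices in the flattened kernel argument list
    // produced by MLIR's memref-to-LLVM lowering:
    //   [base + 0] allocated pointer      [base + 1] aligned pointer
    //   [base + 2] offset
    //   [base + 3 .. base + 2 + rank]             sizes
    //   [base + 3 + rank .. base + 2 + 2 * rank]  strides
    struct MemrefArgLayout {
      uint32_t base;  // first flattened index, i.e. what positions.at(arg)
                      // records in the pass
      uint32_t rank;

      // The offset slot that the pass replaces with the constant zero
      // (hence `arg_pos + 2`).
      uint32_t offset_index() const { return base + 2; }

      // Sizes and strides form one contiguous run of 2 * rank arguments
      // starting at base + 3, which is why the pass rewires
      // `same_offset + 3 + i` to `first_offset + 3 + i` for i in
      // [0, 2 * rank).
      uint32_t shape_index(uint32_t i) const { return base + 3 + i; }

      // Where the next memref argument starts in the flattened list.
      uint32_t next_base() const { return base + 3 + 2 * rank; }
    };

    int main() {
      MemrefArgLayout arg0{/*base=*/0, /*rank=*/2};
      std::cout << "offset arg at index " << arg0.offset_index()    // 2
                << ", next memref starts at " << arg0.next_base()   // 7
                << "\n";
    }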