Use uint8_t, int32_t and uint32_t.

Also delete unused build_defs.bzl.

PiperOrigin-RevId: 311087719
Change-Id: Iaa27b214e5d2e5227c4a5d454cb244ee70311086
2020-05-12 02:30:36 -07:00 · 2020-05-12 02:30:36 -07:00 · 13ce8851cb
parent e89413399b
commit 13ce8851cb
4 changed files with 22 additions and 117 deletions
--- a/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
@@ -1,96 +0,0 @@
-load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
-
-def _lookup_file(filegroup, path):
-    """Extracts file at (relative) path in filegroup."""
-    for file in filegroup.files.to_list():
-        if file.path.endswith(path):
-            return file
-    return None
-
-def _gen_kernel_image_hdr_impl(ctx):
-    if not ctx.attr.gpu_archs:
-        fail("No GPU architecture specified, use --config=cuda or similar.")
-
-    name = ctx.attr.name
-    tile_sizes = ctx.attr.tile_size.replace("x", ",")
-    same_shape = []
-    if ctx.attr.same_shape:
-        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)
-
-    cubins = []
-    images = []
-    for arch in ctx.attr.gpu_archs:
-        filename = "%s.%s.cubin" % (name, arch)
-        cubin = ctx.actions.declare_file(filename)
-        ctx.actions.run(
-            outputs = [cubin],
-            executable = ctx.executable._tool,
-            arguments = same_shape + [
-                "--tile_sizes=%s" % tile_sizes,
-                "--arch=%s" % arch.split("_")[1],
-                "--output=%s" % cubin.path,
-                ctx.attr.op,
-            ],
-            mnemonic = "compile",
-        )
-        cubins.append(cubin)
-        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
-
-    # Generate fatbin file from all cubins.
-    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
-    ctx.actions.run(
-        outputs = [fatbin],
-        inputs = cubins,
-        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
-        arguments = [
-            "--64",
-            "--cmdline=--compile-only",
-            "--link",
-            "--compress-all",
-            "--create=%s" % fatbin.path,
-        ] + images,
-        mnemonic = "fatbinary",
-    )
-
-    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
-    ctx.actions.run_shell(
-        outputs = [ctx.outputs.out],
-        inputs = [fatbin],
-        tools = [bin2c],
-        command = "%s --static --const --type=int --name=%s %s 1> %s" %
-                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
-        mnemonic = "bin2c",
-    )
-
-_gen_kernel_image_hdr = rule(
-    implementation = _gen_kernel_image_hdr_impl,
-    output_to_genfiles = True,
-    attrs = {
-        "op": attr.string(mandatory = True),
-        "tile_size": attr.string(mandatory = True),
-        "same_shape": attr.string(),
-        "out": attr.output(mandatory = True),
-        "symbol": attr.string(mandatory = True),
-        "gpu_archs": attr.string_list(mandatory = True),
-        "_cuda_root": attr.label(
-            default = Label("//third_party/gpus/cuda:cuda_root"),
-        ),
-        "_tool": attr.label(
-            executable = True,
-            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
-            cfg = "host",
-        ),
-    },
-)
-
-def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
-    """Generates a C header with fatbin data from a Tensorflow op."""
-    _gen_kernel_image_hdr(
-        name = name,
-        op = op,
-        tile_size = tile_size,
-        same_shape = same_shape,
-        out = "include/tfrt/gpu/ops/tf/%s.h" % name,
-        symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
-        gpu_archs = cuda_gpu_select_list("sm_{}"),
-    )
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
@@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
    : public mlir::PassWrapper<PropagateStaticKnowledge,
                               mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
  explicit PropagateStaticKnowledge(mlir::FunctionType type,
-                                    llvm::ArrayRef<unsigned> same_shape_)
+                                    llvm::ArrayRef<uint32_t> same_shape_)
      : func_type(type), same_shape(same_shape_) {}

  void runOnOperation() override {
@@ -152,8 +152,8 @@ struct PropagateStaticKnowledge
        func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
    mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
        func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
-    unsigned arg_pos = 0;
-    std::vector<unsigned> positions;
+    uint32_t arg_pos = 0;
+    std::vector<uint32_t> positions;
    for (mlir::Type arg_type : func_type.getInputs()) {
      positions.push_back(arg_pos);
      func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
@@ -165,13 +165,13 @@ struct PropagateStaticKnowledge
    // can use that here. Simply replace usages of the shape parameters within
    // the function body to a single shape parameter.
    if (!same_shape.empty()) {
-      int first = same_shape.front();
-      int first_offset = positions.at(first);
+      auto first = same_shape.front();
+      auto first_offset = positions.at(first);
      mlir::ShapedType first_type =
          func_type.getInput(first).cast<mlir::ShapedType>();
-      unsigned rank = first_type.getRank();
-      for (int same : same_shape.drop_front(1)) {
-        unsigned same_offset = positions.at(same);
+      uint32_t rank = first_type.getRank();
+      for (auto same : same_shape.drop_front(1)) {
+        uint32_t same_offset = positions.at(same);
        auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
        if (same_type.getRank() != rank) {
          func.emitOpError() << "same shape constraints on arguments with "
@@ -180,7 +180,7 @@ struct PropagateStaticKnowledge
          signalPassFailure();
        }

-        for (int i = 0; i < 2 * rank; ++i) {
+        for (uint32_t i = 0; i < 2 * rank; ++i) {
          // Replace uses for second arg data with first arg.
          auto same_arg = func.getArgument(same_offset + 3 + i);
          auto first_arg = func.getArgument(first_offset + 3 + i);
@@ -191,11 +191,11 @@ struct PropagateStaticKnowledge
  }

  mlir::FunctionType func_type;
-  llvm::ArrayRef<unsigned> same_shape;
+  llvm::ArrayRef<uint32_t> same_shape;
 };

 Status PropagateStaticShapeKnowledgeToKernel(
-    mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) {
+    mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
  // Grab the original signature from the single function.
  auto func = *module.getBody()->op_begin<mlir::FuncOp>();

@@ -218,10 +218,10 @@ Status PropagateStaticShapeKnowledgeToKernel(
 }
 }  // namespace

-StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability,
-    llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape,
-    llvm::ArrayRef<unsigned> unroll_factors) {
+StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
+    llvm::StringRef tf_code, std::pair<int32_t, int32_t> compute_capability,
+    llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
+    llvm::ArrayRef<uint32_t> unroll_factors) {
  mlir::MLIRContext context;
  context.allowUnregisteredDialects();  // TODO(b/152572127)
  mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
@@ -30,11 +30,12 @@ limitations under the License.

 namespace tensorflow {
 namespace kernel_gen {
-xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5},
-    llvm::ArrayRef<unsigned> tile_sizes = {16, 64},
-    llvm::ArrayRef<unsigned> same_shape = {},
-    llvm::ArrayRef<unsigned> unroll_factors = {});
+xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
+    llvm::StringRef tf_code,
+    std::pair<int32_t, int32_t> compute_capability = {7, 5},
+    llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
+    llvm::ArrayRef<uint32_t> same_shape = {},
+    llvm::ArrayRef<uint32_t> unroll_factors = {});
 }  // namespace kernel_gen
 }  // namespace tensorflow

--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
@@ -102,7 +102,7 @@ int main(int argc, char** argv) {
    return 1;
  }

-  std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie();
+  std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();

  auto status = tensorflow::WriteStringToFile(
      tensorflow::Env::Default(), output_file,