Use uint8_t, int32_t and uint32_t.

Also delete unused build_defs.bzl.

PiperOrigin-RevId: 311087719
Change-Id: Iaa27b214e5d2e5227c4a5d454cb244ee70311086
2020-05-12 02:30:36 -07:00 · 2020-05-12 02:30:36 -07:00 · 13ce8851cb
parent e89413399b
commit 13ce8851cb
4 changed files with 22 additions and 117 deletions
--- a/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
@ -1,96 +0,0 @@
 load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
 def _lookup_file(filegroup, path):
    """Extracts file at (relative) path in filegroup."""
    for file in filegroup.files.to_list():
        if file.path.endswith(path):
            return file
    return None
 def _gen_kernel_image_hdr_impl(ctx):
    if not ctx.attr.gpu_archs:
        fail("No GPU architecture specified, use --config=cuda or similar.")
    name = ctx.attr.name
    tile_sizes = ctx.attr.tile_size.replace("x", ",")
    same_shape = []
    if ctx.attr.same_shape:
        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)
    cubins = []
    images = []
    for arch in ctx.attr.gpu_archs:
        filename = "%s.%s.cubin" % (name, arch)
        cubin = ctx.actions.declare_file(filename)
        ctx.actions.run(
            outputs = [cubin],
            executable = ctx.executable._tool,
            arguments = same_shape + [
                "--tile_sizes=%s" % tile_sizes,
                "--arch=%s" % arch.split("_")[1],
                "--output=%s" % cubin.path,
                ctx.attr.op,
            ],
            mnemonic = "compile",
        )
        cubins.append(cubin)
        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
    # Generate fatbin file from all cubins.
    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
    ctx.actions.run(
        outputs = [fatbin],
        inputs = cubins,
        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
        arguments = [
            "--64",
            "--cmdline=--compile-only",
            "--link",
            "--compress-all",
            "--create=%s" % fatbin.path,
        ] + images,
        mnemonic = "fatbinary",
    )
    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
    ctx.actions.run_shell(
        outputs = [ctx.outputs.out],
        inputs = [fatbin],
        tools = [bin2c],
        command = "%s --static --const --type=int --name=%s %s 1> %s" %
                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
        mnemonic = "bin2c",
    )
 _gen_kernel_image_hdr = rule(
    implementation = _gen_kernel_image_hdr_impl,
    output_to_genfiles = True,
    attrs = {
        "op": attr.string(mandatory = True),
        "tile_size": attr.string(mandatory = True),
        "same_shape": attr.string(),
        "out": attr.output(mandatory = True),
        "symbol": attr.string(mandatory = True),
        "gpu_archs": attr.string_list(mandatory = True),
        "_cuda_root": attr.label(
            default = Label("//third_party/gpus/cuda:cuda_root"),
        ),
        "_tool": attr.label(
            executable = True,
            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
            cfg = "host",
        ),
    },
 )
 def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
    """Generates a C header with fatbin data from a Tensorflow op."""
    _gen_kernel_image_hdr(
        name = name,
        op = op,
        tile_size = tile_size,
        same_shape = same_shape,
        out = "include/tfrt/gpu/ops/tf/%s.h" % name,
        symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
        gpu_archs = cuda_gpu_select_list("sm_{}"),
    )
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
    : public mlir::PassWrapper<PropagateStaticKnowledge,
                               mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
  explicit PropagateStaticKnowledge(mlir::FunctionType type,
-                                    llvm::ArrayRef<unsigned> same_shape_)
+                                    llvm::ArrayRef<uint32_t> same_shape_)
      : func_type(type), same_shape(same_shape_) {}
  void runOnOperation() override {
@ -152,8 +152,8 @@ struct PropagateStaticKnowledge
        func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
    mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
        func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
-    unsigned arg_pos = 0;
+    uint32_t arg_pos = 0;
-    std::vector<unsigned> positions;
+    std::vector<uint32_t> positions;
    for (mlir::Type arg_type : func_type.getInputs()) {
      positions.push_back(arg_pos);
      func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
@ -165,13 +165,13 @@ struct PropagateStaticKnowledge
    // can use that here. Simply replace usages of the shape parameters within
    // the function body to a single shape parameter.
    if (!same_shape.empty()) {
-      int first = same_shape.front();
+      auto first = same_shape.front();
-      int first_offset = positions.at(first);
+      auto first_offset = positions.at(first);
      mlir::ShapedType first_type =
          func_type.getInput(first).cast<mlir::ShapedType>();
-      unsigned rank = first_type.getRank();
+      uint32_t rank = first_type.getRank();
-      for (int same : same_shape.drop_front(1)) {
+      for (auto same : same_shape.drop_front(1)) {
-        unsigned same_offset = positions.at(same);
+        uint32_t same_offset = positions.at(same);
        auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
        if (same_type.getRank() != rank) {
          func.emitOpError() << "same shape constraints on arguments with "
@ -180,7 +180,7 @@ struct PropagateStaticKnowledge
          signalPassFailure();
        }
-        for (int i = 0; i < 2 * rank; ++i) {
+        for (uint32_t i = 0; i < 2 * rank; ++i) {
          // Replace uses for second arg data with first arg.
          auto same_arg = func.getArgument(same_offset + 3 + i);
          auto first_arg = func.getArgument(first_offset + 3 + i);
@ -191,11 +191,11 @@ struct PropagateStaticKnowledge
  }
  mlir::FunctionType func_type;
-  llvm::ArrayRef<unsigned> same_shape;
+  llvm::ArrayRef<uint32_t> same_shape;
 };
 Status PropagateStaticShapeKnowledgeToKernel(
-    mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) {
+    mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
  // Grab the original signature from the single function.
  auto func = *module.getBody()->op_begin<mlir::FuncOp>();
@ -218,10 +218,10 @@ Status PropagateStaticShapeKnowledgeToKernel(
 }
 }  // namespace
-StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode(
+StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability,
+    llvm::StringRef tf_code, std::pair<int32_t, int32_t> compute_capability,
-    llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape,
+    llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
-    llvm::ArrayRef<unsigned> unroll_factors) {
+    llvm::ArrayRef<uint32_t> unroll_factors) {
  mlir::MLIRContext context;
  context.allowUnregisteredDialects();  // TODO(b/152572127)
  mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
@ -30,11 +30,12 @@ limitations under the License.
 namespace tensorflow {
 namespace kernel_gen {
-xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode(
+xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5},
+    llvm::StringRef tf_code,
-    llvm::ArrayRef<unsigned> tile_sizes = {16, 64},
+    std::pair<int32_t, int32_t> compute_capability = {7, 5},
-    llvm::ArrayRef<unsigned> same_shape = {},
+    llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
-    llvm::ArrayRef<unsigned> unroll_factors = {});
+    llvm::ArrayRef<uint32_t> same_shape = {},
+    llvm::ArrayRef<uint32_t> unroll_factors = {});
 }  // namespace kernel_gen
 }  // namespace tensorflow
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
@ -102,7 +102,7 @@ int main(int argc, char** argv) {
    return 1;
  }
-  std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie();
+  std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();
  auto status = tensorflow::WriteStringToFile(
      tensorflow::Env::Default(), output_file,