diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl b/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
deleted file mode 100644
index cec9968e65b..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/build_defs.bzl
+++ /dev/null
@@ -1,96 +0,0 @@
-load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
-
-def _lookup_file(filegroup, path):
-    """Extracts file at (relative) path in filegroup."""
-    for file in filegroup.files.to_list():
-        if file.path.endswith(path):
-            return file
-    return None
-
-def _gen_kernel_image_hdr_impl(ctx):
-    if not ctx.attr.gpu_archs:
-        fail("No GPU architecture specified, use --config=cuda or similar.")
-
-    name = ctx.attr.name
-    tile_sizes = ctx.attr.tile_size.replace("x", ",")
-    same_shape = []
-    if ctx.attr.same_shape:
-        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)
-
-    cubins = []
-    images = []
-    for arch in ctx.attr.gpu_archs:
-        filename = "%s.%s.cubin" % (name, arch)
-        cubin = ctx.actions.declare_file(filename)
-        ctx.actions.run(
-            outputs = [cubin],
-            executable = ctx.executable._tool,
-            arguments = same_shape + [
-                "--tile_sizes=%s" % tile_sizes,
-                "--arch=%s" % arch.split("_")[1],
-                "--output=%s" % cubin.path,
-                ctx.attr.op,
-            ],
-            mnemonic = "compile",
-        )
-        cubins.append(cubin)
-        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
-
-    # Generate fatbin file from all cubins.
-    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
-    ctx.actions.run(
-        outputs = [fatbin],
-        inputs = cubins,
-        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
-        arguments = [
-            "--64",
-            "--cmdline=--compile-only",
-            "--link",
-            "--compress-all",
-            "--create=%s" % fatbin.path,
-        ] + images,
-        mnemonic = "fatbinary",
-    )
-
-    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
-    ctx.actions.run_shell(
-        outputs = [ctx.outputs.out],
-        inputs = [fatbin],
-        tools = [bin2c],
-        command = "%s --static --const --type=int --name=%s %s 1> %s" %
-                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
-        mnemonic = "bin2c",
-    )
-
-_gen_kernel_image_hdr = rule(
-    implementation = _gen_kernel_image_hdr_impl,
-    output_to_genfiles = True,
-    attrs = {
-        "op": attr.string(mandatory = True),
-        "tile_size": attr.string(mandatory = True),
-        "same_shape": attr.string(),
-        "out": attr.output(mandatory = True),
-        "symbol": attr.string(mandatory = True),
-        "gpu_archs": attr.string_list(mandatory = True),
-        "_cuda_root": attr.label(
-            default = Label("//third_party/gpus/cuda:cuda_root"),
-        ),
-        "_tool": attr.label(
-            executable = True,
-            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
-            cfg = "host",
-        ),
-    },
-)
-
-def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
-    """Generates a C header with fatbin data from a Tensorflow op."""
-    _gen_kernel_image_hdr(
-        name = name,
-        op = op,
-        tile_size = tile_size,
-        same_shape = same_shape,
-        out = "include/tfrt/gpu/ops/tf/%s.h" % name,
-        symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
-        gpu_archs = cuda_gpu_select_list("sm_{}"),
-    )
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
index 45d10214a42..b1c4b1beae1 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc
@@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
     : public mlir::PassWrapper<PropagateStaticKnowledge,
                                mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
   explicit PropagateStaticKnowledge(mlir::FunctionType type,
-                                    llvm::ArrayRef<unsigned> same_shape_)
+                                    llvm::ArrayRef<uint32_t> same_shape_)
       : func_type(type), same_shape(same_shape_) {}
 
   void runOnOperation() override {
@@ -152,8 +152,8 @@
         func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
     mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
         func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
-    unsigned arg_pos = 0;
-    std::vector<unsigned> positions;
+    uint32_t arg_pos = 0;
+    std::vector<uint32_t> positions;
     for (mlir::Type arg_type : func_type.getInputs()) {
       positions.push_back(arg_pos);
       func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
@@ -165,13 +165,13 @@
     // can use that here. Simply replace usages of the shape parameters within
     // the function body to a single shape parameter.
    if (!same_shape.empty()) {
-      int first = same_shape.front();
-      int first_offset = positions.at(first);
+      auto first = same_shape.front();
+      auto first_offset = positions.at(first);
       mlir::ShapedType first_type =
           func_type.getInput(first).cast<mlir::ShapedType>();
-      unsigned rank = first_type.getRank();
-      for (int same : same_shape.drop_front(1)) {
-        unsigned same_offset = positions.at(same);
+      uint32_t rank = first_type.getRank();
+      for (auto same : same_shape.drop_front(1)) {
+        uint32_t same_offset = positions.at(same);
         auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
         if (same_type.getRank() != rank) {
           func.emitOpError() << "same shape constraints on arguments with "
@@ -180,7 +180,7 @@
           signalPassFailure();
         }
 
-        for (int i = 0; i < 2 * rank; ++i) {
+        for (uint32_t i = 0; i < 2 * rank; ++i) {
           // Replace uses for second arg data with first arg.
           auto same_arg = func.getArgument(same_offset + 3 + i);
           auto first_arg = func.getArgument(first_offset + 3 + i);
@@ -191,11 +191,11 @@
   }
 
   mlir::FunctionType func_type;
-  llvm::ArrayRef<unsigned> same_shape;
+  llvm::ArrayRef<uint32_t> same_shape;
 };
 
 Status PropagateStaticShapeKnowledgeToKernel(
-    mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) {
+    mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
   // Grab the original signature from the single function.
   auto func = *module.getBody()->op_begin<mlir::FuncOp>();
 
@@ -218,10 +218,10 @@
   }
 }  // namespace
 
-StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability,
-    llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape,
-    llvm::ArrayRef<unsigned> unroll_factors) {
+StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
+    llvm::StringRef tf_code, std::pair<int32_t, int32_t> compute_capability,
+    llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
+    llvm::ArrayRef<uint32_t> unroll_factors) {
   mlir::MLIRContext context;
   context.allowUnregisteredDialects();  // TODO(b/152572127)
   mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
index c8746330c49..47626ba9d0d 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.h
@@ -30,11 +30,12 @@ limitations under the License.
 namespace tensorflow {
 namespace kernel_gen {
 
-xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode(
-    llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5},
-    llvm::ArrayRef<unsigned> tile_sizes = {16, 64},
-    llvm::ArrayRef<unsigned> same_shape = {},
-    llvm::ArrayRef<unsigned> unroll_factors = {});
+xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
+    llvm::StringRef tf_code,
+    std::pair<int32_t, int32_t> compute_capability = {7, 5},
+    llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
+    llvm::ArrayRef<uint32_t> same_shape = {},
+    llvm::ArrayRef<uint32_t> unroll_factors = {});
 
 }  // namespace kernel_gen
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
index c9b447f5cad..8edc567e777 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
@@ -102,7 +102,7 @@ int main(int argc, char** argv) {
     return 1;
   }
 
-  std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie();
+  std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();
 
   auto status = tensorflow::WriteStringToFile(
       tensorflow::Env::Default(), output_file,