Use uint8_t, int32_t and uint32_t instead of uint8, int and unsigned.

Also delete unused build_defs.bzl.

PiperOrigin-RevId: 311087719
Change-Id: Iaa27b214e5d2e5227c4a5d454cb244ee70311086
This commit is contained in:
Adrian Kuegel 2020-05-12 02:30:36 -07:00 committed by TensorFlower Gardener
parent e89413399b
commit 13ce8851cb
4 changed files with 22 additions and 117 deletions

View File

@ -1,96 +0,0 @@
load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
def _lookup_file(filegroup, path):
    """Returns the file in `filegroup` whose path ends with `path`, or None."""
    for candidate in filegroup.files.to_list():
        if candidate.path.endswith(path):
            return candidate
    return None
def _gen_kernel_image_hdr_impl(ctx):
    """Compiles a TF op to cubins, bundles them into a fatbin, and emits a C header.

    One cubin is produced per requested GPU architecture; all cubins are then
    linked into a single fatbin, whose bytes are embedded as a const int array
    (named by the `symbol` attribute) in the generated header `out`.
    """
    if not ctx.attr.gpu_archs:
        fail("No GPU architecture specified, use --config=cuda or similar.")

    name = ctx.attr.name

    # The tool expects comma-separated tile sizes, e.g. "16x64" -> "16,64".
    tile_sizes = ctx.attr.tile_size.replace("x", ",")

    same_shape = []
    if ctx.attr.same_shape:
        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)

    cubins = []
    images = []
    for gpu_arch in ctx.attr.gpu_archs:
        # Compile once per architecture: "<name>.sm_70.cubin" etc.
        cubin_file = ctx.actions.declare_file("%s.%s.cubin" % (name, gpu_arch))
        ctx.actions.run(
            outputs = [cubin_file],
            executable = ctx.executable._tool,
            arguments = same_shape + [
                "--tile_sizes=%s" % tile_sizes,
                # "sm_70" -> "70": the tool takes the bare architecture number.
                "--arch=%s" % gpu_arch.split("_")[1],
                "--output=%s" % cubin_file.path,
                ctx.attr.op,
            ],
            mnemonic = "compile",
        )
        cubins.append(cubin_file)
        images.append("--image=profile=%s,file=%s" % (gpu_arch, cubin_file.path))

    # Link all per-architecture cubins into one fatbin.
    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
    ctx.actions.run(
        outputs = [fatbin],
        inputs = cubins,
        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
        arguments = [
            "--64",
            "--cmdline=--compile-only",
            "--link",
            "--compress-all",
            "--create=%s" % fatbin.path,
        ] + images,
        mnemonic = "fatbinary",
    )

    # Embed the fatbin bytes under the requested symbol name in the header.
    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
    ctx.actions.run_shell(
        outputs = [ctx.outputs.out],
        inputs = [fatbin],
        tools = [bin2c],
        command = "%s --static --const --type=int --name=%s %s 1> %s" %
                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
        mnemonic = "bin2c",
    )
# Private rule wrapped by the gen_kernel_image_hdr() macro below.
_gen_kernel_image_hdr = rule(
    implementation = _gen_kernel_image_hdr_impl,
    output_to_genfiles = True,
    attrs = {
        # Tensorflow op to compile into the kernel image.
        "op": attr.string(mandatory = True),
        # Tile sizes in "AxB" form, e.g. "16x64".
        "tile_size": attr.string(mandatory = True),
        # Optional same-shape constraint forwarded to the compiler tool.
        "same_shape": attr.string(),
        # Generated C header containing the fatbin data.
        "out": attr.output(mandatory = True),
        # Name of the embedded fatbin array symbol.
        "symbol": attr.string(mandatory = True),
        # GPU architectures to compile for, e.g. ["sm_70", "sm_75"].
        "gpu_archs": attr.string_list(mandatory = True),
        "_cuda_root": attr.label(
            default = Label("//third_party/gpus/cuda:cuda_root"),
        ),
        "_tool": attr.label(
            executable = True,
            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
            cfg = "host",
        ),
    },
)
def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
    """Generates a C header with fatbin data from a Tensorflow op."""

    # Derive the embedded symbol from the target name: "abs_f32" -> "kAbsF32".
    symbol = "k%s" % name.replace("_", " ").title().replace(" ", "")

    _gen_kernel_image_hdr(
        name = name,
        op = op,
        tile_size = tile_size,
        same_shape = same_shape,
        out = "include/tfrt/gpu/ops/tf/%s.h" % name,
        symbol = symbol,
        gpu_archs = cuda_gpu_select_list("sm_{}"),
    )

View File

@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
: public mlir::PassWrapper<PropagateStaticKnowledge, : public mlir::PassWrapper<PropagateStaticKnowledge,
mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> { mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
explicit PropagateStaticKnowledge(mlir::FunctionType type, explicit PropagateStaticKnowledge(mlir::FunctionType type,
llvm::ArrayRef<unsigned> same_shape_) llvm::ArrayRef<uint32_t> same_shape_)
: func_type(type), same_shape(same_shape_) {} : func_type(type), same_shape(same_shape_) {}
void runOnOperation() override { void runOnOperation() override {
@ -152,8 +152,8 @@ struct PropagateStaticKnowledge
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1)); func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
mlir::Value zero = b.create<mlir::LLVM::ConstantOp>( mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0)); func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
unsigned arg_pos = 0; uint32_t arg_pos = 0;
std::vector<unsigned> positions; std::vector<uint32_t> positions;
for (mlir::Type arg_type : func_type.getInputs()) { for (mlir::Type arg_type : func_type.getInputs()) {
positions.push_back(arg_pos); positions.push_back(arg_pos);
func.getArgument(arg_pos + 2).replaceAllUsesWith(zero); func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
@ -165,13 +165,13 @@ struct PropagateStaticKnowledge
// can use that here. Simply replace usages of the shape parameters within // can use that here. Simply replace usages of the shape parameters within
// the function body to a single shape parameter. // the function body to a single shape parameter.
if (!same_shape.empty()) { if (!same_shape.empty()) {
int first = same_shape.front(); auto first = same_shape.front();
int first_offset = positions.at(first); auto first_offset = positions.at(first);
mlir::ShapedType first_type = mlir::ShapedType first_type =
func_type.getInput(first).cast<mlir::ShapedType>(); func_type.getInput(first).cast<mlir::ShapedType>();
unsigned rank = first_type.getRank(); uint32_t rank = first_type.getRank();
for (int same : same_shape.drop_front(1)) { for (auto same : same_shape.drop_front(1)) {
unsigned same_offset = positions.at(same); uint32_t same_offset = positions.at(same);
auto same_type = func_type.getInput(same).cast<mlir::ShapedType>(); auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
if (same_type.getRank() != rank) { if (same_type.getRank() != rank) {
func.emitOpError() << "same shape constraints on arguments with " func.emitOpError() << "same shape constraints on arguments with "
@ -180,7 +180,7 @@ struct PropagateStaticKnowledge
signalPassFailure(); signalPassFailure();
} }
for (int i = 0; i < 2 * rank; ++i) { for (uint32_t i = 0; i < 2 * rank; ++i) {
// Replace uses for second arg data with first arg. // Replace uses for second arg data with first arg.
auto same_arg = func.getArgument(same_offset + 3 + i); auto same_arg = func.getArgument(same_offset + 3 + i);
auto first_arg = func.getArgument(first_offset + 3 + i); auto first_arg = func.getArgument(first_offset + 3 + i);
@ -191,11 +191,11 @@ struct PropagateStaticKnowledge
} }
mlir::FunctionType func_type; mlir::FunctionType func_type;
llvm::ArrayRef<unsigned> same_shape; llvm::ArrayRef<uint32_t> same_shape;
}; };
Status PropagateStaticShapeKnowledgeToKernel( Status PropagateStaticShapeKnowledgeToKernel(
mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) { mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
// Grab the original signature from the single function. // Grab the original signature from the single function.
auto func = *module.getBody()->op_begin<mlir::FuncOp>(); auto func = *module.getBody()->op_begin<mlir::FuncOp>();
@ -218,10 +218,10 @@ Status PropagateStaticShapeKnowledgeToKernel(
} }
} // namespace } // namespace
StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode( StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
llvm::StringRef tf_code, std::pair<int, int> compute_capability, llvm::StringRef tf_code, std::pair<int32_t, int32_t> compute_capability,
llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape, llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
llvm::ArrayRef<unsigned> unroll_factors) { llvm::ArrayRef<uint32_t> unroll_factors) {
mlir::MLIRContext context; mlir::MLIRContext context;
context.allowUnregisteredDialects(); // TODO(b/152572127) context.allowUnregisteredDialects(); // TODO(b/152572127)
mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context); mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);

View File

@ -30,11 +30,12 @@ limitations under the License.
namespace tensorflow { namespace tensorflow {
namespace kernel_gen { namespace kernel_gen {
xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode( xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5}, llvm::StringRef tf_code,
llvm::ArrayRef<unsigned> tile_sizes = {16, 64}, std::pair<int32_t, int32_t> compute_capability = {7, 5},
llvm::ArrayRef<unsigned> same_shape = {}, llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
llvm::ArrayRef<unsigned> unroll_factors = {}); llvm::ArrayRef<uint32_t> same_shape = {},
llvm::ArrayRef<uint32_t> unroll_factors = {});
} // namespace kernel_gen } // namespace kernel_gen
} // namespace tensorflow } // namespace tensorflow

View File

@ -102,7 +102,7 @@ int main(int argc, char** argv) {
return 1; return 1;
} }
std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie(); std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();
auto status = tensorflow::WriteStringToFile( auto status = tensorflow::WriteStringToFile(
tensorflow::Env::Default(), output_file, tensorflow::Env::Default(), output_file,