Use uint8_t, uint32_t and int32_t.
Also delete unused build_defs.bzl. PiperOrigin-RevId: 311087719 Change-Id: Iaa27b214e5d2e5227c4a5d454cb244ee70311086
This commit is contained in:
parent
e89413399b
commit
13ce8851cb
|
@ -1,96 +0,0 @@
|
||||||
load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
|
|
||||||
|
|
||||||
def _lookup_file(filegroup, path):
|
|
||||||
"""Extracts file at (relative) path in filegroup."""
|
|
||||||
for file in filegroup.files.to_list():
|
|
||||||
if file.path.endswith(path):
|
|
||||||
return file
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _gen_kernel_image_hdr_impl(ctx):
|
|
||||||
if not ctx.attr.gpu_archs:
|
|
||||||
fail("No GPU architecture specified, use --config=cuda or similar.")
|
|
||||||
|
|
||||||
name = ctx.attr.name
|
|
||||||
tile_sizes = ctx.attr.tile_size.replace("x", ",")
|
|
||||||
same_shape = []
|
|
||||||
if ctx.attr.same_shape:
|
|
||||||
same_shape.append("--same_shape=%s" % ctx.attr.same_shape)
|
|
||||||
|
|
||||||
cubins = []
|
|
||||||
images = []
|
|
||||||
for arch in ctx.attr.gpu_archs:
|
|
||||||
filename = "%s.%s.cubin" % (name, arch)
|
|
||||||
cubin = ctx.actions.declare_file(filename)
|
|
||||||
ctx.actions.run(
|
|
||||||
outputs = [cubin],
|
|
||||||
executable = ctx.executable._tool,
|
|
||||||
arguments = same_shape + [
|
|
||||||
"--tile_sizes=%s" % tile_sizes,
|
|
||||||
"--arch=%s" % arch.split("_")[1],
|
|
||||||
"--output=%s" % cubin.path,
|
|
||||||
ctx.attr.op,
|
|
||||||
],
|
|
||||||
mnemonic = "compile",
|
|
||||||
)
|
|
||||||
cubins.append(cubin)
|
|
||||||
images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
|
|
||||||
|
|
||||||
# Generate fatbin file from all cubins.
|
|
||||||
fatbin = ctx.actions.declare_file("%s.fatbin" % name)
|
|
||||||
ctx.actions.run(
|
|
||||||
outputs = [fatbin],
|
|
||||||
inputs = cubins,
|
|
||||||
executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
|
|
||||||
arguments = [
|
|
||||||
"--64",
|
|
||||||
"--cmdline=--compile-only",
|
|
||||||
"--link",
|
|
||||||
"--compress-all",
|
|
||||||
"--create=%s" % fatbin.path,
|
|
||||||
] + images,
|
|
||||||
mnemonic = "fatbinary",
|
|
||||||
)
|
|
||||||
|
|
||||||
bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
|
|
||||||
ctx.actions.run_shell(
|
|
||||||
outputs = [ctx.outputs.out],
|
|
||||||
inputs = [fatbin],
|
|
||||||
tools = [bin2c],
|
|
||||||
command = "%s --static --const --type=int --name=%s %s 1> %s" %
|
|
||||||
(bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
|
|
||||||
mnemonic = "bin2c",
|
|
||||||
)
|
|
||||||
|
|
||||||
_gen_kernel_image_hdr = rule(
|
|
||||||
implementation = _gen_kernel_image_hdr_impl,
|
|
||||||
output_to_genfiles = True,
|
|
||||||
attrs = {
|
|
||||||
"op": attr.string(mandatory = True),
|
|
||||||
"tile_size": attr.string(mandatory = True),
|
|
||||||
"same_shape": attr.string(),
|
|
||||||
"out": attr.output(mandatory = True),
|
|
||||||
"symbol": attr.string(mandatory = True),
|
|
||||||
"gpu_archs": attr.string_list(mandatory = True),
|
|
||||||
"_cuda_root": attr.label(
|
|
||||||
default = Label("//third_party/gpus/cuda:cuda_root"),
|
|
||||||
),
|
|
||||||
"_tool": attr.label(
|
|
||||||
executable = True,
|
|
||||||
default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
|
|
||||||
cfg = "host",
|
|
||||||
),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
|
|
||||||
"""Generates a C header with fatbin data from a Tensorflow op."""
|
|
||||||
_gen_kernel_image_hdr(
|
|
||||||
name = name,
|
|
||||||
op = op,
|
|
||||||
tile_size = tile_size,
|
|
||||||
same_shape = same_shape,
|
|
||||||
out = "include/tfrt/gpu/ops/tf/%s.h" % name,
|
|
||||||
symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
|
|
||||||
gpu_archs = cuda_gpu_select_list("sm_{}"),
|
|
||||||
)
|
|
|
@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
|
||||||
: public mlir::PassWrapper<PropagateStaticKnowledge,
|
: public mlir::PassWrapper<PropagateStaticKnowledge,
|
||||||
mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
|
mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
|
||||||
explicit PropagateStaticKnowledge(mlir::FunctionType type,
|
explicit PropagateStaticKnowledge(mlir::FunctionType type,
|
||||||
llvm::ArrayRef<unsigned> same_shape_)
|
llvm::ArrayRef<uint32_t> same_shape_)
|
||||||
: func_type(type), same_shape(same_shape_) {}
|
: func_type(type), same_shape(same_shape_) {}
|
||||||
|
|
||||||
void runOnOperation() override {
|
void runOnOperation() override {
|
||||||
|
@ -152,8 +152,8 @@ struct PropagateStaticKnowledge
|
||||||
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
|
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
|
||||||
mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
|
mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
|
||||||
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
|
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
|
||||||
unsigned arg_pos = 0;
|
uint32_t arg_pos = 0;
|
||||||
std::vector<unsigned> positions;
|
std::vector<uint32_t> positions;
|
||||||
for (mlir::Type arg_type : func_type.getInputs()) {
|
for (mlir::Type arg_type : func_type.getInputs()) {
|
||||||
positions.push_back(arg_pos);
|
positions.push_back(arg_pos);
|
||||||
func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
|
func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
|
||||||
|
@ -165,13 +165,13 @@ struct PropagateStaticKnowledge
|
||||||
// can use that here. Simply replace usages of the shape parameters within
|
// can use that here. Simply replace usages of the shape parameters within
|
||||||
// the function body to a single shape parameter.
|
// the function body to a single shape parameter.
|
||||||
if (!same_shape.empty()) {
|
if (!same_shape.empty()) {
|
||||||
int first = same_shape.front();
|
auto first = same_shape.front();
|
||||||
int first_offset = positions.at(first);
|
auto first_offset = positions.at(first);
|
||||||
mlir::ShapedType first_type =
|
mlir::ShapedType first_type =
|
||||||
func_type.getInput(first).cast<mlir::ShapedType>();
|
func_type.getInput(first).cast<mlir::ShapedType>();
|
||||||
unsigned rank = first_type.getRank();
|
uint32_t rank = first_type.getRank();
|
||||||
for (int same : same_shape.drop_front(1)) {
|
for (auto same : same_shape.drop_front(1)) {
|
||||||
unsigned same_offset = positions.at(same);
|
uint32_t same_offset = positions.at(same);
|
||||||
auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
|
auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
|
||||||
if (same_type.getRank() != rank) {
|
if (same_type.getRank() != rank) {
|
||||||
func.emitOpError() << "same shape constraints on arguments with "
|
func.emitOpError() << "same shape constraints on arguments with "
|
||||||
|
@ -180,7 +180,7 @@ struct PropagateStaticKnowledge
|
||||||
signalPassFailure();
|
signalPassFailure();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 2 * rank; ++i) {
|
for (uint32_t i = 0; i < 2 * rank; ++i) {
|
||||||
// Replace uses for second arg data with first arg.
|
// Replace uses for second arg data with first arg.
|
||||||
auto same_arg = func.getArgument(same_offset + 3 + i);
|
auto same_arg = func.getArgument(same_offset + 3 + i);
|
||||||
auto first_arg = func.getArgument(first_offset + 3 + i);
|
auto first_arg = func.getArgument(first_offset + 3 + i);
|
||||||
|
@ -191,11 +191,11 @@ struct PropagateStaticKnowledge
|
||||||
}
|
}
|
||||||
|
|
||||||
mlir::FunctionType func_type;
|
mlir::FunctionType func_type;
|
||||||
llvm::ArrayRef<unsigned> same_shape;
|
llvm::ArrayRef<uint32_t> same_shape;
|
||||||
};
|
};
|
||||||
|
|
||||||
Status PropagateStaticShapeKnowledgeToKernel(
|
Status PropagateStaticShapeKnowledgeToKernel(
|
||||||
mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) {
|
mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
|
||||||
// Grab the original signature from the single function.
|
// Grab the original signature from the single function.
|
||||||
auto func = *module.getBody()->op_begin<mlir::FuncOp>();
|
auto func = *module.getBody()->op_begin<mlir::FuncOp>();
|
||||||
|
|
||||||
|
@ -218,10 +218,10 @@ Status PropagateStaticShapeKnowledgeToKernel(
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode(
|
StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
|
||||||
llvm::StringRef tf_code, std::pair<int, int> compute_capability,
|
llvm::StringRef tf_code, std::pair<int32_t, int32_t> compute_capability,
|
||||||
llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape,
|
llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
|
||||||
llvm::ArrayRef<unsigned> unroll_factors) {
|
llvm::ArrayRef<uint32_t> unroll_factors) {
|
||||||
mlir::MLIRContext context;
|
mlir::MLIRContext context;
|
||||||
context.allowUnregisteredDialects(); // TODO(b/152572127)
|
context.allowUnregisteredDialects(); // TODO(b/152572127)
|
||||||
mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
|
mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
|
||||||
|
|
|
@ -30,11 +30,12 @@ limitations under the License.
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace kernel_gen {
|
namespace kernel_gen {
|
||||||
xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode(
|
xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
|
||||||
llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5},
|
llvm::StringRef tf_code,
|
||||||
llvm::ArrayRef<unsigned> tile_sizes = {16, 64},
|
std::pair<int32_t, int32_t> compute_capability = {7, 5},
|
||||||
llvm::ArrayRef<unsigned> same_shape = {},
|
llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
|
||||||
llvm::ArrayRef<unsigned> unroll_factors = {});
|
llvm::ArrayRef<uint32_t> same_shape = {},
|
||||||
|
llvm::ArrayRef<uint32_t> unroll_factors = {});
|
||||||
} // namespace kernel_gen
|
} // namespace kernel_gen
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
|
|
@ -102,7 +102,7 @@ int main(int argc, char** argv) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie();
|
std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();
|
||||||
|
|
||||||
auto status = tensorflow::WriteStringToFile(
|
auto status = tensorflow::WriteStringToFile(
|
||||||
tensorflow::Env::Default(), output_file,
|
tensorflow::Env::Default(), output_file,
|
||||||
|
|
Loading…
Reference in New Issue