Use uint8_t, uint32_t and int32_t.
Also delete unused build_defs.bzl. PiperOrigin-RevId: 311087719 Change-Id: Iaa27b214e5d2e5227c4a5d454cb244ee70311086
This commit is contained in:
parent
e89413399b
commit
13ce8851cb
|
@ -1,96 +0,0 @@
|
|||
load("//third_party/gpus/cuda:build_defs.bzl", "cuda_gpu_select_list")
|
||||
|
||||
def _lookup_file(filegroup, path):
|
||||
"""Extracts file at (relative) path in filegroup."""
|
||||
for file in filegroup.files.to_list():
|
||||
if file.path.endswith(path):
|
||||
return file
|
||||
return None
|
||||
|
||||
def _gen_kernel_image_hdr_impl(ctx):
|
||||
if not ctx.attr.gpu_archs:
|
||||
fail("No GPU architecture specified, use --config=cuda or similar.")
|
||||
|
||||
name = ctx.attr.name
|
||||
tile_sizes = ctx.attr.tile_size.replace("x", ",")
|
||||
same_shape = []
|
||||
if ctx.attr.same_shape:
|
||||
same_shape.append("--same_shape=%s" % ctx.attr.same_shape)
|
||||
|
||||
cubins = []
|
||||
images = []
|
||||
for arch in ctx.attr.gpu_archs:
|
||||
filename = "%s.%s.cubin" % (name, arch)
|
||||
cubin = ctx.actions.declare_file(filename)
|
||||
ctx.actions.run(
|
||||
outputs = [cubin],
|
||||
executable = ctx.executable._tool,
|
||||
arguments = same_shape + [
|
||||
"--tile_sizes=%s" % tile_sizes,
|
||||
"--arch=%s" % arch.split("_")[1],
|
||||
"--output=%s" % cubin.path,
|
||||
ctx.attr.op,
|
||||
],
|
||||
mnemonic = "compile",
|
||||
)
|
||||
cubins.append(cubin)
|
||||
images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
|
||||
|
||||
# Generate fatbin file from all cubins.
|
||||
fatbin = ctx.actions.declare_file("%s.fatbin" % name)
|
||||
ctx.actions.run(
|
||||
outputs = [fatbin],
|
||||
inputs = cubins,
|
||||
executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
|
||||
arguments = [
|
||||
"--64",
|
||||
"--cmdline=--compile-only",
|
||||
"--link",
|
||||
"--compress-all",
|
||||
"--create=%s" % fatbin.path,
|
||||
] + images,
|
||||
mnemonic = "fatbinary",
|
||||
)
|
||||
|
||||
bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
|
||||
ctx.actions.run_shell(
|
||||
outputs = [ctx.outputs.out],
|
||||
inputs = [fatbin],
|
||||
tools = [bin2c],
|
||||
command = "%s --static --const --type=int --name=%s %s 1> %s" %
|
||||
(bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
|
||||
mnemonic = "bin2c",
|
||||
)
|
||||
|
||||
_gen_kernel_image_hdr = rule(
|
||||
implementation = _gen_kernel_image_hdr_impl,
|
||||
output_to_genfiles = True,
|
||||
attrs = {
|
||||
"op": attr.string(mandatory = True),
|
||||
"tile_size": attr.string(mandatory = True),
|
||||
"same_shape": attr.string(),
|
||||
"out": attr.output(mandatory = True),
|
||||
"symbol": attr.string(mandatory = True),
|
||||
"gpu_archs": attr.string_list(mandatory = True),
|
||||
"_cuda_root": attr.label(
|
||||
default = Label("//third_party/gpus/cuda:cuda_root"),
|
||||
),
|
||||
"_tool": attr.label(
|
||||
executable = True,
|
||||
default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
|
||||
cfg = "host",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
def gen_kernel_image_hdr(name, op, tile_size, same_shape = None):
|
||||
"""Generates a C header with fatbin data from a Tensorflow op."""
|
||||
_gen_kernel_image_hdr(
|
||||
name = name,
|
||||
op = op,
|
||||
tile_size = tile_size,
|
||||
same_shape = same_shape,
|
||||
out = "include/tfrt/gpu/ops/tf/%s.h" % name,
|
||||
symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
|
||||
gpu_archs = cuda_gpu_select_list("sm_{}"),
|
||||
)
|
|
@ -136,7 +136,7 @@ struct PropagateStaticKnowledge
|
|||
: public mlir::PassWrapper<PropagateStaticKnowledge,
|
||||
mlir::OperationPass<mlir::LLVM::LLVMFuncOp>> {
|
||||
explicit PropagateStaticKnowledge(mlir::FunctionType type,
|
||||
llvm::ArrayRef<unsigned> same_shape_)
|
||||
llvm::ArrayRef<uint32_t> same_shape_)
|
||||
: func_type(type), same_shape(same_shape_) {}
|
||||
|
||||
void runOnOperation() override {
|
||||
|
@ -152,8 +152,8 @@ struct PropagateStaticKnowledge
|
|||
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 1));
|
||||
mlir::Value zero = b.create<mlir::LLVM::ConstantOp>(
|
||||
func.getLoc(), index_type, b.getIntegerAttr(b.getIndexType(), 0));
|
||||
unsigned arg_pos = 0;
|
||||
std::vector<unsigned> positions;
|
||||
uint32_t arg_pos = 0;
|
||||
std::vector<uint32_t> positions;
|
||||
for (mlir::Type arg_type : func_type.getInputs()) {
|
||||
positions.push_back(arg_pos);
|
||||
func.getArgument(arg_pos + 2).replaceAllUsesWith(zero);
|
||||
|
@ -165,13 +165,13 @@ struct PropagateStaticKnowledge
|
|||
// can use that here. Simply replace usages of the shape parameters within
|
||||
// the function body to a single shape parameter.
|
||||
if (!same_shape.empty()) {
|
||||
int first = same_shape.front();
|
||||
int first_offset = positions.at(first);
|
||||
auto first = same_shape.front();
|
||||
auto first_offset = positions.at(first);
|
||||
mlir::ShapedType first_type =
|
||||
func_type.getInput(first).cast<mlir::ShapedType>();
|
||||
unsigned rank = first_type.getRank();
|
||||
for (int same : same_shape.drop_front(1)) {
|
||||
unsigned same_offset = positions.at(same);
|
||||
uint32_t rank = first_type.getRank();
|
||||
for (auto same : same_shape.drop_front(1)) {
|
||||
uint32_t same_offset = positions.at(same);
|
||||
auto same_type = func_type.getInput(same).cast<mlir::ShapedType>();
|
||||
if (same_type.getRank() != rank) {
|
||||
func.emitOpError() << "same shape constraints on arguments with "
|
||||
|
@ -180,7 +180,7 @@ struct PropagateStaticKnowledge
|
|||
signalPassFailure();
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2 * rank; ++i) {
|
||||
for (uint32_t i = 0; i < 2 * rank; ++i) {
|
||||
// Replace uses for second arg data with first arg.
|
||||
auto same_arg = func.getArgument(same_offset + 3 + i);
|
||||
auto first_arg = func.getArgument(first_offset + 3 + i);
|
||||
|
@ -191,11 +191,11 @@ struct PropagateStaticKnowledge
|
|||
}
|
||||
|
||||
mlir::FunctionType func_type;
|
||||
llvm::ArrayRef<unsigned> same_shape;
|
||||
llvm::ArrayRef<uint32_t> same_shape;
|
||||
};
|
||||
|
||||
Status PropagateStaticShapeKnowledgeToKernel(
|
||||
mlir::ModuleOp module, llvm::ArrayRef<unsigned> same_shape) {
|
||||
mlir::ModuleOp module, llvm::ArrayRef<uint32_t> same_shape) {
|
||||
// Grab the original signature from the single function.
|
||||
auto func = *module.getBody()->op_begin<mlir::FuncOp>();
|
||||
|
||||
|
@ -218,10 +218,10 @@ Status PropagateStaticShapeKnowledgeToKernel(
|
|||
}
|
||||
} // namespace
|
||||
|
||||
StatusOr<std::vector<uint8>> tensorflow::kernel_gen::GenerateCubinForTfCode(
|
||||
llvm::StringRef tf_code, std::pair<int, int> compute_capability,
|
||||
llvm::ArrayRef<unsigned> tile_sizes, llvm::ArrayRef<unsigned> same_shape,
|
||||
llvm::ArrayRef<unsigned> unroll_factors) {
|
||||
StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
|
||||
llvm::StringRef tf_code, std::pair<int32_t, int32_t> compute_capability,
|
||||
llvm::ArrayRef<uint32_t> tile_sizes, llvm::ArrayRef<uint32_t> same_shape,
|
||||
llvm::ArrayRef<uint32_t> unroll_factors) {
|
||||
mlir::MLIRContext context;
|
||||
context.allowUnregisteredDialects(); // TODO(b/152572127)
|
||||
mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
|
||||
|
|
|
@ -30,11 +30,12 @@ limitations under the License.
|
|||
|
||||
namespace tensorflow {
|
||||
namespace kernel_gen {
|
||||
xla::StatusOr<std::vector<uint8>> GenerateCubinForTfCode(
|
||||
llvm::StringRef tf_code, std::pair<int, int> compute_capability = {7, 5},
|
||||
llvm::ArrayRef<unsigned> tile_sizes = {16, 64},
|
||||
llvm::ArrayRef<unsigned> same_shape = {},
|
||||
llvm::ArrayRef<unsigned> unroll_factors = {});
|
||||
xla::StatusOr<std::vector<uint8_t>> GenerateCubinForTfCode(
|
||||
llvm::StringRef tf_code,
|
||||
std::pair<int32_t, int32_t> compute_capability = {7, 5},
|
||||
llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
|
||||
llvm::ArrayRef<uint32_t> same_shape = {},
|
||||
llvm::ArrayRef<uint32_t> unroll_factors = {});
|
||||
} // namespace kernel_gen
|
||||
} // namespace tensorflow
|
||||
|
||||
|
|
|
@ -102,7 +102,7 @@ int main(int argc, char** argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
std::vector<uint8> cubin_data = cubin.ConsumeValueOrDie();
|
||||
std::vector<uint8_t> cubin_data = cubin.ConsumeValueOrDie();
|
||||
|
||||
auto status = tensorflow::WriteStringToFile(
|
||||
tensorflow::Env::Default(), output_file,
|
||||
|
|
Loading…
Reference in New Issue