Drop the implementation of the mlir_gpu plugin prototype in XLA.
This leaves some passes and the kernel_lowering in place for now; those are pending removal, too.

PiperOrigin-RevId: 348621555
Change-Id: I4ff2ed11fb646bf76ceaa780956c6257d89f54ae
parent 852b977596
commit 1a44a1f9dd
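For context, the flag plumbing removed below worked roughly like this (a minimal sketch reconstructed from the BUILD rules deleted in this diff; the exact bazel invocation is an assumption derived from the config_setting's define_values):

    # Deleted mechanism: a config_setting keyed on a --define flag selected the
    # experimental MLIR-based GPU plugin instead of the default one.
    config_setting(
        name = "with_mlir_gpu_support",
        define_values = {"with_mlir_gpu_support": "true"},
    )

    cc_library(
        name = "gpu_plugin",
        deps = select({
            ":with_mlir_gpu_support": [":gpu_plugin_mlir"],
            "//conditions:default": [":gpu_plugin_no_mlir"],
        }),
    )

    # Assumed opt-in invocation (derived from define_values above):
    #   bazel build --define=with_mlir_gpu_support=true \
    #       //tensorflow/compiler/xla/service:gpu_plugin

After this change, gpu_plugin depends on the standard GPU compiler unconditionally.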
tensorflow/compiler/xla/service/BUILD

@@ -1036,31 +1036,8 @@ cc_library(
     ],
 )
-
-# This flag enables experimental MLIR GPU support.
-config_setting(
-    name = "with_mlir_gpu_support",
-    define_values = {"with_mlir_gpu_support": "true"},
-    visibility = ["//visibility:public"],
-)
-
-# Lets us choose the right GPU plugin depending on whether the experimental MLIR
-# GPU plugin should be used or not.
 cc_library(
     name = "gpu_plugin",
-    deps = select(
-        {
-            ":with_mlir_gpu_support": [
-                ":gpu_plugin_mlir",
-            ],
-            "//conditions:default": [
-                ":gpu_plugin_no_mlir",
-            ],
-        },
-    ),
-)
-
-cc_library(
-    name = "gpu_plugin_no_mlir",
     deps = [
         ":service",
         "//tensorflow/compiler/xla/service/gpu:gpu_compiler",
@@ -1075,17 +1052,6 @@ cc_library(
     ]) + internal_cuda_deps(),
 )
-
-cc_library(
-    name = "gpu_plugin_mlir",
-    deps = [
-        ":service",
-        "//tensorflow/compiler/xla/service/gpu:gpu_transfer_manager",
-        "//tensorflow/core/platform:stream_executor_no_cuda",
-    ] + if_cuda_is_configured([
-        "//tensorflow/compiler/xla/service/mlir_gpu:mlir_compiler_impl",
-    ]) + internal_cuda_deps(),
-)
-
 cc_library(
     name = "interpreter_plugin",
     deps = [
tensorflow/compiler/xla/service/mlir_gpu/BUILD

@@ -9,10 +9,6 @@ load("//tensorflow:tensorflow.bzl", "filegroup")
 # buildifier: disable=same-origin-load
 load("//tensorflow:tensorflow.bzl", "get_compatible_with_cloud")
 load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
-load(
-    "//tensorflow/core/platform/default:cuda_build_defs.bzl",
-    "if_cuda_is_configured",
-)
 load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
 
 package(
@@ -34,139 +30,6 @@ filegroup(
     ]),
 )
-
-cc_library(
-    name = "failover_compiler",
-    srcs = ["failover_compiler.cc"],
-    hdrs = ["failover_compiler.h"],
-    deps = [
-        "//tensorflow/compiler/xla/service:compiler",
-        "//tensorflow/core:lib",
-    ],
-)
-
-cc_library(
-    name = "emission_context",
-    srcs = ["emission_context.cc"],
-    hdrs = ["emission_context.h"],
-    deps = [
-        "//tensorflow/compiler/mlir/hlo",
-        "//tensorflow/compiler/mlir/hlo:lhlo",
-        "//tensorflow/compiler/xla/service:hlo",
-        "@com_google_absl//absl/strings",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:StandardOps",
-    ],
-)
-
-cc_library(
-    name = "inject_errors_pass",
-    srcs = ["inject_errors_pass.cc"],
-    hdrs = ["inject_errors_pass.h"],
-    deps = [
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:StandardOps",
-    ],
-)
-
-cc_library(
-    name = "mlir_compiler",
-    srcs = ["mlir_compiler.cc"],
-    hdrs = ["mlir_compiler.h"],
-    deps = [
-        ":emission_context",
-        "//tensorflow/compiler/xla/service:compiler",
-        "//tensorflow/compiler/xla/service/gpu:target_constants",
-        "//tensorflow/core/platform:stream_executor_no_cuda",
-        "@llvm-project//llvm:Core",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:LLVMDialect",
-    ],
-)
-
-cc_library(
-    name = "mlir_compiler_impl",
-    srcs = if_cuda_is_configured(["mlir_compiler_impl.cc"]),
-    deps = if_cuda_is_configured([
-        ":mlir_compiler",
-        ":failover_compiler",
-        ":emission_context",
-        ":kernel_lowering",
-        ":lhlo_dialect_emitter",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@llvm-project//llvm:Core",
-        "@llvm-project//mlir:GPUDialect",
-        "@llvm-project//mlir:AllPassesAndDialectsNoRegistration",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:LLVMDialect",
-        "@llvm-project//mlir:LLVMTransforms",
-        "@llvm-project//mlir:StandardOps",
-        "@llvm-project//mlir:Support",
-        "@llvm-project//mlir:TargetNVVMIR",
-        "//tensorflow/compiler/xla:util",
-        "//tensorflow/compiler/xla/service:buffer_assignment",
-        "//tensorflow/compiler/xla/service:dump",
-        "//tensorflow/compiler/xla/service:hlo",
-        "//tensorflow/compiler/xla/service/gpu:gpu_constants",
-        "//tensorflow/compiler/xla/service/gpu:gpu_executable",
-        "//tensorflow/compiler/xla/service/gpu:gpu_hlo_schedule",
-        "//tensorflow/compiler/xla/service/gpu:gpu_types",
-        "//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
-        "//tensorflow/compiler/xla/service/gpu:nvptx_compiler_impl",
-        "//tensorflow/compiler/xla/service/gpu:launch_dimensions",
-        "//tensorflow/compiler/xla/service/gpu:stream_assignment",
-        "//tensorflow/compiler/xla/service/gpu:stream_executor_util",
-        "//tensorflow/compiler/xla/service/gpu:target_constants",
-        "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend",
-        "//tensorflow/core/platform:cuda_libdevice_path",
-        "//tensorflow/core:lib",
-        "//tensorflow/stream_executor/gpu:asm_compiler",
-    ]),
-    alwayslink = True,  # Contains compiler registration
-)
-
-cc_library(
-    name = "hlo_dialect_emitter",
-    srcs = ["hlo_dialect_emitter.cc"],
-    hdrs = ["hlo_dialect_emitter.h"],
-    deps = [
-        ":emission_context",
-        "//tensorflow/compiler/mlir/hlo",
-        "//tensorflow/compiler/mlir/xla:hlo_utils",
-        "//tensorflow/compiler/xla:comparison_util",
-        "//tensorflow/compiler/xla:status",
-        "//tensorflow/compiler/xla/service:hlo",
-        "@com_google_absl//absl/types:span",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:StandardOps",
-    ],
-)
-
-cc_library(
-    name = "lhlo_dialect_emitter",
-    srcs = ["lhlo_dialect_emitter.cc"],
-    hdrs = ["lhlo_dialect_emitter.h"],
-    deps = [
-        ":emission_context",
-        ":hlo_dialect_emitter",
-        "//tensorflow/compiler/mlir/hlo:lhlo",
-        "//tensorflow/compiler/mlir/xla:hlo_utils",
-        "//tensorflow/compiler/xla:status",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla/service:buffer_assignment",
-        "//tensorflow/compiler/xla/service:hlo",
-        "//tensorflow/compiler/xla/service/gpu:thunk",
-        "//tensorflow/compiler/xla/service/gpu:thunk_emitter",
-        "//tensorflow/core:lib",
-        "//tensorflow/stream_executor:stream_executor_headers",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@llvm-project//llvm:Core",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:LLVMDialect",
-        "@llvm-project//mlir:StandardOps",
-    ],
-)
-
 gentbl(
     name = "passes_inc_gen",
     compatible_with = get_compatible_with_cloud(),
@@ -238,51 +101,6 @@ cc_library(
     ],
 )
-
-cc_library(
-    name = "xla_gpu_opt_lib",
-    testonly = True,
-    srcs = ["xla_gpu_opt.cc"],
-    hdrs = ["xla_gpu_opt.h"],
-    tags = ["no_pip"],
-    deps = [
-        ":failover_compiler",
-        ":inject_errors_pass",
-        ":mlir_compiler",
-        "//tensorflow/compiler/xla:debug_options_flags",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla/service:backend",
-        "//tensorflow/compiler/xla/service:hlo_module_config",
-        "//tensorflow/compiler/xla/tests:verified_hlo_module",
-        "//tensorflow/core:lib",
-        "//tensorflow/stream_executor/lib",
-        "@com_google_absl//absl/strings",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Pass",
-    ],
-)
-
-tf_cc_binary(
-    name = "xla-gpu-opt",
-    testonly = True,
-    srcs = ["xla_gpu_opt_main.cc"],
-    tags = ["no_pip"],
-    deps = [
-        ":mlir_compiler",
-        ":xla_gpu_opt_lib",
-        "//tensorflow/compiler/mlir:init_mlir",
-        "//tensorflow/compiler/xla:status",
-        "//tensorflow/compiler/xla/service:gpu_plugin_mlir",
-        "//tensorflow/core:lib",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:SideEffects",
-        "@llvm-project//mlir:Support",
-    ],
-)
-
 tf_cc_binary(
     name = "xla-mlir-gpu-opt",
     srcs = ["xla_mlir_gpu_opt.cc"],
tensorflow/compiler/xla/service/mlir_gpu/emission_context.cc (deleted)

@@ -1,137 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h"
-
-#include "absl/strings/substitute.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
-#include "mlir/IR/Location.h"  // from @llvm-project
-#include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-
-namespace xla {
-namespace mlir_gpu {
-
-EmissionContext::EmissionContext(std::unique_ptr<HloModule> module)
-    : module_(std::move(module)), context_() {
-  context_.loadDialect<mlir::mhlo::MhloDialect, mlir::lmhlo::LmhloDialect,
-                       mlir::StandardOpsDialect>();
-  error_handler_ = [](const ErrorMap& instructions_with_error,
-                      HloModule* module) {
-    std::set<const HloComputation*> computations_with_error;
-    for (const auto& err : instructions_with_error) {
-      computations_with_error.insert(err.first->parent());
-    }
-
-    LOG(ERROR) << module->ToString(
-        HloPrintOptions()
-            .set_print_instruction(
-                [&instructions_with_error](const HloInstruction* instr) {
-                  return instructions_with_error.count(instr);
-                })
-            .set_format_instruction(
-                // Returns the string representation of `instr` in the following
-                // format.
-                //
-                // ROOT? instr_name
-                //   FAILED: err_0
-                //   FAILED: err_1
-                //   ...
-                [&instructions_with_error](const HloInstruction* instr,
-                                           const string& instr_name, int indent,
-                                           bool is_root) {
-                  const string tab(2 * indent, ' ');
-                  if (!instructions_with_error.count(instr)) {
-                    return absl::StrCat(tab, is_root ? "ROOT " : "",
-                                        instr_name);
-                  }
-                  static constexpr char kStartBold[] = "\033[1m";
-                  static constexpr char kStartRed[] = "\033[31m";
-                  static constexpr char kBackToNormal[] = "\033[0m";
-
-                  string result =
-                      absl::StrCat(tab, kStartBold, is_root ? "ROOT " : "",
-                                   instr_name, kBackToNormal);
-
-                  for (const string& err : instructions_with_error.at(instr)) {
-                    absl::SubstituteAndAppend(
-                        &result, "\n$0 $1$2FAILED:$3 $4$5$6", tab, kStartBold,
-                        kStartRed, kBackToNormal, kStartBold, err,
-                        kBackToNormal);
-                  }
-                  return result;
-                })
-            .set_print_computation(
-                [&computations_with_error](const HloComputation* comp) {
-                  return computations_with_error.find(comp) !=
-                         computations_with_error.end();
-                }));
-  };
-  registerDiagnosticHandler();
-}
-
-EmissionContext::EmissionContext(
-    std::unique_ptr<HloModule> module,
-    std::function<void(const ErrorMap&, HloModule*)> callback)
-    : module_(std::move(module)), context_(), error_handler_(callback) {
-  registerDiagnosticHandler();
-}
-
-EmissionContext::~EmissionContext() { callErrorHandlerCallback(); }
-
-mlir::Location EmissionContext::getLocation(const HloInstruction* instr) {
-  return mlir::OpaqueLoc::get<const HloInstruction*>(instr, &context_);
-}
-
-void EmissionContext::addError(const HloInstruction* hlo_instruction,
-                               const string& str) {
-  instructions_with_error_[hlo_instruction].push_back(str);
-}
-
-void EmissionContext::setErrorHandler(
-    std::function<void(const ErrorMap&, HloModule*)> callback) {
-  error_handler_ = callback;
-}
-
-std::unique_ptr<HloModule> EmissionContext::releaseHloModule() {
-  callErrorHandlerCallback();
-  return std::move(module_);
-}
-
-HloModule* EmissionContext::getHloModule() const { return module_.get(); }
-
-mlir::MLIRContext* EmissionContext::getContext() { return &context_; }
-
-void EmissionContext::registerDiagnosticHandler() {
-  context_.getDiagEngine().registerHandler([&](mlir::Diagnostic& diag) {
-    const HloInstruction* hloInstruction =
-        mlir::OpaqueLoc::getUnderlyingLocationOrNull<const HloInstruction*>(
-            diag.getLocation());
-    assert(hloInstruction);
-    addError(hloInstruction, diag.str());
-    return mlir::success();
-  });
-}
-
-void EmissionContext::callErrorHandlerCallback() {
-  if (module_.get() && !instructions_with_error_.empty()) {
-    error_handler_(instructions_with_error_, module_.get());
-  }
-}
-
-}  // namespace mlir_gpu
-}  // namespace xla
tensorflow/compiler/xla/service/mlir_gpu/emission_context.h (deleted)

@@ -1,89 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EMISSION_CONTEXT_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EMISSION_CONTEXT_H_
-
-#include <memory>
-
-#include "mlir/IR/Diagnostics.h"  // from @llvm-project
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_module.h"
-
-namespace xla {
-namespace mlir_gpu {
-
-// Registers a diagnostic handler and collects all the errors as a map from
-// HloInstruction* to a vector of string representations of all the errors that
-// occurred at that hlo instruction. Also, it takes a function that handles
-// those errors at the point when the instance gets destroyed or
-// `releaseHloModule()` is called.
-//
-// EmissionContext uses an RAII pattern, it owns its hlo module and mlir
-// context.
-class EmissionContext {
- public:
-  using ErrorMap =
-      std::unordered_map<const HloInstruction*, std::vector<std::string>>;
-
-  // Gets an hlo module and sets the default error handler which writes to the
-  // ERROR log and is executed when the instance gets destroyed or
-  // `releaseHloModule()` is called.
-  explicit EmissionContext(std::unique_ptr<HloModule> module);
-
-  // Gets an hlo module and an error handler function which is executed when the
-  // instance gets destroyed or `releaseHloModule()` is called.
-  EmissionContext(std::unique_ptr<HloModule> module,
-                  std::function<void(const ErrorMap&, HloModule*)> callback);
-
-  // Handles all the errors according to the error handler function before
-  // getting destroyed.
-  ~EmissionContext();
-
-  // Returns a location constructed from `instr` that then is used by
-  // the diagnostic handler to collect the errors.
-  mlir::Location getLocation(const HloInstruction* instr);
-
-  // Adds an error message associated with provided hlo instruction.
-  void addError(const HloInstruction* hlo_instruction, const string& str);
-
-  // Sets a function that handles the errors at the point when the instance
-  // gets destroyed or `releaseHloModule()` is called.
-  void setErrorHandler(
-      std::function<void(const ErrorMap&, HloModule*)> callback);
-
-  // Releases hlo module and handles all the errors according to the error
-  // handler function.
-  std::unique_ptr<HloModule> releaseHloModule();
-
-  HloModule* getHloModule() const;
-
-  mlir::MLIRContext* getContext();
-
- private:
-  void registerDiagnosticHandler();
-  void callErrorHandlerCallback();
-
-  std::unique_ptr<HloModule> module_;
-  ErrorMap instructions_with_error_;
-  mlir::MLIRContext context_;
-  std::function<void(const ErrorMap&, HloModule*)> error_handler_;
-};
-
-}  // namespace mlir_gpu
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_EMISSION_CONTEXT_H_
tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.cc (deleted)

@@ -1,119 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h"
-
-#include <memory>
-
-#include "tensorflow/core/lib/core/errors.h"
-
-namespace xla {
-
-template <typename T>
-bool IsUnimplemented(StatusOr<T>& result) {
-  return result.status().code() == tensorflow::error::Code::UNIMPLEMENTED;
-}
-
-StatusOr<std::unique_ptr<HloModule>> FailoverCompiler::RunHloPasses(
-    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
-    const CompileOptions& options) {
-  auto result = primary_->RunHloPasses(module->Clone(), stream_exec, options);
-  if (IsUnimplemented(result)) {
-    VLOG(2) << "RunHloPasses resulted in " << result.status()
-            << ", falling back to secondary backend";
-    return secondary_->RunHloPasses(std::move(module), stream_exec, options);
-  }
-  return result;
-}
-
-StatusOr<std::unique_ptr<Executable>> FailoverCompiler::RunBackend(
-    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
-    const CompileOptions& options) {
-  auto result = primary_->RunBackend(module->Clone(), stream_exec, options);
-  if (IsUnimplemented(result)) {
-    VLOG(2) << "RunBackend resulted in " << result.status()
-            << ", falling back to secondary backend";
-    return secondary_->RunBackend(std::move(module), stream_exec, options);
-  }
-  return result;
-}
-
-StatusOr<std::vector<std::unique_ptr<Executable>>> FailoverCompiler::Compile(
-    std::unique_ptr<HloModuleGroup> module_group,
-    std::vector<std::vector<se::StreamExecutor*>> stream_execs,
-    const CompileOptions& options) {
-  std::vector<std::unique_ptr<Executable>> result;
-  std::vector<std::unique_ptr<HloModule>> modules =
-      module_group->ConsumeModules();
-  for (size_t i = 0; i < modules.size(); i++) {
-    if (stream_execs[i].size() != 1) {
-      // This is not supported by GPU compiler anyway.
-      return Unimplemented(
-          "Model partitioning not implemented for the failover compiler!");
-    }
-    auto executable = [stream_execs, &options, i,
-                       this](std::unique_ptr<HloModule> module)
-        -> StatusOr<std::unique_ptr<Executable>> {
-      TF_ASSIGN_OR_RETURN(auto processed_module,
-                          primary_->RunHloPasses(std::move(module),
-                                                 stream_execs[i][0], options));
-      TF_ASSIGN_OR_RETURN(auto result,
-                          primary_->RunBackend(std::move(processed_module),
-                                               stream_execs[i][0], options));
-      return result;
-    }(modules[i]->Clone());
-
-    if (IsUnimplemented(executable)) {
-      VLOG(2) << "Compile resulted in " << executable.status()
-              << ", falling back to secondary backend";
-      TF_ASSIGN_OR_RETURN(
-          modules[i], secondary_->RunHloPasses(std::move(modules[i]),
-                                               stream_execs[i][0], options));
-      TF_ASSIGN_OR_RETURN(executable,
-                          secondary_->RunBackend(std::move(modules[i]),
-                                                 stream_execs[i][0], options));
-    }
-
-    if (!executable.ok()) {
-      return executable.status();
-    }
-
-    result.push_back(std::move(executable.ValueOrDie()));
-  }
-
-  return {std::move(result)};
-}
-
-StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-FailoverCompiler::CompileAheadOfTime(
-    std::unique_ptr<HloModuleGroup> module_group,
-    const AotCompilationOptions& options) {
-  // This is not supported by GPU compiler anyway.
-  return Unimplemented(
-      "CompileAheadOfTime not implemented in failover compiler!");
-}
-
-HloCostAnalysis::ShapeSizeFunction FailoverCompiler::ShapeSizeBytesFunction()
-    const {
-  auto prim_fun = primary_->ShapeSizeBytesFunction();
-  auto second_fun = secondary_->ShapeSizeBytesFunction();
-  return [prim_fun, second_fun](const Shape& shape) -> int64 {
-    int64 primary = prim_fun(shape);
-    assert(primary == second_fun(shape));
-    return primary;
-  };
-}
-
-}  // namespace xla
tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h (deleted)

@@ -1,81 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_FAILOVER_COMPILER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_FAILOVER_COMPILER_H_
-
-#include <memory>
-
-#include "tensorflow/compiler/xla/service/compiler.h"
-
-namespace xla {
-
-// FailoverCompiler implements a compiler that fails over between a primary
-// and secondary compiler.
-//
-// For all methods, first the primary compiler is invoked. If that compiler's
-// implementation of the method fails with an unimplemented error, the
-// secondary's compiler method is invoked. In all other cases, the result of
-// the primary compiler's method is returned.
-//
-// The primary compiler is invoked on a clone of the supplied HloModule. This
-// ensures that partial updates to the module by one compiler to not leak into
-// the other compiler.
-//
-// The FailoverCompiler is used to layer a partial compiler implementation on
-// top of a full implementation.
-class FailoverCompiler final : public Compiler {
- public:
-  FailoverCompiler(std::unique_ptr<Compiler> primary,
-                   std::unique_ptr<Compiler> secondary)
-      : primary_(std::move(primary)), secondary_(std::move(secondary)) {
-    // Both compilers should serve the same platform id.
-    assert(primary_->PlatformId() == secondary_->PlatformId());
-  }
-
-  se::Platform::Id PlatformId() const override {
-    return primary_->PlatformId();
-  }
-
-  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
-      std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
-      const CompileOptions& options) override;
-
-  StatusOr<std::unique_ptr<Executable>> RunBackend(
-      std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
-      const CompileOptions& options) override;
-
-  StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
-      std::unique_ptr<HloModuleGroup> module_group,
-      std::vector<std::vector<se::StreamExecutor*>> stream_execs,
-      const CompileOptions& options) override;
-
-  StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
-  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
-                     const AotCompilationOptions& options) override;
-
-  HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override;
-
-  Compiler* GetPrimary() const { return primary_.get(); }
-  Compiler* GetSecondary() const { return secondary_.get(); }
-
- private:
-  std::unique_ptr<Compiler> primary_;
-  std::unique_ptr<Compiler> secondary_;
-};
-
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_FAILOVER_COMPILER_H_
tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.cc (deleted)

@@ -1,276 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h"
-
-#include <utility>
-
-#include "llvm/ADT/STLExtras.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
-#include "mlir/IR/Attributes.h"  // from @llvm-project
-#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
-#include "mlir/IR/Types.h"  // from @llvm-project
-#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "tensorflow/compiler/mlir/xla/hlo_utils.h"
-#include "tensorflow/compiler/xla/comparison_util.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_instructions.h"
-
-namespace xla {
-namespace mlir_gpu {
-namespace {
-
-using ::mlir::ArrayRef;
-using ::mlir::Attribute;
-using ::mlir::Identifier;
-using ::mlir::Location;
-using ::mlir::NamedAttribute;
-using ::mlir::OpBuilder;
-using ::mlir::RankedTensorType;
-using ::mlir::Type;
-using ::mlir::Value;
-
-namespace hlo = ::mlir::mhlo;
-
-// TODO(b/137624192) Use tablegen for this.
-StatusOr<Value> InsertMlirOp(HloOpcode opcode, OpBuilder func_builder,
-                             Location loc, ArrayRef<Type> rets,
-                             ArrayRef<Value> args,
-                             ArrayRef<std::pair<Identifier, Attribute>> attrs) {
-  switch (opcode) {
-    case HloOpcode::kAbs:
-      return {func_builder.create<hlo::AbsOp>(loc, rets, args, attrs)};
-    case HloOpcode::kAdd:
-      return {func_builder.create<hlo::AddOp>(loc, rets, args, attrs)};
-    case HloOpcode::kAnd:
-      return {func_builder.create<hlo::AndOp>(loc, rets, args, attrs)};
-    case HloOpcode::kCeil:
-      return {func_builder.create<hlo::CeilOp>(loc, rets, args, attrs)};
-    case HloOpcode::kComplex:
-      return {func_builder.create<hlo::ComplexOp>(loc, rets, args, attrs)};
-    case HloOpcode::kCopy:
-      return {func_builder.create<hlo::CopyOp>(loc, rets, args, attrs)};
-    case HloOpcode::kCos:
-      return {func_builder.create<hlo::CosOp>(loc, rets, args, attrs)};
-    case HloOpcode::kDivide:
-      return {func_builder.create<hlo::DivOp>(loc, rets, args, attrs)};
-    case HloOpcode::kExp:
-      return {func_builder.create<hlo::ExpOp>(loc, rets, args, attrs)};
-    case HloOpcode::kImag:
-      return {func_builder.create<hlo::ImagOp>(loc, rets, args, attrs)};
-    case HloOpcode::kLog:
-      return {func_builder.create<hlo::LogOp>(loc, rets, args, attrs)};
-    case HloOpcode::kMaximum:
-      return {func_builder.create<hlo::MaxOp>(loc, rets, args, attrs)};
-    case HloOpcode::kMinimum:
-      return {func_builder.create<hlo::MinOp>(loc, rets, args, attrs)};
-    case HloOpcode::kMultiply:
-      return {func_builder.create<hlo::MulOp>(loc, rets, args, attrs)};
-    case HloOpcode::kNegate:
-      return {func_builder.create<hlo::NegOp>(loc, rets, args, attrs)};
-    case HloOpcode::kReal:
-      return {func_builder.create<hlo::RealOp>(loc, rets, args, attrs)};
-    case HloOpcode::kRemainder:
-      return {func_builder.create<hlo::RemOp>(loc, rets, args, attrs)};
-    case HloOpcode::kRsqrt:
-      return {func_builder.create<hlo::RsqrtOp>(loc, rets, args, attrs)};
-    case HloOpcode::kSelect:
-      return {func_builder.create<hlo::SelectOp>(loc, rets, args, attrs)};
-    case HloOpcode::kSign:
-      return {func_builder.create<hlo::SignOp>(loc, rets, args, attrs)};
-    case HloOpcode::kSqrt:
-      return {func_builder.create<hlo::SqrtOp>(loc, rets, args, attrs)};
-    case HloOpcode::kSubtract:
-      return {func_builder.create<hlo::SubOp>(loc, rets, args, attrs)};
-    case HloOpcode::kTanh:
-      return {func_builder.create<hlo::TanhOp>(loc, rets, args, attrs)};
-    default:
-      return tensorflow::errors::Internal(absl::StrCat(
-          "HLO Opcode ", HloOpcodeString(opcode), " is not supported."));
-  }
-}
-
-}  // namespace
-
-mlir::Location HloDialectEmitter::getLocation(
-    const HloInstruction* instr) const {
-  return emission_context_->getLocation(instr);
-}
-
-StatusOr<Value> HloDialectEmitter::EmitComputation(
-    const HloComputation& computation) {
-  const auto root = computation.root_instruction();
-  TF_RETURN_IF_ERROR(root->Accept(this));
-  return instruction_to_values_[root];
-}
-
-Status HloDialectEmitter::DefaultAction(HloInstruction* instr) {
-  TF_ASSIGN_OR_RETURN(auto res_type, ConvertTensorShapeToType<RankedTensorType>(
-                                         instr->shape(), builder_));
-  llvm::SmallVector<Value, 4> arguments;
-  arguments.reserve(instr->operand_count());
-  for (auto operand : instr->operands()) {
-    arguments.push_back(instruction_to_values_[operand]);
-  }
-  TF_ASSIGN_OR_RETURN(
-      auto inserted, InsertMlirOp(instr->opcode(), builder_, getLocation(instr),
-                                  res_type, arguments, llvm::None));
-  instruction_to_values_[instr] = inserted;
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleBroadcast(HloInstruction* instr) {
-  mlir::DenseIntElementsAttr broadcast_dim =
-      CreateDenseIntElementsAttrFromVector(instr->dimensions(), builder_);
-  TF_ASSIGN_OR_RETURN(Type res_type, ConvertTensorShapeToType<RankedTensorType>(
-                                         instr->shape(), builder_));
-
-  instruction_to_values_[instr] = builder_.create<hlo::BroadcastInDimOp>(
-      getLocation(instr), llvm::makeArrayRef(res_type),
-      instruction_to_values_[instr->operand(0)], broadcast_dim);
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleConcatenate(HloInstruction* instr) {
-  int64 concatenate_dim = instr->concatenate_dimension();
-  TF_ASSIGN_OR_RETURN(Type res_type, ConvertTensorShapeToType<RankedTensorType>(
-                                         instr->shape(), builder_));
-
-  llvm::SmallVector<Value, 4> arguments;
-  arguments.reserve(instr->operand_count());
-  for (auto operand : instr->operands()) {
-    arguments.push_back(instruction_to_values_[operand]);
-  }
-
-  instruction_to_values_[instr] = builder_.create<hlo::ConcatenateOp>(
-      getLocation(instr), llvm::makeArrayRef(res_type), arguments,
-      builder_.getI64IntegerAttr(concatenate_dim));
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleParameter(HloInstruction* instr) {
-  auto argValue = arguments_[instr->parameter_number()];
-  instruction_to_values_[instr] = argValue;
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleConstant(HloInstruction* instr) {
-  auto shape = instr->shape();
-  if (!shape.IsArray() || shape.rank() != 0) {
-    return Unimplemented("non-scalar constants are not supported yet");
-  }
-  TF_ASSIGN_OR_RETURN(auto type, ConvertTensorShapeToType<RankedTensorType>(
-                                     instr->shape(), builder_));
-
-  TF_ASSIGN_OR_RETURN(auto value, CreateDenseElementsAttrFromLiteral(
-                                      instr->literal(), builder_));
-
-  auto const_value =
-      builder_.create<hlo::ConstOp>(getLocation(instr), type, value);
-  instruction_to_values_[instr] = const_value;
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleGather(HloInstruction* instr) {
-  HloGatherInstruction* gather = static_cast<HloGatherInstruction*>(instr);
-  mlir::mhlo::GatherDimensionNumbers dimension_numbers =
-      xla::CreateGatherDimensionNumbers(gather->gather_dimension_numbers(),
-                                        builder_);
-  mlir::DenseIntElementsAttr slice_sizes = CreateDenseIntElementsAttrFromVector(
-      llvm::SmallVector<int64, 4>{gather->gather_slice_sizes().begin(),
-                                  gather->gather_slice_sizes().end()},
-      builder_);
-  mlir::BoolAttr indices_are_sorted =
-      builder_.getBoolAttr(gather->indices_are_sorted());
-
-  TF_ASSIGN_OR_RETURN(Type res_type, ConvertTensorShapeToType<RankedTensorType>(
-                                         instr->shape(), builder_));
-
-  instruction_to_values_[instr] = builder_.create<hlo::GatherOp>(
-      getLocation(instr), res_type, instruction_to_values_[instr->operand(0)],
-      instruction_to_values_[instr->operand(1)], dimension_numbers, slice_sizes,
-      indices_are_sorted);
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleReduce(HloInstruction* instr) {
-  llvm::SmallVector<Value, 4> operands;
-  for (auto operand : instr->operands()) {
-    operands.push_back(instruction_to_values_.at(operand));
-  }
-  const unsigned num_inputs = operands.size() / 2;
-  TF_ASSIGN_OR_RETURN(
-      const auto return_type,
-      ConvertTensorShapeToType<RankedTensorType>(instr->shape(), builder_));
-  const auto dimensions_attr =
-      CreateDenseIntElementsAttrFromVector(instr->dimensions(), builder_);
-  auto reduceOp = builder_.create<hlo::ReduceOp>(
-      getLocation(instr), return_type,
-      llvm::makeArrayRef(operands).take_front(num_inputs),
-      llvm::makeArrayRef(operands).take_back(num_inputs), dimensions_attr);
-  {
-    auto computation = instr->to_apply();
-    auto block = new mlir::Block();
-    llvm::SmallVector<Value, 4> arguments;
-    arguments.reserve(computation->num_parameters());
-    for (auto parameter : computation->parameter_instructions()) {
-      TF_ASSIGN_OR_RETURN(auto param_type,
-                          ConvertTensorShapeToType<RankedTensorType>(
-                              parameter->shape(), builder_));
-      arguments.push_back(block->addArgument(param_type));
-    }
-    reduceOp.body().push_back(block);
-    HloDialectEmitter emitter(emission_context_, &reduceOp.body(), arguments);
-    TF_ASSIGN_OR_RETURN(auto result, emitter.EmitComputation(*computation));
-    OpBuilder body_builder = OpBuilder::atBlockEnd(block);
-    body_builder.setInsertionPointToEnd(block);
-    body_builder.create<hlo::ReturnOp>(getLocation(instr),
-                                       ArrayRef<Value>{result});
-  }
-  // TODO(b/137624192) Add support for multiple results.
-  instruction_to_values_[instr] = reduceOp.getResult(0);
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleCompare(HloInstruction* instr) {
-  TF_ASSIGN_OR_RETURN(Type res_type, ConvertTensorShapeToType<RankedTensorType>(
-                                         instr->shape(), builder_));
-  auto comparison_direction_attr = builder_.getNamedAttr(
-      "comparison_direction",
-      builder_.getStringAttr(
-          ComparisonDirectionToString(instr->comparison_direction())));
-  llvm::SmallVector<Value, 4> arguments;
-  arguments.reserve(instr->operand_count());
-  for (auto operand : instr->operands()) {
-    arguments.push_back(instruction_to_values_[operand]);
-  }
-  instruction_to_values_[instr] = builder_.create<hlo::CompareOp>(
-      getLocation(instr), llvm::makeArrayRef(res_type), arguments,
-      comparison_direction_attr);
-  return Status::OK();
-}
-
-Status HloDialectEmitter::HandleIota(HloInstruction* instr) {
-  mlir::IntegerAttr iota_dim = builder_.getI64IntegerAttr(
-      static_cast<HloIotaInstruction*>(instr)->iota_dimension());
-  TF_ASSIGN_OR_RETURN(Type res_type, ConvertTensorShapeToType<RankedTensorType>(
-                                         instr->shape(), builder_));
-  instruction_to_values_[instr] =
-      builder_.create<hlo::IotaOp>(getLocation(instr), res_type, iota_dim);
-  return Status::OK();
-}
-
-}  // namespace mlir_gpu
-}  // namespace xla
tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h (deleted)

@@ -1,76 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_HLO_DIALECT_EMITTER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_HLO_DIALECT_EMITTER_H_
-
-#include <memory>
-
-#include "absl/types/span.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "mlir/IR/Builders.h"  // from @llvm-project
-#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
-#include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
-#include "tensorflow/compiler/xla/service/hlo_computation.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h"
-#include "tensorflow/compiler/xla/status.h"
-
-namespace xla {
-namespace mlir_gpu {
-
-class HloDialectEmitter : public DfsHloVisitorWithDefault {
- public:
-  HloDialectEmitter(xla::mlir_gpu::EmissionContext* emission_context,
-                    ::mlir::Region* region,
-                    llvm::ArrayRef<::mlir::Value> arguments)
-      : emission_context_(emission_context),
-        builder_(region),
-        arguments_(arguments) {}
-
-  HloDialectEmitter(xla::mlir_gpu::EmissionContext* emission_context,
-                    ::mlir::OpBuilder builder,
-                    llvm::ArrayRef<::mlir::Value> arguments)
-      : emission_context_(emission_context),
-        builder_(builder),
-        arguments_(arguments) {}
-
-  StatusOr<mlir::Value> EmitComputation(const HloComputation& computation);
-
-  Status DefaultAction(HloInstruction* instr) override;
-  Status HandleBroadcast(HloInstruction* instr) override;
-  Status HandleCompare(HloInstruction* instr) override;
-  Status HandleConcatenate(HloInstruction* instr) override;
-  Status HandleConstant(HloInstruction* instr) override;
-  Status HandleGather(HloInstruction* instr) override;
-  Status HandleIota(HloInstruction* instr) override;
-  Status HandleParameter(HloInstruction* instr) override;
-  Status HandleReduce(HloInstruction* instr) override;
-
- private:
-  mlir::Location getLocation(const HloInstruction* instr) const;
-
-  xla::mlir_gpu::EmissionContext* emission_context_;
-  ::mlir::OpBuilder builder_;
-  llvm::ArrayRef<::mlir::Value> arguments_;
-  absl::flat_hash_map<const xla::HloInstruction*, ::mlir::Value>
-      instruction_to_values_;
-};
-
-}  // namespace mlir_gpu
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_HLO_DIALECT_EMITTER_H_
tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.cc (deleted)

@@ -1,41 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h"
-
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-
-namespace mlir {
-namespace {
-
-struct InjectErrorsForTestingPass
-    : public PassWrapper<InjectErrorsForTestingPass, FunctionPass> {
-  void runOnFunction() override {
-    getFunction().getBody().walk([&](Operation *op) {
-      op->emitError() << "failed for testing: " << op->getName();
-    });
-  }
-};
-
-}  // namespace
-
-std::unique_ptr<OperationPass<FuncOp>> createInjectErrorsForTestingPass() {
-  return std::make_unique<InjectErrorsForTestingPass>();
-}
-
-static PassRegistration<InjectErrorsForTestingPass> pass(
-    "inject-errors", "Emits errors from all operations");
-
-}  // namespace mlir
tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h (deleted)

@@ -1,29 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_INJECT_ERRORS_PASS_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_INJECT_ERRORS_PASS_H_
-
-#include "mlir/Pass/Pass.h"  // from @llvm-project
-
-namespace mlir {
-
-// Returns a function pass that emits errors from all operations inside the
-// function.
-std::unique_ptr<OperationPass<FuncOp>> createInjectErrorsForTestingPass();
-
-}  // namespace mlir
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_INJECT_ERRORS_PASS_H_
@ -1,504 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
|
|
||||||
#include "tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h"
|
|
||||||
|
|
||||||
#include <utility>
|
|
||||||
|
|
||||||
#include "llvm/IR/DataLayout.h"
|
|
||||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project
|
|
||||||
#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/Attributes.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/Builders.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/BuiltinOps.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/BuiltinTypes.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/Identifier.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/Types.h" // from @llvm-project
|
|
||||||
#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
|
|
||||||
#include "tensorflow/compiler/mlir/xla/hlo_utils.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/gpu/thunk.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/gpu/thunk_emitter.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/hlo_computation.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/hlo_instructions.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
|
|
||||||
#include "tensorflow/compiler/xla/service/mlir_gpu/hlo_dialect_emitter.h"
|
|
||||||
#include "tensorflow/compiler/xla/status_macros.h"
|
|
||||||
#include "tensorflow/core/lib/core/errors.h"
|
|
||||||
|
|
||||||
namespace xla {
|
|
||||||
namespace mlir_gpu {
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
using ::mlir::ArrayRef;
|
|
||||||
using ::mlir::Attribute;
|
|
||||||
using ::mlir::Builder;
|
|
||||||
using ::mlir::DenseIntElementsAttr;
|
|
||||||
using ::mlir::FuncOp;
|
|
||||||
using ::mlir::Identifier;
|
|
||||||
using ::mlir::Location;
|
|
||||||
using ::mlir::MemRefType;
|
|
||||||
using ::mlir::ModuleOp;
|
|
||||||
using ::mlir::OpBuilder;
|
|
||||||
using ::mlir::Type;
|
|
||||||
using ::mlir::Value;
|
|
||||||
using ::mlir::LLVM::LLVMDialect;
|
|
||||||
using ::xla::gpu::Thunk;
|
|
||||||
using ::xla::gpu::ThunkEmitter;
|
|
||||||
using ::xla::gpu::ThunkSequence;
|
|
||||||
|
|
||||||
namespace lhlo = ::mlir::lmhlo;
|
|
||||||
|
|
||||||
// TODO(b/137624192) Use tablegen for this.
Status InsertMlirOp(HloOpcode opcode, OpBuilder func_builder, Location loc,
                    ArrayRef<Type> rets, ArrayRef<Value> args,
                    ArrayRef<std::pair<Identifier, Attribute>> attrs) {
  switch (opcode) {
    case HloOpcode::kAbs:
      func_builder.create<lhlo::AbsOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kAdd:
      func_builder.create<lhlo::AddOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kAnd:
      func_builder.create<lhlo::AndOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kCeil:
      func_builder.create<lhlo::CeilOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kComplex:
      func_builder.create<lhlo::ComplexOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kCopy:
      func_builder.create<lhlo::CopyOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kCos:
      func_builder.create<lhlo::CosOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kDivide:
      func_builder.create<lhlo::DivOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kExp:
      func_builder.create<lhlo::ExpOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kImag:
      func_builder.create<lhlo::ImagOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kLog:
      func_builder.create<lhlo::LogOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kMaximum:
      func_builder.create<lhlo::MaxOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kMinimum:
      func_builder.create<lhlo::MinOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kMultiply:
      func_builder.create<lhlo::MulOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kNegate:
      func_builder.create<lhlo::NegOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kReal:
      func_builder.create<lhlo::RealOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kRemainder:
      func_builder.create<lhlo::RemOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kRsqrt:
      func_builder.create<lhlo::RsqrtOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kSelect:
      func_builder.create<lhlo::SelectOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kSign:
      func_builder.create<lhlo::SignOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kSqrt:
      func_builder.create<lhlo::SqrtOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kSubtract:
      func_builder.create<lhlo::SubOp>(loc, rets, args, attrs);
      break;
    case HloOpcode::kTanh:
      func_builder.create<lhlo::TanhOp>(loc, rets, args, attrs);
      break;
    default:
      return tensorflow::errors::Internal(absl::StrCat(
          "LHLO opcode ", HloOpcodeString(opcode), " is not supported."));
  }
  return Status::OK();
}

StatusOr<llvm::SmallVector<Type, 4>> GetInstructionArgTypes(
    const HloInstruction& instruction, Builder builder) {
  llvm::SmallVector<Type, 4> arg_types;
  for (auto operand : instruction.operands()) {
    TF_ASSIGN_OR_RETURN(auto operand_type, ConvertShapeToType<MemRefType>(
                                               operand->shape(), builder));
    arg_types.push_back(operand_type);
  }
  TF_ASSIGN_OR_RETURN(auto operand_type, ConvertShapeToType<MemRefType>(
                                             instruction.shape(), builder));
  arg_types.push_back(operand_type);
  return arg_types;
}

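// Illustration (not from the original source): the buffer-level signature
// appends one memref for the result after the operand memrefs. For an
// f32[8,16] add, assuming the usual shape-to-memref conversion,
// GetInstructionArgTypes would yield
//
//   (memref<8x16xf32>, memref<8x16xf32>, memref<8x16xf32>)
//
// and CreateFunction below turns that into an MLIR function such as
//
//   func @add.3(%lhs: memref<8x16xf32>, %rhs: memref<8x16xf32>,
//               %out: memref<8x16xf32>)
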
// Converts HloComputation into a block with HLO dialect ops. The block gets
// memref arguments corresponding to HloComputation arguments and results.
Status SpliceHloComputation(OpBuilder builder, mlir::Location loc,
                            const HloComputation& hlo_computation,
                            xla::mlir_gpu::EmissionContext* emission_context) {
  auto block = builder.getInsertionBlock();
  builder.setInsertionPoint(block->getTerminator());
  llvm::SmallVector<Value, 4> arg_values;
  // First map parameters to memrefs on the operation.
  for (auto param : hlo_computation.parameter_instructions()) {
    TF_ASSIGN_OR_RETURN(
        auto arg_type, ConvertShapeToType<MemRefType>(param->shape(), builder));
    auto block_arg = block->addArgument(arg_type);
    arg_values.push_back(builder.create<::mlir::TensorLoadOp>(loc, block_arg));
  }
  HloDialectEmitter hlo_emitter(emission_context, builder, arg_values);

  TF_ASSIGN_OR_RETURN(auto result,
                      hlo_emitter.EmitComputation(hlo_computation));

  // Now add a block arg and store for the result.
  builder.setInsertionPoint(block->getTerminator());
  TF_ASSIGN_OR_RETURN(
      auto result_type,
      ConvertShapeToType<MemRefType>(
          hlo_computation.root_instruction()->shape(), builder));
  auto block_arg = block->addArgument(result_type);
  builder.create<::mlir::TensorStoreOp>(loc, result, block_arg);

  return Status::OK();
}

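// Illustration (not from the original source): for a scalar-add reduction
// body, the spliced block would look roughly like
//
//   ^bb0(%lhs_buf: memref<f32>, %rhs_buf: memref<f32>, %out_buf: memref<f32>):
//     %lhs = tensor_load %lhs_buf : memref<f32>
//     %rhs = tensor_load %rhs_buf : memref<f32>
//     %sum = mhlo.add %lhs, %rhs : tensor<f32>
//     tensor_store %sum, %out_buf : memref<f32>
//     "lmhlo.terminator"() : () -> ()
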
}  // namespace

mlir::Location LhloDialectEmitter::getLocation(
    const HloInstruction* instr) const {
  return emission_context_->getLocation(instr);
}

LhloDialectEmitter::LhloDialectEmitter(
    xla::mlir_gpu::EmissionContext* emission_context,
    const BufferAssignment& assignment, const se::Platform* platform,
    ModuleOp mlir_module)
    : emission_context_(emission_context),
      mlir_module_(mlir_module),
      builder_(mlir_module_.getContext()),
      buffer_assignment_(assignment),
      platform_(platform) {
  llvm::DataLayout data_layout("");
  if (auto data_layout_attr = mlir_module.getAttrOfType<mlir::StringAttr>(
          mlir::LLVM::LLVMDialect::getDataLayoutAttrName())) {
    data_layout.reset(data_layout_attr.getValue());
  }

  pointer_size_ = data_layout.getPointerSize();
}

void LhloDialectEmitter::AddThunkToThunkSequence(std::unique_ptr<Thunk> thunk) {
  thunk_sequence_.push_back(std::move(thunk));
}

StatusOr<BufferAllocation::Slice> LhloDialectEmitter::MaybeGetAllocationSlice(
    const HloInstruction& hlo, const ShapeIndex& index) const {
  return buffer_assignment_.GetUniqueSlice(&hlo, index);
}

int64 LhloDialectEmitter::ByteSizeOf(const Shape& shape) const {
  return ShapeUtil::ByteSizeOf(shape, pointer_size_);
}

absl::string_view LhloDialectEmitter::platform_name() const {
  return platform_->Name();
}

StatusOr<FuncOp> LhloDialectEmitter::CreateFunction(
    const HloInstruction& instr) {
  TF_ASSIGN_OR_RETURN(auto args, GetInstructionArgTypes(instr, builder_));
  auto function_type = builder_.getFunctionType(args, {});
  auto function =
      FuncOp::create(getLocation(&instr), instr.name(), function_type);
  mlir_module_.push_back(function);
  function.addEntryBlock();
  OpBuilder op_builder(function.getBody());
  op_builder.create<::mlir::ReturnOp>(getLocation(&instr));
  instruction_to_mlir_func_[&instr] = function;
  return function;
}

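// Illustration (not from the original source): CreateFunction only builds an
// empty skeleton; each handler below then inserts its LHLO op into the entry
// block, in front of the return, e.g.
//
//   func @copy.1(%arg0: memref<4xf32>, %arg1: memref<4xf32>) {
//     return   // handlers place their ops before this terminator
//   }
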
Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) {
  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());
  llvm::SmallVector<Value, 4> arg_values{function.args_begin(),
                                         function.args_end()};
  TF_RETURN_IF_ERROR(InsertMlirOp(instr->opcode(), func_builder,
                                  getLocation(instr), ArrayRef<Type>{},
                                  arg_values, llvm::None));
  return Status::OK();
}

Status LhloDialectEmitter::HandleBroadcast(HloInstruction* instr) {
  DenseIntElementsAttr broadcast_dim =
      CreateDenseIntElementsAttrFromVector(instr->dimensions(), builder_);

  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());
  func_builder.create<lhlo::BroadcastInDimOp>(
      getLocation(instr), function.getArgument(0), function.getArgument(1),
      broadcast_dim);
  return Status::OK();
}

Status LhloDialectEmitter::HandleConcatenate(HloInstruction* instr) {
  mlir::IntegerAttr concatenate_dim = builder_.getI64IntegerAttr(
      static_cast<HloConcatenateInstruction*>(instr)->concatenate_dimension());

  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());
  func_builder.create<lhlo::ConcatenateOp>(
      getLocation(instr), function.getArguments().drop_back(),
      function.getArguments().back(), concatenate_dim);
  return Status::OK();
}

Status LhloDialectEmitter::HandleFusion(HloInstruction* instr) {
  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());
  auto fusion_op = func_builder.create<lhlo::FusionOp>(getLocation(instr));

  // Load the HLO argument tensors from the corresponding buffers. The last
  // argument is for the result, so no need to load it.
  OpBuilder body_builder(fusion_op.region());
  llvm::SmallVector<Value, 4> arg_values;
  for (int i = 0, e = function.getNumArguments() - 1; i < e; ++i) {
    arg_values.push_back(body_builder.create<::mlir::TensorLoadOp>(
        getLocation(instr), function.getArgument(i)));
  }
  HloDialectEmitter hlo_emitter(emission_context_, body_builder, arg_values);

  TF_ASSIGN_OR_RETURN(
      auto result,
      hlo_emitter.EmitComputation(*instr->fused_instructions_computation()));

  // Insert the write-back from the HLO computation to the result argument
  // buffer.
  body_builder.setInsertionPoint(fusion_op.region().back().getTerminator());
  Value result_memref = function.getArguments().back();
  body_builder.create<::mlir::TensorStoreOp>(getLocation(instr), result,
                                             result_memref);

  return Status::OK();
}

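// Illustration (not from the original source): a fused multiply-add would be
// emitted along these lines, with tensor_load/tensor_store bridging between
// the buffer arguments and the tensor-typed mhlo ops:
//
//   "lmhlo.fusion"() ({
//     %a = tensor_load %arg0 : memref<16xf32>
//     %b = tensor_load %arg1 : memref<16xf32>
//     %c = tensor_load %arg2 : memref<16xf32>
//     %t = mhlo.multiply %a, %b : tensor<16xf32>
//     %r = mhlo.add %t, %c : tensor<16xf32>
//     tensor_store %r, %arg3 : memref<16xf32>
//     "lmhlo.terminator"() : () -> ()
//   }) : () -> ()
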
Status LhloDialectEmitter::HandleGather(HloInstruction* instr) {
  HloGatherInstruction* gather = static_cast<HloGatherInstruction*>(instr);
  mlir::mhlo::GatherDimensionNumbers dim_numbers =
      xla::CreateGatherDimensionNumbers(gather->gather_dimension_numbers(),
                                        builder_);
  mlir::DenseIntElementsAttr slice_sizes = CreateDenseIntElementsAttrFromVector(
      llvm::SmallVector<int64, 4>{gather->gather_slice_sizes().begin(),
                                  gather->gather_slice_sizes().end()},
      builder_);

  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());

  func_builder.create<lhlo::GatherOp>(
      getLocation(instr), function.getArgument(0), function.getArgument(1),
      dim_numbers, slice_sizes, function.getArgument(2));

  return Status::OK();
}

Status LhloDialectEmitter::HandleReduce(HloInstruction* instr) {
  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  llvm::SmallVector<Value, 4> arg_values{function.args_begin(),
                                         function.args_end()};
  OpBuilder builder(function.getBody());
  auto loc = getLocation(instr);
  int input_count = instr->operand_count() / 3;
  auto inputs = llvm::makeArrayRef(arg_values).slice(input_count);
  auto init_values =
      llvm::makeArrayRef(arg_values).slice(input_count, input_count);
  auto results =
      llvm::makeArrayRef(arg_values).slice(2 * input_count, input_count);
  auto dimensions_attr =
      CreateDenseIntElementsAttrFromVector(instr->dimensions(), builder_);
  auto reduce_op = builder.create<lhlo::ReduceOp>(loc, inputs, init_values,
                                                  results, dimensions_attr);
  builder.createBlock(&reduce_op.body());
  OpBuilder::atBlockEnd(&reduce_op.body().front())
      .create<lhlo::TerminatorOp>(getLocation(instr));
  return SpliceHloComputation(OpBuilder{&reduce_op.body()}, loc,
                              *instr->to_apply(), emission_context_);
}

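// Illustration (not from the original source): for a row sum over f32[4,8],
// the resulting op would look roughly like
//
//   "lmhlo.reduce"(%input, %init, %out) ({
//     ^bb0(%lhs: memref<f32>, %rhs: memref<f32>, %res: memref<f32>):
//       ... body spliced in by SpliceHloComputation ...
//       "lmhlo.terminator"() : () -> ()
//   }) {dimensions = dense<1> : tensor<1xi64>}
//       : (memref<4x8xf32>, memref<f32>, memref<4xf32>) -> ()
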
Status LhloDialectEmitter::HandleReduceWindow(HloInstruction* instr) {
  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  llvm::SmallVector<Value, 4> arg_values{function.args_begin(),
                                         function.args_end()};
  OpBuilder builder(function.getBody());
  auto loc = getLocation(instr);

  // Collect attribute values.
  llvm::SmallVector<int64, 2> window_dimensions, window_strides, base_dilations,
      window_dilations;
  llvm::SmallVector<int64, 4> padding;
  int64 rank = instr->window().dimensions_size();
  window_dimensions.reserve(rank);
  window_strides.reserve(rank);
  base_dilations.reserve(rank);
  window_dilations.reserve(rank);
  padding.reserve(2 * rank);
  for (const auto& window : instr->window().dimensions()) {
    window_dimensions.push_back(window.size());
    window_strides.push_back(window.stride());
    base_dilations.push_back(window.base_dilation());
    window_dilations.push_back(window.window_dilation());
    padding.push_back(window.padding_low());
    padding.push_back(window.padding_high());
  }

  auto reduce_window_op = builder.create<lhlo::ReduceWindowOp>(
      loc, /*operand=*/arg_values[0], /*init_value=*/arg_values[1],
      /*out=*/arg_values[2],
      CreateDenseIntElementsAttrFromVector(window_dimensions, builder),
      CreateDenseIntElementsAttrFromVector(window_strides, builder),
      CreateDenseIntElementsAttrFromVector(base_dilations, builder),
      CreateDenseIntElementsAttrFromVector(window_dilations, builder),
      CreateDenseIntElementsAttrFromVector(padding, builder, {rank, 2}));
  reduce_window_op.ensureTerminator(reduce_window_op.body(), builder, loc);
  return SpliceHloComputation(OpBuilder{&reduce_window_op.body()}, loc,
                              *instr->to_apply(), emission_context_);
}

Status LhloDialectEmitter::HandleSelectAndScatter(HloInstruction* instr) {
  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  llvm::SmallVector<Value, 4> arg_values{function.args_begin(),
                                         function.args_end()};
  OpBuilder builder(function.getBody());
  auto loc = getLocation(instr);

  // Collect attribute values.
  llvm::SmallVector<int64, 2> window_dimensions, window_strides, padding;
  int64 rank = instr->window().dimensions_size();
  window_dimensions.reserve(rank);
  window_strides.reserve(rank);
  padding.reserve(2 * rank);
  for (const auto& window : instr->window().dimensions()) {
    window_dimensions.push_back(window.size());
    window_strides.push_back(window.stride());
    padding.push_back(window.padding_low());
    padding.push_back(window.padding_high());
  }

  auto select_scatter_op = builder.create<lhlo::SelectAndScatterOp>(
      loc, /*operand=*/arg_values[0], /*source=*/arg_values[1],
      /*init_value=*/arg_values[2],
      /*out=*/arg_values[3],
      CreateDenseIntElementsAttrFromVector(window_dimensions, builder),
      CreateDenseIntElementsAttrFromVector(window_strides, builder),
      CreateDenseIntElementsAttrFromVector(padding, builder, {rank, 2}));

  // Convert `select` computation.
  builder.createBlock(&select_scatter_op.select());
  OpBuilder select_builder{&select_scatter_op.select()};
  select_builder.create<lhlo::TerminatorOp>(loc);
  TF_RETURN_IF_ERROR(SpliceHloComputation(select_builder, loc, *instr->select(),
                                          emission_context_));

  // Convert `scatter` computation.
  builder.createBlock(&select_scatter_op.scatter());
  OpBuilder scatter_builder{&select_scatter_op.scatter()};
  scatter_builder.create<lhlo::TerminatorOp>(loc);
  TF_RETURN_IF_ERROR(SpliceHloComputation(
      scatter_builder, loc, *instr->scatter(), emission_context_));

  return Status::OK();
}

Status LhloDialectEmitter::HandleCustomCall(HloInstruction* instr) {
  return ThunkEmitter(this).HandleCustomCall(instr);
}

Status LhloDialectEmitter::HandleParameter(HloInstruction* instr) {
  return Status::OK();
}

Status LhloDialectEmitter::HandleCompare(HloInstruction* instr) {
  auto comparison_direction_attr = builder_.getNamedAttr(
      "comparison_direction",
      builder_.getStringAttr(
          ComparisonDirectionToString(instr->comparison_direction())));

  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());
  llvm::SmallVector<Value, 4> arg_values{function.args_begin(),
                                         function.args_end()};
  func_builder.create<lhlo::CompareOp>(getLocation(instr), llvm::None,
                                       arg_values, comparison_direction_attr);
  return Status::OK();
}

Status LhloDialectEmitter::HandleConstant(HloInstruction* instr) {
  auto shape = instr->shape();
  if (!shape.IsArray() || shape.rank() != 0) {
    return Unimplemented("non-scalar constants are not supported yet");
  }
  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());

  TF_ASSIGN_OR_RETURN(auto value, CreateDenseElementsAttrFromLiteral(
                                      instr->literal(), func_builder));
  func_builder.create<lhlo::ConstOp>(getLocation(instr), value,
                                     *function.args_begin());
  return Status::OK();
}

Status LhloDialectEmitter::HandleIota(HloInstruction* instr) {
  mlir::IntegerAttr iota_dim = builder_.getI64IntegerAttr(
      static_cast<HloIotaInstruction*>(instr)->iota_dimension());

  TF_ASSIGN_OR_RETURN(auto function, CreateFunction(*instr));
  OpBuilder func_builder(function.getBody());
  func_builder.create<lhlo::IotaOp>(getLocation(instr), iota_dim,
                                    function.getArgument(0));
  return Status::OK();
}

Status LhloDialectEmitter::HandleTuple(HloInstruction* instr) {
  // For the root node of the entry computation we can elide writing the tuple
  // buffer. We can always figure out the contents of the tuples from buffer
  // assignment because we insert copies to ensure non-ambiguous output buffers.
  // GpuExecutable never reads the tuple buffer.
  if (instr ==
      instr->parent()->parent()->entry_computation()->root_instruction()) {
    return Status::OK();
  }
  return Unimplemented("handling of tuples not yet implemented");
}

Status LhloDialectEmitter::FinishVisit(HloInstruction* root) {
  return Status::OK();
}

}  // namespace mlir_gpu
}  // namespace xla
@ -1,111 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_LHLO_DIALECT_EMITTER_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_LHLO_DIALECT_EMITTER_H_

#include <memory>
#include <utility>

#include "absl/container/flat_hash_map.h"
#include "mlir/IR/Builders.h"  // from @llvm-project
#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "tensorflow/compiler/xla/service/buffer_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/thunk.h"
#include "tensorflow/compiler/xla/service/gpu/thunk_emitter.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h"
#include "tensorflow/compiler/xla/status.h"

namespace xla {
namespace mlir_gpu {

// Implementation for the translation of HLO instructions to a ThunkSequence
// via MLIR using the LHLO dialect.
// Implements the DfsHloVisitor interface, emits LHLO computations as MLIR IR
// functions and transforms them into gpu::Thunk.
class LhloDialectEmitter : public DfsHloVisitorWithDefault,
                           private gpu::ThunkEmitter::EmissionContext {
 public:
  LhloDialectEmitter(xla::mlir_gpu::EmissionContext* emission_context,
                     const BufferAssignment& assignment,
                     const se::Platform* platform,
                     ::mlir::ModuleOp mlir_module);
  ~LhloDialectEmitter() override = default;

  // The following methods implement the DfsHloVisitor interface.
  //
  // Default action which emits code for most operations. Operations which are
  // special in some way are handled explicitly in HandleFoo methods.
  Status DefaultAction(HloInstruction* instr) override;
  Status HandleBroadcast(HloInstruction* instr) override;
  Status HandleCompare(HloInstruction* instr) override;
  Status HandleConcatenate(HloInstruction* instr) override;
  Status HandleConstant(HloInstruction* instr) override;
  Status HandleCustomCall(HloInstruction* instr) override;
  Status HandleFusion(HloInstruction* instr) override;
  Status HandleGather(HloInstruction* instr) override;
  Status HandleIota(HloInstruction* instr) override;
  Status HandleParameter(HloInstruction* instr) override;
  Status HandleReduce(HloInstruction* instr) override;
  Status HandleReduceWindow(HloInstruction* instr) override;
  Status HandleSelectAndScatter(HloInstruction* instr) override;
  Status HandleTuple(HloInstruction* instr) override;

  Status FinishVisit(HloInstruction* root) override;

  // Transfers the ownership of thunk_sequence_ out.
  gpu::ThunkSequence ConsumeThunkSequence() {
    gpu::ThunkSequence result;
    std::swap(result, thunk_sequence_);
    return result;
  }

  const absl::flat_hash_map<const xla::HloInstruction*, ::mlir::FuncOp>&
  InstructionToFunctionMap() const {
    return instruction_to_mlir_func_;
  }

 private:
  StatusOr<::mlir::FuncOp> CreateFunction(const HloInstruction& instr);
  // Interface required by ThunkEmitter
  void AddThunkToThunkSequence(std::unique_ptr<gpu::Thunk> thunk) override;
  StatusOr<BufferAllocation::Slice> MaybeGetAllocationSlice(
      const HloInstruction& hlo, const ShapeIndex& index) const override;
  int64 ByteSizeOf(const Shape& shape) const override;
  absl::string_view platform_name() const override;

  mlir::Location getLocation(const HloInstruction* instr) const;

  xla::mlir_gpu::EmissionContext* emission_context_;
  ::mlir::ModuleOp mlir_module_;
  ::mlir::Builder builder_;
  absl::flat_hash_map<const xla::HloInstruction*, ::mlir::FuncOp>
      instruction_to_mlir_func_;
  const BufferAssignment& buffer_assignment_;
  const se::Platform* platform_;
  // Cached pointer size extracted from the mlir module.
  unsigned pointer_size_;
  // The thunk sequence this IrEmitter generates for the input computation.
  gpu::ThunkSequence thunk_sequence_;

  TF_DISALLOW_COPY_AND_ASSIGN(LhloDialectEmitter);
};

}  // namespace mlir_gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_LHLO_DIALECT_EMITTER_H_
@ -1,50 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h"
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#include "llvm/IR/Module.h"
|
|
||||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project
|
|
||||||
#include "mlir/IR/MLIRContext.h" // from @llvm-project
|
|
||||||
#include "tensorflow/compiler/xla/service/gpu/target_constants.h"
|
|
||||||
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
|
|
||||||
|
|
||||||
namespace xla {
|
|
||||||
namespace mlir_gpu {
|
|
||||||
|
|
||||||
MlirCompiler::MlirCompiler() : data_layout_("") {}
|
|
||||||
|
|
||||||
se::Platform::Id MlirCompiler::PlatformId() const {
|
|
||||||
return stream_executor::cuda::kCudaPlatformId;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MlirCompiler::SetModuleHook(IRHook module_hook) {
|
|
||||||
module_hook_ = module_hook;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MlirCompiler::RemoveModuleHook() {
|
|
||||||
module_hook_ = {nullptr, IRHook::LoweringStage::LHLO};
|
|
||||||
}
|
|
||||||
|
|
||||||
void MlirCompiler::SetErrorHandler(ErrorHandler error_handler) {
|
|
||||||
error_handler_ = error_handler;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MlirCompiler::RemoveErrorHandler() { error_handler_ = nullptr; }
|
|
||||||
|
|
||||||
} // namespace mlir_gpu
|
|
||||||
} // namespace xla
|
|
@ -1,70 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_MLIR_COMPILER_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_MLIR_COMPILER_H_

#include "llvm/IR/DataLayout.h"
#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h"

namespace xla {
namespace mlir_gpu {

// A Compiler implementation that converts XLA's IR to a matching MLIR dialect,
// performs all lowering on the MLIR IR and finally converts MLIR to LLVM IR
// for generation of a thunk suitable for XLA's runtime. MlirCompilerImpl
// contains the implementation.
class MlirCompiler : public Compiler {
  using ErrorHandler =
      std::function<void(const EmissionContext::ErrorMap&, HloModule*)>;

 public:
  MlirCompiler();

  se::Platform::Id PlatformId() const override;

  struct IRHook {
    enum class LoweringStage { LHLO, GPU, LLVM, KERNEL };

    Status invoke(LoweringStage stage_, mlir::ModuleOp module) {
      if (callback && stage == stage_) {
        return callback(module);
      }
      return Status::OK();
    }

    std::function<Status(mlir::ModuleOp)> callback;
    LoweringStage stage;
  };

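  // Illustration (not from the original source): a test might register a hook
  // to inspect the module after the LHLO lowering stage, e.g.
  //
  //   compiler->SetModuleHook(
  //       {[](mlir::ModuleOp m) { m.dump(); return Status::OK(); },
  //        MlirCompiler::IRHook::LoweringStage::LHLO});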
  void SetModuleHook(IRHook module_hook);
  void RemoveModuleHook();
  void SetErrorHandler(ErrorHandler error_handler);
  void RemoveErrorHandler();

 protected:
  ::mlir::MLIRContext context_;
  llvm::DataLayout data_layout_;
  IRHook module_hook_;
  ErrorHandler error_handler_;
};

}  // namespace mlir_gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_MLIR_COMPILER_H_
@ -1,629 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <memory>
#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "llvm/IR/LLVMContext.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"  // from @llvm-project
#include "mlir/Dialect/GPU/GPUDialect.h"  // from @llvm-project
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"  // from @llvm-project
#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
#include "mlir/IR/Attributes.h"  // from @llvm-project
#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
#include "mlir/IR/Location.h"  // from @llvm-project
#include "mlir/IR/OperationSupport.h"  // from @llvm-project
#include "mlir/IR/Value.h"  // from @llvm-project
#include "mlir/Support/LLVM.h"  // from @llvm-project
#include "mlir/Target/NVVMIR.h"  // from @llvm-project
#include "tensorflow/compiler/xla/service/buffer_assignment.h"
#include "tensorflow/compiler/xla/service/dump.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_constants.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_schedule.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_types.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
#include "tensorflow/compiler/xla/service/gpu/kernel_thunk.h"
#include "tensorflow/compiler/xla/service/gpu/launch_dimensions.h"
#include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h"
#include "tensorflow/compiler/xla/service/gpu/nvptx_compiler.h"
#include "tensorflow/compiler/xla/service/gpu/stream_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h"
#include "tensorflow/compiler/xla/service/gpu/target_constants.h"
#include "tensorflow/compiler/xla/service/gpu/thunk_schedule.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_opcode.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/emission_context.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include "tensorflow/stream_executor/gpu/asm_compiler.h"

namespace xla {
namespace mlir_gpu {
namespace {

using ::mlir::BlockArgument;
using ::mlir::dyn_cast;
using ::mlir::FuncOp;
using ::mlir::ModuleOp;
using ::mlir::OwningModuleRef;
using ::mlir::UnknownLoc;
using ::mlir::Value;
using ::mlir::gpu::LaunchFuncOp;
using ::mlir::LLVM::LLVMDialect;
using ::mlir::LLVM::LLVMFuncOp;
using ::mlir::LLVM::LLVMType;
using ::xla::gpu::GpuExecutable;
using ::xla::gpu::GpuHloSchedule;
using ::xla::gpu::GpuVersion;
using ::xla::gpu::StreamAssignment;
using ::xla::gpu::ThunkSchedule;

// A Compiler implementation that converts XLA's IR to a matching MLIR dialect,
// performs all lowering on the MLIR IR and finally converts MLIR to LLVM IR
// for generation of a thunk suitable for XLA's runtime.
class MlirCompilerImpl : public MlirCompiler {
 public:
  StatusOr<std::unique_ptr<HloModule>> RunHloPasses(
      std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
      const CompileOptions& options) override;

  StatusOr<std::unique_ptr<Executable>> RunBackend(
      std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
      const CompileOptions& options) override;

  StatusOr<std::vector<std::unique_ptr<Executable>>> Compile(
      std::unique_ptr<HloModuleGroup> module_group,
      std::vector<std::vector<se::StreamExecutor*>> stream_execs,
      const CompileOptions& options) override;

  StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
  CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                     const AotCompilationOptions& options) override;

  HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override {
    int64 pointer_size = data_layout_.getPointerSize();
    return [pointer_size](const Shape& shape) {
      return ShapeUtil::ByteSizeOf(shape, pointer_size);
    };
  }
};

// TODO(b/137624192) Share with NVPTX compiler
static std::vector<std::string> CandidateCudaRoots(
    const HloModuleConfig& config) {
  return tensorflow::CandidateCudaRoots(
      config.debug_options().xla_gpu_cuda_data_dir());
}

void PrintCantFindCudaMessage(absl::string_view msg,
                              const HloModuleConfig& hlo_module_config) {
  LOG(WARNING) << msg;
  LOG(WARNING) << "Searched for CUDA in the following directories:";

  for (const auto& dir : CandidateCudaRoots(hlo_module_config)) {
    LOG(WARNING) << "  " << dir;
  }
  LOG(WARNING)
      << "You can choose the search directory by setting xla_gpu_cuda_data_dir "
         "in HloModule's DebugOptions. For most apps, setting the environment "
         "variable XLA_FLAGS=--xla_gpu_cuda_data_dir=/path/to/cuda will work.";
}

// Returns the directory containing nvvm libdevice files.
std::string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
  for (const string& cuda_root : CandidateCudaRoots(hlo_module_config)) {
    const std::string libdevice_dir =
        tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
    VLOG(2) << "Looking for libdevice at " << libdevice_dir;
    if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
      VLOG(2) << "Found libdevice dir " << libdevice_dir;
      return libdevice_dir;
    }
  }
  PrintCantFindCudaMessage(
      "Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may "
      "result in compilation or runtime failures, if the program we try to run "
      "uses routines from libdevice.",
      hlo_module_config);

  // CandidateCudaRoots always includes ".", but if everything fails, we
  // return it anyway. Better than returning the empty string.
  return ".";
}

StatusOr<std::unique_ptr<HloModule>> MlirCompilerImpl::RunHloPasses(
    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
    const CompileOptions& options) {
  // Until we find a reason to do something different, run the same passes
  // that the normal GPU backend runs.
  gpu::NVPTXCompiler xla_compiler;
  TF_RETURN_IF_ERROR(xla_compiler.OptimizeHloModule(module.get(), stream_exec,
                                                    options.device_allocator));
  TF_RETURN_IF_ERROR(xla_compiler.PrepareHloModuleForIrEmitting(module.get()));

  return std::move(module);
}

// TODO(b/137624192): Move this to custom call handling and share.
absl::optional<bool> CanShareBufferHint(const HloInstruction* user,
                                        const HloInstruction* operand,
                                        const ShapeIndex& user_index) {
  if (user->opcode() == HloOpcode::kCustomCall) {
    // Share the bias buffer with the parent instruction.
    if (user->custom_call_target() == xla::gpu::kGemmCallTarget) {
      if (user->operand_count() == 3 && user->operand(2) == operand) {
        return true;
      }
    }
    // The operand of cholesky can be shared with the first output.
    if (user->custom_call_target() == xla::gpu::kCusolverCholeskyCallTarget) {
      return user_index.size() == 1 && user_index[0] == 0;
    }
  }
  return absl::nullopt;
}

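// Illustration (not from the original source): for a GEMM custom call of the
// form custom-call(%a, %b, %bias), the hint above lets buffer assignment
// reuse the buffer already holding %bias (operand 2) as the output buffer,
// since the GEMM can accumulate the bias into the result in place.
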
// TODO(b/137624192): Share this with nvptx backend.
GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) {
  int cc_major, cc_minor;
  const auto& device_description = stream_exec->GetDeviceDescription();
  if (!device_description.cuda_compute_capability(&cc_major, &cc_minor)) {
    LOG(WARNING)
        << "Couldn't get compute capability for device; assuming sm_20.";
    cc_major = 2;
    cc_minor = 0;
  }
  return std::make_pair(cc_major, cc_minor);
}

// Return the constant launch bound along the "x" dimension in "dim" if all the
// other dimensions are 1. Return nullopt otherwise or when any of the bounds
// is not constant.
static absl::optional<int64> getLaunchBound(const mlir::gpu::KernelDim3& dim) {
  auto get_constant = [](mlir::Operation* op,
                         mlir::StringRef name) -> absl::optional<int64> {
    if (auto constant = llvm::dyn_cast_or_null<mlir::ConstantOp>(op)) {
      return constant.value().cast<mlir::IntegerAttr>().getInt();
    }
    op->emitError() << "bound " << name << " is not constant";
    return absl::nullopt;
  };
  auto y_op = dim.y.getDefiningOp();
  auto dim_y = get_constant(y_op, "y");
  if (!dim_y.has_value() || dim_y.value() != 1) {
    y_op->emitError() << "bound 'y' is not constant 1";
    return absl::nullopt;
  }
  auto z_op = dim.z.getDefiningOp();
  auto dim_z = get_constant(z_op, "z");
  if (!dim_z.has_value() || dim_z.value() != 1) {
    z_op->emitError() << "bound 'z' is not constant 1";
    return absl::nullopt;
  }
  return get_constant(dim.x.getDefiningOp(), "x");
}

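// Illustration (not from the original source): this only accepts launches
// whose grid/block sizes come from std.constant ops, schematically:
//
//   %c1   = constant 1 : index
//   %c256 = constant 256 : index
//   gpu.launch_func ... blocks in (%c256, %c1, %c1)
//                       threads in (%c256, %c1, %c1)
//
// where getLaunchBound returns 256 for both the grid and the block dimension.
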
// Indexes of a range of arguments in a GPU function. This is used to keep the
// range of arguments that correspond to a lowered kernel argument of
// (previously) memref type.
struct LaunchFuncArgument {
  int kernel_argument_begin;
  int kernel_argument_size;
};

using OperandToValueMap =
    absl::flat_hash_map<const HloInstruction*, std::vector<LaunchFuncArgument>>;

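// Illustration (not from the original source): under the standard-to-LLVM
// calling convention, a ranked memref argument is unpacked into 3 + 2 * rank
// scalar kernel arguments, e.g. for memref<8x16xf32>:
//
//   allocated ptr, aligned ptr, offset, size0, size1, stride0, stride1
//
// so a LaunchFuncArgument of {begin, 7} would cover such an operand.
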
static StatusOr<std::vector<const HloInstruction*>> ComputeOperandToValueMap(
    OperandToValueMap* operand_to_value_map, const HloInstruction* instr,
    LaunchFuncOp launchOp, LLVMFuncOp kernel) {
  auto operands = instr->operands();
  std::vector<const HloInstruction*> ordered_operands;
  bool has_failed = false;
  // A memref will expand into multiple kernel operands; accumulate their
  // number in order to find them later.
  int cur_operand_position = 0;

  for (int kernel_index = 0; kernel_index < launchOp.getNumKernelOperands();
       ++kernel_index) {
    auto launchop_operand =
        launchOp.getKernelOperand(kernel_index).dyn_cast<BlockArgument>();
    if (!launchop_operand) {
      launchOp.emitError("argument to kernel is not a function input");
      has_failed = true;
      continue;
    }
    auto memref_type =
        launchop_operand.getType().dyn_cast<::mlir::MemRefType>();
    if (!memref_type) {
      launchOp.emitError("only memref-typed arguments are supported");
      has_failed = true;
      break;
    }
    // host_index is the argument position to the surrounding function that
    // contains the launch. This index corresponds to HLO operand indices
    // by construction.
    auto host_index = launchop_operand.getArgNumber();
    // The trailing arguments to the outer function are the results.
    auto operand =
        (host_index < operands.size()) ? operands[host_index] : instr;
    if (!operand_to_value_map->count(operand)) {
      ordered_operands.push_back(operand);
    }
    // Associate the HLO operand with the argument values of the kernel
    // function.
    int num_unpacked =
        mlir::MemRefDescriptor::getNumUnpackedValues(memref_type);
    (*operand_to_value_map)[operand].push_back(
        {cur_operand_position, num_unpacked});
    cur_operand_position += num_unpacked;
  }
  if (has_failed) {
    return InternalError("Mapping operands to kernel arguments has failed.");
  }
  return ordered_operands;
}

Status InsertBufferLoadPreduleIntoKernel(
    LLVMFuncOp kernel, const OperandToValueMap& operand_to_value_map,
    const std::vector<const HloInstruction*>& ordered_operands,
    BufferAssignment* assignment,
    const std::vector<const BufferAllocation*>& buffers) {
  mlir::OpBuilder builder(kernel.getBody());
  auto* context = kernel.getContext();
  auto offset_type = LLVMType::getInt64Ty(context);
  auto ptr_type = LLVMType::getInt8PtrTy(context);
  auto void_type = LLVMType::getVoidTy(context);
  auto loc = kernel.getLoc();

  auto num_original_args = kernel.getNumArguments();
  std::vector<LLVMType> new_arg_types(buffers.size(), ptr_type);
  kernel->setAttr(kernel.getTypeAttrName(),
                  mlir::TypeAttr::get(LLVMType::getFunctionTy(
                      void_type, new_arg_types, /*isVarArg=*/false)));
  std::vector<Value> original_args(kernel.args_begin(), kernel.args_end());

  std::vector<mlir::Type> as_mlir_types(new_arg_types.begin(),
                                        new_arg_types.end());
  auto new_args = kernel.front().addArguments(as_mlir_types);
  std::vector<Value> buffer_args(new_args.begin(), new_args.end());

  for (auto operand : ordered_operands) {
    TF_ASSIGN_OR_RETURN(auto slice,
                        assignment->GetUniqueTopLevelSlice(operand));
    auto buffer = std::find(buffers.begin(), buffers.end(), slice.allocation());
    auto index = buffer - buffers.begin();
    auto offset = builder.create<mlir::LLVM::ConstantOp>(
        loc, offset_type, builder.getI64IntegerAttr(slice.offset()));
    auto ptr = buffer_args[index];

    // Replace uses of function arguments pertaining to memref descriptors with
    // values derived from HLO buffers. The instructions inserting these values
    // into memref descriptors were already introduced during the lowering phase
    // as per MLIR calling convention.
    for (auto arg : operand_to_value_map.at(operand)) {
      mlir::MemRefDescriptorView original(
          mlir::ValueRange(original_args)
              .slice(arg.kernel_argument_begin, arg.kernel_argument_size));

      // Allocated and aligned pointers are the same.
      auto casted = builder.create<mlir::LLVM::BitcastOp>(
          loc, original.alignedPtr().getType().cast<LLVMType>(),
          mlir::ValueRange(ptr));
      original.alignedPtr().replaceAllUsesWith(casted);
      original.allocatedPtr().replaceAllUsesWith(casted);

      // Use the offset of the HLO buffer instead of the one expected in the
      // function call.
      original.offset().replaceAllUsesWith(offset);

      // Fill the shape.
      auto shape = operand->shape();
      // Unless the operand is a scalar pointer, also fill shape and strides.
      if (shape.dimensions().empty()) {
        continue;
      }

      // TODO(b/137624192) Pass in the descriptor to allow for dynamic shapes.
      assert(shape.IsArray() && shape.is_static());
      for (auto extent : llvm::enumerate(shape.dimensions())) {
        auto shape = builder.create<mlir::LLVM::ConstantOp>(
            loc, original.size(extent.index()).getType(),
            builder.getI64IntegerAttr(extent.value()));
        original.size(extent.index()).replaceAllUsesWith(shape);
      }
      // Finally, fill the strides.
      // TODO(b/137624192): Take assigned layout into account.
      uint64_t accumulator = 0;
      for (int64_t idx = shape.rank() - 1; idx >= 0; --idx) {
        if (accumulator == 0) {
          accumulator = 1;
        } else {
          accumulator *= shape.dimensions(idx + 1);
        }
        auto stride = builder.create<mlir::LLVM::ConstantOp>(
            loc, original.stride(idx).getType(),
            builder.getI64IntegerAttr(accumulator));
        original.stride(idx).replaceAllUsesWith(stride);
      }
    }
  }

  // Now we can remove the original arguments, as they should have no more
  // users.
  for (int i = 0; i < num_original_args; ++i) {
    kernel.front().eraseArgument(0);
  }

  return Status::OK();
}

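// Worked example (not from the original source): for a static f32[2,3,4]
// operand with default row-major layout, the stride loop above produces
// strides {12, 4, 1}: idx=2 sets the accumulator to 1, idx=1 multiplies by
// dimensions(2) = 4, and idx=0 multiplies by dimensions(1) = 3.
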
StatusOr<std::unique_ptr<gpu::KernelThunk>> TransformKernelToXlaThunk(
    FuncOp func, const HloInstruction* const instr, ModuleOp kernel_module,
    BufferAssignment* assignment) {
  // Find the single LaunchFuncOp and compute a mapping from operands of
  // the HLO instruction to the corresponding values of the kernel
  // function in the target module.
  LaunchFuncOp launchOp;
  auto walkResult = func.walk([&launchOp](LaunchFuncOp op) {
    if (launchOp) {
      op.emitError("multiple kernels for single top-level HLO");
      return mlir::WalkResult::interrupt();
    }
    launchOp = op;
    return mlir::WalkResult::advance();
  });
  if (walkResult.wasInterrupted()) {
    return InternalError("Multiple kernels for single top-level HLO");
  }
  if (!launchOp) {
    // If there was no launchOp, then no kernel was generated, so the lowering
    // from the LHLO ops to the GPU dialect is not implemented yet.
    return Unimplemented("No kernel was generated.");
  }

  auto kernel =
      kernel_module.lookupSymbol<LLVMFuncOp>(launchOp.getKernelName());

  // Store the assignment of operands to block arguments. Note that an operand
  // might be used in multiple argument positions, hence the vector.
  OperandToValueMap operand_to_value_map;
  TF_ASSIGN_OR_RETURN(
      auto ordered_operands,
      ComputeOperandToValueMap(&operand_to_value_map, instr, launchOp, kernel));

  // Get the required buffers to support the inputs. Use a set and vector here
  // to keep the order fixed. This is mostly useful for testing.
  std::unordered_set<const BufferAllocation*> buffers_needed;
  std::vector<const BufferAllocation*> buffers;
  // TODO(b/137624192) Add support for tuples.
  for (auto operand : ordered_operands) {
    TF_ASSIGN_OR_RETURN(auto buffer,
                        assignment->GetUniqueTopLevelSlice(operand));
    if (buffers_needed.insert(buffer.allocation()).second) {
      buffers.push_back(buffer.allocation());
    }
  }

  // TODO(b/137624192) Add support for temp buffer.
  // TODO(b/137624192) Add support for constant buffers.

  // Change the signature to match what the XLA runtime expects from the
  // kernel.
  TF_RETURN_IF_ERROR(InsertBufferLoadPreduleIntoKernel(
      kernel, operand_to_value_map, ordered_operands, assignment, buffers));

  // Finally, create the thunk and set the launch dimensions.
  gpu::Thunk::ThunkInfo info;
  auto thunk = absl::make_unique<gpu::KernelThunk>(info, buffers,
                                                   kernel.getName().str());

  // Set launch bounds.
  mlir::gpu::KernelDim3 block = launchOp.getBlockSizeOperandValues();
  mlir::gpu::KernelDim3 grid = launchOp.getGridSizeOperandValues();
  absl::optional<int64> num_threads = getLaunchBound(block);
  absl::optional<int64> num_blocks = getLaunchBound(grid);
  if (!num_threads || !num_blocks) {
    return Unimplemented("Unsupported launch bounds");
  }
  thunk->SetLaunchDimensions(gpu::LaunchDimensions(*num_blocks, *num_threads));
  return std::move(thunk);
}

StatusOr<std::unique_ptr<Executable>> MlirCompilerImpl::RunBackend(
    std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
    const CompileOptions& options) {
  // Determine the HLO schedule, which is an ordering of HLO instructions. This
  // is used by buffer assignment to enable buffer reuse, and the same ordering
  // must also be used to determine the thunk launch schedule.
  std::unique_ptr<StreamAssignment> stream_assignment =
      xla::gpu::AssignStreams(*module);
  TF_ASSIGN_OR_RETURN(std::unique_ptr<GpuHloSchedule> hlo_schedule,
                      GpuHloSchedule::Build(*module, *stream_assignment,
                                            data_layout_.getPointerSize()));

  // Run buffer analysis on the HLO graph. This analysis figures out which
  // temporary buffers are required to run the computation.
  TF_ASSIGN_OR_RETURN(std::unique_ptr<BufferAssignment> buffer_assignment,
                      BufferAssigner::Run(
                          module.get(), hlo_schedule->ConsumeHloOrdering(),
                          BufferSizeBytesFunction(),
                          /*color_alignment=*/
                          [](LogicalBuffer::Color) {
                            return xla::gpu::kXlaAllocatedBufferAlignBytes;
                          },
                          /*allocate_buffers_for_constants=*/true,
                          /*colorer=*/BufferAssigner::DefaultColorer(),
                          /*must_not_live_out=*/{}, &CanShareBufferHint));
  DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations");

  EmissionContext emission_context(std::move(module));
  if (error_handler_) {
    emission_context.setErrorHandler(error_handler_);
  }

  OwningModuleRef mlir_module =
      ModuleOp::create(UnknownLoc::get(emission_context.getContext()));
  LhloDialectEmitter lhlo_emitter(&emission_context, *buffer_assignment,
                                  stream_exec->platform(), *mlir_module);

  absl::flat_hash_map<const HloInstruction*, std::unique_ptr<gpu::Thunk>>
      hlo_to_thunk;
  for (HloInstruction* instruction : hlo_schedule->ThunkLaunchOrder()) {
    TF_RETURN_IF_ERROR(instruction->Visit(&lhlo_emitter));
    gpu::ThunkSequence thunks = lhlo_emitter.ConsumeThunkSequence();
    TF_RET_CHECK(thunks.size() <= 1) << instruction->ToString();
    if (!thunks.empty()) {
      auto thunk = std::move(thunks.front());
      hlo_to_thunk[instruction] = std::move(thunk);
    }
  }

  TF_RETURN_IF_ERROR(
      module_hook_.invoke(IRHook::LoweringStage::LHLO, *mlir_module));

  TF_RETURN_IF_ERROR(LowerLHLOToGPU(*mlir_module));

  TF_RETURN_IF_ERROR(
      module_hook_.invoke(IRHook::LoweringStage::GPU, *mlir_module));

  TF_RETURN_IF_ERROR(LowerKernelBodiesToNVVM(*mlir_module));

  TF_RETURN_IF_ERROR(
      module_hook_.invoke(IRHook::LoweringStage::LLVM, *mlir_module));

  TF_ASSIGN_OR_RETURN(OwningModuleRef kernel_module,
                      ExtractKernelModule(*mlir_module));

  for (auto entry : lhlo_emitter.InstructionToFunctionMap()) {
    TF_ASSIGN_OR_RETURN(
        auto thunk,
        TransformKernelToXlaThunk(entry.second, entry.first, *kernel_module,
                                  buffer_assignment.get()));
    hlo_to_thunk[entry.first] = std::move(thunk);
  }

  absl::flat_hash_map<const gpu::Thunk*, const HloInstruction*> thunk_to_hlo;
  gpu::ThunkSequence thunk_sequence;
  {
    for (HloInstruction* hlo : hlo_schedule->ThunkLaunchOrder()) {
      auto it = hlo_to_thunk.find(hlo);
      if (it != hlo_to_thunk.end()) {
        const HloInstruction* hlo = it->first;
        auto& thunk = it->second;
        thunk_to_hlo[thunk.get()] = hlo;
        thunk_sequence.push_back(std::move(thunk));
      }
    }
  }

  TF_RETURN_IF_ERROR(
      module_hook_.invoke(IRHook::LoweringStage::KERNEL, *kernel_module));

  // Translate to LLVM IR in a fresh context. The module is further translated
  // to textual PTX and a CUBIN blob so there is no need for the context to live
  // longer than this function.
  llvm::LLVMContext llvmContext;
  auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext);

  if (!llvmModule) {
    return InternalError("Translation to LLVM failed");
  }

  llvmModule->setModuleIdentifier(emission_context.getHloModule()->name());
  // TODO(herhut): Why is this needed and does not come from the template?
  llvmModule->setDataLayout(gpu::nvptx::kDataLayout);

  const auto& config = emission_context.getHloModule()->config();
  TF_ASSIGN_OR_RETURN(
      auto ptx, xla::gpu::nvptx::CompileToPtx(llvmModule.get(),
                                              GetGpuVersion(stream_exec),
                                              config, GetLibdeviceDir(config)));
  // Allow falling back to driver compilation when ptxas is not able to
  // compile.
  StatusOr<std::vector<uint8>> maybe_cubin =
      se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
                        gpu::PtxOptsFromConfig(config));
  std::vector<uint8> cubin;
  if (maybe_cubin.ok()) {
    cubin = std::move(maybe_cubin).ValueOrDie();
  } else if (maybe_cubin.status().code() ==
             tensorflow::error::Code::UNIMPLEMENTED) {
    xla::gpu::WarnIfBadDriverJITVersion();
  } else {
    return maybe_cubin.status();
  }

  auto thunk_schedule = absl::make_unique<ThunkSchedule>(
      std::make_unique<gpu::ThunkSequence>(std::move(thunk_sequence)),
      std::move(stream_assignment), std::move(thunk_to_hlo));

  if (DumpingEnabledForHloModule(*emission_context.getHloModule())) {
    DumpToFileInDirOrStdout(*emission_context.getHloModule(), "",
                            "thunk_schedule", thunk_schedule->ToString());
  }

  module = emission_context.releaseHloModule();

  TF_ASSIGN_OR_RETURN(auto output_info,
                      xla::gpu::GetOutputInfo(*module, *buffer_assignment));
  std::vector<BufferAllocation> allocations =
      buffer_assignment->ReleaseAllocations();

  // TODO(b/137624192): Add profiling support.
  return {absl::make_unique<GpuExecutable>(GpuExecutable::Params{
      std::move(ptx), std::move(cubin), GetGpuVersion(stream_exec),
      std::move(thunk_schedule), std::vector<GpuExecutable::ConstantInfo>(),
      std::move(output_info), std::move(module), std::move(allocations)})};
}

||||||
StatusOr<std::vector<std::unique_ptr<Executable>>> MlirCompilerImpl::Compile(
|
|
||||||
std::unique_ptr<HloModuleGroup> module_group,
|
|
||||||
std::vector<std::vector<se::StreamExecutor*>> stream_execs,
|
|
||||||
const CompileOptions& options) {
|
|
||||||
return Unimplemented("Not yet implemented in MLIR compiler");
|
|
||||||
}
|
|
||||||
|
|
||||||
StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
|
|
||||||
MlirCompilerImpl::CompileAheadOfTime(
|
|
||||||
std::unique_ptr<HloModuleGroup> /*module_group*/,
|
|
||||||
const AotCompilationOptions& /*options*/) {
|
|
||||||
return Unimplemented("Not yet implemented in MLIR compiler");
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
} // namespace mlir_gpu
|
|
||||||
} // namespace xla
|
|
||||||
|
|
||||||
static bool InitModule() {
|
|
||||||
xla::Compiler::RegisterCompilerFactory(
|
|
||||||
stream_executor::cuda::kCudaPlatformId, []() {
|
|
||||||
return absl::make_unique<xla::FailoverCompiler>(
|
|
||||||
absl::make_unique<xla::mlir_gpu::MlirCompilerImpl>(),
|
|
||||||
absl::make_unique<xla::gpu::NVPTXCompiler>());
|
|
||||||
});
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
static bool module_initialized = InitModule();
|
|
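The InitModule hook above registered the MLIR pipeline behind a FailoverCompiler, so the stable NVPTX backend still handled anything the prototype could not. The snippet below is a minimal, self-contained sketch of that failover pattern only, not the actual FailoverCompiler API: the names Failover and CompileFn are illustrative stand-ins, and std::nullopt stands in for an Unimplemented status.

#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <utility>

// A "compiler" here is just a function that may decline by returning
// std::nullopt (standing in for an Unimplemented status).
using CompileFn = std::function<std::optional<std::string>(const std::string&)>;

// Mirrors the failover idea: try the experimental primary first and fall
// back to the stable secondary when the primary declines.
class Failover {
 public:
  Failover(CompileFn primary, CompileFn secondary)
      : primary_(std::move(primary)), secondary_(std::move(secondary)) {}

  std::string Compile(const std::string& hlo) const {
    if (auto result = primary_(hlo)) return *result;  // Experimental path.
    return *secondary_(hlo);  // Fallback; assumed to always succeed here.
  }

 private:
  CompileFn primary_;
  CompileFn secondary_;
};

int main() {
  Failover failover(
      [](const std::string&) -> std::optional<std::string> {
        return std::nullopt;  // Pretend the MLIR pipeline is unimplemented.
      },
      [](const std::string& hlo) -> std::optional<std::string> {
        return "nvptx(" + hlo + ")";
      });
  std::cout << failover.Compile("add") << "\n";  // Prints: nvptx(add)
  return 0;
}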
@ -1,48 +0,0 @@
load("//tensorflow:tensorflow.bzl", "filegroup")
load(
    "//tensorflow/core/platform:build_config_root.bzl",
    "tf_cuda_tests_tags",
    "tf_exec_properties",
)
load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests")

package(
    default_visibility = [":friends"],
    licenses = ["notice"],  # Apache 2.0
)

package_group(
    name = "friends",
    includes = [
        "//tensorflow/compiler/xla:friends",
    ],
)

glob_lit_tests(
    data = [
        ":test_utilities",
        "@llvm-project//mlir:run_lit.sh",
    ],
    default_tags = tf_cuda_tests_tags() + [
        "no_pip",
        "config-cuda-only",
        "no_rocm",
    ],
    driver = "//tensorflow/compiler/mlir:run_lit.sh",
    exclude = [
        # TODO(b/137624192): Reenable once we can fuse reductions.
        "fused_reduce.hlo",
    ],
    exec_properties = tf_exec_properties({"tags": tf_cuda_tests_tags()}),
    test_file_exts = ["hlo"],
)

# Bundle together all of the test utilities that are used by tests.
filegroup(
    name = "test_utilities",
    testonly = True,
    data = [
        "//tensorflow/compiler/xla/service/mlir_gpu:xla-gpu-opt",
        "@llvm-project//llvm:FileCheck",
    ],
)
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Abs
ENTRY %Abs (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %abs = f32[2,2]{1,0} abs(f32[2,2]{1,0} %val)
}

// CHECK: func @abs(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.abs"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,12 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Add

ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
}

// CHECK: func @add(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.add"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : ([[TYPE]], [[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,63 +0,0 @@
// RUN: xla-gpu-opt -lowering-stage=KERNEL %s | FileCheck %s
HloModule Add

ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
}

// CHECK: func @add_kernel(%[[ARG0:.*]]: [[TYPE:!llvm\..*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]

//
// Check that relevant sizes and strides are emitted.
//
// CHECK: %[[CAST0:.*]] = llvm.bitcast %[[ARG0:.*]] : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK: %[[SIZE00:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64
// CHECK: %[[SIZE01:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64
// CHECK: %[[STRIDE01:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64
// CHECK: %[[STRIDE00:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64

// CHECK: %[[CAST1:.*]] = llvm.bitcast %[[ARG1:.*]] : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK: %[[SIZE10:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64
// CHECK: %[[SIZE11:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64
// CHECK: %[[STRIDE11:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64
// CHECK: %[[STRIDE10:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64

// CHECK: %[[CAST2:.*]] = llvm.bitcast %[[ARG2:.*]] : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK: %[[SIZE20:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64
// CHECK: %[[SIZE21:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64
// CHECK: %[[STRIDE21:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64
// CHECK: %[[STRIDE20:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64

//
// Check that the emitted sizes and strides, as well as the pointers to HLO
// buffers, are inserted into the memref descriptors.
//
// CHECK: %[[DESC0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC01:.*]] = llvm.insertvalue %[[CAST0]], %[[DESC0]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC02:.*]] = llvm.insertvalue %[[CAST0]], %[[DESC01]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC03:.*]] = llvm.insertvalue %{{.*}}, %[[DESC02]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC04:.*]] = llvm.insertvalue %[[SIZE00]], %[[DESC03]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC05:.*]] = llvm.insertvalue %[[STRIDE00]], %[[DESC04]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC06:.*]] = llvm.insertvalue %[[SIZE01]], %[[DESC05]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE01]], %[[DESC06]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>

// CHECK: %[[DESC1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC11:.*]] = llvm.insertvalue %[[CAST1]], %[[DESC1]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC12:.*]] = llvm.insertvalue %[[CAST1]], %[[DESC11]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC13:.*]] = llvm.insertvalue %{{.*}}, %[[DESC12]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC14:.*]] = llvm.insertvalue %[[SIZE10]], %[[DESC13]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC15:.*]] = llvm.insertvalue %[[STRIDE10]], %[[DESC14]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC16:.*]] = llvm.insertvalue %[[SIZE11]], %[[DESC15]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE11]], %[[DESC16]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>

// CHECK: %[[DESC2:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC21:.*]] = llvm.insertvalue %[[CAST2]], %[[DESC2]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC22:.*]] = llvm.insertvalue %[[CAST2]], %[[DESC21]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC23:.*]] = llvm.insertvalue %{{.*}}, %[[DESC22]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC24:.*]] = llvm.insertvalue %[[SIZE20]], %[[DESC23]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC25:.*]] = llvm.insertvalue %[[STRIDE20]], %[[DESC24]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[DESC26:.*]] = llvm.insertvalue %[[SIZE21]], %[[DESC25]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE21]], %[[DESC26]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
@ -1,22 +0,0 @@
// RUN: xla-gpu-opt -lowering-stage=GPU %s | FileCheck %s
HloModule Add

ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
}

// CHECK: func @add(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
// CHECK: gpu.launch_func
// CHECK-SAME: blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args
// CHECK-SAME: (%[[ARG0]] : [[TYPE]], %[[ARG1]] : [[TYPE]], %[[ARG2]] : [[TYPE]])
// CHECK: }
// CHECK: func @add_kernel(%[[ARG0]]: [[TYPE]], %[[ARG1]]: [[TYPE]], %[[ARG2]]: [[TYPE]]
// CHECK-DAG: subview %[[ARG0]]{{\[}}[[INDEX:.*]]]
// CHECK-DAG: subview %[[ARG1]]{{\[}}[[INDEX]]]
// CHECK-DAG: subview %[[ARG2]]{{\[}}[[INDEX]]]
// CHECK: %[[VAL1:.*]] = load %{{.*\[}}[[INDEX:.*]]]
// CHECK: %[[VAL2:.*]] = load %{{.*\[}}[[INDEX]]]
// CHECK: %[[RES:.*]] = addf %[[VAL1]], %[[VAL2]]
// CHECK: store %[[RES]], %{{.*\[}}[[INDEX]]]
@ -1,22 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule AddMultiply

ENTRY %AddMultiply (x: f32[2,2], y: f32[2,2], z: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  %z = f32[2,2]{1,0} parameter(2)
  %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
  ROOT %mul = f32[2,2]{1,0} multiply(f32[2,2]{1,0} %add, f32[2,2]{1,0} %z)
}

// CHECK: func @fusion(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]], %[[RESULT:.*]]: [[TYPE]])
// CHECK: "lmhlo.fusion"() ( {
// CHECK: %[[REF0:.*]] = tensor_load %[[ARG0]] : [[TYPE]]
// CHECK: %[[REF1:.*]] = tensor_load %[[ARG1]] : [[TYPE]]
// CHECK: %[[REF2:.*]] = tensor_load %[[ARG2]] : [[TYPE]]
// CHECK: %[[ADD:.*]] = mhlo.add %[[REF1]], %[[REF2]]
// CHECK: %[[MUL:.*]] = mhlo.multiply %[[ADD]], %[[REF0]]
// CHECK: tensor_store %[[MUL]], %[[RESULT]]
// CHECK: "lmhlo.terminator"()
// CHECK-NEXT: }
@ -1,23 +0,0 @@
// RUN: xla-gpu-opt -lowering-stage=GPU %s | FileCheck %s
HloModule AddMultiply

ENTRY %AddMultiply (x: f32[2,2], y: f32[2,2], z: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  %z = f32[2,2]{1,0} parameter(2)
  %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
  ROOT %mul = f32[2,2]{1,0} multiply(f32[2,2]{1,0} %add, f32[2,2]{1,0} %z)
}

// CHECK: func @fusion_kernel(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]], %[[RESULT:.*]]: [[TYPE]])
// CHECK-DAG: subview %[[ARG0]]{{\[}}[[INDEX:.*]]]
// CHECK-DAG: subview %[[ARG1]]{{\[}}[[INDEX]]]
// CHECK-DAG: subview %[[ARG2]]{{\[}}[[INDEX]]]
// CHECK-DAG: subview %[[RESULT]]{{\[}}[[INDEX]]]
// CHECK: %[[V0:.*]] = load %{{.*\[}}[[CSTIDX:.*]]]
// CHECK: %[[V1:.*]] = load %{{.*\[}}[[CSTIDX:.*]]]
// CHECK: %[[ADD:.*]] = addf %[[V0]], %[[V1]]
// CHECK: %[[V2:.*]] = load %{{.*\[}}[[CSTIDX:.*]]]
// CHECK: %[[MUL:.*]] = mulf %[[ADD]], %[[V2]]
// CHECK: store %[[MUL]], %{{.*\[}}[[CSTIDX:.*]]]
// CHECK: return
@ -1,24 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule AddReduce

%add (x: f32[], y: f32[]) -> f32[] {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %add = f32[] add(f32[] %x, f32[] %y)
}

ENTRY %AddReduce (x: f32[100,10], c: f32[]) -> f32[100] {
  %x = f32[100,10]{1,0} parameter(0)
  %c = f32[] parameter(1)
  ROOT %reduce = f32[100]{0} reduce(f32[100,10]{1,0} %x, f32[] %c), dimensions={1}, to_apply=%add
}

// CHECK: func @reduce(%[[ARG:.*]]: [[ARGT:.*]], %[[CST:.*]]: memref<f32>, %[[RES:.*]]: [[REST:.*]]) {
// CHECK: "lmhlo.reduce"(%[[ARG]], %[[CST]], %[[RES]]) ( {
// CHECK: ^bb0(%[[FARG0:.*]]: memref<f32>, %[[FARG1:.*]]: memref<f32>, %[[FRES:.*]]: memref<f32>):
// CHECK: %[[LHS:.*]] = tensor_load %[[FARG0]] : memref<f32>
// CHECK: %[[RHS:.*]] = tensor_load %[[FARG1]] : memref<f32>
// CHECK: %[[RES:.*]] = mhlo.add %[[LHS]], %[[RHS]] : tensor<f32>
// CHECK: tensor_store %[[RES]], %[[FRES]] : memref<f32>
// CHECK: "lmhlo.terminator"() : () -> ()
// CHECK-NEXT: }) {dimensions = dense<1> : tensor<1xi64>} : ([[ARGT]], memref<f32>, [[REST]]) -> ()
@ -1,14 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Broadcast

ENTRY %Broadcast (x: f32[10]) -> f32[10, 5] {
  %x = f32[10]{0} parameter(0)
  ROOT %broadcast = f32[10, 5]{1,0} broadcast(f32[10]{0} %x), dimensions={0}
}

// CHECK: func @broadcast(%[[IN:.*]]: [[IN_T:.*]], %[[OUT:.*]]: [[OUT_T:.*]]) {
// CHECK: "lmhlo.broadcast_in_dim"(%[[IN]], %[[OUT]])
// CHECK: {broadcast_dimensions = dense<0> : tensor<1xi64>}
// CHECK: : ([[IN_T]], [[OUT_T]]) -> ()
// CHECK: }
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt -verify-errors %s | FileCheck %s
HloModule Add

ENTRY %Add (x: f32[2,2,2], y: f32[2,2,2]) -> f32[2,2,2] {
  %x = f32[2,2,2]{2,1,0} parameter(0)
  %y = f32[2,2,2]{2,1,0} parameter(1)
  ROOT %add = f32[2,2,2]{2,1,0} add(f32[2,2,2]{2,1,0} %x, f32[2,2,2]{2,1,0} %y), metadata={op_name="original_tf_op"}
}

// CHECK: ERRORS FOUND: [%add = f32[2,2,2]{2,1,0} add(f32[2,2,2]{2,1,0} %x, f32[2,2,2]{2,1,0} %y), metadata={op_name="original_tf_op"}: failed for testing: lmhlo.add; failed for testing: std.return]
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Ceil
ENTRY %Ceil (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %ceil = f32[2,2]{1,0} ceil(f32[2,2]{1,0} %val)
}

// CHECK: func @ceil(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.ceil"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,13 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Compare

ENTRY %Compare (x: f32[2,2], y: f32[2,2]) -> pred[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %compare = pred[2,2]{1,0} compare(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y), direction=EQ
}

// CHECK: func @compare(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[PRED:.*]]: [[PRED_TYPE:.*]]) {
// CHECK: "lmhlo.compare"(%[[ARG0]], %[[ARG1]], %[[PRED]])
// CHECK: {comparison_direction = "EQ"} : ([[TYPE]], [[TYPE]], [[PRED_TYPE]]) -> ()
// CHECK: }
@ -1,12 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Complex

ENTRY %Complex (real: f32[2,2]{0,1}, imag: f32[2,2]{0,1}) -> c64[2,2] {
  %real = f32[2,2]{0,1} parameter(0)
  %imag = f32[2,2]{0,1} parameter(1)
  ROOT %compl = c64[2,2]{0,1} complex(%real, %imag)
}

// CHECK: func @complex(%[[REAL:.*]]: [[BUF_F32:.*]], %[[IMAG:.*]]: [[BUF_F32]], %[[OUT:.*]]: [[BUF_C64:.*]]) {
// CHECK: "lmhlo.complex"(%[[REAL]], %[[IMAG]], %[[OUT]]) : ([[BUF_F32]], [[BUF_F32]], [[BUF_C64]]) -> ()
// CHECK: }
@ -1,13 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Concatenate

ENTRY %Concatenate (x: f32[2,3], y: f32[2,2]) -> f32[2,5] {
  %x = f32[2,3]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %concatenate = f32[2,5]{1,0} concatenate(f32[2,3]{1,0} %x, f32[2,2]{1,0} %y), dimensions={1}
}

// CHECK: func @concatenate(%[[ARG0:.*]]: [[TYPE0:.*]], %[[ARG1:.*]]: [[TYPE1:.*]], %[[RESULT:.*]]: [[RTYPE:.*]]) {
// CHECK: "lmhlo.concatenate"(%[[ARG0]], %[[ARG1]], %[[RESULT]])
// CHECK: {dimension = 1 : i64} : ([[TYPE0]], [[TYPE1]], [[RTYPE]]) -> ()
// CHECK: }
@ -1,12 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Const

ENTRY %Const () -> s32[100] {
  %const.0 = s32[] constant(10)
  ROOT %broadcast.0 = s32[100]{0} broadcast(s32[] %const.0), dimensions={}
}

// CHECK: func @constant(%[[ARG0:.*]]: memref<i32>)
// CHECK: "lmhlo.constant"(%[[ARG0]]) {value = dense<10> : tensor<i32>}
// CHECK: func @broadcast(%[[ARG1:.*]]: memref<i32>, %[[ARG2:.*]]: memref<100xi32>)
// CHECK: "lmhlo.broadcast_in_dim"(%[[ARG1]], %[[ARG2]]) {broadcast_dimensions = dense<> : tensor<0xi64>}
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Copy

ENTRY %Copy (x: f32[2,4]) -> f32[2,4] {
  %x = f32[2,4] parameter(0)
  ROOT %copy = f32[2,4] copy(f32[2,4] %x)
}

// CHECK: func @copy(%[[OPERAND:.*]]: memref<2x4xf32>, %[[RESULT:.*]]: memref<2x4xf32>) {
// CHECK: "lmhlo.copy"(%[[OPERAND]], %[[RESULT]]) : (memref<2x4xf32>, memref<2x4xf32>) -> ()
@ -1,13 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule CopyTranspose

ENTRY %CopyTranspose (x: f32[2,4]) -> f32[2,4]{0,1} {
  %x = f32[2,4] parameter(0)
  ROOT %copy = f32[2,4]{0,1} copy(f32[2,4] %x)
}

// CHECK: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 2)>
// CHECK: func @copy(%[[OPERAND:.*]]: memref<2x4xf32>,
// CHECK-SAME: %[[RESULT:.*]]: memref<2x4xf32, #[[MAP0]]>)
// CHECK: "lmhlo.copy"(%[[OPERAND]], %[[RESULT]])
// CHECK-SAME: : (memref<2x4xf32>, memref<2x4xf32, #[[MAP0]]>)
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Cos
ENTRY %Cos (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %cos = f32[2,2]{1,0} cosine(f32[2,2]{1,0} %val)
}

// CHECK: func @cosine(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.cosine"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,12 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Exp

ENTRY %Exp (x: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  ROOT %exp = f32[2,2]{1,0} exponential(f32[2,2]{1,0} %x)
}

// CHECK: func @exponential(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.exponential"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,35 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule FusedReduce

%add (x: f32[], y: f32[]) -> f32[] {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %add = f32[] add(f32[] %x, f32[] %y)
}

%fused_computation (param: f32[100,10]) -> f32[10] {
  %param = f32[100,10] parameter(0)
  %constant = f32[] constant(0)
  ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant),
      dimensions={0}, to_apply=%add
}

ENTRY %FusedReduce (x: f32[100,10]) -> f32[10] {
  %x = f32[100,10] parameter(0)
  ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput,
      calls=%fused_computation
}

// CHECK: func @fusion(%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[RTYPE:.*]])
// CHECK: "lmhlo.fusion"() ( {
// CHECK: %[[REF0:.*]] = tensor_load %arg0 : [[TYPE]]
// CHECK: %[[CT0:.*]] = mhlo.constant dense<0.000000e+00>
// CHECK: %[[RED:.*]] = "mhlo.reduce"(%0, %1) ( {
// CHECK: ^bb0(%[[BARG0:.*]]: [[ETYPE:.*]], %[[BARG1:.*]]: [[ETYPE]])
// CHECK: %[[ADD:.*]] = mhlo.add %[[BARG0]], %[[BARG1]] : [[ETYPE]]
// CHECK: "mhlo.return"(%[[ADD]])
// CHECK: })
// CHECK: tensor_store %[[RED]], %[[RESULT]] : [[RTYPE]]
// CHECK: "lmhlo.terminator"()
// CHECK-NEXT: })
@ -1,22 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Gather

ENTRY %Gather (x: f32[100,10], y: s64[4,6]) -> f32[4,6,10] {
  %x = f32[100,10] parameter(0)
  %y = s64[4,6] parameter(1)
  ROOT %gather = f32[4,6,10]{2,1,0} gather(f32[100,10]{1,0} %x, s64[4,6]{1,0} %y),
      collapsed_slice_dims={0}, index_vector_dim=2, offset_dims={2},
      slice_sizes={1,10}, start_index_map={0}
}

// CHECK: func @gather(%[[ARG0:.*]]: [[TYPE0:.*]], %[[ARG1:.*]]: [[TYPE1:.*]],
// CHECK-SAME: %[[RESULT:.*]]: [[RTYPE:.*]]) {
// CHECK-NEXT: "lmhlo.gather"(%[[ARG0]], %[[ARG1]], %[[RESULT]]) {
// CHECK-SAME: dimension_numbers = {
// CHECK-SAME: collapsed_slice_dims = dense<0> : tensor<1xi64>,
// CHECK-SAME: index_vector_dim = 2 : i64,
// CHECK-SAME: offset_dims = dense<2> : tensor<1xi64>,
// CHECK-SAME: start_index_map = dense<0> : tensor<1xi64>
// CHECK-SAME: },
// CHECK-SAME: slice_sizes = dense<[1, 10]> : tensor<2xi64>
// CHECK-SAME: } : ([[TYPE0]], [[TYPE1]], [[RTYPE]]) -> ()
@ -1,11 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Imag

ENTRY %Imag (x: c64[2,2]{0,1}) -> f32[2,2] {
  %x = c64[2,2]{0,1} parameter(0)
  ROOT %imag = f32[2,2]{0,1} imag(%x)
}

// CHECK: func @imag(%[[IN:.*]]: [[BUF_C64:.*]], %[[OUT:.*]]: [[BUF_F32:.*]]) {
// CHECK: "lmhlo.imag"(%[[IN]], %[[OUT]]) : ([[BUF_C64]], [[BUF_F32]]) -> ()
// CHECK: }
@ -1,11 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Iota

ENTRY %Iota() -> s64[10, 5] {
  ROOT %iota = s64[10, 5]{1,0} iota(), iota_dimension=0
}

// CHECK: func @iota(%[[OUT:.*]]: [[OUT_T:.*]]) {
// CHECK: "lmhlo.iota"(%[[OUT]])
// CHECK: {iota_dimension = 0 : i64} : ([[OUT_T]]) -> ()
// CHECK: }
@ -1,16 +0,0 @@
// RUN: xla-gpu-opt -lowering-stage=GPU %s | FileCheck %s
HloModule AddSubtract

ENTRY %AddSubtract (x: s32[2,2], y: s32[2,2]) -> s32[2,2] {
  %x = s32[2,2]{1,0} parameter(0)
  %y = s32[2,2]{1,0} parameter(1)

  %add = s32[2,2]{1,0} add(s32[2,2]{1,0} %x, s32[2,2]{1,0} %y)
  %iota = s32[2, 2]{1,0} iota(), iota_dimension=0

  ROOT %sub = s32[2,2]{1,0} subtract(s32[2,2]{1,0} %add, s32[2,2]{1,0} %iota)
}

// CHECK-NOT: store
// CHECK: [[RESULT:%.*]] = subi %{{.*}}, %{{.*}}
// CHECK: store [[RESULT]]
@ -1,11 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Log

ENTRY %Log (x: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  ROOT %log = f32[2,2]{1,0} log(f32[2,2]{1,0} %x)
}

// CHECK: func @log(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.log"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Neg
ENTRY %Neg (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %neg = f32[2,2]{1,0} negate(f32[2,2]{1,0} %val)
}

// CHECK: func @negate(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.negate"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,11 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Real

ENTRY %Real (x: c64[2,2]{0,1}) -> f32[2,2] {
  %x = c64[2,2]{0,1} parameter(0)
  ROOT %real = f32[2,2]{0,1} real(%x)
}

// CHECK: func @real(%[[IN:.*]]: [[BUF_C64:.*]], %[[OUT:.*]]: [[BUF_F32:.*]]) {
// CHECK: "lmhlo.real"(%[[IN]], %[[OUT]]) : ([[BUF_C64]], [[BUF_F32]]) -> ()
// CHECK: }
@ -1,35 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule ReduceWindow

%max (x: f32[], y: f32[]) -> f32[] {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %max = f32[] maximum(f32[] %x, f32[] %y)
}

ENTRY %ReduceWindow (x: f32[128,64,112,112], y: f32[]) -> f32[128,64,56,56] {
  %x = f32[128,64,112,112] parameter(0)
  %y = f32[] parameter(1)
  ROOT %reduce-window = f32[128,64,56,56] reduce-window(
      f32[128,64,112,112] %x,
      f32[] %y
  ),
  window={size=1x1x3x3 stride=1x1x2x2 pad=0_0x0_0x0_1x0_1}, to_apply=%max
}

// CHECK: func @"reduce-window"(
// CHECK-SAME: [[ARG:%.*]]: [[ARGT:.*]], [[CST:%.*]]: memref<f32>, [[RES:%.*]]: [[REST:.*]]) {
// CHECK: "lmhlo.reduce_window"([[LHS:%.*]], [[RHS:%.*]], [[OUT:%.*]]) ( {
// CHECK: ^bb0([[LHS:%.*]]: memref<f32>, [[RHS:%.*]]: memref<f32>, [[OUT:%.*]]: memref<f32>):
// CHECK: [[LHS_TENSOR:%.*]] = tensor_load [[LHS]]
// CHECK: [[RHS_TENSOR:%.*]] = tensor_load [[RHS]]
// CHECK: [[OUT_TENSOR:%.*]] = mhlo.maximum [[LHS_TENSOR]], [[RHS_TENSOR]]
// CHECK: tensor_store [[OUT_TENSOR]], [[OUT]]
// CHECK: "lmhlo.terminator"() : () -> ()
// CHECK: }) {
// CHECK-SAME: base_dilations = dense<1> : tensor<4xi64>
// CHECK-SAME: padding = dense<{{\[}}[0, 0], [0, 0], [0, 1], [0, 1]]>
// CHECK-SAME: window_dilations = dense<1> : tensor<4xi64>
// CHECK-SAME: window_dimensions = dense<[1, 1, 3, 3]>
// CHECK-SAME: window_strides = dense<[1, 1, 2, 2]>
// CHECK: } : ([[ARGT]], memref<f32>, [[REST]]) -> ()
@ -1,11 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Rem
ENTRY %Rem(x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  %y = f32[2,2]{1,0} parameter(1)
  ROOT %rem = f32[2,2]{1,0} remainder(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
}

// CHECK: func @remainder(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.remainder"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : ([[TYPE]], [[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,11 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Rsqrt

ENTRY %Rsqrt (x: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  ROOT %rsqrt = f32[2,2]{1,0} rsqrt(f32[2,2]{1,0} %x)
}

// CHECK: func @rsqrt(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.rsqrt"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,14 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Select

ENTRY %Select (p: pred[2,2], x: f32[2,2], y: f32[2,2]) -> f32[2,2] {
  %p = pred[2,2]{1,0} parameter(0)
  %x = f32[2,2]{1,0} parameter(1)
  %y = f32[2,2]{1,0} parameter(2)
  ROOT %select = f32[2,2]{1,0} select(pred[2,2]{1,0} %p, f32[2,2]{1,0} %x, f32[2,2]{1,0} %y)
}

// CHECK: func @select(%[[PRED:.*]]: [[PRED_TYPE:.*]], %[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.select"(%[[PRED]], %[[ARG0]], %[[ARG1]], %[[ARG2]]) : ([[PRED_TYPE]], [[TYPE]], [[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,54 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule SelectAndScatter

%ge (x: f32[], y: f32[]) -> pred[] {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %compare = pred[] compare(f32[] %x, f32[] %y), direction=GE
}

%add (x: f32[], y: f32[]) -> f32[] {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %add = f32[] add(f32[] %x, f32[] %y)
}

ENTRY %SelectAndScatter (x: f32[128,64,112,112],
                         y: f32[128,64,56,56],
                         z: f32[]) -> f32[128,64,112,112] {
  %x = f32[128,64,112,112] parameter(0)
  %y = f32[128,64,56,56] parameter(1)
  %z = f32[] parameter(2)
  ROOT %result = f32[128,64,112,112] select-and-scatter(
      f32[128,64,112,112] %x,
      f32[128,64,56,56] %y,
      f32[] %z),
  window={size=1x1x3x3 stride=1x1x2x2 pad=0_0x0_0x0_1x0_1},
  select=%ge,
  scatter=%add
}

// CHECK: func @"select-and-scatter"(
// CHECK-SAME: [[ARG:%.*]]: [[ARGT:.*]], [[SRC:%.*]]: [[SRCT:.*]], [[CST:%.*]]: memref<f32>, [[RES:%.*]]: [[REST:.*]]) {
// CHECK: "lmhlo.select_and_scatter"([[ARG]], [[SRC]], [[CST]], [[RES]]) ( {
// CHECK: ^bb0([[LHS:%.*]]: memref<f32>, [[RHS:%.*]]: memref<f32>,
// CHECK-SAME: [[OUT:%.*]]: memref<i1>):
// CHECK: [[LHS_TENSOR:%.*]] = tensor_load [[LHS]]
// CHECK: [[RHS_TENSOR:%.*]] = tensor_load [[RHS]]
// CHECK: [[OUT_TENSOR:%.*]] = "mhlo.compare"
// CHECK-SAME: ([[LHS_TENSOR]], [[RHS_TENSOR]]) {comparison_direction = "GE"}
// CHECK: tensor_store [[OUT_TENSOR]], [[OUT]]
// CHECK: lmhlo.terminator
// CHECK: }, {
// CHECK: ^bb0([[LHS_:%.*]]: memref<f32>, [[RHS_:%.*]]: memref<f32>,
// CHECK-SAME: [[OUT_:%.*]]: memref<f32>):
// CHECK: [[LHS_TENSOR_:%.*]] = tensor_load [[LHS_]]
// CHECK: [[RHS_TENSOR_:%.*]] = tensor_load [[RHS_]]
// CHECK: [[OUT_TENSOR_:%.*]] = mhlo.add [[LHS_TENSOR_]], [[RHS_TENSOR_]]
// CHECK: tensor_store [[OUT_TENSOR_]], [[OUT_]]
// CHECK: lmhlo.terminator
// CHECK: }) {
// CHECK-SAME: padding = dense<{{\[}}[0, 0], [0, 0], [0, 1], [0, 1]]>
// CHECK-SAME: window_dimensions = dense<[1, 1, 3, 3]>
// CHECK-SAME: window_strides = dense<[1, 1, 2, 2]>
// CHECK-SAME: } : ([[ARGT]], [[SRCT]], memref<f32>, [[REST]]) -> ()
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Sign
ENTRY %Sign (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %sign = f32[2,2]{1,0} sign(f32[2,2]{1,0} %val)
}

// CHECK: func @sign(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.sign"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,12 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Sqrt

ENTRY %Sqrt (x: f32[2,2]) -> f32[2,2] {
  %x = f32[2,2]{1,0} parameter(0)
  ROOT %sqrt = f32[2,2]{1,0} sqrt(f32[2,2]{1,0} %x)
}

// CHECK: func @sqrt(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.sqrt"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,10 +0,0 @@
// RUN: xla-gpu-opt %s | FileCheck %s
HloModule Tanh
ENTRY %Tanh (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %tanh = f32[2,2]{1,0} tanh(f32[2,2]{1,0} %val)
}

// CHECK: func @tanh(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]]) {
// CHECK: "lmhlo.tanh"(%[[ARG0]], %[[ARG1]]) : ([[TYPE]], [[TYPE]]) -> ()
// CHECK: }
@ -1,167 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/mlir_gpu/xla_gpu_opt.h"

#include <memory>
#include <string>

#include "absl/strings/str_join.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/BuiltinOps.h"  // from @llvm-project
#include "mlir/Pass/PassManager.h"  // from @llvm-project
#include "tensorflow/compiler/xla/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/hlo_module_config.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/failover_compiler.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/inject_errors_pass.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/tests/verified_hlo_module.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/stream_executor/lib/statusor.h"

namespace xla {
namespace mlir_gpu {

Status XlaGpuOpt::CompileIr(std::unique_ptr<HloModule> hlo_module,
                            const MlirCompiler::IRHook& ir_hook) {
  MlirCompiler* compiler = GetMLIRCompiler();
  compiler->SetModuleHook(ir_hook);
  TF_ASSIGN_OR_RETURN(hlo_module, backend_->compiler()->RunHloPasses(
                                      std::move(hlo_module),
                                      backend_->default_stream_executor(),
                                      /*device_allocator=*/nullptr));
  Status status = backend_->compiler()
                      ->RunBackend(std::move(hlo_module),
                                   backend_->default_stream_executor(),
                                   /*device_allocator=*/nullptr)
                      .status();
  compiler->RemoveModuleHook();
  return status;
}

StatusOr<std::string> XlaGpuOpt::CompileIr(
    std::unique_ptr<HloModule> hlo_module,
    MlirCompiler::IRHook::LoweringStage printing_stage) {
  std::string ir;
  TF_RETURN_IF_ERROR(CompileIr(
      std::move(hlo_module), {[&ir](mlir::ModuleOp module) -> Status {
                                std::string buffer_string;
                                llvm::raw_string_ostream ostream(buffer_string);
                                module.print(ostream);
                                ostream.flush();
                                ir = buffer_string;
                                return Status::OK();
                              },
                              printing_stage}));
  return ir;
}

Status XlaGpuOpt::CompileAndOutputIr(std::unique_ptr<HloModule> hlo_module,
                                     llvm::raw_ostream& os,
                                     LoweringStage printing_stage) {
  TF_ASSIGN_OR_RETURN(std::string ir,
                      CompileIr(std::move(hlo_module), printing_stage));
  os << ir;
  return Status::OK();
}

Status XlaGpuOpt::CompileAndOutputIr(const std::string& hlo_text,
                                     llvm::raw_ostream& os,
                                     LoweringStage printing_stage) {
  TF_ASSIGN_OR_RETURN(auto module, GetVerifiedHloModule(hlo_text));
  return CompileAndOutputIr(std::move(module), os, printing_stage);
}

MlirCompiler::IRHook XlaGpuOpt::GetIRHookBreakingLoweringStage(
    LoweringStage breaking_stage) {
  return {[](mlir::ModuleOp module) -> Status {
            mlir::PassManager pm(module.getContext());
            pm.addNestedPass<::mlir::FuncOp>(
                ::mlir::createInjectErrorsForTestingPass());
            if (failed(pm.run(module))) {
              return InternalError("InjectErrorsForTestingPass failed.");
            }
            return Status::OK();
          },
          breaking_stage};
}

StatusOr<string> XlaGpuOpt::CompileAndInjectErrors(
    std::unique_ptr<HloModule> hlo_module, LoweringStage breaking_stage) {
  std::string errors;
  auto error_handler = [&errors](const EmissionContext::ErrorMap& error_map,
                                 HloModule* hlo_module) {
    errors = "ERRORS FOUND: ";
    for (auto& err : error_map) {
      errors += "[" + err.first->ToString() + ": " +
                absl::StrJoin(err.second, "; ") + "]";
    }
  };

  MlirCompiler* compiler = GetMLIRCompiler();
  compiler->SetModuleHook(GetIRHookBreakingLoweringStage(breaking_stage));
  compiler->SetErrorHandler(error_handler);
  TF_ASSIGN_OR_RETURN(
      hlo_module, compiler->RunHloPasses(std::move(hlo_module),
                                         backend_->default_stream_executor(),
                                         /*device_allocator=*/nullptr));
  Status status = compiler
                      ->RunBackend(std::move(hlo_module),
                                   backend_->default_stream_executor(),
                                   /*device_allocator=*/nullptr)
                      .status();
  compiler->RemoveModuleHook();
  compiler->RemoveErrorHandler();
  if (status.ok()) {
    return errors;
  }
  return status;
}

Status XlaGpuOpt::CompileAndExpectErrors(const std::string& hlo_text,
                                         llvm::raw_ostream& os,
                                         LoweringStage breaking_stage) {
  TF_ASSIGN_OR_RETURN(auto module, GetVerifiedHloModule(hlo_text));
  TF_ASSIGN_OR_RETURN(
      std::string errors,
      CompileAndInjectErrors(std::move(module), breaking_stage));
  os << errors;
  return Status::OK();
}

StatusOr<std::unique_ptr<VerifiedHloModule>> XlaGpuOpt::GetVerifiedHloModule(
    const std::string& hlo_text) {
  HloModuleConfig config;
  auto debug_options = GetDebugOptionsFromFlags();
  debug_options.add_xla_disable_hlo_passes("constant_folding");
  config.set_debug_options(debug_options);
  auto module = absl::make_unique<VerifiedHloModule>(
      "Module", config, /*verifier_layout_sensitive=*/true,
      /*allow_mixed_precision_in_hlo_verifier=*/false,
      /*shape_size_function=*/ShapeUtil::ByteSizeOfElements);
  TF_RETURN_IF_ERROR(module->ParseHloStringAndVerifyModule(hlo_text));
  return std::move(module);
}

MlirCompiler* XlaGpuOpt::GetMLIRCompiler() {
  // TODO(b/137624192): Remove failover once no longer in place.
  auto* failover = static_cast<FailoverCompiler*>(backend_->compiler());
  return static_cast<MlirCompiler*>(failover->GetPrimary());
}

}  // namespace mlir_gpu
}  // namespace xla
@ -1,76 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_XLA_GPU_OPT_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_XLA_GPU_OPT_H_

#include <memory>
#include <string>

#include "llvm/Support/raw_ostream.h"
#include "tensorflow/compiler/xla/service/backend.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/tests/verified_hlo_module.h"

namespace xla {
namespace mlir_gpu {

// Prints the IR created by the MLIR GPU backend at a certain lowering stage.
class XlaGpuOpt {
 public:
  using LoweringStage = MlirCompiler::IRHook::LoweringStage;
  XlaGpuOpt() {
    backend_ = std::move(Backend::CreateDefaultBackend().ValueOrDie());
  }

  // Compiles the HLO module given in 'hlo_text' to a GpuExecutable and prints
  // the IR at the lowering stage 'printing_stage' to the 'os' stream.
  //
  // This function invokes the JIT compiler.
  Status CompileAndOutputIr(const std::string& hlo_text, llvm::raw_ostream& os,
                            LoweringStage printing_stage = LoweringStage::LHLO);

  // Adds the InjectErrorsForTestingPass to MLIRCompiler on the provided
  // lowering stage 'breaking_stage', parses and compiles `hlo_text`, and
  // prints the resulting errors to the 'os' stream.
  Status CompileAndExpectErrors(const std::string& hlo_text,
                                llvm::raw_ostream& os,
                                LoweringStage breaking_stage);

 private:
  std::unique_ptr<Backend> backend_;
  StatusOr<std::unique_ptr<VerifiedHloModule>> GetVerifiedHloModule(
      const std::string& hlo_text_filename);

  Status CompileAndOutputIr(std::unique_ptr<HloModule> hlo_module,
                            llvm::raw_ostream& os,
                            LoweringStage printing_stage);
  Status CompileIr(std::unique_ptr<HloModule> hlo_module,
                   const MlirCompiler::IRHook& ir_hook);
  StatusOr<std::string> CompileIr(std::unique_ptr<HloModule> hlo_module,
                                  LoweringStage printing_stage);
  MlirCompiler::IRHook GetIRHookBreakingLoweringStage(
      LoweringStage breaking_stage);
  StatusOr<std::string> CompileAndInjectErrors(
      std::unique_ptr<HloModule> hlo_module, LoweringStage breaking_stage);
  MlirCompiler* GetMLIRCompiler();
};

}  // namespace mlir_gpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_XLA_GPU_OPT_H_
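For reference, the class above is what the lit tests drove through xla-gpu-opt. Below is a minimal sketch of calling it directly, using only the methods declared in this header; it assumes a CUDA-enabled build of the XLA source tree (which this commit removes), and the main() wrapper plus the inline HLO string are illustrative rather than part of the deleted tool.

#include "llvm/Support/raw_ostream.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/xla_gpu_opt.h"

int main() {
  // A tiny HLO module, in the same style as the lit tests above.
  constexpr char kHlo[] = R"(
HloModule Abs
ENTRY %Abs (val: f32[2,2]) -> f32[2,2] {
  %val = f32[2,2]{1,0} parameter(0)
  ROOT %abs = f32[2,2]{1,0} abs(f32[2,2]{1,0} %val)
})";
  xla::mlir_gpu::XlaGpuOpt opt;
  // Print the LHLO-stage IR for the module to stdout.
  xla::Status status = opt.CompileAndOutputIr(
      kHlo, llvm::outs(), xla::mlir_gpu::XlaGpuOpt::LoweringStage::LHLO);
  if (!status.ok()) {
    llvm::errs() << status.error_message() << "\n";
    return 1;
  }
  return 0;
}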
@ -1,90 +0,0 @@
/* Copyright 2020 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string>

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ToolOutputFile.h"
#include "mlir/Pass/PassManager.h"  // from @llvm-project
#include "mlir/Support/FileUtilities.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/init_mlir.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.h"
#include "tensorflow/compiler/xla/service/mlir_gpu/xla_gpu_opt.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/core/platform/logging.h"

// NOLINTNEXTLINE
static llvm::cl::opt<std::string> input_filename(llvm::cl::Positional,
                                                 llvm::cl::desc("<input file>"),
                                                 llvm::cl::init("-"));

// NOLINTNEXTLINE
static llvm::cl::opt<std::string> output_filename(
    "o", llvm::cl::desc("Output filename"), llvm::cl::value_desc("filename"),
    llvm::cl::init("-"));

// NOLINTNEXTLINE
static llvm::cl::opt<bool> verify_errors(
    "verify-errors",
    llvm::cl::desc("Whether we expect errors which should be verified"),
    llvm::cl::init(false));

static llvm::cl::opt<xla::mlir_gpu::MlirCompiler::IRHook::LoweringStage>
    // NOLINTNEXTLINE
    lowering_stage(
        "lowering-stage",
        llvm::cl::desc(
            "The lowering stage up to which the compiler will be run"),
        llvm::cl::values(
            clEnumValN(xla::mlir_gpu::MlirCompiler::IRHook::LoweringStage::LHLO,
                       "LHLO", "LHLO"),
            clEnumValN(xla::mlir_gpu::MlirCompiler::IRHook::LoweringStage::GPU,
                       "GPU", "GPU"),
            clEnumValN(xla::mlir_gpu::MlirCompiler::IRHook::LoweringStage::LLVM,
                       "LLVM", "LLVM"),
            clEnumValN(
                xla::mlir_gpu::MlirCompiler::IRHook::LoweringStage::KERNEL,
                "KERNEL", "Kernel")),
        llvm::cl::init(
            xla::mlir_gpu::MlirCompiler::IRHook::LoweringStage::LHLO));

int main(int argc, char **argv) {
  tensorflow::InitMlir y(&argc, &argv);
  mlir::registerPassManagerCLOptions();

  llvm::cl::ParseCommandLineOptions(argc, argv,
                                    "XLA GPU modular optimizer driver\n");

  // Set up the input file.
  std::string error_message;
  auto file = mlir::openInputFile(input_filename, &error_message);
  QCHECK(file) << error_message;

  auto output = mlir::openOutputFile(output_filename, &error_message);
  QCHECK(output) << error_message;

  xla::mlir_gpu::XlaGpuOpt opt;
  xla::Status status =
      verify_errors ? opt.CompileAndExpectErrors(file->getBuffer().str(),
                                                 output->os(), lowering_stage)
                    : opt.CompileAndOutputIr(file->getBuffer().str(),
                                             output->os(), lowering_stage);
  if (!status.ok()) {
    LOG(ERROR) << status.error_message();
    return 1;
  }
  output->keep();
  return 0;
}
@ -94,7 +94,6 @@ tf_cc_binary(
    ],
)

# To run with MLIR GPU plugin enabled, pass --define=with_mlir_gpu_support=true.
tf_cc_binary(
    name = "replay_computation_gpu",
    tags = ["gpu"],
@ -328,7 +327,6 @@ cc_library(
    ],
)

# To run with MLIR GPU plugin enabled, pass --define=with_mlir_gpu_support=true.
tf_cc_binary(
    name = "run_hlo_module",
    testonly = True,