Use constant buffer allocations for XLA:CPU
This is simpler than the corresponding change to XLA:GPU because on XLA:CPU all instructions are codegened, so we can always embed a pointer to the constant global variable directly in the generated LLVM IR.

PiperOrigin-RevId: 206363887
parent 90fe37ab8d
commit 388d0d8601
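For context on the approach, the sketch below (not part of this commit; the function name, literal values, and global name are illustrative) uses the standard LLVM C++ API to show the kind of module-level constant global the CPU emitter produces for a constant buffer allocation. Because every instruction is codegened on XLA:CPU, generated code can take the global's address directly rather than loading a pointer from the runtime buffer table.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Illustrative only: emit a float-array literal as a module-level constant.
// Generated code can reference the returned global directly, so no runtime
// buffer needs to be allocated for the corresponding constant allocation.
llvm::Constant* EmitConstantGlobalSketch(llvm::Module* module,
                                         llvm::ArrayRef<float> values) {
  llvm::Constant* initializer =
      llvm::ConstantDataArray::get(module->getContext(), values);
  // The GlobalVariable constructor appends the global to the module.
  auto* global = new llvm::GlobalVariable(
      *module, initializer->getType(), /*isConstant=*/true,
      llvm::GlobalValue::PrivateLinkage, initializer,
      "buffer_for_constant_0" /* hypothetical name */);
  return global;
}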
@@ -252,6 +252,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/compiler/xla/service:name_uniquer",
         "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
         "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
@@ -562,7 +562,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       BufferAssigner::Run(
           module.get(),
           xla::MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
-          BufferSizeBytesFunction(), memory_alignment));
+          BufferSizeBytesFunction(), memory_alignment,
+          /*allow_input_output_aliasing=*/false,
+          /*allocate_buffers_for_constants=*/true));
   // BufferAssignment::ToString() includes a header, so no need for us to
   // print one ourselves.
   XLA_VLOG_LINES(2, assignment->ToString());
@@ -584,6 +586,8 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       std::move(computation_to_profile_idx),
      &target_machine_features);
 
+  TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals());
+
   for (auto embedded_computation :
        entry_computation->MakeEmbeddedComputationsList()) {
     if (embedded_computation->IsFusionComputation()) {
@@ -747,7 +751,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
         BufferAssigner::Run(
             module,
             xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
-            BufferSizeBytesFunction(), memory_alignment));
+            BufferSizeBytesFunction(), memory_alignment,
+            /*allow_input_output_aliasing=*/false,
+            /*allocate_buffers_for_constants=*/true));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
     XLA_VLOG_LINES(2, assignment->ToString());
@@ -776,6 +782,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
         std::move(instruction_to_profile_idx),
         std::move(computation_to_profile_idx),
         &target_machine_features);
+
+    TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals());
+
     HloComputation* computation = module->entry_computation();
     for (auto embedded_computation :
          computation->MakeEmbeddedComputationsList()) {
@@ -832,7 +841,8 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     BufferSizes buffer_sizes;
     for (const BufferAllocation& allocation : assignment->Allocations()) {
       // Callers don't need to allocate temporary buffers for parameters.
-      if (allocation.is_entry_computation_parameter()) {
+      if (allocation.is_entry_computation_parameter() ||
+          allocation.is_constant()) {
         buffer_sizes.push_back(-1);
         continue;
       }
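The -1 entries above are the contract AOT callers rely on: a buffer size of -1 marks a buffer the caller does not allocate, which after this change covers constant buffers (now baked into the object file as globals) as well as entry parameters. A simplified caller-side sketch, using plain int64_t sizes rather than the real CompileAheadOfTime result type:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Allocate scratch space for an AOT-compiled computation. Entries of -1 mark
// buffers the caller must not allocate here: entry parameters and, with this
// change, constants that live in the object file.
std::vector<std::unique_ptr<char[]>> AllocateTempBuffers(
    const std::vector<int64_t>& buffer_sizes) {
  std::vector<std::unique_ptr<char[]>> buffers(buffer_sizes.size());
  for (std::size_t i = 0; i < buffer_sizes.size(); ++i) {
    if (buffer_sizes[i] == -1) {
      continue;  // parameter or constant: nothing to allocate
    }
    buffers[i] =
        std::make_unique<char[]>(static_cast<std::size_t>(buffer_sizes[i]));
  }
  return buffers;
}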
@@ -88,6 +88,11 @@ Status CpuExecutable::AllocateBuffers(
       continue;
     }
 
+    if (allocation.is_constant()) {
+      VLOG(3) << "allocation #" << i << " is a constant";
+      continue;
+    }
+
     if (allocation.is_thread_local()) {
       VLOG(3) << "buffer #" << i << " is thread-local";
       continue;
@@ -51,6 +51,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
@@ -175,23 +176,34 @@ llvm::Constant* IrEmitter::EmitGlobalForLiteral(const Literal& literal) {
       result_global, IrShapeType(literal.shape())->getPointerTo());
 }
 
+Status IrEmitter::EmitConstantGlobals() {
+  for (const BufferAllocation& allocation : assignment_.Allocations()) {
+    if (!allocation.is_constant()) {
+      continue;
+    }
+
+    const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation);
+    llvm::Constant* global_for_const;
+    auto it = emitted_literals_.find(&literal);
+    if (it != emitted_literals_.end()) {
+      global_for_const = it->second;
+    } else {
+      global_for_const = EmitGlobalForLiteral(literal);
+      InsertOrDie(&emitted_literals_, &literal, global_for_const);
+    }
+
+    InsertOrDie(&constant_buffer_to_global_, allocation.index(),
+                global_for_const);
+  }
+
+  return Status::OK();
+}
+
 Status IrEmitter::HandleConstant(HloInstruction* constant) {
   VLOG(2) << "HandleConstant: " << constant->ToString();
-  const Literal& literal = constant->literal();
-  llvm::Constant* global_for_const;
-
-  auto it = emitted_literals_.find(&literal);
-  if (it != emitted_literals_.end()) {
-    global_for_const = it->second;
-  } else {
-    global_for_const = EmitGlobalForLiteral(literal);
-    emitted_literals_[&literal] = global_for_const;
-  }
-  emitted_value_[constant] = global_for_const;
-  VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const);
-  VLOG(2) << " its type: "
-          << llvm_ir::DumpToString(*global_for_const->getType());
-  return Status::OK();
+  // IrEmitter::EmitConstantGlobals has already taken care of emitting the body
+  // of the constant.
+  return EmitTargetAddressForOp(constant);
 }
 
 Status IrEmitter::HandleCopy(HloInstruction* copy) {
@@ -2712,6 +2724,10 @@ llvm::Value* IrEmitter::EmitTempBufferPointer(
     return b_.CreateBitCast(tempbuf_address, element_type->getPointerTo());
   }
 
+  if (allocation.is_constant()) {
+    return FindOrDie(constant_buffer_to_global_, allocation.index());
+  }
+
   llvm::Value* tempbuf_address_ptr = llvm_ir::EmitBufferIndexingGEP(
       GetTempBuffersArgument(), slice.index(), &b_);
   llvm::LoadInst* tempbuf_address_base = b_.CreateLoad(tempbuf_address_ptr);
@@ -105,6 +105,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       PrimitiveType return_type, HloComputation* computation,
       const std::vector<llvm::Value*>& arguments, tensorflow::StringPiece name);
 
+  // Emit an LLVM global variable for every constant buffer allocation.
+  Status EmitConstantGlobals();
+
  protected:
   //
   // The following methods implement the DfsHloVisitor interface.
@@ -560,6 +563,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
                            LiteralPtrHashFunctor, LiteralPtrEqualityFunctor>
       emitted_literals_;
 
+  tensorflow::gtl::FlatMap<BufferAllocation::Index, llvm::Constant*>
+      constant_buffer_to_global_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(IrEmitter);
 };
 
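With both maps in place, emitted_literals_ (which already existed) keeps deduplicating identical literals into a single global, and the new constant_buffer_to_global_ member records which global backs each constant allocation so that EmitTempBufferPointer can return it directly instead of indexing the runtime temp-buffer table. A rough stand-alone sketch of that bookkeeping, with std::map and strings standing in for the real FlatMap, Literal, and llvm::Constant types:

#include <cstdint>
#include <map>
#include <string>

// Stand-in types: the literal is keyed by its serialized bytes and the
// "global" is just a name; the real code keys on const Literal* and stores
// llvm::Constant*.
class ConstantGlobalCache {
 public:
  // Returns the global backing `literal_bytes`, emitting it at most once even
  // if several constant allocations hold the same literal.
  const std::string& GlobalForLiteral(const std::string& literal_bytes) {
    auto it = emitted_literals_.find(literal_bytes);
    if (it == emitted_literals_.end()) {
      std::string name =
          "buffer_for_constant_" + std::to_string(emitted_literals_.size());
      it = emitted_literals_.emplace(literal_bytes, std::move(name)).first;
    }
    return it->second;
  }

  // Records which global backs constant allocation `index`; buffer-pointer
  // emission for that allocation then returns the global directly.
  void BindAllocation(int64_t index, const std::string& global_name) {
    constant_buffer_to_global_[index] = global_name;
  }

  const std::string& GlobalForAllocation(int64_t index) const {
    return constant_buffer_to_global_.at(index);
  }

 private:
  std::map<std::string, std::string> emitted_literals_;
  std::map<int64_t, std::string> constant_buffer_to_global_;
};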
@@ -120,6 +120,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
@@ -165,6 +166,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:elemental_ir_emitter",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:name_uniquer",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
         "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
@@ -323,9 +325,9 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_execution_profile",
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/compiler/xla/service:shaped_buffer",
-        "//tensorflow/compiler/xla/service:stream_pool",
         "//tensorflow/compiler/xla/service:transfer_manager",
         "//tensorflow/compiler/xla/service:tuple_points_to_analysis",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:stream_executor_no_cuda",
@@ -173,45 +173,6 @@ void BufferAllocations::SetBuffer(BufferAllocation::Index buffer_index,
   buffers_[buffer_index] = buffer;
 }
 
-static const HloInstruction& InstrForConstantBufferAllocation(
-    const BufferAllocation& allocation) {
-  CHECK(allocation.is_constant());
-  HloInstruction* const_instr = nullptr;
-  for (const auto& buffer_offset_pair : allocation.assigned_buffers()) {
-    const LogicalBuffer* buffer = buffer_offset_pair.first;
-    // BufferAssignment may have assigned non-constant instructions to this
-    // allocation too so we can't CHECK this condition. E.g. for
-    //
-    // while(init = constant, body = identity, cond = ...)
-    //
-    // the LogicalBuffer for the kWhile instruction will have the same
-    // BufferAllocation as the LogicalBuffer for the (init) constant.
-    if (buffer->instruction()->opcode() == HloOpcode::kConstant) {
-      CHECK_EQ(const_instr, nullptr)
-          << const_instr->ToString() << " " << buffer->ToString();
-      const_instr = buffer->instruction();
-    }
-  }
-  CHECK_NE(const_instr, nullptr);
-  return *const_instr;
-}
-
-string ConstantBufferAllocationToGlobalName(
-    const BufferAllocation& allocation) {
-  string instr_name = InstrForConstantBufferAllocation(allocation).name();
-  for (char& c : instr_name) {
-    if (c == '.') {
-      c = '_';
-    }
-  }
-  return tensorflow::strings::StrCat("buffer_for_", instr_name);
-}
-
-const Literal& LiteralForConstantAllocation(
-    const BufferAllocation& allocation) {
-  return InstrForConstantBufferAllocation(allocation).literal();
-}
-
 bool ShouldEmitLiteralInLlvmIr(const Literal& literal) {
   // LLVM can sometimes do interesting optimizations using scalar constants.
   return ShapeUtil::IsScalar(literal.shape());
@@ -107,15 +107,6 @@ class BufferAllocations {
   bool torn_down_ = false;
 };
 
-// In XLA:GPU we map constant buffer allocations to globals in the generated
-// LLVM IR. This function gives us the name of the global variable a constant
-// buffer is mapped to.
-string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation);
-
-// Return the Literal corresponding to `allocation`, which must be a constant
-// allocation.
-const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation);
-
 // LLVM and PTXAS don't deal well with large constants, so we only emit very
 // small constants directly in LLVM IR. Larger constants are emitted with zero
 // initializers in LLVM IR and are later overwritten when the PTX/CUBIN is
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/compiler/xla/service/shaped_buffer.h"
 #include "tensorflow/compiler/xla/service/transfer_manager.h"
@@ -206,13 +207,15 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor)
     TF_ASSIGN_OR_RETURN(
         se::DeviceMemoryBase global,
         executor->GetUntypedSymbol(
-            ConstantBufferAllocationToGlobalName(allocation), module_handle));
+            llvm_ir::ConstantBufferAllocationToGlobalName(allocation),
+            module_handle));
     VLOG(3) << "Resolved global "
-            << ConstantBufferAllocationToGlobalName(allocation) << " to "
-            << global.opaque();
+            << llvm_ir::ConstantBufferAllocationToGlobalName(allocation)
+            << " to " << global.opaque();
     InsertOrDie(&globals, i, global);
 
-    const Literal& literal = LiteralForConstantAllocation(allocation);
+    const Literal& literal =
+        llvm_ir::LiteralForConstantAllocation(allocation);
     CHECK(ShapeUtil::IsArray(literal.shape()));
     if (!ShouldEmitLiteralInLlvmIr(literal)) {
       VLOG(3) << "H2D memcpy for constant with shape "
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -114,7 +115,8 @@ void HloToIrBindings::EmitBasePointersForHlos(
     } else if (slice.allocation()->is_constant()) {
       llvm::Value* global_for_constant =
           module_->getGlobalVariable(llvm_ir::AsStringRef(
-              ConstantBufferAllocationToGlobalName(*slice.allocation())));
+              llvm_ir::ConstantBufferAllocationToGlobalName(
+                  *slice.allocation())));
       BindHloToIrValue(*non_io_hlo, global_for_constant);
     } else {
       const int64 offset = slice.offset();
@@ -60,6 +60,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h"
@@ -2411,8 +2412,8 @@ std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunk(
     llvm::Value* loc;
     if (slice.allocation()->is_constant()) {
       loc = ir_emitter_context_->llvm_module()->getGlobalVariable(
-          llvm_ir::AsStringRef(
-              ConstantBufferAllocationToGlobalName(*slice.allocation())));
+          llvm_ir::AsStringRef(llvm_ir::ConstantBufferAllocationToGlobalName(
+              *slice.allocation())));
       CHECK_NE(loc, nullptr);
     } else {
       loc = b_.CreateInBoundsGEP(kernel_args.at(slice.allocation()),
@@ -3428,7 +3429,7 @@ Status IrEmitterUnnested::EmitConstantGlobals() {
      continue;
     }
 
-    const Literal& literal = LiteralForConstantAllocation(allocation);
+    const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation);
     const bool should_emit_initializer = ShouldEmitLiteralInLlvmIr(literal);
     llvm::ArrayType* global_type =
         llvm::ArrayType::get(b_.getInt8Ty(), allocation.size());
@@ -3453,7 +3454,8 @@ Status IrEmitterUnnested::EmitConstantGlobals() {
         global_type, /*isConstant=*/should_emit_initializer,
         llvm::GlobalValue::ExternalLinkage,
         /*Initializer=*/initializer,
-        llvm_ir::AsStringRef(ConstantBufferAllocationToGlobalName(allocation)));
+        llvm_ir::AsStringRef(
+            llvm_ir::ConstantBufferAllocationToGlobalName(allocation)));
     global_for_const->setAlignment(kConstantBufferAlignBytes);
     ir_emitter_context_->llvm_module()->getGlobalList().push_back(
         global_for_const);
@@ -224,6 +224,15 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "buffer_assignment_util",
+    srcs = ["buffer_assignment_util.cc"],
+    hdrs = ["buffer_assignment_util.h"],
+    deps = [
+        "//tensorflow/compiler/xla/service:buffer_assignment",
+    ],
+)
+
 cc_library(
     name = "math_ops",
     srcs = ["math_ops.cc"],
@@ -58,7 +58,7 @@ ENTRY while3 {
   CompileAndVerifyIr(hlo_string, R"(
 ; CHECK-LABEL: @body(i8* align 4 dereferenceable(4) %retval
 ; CHECK: %[[add_result:.*]] = fadd fast float %[[fadd_lhs:.*]], %[[fadd_rhs:.*]]
-; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:.*]]
+; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:[0-9]+]]
 ;
 ; CHECK-LABEL: @condition(i8* align 1 dereferenceable(1) %fusion, i8* noalias %run_options, i8** noalias %params
 ; CHECK: %[[cond_state_buf_ptr:.*]] = getelementptr inbounds i8*, i8** %params, i64 0
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
+
+namespace xla {
+namespace llvm_ir {
+static const HloInstruction& InstrForConstantBufferAllocation(
+    const BufferAllocation& allocation) {
+  CHECK(allocation.is_constant());
+  HloInstruction* const_instr = nullptr;
+  for (const auto& buffer_offset_pair : allocation.assigned_buffers()) {
+    const LogicalBuffer* buffer = buffer_offset_pair.first;
+    // BufferAssignment may have assigned non-constant instructions to this
+    // allocation too so we can't CHECK this condition. E.g. for
+    //
+    // while(init = constant, body = identity, cond = ...)
+    //
+    // the LogicalBuffer for the kWhile instruction will have the same
+    // BufferAllocation as the LogicalBuffer for the (init) constant.
+    if (buffer->instruction()->opcode() == HloOpcode::kConstant) {
+      CHECK_EQ(const_instr, nullptr)
+          << const_instr->ToString() << " " << buffer->ToString();
+      const_instr = buffer->instruction();
+    }
+  }
+  CHECK_NE(const_instr, nullptr);
+  return *const_instr;
+}
+
+string ConstantBufferAllocationToGlobalName(
+    const BufferAllocation& allocation) {
+  string instr_name = InstrForConstantBufferAllocation(allocation).name();
+  for (char& c : instr_name) {
+    if (c == '.') {
+      c = '_';
+    }
+  }
+  return tensorflow::strings::StrCat("buffer_for_", instr_name);
+}
+
+const Literal& LiteralForConstantAllocation(
+    const BufferAllocation& allocation) {
+  return InstrForConstantBufferAllocation(allocation).literal();
+}
+}  // namespace llvm_ir
+}  // namespace xla
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
+
+#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+
+namespace xla {
+namespace llvm_ir {
+// In XLA:GPU we map constant buffer allocations to globals in the generated
+// LLVM IR. This function gives us the name of the global variable a constant
+// buffer is mapped to. Not used on XLA:CPU.
+string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation);
+
+// Returns the Literal corresponding to `allocation`, which must be a constant
+// allocation.
+const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation);
+}  // namespace llvm_ir
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
@@ -92,9 +92,10 @@ int main(int argc, char** argv) {
   // It's lame to hard-code the buffer assignments, but we need
   // local_client_aot_test.cc to be able to easily invoke the function.
   CHECK_EQ(result->result_buffer_index(), 1);
-  CHECK_EQ(result->buffer_sizes().size(), 2);
+  CHECK_EQ(result->buffer_sizes().size(), 3);
   CHECK_EQ(result->buffer_sizes()[0], -1);             // param buffer
   CHECK_EQ(result->buffer_sizes()[1], sizeof(float));  // result buffer
+  CHECK_EQ(result->buffer_sizes()[2], -1);             // const buffer
   if (triple.isOSBinFormatELF()) {
     // Check the ELF magic.
     CHECK_EQ(result->object_file_data()[0], 0x7F);