From 388d0d860110a19a9d133fe4de85f8f6fa060cde Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 27 Jul 2018 13:24:46 -0700 Subject: [PATCH] Use constant buffer allocations for XLA:CPU This is simpler than the corresponding change to XLA:GPU because on XLA:CPU all instructions are codegened so we can always embed a pointer to the constant global variable directly in the generated LLVM IR. PiperOrigin-RevId: 206363887 --- tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 16 ++++- .../xla/service/cpu/cpu_executable.cc | 5 ++ .../compiler/xla/service/cpu/ir_emitter.cc | 46 ++++++++++----- .../compiler/xla/service/cpu/ir_emitter.h | 6 ++ tensorflow/compiler/xla/service/gpu/BUILD | 4 +- .../xla/service/gpu/buffer_allocations.cc | 39 ------------ .../xla/service/gpu/buffer_allocations.h | 9 --- .../xla/service/gpu/gpu_executable.cc | 11 ++-- .../xla/service/gpu/hlo_to_ir_bindings.cc | 4 +- .../xla/service/gpu/ir_emitter_unnested.cc | 10 ++-- tensorflow/compiler/xla/service/llvm_ir/BUILD | 9 +++ .../service/llvm_ir/alias_analysis_test.cc | 2 +- .../service/llvm_ir/buffer_assignment_util.cc | 59 +++++++++++++++++++ .../service/llvm_ir/buffer_assignment_util.h | 34 +++++++++++ .../xla/tests/local_client_aot_test_helper.cc | 3 +- 16 files changed, 180 insertions(+), 78 deletions(-) create mode 100644 tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc create mode 100644 tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 6c997a068dd..504b61d134a 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -252,6 +252,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service:name_uniquer", "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis", + 
"//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util", "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util", "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 29fa29d33ad..b49ea898962 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -562,7 +562,9 @@ StatusOr> CpuCompiler::RunBackend( BufferAssigner::Run( module.get(), xla::MakeUnique(module.get(), module_sequence), - BufferSizeBytesFunction(), memory_alignment)); + BufferSizeBytesFunction(), memory_alignment, + /*allow_input_output_aliasing=*/false, + /*allocate_buffers_for_constants=*/true)); // BufferAssignment::ToString() includes a header, so no need for us to // print one ourselves. XLA_VLOG_LINES(2, assignment->ToString()); @@ -584,6 +586,8 @@ StatusOr> CpuCompiler::RunBackend( std::move(computation_to_profile_idx), &target_machine_features); + TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals()); + for (auto embedded_computation : entry_computation->MakeEmbeddedComputationsList()) { if (embedded_computation->IsFusionComputation()) { @@ -747,7 +751,9 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, BufferAssigner::Run( module, xla::MakeUnique(module, module_sequence), - BufferSizeBytesFunction(), memory_alignment)); + BufferSizeBytesFunction(), memory_alignment, + /*allow_input_output_aliasing=*/false, + /*allocate_buffers_for_constants=*/true)); // BufferAssignment::ToString() includes a header, so no need for us to // print one ourselves. 
XLA_VLOG_LINES(2, assignment->ToString()); @@ -776,6 +782,9 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, std::move(instruction_to_profile_idx), std::move(computation_to_profile_idx), &target_machine_features); + + TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals()); + HloComputation* computation = module->entry_computation(); for (auto embedded_computation : computation->MakeEmbeddedComputationsList()) { @@ -832,7 +841,8 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, BufferSizes buffer_sizes; for (const BufferAllocation& allocation : assignment->Allocations()) { // Callers don't need to allocate temporary buffers for parameters. - if (allocation.is_entry_computation_parameter()) { + if (allocation.is_entry_computation_parameter() || + allocation.is_constant()) { buffer_sizes.push_back(-1); continue; } diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 1093559892d..81e17a5cd4d 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -88,6 +88,11 @@ Status CpuExecutable::AllocateBuffers( continue; } + if (allocation.is_constant()) { + VLOG(3) << "allocation #" << i << " is a constant"; + continue; + } + if (allocation.is_thread_local()) { VLOG(3) << "buffer #" << i << " is thread-local"; continue; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 9d9d3e04a93..a6d8551841d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -51,6 +51,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" @@ -175,23 +176,34 @@ llvm::Constant* IrEmitter::EmitGlobalForLiteral(const Literal& literal) { result_global, IrShapeType(literal.shape())->getPointerTo()); } +Status IrEmitter::EmitConstantGlobals() { + for (const BufferAllocation& allocation : assignment_.Allocations()) { + if (!allocation.is_constant()) { + continue; + } + + const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation); + llvm::Constant* global_for_const; + auto it = emitted_literals_.find(&literal); + if (it != emitted_literals_.end()) { + global_for_const = it->second; + } else { + global_for_const = EmitGlobalForLiteral(literal); + InsertOrDie(&emitted_literals_, &literal, global_for_const); + } + + InsertOrDie(&constant_buffer_to_global_, allocation.index(), + global_for_const); + } + + return Status::OK(); +} + Status IrEmitter::HandleConstant(HloInstruction* constant) { VLOG(2) << "HandleConstant: " << constant->ToString(); - const Literal& literal = constant->literal(); - llvm::Constant* global_for_const; - - auto it = emitted_literals_.find(&literal); - if (it != emitted_literals_.end()) { - global_for_const = it->second; - } else { - global_for_const = EmitGlobalForLiteral(literal); - emitted_literals_[&literal] = global_for_const; - } - emitted_value_[constant] = global_for_const; - VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const); - VLOG(2) << " its type: " - << llvm_ir::DumpToString(*global_for_const->getType()); - return Status::OK(); + // IrEmitter::EmitConstantGlobals has already 
taken care of emitting the body + // of the constant. + return EmitTargetAddressForOp(constant); } Status IrEmitter::HandleCopy(HloInstruction* copy) { @@ -2712,6 +2724,10 @@ llvm::Value* IrEmitter::EmitTempBufferPointer( return b_.CreateBitCast(tempbuf_address, element_type->getPointerTo()); } + if (allocation.is_constant()) { + return FindOrDie(constant_buffer_to_global_, allocation.index()); + } + llvm::Value* tempbuf_address_ptr = llvm_ir::EmitBufferIndexingGEP( GetTempBuffersArgument(), slice.index(), &b_); llvm::LoadInst* tempbuf_address_base = b_.CreateLoad(tempbuf_address_ptr); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index cf7fa05b207..03bbb2afb58 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -105,6 +105,9 @@ class IrEmitter : public DfsHloVisitorWithDefault { PrimitiveType return_type, HloComputation* computation, const std::vector& arguments, tensorflow::StringPiece name); + // Emit an LLVM global variable for every constant buffer allocation. + Status EmitConstantGlobals(); + protected: // // The following methods implement the DfsHloVisitor interface. 
@@ -560,6 +563,9 @@ class IrEmitter : public DfsHloVisitorWithDefault { LiteralPtrHashFunctor, LiteralPtrEqualityFunctor> emitted_literals_; + tensorflow::gtl::FlatMap<BufferAllocation::Index, llvm::Constant*> + constant_buffer_to_global_; + TF_DISALLOW_COPY_AND_ASSIGN(IrEmitter); }; diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 885365105f9..a73a341fdb1 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -120,6 +120,7 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis", + "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops", @@ -165,6 +166,7 @@ cc_library( "//tensorflow/compiler/xla/service:elemental_ir_emitter", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:name_uniquer", + "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util", "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util", "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", @@ -323,9 +325,9 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_execution_profile", "//tensorflow/compiler/xla/service:logical_buffer", "//tensorflow/compiler/xla/service:shaped_buffer", - "//tensorflow/compiler/xla/service:stream_pool", "//tensorflow/compiler/xla/service:transfer_manager", "//tensorflow/compiler/xla/service:tuple_points_to_analysis", + "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor_no_cuda", diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc 
b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc index 20d42857663..537295292b6 100644 --- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc +++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc @@ -173,45 +173,6 @@ void BufferAllocations::SetBuffer(BufferAllocation::Index buffer_index, buffers_[buffer_index] = buffer; } -static const HloInstruction& InstrForConstantBufferAllocation( - const BufferAllocation& allocation) { - CHECK(allocation.is_constant()); - HloInstruction* const_instr = nullptr; - for (const auto& buffer_offset_pair : allocation.assigned_buffers()) { - const LogicalBuffer* buffer = buffer_offset_pair.first; - // BufferAssignment may have assigned non-constant instructions to this - // allocation too so we can't CHECK this condition. E.g. for - // - // while(init = constant, body = identity, cond = ...) - // - // the LogicalBuffer for the kWhile instruction will have the same - // BufferAllocation as the LogicalBuffer for the (init) constant. - if (buffer->instruction()->opcode() == HloOpcode::kConstant) { - CHECK_EQ(const_instr, nullptr) - << const_instr->ToString() << " " << buffer->ToString(); - const_instr = buffer->instruction(); - } - } - CHECK_NE(const_instr, nullptr); - return *const_instr; -} - -string ConstantBufferAllocationToGlobalName( - const BufferAllocation& allocation) { - string instr_name = InstrForConstantBufferAllocation(allocation).name(); - for (char& c : instr_name) { - if (c == '.') { - c = '_'; - } - } - return tensorflow::strings::StrCat("buffer_for_", instr_name); -} - -const Literal& LiteralForConstantAllocation( - const BufferAllocation& allocation) { - return InstrForConstantBufferAllocation(allocation).literal(); -} - bool ShouldEmitLiteralInLlvmIr(const Literal& literal) { // LLVM can sometimes do interesting optimizations using scalar constants. 
return ShapeUtil::IsScalar(literal.shape()); diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h index f21861ed81c..f13eab0dd78 100644 --- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h +++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h @@ -107,15 +107,6 @@ class BufferAllocations { bool torn_down_ = false; }; -// In XLA:GPU we map constant buffer allocations to globals in the generated -// LLVM IR. This function gives us the name of the global variable a constant -// buffer is mapped to. -string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation); - -// Return the Literal corresponding to `allocation`, which must be a constant -// allocation. -const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation); - // LLVM and PTXAS don't deal well with large constants, so we only emit very // small constants directly in LLVM IR. Larger constants are emitted with zero // initializers in LLVM IR and are later overwritten when the PTX/CUBIN is diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 0179b43240d..bb71c79fd76 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" @@ -206,13 +207,15 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor) { TF_ASSIGN_OR_RETURN( se::DeviceMemoryBase global, executor->GetUntypedSymbol( - ConstantBufferAllocationToGlobalName(allocation), module_handle)); + llvm_ir::ConstantBufferAllocationToGlobalName(allocation), + module_handle)); VLOG(3) << "Resolved global " - << ConstantBufferAllocationToGlobalName(allocation) << " to " - << global.opaque(); + << llvm_ir::ConstantBufferAllocationToGlobalName(allocation) + << " to " << global.opaque(); InsertOrDie(&globals, i, global); - const Literal& literal = LiteralForConstantAllocation(allocation); + const Literal& literal = + llvm_ir::LiteralForConstantAllocation(allocation); CHECK(ShapeUtil::IsArray(literal.shape())); if (!ShouldEmitLiteralInLlvmIr(literal)) { VLOG(3) << "H2D memcpy for constant with shape " diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index c02a95d1935..8c11cd05419 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -114,7 +115,8 @@ void HloToIrBindings::EmitBasePointersForHlos( } else if (slice.allocation()->is_constant()) { llvm::Value* global_for_constant = module_->getGlobalVariable(llvm_ir::AsStringRef( - ConstantBufferAllocationToGlobalName(*slice.allocation()))); + llvm_ir::ConstantBufferAllocationToGlobalName( + *slice.allocation()))); BindHloToIrValue(*non_io_hlo, global_for_constant); } else { const int64 offset = slice.offset(); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index fb9540b7ef3..3a5394dac65 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -60,6 +60,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h" #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h" @@ -2411,8 +2412,8 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( llvm::Value* loc; if (slice.allocation()->is_constant()) { loc = ir_emitter_context_->llvm_module()->getGlobalVariable( - llvm_ir::AsStringRef( - ConstantBufferAllocationToGlobalName(*slice.allocation()))); + llvm_ir::AsStringRef(llvm_ir::ConstantBufferAllocationToGlobalName( + *slice.allocation()))); CHECK_NE(loc, nullptr); } else { loc = b_.CreateInBoundsGEP(kernel_args.at(slice.allocation()), @@ -3428,7 +3429,7 @@ Status IrEmitterUnnested::EmitConstantGlobals() { continue; } - const Literal& literal = LiteralForConstantAllocation(allocation); + const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation); const bool should_emit_initializer = ShouldEmitLiteralInLlvmIr(literal); llvm::ArrayType* global_type = llvm::ArrayType::get(b_.getInt8Ty(), allocation.size()); @@ -3453,7 +3454,8 @@ Status IrEmitterUnnested::EmitConstantGlobals() { global_type, /*isConstant=*/should_emit_initializer, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/initializer, - llvm_ir::AsStringRef(ConstantBufferAllocationToGlobalName(allocation))); + llvm_ir::AsStringRef( + llvm_ir::ConstantBufferAllocationToGlobalName(allocation))); global_for_const->setAlignment(kConstantBufferAlignBytes); ir_emitter_context_->llvm_module()->getGlobalList().push_back( global_for_const); diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 309a186e589..cdd3daf73b8 100644 --- 
a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -224,6 +224,15 @@ cc_library( ], ) +cc_library( + name = "buffer_assignment_util", + srcs = ["buffer_assignment_util.cc"], + hdrs = ["buffer_assignment_util.h"], + deps = [ + "//tensorflow/compiler/xla/service:buffer_assignment", + ], +) + cc_library( name = "math_ops", srcs = ["math_ops.cc"], diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc index 2552ff4a6a0..941d9406846 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis_test.cc @@ -58,7 +58,7 @@ ENTRY while3 { CompileAndVerifyIr(hlo_string, R"( ; CHECK-LABEL: @body(i8* align 4 dereferenceable(4) %retval ; CHECK: %[[add_result:.*]] = fadd fast float %[[fadd_lhs:.*]], %[[fadd_rhs:.*]] -; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:.*]] +; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:[0-9]+]] ; ; CHECK-LABEL: @condition(i8* align 1 dereferenceable(1) %fusion, i8* noalias %run_options, i8** noalias %params ; CHECK: %[[cond_state_buf_ptr:.*]] = getelementptr inbounds i8*, i8** %params, i64 0 diff --git a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc new file mode 100644 index 00000000000..4eb5d9fb475 --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc @@ -0,0 +1,59 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h" + +namespace xla { +namespace llvm_ir { +static const HloInstruction& InstrForConstantBufferAllocation( + const BufferAllocation& allocation) { + CHECK(allocation.is_constant()); + HloInstruction* const_instr = nullptr; + for (const auto& buffer_offset_pair : allocation.assigned_buffers()) { + const LogicalBuffer* buffer = buffer_offset_pair.first; + // BufferAssignment may have assigned non-constant instructions to this + // allocation too so we can't CHECK this condition. E.g. for + // + // while(init = constant, body = identity, cond = ...) + // + // the LogicalBuffer for the kWhile instruction will have the same + // BufferAllocation as the LogicalBuffer for the (init) constant. 
+ if (buffer->instruction()->opcode() == HloOpcode::kConstant) { + CHECK_EQ(const_instr, nullptr) + << const_instr->ToString() << " " << buffer->ToString(); + const_instr = buffer->instruction(); + } + } + CHECK_NE(const_instr, nullptr); + return *const_instr; +} + +string ConstantBufferAllocationToGlobalName( + const BufferAllocation& allocation) { + string instr_name = InstrForConstantBufferAllocation(allocation).name(); + for (char& c : instr_name) { + if (c == '.') { + c = '_'; + } + } + return tensorflow::strings::StrCat("buffer_for_", instr_name); +} + +const Literal& LiteralForConstantAllocation( + const BufferAllocation& allocation) { + return InstrForConstantBufferAllocation(allocation).literal(); +} +} // namespace llvm_ir +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h new file mode 100644 index 00000000000..bfb6eecb87f --- /dev/null +++ b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h @@ -0,0 +1,34 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_ + +#include "tensorflow/compiler/xla/service/buffer_assignment.h" + +namespace xla { +namespace llvm_ir { +// In XLA:GPU we map constant buffer allocations to globals in the generated +// LLVM IR. This function gives us the name of the global variable a constant +// buffer is mapped to. Not used on XLA:CPU. +string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation); + +// Returns the Literal corresponding to `allocation`, which must be a constant +// allocation. +const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation); +} // namespace llvm_ir +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_ diff --git a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc index 9e21c53569d..74494e60e88 100644 --- a/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc +++ b/tensorflow/compiler/xla/tests/local_client_aot_test_helper.cc @@ -92,9 +92,10 @@ int main(int argc, char** argv) { // It's lame to hard-code the buffer assignments, but we need // local_client_aot_test.cc to be able to easily invoke the function. CHECK_EQ(result->result_buffer_index(), 1); - CHECK_EQ(result->buffer_sizes().size(), 2); + CHECK_EQ(result->buffer_sizes().size(), 3); CHECK_EQ(result->buffer_sizes()[0], -1); // param buffer CHECK_EQ(result->buffer_sizes()[1], sizeof(float)); // result buffer + CHECK_EQ(result->buffer_sizes()[2], -1); // const buffer if (triple.isOSBinFormatELF()) { // Check the ELF magic. CHECK_EQ(result->object_file_data()[0], 0x7F);