Use constant buffer allocations for XLA:CPU
This is simpler than the corresponding change to XLA:GPU because on XLA:CPU all instructions are codegened, so we can always embed a pointer to the constant global variable directly in the generated LLVM IR.

PiperOrigin-RevId: 206363887
parent 90fe37ab8d
commit 388d0d8601
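For context on the approach, the sketch below (not part of this commit; the function name, literal values, and global name are illustrative) uses the standard LLVM C++ API to show the kind of module-level constant global the CPU emitter produces for a constant buffer allocation. Because every instruction is codegened on XLA:CPU, generated code can take the global's address directly rather than loading a pointer from the runtime buffer table.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Illustrative only: emit a float-array literal as a module-level constant.
// Generated code can reference the returned global directly, so no runtime
// buffer needs to be allocated for the corresponding constant allocation.
llvm::Constant* EmitConstantGlobalSketch(llvm::Module* module,
                                         llvm::ArrayRef<float> values) {
  llvm::Constant* initializer =
      llvm::ConstantDataArray::get(module->getContext(), values);
  // The GlobalVariable constructor appends the global to the module.
  auto* global = new llvm::GlobalVariable(
      *module, initializer->getType(), /*isConstant=*/true,
      llvm::GlobalValue::PrivateLinkage, initializer,
      "buffer_for_constant_0" /* hypothetical name */);
  return global;
}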
@@ -252,6 +252,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/compiler/xla/service:name_uniquer",
         "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
         "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
@@ -562,7 +562,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       BufferAssigner::Run(
           module.get(),
           xla::MakeUnique<SequentialHloOrdering>(module.get(), module_sequence),
-          BufferSizeBytesFunction(), memory_alignment));
+          BufferSizeBytesFunction(), memory_alignment,
+          /*allow_input_output_aliasing=*/false,
+          /*allocate_buffers_for_constants=*/true));
   // BufferAssignment::ToString() includes a header, so no need for us to
   // print one ourselves.
   XLA_VLOG_LINES(2, assignment->ToString());
@@ -584,6 +586,8 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
       std::move(computation_to_profile_idx),
      &target_machine_features);
 
+  TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals());
+
   for (auto embedded_computation :
        entry_computation->MakeEmbeddedComputationsList()) {
     if (embedded_computation->IsFusionComputation()) {
@@ -747,7 +751,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
         BufferAssigner::Run(
             module,
             xla::MakeUnique<SequentialHloOrdering>(module, module_sequence),
-            BufferSizeBytesFunction(), memory_alignment));
+            BufferSizeBytesFunction(), memory_alignment,
+            /*allow_input_output_aliasing=*/false,
+            /*allocate_buffers_for_constants=*/true));
     // BufferAssignment::ToString() includes a header, so no need for us to
     // print one ourselves.
     XLA_VLOG_LINES(2, assignment->ToString());
@@ -776,6 +782,9 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
         std::move(instruction_to_profile_idx),
         std::move(computation_to_profile_idx),
         &target_machine_features);
+
+    TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals());
+
     HloComputation* computation = module->entry_computation();
     for (auto embedded_computation :
          computation->MakeEmbeddedComputationsList()) {
@@ -832,7 +841,8 @@ CpuCompiler::CompileAheadOfTime(std::vector<std::unique_ptr<HloModule>> modules,
     BufferSizes buffer_sizes;
     for (const BufferAllocation& allocation : assignment->Allocations()) {
       // Callers don't need to allocate temporary buffers for parameters.
-      if (allocation.is_entry_computation_parameter()) {
+      if (allocation.is_entry_computation_parameter() ||
+          allocation.is_constant()) {
         buffer_sizes.push_back(-1);
         continue;
       }
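The -1 entries above are the contract AOT callers rely on: a buffer size of -1 marks a buffer the caller does not allocate, which after this change covers constant buffers (now baked into the object file as globals) as well as entry parameters. A simplified caller-side sketch, using plain int64_t sizes rather than the real CompileAheadOfTime result type:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Allocate scratch space for an AOT-compiled computation. Entries of -1 mark
// buffers the caller must not allocate here: entry parameters and, with this
// change, constants that live in the object file.
std::vector<std::unique_ptr<char[]>> AllocateTempBuffers(
    const std::vector<int64_t>& buffer_sizes) {
  std::vector<std::unique_ptr<char[]>> buffers(buffer_sizes.size());
  for (std::size_t i = 0; i < buffer_sizes.size(); ++i) {
    if (buffer_sizes[i] == -1) {
      continue;  // parameter or constant: nothing to allocate
    }
    buffers[i] =
        std::make_unique<char[]>(static_cast<std::size_t>(buffer_sizes[i]));
  }
  return buffers;
}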
@@ -88,6 +88,11 @@ Status CpuExecutable::AllocateBuffers(
       continue;
     }
 
+    if (allocation.is_constant()) {
+      VLOG(3) << "allocation #" << i << " is a constant";
+      continue;
+    }
+
     if (allocation.is_thread_local()) {
       VLOG(3) << "buffer #" << i << " is thread-local";
       continue;
@@ -51,6 +51,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
@@ -175,23 +176,34 @@ llvm::Constant* IrEmitter::EmitGlobalForLiteral(const Literal& literal) {
       result_global, IrShapeType(literal.shape())->getPointerTo());
 }
 
+Status IrEmitter::EmitConstantGlobals() {
+  for (const BufferAllocation& allocation : assignment_.Allocations()) {
+    if (!allocation.is_constant()) {
+      continue;
+    }
+
+    const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation);
+    llvm::Constant* global_for_const;
+    auto it = emitted_literals_.find(&literal);
+    if (it != emitted_literals_.end()) {
+      global_for_const = it->second;
+    } else {
+      global_for_const = EmitGlobalForLiteral(literal);
+      InsertOrDie(&emitted_literals_, &literal, global_for_const);
+    }
+
+    InsertOrDie(&constant_buffer_to_global_, allocation.index(),
+                global_for_const);
+  }
+
+  return Status::OK();
+}
+
 Status IrEmitter::HandleConstant(HloInstruction* constant) {
   VLOG(2) << "HandleConstant: " << constant->ToString();
-  const Literal& literal = constant->literal();
-  llvm::Constant* global_for_const;
-
-  auto it = emitted_literals_.find(&literal);
-  if (it != emitted_literals_.end()) {
-    global_for_const = it->second;
-  } else {
-    global_for_const = EmitGlobalForLiteral(literal);
-    emitted_literals_[&literal] = global_for_const;
-  }
-  emitted_value_[constant] = global_for_const;
-  VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const);
-  VLOG(2) << " its type: "
-          << llvm_ir::DumpToString(*global_for_const->getType());
-  return Status::OK();
+  // IrEmitter::EmitConstantGlobals has already taken care of emitting the body
+  // of the constant.
+  return EmitTargetAddressForOp(constant);
 }
 
 Status IrEmitter::HandleCopy(HloInstruction* copy) {
@@ -2712,6 +2724,10 @@ llvm::Value* IrEmitter::EmitTempBufferPointer(
     return b_.CreateBitCast(tempbuf_address, element_type->getPointerTo());
   }
 
+  if (allocation.is_constant()) {
+    return FindOrDie(constant_buffer_to_global_, allocation.index());
+  }
+
   llvm::Value* tempbuf_address_ptr = llvm_ir::EmitBufferIndexingGEP(
       GetTempBuffersArgument(), slice.index(), &b_);
   llvm::LoadInst* tempbuf_address_base = b_.CreateLoad(tempbuf_address_ptr);
@@ -105,6 +105,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
       PrimitiveType return_type, HloComputation* computation,
       const std::vector<llvm::Value*>& arguments, tensorflow::StringPiece name);
 
+  // Emit an LLVM global variable for every constant buffer allocation.
+  Status EmitConstantGlobals();
+
  protected:
   //
   // The following methods implement the DfsHloVisitor interface.
@@ -560,6 +563,9 @@ class IrEmitter : public DfsHloVisitorWithDefault {
                            LiteralPtrHashFunctor, LiteralPtrEqualityFunctor>
       emitted_literals_;
 
+  tensorflow::gtl::FlatMap<BufferAllocation::Index, llvm::Constant*>
+      constant_buffer_to_global_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(IrEmitter);
 };
 
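With both maps in place, emitted_literals_ (which already existed) keeps deduplicating identical literals into a single global, and the new constant_buffer_to_global_ member records which global backs each constant allocation so that EmitTempBufferPointer can return it directly instead of indexing the runtime temp-buffer table. A rough stand-alone sketch of that bookkeeping, with std::map and strings standing in for the real FlatMap, Literal, and llvm::Constant types:

#include <cstdint>
#include <map>
#include <string>

// Stand-in types: the literal is keyed by its serialized bytes and the
// "global" is just a name; the real code keys on const Literal* and stores
// llvm::Constant*.
class ConstantGlobalCache {
 public:
  // Returns the global backing `literal_bytes`, emitting it at most once even
  // if several constant allocations hold the same literal.
  const std::string& GlobalForLiteral(const std::string& literal_bytes) {
    auto it = emitted_literals_.find(literal_bytes);
    if (it == emitted_literals_.end()) {
      std::string name =
          "buffer_for_constant_" + std::to_string(emitted_literals_.size());
      it = emitted_literals_.emplace(literal_bytes, std::move(name)).first;
    }
    return it->second;
  }

  // Records which global backs constant allocation `index`; buffer-pointer
  // emission for that allocation then returns the global directly.
  void BindAllocation(int64_t index, const std::string& global_name) {
    constant_buffer_to_global_[index] = global_name;
  }

  const std::string& GlobalForAllocation(int64_t index) const {
    return constant_buffer_to_global_.at(index);
  }

 private:
  std::map<std::string, std::string> emitted_literals_;
  std::map<int64_t, std::string> constant_buffer_to_global_;
};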
@@ -120,6 +120,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
         "//tensorflow/compiler/xla/service/llvm_ir:llvm_util",
         "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops",
@@ -165,6 +166,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:elemental_ir_emitter",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:name_uniquer",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/compiler/xla/service/llvm_ir:dynamic_update_slice_util",
         "//tensorflow/compiler/xla/service/llvm_ir:fused_ir_emitter",
         "//tensorflow/compiler/xla/service/llvm_ir:ir_array",
@@ -323,9 +325,9 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_execution_profile",
         "//tensorflow/compiler/xla/service:logical_buffer",
         "//tensorflow/compiler/xla/service:shaped_buffer",
-        "//tensorflow/compiler/xla/service:stream_pool",
         "//tensorflow/compiler/xla/service:transfer_manager",
         "//tensorflow/compiler/xla/service:tuple_points_to_analysis",
+        "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:stream_executor_no_cuda",
@@ -173,45 +173,6 @@ void BufferAllocations::SetBuffer(BufferAllocation::Index buffer_index,
   buffers_[buffer_index] = buffer;
 }
 
-static const HloInstruction& InstrForConstantBufferAllocation(
-    const BufferAllocation& allocation) {
-  CHECK(allocation.is_constant());
-  HloInstruction* const_instr = nullptr;
-  for (const auto& buffer_offset_pair : allocation.assigned_buffers()) {
-    const LogicalBuffer* buffer = buffer_offset_pair.first;
-    // BufferAssignment may have assigned non-constant instructions to this
-    // allocation too so we can't CHECK this condition. E.g. for
-    //
-    // while(init = constant, body = identity, cond = ...)
-    //
-    // the LogicalBuffer for the kWhile instruction will have the same
-    // BufferAllocation as the LogicalBuffer for the (init) constant.
-    if (buffer->instruction()->opcode() == HloOpcode::kConstant) {
-      CHECK_EQ(const_instr, nullptr)
-          << const_instr->ToString() << " " << buffer->ToString();
-      const_instr = buffer->instruction();
-    }
-  }
-  CHECK_NE(const_instr, nullptr);
-  return *const_instr;
-}
-
-string ConstantBufferAllocationToGlobalName(
-    const BufferAllocation& allocation) {
-  string instr_name = InstrForConstantBufferAllocation(allocation).name();
-  for (char& c : instr_name) {
-    if (c == '.') {
-      c = '_';
-    }
-  }
-  return tensorflow::strings::StrCat("buffer_for_", instr_name);
-}
-
-const Literal& LiteralForConstantAllocation(
-    const BufferAllocation& allocation) {
-  return InstrForConstantBufferAllocation(allocation).literal();
-}
-
 bool ShouldEmitLiteralInLlvmIr(const Literal& literal) {
   // LLVM can sometimes do interesting optimizations using scalar constants.
   return ShapeUtil::IsScalar(literal.shape());
@@ -107,15 +107,6 @@ class BufferAllocations {
   bool torn_down_ = false;
 };
 
-// In XLA:GPU we map constant buffer allocations to globals in the generated
-// LLVM IR. This function gives us the name of the global variable a constant
-// buffer is mapped to.
-string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation);
-
-// Return the Literal corresponding to `allocation`, which must be a constant
-// allocation.
-const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation);
-
 // LLVM and PTXAS don't deal well with large constants, so we only emit very
 // small constants directly in LLVM IR. Larger constants are emitted with zero
 // initializers in LLVM IR and are later overwritten when the PTX/CUBIN is
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/compiler/xla/service/shaped_buffer.h"
 #include "tensorflow/compiler/xla/service/transfer_manager.h"
@@ -206,13 +207,15 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor)
     TF_ASSIGN_OR_RETURN(
         se::DeviceMemoryBase global,
         executor->GetUntypedSymbol(
-            ConstantBufferAllocationToGlobalName(allocation), module_handle));
+            llvm_ir::ConstantBufferAllocationToGlobalName(allocation),
+            module_handle));
     VLOG(3) << "Resolved global "
-            << ConstantBufferAllocationToGlobalName(allocation) << " to "
-            << global.opaque();
+            << llvm_ir::ConstantBufferAllocationToGlobalName(allocation)
+            << " to " << global.opaque();
     InsertOrDie(&globals, i, global);
 
-    const Literal& literal = LiteralForConstantAllocation(allocation);
+    const Literal& literal =
+        llvm_ir::LiteralForConstantAllocation(allocation);
     CHECK(ShapeUtil::IsArray(literal.shape()));
     if (!ShouldEmitLiteralInLlvmIr(literal)) {
       VLOG(3) << "H2D memcpy for constant with shape "
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -114,7 +115,8 @@ void HloToIrBindings::EmitBasePointersForHlos(
     } else if (slice.allocation()->is_constant()) {
       llvm::Value* global_for_constant =
           module_->getGlobalVariable(llvm_ir::AsStringRef(
-              ConstantBufferAllocationToGlobalName(*slice.allocation())));
+              llvm_ir::ConstantBufferAllocationToGlobalName(
+                  *slice.allocation())));
       BindHloToIrValue(*non_io_hlo, global_for_constant);
     } else {
       const int64 offset = slice.offset();
@@ -60,6 +60,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h"
 #include "tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h"
@@ -2411,8 +2412,8 @@ std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunk(
     llvm::Value* loc;
     if (slice.allocation()->is_constant()) {
       loc = ir_emitter_context_->llvm_module()->getGlobalVariable(
-          llvm_ir::AsStringRef(
-              ConstantBufferAllocationToGlobalName(*slice.allocation())));
+          llvm_ir::AsStringRef(llvm_ir::ConstantBufferAllocationToGlobalName(
+              *slice.allocation())));
       CHECK_NE(loc, nullptr);
     } else {
       loc = b_.CreateInBoundsGEP(kernel_args.at(slice.allocation()),
@@ -3428,7 +3429,7 @@ Status IrEmitterUnnested::EmitConstantGlobals() {
      continue;
     }
 
-    const Literal& literal = LiteralForConstantAllocation(allocation);
+    const Literal& literal = llvm_ir::LiteralForConstantAllocation(allocation);
     const bool should_emit_initializer = ShouldEmitLiteralInLlvmIr(literal);
     llvm::ArrayType* global_type =
         llvm::ArrayType::get(b_.getInt8Ty(), allocation.size());
@@ -3453,7 +3454,8 @@ Status IrEmitterUnnested::EmitConstantGlobals() {
         global_type, /*isConstant=*/should_emit_initializer,
         llvm::GlobalValue::ExternalLinkage,
         /*Initializer=*/initializer,
-        llvm_ir::AsStringRef(ConstantBufferAllocationToGlobalName(allocation)));
+        llvm_ir::AsStringRef(
+            llvm_ir::ConstantBufferAllocationToGlobalName(allocation)));
     global_for_const->setAlignment(kConstantBufferAlignBytes);
     ir_emitter_context_->llvm_module()->getGlobalList().push_back(
         global_for_const);
@@ -224,6 +224,15 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "buffer_assignment_util",
+    srcs = ["buffer_assignment_util.cc"],
+    hdrs = ["buffer_assignment_util.h"],
+    deps = [
+        "//tensorflow/compiler/xla/service:buffer_assignment",
+    ],
+)
+
 cc_library(
     name = "math_ops",
     srcs = ["math_ops.cc"],
@@ -58,7 +58,7 @@ ENTRY while3 {
   CompileAndVerifyIr(hlo_string, R"(
 ; CHECK-LABEL: @body(i8* align 4 dereferenceable(4) %retval
 ; CHECK: %[[add_result:.*]] = fadd fast float %[[fadd_lhs:.*]], %[[fadd_rhs:.*]]
-; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:.*]]
+; CHECK: store float %[[add_result]], float* %[[store_dest:.*]], !alias.scope ![[alias_scope_md_for_store:[0-9]+]]
 ;
 ; CHECK-LABEL: @condition(i8* align 1 dereferenceable(1) %fusion, i8* noalias %run_options, i8** noalias %params
 ; CHECK: %[[cond_state_buf_ptr:.*]] = getelementptr inbounds i8*, i8** %params, i64 0
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h"
+
+namespace xla {
+namespace llvm_ir {
+static const HloInstruction& InstrForConstantBufferAllocation(
+    const BufferAllocation& allocation) {
+  CHECK(allocation.is_constant());
+  HloInstruction* const_instr = nullptr;
+  for (const auto& buffer_offset_pair : allocation.assigned_buffers()) {
+    const LogicalBuffer* buffer = buffer_offset_pair.first;
+    // BufferAssignment may have assigned non-constant instructions to this
+    // allocation too so we can't CHECK this condition. E.g. for
+    //
+    // while(init = constant, body = identity, cond = ...)
+    //
+    // the LogicalBuffer for the kWhile instruction will have the same
+    // BufferAllocation as the LogicalBuffer for the (init) constant.
+    if (buffer->instruction()->opcode() == HloOpcode::kConstant) {
+      CHECK_EQ(const_instr, nullptr)
+          << const_instr->ToString() << " " << buffer->ToString();
+      const_instr = buffer->instruction();
+    }
+  }
+  CHECK_NE(const_instr, nullptr);
+  return *const_instr;
+}
+
+string ConstantBufferAllocationToGlobalName(
+    const BufferAllocation& allocation) {
+  string instr_name = InstrForConstantBufferAllocation(allocation).name();
+  for (char& c : instr_name) {
+    if (c == '.') {
+      c = '_';
+    }
+  }
+  return tensorflow::strings::StrCat("buffer_for_", instr_name);
+}
+
+const Literal& LiteralForConstantAllocation(
+    const BufferAllocation& allocation) {
+  return InstrForConstantBufferAllocation(allocation).literal();
+}
+}  // namespace llvm_ir
+}  // namespace xla
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
+
+#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+
+namespace xla {
+namespace llvm_ir {
+// In XLA:GPU we map constant buffer allocations to globals in the generated
+// LLVM IR. This function gives us the name of the global variable a constant
+// buffer is mapped to. Not used on XLA:CPU.
+string ConstantBufferAllocationToGlobalName(const BufferAllocation& allocation);
+
+// Returns the Literal corresponding to `allocation`, which must be a constant
+// allocation.
+const Literal& LiteralForConstantAllocation(const BufferAllocation& allocation);
+}  // namespace llvm_ir
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_BUFFER_ASSIGNMENT_UTIL_H_
@@ -92,9 +92,10 @@ int main(int argc, char** argv) {
   // It's lame to hard-code the buffer assignments, but we need
   // local_client_aot_test.cc to be able to easily invoke the function.
   CHECK_EQ(result->result_buffer_index(), 1);
-  CHECK_EQ(result->buffer_sizes().size(), 2);
+  CHECK_EQ(result->buffer_sizes().size(), 3);
   CHECK_EQ(result->buffer_sizes()[0], -1);             // param buffer
   CHECK_EQ(result->buffer_sizes()[1], sizeof(float));  // result buffer
+  CHECK_EQ(result->buffer_sizes()[2], -1);             // const buffer
   if (triple.isOSBinFormatELF()) {
     // Check the ELF magic.
     CHECK_EQ(result->object_file_data()[0], 0x7F);