Split TensorFlow type-related methods out of Allocator.

This CL reduces the dependencies of tensorflow::Allocator by moving methods related to TensorFlow-specific element types (ResourceHandle, Variant) out of the Allocator base class and into a TypedAllocator utility class.
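
For callers, the change is mechanical: the typed helpers move from instance methods on Allocator to static methods on TypedAllocator that take the allocator and the AllocationAttributes explicitly. A minimal sketch of the call-site migration, based on the call sites updated in this CL (the Example function and the 1024-element size are illustrative only):

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/typed_allocator.h"

namespace tensorflow {

void Example(Allocator* a) {
  // Before this CL: float* buf = a->Allocate<float>(1024);
  // After this CL, typed allocation goes through TypedAllocator and takes the
  // raw allocator plus AllocationAttributes explicitly.
  float* buf =
      TypedAllocator::Allocate<float>(a, 1024, AllocationAttributes());
  if (buf != nullptr) {
    // ... use buf ...
    // Before this CL: a->Deallocate(buf, 1024);
    TypedAllocator::Deallocate(a, buf, 1024);
  }
}

}  // namespace tensorflow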

In addition, this CL makes the following related changes to tidy up the Allocator interface and implementation:

* Allocator::ShouldAllocateEmptyTensors() becomes Allocator::AllocatesOpaqueHandle(), and the result of this method is used (instead of virtual dispatch) to determine whether or not to run the complex-type constructors/destructors when allocating/deleting a Tensor buffer (illustrated in the sketch after this list).

* The virtual Allocator::Run{String,Resource,Variant}{Ctor,Dtor}() methods are removed, and their per-type construction/destruction logic is moved to TypedAllocator.

* AllocationAttributes is separated out into its own header.

* A method-forwarding bug in AllocatorWrapper is fixed: ShouldAllocateEmptyTensors() previously forwarded to the wrapped allocator's TracksAllocationSizes(); the new AllocatesOpaqueHandle() override forwards to the wrapped allocator's AllocatesOpaqueHandle().

* A few unused methods are deleted, and include-what-you-use (IWYU) errors are fixed.
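
The sketch below illustrates the new extension point for allocators whose return values are opaque handles rather than real buffers, modeled on the XlaCompilationAllocator change in this CL. OpaqueHandleAllocator and its AllocateRaw/DeallocateRaw bodies are hypothetical, for illustration only:

#include "tensorflow/core/framework/allocator.h"

namespace tensorflow {

// Hypothetical allocator whose "allocations" are opaque tracking handles.
class OpaqueHandleAllocator : public Allocator {
 public:
  string Name() override { return "opaque_handle"; }

  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    // Return a small tracking object instead of num_bytes of real storage.
    return new char[1];
  }

  void DeallocateRaw(void* ptr) override { delete[] static_cast<char*>(ptr); }

  // Previously an allocator like this overrode ShouldAllocateEmptyTensors()
  // plus the virtual RunString/RunResource{Ctor,Dtor}() hooks to suppress
  // element construction. After this CL a single query is enough:
  // TypedAllocator skips constructors/destructors when this returns true, and
  // Tensor still allocates a buffer even for zero-element shapes.
  bool AllocatesOpaqueHandle() const override { return true; }
};

}  // namespace tensorflow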

PiperOrigin-RevId: 247210317
Derek Murray 2019-05-08 07:38:22 -07:00 committed by TensorFlower Gardener
parent 66b193faee
commit 61c8837163
14 changed files with 241 additions and 182 deletions


@@ -58,18 +58,13 @@ class XlaCompilationAllocator : public Allocator {
   // Make sure that even tensors with 0 elements have allocated
   // buffers, so they get ids to track.
-  bool ShouldAllocateEmptyTensors() const override { return true; }
-
- private:
-  // Don't run any constructors or destructors for complex objects,
-  // since there is no backing store for the tensor to run them
-  // on. strings are the only complex objects currently stored in
-  // Tensors. If others are added, this set of overrides must be
-  // extended to include them.
-  void RunStringCtor(string* p, size_t n) override {}
-  void RunStringDtor(string* p, size_t n) override {}
-  void RunResourceCtor(ResourceHandle* p, size_t n) override {}
-  void RunResourceDtor(ResourceHandle* p, size_t n) override {}
+  //
+  // NOTE: It is the caller's responsibility to track whether an allocated
+  // object is a buffer or an opaque handle. In particular, when this allocator
+  // is used, the caller must not run any constructors or destructors for
+  // complex objects, since there is no backing store for the tensor in which to
+  // place their outputs.
+  bool AllocatesOpaqueHandle() const override { return true; }
 };
 
 XlaCompilationDevice::XlaCompilationDevice(const SessionOptions& options,


@@ -944,6 +944,7 @@ tf_cuda_library(
         "framework/tracking_allocator.h",
         "framework/type_index.h",
         "framework/type_traits.h",
+        "framework/typed_allocator.h",
         "framework/types.h",
         "public/version.h",
         "util/activation_mode.h",


@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/framework/typed_allocator.h"
 #include "tensorflow/core/lib/core/threadpool.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/random/simple_philox.h"
@@ -153,18 +154,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
   GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
   CheckStats(&a, 0, 0, 0, 0);
-  float* first_ptr = a.Allocate<float>(1024);
+  float* first_ptr = TypedAllocator::Allocate<float>(&a, 1024, {});
   a.DeallocateRaw(first_ptr);
   CheckStats(&a, 1, 0, 4096, 4096);
   for (int i = 0; i < 1024; ++i) {
     // Allocate several buffers of different sizes, and then clean them
     // all up. We should be able to repeat this endlessly without
     // causing fragmentation and growth.
-    float* t1 = a.Allocate<float>(1024);
-    int64* t2 = a.Allocate<int64>(1048576);
-    double* t3 = a.Allocate<double>(2048);
-    float* t4 = a.Allocate<float>(10485760);
+    float* t1 = TypedAllocator::Allocate<float>(&a, 1024, {});
+    int64* t2 = TypedAllocator::Allocate<int64>(&a, 1048576, {});
+    double* t3 = TypedAllocator::Allocate<double>(&a, 2048, {});
+    float* t4 = TypedAllocator::Allocate<float>(&a, 10485760, {});
     a.DeallocateRaw(t1);
     a.DeallocateRaw(t2);
@@ -179,7 +180,7 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) {
   // At the end, we should have coalesced all memory into one region
   // starting at the beginning, so validate that allocating a pointer
   // starts from this region.
-  float* first_ptr_after = a.Allocate<float>(1024);
+  float* first_ptr_after = TypedAllocator::Allocate<float>(&a, 1024, {});
   EXPECT_EQ(first_ptr, first_ptr_after);
   a.DeallocateRaw(first_ptr_after);
 }
@@ -190,7 +191,7 @@ TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) {
       GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
       platform_gpu_id, false /*use_unified_memory*/, {}, {});
   GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
-  float* ptr = a.Allocate<float>(0);
+  float* ptr = TypedAllocator::Allocate<float>(&a, 0, {});
   EXPECT_EQ(nullptr, ptr);
 }
@@ -209,7 +210,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) {
       GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
       platform_gpu_id, false /*use_unified_memory*/, {}, {});
   GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc");
-  float* t1 = a.Allocate<float>(1);
+  float* t1 = TypedAllocator::Allocate<float>(&a, 1, {});
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
   a.DeallocateRaw(t1);
@@ -223,8 +224,8 @@ TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) {
   // Configure a 1MiB byte limit
   GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc");
-  float* first_ptr = a.Allocate<float>(1 << 6);
-  float* second_ptr = a.Allocate<float>(1 << 20);
+  float* first_ptr = TypedAllocator::Allocate<float>(&a, 1 << 6, {});
+  float* second_ptr = TypedAllocator::Allocate<float>(&a, 1 << 20, {});
   EXPECT_NE(nullptr, first_ptr);
   EXPECT_EQ(nullptr, second_ptr);


@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/framework/typed_allocator.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stream_executor.h"
@@ -47,7 +48,8 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
   for (int s : {8}) {
     std::vector<int64> cpu_array(s);
     memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
-    int64* gpu_array = a.Allocate<int64>(cpu_array.size());
+    int64* gpu_array =
+        TypedAllocator::Allocate<int64>(&a, cpu_array.size(), {});
     se::DeviceMemory<int64> gpu_array_ptr{se::DeviceMemoryBase{gpu_array}};
     ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0],
                                                s * sizeof(int64)));
@@ -74,7 +76,8 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
       std::vector<int64> cpu_array(s);
       memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
-      int64* gpu_array = a.Allocate<int64>(cpu_array.size());
+      int64* gpu_array =
+          TypedAllocator::Allocate<int64>(&a, cpu_array.size(), {});
       se::DeviceMemory<int64> gpu_array_ptr{
           se::DeviceMemoryBase{gpu_array}};
@@ -110,7 +113,8 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
      std::vector<int64> cpu_array(s);
      memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
-      int64* gpu_array = a.Allocate<int64>(cpu_array.size());
+      int64* gpu_array =
+          TypedAllocator::Allocate<int64>(&a, cpu_array.size(), {});
       se::DeviceMemory<int64> gpu_array_ptr{
           se::DeviceMemoryBase{gpu_array}};
@@ -145,7 +149,7 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
   std::vector<float> cpu_array_result(1024);
   // Allocate 1024 floats
-  float* gpu_array = a.Allocate<float>(cpu_array.size());
+  float* gpu_array = TypedAllocator::Allocate<float>(&a, cpu_array.size(), {});
   se::DeviceMemory<float> gpu_array_ptr{se::DeviceMemoryBase{gpu_array}};
   ASSERT_TRUE(stream_exec->SynchronousMemcpy(&cpu_array[0], gpu_array_ptr,
                                              cpu_array.size() * sizeof(float)));
@@ -192,7 +196,7 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
   std::vector<float> cpu_array_result(1024);
   // Allocate 1024 floats
-  float* gpu_array = a.Allocate<float>(cpu_array.size());
+  float* gpu_array = TypedAllocator::Allocate<float>(&a, cpu_array.size(), {});
   se::DeviceMemory<float> gpu_array_ptr{se::DeviceMemoryBase{gpu_array}};
   ASSERT_TRUE(stream_exec->SynchronousMemcpy(&cpu_array[0], gpu_array_ptr,
                                              cpu_array.size() * sizeof(float)));
@@ -241,7 +245,7 @@ TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
       new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
                             platform_gpu_id),
       platform_gpu_id);
-  float* t1 = a.Allocate<float>(1);
+  float* t1 = TypedAllocator::Allocate<float>(&a, 1, {});
   EXPECT_EQ(4, a.RequestedSize(t1));
   EXPECT_EQ(256, a.AllocatedSize(t1));
   a.DeallocateRaw(t1);


@@ -151,7 +151,8 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
   if (total_bytes > 0) {
     tracing::ScopedAnnotation annotation("SetProtoFromGPU");
     alloc = GPUProcessState::singleton()->GetGpuHostAllocator(0);
-    buf = alloc->Allocate<char>(total_bytes);
+    buf = static_cast<char*>(
+        alloc->AllocateRaw(Allocator::kAllocatorAlignment, total_bytes));
     if (LogMemory::IsEnabled()) {
       LogMemory::RecordRawAllocation("SetProtoFromGPU",
                                      LogMemory::PROTO_BUFFER_STEP_ID,
@@ -178,7 +179,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
                                        LogMemory::PROTO_BUFFER_STEP_ID,
                                        buf, alloc, false);
       }
-      alloc->Deallocate<char>(buf, total_bytes);
+      alloc->DeallocateRaw(buf);
     }
     done(Status::OK());
   });


@@ -106,7 +106,6 @@ class ScopedAllocatorInstance : public Allocator {
   }
   void DeallocateRaw(void* p) LOCKS_EXCLUDED(mu_) override;
   bool TracksAllocationSizes() const override { return false; }
-  bool ShouldAllocateEmptyTensors() const override { return false; }
   size_t RequestedSize(const void* ptr) const override { return 0; }
   size_t AllocatedSize(const void* ptr) const override { return 0; }
   int64 AllocationId(const void* ptr) const override { return 0; }


@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/platform/mutex.h"
 
 // gRPC response caching. Most WorkerService methods cannot be retried directly
 // as they will fail or deadlock. To enable retrying, we can instead cache


@@ -18,7 +18,6 @@ limitations under the License.
 #include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/tracking_allocator.h"
-#include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -41,22 +40,6 @@ constexpr size_t Allocator::kAllocatorAlignment;
 
 Allocator::~Allocator() {}
 
-void RunResourceCtor(ResourceHandle* p, size_t n) {
-  for (size_t i = 0; i < n; ++p, ++i) new (p) ResourceHandle();
-}
-
-void RunResourceDtor(ResourceHandle* p, size_t n) {
-  for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle();
-}
-
-void Allocator::RunVariantCtor(Variant* p, size_t n) {
-  for (size_t i = 0; i < n; ++p, ++i) new (p) Variant();
-}
-
-void Allocator::RunVariantDtor(Variant* p, size_t n) {
-  for (size_t i = 0; i < n; ++p, ++i) p->~Variant();
-}
-
 // If true, cpu allocator collects more stats.
 static bool cpu_allocator_collect_stats = false;
 // If true, cpu allocator collects full stats.


@@ -18,23 +18,20 @@ limitations under the License.
 #include <stdlib.h>
 
-#include <functional>
 #include <limits>
 
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "tensorflow/core/framework/numeric_types.h"
-#include "tensorflow/core/framework/resource_handle.h"
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/numa.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-class Variant;
-
 // Attributes for a single allocation call. Different calls to the same
 // allocator could potentially have different allocation attributes.
 struct AllocationAttributes {
@@ -129,51 +126,25 @@ class Allocator {
   // REQUIRES: "ptr" was previously returned by a call to AllocateRaw
   virtual void DeallocateRaw(void* ptr) = 0;
 
-  // Convenience functions to do typed allocation. C++ constructors
-  // and destructors are invoked for complex types if necessary,
-  // depending on the concrete Allocator implementation. May return
-  // NULL if the tensor has too many elements to represent in a single
-  // allocation.
-  template <typename T>
-  T* Allocate(size_t num_elements) {
-    return Allocate<T>(num_elements, AllocationAttributes());
-  }
-
-  template <typename T>
-  T* Allocate(size_t num_elements,
-              const AllocationAttributes& allocation_attr) {
-    // TODO(jeff): Do we need to allow clients to pass in alignment
-    // requirements?
-
-    if (num_elements > (std::numeric_limits<size_t>::max() / sizeof(T))) {
-      return NULL;
-    }
-
-    void* p = AllocateRaw(kAllocatorAlignment, sizeof(T) * num_elements,
-                          allocation_attr);
-    T* typed_p = reinterpret_cast<T*>(p);
-    if (typed_p) RunCtor<T>(typed_p, num_elements);
-    return typed_p;
-  }
-
-  template <typename T>
-  void Deallocate(T* ptr, size_t num_elements) {
-    if (ptr) {
-      RunDtor<T>(ptr, num_elements);
-      DeallocateRaw(ptr);
-    }
-  }
-
   // Returns true if this allocator tracks the sizes of allocations.
   // RequestedSize and AllocatedSize must be overridden if
   // TracksAllocationSizes is overridden to return true.
   virtual bool TracksAllocationSizes() const { return false; }
 
-  // Returns true if this allocator requires tensors with 0 elements
-  // to allocate buffers. This is false for most allocators, but may
-  // be used by special-case allocators that want to track tensor
-  // usage.
-  virtual bool ShouldAllocateEmptyTensors() const { return false; }
+  // Returns true if this allocator allocates an opaque handle rather than the
+  // requested number of bytes.
+  //
+  // This method returns false for most allocators, but may be used by
+  // special-case allocators that track tensor usage. If this method returns
+  // true, AllocateRaw() should be invoked for all values of `num_bytes`,
+  // including 0.
+  //
+  // NOTE: It is the caller's responsibility to track whether an allocated
+  // object is a buffer or an opaque handle. In particular, when this method
+  // returns `true`, users of this allocator must not run any constructors or
+  // destructors for complex objects, since there is no backing store for the
+  // tensor in which to place their outputs.
+  virtual bool AllocatesOpaqueHandle() const { return false; }
 
   // Returns the user-requested size of the data allocated at
   // 'ptr'. Note that the actual buffer allocated might be larger
@@ -232,80 +203,8 @@ class Allocator {
   virtual void ClearStats() {}
 
   virtual void SetSafeFrontier(uint64 count) {}
-
- private:
-  // No constructors or destructors are run for simple types
-  template <typename T>
-  void RunCtor(T* p, size_t n) {
-    static_assert(is_simple_type<T>::value, "T is not a simple type.");
-  }
-
-  template <typename T>
-  void RunDtor(T* p, size_t n) {}
-
-  // custom constructors and destructors that can be overridden for
-  // non-standard allocators
-
-  // Runs string's default constructor for p[0], p[1], ..., p[n-1].
-  virtual void RunStringCtor(string* p, size_t n) {
-    for (size_t i = 0; i < n; ++p, ++i) new (p) string();
-  }
-
-  // Runs string's default destructor for p[0], p[1], ..., p[n-1].
-  virtual void RunStringDtor(string* p, size_t n) {
-    for (size_t i = 0; i < n; ++p, ++i) p->~string();
-  }
-
-  virtual void RunResourceCtor(ResourceHandle* p, size_t n) {
-    for (size_t i = 0; i < n; ++p, ++i) new (p) ResourceHandle();
-  }
-
-  // Runs string's default destructor for p[0], p[1], ..., p[n-1].
-  virtual void RunResourceDtor(ResourceHandle* p, size_t n) {
-    for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle();
-  }
-
-  virtual void RunVariantCtor(Variant* p, size_t n);
-
-  virtual void RunVariantDtor(Variant* p, size_t n);
-
-  // TODO(jeff): Maybe provide some interface to give info about
-  // current allocation state (total number of bytes available for
-  // allocation, number of bytes free on device, etc.)
 };
 
-// Allocator-specific constructors and destructors are used for
-// strings
-template <>
-inline void Allocator::RunCtor(string* p, size_t n) {
-  RunStringCtor(p, n);
-}
-
-template <>
-inline void Allocator::RunDtor(string* p, size_t n) {
-  RunStringDtor(p, n);
-}
-
-template <>
-inline void Allocator::RunCtor(ResourceHandle* p, size_t n) {
-  RunResourceCtor(p, n);
-}
-
-template <>
-inline void Allocator::RunDtor(ResourceHandle* p, size_t n) {
-  RunResourceDtor(p, n);
-}
-
-template <>
-inline void Allocator::RunCtor(Variant* p, size_t n) {
-  RunVariantCtor(p, n);
-}
-
-template <>
-inline void Allocator::RunDtor(Variant* p, size_t n) {
-  RunVariantDtor(p, n);
-}
-
 // An implementation of Allocator that delegates all calls to another Allocator.
 //
 // Useful to clients who want to override part of the functionality of another
@@ -336,8 +235,8 @@ class AllocatorWrapper : public Allocator {
     return wrapped_->TracksAllocationSizes();
   }
 
-  bool ShouldAllocateEmptyTensors() const override {
-    return wrapped_->TracksAllocationSizes();
+  bool AllocatesOpaqueHandle() const override {
+    return wrapped_->AllocatesOpaqueHandle();
   }
 
   size_t RequestedSize(const void* ptr) const override {


@@ -18,6 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include "tensorflow/core/framework/typed_allocator.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
@@ -102,14 +103,14 @@ TEST(CPUAllocatorTest, Simple) {
     a->DeallocateRaw(ptrs[i]);
   }
   CheckStats(a, 1023, 0, 552640, 1024);
-  float* t1 = a->Allocate<float>(1024);
-  double* t2 = a->Allocate<double>(1048576);
+  float* t1 = TypedAllocator::Allocate<float>(a, 1024, {});
+  double* t2 = TypedAllocator::Allocate<double>(a, 1048576, {});
   CheckStats(a, 1025, 1048576 * sizeof(double) + 1024 * sizeof(float),
              1048576 * sizeof(double) + 1024 * sizeof(float),
             1048576 * sizeof(double));
-  a->Deallocate(t1, 1024);
-  a->Deallocate(t2, 1048576);
+  TypedAllocator::Deallocate(a, t1, 1024);
+  TypedAllocator::Deallocate(a, t2, 1048576);
   CheckStats(a, 1025, 0, 1048576 * sizeof(double) + 1024 * sizeof(float),
              1048576 * sizeof(double));
@@ -130,7 +131,8 @@ TEST(CPUAllocatorTest, AllocateOverflowMaxSizeT) {
   // The maximum size_t value will definitely overflow.
   size_t count_to_allocate = std::numeric_limits<size_t>::max();
-  TestStruct* const test_pointer = a->Allocate<TestStruct>(count_to_allocate);
+  TestStruct* const test_pointer =
+      TypedAllocator::Allocate<TestStruct>(a, count_to_allocate, {});
   CHECK_EQ(test_pointer, reinterpret_cast<TestStruct*>(NULL));
 }
@@ -141,7 +143,8 @@ TEST(CPUAllocatorTest, AllocateOverflowSmallest) {
   // count_to_allocate is the smallest count that will cause overflow.
   const size_t count_to_allocate =
       (std::numeric_limits<size_t>::max() / sizeof(TestStruct)) + 1;
-  TestStruct* const test_pointer = a->Allocate<TestStruct>(count_to_allocate);
+  TestStruct* const test_pointer =
+      TypedAllocator::Allocate<TestStruct>(a, count_to_allocate, {});
   CHECK_EQ(test_pointer, reinterpret_cast<TestStruct*>(NULL));
 }


@@ -35,6 +35,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_description.pb.h"
 #include "tensorflow/core/framework/type_traits.h"
+#include "tensorflow/core/framework/typed_allocator.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/framework/variant_encode_decode.h"
@@ -443,12 +444,14 @@ struct ProtoHelper<Eigen::half> {
 template <typename T>
 Buffer<T>::Buffer(Allocator* a, int64 n)
-    : BufferBase(a, a->Allocate<T>(n)), elem_(n) {}
+    : BufferBase(a, TypedAllocator::Allocate<T>(a, n, AllocationAttributes())),
+      elem_(n) {}
 
 template <typename T>
 Buffer<T>::Buffer(Allocator* a, int64 n,
                   const AllocationAttributes& allocation_attr)
-    : BufferBase(a, a->Allocate<T>(n, allocation_attr)), elem_(n) {}
+    : BufferBase(a, TypedAllocator::Allocate<T>(a, n, allocation_attr)),
+      elem_(n) {}
 
 template <typename T>
 Buffer<T>::~Buffer() {
@@ -456,7 +459,7 @@ Buffer<T>::~Buffer() {
     if (LogMemory::IsEnabled()) {
       RecordDeallocation();
     }
-    alloc_->Deallocate<T>(static_cast<T*>(data()), elem_);
+    TypedAllocator::Deallocate<T>(alloc_, static_cast<T*>(data()), elem_);
   }
 }
@@ -734,7 +737,7 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape)
     : shape_(shape), buf_(nullptr) {
   set_dtype(type);
   CHECK_NOTNULL(a);
-  if (shape_.num_elements() > 0 || a->ShouldAllocateEmptyTensors()) {
+  if (shape_.num_elements() > 0 || a->AllocatesOpaqueHandle()) {
     CASES(type, buf_ = new Buffer<T>(a, shape.num_elements()));
   }
   if (buf_ != nullptr && buf_->data() != nullptr && LogMemory::IsEnabled()) {
@@ -748,7 +751,7 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape,
     : shape_(shape), buf_(nullptr) {
   set_dtype(type);
   CHECK_NOTNULL(a);
-  if (shape_.num_elements() > 0 || a->ShouldAllocateEmptyTensors()) {
+  if (shape_.num_elements() > 0 || a->AllocatesOpaqueHandle()) {
     CASES(type, buf_ = new Buffer<T>(a, shape.num_elements(), allocation_attr));
   }
   if (!allocation_attr.allocation_will_be_logged && buf_ != nullptr &&


@@ -0,0 +1,32 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/typed_allocator.h"
+
+#include "tensorflow/core/framework/variant.h"
+
+namespace tensorflow {
+
+/* static */
+void TypedAllocator::RunVariantCtor(Variant* p, size_t n) {
+  for (size_t i = 0; i < n; ++p, ++i) new (p) Variant();
+}
+
+/* static */
+void TypedAllocator::RunVariantDtor(Variant* p, size_t n) {
+  for (size_t i = 0; i < n; ++p, ++i) p->~Variant();
+}
+
+}  // namespace tensorflow


@@ -0,0 +1,133 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_FRAMEWORK_TYPED_ALLOCATOR_H_
+#define TENSORFLOW_CORE_FRAMEWORK_TYPED_ALLOCATOR_H_
+
+#include <limits>
+
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/resource_handle.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+class Variant;
+
+// Convenience functions to do typed allocation. C++ constructors
+// and destructors are invoked for complex types if necessary.
+class TypedAllocator {
+ public:
+  // May return NULL if the tensor has too many elements to represent in a
+  // single allocation.
+  template <typename T>
+  static T* Allocate(Allocator* raw_allocator, size_t num_elements,
+                     const AllocationAttributes& allocation_attr) {
+    // TODO(jeff): Do we need to allow clients to pass in alignment
+    // requirements?
+
+    if (num_elements > (std::numeric_limits<size_t>::max() / sizeof(T))) {
+      return nullptr;
+    }
+
+    void* p =
+        raw_allocator->AllocateRaw(Allocator::kAllocatorAlignment,
+                                   sizeof(T) * num_elements, allocation_attr);
+    T* typed_p = reinterpret_cast<T*>(p);
+    if (typed_p) RunCtor<T>(raw_allocator, typed_p, num_elements);
+    return typed_p;
+  }
+
+  template <typename T>
+  static void Deallocate(Allocator* raw_allocator, T* ptr,
+                         size_t num_elements) {
+    if (ptr) {
+      RunDtor<T>(raw_allocator, ptr, num_elements);
+      raw_allocator->DeallocateRaw(ptr);
+    }
+  }
+
+ private:
+  // No constructors or destructors are run for simple types
+  template <typename T>
+  static void RunCtor(Allocator* raw_allocator, T* p, size_t n) {
+    static_assert(is_simple_type<T>::value, "T is not a simple type.");
+  }
+
+  template <typename T>
+  static void RunDtor(Allocator* raw_allocator, T* p, size_t n) {}
+
+  static void RunVariantCtor(Variant* p, size_t n);
+
+  static void RunVariantDtor(Variant* p, size_t n);
+};
+
+template <>
+/* static */
+inline void TypedAllocator::RunCtor(Allocator* raw_allocator, string* p,
+                                    size_t n) {
+  if (!raw_allocator->AllocatesOpaqueHandle()) {
+    for (size_t i = 0; i < n; ++p, ++i) new (p) string();
+  }
+}
+
+template <>
+/* static */
+inline void TypedAllocator::RunDtor(Allocator* raw_allocator, string* p,
+                                    size_t n) {
+  if (!raw_allocator->AllocatesOpaqueHandle()) {
+    for (size_t i = 0; i < n; ++p, ++i) p->~string();
+  }
+}
+
+template <>
+/* static */
+inline void TypedAllocator::RunCtor(Allocator* raw_allocator, ResourceHandle* p,
+                                    size_t n) {
+  if (!raw_allocator->AllocatesOpaqueHandle()) {
+    for (size_t i = 0; i < n; ++p, ++i) new (p) ResourceHandle();
+  }
+}
+
+template <>
+/* static */
+inline void TypedAllocator::RunDtor(Allocator* raw_allocator, ResourceHandle* p,
+                                    size_t n) {
+  if (!raw_allocator->AllocatesOpaqueHandle()) {
+    for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle();
+  }
+}
+
+template <>
+/* static */
+inline void TypedAllocator::RunCtor(Allocator* raw_allocator, Variant* p,
+                                    size_t n) {
+  if (!raw_allocator->AllocatesOpaqueHandle()) {
+    RunVariantCtor(p, n);
+  }
+}
+
+template <>
+/* static */
+inline void TypedAllocator::RunDtor(Allocator* raw_allocator, Variant* p,
+                                    size_t n) {
+  if (!raw_allocator->AllocatesOpaqueHandle()) {
+    RunVariantDtor(p, n);
+  }
+}
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_FRAMEWORK_TYPED_ALLOCATOR_H_


@@ -15,11 +15,12 @@ limitations under the License.
 #include "tensorflow/lite/delegates/flex/buffer_map.h"
 
 #include "tensorflow/c/c_api_internal.h"
+#include "tensorflow/core/framework/allocation_description.pb.h"
+#include "tensorflow/core/framework/log_memory.h"
+#include "tensorflow/core/framework/typed_allocator.h"
 #include "tensorflow/lite/delegates/flex/util.h"
 #include "tensorflow/lite/string.h"
 #include "tensorflow/lite/string_util.h"
-#include "tensorflow/core/framework/allocation_description.pb.h"
-#include "tensorflow/core/framework/log_memory.h"
 
 namespace tflite {
 namespace flex {
@@ -99,8 +100,9 @@ class StringTfLiteTensorBuffer : public BaseTfLiteTensorBuffer {
   ~StringTfLiteTensorBuffer() override {
     LogDeallocation();
-    tensorflow::cpu_allocator()->Deallocate<string>(
-        static_cast<string*>(data()), num_strings_);
+    tensorflow::TypedAllocator::Deallocate<tensorflow::string>(
+        tensorflow::cpu_allocator(), static_cast<tensorflow::string*>(data()),
+        num_strings_);
   }
 
   size_t size() const override { return num_strings_ * sizeof(string); }
@@ -109,7 +111,9 @@ class StringTfLiteTensorBuffer : public BaseTfLiteTensorBuffer {
   StringTfLiteTensorBuffer(const TfLiteTensor* tensor, int num_strings)
       : BaseTfLiteTensorBuffer(
            num_strings != 0
-                ? tensorflow::cpu_allocator()->Allocate<string>(num_strings)
+                ? tensorflow::TypedAllocator::Allocate<tensorflow::string>(
+                      tensorflow::cpu_allocator(), num_strings,
+                      tensorflow::AllocationAttributes())
                 : nullptr),
         num_strings_(num_strings) {
     LogAllocation();