From d1022145203c1edb81a39010da3f61207533091d Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 13 May 2019 17:02:18 -0700
Subject: [PATCH] Factor out GPUMemAllocator into its own include.

This allows using GPUMemAllocator without pulling in TF protobufs as a dependency.

PiperOrigin-RevId: 248040160
---
 tensorflow/core/BUILD                         | 19 ++++-
 .../common_runtime/gpu/gpu_bfc_allocator.h    | 54 +------------
 .../common_runtime/gpu/gpu_mem_allocator.h    | 78 +++++++++++++++++++
 3 files changed, 96 insertions(+), 55 deletions(-)
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 553d1901a9e..4472b4a72dd 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3447,16 +3447,17 @@ cc_library(
 )
 
 GPU_RUNTIME_HEADERS = [
-    "common_runtime/gpu/gpu_host_allocator.h",
     "common_runtime/gpu/gpu_bfc_allocator.h",
     "common_runtime/gpu/gpu_cudamalloc_allocator.h",
     "common_runtime/gpu/gpu_debug_allocator.h",
     "common_runtime/gpu/gpu_device.h",
+    "common_runtime/gpu/gpu_host_allocator.h",
     "common_runtime/gpu/gpu_id.h",
     "common_runtime/gpu/gpu_id_manager.h",
     "common_runtime/gpu/gpu_id_utils.h",
     "common_runtime/gpu/gpu_init.h",
     "common_runtime/gpu/gpu_managed_allocator.h",
+    "common_runtime/gpu/gpu_mem_allocator.h",
     "common_runtime/gpu/gpu_process_state.h",
     "common_runtime/gpu/gpu_stream_util.h",
     "common_runtime/gpu/gpu_util.h",
@@ -3520,16 +3521,30 @@ tf_cuda_library(
     name = "gpu_bfc_allocator",
     srcs = [
         "common_runtime/gpu/gpu_bfc_allocator.cc",
-        "common_runtime/gpu/gpu_id.h",
     ],
     hdrs = ["common_runtime/gpu/gpu_bfc_allocator.h"],
     features = ["parse_headers"],
     visibility = ["//visibility:public"],
     deps = [
         ":bfc_allocator",
+        ":gpu_mem_allocator",
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
+    ],
+)
+
+tf_cuda_library(
+    name = "gpu_mem_allocator",
+    srcs = [
+        "common_runtime/gpu/gpu_id.h",
+    ],
+    hdrs = ["common_runtime/gpu/gpu_mem_allocator.h"],
+    features = ["parse_headers"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":allocator",
+        ":lib_internal",
         ":stream_executor",
     ],
 )
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index fb4ca3ffdb7..5cae743115f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -22,65 +22,13 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
-#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 
-// Suballocator for GPU memory.
-class GPUMemAllocator : public SubAllocator {
- public:
-  // 'platform_gpu_id' refers to the ID of the GPU device within
-  // the process and must reference a valid ID in the process.
-  // Note: stream_exec cannot be null.
-  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
-                           PlatformGpuId gpu_id, bool use_unified_memory,
-                           const std::vector<Visitor>& alloc_visitors,
-                           const std::vector<Visitor>& free_visitors)
-      : SubAllocator(alloc_visitors, free_visitors),
-        stream_exec_(stream_exec),
-        gpu_id_(gpu_id),
-        use_unified_memory_(use_unified_memory) {
-    CHECK(stream_exec_ != nullptr);
-  }
-  ~GPUMemAllocator() override {}
-
-  void* Alloc(size_t alignment, size_t num_bytes) override {
-    void* ptr = nullptr;
-    if (num_bytes > 0) {
-      if (use_unified_memory_) {
-        ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);
-      } else {
-        ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
-      }
-      VisitAlloc(ptr, gpu_id_.value(), num_bytes);
-    }
-    return ptr;
-  }
-
-  void Free(void* ptr, size_t num_bytes) override {
-    if (ptr != nullptr) {
-      VisitFree(ptr, gpu_id_.value(), num_bytes);
-      if (use_unified_memory_) {
-        stream_exec_->UnifiedMemoryDeallocate(ptr);
-      } else {
-        se::DeviceMemoryBase gpu_ptr(ptr);
-        stream_exec_->Deallocate(&gpu_ptr);
-      }
-    }
-  }
-
- private:
-  se::StreamExecutor* stream_exec_;  // not owned, non-null
-  const PlatformGpuId gpu_id_;
-  const bool use_unified_memory_ = false;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
-};
-
 // A GPU memory allocator that implements a 'best-fit with coalescing'
 // algorithm.
 class GPUBFCAllocator : public BFCAllocator {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
new file mode 100644
index 00000000000..e14f2d9377a
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
@@ -0,0 +1,78 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/stream_executor.h"
+
+namespace tensorflow {
+
+// Suballocator for GPU memory, backed by a se::StreamExecutor.
+class GPUMemAllocator : public SubAllocator {
+ public:
+  // 'gpu_id' (a PlatformGpuId) refers to the ID of the GPU device within
+  // the process and must reference a valid ID in the process.
+  // Note: stream_exec cannot be null.
+  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
+                           PlatformGpuId gpu_id, bool use_unified_memory,
+                           const std::vector<Visitor>& alloc_visitors,
+                           const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors),
+        stream_exec_(stream_exec),
+        gpu_id_(gpu_id),
+        use_unified_memory_(use_unified_memory) {
+    CHECK(stream_exec_ != nullptr);
+  }
+  ~GPUMemAllocator() override {}
+
+  void* Alloc(size_t alignment, size_t num_bytes) override {
+    void* ptr = nullptr;  // returned unchanged when num_bytes == 0
+    if (num_bytes > 0) {  // NOTE: 'alignment' is not used by this method
+      if (use_unified_memory_) {
+        ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);
+      } else {
+        ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
+      }
+      VisitAlloc(ptr, gpu_id_.value(), num_bytes);  // no null check on ptr
+    }
+    return ptr;
+  }
+
+  void Free(void* ptr, size_t num_bytes) override {
+    if (ptr != nullptr) {  // a null ptr is ignored
+      VisitFree(ptr, gpu_id_.value(), num_bytes);  // before the release
+      if (use_unified_memory_) {
+        stream_exec_->UnifiedMemoryDeallocate(ptr);
+      } else {
+        se::DeviceMemoryBase gpu_ptr(ptr);
+        stream_exec_->Deallocate(&gpu_ptr);
+      }
+    }
+  }
+
+ private:
+  se::StreamExecutor* stream_exec_;  // not owned, non-null
+  const PlatformGpuId gpu_id_;  // its value() is passed to the visitors
+  const bool use_unified_memory_ = false;  // selects UnifiedMemory* calls
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_