Implement something similar to std::align since it's not supported.

PiperOrigin-RevId: 205316355
This commit is contained in:
Guangda Lai 2018-07-19 15:53:13 -07:00 committed by TensorFlower Gardener
parent 716d15118f
commit 3647625e53
5 changed files with 161 additions and 14 deletions

View File

@ -80,6 +80,7 @@ cc_library(
copts = tf_copts(),
visibility = ["//visibility:public"],
deps = [
":trt_allocator",
":trt_logging",
":trt_plugins",
":trt_resources",
@ -195,17 +196,16 @@ tf_py_wrap_cc(
tf_cuda_library(
name = "trt_resources",
srcs = [
"resources/trt_allocator.cc",
"resources/trt_int8_calibrator.cc",
"resources/trt_resource_manager.cc",
],
hdrs = [
"resources/trt_allocator.h",
"resources/trt_int8_calibrator.h",
"resources/trt_resource_manager.h",
"resources/trt_resources.h",
],
deps = [
":trt_allocator",
":trt_logging",
":utils",
"//tensorflow/core:framework_headers_lib",
@ -216,6 +216,31 @@ tf_cuda_library(
]),
)
# Stand-alone allocator library, split out of :trt_resources so that both
# the resource libraries and the conversion code can depend on it without
# creating a dependency cycle.
tf_cuda_library(
name = "trt_allocator",
srcs = ["resources/trt_allocator.cc"],
hdrs = ["resources/trt_allocator.h"],
deps = [
"//tensorflow/core:framework_headers_lib",
"//tensorflow/core:framework_lite",
"//tensorflow/core:lib_proto_parsing",
] + if_tensorrt([
"@local_config_tensorrt//:nv_infer",
]),
)
# Unit test for the Align() helper in :trt_allocator.
tf_cc_test(
name = "trt_allocator_test",
size = "small",
srcs = ["resources/trt_allocator_test.cc"],
tags = ["no_windows"],
deps = [
":trt_allocator",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
)
# Library for the node-level conversion portion of TensorRT operation creation
tf_cuda_library(
name = "trt_conversion",
@ -231,6 +256,7 @@ tf_cuda_library(
],
deps = [
":segment",
":trt_allocator",
":trt_plugins",
":trt_logging",
":trt_resources",

View File

@ -23,6 +23,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/contrib/tensorrt/convert/utils.h"
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -19,12 +19,42 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2
#include "cuda/include/cuda_runtime_api.h"
#endif // GOOGLE_TENSORRT
#endif // GOOGLE_CUDA
namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this method mimics its behavior.
//
// Rounds `ptr` up to the next multiple of `alignment` and, if `size` bytes
// still fit within the remaining `space`, updates `ptr` and `space` in place
// (shrinking `space` by the number of bytes consumed by the adjustment) and
// returns the aligned pointer. Otherwise returns nullptr and leaves `ptr`
// and `space` untouched — matching the std::align contract.
//
// Preconditions (QCHECK-enforced): alignment is a nonzero power of two,
// size > 0, ptr != nullptr, space > 0, and ptr + space does not overflow.
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
  QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
  QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
  QCHECK_GT(size, 0) << "size must be greater than 0.";
  QCHECK(ptr) << "ptr must not be nullptr.";
  QCHECK_GT(space, 0) << "space must be greater than 0.";
  const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
  QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";

  if (size > space) return nullptr;
  // Round up to the next multiple of `alignment`. `-alignment` is the
  // two's-complement mask with the low log2(alignment) bits cleared.
  const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment);
  // Guard against wrap-around of the rounded-up value when ptr lies within
  // `alignment` bytes of the top of the address space; without this check a
  // wrapped (small) value could pass the bounds test below and we would
  // return a pointer below the buffer.
  if (aligned_ptr_val < ptr_val) return nullptr;
  if (aligned_ptr_val > ptr_val + space - size) return nullptr;
  ptr = reinterpret_cast<void*>(aligned_ptr_val);
  const uintptr_t diff = aligned_ptr_val - ptr_val;
  space -= diff;
  return ptr;
}
} // namespace tensorrt
} // namespace tensorflow
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2
namespace tensorflow {
namespace tensorrt {
void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
@ -44,17 +74,16 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
size_t total_size = size + alignment;
void* mem = allocator_->AllocateRaw(alignment, total_size);
if (!mem) {
return nullptr;
}
if (!mem) return nullptr;
void* alloc_mem = mem;
CHECK(std::align(alignment, size, mem, total_size));
QCHECK(Align(alignment, size, mem, total_size));
if (mem != alloc_mem) {
CHECK(mem_map_.insert({mem, alloc_mem}).second);
QCHECK(mem_map_.insert({mem, alloc_mem}).second);
}
VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment
<< " @ " << mem;
VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem
<< "; aligned to " << size << " bytes @" << mem << " with alignment "
<< alignment;
return mem;
}
@ -80,5 +109,5 @@ void TRTDeviceAllocator::free(void* memory) {
} // namespace tensorflow
#endif
#endif
#endif
#endif // GOOGLE_TENSORRT
#endif // GOOGLE_CUDA

View File

@ -16,13 +16,25 @@ limitations under the License.
#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include <unordered_map>
#include "tensorflow/core/framework/allocator.h"
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#include "tensorrt/include/NvInfer.h"
#endif // GOOGLE_TENSORRT
#endif // GOOGLE_CUDA
namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this function mimics its behavior.
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
} // namespace tensorrt
} // namespace tensorflow
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR == 3
// Define interface here temporarily until TRT 4.0 is released
namespace nvinfer1 {

View File

@ -0,0 +1,79 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
namespace tensorrt {
// Exercises Align() once and verifies the std::align contract:
// on failure (nullptr result) `ptr`/`space` must be left untouched; on
// success the returned pointer equals the updated `ptr`, is aligned, stays
// within the original buffer, and the end of the usable region is unchanged.
// Returns true iff Align() succeeded.
bool RunTest(const size_t alignment, const size_t size,
             const intptr_t orig_ptr_val, const size_t orig_space) {
  void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
  void* ptr = orig_ptr;
  size_t space = orig_space;
  void* const result = Align(alignment, size, ptr, space);

  if (result == nullptr) {
    // A failed call must not modify the in/out arguments.
    EXPECT_EQ(orig_ptr, ptr);
    EXPECT_EQ(orig_space, space);
    return false;
  }

  EXPECT_EQ(result, ptr);
  const intptr_t ptr_val = reinterpret_cast<intptr_t>(ptr);
  EXPECT_EQ(0, ptr_val % alignment);       // Result is aligned.
  EXPECT_GE(ptr_val, orig_ptr_val);        // Never moves backwards.
  EXPECT_GE(space, size);                  // Requested size still fits.
  EXPECT_LE(space, orig_space);            // Space only shrinks.
  // End of the usable region is preserved exactly.
  EXPECT_EQ(ptr_val + space, orig_ptr_val + orig_space);
  return true;
}
// Exhaustively exercises Align() over a grid of (space, alignment, ptr,
// size) combinations, covering both already-aligned and misaligned input
// pointers.
TEST(TRTAllocatorTest, Align) {
for (const size_t space :
{1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
// Try alignments both smaller and larger than the available space.
for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
// Pointer values around the alignment boundary: just below, exactly on,
// and just above a multiple of `alignment`.
for (const intptr_t ptr_val :
{1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
alignment + (alignment / 2)}) {
if (ptr_val % alignment == 0) {
// Already aligned: Align() succeeds exactly when size fits in space.
for (const size_t size :
{1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
}
} else {
// Misaligned: requesting the full space can never succeed, since the
// alignment adjustment consumes at least one byte.
EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
const size_t diff = alignment - ptr_val % alignment;
if (space > diff) {
// After manually advancing past the adjustment, the pointer is
// aligned and the remaining space is usable in full.
EXPECT_TRUE(
RunTest(alignment, space - diff, ptr_val + diff, space - diff));
// From the misaligned pointer, success requires size to fit in the
// space left after the adjustment.
for (const size_t size :
{1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
space - diff + 1, space - 1}) {
EXPECT_EQ(space - diff >= size,
RunTest(alignment, size, ptr_val, space));
}
} else {
// Not even one byte remains after alignment; any request fails.
EXPECT_FALSE(RunTest(alignment, 1, ptr_val, space));
}
}
}
}
}
}
} // namespace tensorrt
} // namespace tensorflow