Implement something similar to std::align since it's not supported.

PiperOrigin-RevId: 205316355
This commit is contained in:
Guangda Lai 2018-07-19 15:53:13 -07:00 committed by TensorFlower Gardener
parent 716d15118f
commit 3647625e53
5 changed files with 161 additions and 14 deletions

View File

@ -80,6 +80,7 @@ cc_library(
copts = tf_copts(),
visibility = ["//visibility:public"],
deps = [
":trt_allocator",
":trt_logging",
":trt_plugins",
":trt_resources",
@ -195,17 +196,16 @@ tf_py_wrap_cc(
tf_cuda_library(
name = "trt_resources",
srcs = [
"resources/trt_allocator.cc",
"resources/trt_int8_calibrator.cc",
"resources/trt_resource_manager.cc",
],
hdrs = [
"resources/trt_allocator.h",
"resources/trt_int8_calibrator.h",
"resources/trt_resource_manager.h",
"resources/trt_resources.h",
],
deps = [
":trt_allocator",
":trt_logging",
":utils",
"//tensorflow/core:framework_headers_lib",
@ -216,6 +216,31 @@ tf_cuda_library(
]),
)
# Stand-alone allocator library, split out of :trt_resources so that both
# the resource libraries and the conversion code can depend on it without
# creating a dependency cycle.
tf_cuda_library(
name = "trt_allocator",
srcs = ["resources/trt_allocator.cc"],
hdrs = ["resources/trt_allocator.h"],
deps = [
"//tensorflow/core:framework_headers_lib",
"//tensorflow/core:framework_lite",
"//tensorflow/core:lib_proto_parsing",
] + if_tensorrt([
"@local_config_tensorrt//:nv_infer",
]),
)
# Unit test for the Align() helper in :trt_allocator.
tf_cc_test(
name = "trt_allocator_test",
size = "small",
srcs = ["resources/trt_allocator_test.cc"],
tags = ["no_windows"],
deps = [
":trt_allocator",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
)
# Library for the node-level conversion portion of TensorRT operation creation
tf_cuda_library(
name = "trt_conversion",
@ -231,6 +256,7 @@ tf_cuda_library(
],
deps = [
":segment",
":trt_allocator",
":trt_plugins",
":trt_logging",
":trt_resources",

View File

@ -23,6 +23,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/contrib/tensorrt/convert/utils.h"
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
#include "tensorflow/core/framework/graph.pb.h"

View File

@ -19,12 +19,42 @@ limitations under the License.
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2
#include "cuda/include/cuda_runtime_api.h"
#endif // GOOGLE_TENSORRT
#endif // GOOGLE_CUDA
namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this method mimics its behavior.
//
// Rounds `ptr` up to the next multiple of `alignment` and, if `size` bytes
// still fit within the remaining `space`, updates `ptr` and `space` in place
// (shrinking `space` by the number of bytes consumed by the adjustment) and
// returns the aligned pointer. Otherwise returns nullptr and leaves `ptr`
// and `space` untouched — matching the std::align contract.
//
// Preconditions (QCHECK-enforced): alignment is a nonzero power of two,
// size > 0, ptr != nullptr, space > 0, and ptr + space does not overflow.
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
  QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
  QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
  QCHECK_GT(size, 0) << "size must be greater than 0.";
  QCHECK(ptr) << "ptr must not be nullptr.";
  QCHECK_GT(space, 0) << "space must be greater than 0.";
  const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
  QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";

  if (size > space) return nullptr;
  // Round up to the next multiple of `alignment`. `-alignment` is the
  // two's-complement mask with the low log2(alignment) bits cleared.
  const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment);
  // Guard against wrap-around of the rounded-up value when ptr lies within
  // `alignment` bytes of the top of the address space; without this check a
  // wrapped (small) value could pass the bounds test below and we would
  // return a pointer below the buffer.
  if (aligned_ptr_val < ptr_val) return nullptr;
  if (aligned_ptr_val > ptr_val + space - size) return nullptr;
  ptr = reinterpret_cast<void*>(aligned_ptr_val);
  const uintptr_t diff = aligned_ptr_val - ptr_val;
  space -= diff;
  return ptr;
}
} // namespace tensorrt
} // namespace tensorflow
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR > 2
namespace tensorflow {
namespace tensorrt {
void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
uint32_t flags) {
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
@ -44,17 +74,16 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
size_t total_size = size + alignment;
void* mem = allocator_->AllocateRaw(alignment, total_size);
if (!mem) {
return nullptr;
}
if (!mem) return nullptr;
void* alloc_mem = mem;
CHECK(std::align(alignment, size, mem, total_size));
QCHECK(Align(alignment, size, mem, total_size));
if (mem != alloc_mem) {
CHECK(mem_map_.insert({mem, alloc_mem}).second);
QCHECK(mem_map_.insert({mem, alloc_mem}).second);
}
VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment
<< " @ " << mem;
VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem
<< "; aligned to " << size << " bytes @" << mem << " with alignment "
<< alignment;
return mem;
}
@ -80,5 +109,5 @@ void TRTDeviceAllocator::free(void* memory) {
} // namespace tensorflow
#endif
#endif
#endif
#endif // GOOGLE_TENSORRT
#endif // GOOGLE_CUDA

View File

@ -16,13 +16,25 @@ limitations under the License.
#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include <unordered_map>
#include "tensorflow/core/framework/allocator.h"
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#include "tensorrt/include/NvInfer.h"
#endif // GOOGLE_TENSORRT
#endif // GOOGLE_CUDA
namespace tensorflow {
namespace tensorrt {
// std::align is not supported, so this function mimics its behavior.
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
} // namespace tensorrt
} // namespace tensorflow
#if GOOGLE_CUDA
#if GOOGLE_TENSORRT
#if NV_TENSORRT_MAJOR == 3
// Define interface here temporarily until TRT 4.0 is released
namespace nvinfer1 {

View File

@ -0,0 +1,79 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
namespace tensorrt {
// Exercises Align() once and verifies the std::align contract:
// on failure (nullptr result) `ptr`/`space` must be left untouched; on
// success the returned pointer equals the updated `ptr`, is aligned, stays
// within the original buffer, and the end of the usable region is unchanged.
// Returns true iff Align() succeeded.
bool RunTest(const size_t alignment, const size_t size,
             const intptr_t orig_ptr_val, const size_t orig_space) {
  void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
  void* ptr = orig_ptr;
  size_t space = orig_space;
  void* const result = Align(alignment, size, ptr, space);

  if (result == nullptr) {
    // A failed call must not modify the in/out arguments.
    EXPECT_EQ(orig_ptr, ptr);
    EXPECT_EQ(orig_space, space);
    return false;
  }

  EXPECT_EQ(result, ptr);
  const intptr_t ptr_val = reinterpret_cast<intptr_t>(ptr);
  EXPECT_EQ(0, ptr_val % alignment);       // Result is aligned.
  EXPECT_GE(ptr_val, orig_ptr_val);        // Never moves backwards.
  EXPECT_GE(space, size);                  // Requested size still fits.
  EXPECT_LE(space, orig_space);            // Space only shrinks.
  // End of the usable region is preserved exactly.
  EXPECT_EQ(ptr_val + space, orig_ptr_val + orig_space);
  return true;
}
// Exhaustively exercises Align() over a grid of (space, alignment, ptr,
// size) combinations, covering both already-aligned and misaligned input
// pointers.
TEST(TRTAllocatorTest, Align) {
for (const size_t space :
{1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
// Try alignments both smaller and larger than the available space.
for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
// Pointer values around the alignment boundary: just below, exactly on,
// and just above a multiple of `alignment`.
for (const intptr_t ptr_val :
{1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
alignment + (alignment / 2)}) {
if (ptr_val % alignment == 0) {
// Already aligned: Align() succeeds exactly when size fits in space.
for (const size_t size :
{1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
}
} else {
// Misaligned: requesting the full space can never succeed, since the
// alignment adjustment consumes at least one byte.
EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
const size_t diff = alignment - ptr_val % alignment;
if (space > diff) {
// After manually advancing past the adjustment, the pointer is
// aligned and the remaining space is usable in full.
EXPECT_TRUE(
RunTest(alignment, space - diff, ptr_val + diff, space - diff));
// From the misaligned pointer, success requires size to fit in the
// space left after the adjustment.
for (const size_t size :
{1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
space - diff + 1, space - 1}) {
EXPECT_EQ(space - diff >= size,
RunTest(alignment, size, ptr_val, space));
}
} else {
// Not even one byte remains after alignment; any request fails.
EXPECT_FALSE(RunTest(alignment, 1, ptr_val, space));
}
}
}
}
}
}
} // namespace tensorrt
} // namespace tensorflow