Implement something similar to std::align since it's not supported.
PiperOrigin-RevId: 205316355
This commit is contained in:
parent
716d15118f
commit
3647625e53
@ -80,6 +80,7 @@ cc_library(
|
||||
copts = tf_copts(),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":trt_allocator",
|
||||
":trt_logging",
|
||||
":trt_plugins",
|
||||
":trt_resources",
|
||||
@ -195,17 +196,16 @@ tf_py_wrap_cc(
|
||||
tf_cuda_library(
|
||||
name = "trt_resources",
|
||||
srcs = [
|
||||
"resources/trt_allocator.cc",
|
||||
"resources/trt_int8_calibrator.cc",
|
||||
"resources/trt_resource_manager.cc",
|
||||
],
|
||||
hdrs = [
|
||||
"resources/trt_allocator.h",
|
||||
"resources/trt_int8_calibrator.h",
|
||||
"resources/trt_resource_manager.h",
|
||||
"resources/trt_resources.h",
|
||||
],
|
||||
deps = [
|
||||
":trt_allocator",
|
||||
":trt_logging",
|
||||
":utils",
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
@ -216,6 +216,31 @@ tf_cuda_library(
|
||||
]),
|
||||
)
|
||||
|
||||
# Library for the TensorRT allocator (resources/trt_allocator.{h,cc}). Other
# targets in this file pull it in through the ":trt_allocator" label. The
# TensorRT nv_infer dependency is appended through the if_tensorrt() macro.
tf_cuda_library(
|
||||
name = "trt_allocator",
|
||||
srcs = ["resources/trt_allocator.cc"],
|
||||
hdrs = ["resources/trt_allocator.h"],
|
||||
deps = [
|
||||
"//tensorflow/core:framework_headers_lib",
|
||||
"//tensorflow/core:framework_lite",
|
||||
"//tensorflow/core:lib_proto_parsing",
|
||||
] + if_tensorrt([
|
||||
"@local_config_tensorrt//:nv_infer",
|
||||
]),
|
||||
)
|
||||
|
||||
# Unit test for the ":trt_allocator" library, built from
# resources/trt_allocator_test.cc. Carries the "no_windows" tag.
tf_cc_test(
|
||||
name = "trt_allocator_test",
|
||||
size = "small",
|
||||
srcs = ["resources/trt_allocator_test.cc"],
|
||||
tags = ["no_windows"],
|
||||
deps = [
|
||||
":trt_allocator",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
],
|
||||
)
|
||||
|
||||
# Library for the node-level conversion portion of TensorRT operation creation
|
||||
tf_cuda_library(
|
||||
name = "trt_conversion",
|
||||
@ -231,6 +256,7 @@ tf_cuda_library(
|
||||
],
|
||||
deps = [
|
||||
":segment",
|
||||
":trt_allocator",
|
||||
":trt_plugins",
|
||||
":trt_logging",
|
||||
":trt_resources",
|
||||
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/contrib/tensorrt/convert/utils.h"
|
||||
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
|
||||
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
|
||||
#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
|
||||
#include "tensorflow/core/framework/graph.pb.h"
|
||||
|
@ -19,12 +19,42 @@ limitations under the License.
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#if GOOGLE_TENSORRT
|
||||
|
||||
#if NV_TENSORRT_MAJOR > 2
|
||||
#include "cuda/include/cuda_runtime_api.h"
|
||||
#endif // GOOGLE_TENSORRT
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tensorrt {
|
||||
|
||||
// std::align is not supported, so this method mimic its behavior.
|
||||
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space) {
|
||||
QCHECK_GT(alignment, 0) << "alignment must be greater than 0.";
|
||||
QCHECK_EQ(0, alignment & (alignment - 1)) << "Alignment must be power of 2.";
|
||||
QCHECK_GT(size, 0) << "size must be greater than 0.";
|
||||
QCHECK(ptr) << "ptr must not be nullptr.";
|
||||
QCHECK_GT(space, 0) << "space must be greater than 0.";
|
||||
const uintptr_t ptr_val = reinterpret_cast<uintptr_t>(ptr);
|
||||
QCHECK_GE(ptr_val + space, ptr_val) << "Provided space overflows.";
|
||||
|
||||
if (size > space) return nullptr;
|
||||
const uintptr_t aligned_ptr_val = ((ptr_val + alignment - 1) & -alignment);
|
||||
if (aligned_ptr_val > ptr_val + space - size) return nullptr;
|
||||
ptr = reinterpret_cast<void*>(aligned_ptr_val);
|
||||
const uintptr_t diff = aligned_ptr_val - ptr_val;
|
||||
space -= diff;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
} // namespace tensorrt
|
||||
} // namespace tensorflow
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#if GOOGLE_TENSORRT
|
||||
#if NV_TENSORRT_MAJOR > 2
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tensorrt {
|
||||
|
||||
void* TRTCudaAllocator::allocate(uint64_t size, uint64_t alignment,
|
||||
uint32_t flags) {
|
||||
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
|
||||
@ -44,17 +74,16 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment,
|
||||
assert((alignment & (alignment - 1)) == 0); // zero or a power of 2.
|
||||
size_t total_size = size + alignment;
|
||||
void* mem = allocator_->AllocateRaw(alignment, total_size);
|
||||
if (!mem) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!mem) return nullptr;
|
||||
|
||||
void* alloc_mem = mem;
|
||||
CHECK(std::align(alignment, size, mem, total_size));
|
||||
QCHECK(Align(alignment, size, mem, total_size));
|
||||
if (mem != alloc_mem) {
|
||||
CHECK(mem_map_.insert({mem, alloc_mem}).second);
|
||||
QCHECK(mem_map_.insert({mem, alloc_mem}).second);
|
||||
}
|
||||
VLOG(2) << "Allocated " << size << " bytes with alignment " << alignment
|
||||
<< " @ " << mem;
|
||||
VLOG(2) << "Allocated " << total_size << " bytes memory @" << alloc_mem
|
||||
<< "; aligned to " << size << " bytes @" << mem << " with alignment "
|
||||
<< alignment;
|
||||
return mem;
|
||||
}
|
||||
|
||||
@ -80,5 +109,5 @@ void TRTDeviceAllocator::free(void* memory) {
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif // GOOGLE_TENSORRT
|
||||
#endif // GOOGLE_CUDA
|
||||
|
@ -16,13 +16,25 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
|
||||
#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_ALLOCATOR_H_
|
||||
|
||||
#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
|
||||
#include <unordered_map>
|
||||
|
||||
#include "tensorflow/core/framework/allocator.h"
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#if GOOGLE_TENSORRT
|
||||
#include "tensorrt/include/NvInfer.h"
|
||||
#endif // GOOGLE_TENSORRT
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tensorrt {
|
||||
// std::align is not supported, so this function mimics its behavior.
|
||||
void* Align(size_t alignment, size_t size, void*& ptr, size_t& space);
|
||||
} // namespace tensorrt
|
||||
} // namespace tensorflow
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#if GOOGLE_TENSORRT
|
||||
#if NV_TENSORRT_MAJOR == 3
|
||||
// Define interface here temporarily until TRT 4.0 is released
|
||||
namespace nvinfer1 {
|
||||
|
79
tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
Normal file
79
tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
Normal file
@ -0,0 +1,79 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
|
||||
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace tensorrt {
|
||||
|
||||
bool RunTest(const size_t alignment, const size_t size,
|
||||
const intptr_t orig_ptr_val, const size_t orig_space) {
|
||||
void* const orig_ptr = reinterpret_cast<void*>(orig_ptr_val);
|
||||
void* ptr = orig_ptr;
|
||||
size_t space = orig_space;
|
||||
void* result = Align(alignment, size, ptr, space);
|
||||
if (result == nullptr) {
|
||||
EXPECT_EQ(orig_ptr, ptr);
|
||||
EXPECT_EQ(orig_space, space);
|
||||
return false;
|
||||
} else {
|
||||
EXPECT_EQ(result, ptr);
|
||||
const intptr_t ptr_val = reinterpret_cast<intptr_t>(ptr);
|
||||
EXPECT_EQ(0, ptr_val % alignment);
|
||||
EXPECT_GE(ptr_val, orig_ptr_val);
|
||||
EXPECT_GE(space, size);
|
||||
EXPECT_LE(space, orig_space);
|
||||
EXPECT_EQ(ptr_val + space, orig_ptr_val + orig_space);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises Align() over a grid of buffer sizes, power-of-two alignments,
// start addresses around each alignment boundary, and request sizes around
// the amount of usable space.
TEST(TRTAllocatorTest, Align) {
  // All elements of a braced list iterated by a range-for must have the same
  // type. Mixing the literal `1ul` with size_t expressions only compiles where
  // size_t is unsigned long, so a size_t constant is used instead.
  constexpr size_t kOne = 1;

  for (const size_t space :
       {1, 2, 3, 4, 7, 8, 9, 10, 16, 32, 511, 512, 513, 700, 12345}) {
    for (size_t alignment = 1; alignment <= space * 4; alignment *= 2) {
      for (const intptr_t ptr_val :
           {kOne, alignment == 1 ? kOne : alignment - 1, alignment,
            alignment + 1, alignment + (alignment / 2)}) {
        if (ptr_val % alignment == 0) {
          // Already aligned: the call succeeds exactly when the request fits.
          for (const size_t size :
               {kOne, space == 1 ? kOne : space - 1, space, space + 1}) {
            EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
          }
        } else {
          // Unaligned start: a request for the whole buffer can never fit.
          EXPECT_FALSE(RunTest(alignment, space, ptr_val, space));
          // Bytes that have to be skipped to reach the next aligned address.
          const size_t diff = alignment - ptr_val % alignment;
          if (space > diff) {
            // Starting from the aligned address, the rest of the buffer fits.
            EXPECT_TRUE(
                RunTest(alignment, space - diff, ptr_val + diff, space - diff));
            // From the unaligned start, only `space - diff` bytes are usable.
            for (const size_t size :
                 {kOne, space - diff > 1 ? space - diff - 1 : kOne,
                  space - diff, space - diff + 1, space - 1}) {
              EXPECT_EQ(space - diff >= size,
                        RunTest(alignment, size, ptr_val, space));
            }
          } else {
            // The buffer ends before the next aligned address.
            EXPECT_FALSE(RunTest(alignment, 1, ptr_val, space));
          }
        }
      }
    }
  }
}
|
||||
|
||||
} // namespace tensorrt
|
||||
} // namespace tensorflow
|
Loading…
Reference in New Issue
Block a user