Merge pull request #41713 from dnguyen28061:allocate_temp

PiperOrigin-RevId: 326305159
Change-Id: Ic95dcb86e7bc58ced1666c7adcbddb9cc99dd2f4
This commit is contained in:
TensorFlower Gardener 2020-08-12 14:08:54 -07:00
commit efa82dd9a7
6 changed files with 225 additions and 36 deletions

View File

@ -23,6 +23,7 @@ filegroup(
srcs = [
"c_api.h",
"c_api_experimental.h",
"c_api_macros.h",
"tensor_interface.h",
"tf_attrtype.h",
"tf_datatype.h",
@ -61,6 +62,7 @@ filegroup(
name = "pywrap_required_hdrs",
srcs = [
"c_api_internal.h",
"c_api_macros.h",
"conversion_macros.h",
"python_api.h",
"tensor_interface.h",
@ -79,6 +81,7 @@ tf_cuda_library(
hdrs = [
"c_api.h",
"c_api_internal.h",
"c_api_macros.h",
"tf_datatype.h",
"tf_tensor.h",
"tf_tstring.h",
@ -310,6 +313,7 @@ cc_library(
hdrs = ["tf_tensor.h"],
visibility = ["//visibility:public"],
deps = [
":c_api_macros",
":tensor_interface",
":tf_datatype",
":tf_status",
@ -336,6 +340,7 @@ tf_cuda_library(
],
visibility = ["//tensorflow:internal"],
deps = [
":c_api_macros",
":tensor_interface",
":tf_datatype",
":tf_status",

View File

@ -30,4 +30,17 @@ limitations under the License.
#endif // _WIN32
#endif // SWIG
// TF_Bool is the C API typedef for unsigned char, while TF_BOOL is
// the datatype for boolean tensors.
#ifndef TF_Bool
#define TF_Bool unsigned char
#endif // TF_Bool
// Macro used to calculate struct size for maintaining ABI stability across
// different struct implementations.
#ifndef TF_OFFSET_OF_END
#define TF_OFFSET_OF_END(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
#endif // TF_OFFSET_OF_END
#endif // TENSORFLOW_C_C_API_MACROS_H_

View File

@ -261,7 +261,6 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index,
size_t len, TF_Status* status) {
TF_SetStatus(status, TF_OK, "");
auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context);
static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
"64-bit int types should match in size");
tensorflow::gtl::ArraySlice<tensorflow::int64> dimarray(
@ -280,3 +279,42 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index,
}
return tf_tensor;
}
TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype,
int64_t* dims, int num_dims,
TF_AllocatorAttributes* attributes,
TF_Status* status) {
auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context);
TF_SetStatus(status, TF_OK, "");
static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
"64-bit int types should match in size");
tensorflow::gtl::ArraySlice<tensorflow::int64> dimarray(
reinterpret_cast<tensorflow::int64*>(dims), num_dims);
if (attributes && !attributes->struct_size) {
TF_SetStatus(
status, TF_INVALID_ARGUMENT,
"TF_AllocatorAttributes struct "
"size member must be set to TF_ALLOCATOR_ATTRIBUTES_STRUCT_SIZE");
return nullptr;
}
tensorflow::AllocatorAttributes allocator_attr;
if (attributes && attributes->on_host) {
allocator_attr.set_on_host(true);
}
tensorflow::Status s;
tensorflow::Tensor tensor;
s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype),
tensorflow::TensorShape(dimarray), &tensor,
allocator_attr);
if (!s.ok()) {
::tensorflow::Set_TF_Status_from_Status(status, s);
return nullptr;
}
TF_Tensor* tf_tensor;
tf_tensor = TF_TensorFromTensor(tensor, &s);
if (!s.ok()) {
::tensorflow::Set_TF_Status_from_Status(status, s);
return nullptr;
}
return tf_tensor;
}

View File

@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/c/c_api.h"
#include "tensorflow/c/tf_datatype.h"
#include "tensorflow/c/tf_status.h"
#include "tensorflow/c/tf_tensor.h"
// Macro to control visibility of exported symbols in the shared library (.so,
// .dylib, .dll).
@ -199,6 +200,15 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
int64_t* dims, int num_dims,
size_t len, TF_Status* status);
// Allocates a temporary Tensor of the specified type and shape. The
// Tensor must not be used after kernel construction is
// complete.
//
// num_dims must equal the size of array dims
TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(
TF_OpKernelContext* context, TF_DataType dtype, int64_t* dims, int num_dims,
TF_AllocatorAttributes* alloc_attrs, TF_Status* status);
#ifdef __cplusplus
} /* end extern "C" */
#endif

View File

@ -368,6 +368,16 @@ class DeviceKernelOpTest : public OpsTestBase {
#endif
};
// Validates that the tensor has shape and type corresponding to
// dims and dtype.
void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims,
TF_DataType dtype);
// Copies data of length tensor_size_bytes from values to tensor.
template <typename T>
void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes,
TF_OpKernelContext* ctx);
REGISTER_OP("AllocateOutputOp1").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
@ -379,22 +389,11 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
TF_Tensor* output = TF_AllocateOutput(
/*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
/*num_dims=*/1, /*len=*/tensor_size_bytes, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
EXPECT_EQ(1, TF_NumDims(output));
EXPECT_EQ(1, TF_Dim(output, 0));
validate_tensor(output, &dim, 1, TF_FLOAT);
// Set output to 3
float* data = reinterpret_cast<float*>(TF_TensorData(output));
float value = 3.0f;
#if GOOGLE_CUDA
OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value,
tensor_size_bytes);
#else
*data = value;
#endif
float values[1] = {3.0f};
set_tensor_data<float>(output, values, tensor_size_bytes, ctx);
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
@ -417,12 +416,8 @@ TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) {
TF_Tensor* output = TF_AllocateOutput(
/*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
/*num_dims=*/1, /*len=*/0, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
EXPECT_EQ(1, TF_NumDims(output));
EXPECT_EQ(0, TF_Dim(output, 0));
validate_tensor(output, &dim, 1, TF_FLOAT);
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
@ -442,27 +437,16 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
TF_Status* s = TF_NewStatus();
// Allocate 2x3 output
int64_t dim[2] = {2, 3};
size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT) * 6;
TF_Tensor* output = TF_AllocateOutput(
/*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim,
/*num_dims=*/2, /*len=*/tensor_size_bytes, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
EXPECT_EQ(2, TF_NumDims(output));
EXPECT_EQ(2, TF_Dim(output, 0));
EXPECT_EQ(3, TF_Dim(output, 1));
validate_tensor(output, dim, 2, TF_FLOAT);
// Set output to [1 2 3 4 5 6]
void* data = TF_TensorData(output);
float value[6] = {1, 2, 3, 4, 5, 6};
#if GOOGLE_CUDA
OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value,
tensor_size_bytes);
#else
memcpy(data, value, tensor_size_bytes);
#endif
float values[6] = {1, 2, 3, 4, 5, 6};
set_tensor_data<float>(output, values, tensor_size_bytes, ctx);
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
@ -474,4 +458,132 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
output->DebugString(100));
}
REGISTER_OP("AllocateTempOp1").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateTempSizeOne) {
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
// Allocate scalar TF_Tensor
TF_Status* s = TF_NewStatus();
int64_t dim = 1;
TF_AllocatorAttributes alloc_attrs;
alloc_attrs.struct_size = TF_ALLOCATOR_ATTRIBUTES_STRUCT_SIZE;
#if GOOGLE_CUDA
alloc_attrs.on_host = 0;
#else
alloc_attrs.on_host = 1;
#endif
TF_Tensor* output = TF_AllocateTemp(
/*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
/*num_dims=*/1, /*allocator_attributes*/ &alloc_attrs, s);
size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT);
EXPECT_EQ(TF_OK, TF_GetCode(s));
validate_tensor(output, &dim, 1, TF_FLOAT);
// Set TF_Tensor value to 3
float values[1] = {3.0f};
set_tensor_data<float>(output, values, tensor_size_bytes, ctx);
TF_SetOutput(ctx, 0, output, s);
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
SetupOp("AllocateTempOp1", "AllocateTemp1", my_compute_func);
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
EXPECT_EQ("Tensor<type: float shape: [1] values: 3>",
output->DebugString(100));
}
REGISTER_OP("AllocateTempOp0").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateTempEmpty) {
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
TF_Status* s = TF_NewStatus();
// Allocate empty TF_Tensor
int64_t dim = 0;
TF_AllocatorAttributes alloc_attrs;
alloc_attrs.struct_size = TF_ALLOCATOR_ATTRIBUTES_STRUCT_SIZE;
#if GOOGLE_CUDA
alloc_attrs.on_host = 0;
#else
alloc_attrs.on_host = 1;
#endif
TF_Tensor* output = TF_AllocateTemp(
/*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
/*num_dims=*/1, /*allocator_attributes*/ &alloc_attrs, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
validate_tensor(output, &dim, 1, TF_FLOAT);
TF_SetOutput(ctx, 0, output, s);
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
SetupOp("AllocateTempOp0", "AllocateTemp0", my_compute_func);
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
EXPECT_EQ("Tensor<type: float shape: [0] values: >",
output->DebugString(100));
}
REGISTER_OP("AllocateTempOp2x3").Output("output1: float");
TEST_F(DeviceKernelOpTest, TestAllocateTempSize2x3) {
auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
TF_Status* s = TF_NewStatus();
size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
// Allocate 2x3 TF_Tensor
int64_t dim[2] = {2, 3};
TF_AllocatorAttributes alloc_attrs;
alloc_attrs.struct_size = TF_ALLOCATOR_ATTRIBUTES_STRUCT_SIZE;
#if GOOGLE_CUDA
alloc_attrs.on_host = 0;
#else
alloc_attrs.on_host = 1;
#endif
TF_Tensor* output = TF_AllocateTemp(
/*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/dim,
/*num_dims=*/2, /*allocator_attributes*/ &alloc_attrs, s);
EXPECT_EQ(TF_OK, TF_GetCode(s));
validate_tensor(output, dim, 2, TF_FLOAT);
// Set TF_Tensor values to [1 2 3 4 5 6]
float values[6] = {1, 2, 3, 4, 5, 6};
set_tensor_data<float>(output, values, tensor_size_bytes, ctx);
TF_SetOutput(ctx, 0, output, s);
TF_DeleteStatus(s);
TF_DeleteTensor(output);
};
SetupOp("AllocateTempOp2x3", "AllocateTempOp2x3", my_compute_func);
TF_ASSERT_OK(RunOpKernel());
Tensor* output = GetOutput(0);
EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
output->DebugString(100));
}
void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims,
TF_DataType dtype) {
EXPECT_EQ(TF_FLOAT, TF_TensorType(tensor));
EXPECT_EQ(num_dims, TF_NumDims(tensor));
for (int i = 0; i < num_dims; ++i) {
EXPECT_EQ(dims[i], TF_Dim(tensor, i));
}
}
template <typename T>
void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes,
TF_OpKernelContext* ctx) {
T* data = reinterpret_cast<T*>(TF_TensorData(tensor));
#if GOOGLE_CUDA
OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, values,
tensor_size_bytes);
#else
memcpy(data, values, tensor_size_bytes);
#endif
}
} // namespace tensorflow

View File

@ -19,6 +19,7 @@ limitations under the License.
#include <stdbool.h>
#include <stdint.h>
#include "tensorflow/c/c_api_macros.h"
#include "tensorflow/c/tf_datatype.h"
#include "tensorflow/c/tf_status.h"
@ -45,6 +46,16 @@ limitations under the License.
extern "C" {
#endif
// Allocator Attributes used for tensor allocation.
typedef struct TF_AllocatorAttributes {
size_t struct_size;
// Set boolean to 1 for CPU allocation, else 0.
TF_Bool on_host;
} TF_AllocatorAttributes;
#define TF_ALLOCATOR_ATTRIBUTES_STRUCT_SIZE \
TF_OFFSET_OF_END(TF_AllocatorAttributes, on_host)
// --------------------------------------------------------------------------
// TF_Tensor holds a multi-dimensional array of elements of a single data type.
// For all types other than TF_STRING, the data buffer stores elements