From c5ef52c5f0c698b76133eae0aa93d83fa7ab9f79 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen <nguyendaniel@google.com> Date: Fri, 31 Jul 2020 23:43:44 +0000 Subject: [PATCH 1/3] added draft of function --- tensorflow/c/kernels.cc | 26 +++++++++++++++++++++++++- tensorflow/c/kernels.h | 5 +++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 20a6c5117cf..0fa1c83cac2 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -279,4 +279,28 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, return nullptr; } return tf_tensor; -} \ No newline at end of file +} + +void TF_ForwardInputOrAllocateOutput(TF_OpKernelContext* context, + int* candidate_input_indices, int num_input_indices, int output_index, + int64_t* output_dims, int output_num_dims, TF_Tensor** output, + int* forwarded_input, TF_Status* status) { + TF_SetStatus(status, TF_OK, ""); + auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); + tensorflow::gtl::ArraySlice<int> input_indices_array(candidate_input_indices, + num_input_indices); + tensorflow::gtl::ArraySlice<tensorflow::int64> output_dimarray( + reinterpret_cast<tensorflow::int64*>(output_dims), output_num_dims); + tensorflow::Tensor output_tensor; + tensorflow::Status s = TF_TensorToTensor(*output, &output_tensor); + if (!s.ok()) { + ::tensorflow::Set_TF_Status_from_Status(status, s); + return; + } + tensorflow::Tensor* output_tensor_pointer = &output_tensor; + tensorflow::Status forward_input_status = cc_ctx-> + forward_input_or_allocate_output(input_indices_array, output_index, + tensorflow::TensorShape(output_dimarray), &output_tensor_pointer, + forwarded_input); + ::tensorflow::Set_TF_Status_from_Status(status, s); +} diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index c7138a39c73..22424ddc096 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -199,6 +199,11 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); +TF_CAPI_EXPORT void TF_ForwardInputOrAllocateOutput(TF_OpKernelContext* context, + int* candidate_input_indices, int num_input_indices, int output_index, + int64_t* output_dims, int output_num_dims, TF_Tensor** output, + int* forwarded_input, TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif From 0a79e7111037c4bb793964708acc27f4e7cc12ee Mon Sep 17 00:00:00 2001 From: Daniel Nguyen <nguyendaniel@google.com> Date: Mon, 10 Aug 2020 16:53:45 +0000 Subject: [PATCH 2/3] finished implementation and passes tests --- tensorflow/c/kernels.cc | 26 ++++++++------- tensorflow/c/kernels.h | 15 ++++++--- tensorflow/c/kernels_test.cc | 64 ++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 15 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 0fa1c83cac2..86d88943f9a 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -281,26 +281,30 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, return tf_tensor; } -void TF_ForwardInputOrAllocateOutput(TF_OpKernelContext* context, +TF_Tensor* TF_ForwardInputOrAllocateOutput(TF_OpKernelContext* context, int* candidate_input_indices, int num_input_indices, int output_index, - int64_t* output_dims, int output_num_dims, TF_Tensor** output, - int* forwarded_input, TF_Status* status) { + int64_t* output_dims, int output_num_dims, int* forwarded_input, + TF_Status* status) { TF_SetStatus(status, TF_OK, ""); auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); tensorflow::gtl::ArraySlice<int> input_indices_array(candidate_input_indices, num_input_indices); tensorflow::gtl::ArraySlice<tensorflow::int64> output_dimarray( reinterpret_cast<tensorflow::int64*>(output_dims), output_num_dims); - tensorflow::Tensor output_tensor; - tensorflow::Status s = TF_TensorToTensor(*output, &output_tensor); - if (!s.ok()) { - ::tensorflow::Set_TF_Status_from_Status(status, s); - return; - } - tensorflow::Tensor* output_tensor_pointer = &output_tensor; - tensorflow::Status forward_input_status = cc_ctx-> + tensorflow::Tensor* output_tensor_pointer; + tensorflow::Status s = cc_ctx-> forward_input_or_allocate_output(input_indices_array, output_index, tensorflow::TensorShape(output_dimarray), &output_tensor_pointer, forwarded_input); + if (!s.ok()) { ::tensorflow::Set_TF_Status_from_Status(status, s); + return nullptr; + } + TF_Tensor* tf_tensor_output = TF_TensorFromTensor( + *output_tensor_pointer, &s); + if (!s.ok()) { + ::tensorflow::Set_TF_Status_from_Status(status, s); + return nullptr; + } + return tf_tensor_output; } diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 22424ddc096..f9aae309df8 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -199,10 +199,17 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); -TF_CAPI_EXPORT void TF_ForwardInputOrAllocateOutput(TF_OpKernelContext* context, - int* candidate_input_indices, int num_input_indices, int output_index, - int64_t* output_dims, int output_num_dims, TF_Tensor** output, - int* forwarded_input, TF_Status* status); +// Tries to forward one of the inputs given in input_indices to +// output[output_index]. If none of the given inputs can be forwarded, calls +// allocate_output() to allocate a new output buffer. The index of the +// forwarded input will be assign to output argument forwarded_input (if it's +// not nullptr). If no inputs are forwarded, forwarded_input will be assigned +// -1. + +TF_CAPI_EXPORT TF_Tensor* TF_ForwardInputOrAllocateOutput( + TF_OpKernelContext* context, int* candidate_input_indices, + int num_input_indices, int output_index, int64_t* output_dims, + int output_num_dims, int* forwarded_input, TF_Status* status); #ifdef __cplusplus } /* end extern "C" */ diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index 3c8ac934428..1ff461e0f03 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -474,4 +474,68 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>", output->DebugString(100)); } + +TEST_F(DeviceKernelOpTest, TestForwardInputOrAllocateOutput) { + const char* node_name = "TestForwardInputOrAllocateOutputKernel"; + const char* op_name = "BazOp"; + const char* device_name = "FakeDeviceName"; + + REGISTER_OP(op_name) + .Input("input1: float") + .Input("input2: float") + .Output("output1: float") + .Attr("SomeDataTypeAttr: type");; + + // A kernel whose Compute function that forwards one input to output + auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { + TF_Status* s = TF_NewStatus(); + int candidate_input_indices[1] = {0}; + int forwarded_input; + int64_t output_dims[1] = {}; + TF_Tensor* output = TF_ForwardInputOrAllocateOutput(ctx, + candidate_input_indices, 1, 0, output_dims, 0, &forwarded_input, s); + EXPECT_EQ(TF_OK, TF_GetCode(s)); + EXPECT_EQ(forwarded_input, 0); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(0, TF_NumDims(output)); + TF_DeleteStatus(s); + }; + + TF_KernelBuilder* builder = TF_NewKernelBuilder(op_name, device_name, nullptr, + my_compute_func, nullptr); + + { + TF_Status* status = TF_NewStatus(); + TF_RegisterKernelBuilder(node_name, builder, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)); + TF_DeleteStatus(status); + } + + { + OpKernelContext::Params p; + DummyDevice dummy_device(nullptr); + p.device = &dummy_device; + AllocatorAttributes alloc_attrs; + p.output_attr_array = &alloc_attrs; + + Tensor t(static_cast<float>(123)); + + gtl::InlinedVector<TensorValue, 4> inputs; + // GetFakeKernel requires a NodeDef with two inputs + inputs.emplace_back(&t); + inputs.emplace_back(); + p.inputs = &inputs; + + Status status; + std::unique_ptr<OpKernel> kernel = + GetFakeKernel(device_name, op_name, node_name, &status); + TF_EXPECT_OK(status); + ASSERT_NE(nullptr, kernel.get()); + + p.op_kernel = kernel.get(); + OpKernelContext ctx(&p); + kernel->Compute(&ctx); + ASSERT_EQ(123, ctx.mutable_output(0)->scalar<float>()()); + } +} } // namespace tensorflow From aa88605eae286960f52d1dc3fdee06238221d6d2 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen <nguyendaniel@google.com> Date: Tue, 11 Aug 2020 18:18:25 +0000 Subject: [PATCH 3/3] clean up only --- tensorflow/c/kernels.cc | 13 ++++++++----- tensorflow/c/kernels.h | 3 +-- tensorflow/c/kernels_test.cc | 10 ++++++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 86d88943f9a..a3d4e6a90f6 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -282,13 +282,16 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, } TF_Tensor* TF_ForwardInputOrAllocateOutput(TF_OpKernelContext* context, - int* candidate_input_indices, int num_input_indices, int output_index, - int64_t* output_dims, int output_num_dims, int* forwarded_input, - TF_Status* status) { + int* candidate_input_indices, int num_candidate_input_indices, + int output_index, int64_t* output_dims, int output_num_dims, + int* forwarded_input, TF_Status* status) { TF_SetStatus(status, TF_OK, ""); auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); - tensorflow::gtl::ArraySlice<int> input_indices_array(candidate_input_indices, - num_input_indices); + + static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), + "64-bit int types should match in size"); + tensorflow::gtl::ArraySlice<int> input_indices_array(candidate_input_indices, + num_candidate_input_indices); tensorflow::gtl::ArraySlice<tensorflow::int64> output_dimarray( reinterpret_cast<tensorflow::int64*>(output_dims), output_num_dims); tensorflow::Tensor* output_tensor_pointer; diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index f9aae309df8..fe388b98dbd 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -205,10 +205,9 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, // forwarded input will be assign to output argument forwarded_input (if it's // not nullptr). If no inputs are forwarded, forwarded_input will be assigned // -1. - TF_CAPI_EXPORT TF_Tensor* TF_ForwardInputOrAllocateOutput( TF_OpKernelContext* context, int* candidate_input_indices, - int num_input_indices, int output_index, int64_t* output_dims, + int num_candidate_input_indices, int output_index, int64_t* output_dims, int output_num_dims, int* forwarded_input, TF_Status* status); #ifdef __cplusplus diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index 1ff461e0f03..e48e2bc4bb8 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -486,14 +486,16 @@ TEST_F(DeviceKernelOpTest, TestForwardInputOrAllocateOutput) { .Output("output1: float") .Attr("SomeDataTypeAttr: type");; - // A kernel whose Compute function that forwards one input to output + // A kernel whose Compute function that forwards a scalar input to output auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { TF_Status* s = TF_NewStatus(); int candidate_input_indices[1] = {0}; int forwarded_input; int64_t output_dims[1] = {}; - TF_Tensor* output = TF_ForwardInputOrAllocateOutput(ctx, - candidate_input_indices, 1, 0, output_dims, 0, &forwarded_input, s); + TF_Tensor* output = TF_ForwardInputOrAllocateOutput(/*context=*/ctx, + candidate_input_indices, /*num_candidate_input_indices=*/1, + /*output_index=*/0, output_dims, /*output_num_dims=*/0, + &forwarded_input, /*status=*/s); EXPECT_EQ(TF_OK, TF_GetCode(s)); EXPECT_EQ(forwarded_input, 0); EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); @@ -518,7 +520,7 @@ TEST_F(DeviceKernelOpTest, TestForwardInputOrAllocateOutput) { AllocatorAttributes alloc_attrs; p.output_attr_array = &alloc_attrs; - Tensor t(static_cast<float>(123)); + Tensor t(123.0f); gtl::InlinedVector<TensorValue, 4> inputs; // GetFakeKernel requires a NodeDef with two inputs