From f40cf089f0ecd0316c0150918b9bbfc930fba955 Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Fri, 26 Jun 2020 22:05:12 +0000
Subject: [PATCH 01/10] completed allocate_temp

---
 tensorflow/c/kernels.cc      |  24 +++++
 tensorflow/c/kernels.h       |   9 ++
 tensorflow/c/kernels_test.cc | 166 +++++++++++++++++++++++++++--------
 3 files changed, 163 insertions(+), 36 deletions(-)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index a0ed0d9f245..80b5234b52d 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -26,6 +26,9 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/platform/types.h"
 
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
 // This file forms the basis of a stable ABI for third-party kernel
 // implementations. It is crucial that changes to this file are made cautiously
 // and with a focus on maintaining both source and binary compatibility.
@@ -260,3 +263,24 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index,
   }
   return result;
 }
+
+TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, 
+                     int64_t* dims, int num_dims, TF_Status* status){
+  auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context);
+  TF_SetStatus(status, TF_OK, ""); 
+  tensorflow::TensorShape shape;
+  for(int i = 0; i < num_dims; ++i){
+    shape.AddDim(dims[i]); 
+  }
+  tensorflow::Status s;
+  tensorflow::Tensor tensor_temp;  
+  TF_Tensor* tf_tensor_temp; 
+  s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype), shape, &tensor_temp);
+  if (s.ok()){ 
+    tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); 
+  }
+  if (s.ok()){ 
+    ::tensorflow::Set_TF_Status_from_Status(status, s); 
+    return tf_tensor_temp; 
+  }  
+}
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index 084717c1d9e..e450511da3a 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -190,6 +190,15 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
                                             int64_t* dims, int num_dims,
                                             size_t len, TF_Status* status);
 
+// Allocates a temporary Tensor of the specified type and shape. The
+// Tensor must not be used after kernel construction is
+// complete. 
+
+// num_dims must equal the size of array dims 
+TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, 
+                     int64_t* dims, int num_dims, TF_Status* status);
+
+
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc
index 423302741de..738c1e12c80 100644
--- a/tensorflow/c/kernels_test.cc
+++ b/tensorflow/c/kernels_test.cc
@@ -360,6 +360,17 @@ class DeviceKernelOpTest : public OpsTestBase {
 #endif
 };
 
+// Helper function for tests that validates that the tensor has 
+// shape and type corresponding to dims and dtype. 
+void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, 
+                     TF_DataType dtype);
+
+// Helper function for tests that copies data of length 
+// tensor_size_bytes from values to tensor 
+template <typename T> 
+void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, 
+                     TF_OpKernelContext* ctx);
+
 REGISTER_OP("AllocateOutputOp1").Output("output1: float");
 
 TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
@@ -371,22 +382,11 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
     TF_Tensor* output = TF_AllocateOutput(
         /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
         /*num_dims=*/1, /*len=*/tensor_size_bytes, s);
-    EXPECT_EQ(TF_OK, TF_GetCode(s));
-    EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
-    EXPECT_EQ(1, TF_NumDims(output));
-    EXPECT_EQ(1, TF_Dim(output, 0));
-
+    validate_tensor(output, &dim, 1, TF_FLOAT); 
+    
     // Set output to 3
-    float* data = reinterpret_cast<float*>(TF_TensorData(output));
-    float value = 3.0f;
-#if GOOGLE_CUDA
-    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
-    cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value,
-                                                  tensor_size_bytes);
-#else
-    *data = value;
-#endif
-
+    float values[1] = {3.0f}; 
+    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
     TF_DeleteStatus(s);
     TF_DeleteTensor(output);
   };
@@ -409,12 +409,8 @@ TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) {
     TF_Tensor* output = TF_AllocateOutput(
         /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
         /*num_dims=*/1, /*len=*/0, s);
-
     EXPECT_EQ(TF_OK, TF_GetCode(s));
-    EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
-    EXPECT_EQ(1, TF_NumDims(output));
-    EXPECT_EQ(0, TF_Dim(output, 0));
-
+    validate_tensor(output, &dim, 1, TF_FLOAT); 
     TF_DeleteStatus(s);
     TF_DeleteTensor(output);
   };
@@ -434,27 +430,16 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
     TF_Status* s = TF_NewStatus();
     // Allocate 2x3 output
     int64_t dim[2] = {2, 3};
-    size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
+    size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT) * 6; 
     TF_Tensor* output = TF_AllocateOutput(
         /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim,
         /*num_dims=*/2, /*len=*/tensor_size_bytes, s);
     EXPECT_EQ(TF_OK, TF_GetCode(s));
-    EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
-    EXPECT_EQ(2, TF_NumDims(output));
-    EXPECT_EQ(2, TF_Dim(output, 0));
-    EXPECT_EQ(3, TF_Dim(output, 1));
+    validate_tensor(output, dim, 2, TF_FLOAT); 
 
     // Set output to [1 2 3 4 5 6]
-    void* data = TF_TensorData(output);
-    float value[6] = {1, 2, 3, 4, 5, 6};
-#if GOOGLE_CUDA
-    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
-    cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value,
-                                                  tensor_size_bytes);
-#else
-    memcpy(data, value, tensor_size_bytes);
-#endif
-
+    float values[6] = {1, 2, 3, 4, 5, 6};
+    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
     TF_DeleteStatus(s);
     TF_DeleteTensor(output);
   };
@@ -466,4 +451,113 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
   EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
             output->DebugString(100));
 }
-}  // namespace tensorflow
+
+REGISTER_OP("AllocateTempOp1").Output("output1: float");
+
+TEST_F(DeviceKernelOpTest, TestAllocateTempSizeOne) {
+  auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
+    // Allocate output
+    TF_Status* s = TF_NewStatus();
+    int64_t dim = 1;
+    TF_Tensor* output = TF_AllocateTemp(
+        /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
+        /*num_dims=*/1, s);
+    size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT);
+    EXPECT_EQ(TF_OK, TF_GetCode(s));
+    validate_tensor(output, &dim, 1, TF_FLOAT); 
+
+    // Set output to 3
+    float values[1] = {3.0f};
+    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
+    TF_SetOutput(ctx, 0, output, s); 
+    TF_DeleteStatus(s);
+    TF_DeleteTensor(output);
+  };
+
+  SetupOp("AllocateTempOp1", "AllocateTemp1", my_compute_func);
+
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor* output = GetOutput(0);
+  EXPECT_EQ("Tensor<type: float shape: [1] values: 3>",
+            output->DebugString(100));
+}
+
+REGISTER_OP("AllocateTempOp0").Output("output1: float");
+
+TEST_F(DeviceKernelOpTest, TestAllocateTempEmpty) {
+  auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
+    TF_Status* s = TF_NewStatus();
+    // Allocate empty output
+    int64_t dim = 0;
+    TF_Tensor* output = TF_AllocateTemp(
+        /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
+        /*num_dims=*/1, s);
+    EXPECT_EQ(TF_OK, TF_GetCode(s));
+    validate_tensor(output, &dim, 1, TF_FLOAT);
+    TF_SetOutput(ctx, 0, output, s); 
+    TF_DeleteStatus(s);
+    TF_DeleteTensor(output);
+  };
+
+  SetupOp("AllocateTempOp0", "AllocateTemp0", my_compute_func);
+
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor* output = GetOutput(0);
+  EXPECT_EQ("Tensor<type: float shape: [0] values: >",
+            output->DebugString(100));
+}
+
+REGISTER_OP("AllocateTempOp2x3").Output("output1: float");
+
+TEST_F(DeviceKernelOpTest, TestAllocateTempSize2x3) {
+  auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
+    TF_Status* s = TF_NewStatus();
+    size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
+    // Allocate 2x3 output
+    int64_t dim[2] = {2, 3};
+    TF_Tensor* output = TF_AllocateTemp(
+        /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/dim,
+        /*num_dims=*/2, s);
+    EXPECT_EQ(TF_OK, TF_GetCode(s));
+    validate_tensor(output, dim, 2, TF_FLOAT);
+
+    // Set output to [1 2 3 4 5 6]
+    void* data = TF_TensorData(output);
+    float values[6] = {1, 2, 3, 4, 5, 6};
+    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
+    TF_SetOutput(ctx, 0, output, s); 
+    TF_DeleteStatus(s);
+    TF_DeleteTensor(output);
+  };
+
+  SetupOp("AllocateTempOp2x3", "AllocateTempOp2x3", my_compute_func);
+
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor* output = GetOutput(0);
+  EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
+            output->DebugString(100));
+} 
+
+void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, 
+                     TF_DataType dtype){
+  EXPECT_EQ(TF_FLOAT, TF_TensorType(tensor));
+  EXPECT_EQ(num_dims, TF_NumDims(tensor));
+  for(int i = 0; i < num_dims; ++i){ 
+    EXPECT_EQ(dims[i], TF_Dim(tensor, i)); 
+  }
+}
+
+template <typename T> 
+void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, 
+                     TF_OpKernelContext* ctx){ 
+    T* data = reinterpret_cast<T*>(TF_TensorData(tensor));
+#if GOOGLE_CUDA
+    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
+    cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, values,
+                                                  tensor_size_bytes);
+#else
+    memcpy(data, values, tensor_size_bytes);
+#endif
+}
+
+}  // namespace tensorflow
\ No newline at end of file

From 8b07609b2ee977a8a97120dbaad4c3fabc151b0f Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Fri, 26 Jun 2020 22:33:19 +0000
Subject: [PATCH 02/10] added priority function to TF_KernelDefBuilder

---
 tensorflow/c/kernels.cc | 5 +++++
 tensorflow/c/kernels.h  | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 80b5234b52d..905219c6e16 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -100,6 +100,11 @@ void TF_KernelBuilder_HostMemory(TF_KernelBuilder* kernel_builder,
   kernel_builder->cc_builder->HostMemory(arg_name);
 }
 
+void TF_KernelBuilder_Priority(TF_KernelBuilder* kernel_builder, 
+                               int32_t priority_number){ 
+  kernel_builder->cc_builder->Priority(priority_number); 
+}
+
 namespace tensorflow {
 namespace {
 
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index e450511da3a..b245dd8a7fc 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -107,6 +107,10 @@ TF_CAPI_EXPORT extern void TF_KernelBuilder_TypeConstraint(
 TF_CAPI_EXPORT extern void TF_KernelBuilder_HostMemory(
     TF_KernelBuilder* kernel_builder, const char* arg_name);
 
+// Specify a priority number for this kernel.
+TF_CAPI_EXPORT extern void TF_KernelBuilder_Priority(
+    TF_KernelBuilder* kernel_builder, int32_t priority_number); 
+
 // Register the given kernel builder with the TensorFlow runtime. If
 // registration fails, the given status will be populated.
 //

From 70feb59205f75ed0642eb091bd63960f7358039e Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Sun, 28 Jun 2020 20:55:41 +0000
Subject: [PATCH 03/10] fixed indentation and comments for allocate_temp

---
 tensorflow/c/kernels.cc | 11 +++++++----
 tensorflow/c/kernels.h  | 10 +++++-----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 905219c6e16..02703d97bbe 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -281,11 +281,14 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype,
   tensorflow::Tensor tensor_temp;  
   TF_Tensor* tf_tensor_temp; 
   s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype), shape, &tensor_temp);
-  if (s.ok()){ 
-    tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); 
+  if (!s.ok()){ 
+  	::tensorflow::Set_TF_Status_from_Status(status, s); 
+  	return nullptr; 
   }
-  if (s.ok()){ 
+  tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); 
+  if (!s.ok()){ 
     ::tensorflow::Set_TF_Status_from_Status(status, s); 
-    return tf_tensor_temp; 
+    return nullptr; 
   }  
+  return tf_tensor_temp; 
 }
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index b245dd8a7fc..8ed3488988d 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -194,13 +194,13 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
                                             int64_t* dims, int num_dims,
                                             size_t len, TF_Status* status);
 
-// Allocates a temporary Tensor of the specified type and shape. The
-// Tensor must not be used after kernel construction is
-// complete. 
+// Allocates a temporary Tensor of the specified type and shape. Devices 
+// such as GPUs that enqueue Ops for lazy execution may retain references 
+// to the temporary tensors after the Op's Compute method has run. 
 
 // num_dims must equal the size of array dims 
-TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, 
-                     int64_t* dims, int num_dims, TF_Status* status);
+TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, 
+    TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status);
 
 
 #ifdef __cplusplus

From b58635dca87103807acd14b67545122d4d112ee0 Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Sun, 28 Jun 2020 20:55:41 +0000
Subject: [PATCH 04/10] took out array_slice.h

---
 tensorflow/c/kernels.cc | 13 +++++++------
 tensorflow/c/kernels.h  | 10 +++++-----
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 905219c6e16..3a8170575ad 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -25,9 +25,7 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/platform/types.h"
-
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/lib/gtl/array_slice.h"
 
 // This file forms the basis of a stable ABI for third-party kernel
 // implementations. It is crucial that changes to this file are made cautiously
@@ -281,11 +279,14 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype,
   tensorflow::Tensor tensor_temp;  
   TF_Tensor* tf_tensor_temp; 
   s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype), shape, &tensor_temp);
-  if (s.ok()){ 
-    tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); 
+  if (!s.ok()){ 
+  	::tensorflow::Set_TF_Status_from_Status(status, s); 
+  	return nullptr; 
   }
-  if (s.ok()){ 
+  tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); 
+  if (!s.ok()){ 
     ::tensorflow::Set_TF_Status_from_Status(status, s); 
-    return tf_tensor_temp; 
+    return nullptr; 
   }  
+  return tf_tensor_temp; 
 }
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index b245dd8a7fc..8ed3488988d 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -194,13 +194,13 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
                                             int64_t* dims, int num_dims,
                                             size_t len, TF_Status* status);
 
-// Allocates a temporary Tensor of the specified type and shape. The
-// Tensor must not be used after kernel construction is
-// complete. 
+// Allocates a temporary Tensor of the specified type and shape. Devices 
+// such as GPUs that enqueue Ops for lazy execution may retain references 
+// to the temporary tensors after the Op's Compute method has run. 
 
 // num_dims must equal the size of array dims 
-TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, 
-                     int64_t* dims, int num_dims, TF_Status* status);
+TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, 
+    TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status);
 
 
 #ifdef __cplusplus

From fb70daadc449ffc50d6d0cdce17375bd72b6ab07 Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Wed, 1 Jul 2020 00:15:09 +0000
Subject: [PATCH 05/10] fixed allocate_temp indent errors

---
 tensorflow/c/kernels.cc | 2 +-
 tensorflow/c/kernels.h  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 3a8170575ad..749e6e89b8b 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -268,7 +268,7 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index,
 }
 
 TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, 
-                     int64_t* dims, int num_dims, TF_Status* status){
+    int64_t* dims, int num_dims, TF_Status* status){
   auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context);
   TF_SetStatus(status, TF_OK, ""); 
   tensorflow::TensorShape shape;
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index 8ed3488988d..1891ce31a23 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -200,7 +200,9 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
 
 // num_dims must equal the size of array dims 
 TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, 
-    TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status);
+                                                 TF_DataType dtype, 
+                                                 int64_t* dims, int num_dims, 
+                                                 TF_Status* status);
 
 
 #ifdef __cplusplus

From e52df6b5f34a76f3974132a5b3bf2653bed1e660 Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Mon, 6 Jul 2020 23:27:50 +0000
Subject: [PATCH 06/10] fixed naming for variables

---
 tensorflow/c/kernels.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 749e6e89b8b..5f5bd9779b1 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -276,17 +276,17 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype,
     shape.AddDim(dims[i]); 
   }
   tensorflow::Status s;
-  tensorflow::Tensor tensor_temp;  
-  TF_Tensor* tf_tensor_temp; 
-  s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype), shape, &tensor_temp);
+  tensorflow::Tensor tensor;  
+  TF_Tensor* tf_tensor; 
+  s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype), shape, &tensor);
   if (!s.ok()){ 
   	::tensorflow::Set_TF_Status_from_Status(status, s); 
   	return nullptr; 
   }
-  tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); 
+  tf_tensor = TF_TensorFromTensor(tensor, &s); 
   if (!s.ok()){ 
     ::tensorflow::Set_TF_Status_from_Status(status, s); 
     return nullptr; 
   }  
-  return tf_tensor_temp; 
+  return tf_tensor; 
 }

From 7b0a2473467e922f7a3b00076d7c3878e7a132ae Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Wed, 15 Jul 2020 20:57:34 +0000
Subject: [PATCH 07/10] added Scalar Shape to TF_ShapeInference functions

---
 tensorflow/c/ops.cc | 6 ++++++
 tensorflow/c/ops.h  | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/tensorflow/c/ops.cc b/tensorflow/c/ops.cc
index 118385ed72c..b1a69922e5d 100644
--- a/tensorflow/c/ops.cc
+++ b/tensorflow/c/ops.cc
@@ -104,6 +104,12 @@ TF_ShapeHandle* TF_NewShapeHandle() {
   return reinterpret_cast<TF_ShapeHandle*>(new ShapeHandle);
 }
 
+TF_ShapeHandle* TF_ShapeInferenceContextScalar(TF_ShapeInferenceContext* ctx) {
+  auto* handle = new ShapeHandle; 
+  *handle = reinterpret_cast<InferenceContext*>(ctx)->Scalar(); 
+  return reinterpret_cast<TF_ShapeHandle*>(handle); 
+}
+
 TF_ShapeHandle* TF_ShapeInferenceContextVectorFromSize(
     TF_ShapeInferenceContext* ctx, size_t size) {
   auto* handle = new ShapeHandle;
diff --git a/tensorflow/c/ops.h b/tensorflow/c/ops.h
index 14868e40260..91cad73101f 100644
--- a/tensorflow/c/ops.h
+++ b/tensorflow/c/ops.h
@@ -280,6 +280,11 @@ extern void TF_ShapeInferenceContextSetOutput(TF_ShapeInferenceContext* ctx,
                                               int i, TF_ShapeHandle* handle,
                                               TF_Status* status);
 
+// Returns a newly-allocated scalar shape handle. The returned handle should 
+// be freed with TF_DeleteShapeHandle.
+TF_CAPI_EXPORT extern TF_ShapeHandle* TF_ShapeInferenceContextScalar(
+    TF_ShapeInferenceContext* ctx); 
+    
 // Returns a newly-allocate shape handle representing a vector of the given
 // size. The returned handle should be freed with TF_DeleteShapeHandle.
 TF_CAPI_EXPORT extern TF_ShapeHandle* TF_ShapeInferenceContextVectorFromSize(

From 23ebf04dbf7d11f7989bd85085844a9ffecd3c2f Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Wed, 15 Jul 2020 21:26:21 +0000
Subject: [PATCH 08/10] updated paths to upstream master for cleaner merge

---
 tensorflow/c/kernels.cc      |  25 ------
 tensorflow/c/kernels.h       |  11 ---
 tensorflow/c/kernels_test.cc | 166 ++++++++---------------------------
 3 files changed, 36 insertions(+), 166 deletions(-)

diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 7aef824cc69..8fa50711a8d 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -25,7 +25,6 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/framework/tensor_shape.h"
 
 // This file forms the basis of a stable ABI for third-party kernel
 // implementations. It is crucial that changes to this file are made cautiously
@@ -273,27 +272,3 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index,
   }
   return tf_tensor;
 }
-
-TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, 
-    int64_t* dims, int num_dims, TF_Status* status){
-  auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context);
-  TF_SetStatus(status, TF_OK, ""); 
-  tensorflow::TensorShape shape;
-  for(int i = 0; i < num_dims; ++i){
-    shape.AddDim(dims[i]); 
-  }
-  tensorflow::Status s;
-  tensorflow::Tensor tensor;  
-  TF_Tensor* tf_tensor; 
-  s = cc_ctx->allocate_temp(static_cast<tensorflow::DataType>(dtype), shape, &tensor);
-  if (!s.ok()){ 
-  	::tensorflow::Set_TF_Status_from_Status(status, s); 
-  	return nullptr; 
-  }
-  tf_tensor = TF_TensorFromTensor(tensor, &s); 
-  if (!s.ok()){ 
-    ::tensorflow::Set_TF_Status_from_Status(status, s); 
-    return nullptr; 
-  }  
-  return tf_tensor; 
-}
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index e1397402dae..1428f7ab928 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -194,17 +194,6 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
                                             int64_t* dims, int num_dims,
                                             size_t len, TF_Status* status);
 
-// Allocates a temporary Tensor of the specified type and shape. Devices 
-// such as GPUs that enqueue Ops for lazy execution may retain references 
-// to the temporary tensors after the Op's Compute method has run. 
-
-// num_dims must equal the size of array dims 
-TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, 
-                                                 TF_DataType dtype, 
-                                                 int64_t* dims, int num_dims, 
-                                                 TF_Status* status);
-
-
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc
index 738c1e12c80..423302741de 100644
--- a/tensorflow/c/kernels_test.cc
+++ b/tensorflow/c/kernels_test.cc
@@ -360,17 +360,6 @@ class DeviceKernelOpTest : public OpsTestBase {
 #endif
 };
 
-// Helper function for tests that validates that the tensor has 
-// shape and type corresponding to dims and dtype. 
-void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, 
-                     TF_DataType dtype);
-
-// Helper function for tests that copies data of length 
-// tensor_size_bytes from values to tensor 
-template <typename T> 
-void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, 
-                     TF_OpKernelContext* ctx);
-
 REGISTER_OP("AllocateOutputOp1").Output("output1: float");
 
 TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
@@ -382,11 +371,22 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
     TF_Tensor* output = TF_AllocateOutput(
         /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
         /*num_dims=*/1, /*len=*/tensor_size_bytes, s);
-    validate_tensor(output, &dim, 1, TF_FLOAT); 
-    
+    EXPECT_EQ(TF_OK, TF_GetCode(s));
+    EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
+    EXPECT_EQ(1, TF_NumDims(output));
+    EXPECT_EQ(1, TF_Dim(output, 0));
+
     // Set output to 3
-    float values[1] = {3.0f}; 
-    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
+    float* data = reinterpret_cast<float*>(TF_TensorData(output));
+    float value = 3.0f;
+#if GOOGLE_CUDA
+    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
+    cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value,
+                                                  tensor_size_bytes);
+#else
+    *data = value;
+#endif
+
     TF_DeleteStatus(s);
     TF_DeleteTensor(output);
   };
@@ -409,8 +409,12 @@ TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) {
     TF_Tensor* output = TF_AllocateOutput(
         /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
         /*num_dims=*/1, /*len=*/0, s);
+
     EXPECT_EQ(TF_OK, TF_GetCode(s));
-    validate_tensor(output, &dim, 1, TF_FLOAT); 
+    EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
+    EXPECT_EQ(1, TF_NumDims(output));
+    EXPECT_EQ(0, TF_Dim(output, 0));
+
     TF_DeleteStatus(s);
     TF_DeleteTensor(output);
   };
@@ -430,16 +434,27 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
     TF_Status* s = TF_NewStatus();
     // Allocate 2x3 output
     int64_t dim[2] = {2, 3};
-    size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT) * 6; 
+    size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
     TF_Tensor* output = TF_AllocateOutput(
         /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim,
         /*num_dims=*/2, /*len=*/tensor_size_bytes, s);
     EXPECT_EQ(TF_OK, TF_GetCode(s));
-    validate_tensor(output, dim, 2, TF_FLOAT); 
+    EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
+    EXPECT_EQ(2, TF_NumDims(output));
+    EXPECT_EQ(2, TF_Dim(output, 0));
+    EXPECT_EQ(3, TF_Dim(output, 1));
 
     // Set output to [1 2 3 4 5 6]
-    float values[6] = {1, 2, 3, 4, 5, 6};
-    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
+    void* data = TF_TensorData(output);
+    float value[6] = {1, 2, 3, 4, 5, 6};
+#if GOOGLE_CUDA
+    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
+    cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value,
+                                                  tensor_size_bytes);
+#else
+    memcpy(data, value, tensor_size_bytes);
+#endif
+
     TF_DeleteStatus(s);
     TF_DeleteTensor(output);
   };
@@ -451,113 +466,4 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
   EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
             output->DebugString(100));
 }
-
-REGISTER_OP("AllocateTempOp1").Output("output1: float");
-
-TEST_F(DeviceKernelOpTest, TestAllocateTempSizeOne) {
-  auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
-    // Allocate output
-    TF_Status* s = TF_NewStatus();
-    int64_t dim = 1;
-    TF_Tensor* output = TF_AllocateTemp(
-        /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
-        /*num_dims=*/1, s);
-    size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT);
-    EXPECT_EQ(TF_OK, TF_GetCode(s));
-    validate_tensor(output, &dim, 1, TF_FLOAT); 
-
-    // Set output to 3
-    float values[1] = {3.0f};
-    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
-    TF_SetOutput(ctx, 0, output, s); 
-    TF_DeleteStatus(s);
-    TF_DeleteTensor(output);
-  };
-
-  SetupOp("AllocateTempOp1", "AllocateTemp1", my_compute_func);
-
-  TF_ASSERT_OK(RunOpKernel());
-  Tensor* output = GetOutput(0);
-  EXPECT_EQ("Tensor<type: float shape: [1] values: 3>",
-            output->DebugString(100));
-}
-
-REGISTER_OP("AllocateTempOp0").Output("output1: float");
-
-TEST_F(DeviceKernelOpTest, TestAllocateTempEmpty) {
-  auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
-    TF_Status* s = TF_NewStatus();
-    // Allocate empty output
-    int64_t dim = 0;
-    TF_Tensor* output = TF_AllocateTemp(
-        /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
-        /*num_dims=*/1, s);
-    EXPECT_EQ(TF_OK, TF_GetCode(s));
-    validate_tensor(output, &dim, 1, TF_FLOAT);
-    TF_SetOutput(ctx, 0, output, s); 
-    TF_DeleteStatus(s);
-    TF_DeleteTensor(output);
-  };
-
-  SetupOp("AllocateTempOp0", "AllocateTemp0", my_compute_func);
-
-  TF_ASSERT_OK(RunOpKernel());
-  Tensor* output = GetOutput(0);
-  EXPECT_EQ("Tensor<type: float shape: [0] values: >",
-            output->DebugString(100));
-}
-
-REGISTER_OP("AllocateTempOp2x3").Output("output1: float");
-
-TEST_F(DeviceKernelOpTest, TestAllocateTempSize2x3) {
-  auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
-    TF_Status* s = TF_NewStatus();
-    size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
-    // Allocate 2x3 output
-    int64_t dim[2] = {2, 3};
-    TF_Tensor* output = TF_AllocateTemp(
-        /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/dim,
-        /*num_dims=*/2, s);
-    EXPECT_EQ(TF_OK, TF_GetCode(s));
-    validate_tensor(output, dim, 2, TF_FLOAT);
-
-    // Set output to [1 2 3 4 5 6]
-    void* data = TF_TensorData(output);
-    float values[6] = {1, 2, 3, 4, 5, 6};
-    set_tensor_data<float>(output, values, tensor_size_bytes, ctx); 
-    TF_SetOutput(ctx, 0, output, s); 
-    TF_DeleteStatus(s);
-    TF_DeleteTensor(output);
-  };
-
-  SetupOp("AllocateTempOp2x3", "AllocateTempOp2x3", my_compute_func);
-
-  TF_ASSERT_OK(RunOpKernel());
-  Tensor* output = GetOutput(0);
-  EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
-            output->DebugString(100));
-} 
-
-void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, 
-                     TF_DataType dtype){
-  EXPECT_EQ(TF_FLOAT, TF_TensorType(tensor));
-  EXPECT_EQ(num_dims, TF_NumDims(tensor));
-  for(int i = 0; i < num_dims; ++i){ 
-    EXPECT_EQ(dims[i], TF_Dim(tensor, i)); 
-  }
-}
-
-template <typename T> 
-void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, 
-                     TF_OpKernelContext* ctx){ 
-    T* data = reinterpret_cast<T*>(TF_TensorData(tensor));
-#if GOOGLE_CUDA
-    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
-    cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, values,
-                                                  tensor_size_bytes);
-#else
-    memcpy(data, values, tensor_size_bytes);
-#endif
-}
-
-}  // namespace tensorflow
\ No newline at end of file
+}  // namespace tensorflow

From 843987549843c7113efccf998c65fb004006a116 Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Wed, 15 Jul 2020 23:40:50 +0000
Subject: [PATCH 09/10] added test for TF_ShapeInferenceContextScalar

---
 tensorflow/c/ops_test.cc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tensorflow/c/ops_test.cc b/tensorflow/c/ops_test.cc
index 482413f966c..63105b1ad00 100644
--- a/tensorflow/c/ops_test.cc
+++ b/tensorflow/c/ops_test.cc
@@ -316,5 +316,15 @@ TEST(OpsTest, ShapeInferenceSubshape) {
   TF_DeleteShapeHandle(handle);
 }
 
+TEST(OpsTest, ShapeInferenceScalarShape) { 
+  NodeDef def; 
+  shape_inference::InferenceContext c(0, def, MakeOpDef(0, 0), 
+      {S({})}, {}, {}, {});
+  TF_ShapeHandle* TF_scalar_shape = TF_ShapeInferenceContextScalar(C_CTX(&c)); 
+  shape_inference::ShapeHandle* scalar_shape = 
+      reinterpret_cast<shape_inference::ShapeHandle*>(TF_scalar_shape); 
+  ASSERT_EQ("[]", c.DebugString(*scalar_shape)); 
+}
+
 }  // namespace
 }  // namespace tensorflow

From 3f324403a59e312ee9a428bdd1b6c4baa529c127 Mon Sep 17 00:00:00 2001
From: Daniel Nguyen <nguyendaniel@google.com>
Date: Wed, 22 Jul 2020 17:49:59 +0000
Subject: [PATCH 10/10] fixed mem leak in ops_test

---
 tensorflow/c/ops_test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/c/ops_test.cc b/tensorflow/c/ops_test.cc
index 63105b1ad00..5d6ec8df1e3 100644
--- a/tensorflow/c/ops_test.cc
+++ b/tensorflow/c/ops_test.cc
@@ -323,7 +323,8 @@ TEST(OpsTest, ShapeInferenceScalarShape) {
   TF_ShapeHandle* TF_scalar_shape = TF_ShapeInferenceContextScalar(C_CTX(&c)); 
   shape_inference::ShapeHandle* scalar_shape = 
       reinterpret_cast<shape_inference::ShapeHandle*>(TF_scalar_shape); 
-  ASSERT_EQ("[]", c.DebugString(*scalar_shape)); 
+  ASSERT_EQ("[]", c.DebugString(*scalar_shape));
+  TF_DeleteShapeHandle(TF_scalar_shape); 
 }
 
 }  // namespace