From 907a55ebadce3c78bef73148613b327ed84d92e4 Mon Sep 17 00:00:00 2001
From: Gaurav Jain
Date: Thu, 2 Apr 2020 09:54:50 -0700
Subject: [PATCH] Remove implicit mirroring toggle

Implicit mirroring is set to true by default already and is essential for
eager performance. This CL just removes dead code since there is no API to
disable mirroring for tensors. We also shouldn't have this in the
TensorHandleInterface class since mirroring is a runtime-specific
implementation detail.

PiperOrigin-RevId: 304421014
Change-Id: I383fa24da08a86028cabb3a4b1c5f2612d57336d
---
 tensorflow/c/eager/c_api_experimental.cc        |  6 ------
 tensorflow/c/eager/c_api_experimental.h         |  6 ------
 tensorflow/c/eager/c_api_remote_test.cc         |  2 --
 tensorflow/c/eager/c_api_test.cc                |  1 -
 tensorflow/c/eager/tensor_handle_interface.h    |  8 --------
 tensorflow/core/common_runtime/eager/core.cc    | 13 ++++++-------
 tensorflow/core/common_runtime/eager/execute.cc |  8 ++++----
 .../core/common_runtime/eager/tensor_handle.cc  |  6 ------
 .../core/common_runtime/eager/tensor_handle.h   |  4 ----
 tensorflow/python/eager/pywrap_tensor.cc        |  9 ---------
 10 files changed, 10 insertions(+), 53 deletions(-)

diff --git a/tensorflow/c/eager/c_api_experimental.cc b/tensorflow/c/eager/c_api_experimental.cc
index 72ddf166cbd..6e4ac19c3ce 100644
--- a/tensorflow/c/eager/c_api_experimental.cc
+++ b/tensorflow/c/eager/c_api_experimental.cc
@@ -580,12 +580,6 @@ void TFE_HostAddressSpace(TFE_Context* ctx, TF_Buffer* buf) {
   };
 }
 
-void TFE_TensorHandleEnableImplicitMirroring(TFE_TensorHandle* h,
-                                             TF_Status* status) {
-  h->handle->EnableImplicitMirroring();
-  status->status = tensorflow::Status::OK();
-}
-
 void TFE_ContextGetFunctionDef(TFE_Context* ctx, const char* function_name,
                                TF_Buffer* buf, TF_Status* status) {
   tensorflow::EagerContext* context =
diff --git a/tensorflow/c/eager/c_api_experimental.h b/tensorflow/c/eager/c_api_experimental.h
index 5f9190af79a..45d15960a9f 100644
--- a/tensorflow/c/eager/c_api_experimental.h
+++ b/tensorflow/c/eager/c_api_experimental.h
@@ -392,12 +392,6 @@ TF_CAPI_EXPORT extern bool TFE_ContextCheckAlive(TFE_Context* ctx,
 TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context* ctx,
                                                 TF_Status* status);
 
-// If the TensorHandle is copied to another device as part of an op execution,
-// the copy is destroyed after the op has executed. Enabling implicit mirroring
-// causes the copy to be held as a mirror for the lifetime of the TensorHandle.
-TF_CAPI_EXPORT extern void TFE_TensorHandleEnableImplicitMirroring(
-    TFE_TensorHandle*, TF_Status*);
-
 // This function will block till the operation that produces `h` has
 // completed. This is only valid on local TFE_TensorHandles. The pointer
 // returned will be on the device in which the TFE_TensorHandle resides (so e.g.
diff --git a/tensorflow/c/eager/c_api_remote_test.cc b/tensorflow/c/eager/c_api_remote_test.cc
index a0c4830e5ef..a084795eef6 100644
--- a/tensorflow/c/eager/c_api_remote_test.cc
+++ b/tensorflow/c/eager/c_api_remote_test.cc
@@ -168,8 +168,6 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) {
   auto* h1_task2 =
       TFE_TensorHandleCopyToDevice(h1_task0, ctx, task2_name, status);
   ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
-  TFE_TensorHandleEnableImplicitMirroring(h1_task2, status);
-  EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
 
   // Handles are on task0 (local), and task2, but op is on task1.
   TFE_Op* matmul = MatMulOp(ctx, h0_task0, h1_task2);
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index f939a4a3035..6c4877b2ea2 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -594,7 +594,6 @@ void ExecuteAdd(bool async, bool forward_input) {
     TFE_TensorHandle* n_gpu =
         TFE_TensorHandleCopyToDevice(n, ctx, gpu_device_name.c_str(), status);
     EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
-    TFE_TensorHandleEnableImplicitMirroring(n_gpu, status);
     TFE_DeleteTensorHandle(n);
     n = n_gpu;
   }
diff --git a/tensorflow/c/eager/tensor_handle_interface.h b/tensorflow/c/eager/tensor_handle_interface.h
index 2b604f660b1..1ca40daec41 100644
--- a/tensorflow/c/eager/tensor_handle_interface.h
+++ b/tensorflow/c/eager/tensor_handle_interface.h
@@ -59,14 +59,6 @@ class AbstractTensorHandleInterface {
   // Return a copy of the handle.
   virtual AbstractTensorHandleInterface* Copy() = 0;
 
-  // Maintain mirror tensors for any implicit copies to local devices. This
-  // setting is offered on a per tensor handle basis to avoid potential memory
-  // over utilization due to holding on to mirrors as well as the original
-  // tensor. Note this setting overrides the context mirroring policy whereby if
-  // the mirroring policy is MIRRORING_NONE, we will still continue to mirror
-  // this tensor.
-  virtual void EnableImplicitMirroring() = 0;
-
  protected:
   virtual ~AbstractTensorHandleInterface() {}
 };
diff --git a/tensorflow/core/common_runtime/eager/core.cc b/tensorflow/core/common_runtime/eager/core.cc
index 40af562b86f..20500ac210e 100644
--- a/tensorflow/core/common_runtime/eager/core.cc
+++ b/tensorflow/core/common_runtime/eager/core.cc
@@ -83,13 +83,12 @@ AbstractTensorInterface* TensorHandle::Resolve(Status* status) {
   } else {
     *status = CopyToDevice(*ctx_, ctx_->HostCPU(), &tensor);
     if (!status->ok()) return nullptr;
-    if (ImplicitMirroring()) {
-      *status = AddEmptyLocalMirror(nullptr);
-      if (!status->ok()) return nullptr;
-      tensorflow::Tensor mirror = tensor;
-      *status = SetTensor(std::move(mirror), nullptr);
-      if (!status->ok()) return nullptr;
-    }
+
+    *status = AddEmptyLocalMirror(nullptr);
+    if (!status->ok()) return nullptr;
+    tensorflow::Tensor mirror = tensor;
+    *status = SetTensor(std::move(mirror), nullptr);
+    if (!status->ok()) return nullptr;
   }
   // TODO(b/153052876): Change TF_TensorFromTensor to just return an
   // AbstractTensorInterface
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index b304fa77883..f1c90119bda 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -157,9 +157,9 @@ Status CopyInputToExpectedDevice(EagerContext* ctx, EagerOperation* op,
                             " to ", expected_input_device->name());
       },
       profiler::TraceMeLevel::kInfo);
-  Status status = EagerCopyToDevice(
-      handle, ctx, &op->Executor(), expected_input_device,
-      handle->ImplicitMirroring() || ctx->MirrorTensors(), &result_handle);
+  Status status =
+      EagerCopyToDevice(handle, ctx, &op->Executor(), expected_input_device,
+                        /* mirror= */ true, &result_handle);
   activity.Stop();
   if (!status.ok()) {
     return errors::Internal("Failed copying input tensor from ",
@@ -416,7 +416,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
       TensorHandle* handle = nullptr;
       TF_RETURN_IF_ERROR(EagerCopyToDevice(
           input, &ctx, &executor, device == nullptr ? ctx.HostCPU() : device,
-          input->ImplicitMirroring() || ctx.MirrorTensors(), &handle));
+          /* mirror= */ true, &handle));
       op->UpdateInput(i, handle);
       // Unref handle since it has a ref as an input now
       handle->Unref();
diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc
index 385828a0426..21c873cb6f5 100644
--- a/tensorflow/core/common_runtime/eager/tensor_handle.cc
+++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc
@@ -137,7 +137,6 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device,
       op_device_(op_device),
       resource_device_(resource_device),
       ctx_(ctx),
-      implicit_mirroring_(true),
       data_(absl::in_place_type, std::move(t)) {
   DVLOG(3) << "Creating Local TensorHandle: " << this
            << " device: " << VariantDeviceDebugString(device_)
@@ -152,7 +151,6 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device,
       resource_device_(
          GetResourceDevice(t.flat()(0), ctx)),
       ctx_(ctx),
-      implicit_mirroring_(true),
       resource_handle_info_(
          {t.flat()(0).dtypes_and_shapes(),
           t.flat()(0).allowed_devices()}),
@@ -169,7 +167,6 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, CustomDevice* d,
       op_device_(nullptr),
       resource_device_(nullptr),
       ctx_(ctx),
-      implicit_mirroring_(true),
       data_(absl::in_place_type, std::move(t)) {
   // TODO(allenl): Figure out a better op_device story for custom devices,
   // since always setting it to CPU=nullptr doesn't make much sense.
@@ -193,7 +190,6 @@ TensorHandle::TensorHandle(Device* d, Device* op_device,
       op_device_(op_device),
       resource_device_(resource_device),
       ctx_(ctx),
-      implicit_mirroring_(true),
       data_(absl::in_place_type) {
   DVLOG(3) << "Creating empty Local TensorHandle: " << this
            << " device: " << VariantDeviceDebugString(device_);
@@ -215,7 +211,6 @@ TensorHandle::TensorHandle(int64 op_id, int32 output_num,
       op_device_(d),
       resource_device_(dtype == DT_RESOURCE ? d : nullptr),
       ctx_(ctx),
-      implicit_mirroring_(true),
       data_(absl::in_place_type, op_id, output_num,
             remote_task, ctx) {
   DVLOG(3) << "Creating Unshaped Remote TensorHandle: " << this
@@ -238,7 +233,6 @@ TensorHandle::TensorHandle(int64 op_id, int32 output_num,
       op_device_(d),
       resource_device_(dtype == DT_RESOURCE ? d : nullptr),
       ctx_(ctx),
-      implicit_mirroring_(true),
       data_(absl::in_place_type, op_id, output_num,
             ctx->GetContextViewId()) {
   DVLOG(3) << "Creating Lazy Remote TensorHandle: " << this
diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h
index 2eb72d65022..9309b4fcccd 100644
--- a/tensorflow/core/common_runtime/eager/tensor_handle.h
+++ b/tensorflow/core/common_runtime/eager/tensor_handle.h
@@ -115,8 +115,6 @@ class TensorHandle : public AbstractTensorHandleInterface,
 
   AbstractTensorHandleInterface* Copy() override;
 
-  void EnableImplicitMirroring() override { implicit_mirroring_ = true; }
-
   // Return the Tensor from the default device.
   Status Tensor(const tensorflow::Tensor** t) const;
   // Return the Tensor from the specified device which could be either the
@@ -207,7 +205,6 @@ class TensorHandle : public AbstractTensorHandleInterface,
   const tensorflow::DataType dtype;
 
   bool IsRemote() const;
-  bool ImplicitMirroring() const { return implicit_mirroring_; }
 
   string DebugString() const;
 
@@ -276,7 +273,6 @@ class TensorHandle : public AbstractTensorHandleInterface,
   // Does not need synchronization because it can be accessed only after
   // WaitReady() has returned. At that point, is_poisoned_ is immutable.
   Status is_poisoned_;
-  bool implicit_mirroring_;
 
   // If this TensorHandle 1) is a local tensor, and 2) is a resource handle or
   // refers to a remote resource handle, we store data types, shapes and allowed
diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc
index f8e1fb568ac..7dd7eb53fb1 100644
--- a/tensorflow/python/eager/pywrap_tensor.cc
+++ b/tensorflow/python/eager/pywrap_tensor.cc
@@ -292,15 +292,6 @@ TFE_TensorHandle* ConvertToEagerTensorUncached(TFE_Context* ctx,
     }
   }
 
-  // We always enable implicit mirroring for constants. Without this, code
-  // written previously under the assumption that
-  //
-  //   with tf.device('GPU:0'): x = tf.constant(1.0)
-  //
-  // will be placed in the GPU will suffer a non-trivial performance regression
-  // (measured at ~20% for certain benchmarks).
-  handle->handle->EnableImplicitMirroring();
-
   return handle.release();
 }
 
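Note (illustrative, not part of the patch): for C API users, the deleted test lines
above are the pattern that disappears. TFE_TensorHandleEnableImplicitMirroring opted
a handle into keeping implicit per-op copies alive as mirrors, but since mirroring
is already on by default the call was a no-op in practice, so the call sites and the
API itself are removed. Below is a minimal sketch of a call site after this change,
assuming the caller already holds a valid context, handle, status, and device name;
the helper name CopyForOpExecution is hypothetical.

#include "tensorflow/c/eager/c_api.h"

// Sketch only: copies `h` to `device_name` so later ops can use it there.
void CopyForOpExecution(TFE_Context* ctx, TFE_TensorHandle* h,
                        const char* device_name, TF_Status* status) {
  TFE_TensorHandle* h_copy =
      TFE_TensorHandleCopyToDevice(h, ctx, device_name, status);
  if (TF_GetCode(status) != TF_OK) return;  // details in TF_Message(status)

  // Before this patch, callers could additionally write:
  //   TFE_TensorHandleEnableImplicitMirroring(h_copy, status);
  // so that implicit copies of h_copy made during op execution were retained
  // as mirrors. That retention is always on, so the copy above is sufficient.

  TFE_DeleteTensorHandle(h_copy);
}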