From 0c9c859a7332c4d700ca131dd77944751d2feedc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 16 Aug 2019 09:47:23 -0700
Subject: [PATCH] Remove MetadataListener and StepStats from EagerContext

PiperOrigin-RevId: 263789105
---
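Reviewer note (git am ignores text between the "---" marker and the diff):
the deleted code hand-timed each eager op and cross-device copy and appended
NodeExecStats entries to RunMetadata.step_stats. The patch replaces that
bookkeeping with scoped profiler::TraceMe annotations, which report to the
profiler instead of RunMetadata. Below is a minimal sketch of the TraceMe
pattern as it appears in execute.cc in this patch; TimedCopy and DoCopy are
hypothetical stand-ins for the real call sites:

    #include "tensorflow/core/lib/core/status.h"
    #include "tensorflow/core/profiler/lib/traceme.h"

    using tensorflow::Status;

    Status DoCopy();  // hypothetical stand-in for EagerCopyToDevice

    Status TimedCopy() {
      // Opens a trace event named "_Send" when constructed.
      tensorflow::profiler::TraceMe activity(
          "_Send", tensorflow::profiler::TraceMeLevel::kInfo);
      Status status = DoCopy();  // the region being timed
      // End the event explicitly so error handling is not attributed to it.
      activity.Stop();
      return status;
    }

The event is recorded only while a profiler session is active; unlike the
removed StepStats path, nothing accumulates in RunMetadata.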
 tensorflow/c/eager/c_api.cc                  |  2 -
 .../core/common_runtime/eager/context.cc     | 39 +-------
 .../core/common_runtime/eager/context.h      |  8 --
 .../core/common_runtime/eager/execute.cc     | 94 ++-----------------
 tensorflow/python/eager/core_test.py         | 13 ---
 tensorflow/python/eager/function_test.py     | 10 --
 6 files changed, 12 insertions(+), 154 deletions(-)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 49d28915b39..b70f40cc46a 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -952,12 +952,10 @@ unsigned char TFE_ContextHasFunction(TFE_Context* ctx, const char* name) {
 
 void TFE_ContextEnableRunMetadata(TFE_Context* ctx) {
   ctx->context->SetShouldStoreGraphs(true);
-  ctx->context->SetShouldStoreStepStats(true);
 }
 
 void TFE_ContextDisableRunMetadata(TFE_Context* ctx) {
   ctx->context->SetShouldStoreGraphs(false);
-  ctx->context->SetShouldStoreStepStats(false);
 }
 
 }  // extern "C"
diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index eb1987f2405..8133ef35a45 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -342,28 +342,9 @@ Status EagerContext::FindDeviceByName(const string& name,
 }
 
 void EagerContext::ClearRunMetadata() {
-  if (metadata_listener_ != nullptr) {
-    metadata_listener_->BeforeClearRunMetadata();
-  }
   run_metadata_.Clear();
 }
 
-Status EagerContext::RegisterRunMetadataListener(
-    RunMetadataListener* listener) {
-  mutex_lock l(metadata_mu_);
-  if (metadata_listener_ != nullptr) {
-    return Status(error::Code::INVALID_ARGUMENT,
-                  "Cannot run two eager profiler at the same time");
-  }
-  metadata_listener_ = listener;
-  return Status::OK();
-}
-
-void EagerContext::ClearRunMetadataListener() {
-  mutex_lock l(metadata_mu_);
-  metadata_listener_ = nullptr;
-}
-
 void EagerContext::StartStep() {
   mutex_lock ml(metadata_mu_);
   num_active_steps_++;
@@ -504,28 +485,12 @@ void EagerContext::AddKernelToCache(Fprint128 cache_key,
   }
 }
 
-bool EagerContext::ShouldStoreGraphs() {
-  mutex_lock ml(metadata_mu_);
-  return should_store_graphs_.load() || metadata_listener_ != nullptr;
-}
-
-bool EagerContext::ShouldStoreStepStats() {
-  mutex_lock ml(metadata_mu_);
-  return should_store_step_stats_.load() || metadata_listener_ != nullptr;
-}
+bool EagerContext::ShouldStoreGraphs() { return should_store_graphs_.load(); }
 
 void EagerContext::SetShouldStoreGraphs(bool value) {
   mutex_lock ml(metadata_mu_);
   should_store_graphs_.store(value);
-  if (!value || metadata_listener_ != nullptr) {
-    run_metadata_.Clear();
-  }
-}
-
-void EagerContext::SetShouldStoreStepStats(bool value) {
-  mutex_lock ml(metadata_mu_);
-  should_store_step_stats_.store(value);
-  if (!value || metadata_listener_ != nullptr) {
+  if (!value) {
     run_metadata_.Clear();
   }
 }
diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h
index 88cae827b02..a60940d4021 100644
--- a/tensorflow/core/common_runtime/eager/context.h
+++ b/tensorflow/core/common_runtime/eager/context.h
@@ -243,17 +243,11 @@ class EagerContext : public core::RefCounted {
 
   // TODO(apassos) clean up RunMetadata storage.
   mutex* MetadataMu() LOCK_RETURNED(metadata_mu_) { return &metadata_mu_; }
-  bool ShouldStoreStepStats() LOCKS_EXCLUDED(metadata_mu_);
-  void SetShouldStoreStepStats(bool value);
   bool ShouldStoreGraphs() LOCKS_EXCLUDED(metadata_mu_);
   void SetShouldStoreGraphs(bool value);
   RunMetadata* RunMetadataProto() { return &run_metadata_; }
   void ClearRunMetadata() EXCLUSIVE_LOCKS_REQUIRED(metadata_mu_);
 
-  Status RegisterRunMetadataListener(RunMetadataListener* listener)
-      LOCKS_EXCLUDED(metadata_mu_);
-  void ClearRunMetadataListener() LOCKS_EXCLUDED(metadata_mu_);
-
   void StartStep();
   void EndStep();
   ScopedStepContainer* StepContainer();
@@ -399,11 +393,9 @@ class EagerContext : public core::RefCounted {
       GUARDED_BY(cache_mu_);
 
   // Whether we should compute RunMetadata.
-  std::atomic<bool> should_store_step_stats_{false};
   std::atomic<bool> should_store_graphs_{false};
   mutex metadata_mu_;
   RunMetadata run_metadata_ GUARDED_BY(metadata_mu_);
-  RunMetadataListener* metadata_listener_ GUARDED_BY(metadata_mu_) = nullptr;
   GraphCollector graph_collector_;
   // TODO(fishx): Allow update following two bool after context creation.
   const bool log_device_placement_;
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index 6eafa5d780e..ec4c78dda6a 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -126,7 +126,6 @@ const string DeviceNameOrUnspecified(const DeviceNameUtils::ParsedName& name) {
 // unset and we might have selected some specific device to run this op on.
 Status MaybeCopyInputToExpectedDevice(EagerOperation* op, Device* op_device,
                                       int i, Device* expected_input_device,
-                                      RunMetadata* run_metadata,
                                       TensorHandle** result) {
   tensorflow::TensorHandle* handle = op->Inputs()[i];
   EagerContext* ctx = op->EagerContext();
@@ -175,29 +174,12 @@ Status MaybeCopyInputToExpectedDevice(EagerOperation* op, Device* op_device,
   }
   // We are only here if the policy is warn or silent copies, so we should
   // trigger a copy.
-  auto pre_time_nanos = Env::Default()->NowNanos();
   TensorHandle* result_handle = nullptr;
+  profiler::TraceMe activity("_Send", profiler::TraceMeLevel::kInfo);
   Status status =
       EagerCopyToDevice(handle, ctx, op->Executor(), expected_input_device,
                         ctx->MirrorTensors(), &result_handle);
-  if (run_metadata != nullptr) {
-    auto* step_stats = run_metadata->mutable_step_stats();
-    MaybeInitializeStepStats(step_stats, ctx);
-    // Record the sending on the source device for now.
-    int device_idx = StepStatsDeviceIndex(step_stats, ctx, handle_device);
-    auto* dev_stats = step_stats->mutable_dev_stats(device_idx);
-    auto* node_stats = dev_stats->add_node_stats();
-    node_stats->set_node_name("_Send");
-    node_stats->set_all_start_micros(pre_time_nanos / EnvTime::kMicrosToNanos);
-    node_stats->set_all_start_nanos(pre_time_nanos);
-    int64 now_nanos = Env::Default()->NowNanos();
-    node_stats->set_op_end_rel_micros((now_nanos - pre_time_nanos) /
-                                      EnvTime::kMicrosToNanos);
-    node_stats->set_op_end_rel_nanos(now_nanos - pre_time_nanos);
-    node_stats->set_all_end_rel_micros((now_nanos - pre_time_nanos) /
-                                       EnvTime::kMicrosToNanos);
-    node_stats->set_all_end_rel_nanos(now_nanos - pre_time_nanos);
-  }
+  activity.Stop();
   if (!status.ok()) {
     if (result_handle != nullptr) result_handle->Unref();
     return errors::Internal("Failed copying input tensor from ",
@@ -216,8 +198,7 @@ Status MaybeCopyInputToExpectedDevice(EagerOperation* op, Device* op_device,
 // unspecified.
 Status ValidateInputTypeAndPlacement(
     EagerContext* ctx, EagerOperation* op,
-    const core::RefCountPtr<KernelAndDevice>& kernel,
-    RunMetadata* run_metadata) {
+    const core::RefCountPtr<KernelAndDevice>& kernel) {
   profiler::TraceMe activity("ValidateInputTypeAndPlacement",
                              profiler::TraceMeLevel::kInfo);
   if (kernel->num_inputs() != op->Inputs().size()) {
@@ -228,7 +209,7 @@ Status ValidateInputTypeAndPlacement(
     Device* expected_device = kernel->InputDevice(i);
     TensorHandle* handle = nullptr;
     TF_RETURN_IF_ERROR(MaybeCopyInputToExpectedDevice(
-        op, kernel->device(), i, expected_device, run_metadata, &handle));
+        op, kernel->device(), i, expected_device, &handle));
     op->UpdateInput(i, handle);
     // Unref handle since it has a ref as an input now
     handle->Unref();
@@ -642,29 +623,13 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
                              *num_retvals);
   }
   *num_retvals = num_outputs;
-  TF_RETURN_IF_ERROR(ValidateInputTypeAndPlacement(
-      ctx, op, kernel,
-      ctx->ShouldStoreStepStats() ? ctx->RunMetadataProto() : nullptr));
+  TF_RETURN_IF_ERROR(ValidateInputTypeAndPlacement(ctx, op, kernel));
 
-  std::unique_ptr<NodeExecStats> maybe_stats;
   StepStats* maybe_step_stats = nullptr;
   GraphCollector* graph_collector = nullptr;
   if (ctx->ShouldStoreGraphs()) {
     graph_collector = ctx->GetGraphCollector();
   }
-  if (ctx->ShouldStoreStepStats()) {
-    maybe_step_stats = ctx->RunMetadataProto()->mutable_step_stats();
-    int64 now_nanos = Env::Default()->NowNanos();
-    maybe_stats.reset(new NodeExecStats);
-    maybe_stats->set_node_name(op->Name());
-    maybe_stats->set_all_start_micros(now_nanos / EnvTime::kMicrosToNanos);
-    maybe_stats->set_all_start_nanos(now_nanos);
-    maybe_stats->set_op_start_rel_micros(0);
-    maybe_stats->set_op_start_rel_nanos(0);
-    maybe_stats->set_scheduled_micros(now_nanos / EnvTime::kMicrosToNanos);
-    maybe_stats->set_scheduled_nanos(now_nanos);
-    // TODO(apassos) track referenced tensors
-  }
 
   for (int i = 0; i < num_outputs; ++i) {
     TF_RETURN_IF_ERROR(TensorHandle::CreateAsyncLocalHandle(
@@ -674,10 +639,10 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
         output_dtypes[i], ctx, &retvals[i]));
   }
 
-  std::unique_ptr<EagerNode> node(new ExecuteNode(
-      ctx, op->Inputs(), std::move(kernel), maybe_stats.release(),
-      maybe_step_stats, graph_collector, output_dtypes,
-      op->GetCancellationManager(), {retvals, num_outputs}));
+  std::unique_ptr<EagerNode> node(
+      new ExecuteNode(ctx, op->Inputs(), std::move(kernel), nullptr,
+                      maybe_step_stats, graph_collector, output_dtypes,
+                      op->GetCancellationManager(), {retvals, num_outputs}));
   // Note that for async mode, execution order will make sure that all
   // input handles are ready before executing them.
   // TODO(b/137118203): Consider executing "cheap" kernels inline for
@@ -749,8 +714,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
       // the op might have its inputs on host memory.
       TensorHandle* handle = nullptr;
       TF_RETURN_IF_ERROR(MaybeCopyInputToExpectedDevice(
-          op, op->Device(), i, remote_cpu_device,
-          /* run_metadata= */ nullptr, &handle));
+          op, op->Device(), i, remote_cpu_device, &handle));
       op->UpdateInput(i, handle);
       input = handle;
       input_device = remote_cpu_device;
@@ -1049,44 +1013,6 @@ Status EagerKernelExecute(EagerContext* ctx,
       collector->ClearGraphs();
     }
   }
-  if (maybe_stats != nullptr) {
-    int64 nanos = Env::Default()->NowNanos();
-    maybe_stats->set_op_end_rel_micros(nanos / EnvTime::kMicrosToNanos -
-                                       maybe_stats->all_start_micros());
-    maybe_stats->set_op_end_rel_nanos(nanos - maybe_stats->all_start_nanos());
-    maybe_stats->set_all_end_rel_micros(nanos / EnvTime::kMicrosToNanos -
-                                        maybe_stats->all_start_micros());
-    maybe_stats->set_all_end_rel_nanos(nanos - maybe_stats->all_start_nanos());
-    if (ctx->ShouldStoreStepStats()) {
-      mutex_lock ml(*ctx->MetadataMu());
-      {
-        auto* step_stats = ctx->RunMetadataProto()->mutable_step_stats();
-        // Lazily initialize the RunMetadata with information about all devices
-        // if this is the first call.
-        while (step_stats->dev_stats_size() < ctx->devices()->size()) {
-          step_stats->add_dev_stats();
-        }
-        // Find the current device's index.
-        // If device is a nullptr (we are running a function without explicitly
-        // requested device), attribute the function runtime to CPU.
-        Device* attribution_device = kernel->device();
-        if (attribution_device == nullptr) {
-          attribution_device = ctx->HostCPU();
-        }
-        int device_idx = 0;
-        for (int i = 0; i < ctx->devices()->size(); ++i) {
-          if (ctx->devices()->at(i) == attribution_device) {
-            device_idx = i;
-            break;
-          }
-        }
-        // Populate the device stats for this device.
-        auto* dev_stats = step_stats->mutable_dev_stats(device_idx);
-        dev_stats->set_device(attribution_device->name());
-        *dev_stats->add_node_stats() = *maybe_stats;
-      }
-    }
-  }
   DCHECK_EQ(retvals.size(), outputs.size());
   for (int i = 0; i < retvals.size(); ++i) {
     DCHECK_EQ(kernel->device(), retvals[i]->op_device());
diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py
index 4d177425e2b..be6517a3f8c 100644
--- a/tensorflow/python/eager/core_test.py
+++ b/tensorflow/python/eager/core_test.py
@@ -323,19 +323,6 @@ class TFETest(test_util.TensorFlowTestCase):
     self.assertTrue(has_cpu_device)
     del ctx
 
-  def testRunMetadata(self):
-    context.enable_run_metadata()
-    t = constant_op.constant(1.0)
-    _ = t + t  # Runs an operation which will be in the RunMetadata
-    run_metadata = context.export_run_metadata()
-    context.disable_run_metadata()
-    step_stats = run_metadata.step_stats
-    self.assertGreater(len(step_stats.dev_stats), 0)
-    cpu_stats = step_stats.dev_stats[0]
-    self.assertEqual('/job:localhost/replica:0/task:0/device:CPU:0',
-                     cpu_stats.device)
-    self.assertGreaterEqual(len(cpu_stats.node_stats), 1)
-
   def testMultiCpuPlacement(self):
     with ops.device('cpu:1'):
       x = constant_op.constant(1.0)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 4261de1527a..1e525e505b9 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -833,16 +833,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     f(constant_op.constant(1.0))
     run_metadata = context.export_run_metadata()
     context.disable_run_metadata()
-    step_stats = run_metadata.step_stats
-    self.assertNotEmpty(step_stats.dev_stats)
-    cpu_stats = step_stats.dev_stats[0]
-    self.assertEqual('/job:localhost/replica:0/task:0/device:CPU:0',
-                     cpu_stats.device)
-    # Testing for at least 2 because the function call should generate at most
-    # one entry in the step_stats; the ops inside function can generate
-    # arbitrarily many (placeholders, return identities, etc, might be included
-    # or not in the future, so shouldn't be tested for exactly.
-    self.assertGreaterEqual(len(cpu_stats.node_stats), 2)
     self.assertLen(run_metadata.partition_graphs, 1)
 
   def testGraphModeCaptureVariable(self):
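Post-patch note: with this change, TFE_ContextEnableRunMetadata only turns on
graph collection, so RunMetadata exported from the context still carries
partition_graphs but no longer carries step_stats (the updated
function_test.py above asserts exactly that). A hedged usage sketch against
the eager C API of this era, assuming ctx and status were created earlier by
the caller:

    // Only graphs are stored after this patch; per-op timings come from the
    // profiler instead.
    TFE_ContextEnableRunMetadata(ctx);
    /* ... execute eager ops ... */
    TF_Buffer* metadata_buf = TF_NewBuffer();
    // Serializes the accumulated RunMetadata proto and clears it.
    TFE_ContextExportRunMetadata(ctx, metadata_buf, status);
    TFE_ContextDisableRunMetadata(ctx);
    TF_DeleteBuffer(metadata_buf);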