Only use hardware traces on a single subgraph since tracing more than that
isn't supported

PiperOrigin-RevId: 166364134
2017-08-24 10:55:08 -07:00 · 2017-08-24 10:55:08 -07:00 · 09fea258a3
commit 09fea258a3
parent a320bf1a78
4 changed files with 16 additions and 11 deletions
--- a/tensorflow/cc/training/queue_runner.cc
+++ b/tensorflow/cc/training/queue_runner.cc
@@ -88,7 +88,7 @@ QueueRunner::~QueueRunner() {
 Status QueueRunner::Start(Session* sess) { return Start(sess, 0); }

 Status QueueRunner::StartAndCollectCostGraph(Session* sess,
-                                             const RunOptions* run_options) {
+                                             const RunOptions& run_options) {
  SetRunArgumentsAndCostGraph(run_options);
  return Start(sess, 0);
 }
@@ -121,7 +121,7 @@ Status QueueRunner::Start(Session* sess, int wait_for) {
 }

 Status QueueRunner::StartAndCollectCostGraph(Session* session, int wait_for_ms,
-                                             const RunOptions* run_options) {
+                                             const RunOptions& run_options) {
  SetRunArgumentsAndCostGraph(run_options);
  return Start(session, wait_for_ms);
 }
@@ -214,15 +214,13 @@ Status QueueRunner::ExportCostGraph(CostGraphDef* cost_graph) const {
  return Status::OK();
 }

-void QueueRunner::SetRunArgumentsAndCostGraph(const RunOptions* run_options) {
+void QueueRunner::SetRunArgumentsAndCostGraph(const RunOptions& run_options) {
  cg_mu_.reset(new mutex());
  {
    mutex_lock l(*cg_mu_);
    cost_graph_.reset(new CostGraphDef());
  }
-  if (run_options) {
-    run_options_ = *run_options;
-  }
+  run_options_ = run_options;
 }

 Status QueueRunner::RealRun(Session* sess, const string& op,
--- a/tensorflow/cc/training/queue_runner.h
+++ b/tensorflow/cc/training/queue_runner.h
@@ -62,13 +62,13 @@ class QueueRunner : public RunnerInterface {
  /// Starts the queue runner with the given session and sets the run arguments
  /// for sess->Run. It also collects and stores the cost model.
  Status StartAndCollectCostGraph(Session* sess,
-                                  const RunOptions* run_options = nullptr);
+                                  const RunOptions& run_options = RunOptions());

  /// Starts the queue runner with the given session, and wait for up to the
  /// specified time (in milliseconds) for the queues to start to fill up.
  Status Start(Session* sess, int wait_for_ms);
  Status StartAndCollectCostGraph(Session* session, int wait_for_ms,
-                                  const RunOptions* run_options = nullptr);
+                                  const RunOptions& run_options = RunOptions());

  /// Requests to stop and runs the cancel op. It would be called in a separate
  /// thread when coordinator is set. If there is no coordinator it should be
@@ -105,7 +105,7 @@ class QueueRunner : public RunnerInterface {

  bool IsRunning() const override { return !stopped_; }

-  void SetRunArgumentsAndCostGraph(const RunOptions* run_options);
+  void SetRunArgumentsAndCostGraph(const RunOptions& run_options);

  Status RealRun(Session* sess, const string& op, bool update_costs);

--- a/tensorflow/cc/training/queue_runner_test.cc
+++ b/tensorflow/cc/training/queue_runner_test.cc
@@ -373,7 +373,7 @@ TEST(QueueRunnerTest, RunMetaDataTest) {
  std::unique_ptr<QueueRunner> qr;
  TF_EXPECT_OK(QueueRunner::New(queue_runner_def, &qr));
  RunOptions run_options;
-  TF_CHECK_OK(qr->StartAndCollectCostGraph(session.get(), &run_options));
+  TF_CHECK_OK(qr->StartAndCollectCostGraph(session.get(), run_options));

  // Make sure there was at least one element enqueued in q0: this prevents a
  // race condition where we close the queue before it was populated.
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@@ -156,12 +156,19 @@ Status SingleMachine::Run(const GraphDef& graph_def,
        // Also clear the timeline to save memory
        init_metadata_.clear_step_stats();
      }
+      // We can have at most one hardware trace. Use it for the main graph, and
+      // downgrade tracing of the queue runners to a software trace.
+      RunOptions queue_options = run_options_;
+      if (queue_options.trace_level() >= RunOptions::HARDWARE_TRACE) {
+        queue_options.set_trace_level(RunOptions::SOFTWARE_TRACE);
+      }
      for (size_t i = 0; i < queue_runner_defs_.size(); ++i) {
        std::unique_ptr<QueueRunner> queue_runner;
        TF_RETURN_IF_ERROR(QueueRunner::New(queue_runner_defs_[i],
                                            coordinator_.get(), &queue_runner));
+
        TF_RETURN_IF_ERROR(queue_runner->StartAndCollectCostGraph(
-            session_.get(), &run_options_));
+            session_.get(), queue_options));
        TF_RETURN_IF_ERROR(
            coordinator_->RegisterRunner(std::move(queue_runner)));
        TF_RETURN_IF_ERROR(coordinator_->GetStatus());