This CL optimizes C++11 range-based for loops where the loop variable is copied in each iteration although obtaining it by const reference would suffice. The fix is only applied to loop variables of types that are expensive to copy, meaning they are not trivially copyable or have a non-trivial copy constructor or destructor.

To ensure that replacing the copy with a const reference is safe, the following heuristic is employed: either
- the loop variable is const qualified, or
- the loop variable is not const, but only const methods or operators are invoked on it, or it is used as a const reference or value argument in constructors or function calls.

PiperOrigin-RevId: 305169937
Change-Id: I682f40e98a074f074332e6e4d0d47575c9909286
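The pattern being fixed, as a minimal standalone sketch (not code from this change; the container choice and the names Example/NotRewritten are invented for illustration):

#include <map>
#include <string>
#include <vector>

void Example(const std::map<std::string, std::string>& m) {
  // Before: each iteration copies a std::pair<const std::string, std::string>,
  // which is expensive because std::string has a non-trivial copy constructor.
  for (auto entry : m) {
    (void)entry.first.size();  // only const operations, so the copy is wasted
  }
  // After: binding by const reference eliminates the per-iteration copy.
  for (const auto& entry : m) {
    (void)entry.first.size();
  }
}

void NotRewritten(std::vector<std::string>& v) {
  // The heuristic must skip this loop: a non-const operation is invoked on
  // the loop variable, so removing the copy would change behavior.
  for (auto s : v) {
    s += "-suffix";  // mutates the copy, not the element in v
  }
}

The message's wording matches clang-tidy's performance-for-range-copy check; a fix of this shape can be applied mechanically with e.g. clang-tidy -checks=-*,performance-for-range-copy -fix, though the commit message does not state which tool was used.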
Parent: 4cd7132e91
Commit: ae0a9a4461
Changed paths:
  tensorflow/c/eager
  tensorflow/cc/framework
  tensorflow/compiler/tf2tensorrt/utils
  tensorflow/core/graph
  tensorflow/core/kernels/data
  tensorflow/core/profiler/internal
  tensorflow/core/util
  tensorflow/lite/delegates/gpu
  tensorflow/lite/python/interpreter_wrapper
  tensorflow/lite/toco/tflite
  tensorflow/lite/tools/evaluation/stages
  tensorflow/stream_executor
@@ -1516,7 +1516,7 @@ void TFE_OpAddAttrs(TFE_Op* op, const TFE_OpAttrs* attrs) {
   attrs->attributes->FillAttrValueMap(&m);
   tensorflow::EagerOperation* operation = OperationFromInterface(op->operation);
   tensorflow::AttrBuilder* destination = operation->MutableAttrs();
-  for (auto attribute : m) {
+  for (const auto& attribute : m) {
     destination->Set(attribute.first, attribute.second);
   }
 }
@@ -856,7 +856,7 @@ Status GradientTape<Gradient, BackwardFunction, TapeTensor>::ComputeGradient(
   }
   VLOG(1) << "Final gradients size: "
           << gradients.size() - used_gradient_ids.size();
-  for (auto grad_pair : gradients) {
+  for (const auto& grad_pair : gradients) {
     if (used_gradient_ids.find(grad_pair.first) == used_gradient_ids.end()) {
       for (const auto& g : grad_pair.second) {
         vspace.DeleteGradient(g);
@@ -537,7 +537,7 @@ Status CreateOutputWithScope(string op_name,
   TF_RETURN_IF_ERROR(scope.status());
   const auto unique_name = scope.GetUniqueNameForOp(op_name);
   auto builder = ::tensorflow::NodeBuilder(unique_name, op_name);
-  for (auto input : inputs) {
+  for (const auto& input : inputs) {
     TF_RETURN_IF_ERROR(scope.status());
     builder = builder.Input(input.node());
   }
@@ -59,7 +59,7 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data,
   VLOG(1) << "Set Batch Waiting finished";

   // Sets the batch.
-  for (const auto it : data) {
+  for (const auto& it : data) {
     auto devptr = dev_buffers_.find(it.first);
     if (devptr == dev_buffers_.end()) {
       LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first
@@ -1300,7 +1300,7 @@ Status GraphConstructor::Convert() {

 Status GraphConstructor::AddBackEdges() {
   // Add the back edges after all nodes are created.
-  for (auto e : back_edges_) {
+  for (const auto& e : back_edges_) {
     Node* src_node = gdef_nodes_[e.src_name].node;
     if (e.src_index == Graph::kControlSlot) {
       g_->AddControlEdge(src_node, e.dst_node, kDoNotCheckDuplicates);
@@ -245,7 +245,7 @@ class GraphHasher {

   // Hash regular inputs. We combine them in an ordered fashion.
   uint64 inputs_hash = 0;
-  for (auto input : node_rep->node_inputs) {
+  for (const auto& input : node_rep->node_inputs) {
     uint64 node_hash = 0;
     EdgeRep edge(node, input.first);
     // If the edge was pruned we get the non input node hash to avoid cycles.
@@ -1002,7 +1002,7 @@ class SnapshotDatasetOp : public UnaryDatasetOpKernel {
   // Book keeping to report some statistics.
   mutex_lock l(mu_);
   int64 num_bytes = 0;
-  for (auto out_tensor : *out_tensors) {
+  for (const auto& out_tensor : *out_tensors) {
     num_bytes += out_tensor.TotalBytes();
   }

@@ -449,7 +449,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
       initial_elements_created_ = true;
     }
   }
-  for (auto element : future_elements_) {
+  for (const auto& element : future_elements_) {
     element->initialized = true;
   }
   last_valid_current_element_ = current_elements_.size() - 1;
@@ -538,7 +538,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
   cancelled_ = true;
   // Wake up all threads so that they can exit. This will also wake up any
   // threads waiting in GetNextInternal.
-  for (auto element : current_elements_) {
+  for (const auto& element : current_elements_) {
     if (element) {
       element->cond_var.notify_all();
     }
@@ -189,7 +189,7 @@ class Samples {
   CHECK(!all_leaf.empty()) << node->name();

   for (const CodeNode* cn : all_leaf) {
-    for (auto gn_it : cn->node->graph_nodes()) {
+    for (const auto& gn_it : cn->node->graph_nodes()) {
       const TFGraphNode* gn = gn_it.second;
       string name = gn->name();
       // Generate a new trace name, in case the name is taken.
@@ -436,7 +436,7 @@ void TFCode::AddNode(TFGraphNode* node) {

 void TFCode::Build() {
   int64 unaccounted_nodes = 0;
-  for (auto it : grad_nodes_) {
+  for (const auto& it : grad_nodes_) {
     const string& forward_name = it.first;
     auto forward_it = forward_nodes_.find(forward_name);
     if (forward_it == forward_nodes_.end()) {
@@ -139,7 +139,7 @@ bool ShowMultiNode::ReInit(int64 step,

   std::vector<ShowNode> snodes;
   mutable_proto()->mutable_graph_nodes()->Clear();
-  for (auto it : node->graph_nodes()) {
+  for (const auto& it : node->graph_nodes()) {
     ShowNode snode(it.second);
     snodes.push_back(snode);
     snodes.back().ReInit(step);
@@ -304,7 +304,7 @@ TensorSliceReader::GetVariableToDataTypeMap() const {
 const string TensorSliceReader::DebugString() const {
   string shape_str;
   if (status().ok()) {
-    for (auto e : Tensors()) {
+    for (const auto& e : Tensors()) {
      strings::StrAppend(&shape_str, e.first, " (",
                         DataType_Name(e.second->type()), ") ",
                         e.second->shape().DebugString());
@@ -44,7 +44,7 @@ const TensorSliceReader* TensorSliceReaderCacheWrapper::GetReader(
 TensorSliceReaderCache::TensorSliceReaderCache() {}

 TensorSliceReaderCache::~TensorSliceReaderCache() {
-  for (auto pair : readers_) {
+  for (const auto& pair : readers_) {
     delete pair.second.second;
   }
 }
@@ -324,7 +324,7 @@ absl::Status CreateProfilingCommandQueue(const CLDevice& device,

 absl::Duration ProfilingInfo::GetTotalTime() const {
   absl::Duration total_time;
-  for (auto dispatch : dispatches) {
+  for (const auto& dispatch : dispatches) {
     total_time += dispatch.duration;
   }
   return total_time;
@@ -335,7 +335,7 @@ std::string ProfilingInfo::GetDetailedReport() const {
   std::map<std::string, double> timing;
   result +=
       "Per kernel timing(" + std::to_string(dispatches.size()) + " kernels):\n";
-  for (auto dispatch : dispatches) {
+  for (const auto& dispatch : dispatches) {
     result += " " + dispatch.label + " - " +
               std::to_string(absl::ToDoubleMilliseconds(dispatch.duration)) +
               "ms\n";
@@ -187,7 +187,7 @@ MaliGPU GetMaliGPUVersion(const std::string& device_name) {
       {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76},
       {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77},
   };
-  for (auto v : kMapping) {
+  for (const auto& v : kMapping) {
     if (device_name.find(v.first) != std::string::npos) {
       return v.second;
     }
@@ -115,10 +115,10 @@ int GetMaxSizeWithMinPenalty(int size, int max_size) {
 int2 GetMaxSizeWithMinPenalty(int2 size, int max_size) {
   std::vector<int2> base_groups = Get2DWorkgroupsEqualTo128();
   int min_penalty = std::numeric_limits<int>::max();
-  for (auto group : base_groups) {
+  for (const auto& group : base_groups) {
     min_penalty = std::min(GetPenalty(size, group), min_penalty);
   }
-  for (auto group : base_groups) {
+  for (const auto& group : base_groups) {
     for (int y = 1; y * group.y <= max_size; ++y) {
       int new_group_y = y * group.y;
       for (int x = 1; x * group.x <= max_size; ++x) {
@@ -450,7 +450,7 @@ absl::Status Compile(const GraphFloat32& graph, const DeviceInfo& device_info,
           primary_status.message()));
       }
     }
-    for (auto task : tasks) {
+    for (const auto& task : tasks) {
       task->description = node->operation.type + "_" + std::to_string(node->id);
     }
     compiled_model->insert(compiled_model->end(), tasks.begin(), tasks.end());
@@ -244,7 +244,7 @@ std::list<FusionSequence> SortChains(

   // Collect all inputs also for linked operations.
   std::vector<ValueId> elements_input_buffer_ids;
-  for (auto element : chain) {
+  for (const auto& element : chain) {
     for (const auto& buffer : element->input_buffers) {
       if (!Contains(elements_output_buffer_ids, buffer.id)) {
         elements_input_buffer_ids.push_back(buffer.id);
@@ -321,7 +321,7 @@ std::vector<ValueId> DeductOutputBufferIds(
   for (auto it2 = sorted_chains.begin(); it2 != sorted_chains.end(); ++it2) {
     if (it1 != it2) {
       std::vector<ValueId> input_ids;
-      for (auto element : *it2) {
+      for (const auto& element : *it2) {
         for (const auto& buffer : element->input_buffers) {
           input_ids.push_back(buffer.id);
         }
@@ -358,7 +358,7 @@ std::vector<int> DeleteUnusedTasks(const std::vector<ValueId>& output_ids,
   bool output_used = false;
   for (auto it2 = chains->rbegin(); it2 != chains->rend(); ++it2) {
     std::vector<ValueId> input_ids;
-    for (auto element : *it2) {
+    for (const auto& element : *it2) {
      for (const auto& buffer : element->input_buffers) {
        input_ids.push_back(buffer.id);
      }
@@ -516,7 +516,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
     fused_id = desc->output_buffer.id;
     invalid_id = false;

-    for (auto buffer : desc->immutable_buffers) {
+    for (const auto& buffer : desc->immutable_buffers) {
       std::string index = std::to_string(immutable_index);
       std::string name = (desc->is_linkable ? (" buffer" + index) : "");
       buffer_declarations +=
@@ -526,7 +526,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
       fused_descriptor->immutable_buffers.push_back(buffer);
     }

-    for (auto buffer : desc->uniform_buffers) {
+    for (const auto& buffer : desc->uniform_buffers) {
       std::string index = std::to_string(uniform_index);
       std::string name = (desc->is_linkable ? (" buffer" + index) : "");
       buffer_declarations +=
@@ -193,7 +193,7 @@ InterpreterWrapper* InterpreterWrapper::CreateInterpreterWrapper(
   }

   auto resolver = absl::make_unique<tflite::ops::builtin::BuiltinOpResolver>();
-  for (const auto registerer : registerers) {
+  for (const auto& registerer : registerers) {
     if (!RegisterCustomOpByName(registerer.c_str(), resolver.get(), error_msg))
       return nullptr;
   }
@@ -64,7 +64,7 @@ namespace tflite {
 ::tflite::OpSignature GetVersioningOpSig(
     const ::tflite::BuiltinOperator op, const OperatorSignature& op_signature) {
   std::vector<::tflite::TensorType> input_types, output_types;
-  for (auto input_name : op_signature.op->inputs) {
+  for (const auto& input_name : op_signature.op->inputs) {
     ::tflite::TensorType input_type = static_cast<::tflite::TensorType>(-1);
     if (op_signature.model->HasArray(input_name)) {
       const Array& input_array = op_signature.model->GetArray(input_name);
@@ -72,7 +72,7 @@ namespace tflite {
     }
     input_types.push_back(input_type);
   }
-  for (auto output_name : op_signature.op->outputs) {
+  for (const auto& output_name : op_signature.op->outputs) {
     ::tflite::TensorType output_type = static_cast<::tflite::TensorType>(-1);
     if (op_signature.model->HasArray(output_name)) {
       const Array& output_array = op_signature.model->GetArray(output_name);
@@ -172,7 +172,8 @@ TfLiteStatus PopulateGroundTruth(
   ObjectDetectionGroundTruth ground_truth_proto;
   google::protobuf::TextFormat::ParseFromString(proto_str, &ground_truth_proto);

-  for (auto image_ground_truth : ground_truth_proto.detection_results()) {
+  for (const auto& image_ground_truth :
+       ground_truth_proto.detection_results()) {
     (*ground_truth_mapping)[image_ground_truth.image_name()] =
         image_ground_truth;
   }
@@ -173,7 +173,7 @@ StreamExecutor::~StreamExecutor() {
   }

   if (FLAGS_check_device_leaks) {
-    for (auto it : mem_allocs_) {
+    for (const auto &it : mem_allocs_) {
       LOG(INFO) << "Memory alloced at executor exit: addr: "
                 << absl::StrFormat("%p", it.first)
                 << ", bytes: " << it.second.bytes << ", trace: \n"