This CL optimizes C++11 range-based for loops where the loop variable is copied in each iteration even though it would suffice to obtain it by const reference. The change is only applied to loop variables of types that are expensive to copy, i.e. types that are not trivially copyable or that have a non-trivial copy constructor or destructor.
To ensure that it is safe to replace the copy with a const reference, one of the following conditions must hold:
1. The loop variable is const-qualified.
2. The loop variable is not const, but only const methods or operators are invoked on it, or it is used only as a const reference or by-value argument in constructors or function calls.

PiperOrigin-RevId: 305156493
Change-Id: Ice12baac76b26caad1284b8b75d7602c59e80a20
parent 8d2955dcab
commit 62eccd262a
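The following is a minimal sketch of the transformation described in the commit message above; it is not code from this change, and the Widget type, TotalSize function, and loop body are hypothetical, chosen only to show an expensive-to-copy loop variable on which only const operations are performed.

#include <string>
#include <vector>

// Hypothetical expensive-to-copy type: the std::string member makes the
// copy constructor and destructor non-trivial.
struct Widget {
  std::string name;
  int size() const { return static_cast<int>(name.size()); }
};

int TotalSize(const std::vector<Widget>& widgets) {
  int total = 0;
  // Before the rewrite this read `for (Widget w : widgets)`, copying each
  // element. Binding by const reference is safe here because only the const
  // method size() is invoked on the loop variable.
  for (const Widget& w : widgets) {
    total += w.size();
  }
  return total;
}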
@@ -78,7 +78,7 @@ LayoutProto Layout::ToProto() const {
 string Layout::ToString() const {
   if (format() == DENSE) {
     string colon_string = tiles().empty() ? "" : "T";
-    for (Tile tile : tiles()) {
+    for (const Tile& tile : tiles()) {
       absl::StrAppend(&colon_string, tile.ToString());
     }
     if (element_size_in_bits() != 0) {
@@ -63,7 +63,7 @@ void SetDefaultLayoutToContainer(T* minor_to_major) {
   for (int64 dimension_number : minor_to_major) {
     layout.add_minor_to_major(dimension_number);
   }
-  for (Tile tile : tiles) {
+  for (const Tile& tile : tiles) {
     for (int64 dim : tile.dimensions()) {
       if (dim < 0 && dim != Tile::kCombineDimension) {
         LOG(FATAL) << "Tile dimension size needs to be minimum int64 value if "
@@ -441,7 +441,7 @@ Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src, Shape* dst) {
   for (int64 minor_to_major : layout.minor_to_major()) {
     hash_value = Hash64Combine(hash_value, hash<int64>()(minor_to_major));
   }
-  for (Tile tile : layout.tiles()) {
+  for (const Tile& tile : layout.tiles()) {
     for (int64 tile_dim : tile.dimensions()) {
       hash_value = Hash64Combine(hash_value, hash<int64>()(tile_dim));
     }
@@ -233,7 +233,7 @@ struct type_caster<absl::Span<const T>> {
     auto seq = reinterpret_borrow<sequence>(src);
     storage_.clear();
     storage_.reserve(seq.size());
-    for (auto it : seq) {
+    for (const auto& it : seq) {
      value_conv conv;
      if (!conv.load(it, convert)) {
        return false;
@@ -506,7 +506,7 @@ struct type_caster<xla::PaddingConfig> {
     sequence dimensions =
         reinterpret_borrow<sequence>(getattr(handle, "dimensions"));
 
-    for (auto dimension : dimensions) {
+    for (const auto& dimension : dimensions) {
       xla::PaddingConfig::PaddingConfigDimension* config_dim =
           value.add_dimensions();
       config_dim->set_edge_padding_low(
@@ -561,7 +561,7 @@ struct type_caster<xla::PrecisionConfig> {
     sequence operand_precisions =
         reinterpret_borrow<sequence>(getattr(handle, "operand_precision"));
 
-    for (auto operand_precision : operand_precisions) {
+    for (const auto& operand_precision : operand_precisions) {
       value.add_operand_precision(
           operand_precision.cast<xla::PrecisionConfig::Precision>());
     }
@@ -606,7 +606,7 @@ struct type_caster<xla::OpSharding> {
     sequence tuple_shardings =
         reinterpret_borrow<sequence>(getattr(handle_obj, "tuple_shardings"));
 
-    for (auto tuple_sharding : tuple_shardings) {
+    for (const auto& tuple_sharding : tuple_shardings) {
       xla::OpSharding* sharding = value.add_tuple_shardings();
 
       handle sharding_type = getattr(tuple_sharding, "type");
@@ -101,7 +101,7 @@ bool HasCombinableReplicaGroup(HloInstruction* hlo, int64 num_replicas,
   if (replica_groups.size() != num_replicas) {
     return false;
   }
-  for (auto group : replica_groups) {
+  for (const auto& group : replica_groups) {
     if (group.replica_ids_size() != num_partitions) {
       return false;
     }
@@ -534,7 +534,7 @@ StatusOr<bool> ArCrsCombiner::RewriteGraph() {
   if (all_reduce_map_.empty()) {
     return false;
   }
-  for (auto it : all_reduce_map_) {
+  for (const auto& it : all_reduce_map_) {
     auto pairs_vec = it.second;
     for (auto pair : pairs_vec) {
       auto all_reduce = pair.ar;
@@ -369,7 +369,7 @@ bool CallGraph::IsFlattened() const {
 std::vector<HloInstruction*> CallGraph::GetComputationCallers(
     HloComputation* c) {
   std::vector<HloInstruction*> callers;
-  for (auto callsite : GetNode(c).caller_callsites()) {
+  for (const auto& callsite : GetNode(c).caller_callsites()) {
     callers.push_back(callsite.instruction());
   }
   return callers;
@@ -88,7 +88,7 @@ void GpuDebugInfoManager::StopTracing(
   tensorflow::mutex_lock lock(mutex_);
   CHECK(tracing_active_);
   tracing_active_ = false;
-  for (const auto running_module_id : running_module_ids_) {
+  for (const auto& running_module_id : running_module_ids_) {
     const ModuleIdentifier& module_id = running_module_id.first;
     if (active_modules_.find(module_id) == active_modules_.end()) {
       LOG(ERROR) << "Cannot find debug info for module: " << module_id;
@@ -90,7 +90,7 @@ class GpuDebugInfoManager {
   std::set<ModuleIdentifier> GetRunningModules() {
     tensorflow::mutex_lock lock(mutex_);
     std::set<ModuleIdentifier> running;
-    for (const auto id : running_module_ids_) {
+    for (const auto& id : running_module_ids_) {
       running.insert(id.first);
     }
     return running;
@@ -98,7 +98,7 @@ class GpuDebugInfoManager {
   std::set<ModuleIdentifier> GetActiveModules() {
     tensorflow::mutex_lock lock(mutex_);
     std::set<ModuleIdentifier> active;
-    for (const auto id : active_modules_) {
+    for (const auto& id : active_modules_) {
      active.insert(id.first);
    }
    return active;
@@ -243,7 +243,7 @@ bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
   // and warn when a file is not used to ease catching typo in filename.
   std::string prefix = xla::FilenameFor(*module, "", *ptx);
   std::string matched_filename;
-  for (const string full_filename :
+  for (const string& full_filename :
        module->config().debug_options().xla_gpu_ptx_file()) {
     // To ease comparing many PTX versions, accept different suffixes then
     // the original filename.
@@ -564,7 +564,7 @@ HloAllReduceInstruction::HloAllReduceInstruction(
 }
 
 bool HloAllReduceInstruction::IsNoop() const {
-  for (auto replica_group : replica_groups()) {
+  for (const auto& replica_group : replica_groups()) {
     if (replica_group.replica_ids().size() != 1) {
       return false;
     }
@@ -1970,7 +1970,7 @@ Status LayoutAssignment::ConstrainChannelLayouts(
 
 Status LayoutAssignment::PropagateMemorySpace(HloModule* module) {
   TF_ASSIGN_OR_RETURN(auto alias_analysis, HloAliasAnalysis::Run(module));
-  for (auto buffer : alias_analysis->buffers()) {
+  for (const auto& buffer : alias_analysis->buffers()) {
     // First go through values to collect the memory spaces.
     int64 buffer_memory_space = Layout::kDefaultMemorySpace;
     for (auto value : buffer.values()) {
@@ -705,7 +705,7 @@ bool AlternateMemoryBestFitHeap::AreIntervalsReservedInAlternateMemory(
 }
 
 void AlternateMemoryBestFitHeap::UncommitPendingChunks() {
-  for (auto interval_and_chunk : pending_chunks_) {
+  for (const auto& interval_and_chunk : pending_chunks_) {
     const BufferInterval& interval = interval_and_chunk.first;
     const Chunk& chunk = interval_and_chunk.second.chunk;
     interval_tree_.Remove(interval.start, interval.end, chunk);
@@ -28,7 +28,7 @@ EmissionContext::EmissionContext(std::unique_ptr<HloModule> module)
   error_handler_ = [](const ErrorMap& instructions_with_error,
                       HloModule* module) {
     std::set<const HloComputation*> computations_with_error;
-    for (auto err : instructions_with_error) {
+    for (const auto& err : instructions_with_error) {
       computations_with_error.insert(err.first->parent());
     }
 