From 91cb053af048ae8bde8fd293c73e775f23e0458a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 7 Apr 2020 14:33:47 -0700
Subject: [PATCH] This CL optimizes C++11 range-based for loops where the
 variable is copied in each iteration but it would suffice to obtain it by
 const reference.

This is only applied to loop variables of types that are expensive to copy,
which means they are not trivially copyable or have a non-trivial copy
constructor or destructor.

To ensure that it is safe to replace the copy with a const reference, the
following heuristic is employed; one of these conditions must hold:

1. The loop variable is const qualified.
2. The loop variable is not const, but only const methods or operators are
   invoked on it, or it is used as a const reference or value argument in
   constructors or function calls.

PiperOrigin-RevId: 305342272
Change-Id: I3e9955bf448d9670e41934437a90fa5c3b8c5b1c
---
 .../compiler/jit/extract_outside_compilation_pass.cc       | 8 ++++----
 tensorflow/compiler/jit/mark_for_compilation_pass.cc       | 4 ++--
 tensorflow/compiler/jit/node_matchers.cc                   | 2 +-
 tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc   | 2 +-
 tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc   | 2 +-
 .../compiler/tf2tensorrt/convert/trt_optimization_pass.cc  | 8 ++++----
 tensorflow/compiler/tf2xla/functionalize_control_flow.cc   | 2 +-
 tensorflow/compiler/tf2xla/resource_util.cc                | 4 ++--
 tensorflow/compiler/tf2xla/tf2xla_util.cc                  | 4 ++--
 tensorflow/core/common_runtime/buf_rendezvous.cc           | 2 +-
 tensorflow/core/common_runtime/direct_session.cc           | 2 +-
 tensorflow/core/common_runtime/step_stats_collector.cc     | 2 +-
 tensorflow/core/framework/attr_value_util.cc               | 2 +-
 tensorflow/core/framework/collective.cc                    | 2 +-
 tensorflow/core/framework/dataset.cc                       | 2 +-
 tensorflow/core/framework/dataset.h                        | 2 +-
 tensorflow/core/framework/function_handle_cache.cc         | 2 +-
 tensorflow/core/framework/function_testlib.cc              | 3 ++-
 tensorflow/core/framework/op_segment.cc                    | 4 ++--
 .../support/codegen/android_java_generator.cc              | 2 +-
 .../lite/experimental/support/codegen/code_generator.cc    | 6 +++---
 .../lite/experimental/writer/option_writer_generator.cc    | 2 +-
 tensorflow/lite/experimental/writer/writer_lib.cc          | 2 +-
 tensorflow/tools/graph_transforms/quantize_nodes.cc        | 4 ++--
 tensorflow/tools/graph_transforms/sparsify_gather.cc       | 2 +-
 25 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
index 9be72089dc3..5f1c3d536a8 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
@@ -321,7 +321,7 @@ xla::StatusOr<NodeDef> BuildXlaHostComputeNodeDef(
                             host_compute_builder.node_name());

   // Copy all attributes.
-  for (auto attr : call_node->attrs()) {
+  for (const auto& attr : call_node->attrs()) {
     host_compute_builder.Attr(attr.first, attr.second);
   }

@@ -346,7 +346,7 @@ xla::StatusOr<NodeDef> BuildXlaHostComputeNodeDef(
   xla_token_input_nodes.emplace_back(kXlaTokenArgNodeName);
   auto cluster_deps_it = cluster_deps.find(original_oc_name);
   if (cluster_deps_it != cluster_deps.end()) {
-    for (auto dep : cluster_deps_it->second) {
+    for (const auto& dep : cluster_deps_it->second) {
       xla_token_input_nodes.emplace_back(host_compute_node_name(dep));
     }
   }
@@ -2359,7 +2359,7 @@ Status ExtractOutsideCompilationForFunction(
     }
     // For XlaHostCompute nodes with dependencies, add control edges between
     // them so XlaCompiler can handle them in correct order.
-    for (auto iter : host_compute_nodes) {
+    for (const auto& iter : host_compute_nodes) {
       Node* host_compute_node = iter.second;
       std::vector<string> token_input_node_names;
       TF_RETURN_IF_ERROR(GetNodeAttr(host_compute_node->def(),
@@ -2479,7 +2479,7 @@ Status ExtractOutsideCompilation(

   TF_RETURN_IF_ERROR(fld->RemoveFunction(host_graph_func_name));

-  for (auto shape_inference_graph_name : shape_inference_graphs) {
+  for (const auto& shape_inference_graph_name : shape_inference_graphs) {
     TF_RETURN_IF_ERROR(RewriteShapeInferenceGraph(
         shape_inference_graph_name, g, pivot_node, fld));
   }
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
index 30318839908..77496fe7960 100644
--- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc
+++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -1161,7 +1161,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
   std::vector<string> vall_ops = XlaOpRegistry::GetAllRegisteredOps();
   absl::flat_hash_set<string> all_ops(vall_ops.begin(), vall_ops.end());
   // Check that user's provided TF operation really exists.
-  for (auto s : whitelist) {
+  for (const auto& s : whitelist) {
     if (!all_ops.contains(string(s))) {
       return errors::InvalidArgument(
           "The operation '", s,
@@ -1475,7 +1475,7 @@ void MarkForCompilationPassImpl::VLogClusteringSummary() {
           << RatioToString(auto_clustering_info.clustered_node_count(),
                            graph_->num_nodes());

-  for (XlaAutoClusteringSummary::Cluster cluster :
+  for (const XlaAutoClusteringSummary::Cluster& cluster :
        auto_clustering_info.clusters()) {
     absl::string_view cluster_name = cluster.name();
     int size = cluster.size();
diff --git a/tensorflow/compiler/jit/node_matchers.cc b/tensorflow/compiler/jit/node_matchers.cc
index 867bfe80202..bfdb6e751f5 100644
--- a/tensorflow/compiler/jit/node_matchers.cc
+++ b/tensorflow/compiler/jit/node_matchers.cc
@@ -493,7 +493,7 @@ std::pair<string, AttrValue> impl::AttrLiteralHelper(
     const std::pair<string, absl::Span<const string>>& string_list_attr) {
   AttrValue attr_value;
   AttrValue::ListValue* list = attr_value.mutable_list();
-  for (string s : string_list_attr.second) {
+  for (const string& s : string_list_attr.second) {
     list->add_s(s);
   }
   return {string_list_attr.first, attr_value};
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
index 3e9a7954b03..278f49da71b 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
@@ -641,7 +641,7 @@ Status ConvertAfterShapes(const ConversionParams& params) {
   // Segment the graph into subgraphs that can be converted to TensorRT
   segment::SegmentOptions segment_options;
   // TODO(ben,jie,sami): exclude output nodes (DISCUSS IT)
-  for (auto node : *(params.output_names)) {
+  for (const auto& node : *(params.output_names)) {
     segment_options.exclude_node_list.insert(node);
   }
   segment_options.minimum_segment_size = params.minimum_segment_size;
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
index 8083a55466a..bb705812c52 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
@@ -5738,7 +5738,7 @@ static void RegisterValidatableOpConverters(
        {"FusedBatchNorm", "FusedBatchNormV2", "FusedBatchNormV3"}) {
     (*registration)[normalization_op_type] = ConvertFusedBatchNorm;
   }
-  for (auto unary_op_pair : *UnaryOperationMap()) {
+  for (const auto& unary_op_pair : *UnaryOperationMap()) {
     (*registration)[unary_op_pair.first] = ConvertUnary;
   }
   for (auto reduce_op_type : {"Sum", "Prod", "Max", "Min", "Mean"}) {
diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc
index d9d8a4461a3..6ab719db54d 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc
@@ -93,7 +93,7 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster,
   const auto dev_names = cluster->GetDeviceNames();
   if (!dev_names.empty()) {
     LOG(INFO) << offset << " Device names:";
-    for (const auto s : dev_names) {
+    for (const auto& s : dev_names) {
       LOG(INFO) << offset2 << s;
     }
   }
@@ -101,7 +101,7 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster,
   auto status = cluster->GetPeakMemoryUsage(&peak_mem);
   if (status == Status::OK()) {
     LOG(INFO) << offset << "Peak Memory Usage :";
-    for (auto s : peak_mem) {
+    for (const auto& s : peak_mem) {
       LOG(INFO) << offset2 << s.first << " = " << s.second;
     }
   }
@@ -109,7 +109,7 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster,
   const auto dev_props = cluster->GetDevices();
   if (!dev_props.empty()) {
     LOG(INFO) << offset << "Device properties:";
-    for (auto k : dev_props) {
+    for (const auto& k : dev_props) {
       LOG(INFO) << offset2 << k.first;
       const auto& dt = k.second;
       LOG(INFO) << offset3 << "type = " << dt.type();
@@ -127,7 +127,7 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster,
       LOG(INFO) << offset3 << "bandwidth = " << dt.bandwidth();
       if (dt.environment_size()) {
         LOG(INFO) << offset3 << "environment :";
-        for (const auto e : dt.environment()) {
+        for (const auto& e : dt.environment()) {
           LOG(INFO) << offset4 << e.first << " = " << e.second;
         }
       }
diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
index 4bc1a5dd688..033dae2292d 100644
--- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc
@@ -125,7 +125,7 @@ Status FunctionalizeControlFlowForFunction(
       nodes_to_associated_functions.push_back({n, associated_functions});
     }
   }
-  for (auto iter : nodes_to_associated_functions) {
+  for (const auto& iter : nodes_to_associated_functions) {
     Node* n = iter.first;
     auto associated_functions = iter.second;
     for (auto& associated_function : associated_functions) {
diff --git a/tensorflow/compiler/tf2xla/resource_util.cc b/tensorflow/compiler/tf2xla/resource_util.cc
index 74e08060199..3537d09dde6 100644
--- a/tensorflow/compiler/tf2xla/resource_util.cc
+++ b/tensorflow/compiler/tf2xla/resource_util.cc
@@ -141,7 +141,7 @@ Status UpdateResourceUsageFromFunctionBodyAnalysis(
         caller_source_to_path) {
   std::unordered_map<std::string, Node*> node_name_index =
       fbody.graph->BuildNodeNameIndex();
-  for (auto it : called_function_source_to_path) {
+  for (const auto& it : called_function_source_to_path) {
     ResourceUsageAnalysis::NodeInfo src_node_info = it.first;

     // If source is an _Arg, then the true source is actually corresponding
@@ -309,7 +309,7 @@ Status AnalyzeResourceUsage(
     }
   }

-  for (auto it : user_to_source) {
+  for (const auto& it : user_to_source) {
     (*source_to_path)[it.second].emplace(function_name, it.first->dst()->name(),
                                          it.first->dst()->type_string());
   }
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index 1e164b30766..7475d3b5eac 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -621,7 +621,7 @@ Status RewriteAssociatedFunction(
       NodeDebugInfo debug_info(*node);
       NodeDefBuilder builder(node->name(), rewritten_function_name, fld,
                              &debug_info);
-      for (auto attr : node->attrs()) {
+      for (const auto& attr : node->attrs()) {
         builder.Attr(attr.first, attr.second);
       }
       for (int i = 0; i < node->num_inputs(); i++) {
@@ -695,7 +695,7 @@ Status CachedFunctionHandles::GetOrInstantiate(

 Status CachedFunctionHandles::ReleaseAllHandles() {
   Status result;
-  for (auto iter : handles_) {
+  for (const auto& iter : handles_) {
     result.Update(flr_->ReleaseHandle(iter.second));
   }
   handles_.clear();
diff --git a/tensorflow/core/common_runtime/buf_rendezvous.cc b/tensorflow/core/common_runtime/buf_rendezvous.cc
index b0780b3a297..6733a2e16a3 100644
--- a/tensorflow/core/common_runtime/buf_rendezvous.cc
+++ b/tensorflow/core/common_runtime/buf_rendezvous.cc
@@ -183,7 +183,7 @@ void BufRendezvous::LogContents() {
   LOG(INFO) << strings::StrCat("BufRendezvous ",
                                strings::Hex(reinterpret_cast<uint64>(this)),
                                " step_id=", step_id_, " current contents:");
-  for (auto it : hook_table_) {
+  for (const auto& it : hook_table_) {
     LOG(INFO) << it.first << ":" << it.second->DebugString();
   }
 }
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 2f5fbd2353c..044d45711cb 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -1624,7 +1624,7 @@ Status DirectSession::CreateGraphs(
   // Update our current state based on the execution_state's
   // placements. If there are any mismatches for a node,
   // we should fail, as this should never happen.
-  for (auto placement_pair : current_stateful_placements) {
+  for (const auto& placement_pair : current_stateful_placements) {
     const string& node_name = placement_pair.first;
     const string& placement = placement_pair.second;
     auto iter = stateful_placements_.find(node_name);
diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index e9f913d72b9..3cac56db776 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -305,7 +305,7 @@ void StepStatsCollector::BuildCostModel(
     }
   }

-  for (auto itr : device_map) {
+  for (const auto& itr : device_map) {
     const StringPiece device = itr.first;
     if (per_device_stats.find(device) == per_device_stats.end()) {
       continue;
diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc
index fb51da9dee2..a307c8a18c1 100644
--- a/tensorflow/core/framework/attr_value_util.cc
+++ b/tensorflow/core/framework/attr_value_util.cc
@@ -216,7 +216,7 @@ string SummarizeTensor(const TensorProto& tensor_proto) {

 string SummarizeFunc(const NameAttrList& func) {
   std::vector<string> entries;
-  for (auto p : func.attr()) {
+  for (const auto& p : func.attr()) {
     entries.push_back(
         strings::StrCat(p.first, "=", SummarizeAttrValue(p.second)));
   }
diff --git a/tensorflow/core/framework/collective.cc b/tensorflow/core/framework/collective.cc
index ad9165869a0..0b90adb78f8 100644
--- a/tensorflow/core/framework/collective.cc
+++ b/tensorflow/core/framework/collective.cc
@@ -102,7 +102,7 @@ string CollInstanceParams::ToString() const {
     strings::StrAppend(&v, n, ", ");
   }
   strings::StrAppend(&v, "} num_devices_per_task={");
-  for (const auto dpt : num_devices_per_task) {
+  for (const auto& dpt : num_devices_per_task) {
     strings::StrAppend(&v, dpt.first, ": ", dpt.second, ", ");
   }
   strings::StrAppend(&v, "}, collective_name=", impl_details.collective_name,
diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc
index 24d6bb3bdc9..e99fc951391 100644
--- a/tensorflow/core/framework/dataset.cc
+++ b/tensorflow/core/framework/dataset.cc
@@ -228,7 +228,7 @@ Status GraphDefBuilderWrapper::AddDataset(
     opts.reset(new GraphDefBuilder::Options(
         opts->WithAttr("output_types", dataset->output_dtypes())));
   }
-  for (auto attr : attrs) {
+  for (const auto& attr : attrs) {
     opts.reset(
         new GraphDefBuilder::Options(opts->WithAttr(attr.first, attr.second)));
   }
diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index a909e8594a2..10ea8cb738e 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -250,7 +250,7 @@ class GraphDefBuilderWrapper {
   bool HasAttr(const string& op_type_name, const string& attr_name) const;

   bool HasAttr(const OpDef* op_def, const string& attr_name) const {
-    for (auto attr : op_def->attr()) {
+    for (const auto& attr : op_def->attr()) {
       if (attr.name() == attr_name) {
         return true;
       }
diff --git a/tensorflow/core/framework/function_handle_cache.cc b/tensorflow/core/framework/function_handle_cache.cc
index b64161d50b3..16b3fa95114 100644
--- a/tensorflow/core/framework/function_handle_cache.cc
+++ b/tensorflow/core/framework/function_handle_cache.cc
@@ -57,7 +57,7 @@ Status FunctionHandleCache::Instantiate(

 Status FunctionHandleCache::Clear() {
   mutex_lock l(mu_);
-  for (auto entry : handles_) {
+  for (const auto& entry : handles_) {
     TF_RETURN_IF_ERROR(lib_->ReleaseHandle(entry.second));
   }
   handles_.clear();
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index d1562169915..5919ed7831b 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -53,7 +53,8 @@ NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
   n.set_op(string(op));
   for (const auto& in : inputs) n.add_input(in);
   n.set_device(device);
-  for (auto na : attrs) n.mutable_attr()->insert({na.first, na.second.proto});
+  for (const auto& na : attrs)
+    n.mutable_attr()->insert({na.first, na.second.proto});
   return n;
 }

diff --git a/tensorflow/core/framework/op_segment.cc b/tensorflow/core/framework/op_segment.cc
index f7e194baeed..5d7ca776fbc 100644
--- a/tensorflow/core/framework/op_segment.cc
+++ b/tensorflow/core/framework/op_segment.cc
@@ -26,13 +26,13 @@ limitations under the License.
 namespace tensorflow {

 OpSegment::Item::~Item() {
-  for (auto kv : name_kernel) delete kv.second;
+  for (const auto& kv : name_kernel) delete kv.second;
 }

 OpSegment::OpSegment() {}

 OpSegment::~OpSegment() {
-  for (auto kv : sessions_) delete kv.second;
+  for (const auto& kv : sessions_) delete kv.second;
 }

 Status OpSegment::FindOrCreate(const string& session_handle,
diff --git a/tensorflow/lite/experimental/support/codegen/android_java_generator.cc b/tensorflow/lite/experimental/support/codegen/android_java_generator.cc
index b16db570aaa..b0ac31c2fce 100644
--- a/tensorflow/lite/experimental/support/codegen/android_java_generator.cc
+++ b/tensorflow/lite/experimental/support/codegen/android_java_generator.cc
@@ -275,7 +275,7 @@ bool GenerateWrapperImports(CodeWriter* code_writer, const ModelInfo& model,
   }

   std::sort(imports.begin(), imports.end());
-  for (const auto target : imports) {
+  for (const auto& target : imports) {
     code_writer->SetTokenValue("TARGET", target);
     code_writer->Append("import {{TARGET}};");
   }
diff --git a/tensorflow/lite/experimental/support/codegen/code_generator.cc b/tensorflow/lite/experimental/support/codegen/code_generator.cc
index 687724815ef..39c0bd86d89 100644
--- a/tensorflow/lite/experimental/support/codegen/code_generator.cc
+++ b/tensorflow/lite/experimental/support/codegen/code_generator.cc
@@ -42,7 +42,7 @@ void ResolveConflictedNamesByAddingIndex(std::vector<std::string>* names_ptr) {
       names[i].append(std::to_string(indexes[names[i]]));
     }
   }
-  for (const auto it : first_appearance) {
+  for (const auto& it : first_appearance) {
     const auto& name = it.first;
     const auto i = it.second;
     if (indexes[name] > 1) {
@@ -148,11 +148,11 @@ void CodeGenerator::ResolveConflictedInputAndOutputNames(
   std::unordered_set<std::string> io_conflict;
   auto& input_names = *inputs;
   auto& output_names = *outputs;
-  for (const auto input : input_names) {
+  for (const auto& input : input_names) {
     if (io_conflict.find(input) != io_conflict.end()) {
       continue;
     }
-    for (const auto output : output_names) {
+    for (const auto& output : output_names) {
       if (input == output) {
         io_conflict.insert(input);
         break;
diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc
index 0ae895ffb51..a565422457c 100644
--- a/tensorflow/lite/experimental/writer/option_writer_generator.cc
+++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc
@@ -197,7 +197,7 @@ class OpOptionData {
     option_to_struct_["MirrorPadOptions"] = "TfLiteMirrorPaddingParams";
     // Now for every op, try to find an option.
     bool fatal = false;
-    for (auto op_name : ops_) {
+    for (const auto& op_name : ops_) {
      bool found_option = false;
      auto d = tflite::BuiltinOptionsTypeTable();
      std::string collapsed_option_name_guess =
diff --git a/tensorflow/lite/experimental/writer/writer_lib.cc b/tensorflow/lite/experimental/writer/writer_lib.cc
index a88544e95b1..85f57527c31 100644
--- a/tensorflow/lite/experimental/writer/writer_lib.cc
+++ b/tensorflow/lite/experimental/writer/writer_lib.cc
@@ -224,7 +224,7 @@ SubgraphWriter::ExportBuffers(flatbuffers::FlatBufferBuilder* fbb) {
 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>>
 SubgraphWriter::CreateOpCodeTable(flatbuffers::FlatBufferBuilder* fbb) {
   std::vector<flatbuffers::Offset<OperatorCode>> codes;
-  for (auto it : opcodes_) {
+  for (const auto& it : opcodes_) {
     const char* custom_name = it.custom.empty() ? nullptr : it.custom.c_str();
     codes.push_back(CreateOperatorCodeDirect(
         *fbb, static_cast<BuiltinOperator>(it.builtin), custom_name));
diff --git a/tensorflow/tools/graph_transforms/quantize_nodes.cc b/tensorflow/tools/graph_transforms/quantize_nodes.cc
index 40ca677480f..a03553af8f1 100644
--- a/tensorflow/tools/graph_transforms/quantize_nodes.cc
+++ b/tensorflow/tools/graph_transforms/quantize_nodes.cc
@@ -218,8 +218,8 @@ Status MergeDuplicateNodes(const GraphDef& input_graph_def,
   // duplicates and can be removed, unless they're stateful.
   std::map<string, string> inputs_to_rename;
   GraphDef merged_graph_def;
-  for (const std::pair<uint64, std::vector<const NodeDef*>> hashed_node_info :
-       hashed_nodes) {
+  for (const std::pair<uint64, std::vector<const NodeDef*>>&
+           hashed_node_info : hashed_nodes) {
     const std::vector<const NodeDef*>& hash_node_list =
         hashed_node_info.second;
     for (int i = 0; i < hash_node_list.size(); ++i) {
diff --git a/tensorflow/tools/graph_transforms/sparsify_gather.cc b/tensorflow/tools/graph_transforms/sparsify_gather.cc
index cc4078dfb85..9aaf9b98e19 100644
--- a/tensorflow/tools/graph_transforms/sparsify_gather.cc
+++ b/tensorflow/tools/graph_transforms/sparsify_gather.cc
@@ -478,7 +478,7 @@ Status SparsifyGatherInternal(
     }

     // Add nodes with a reference count of 0 for deletion.
-    for (auto entry : refs) {
+    for (const auto& entry : refs) {
       if (entry.second == 0) {
         removed_node_names.push_back(entry.first);
       }
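
For illustration, every hunk above reduces to the before/after pattern in the
following minimal sketch; the function and container names are hypothetical
and are not taken from the TensorFlow sources in this patch.

// Hypothetical example; names are illustrative only.
#include <map>
#include <string>

// Before: 'attr' copies the map's std::pair<const std::string, std::string>
// element on every iteration, which may allocate for both strings.
int CountNonEmptyByCopy(const std::map<std::string, std::string>& attrs) {
  int count = 0;
  for (auto attr : attrs) {
    if (!attr.second.empty()) ++count;
  }
  return count;
}

// After: binding the element by const reference avoids the per-iteration
// copy; this is safe here because the loop body only calls const member
// functions on the element, matching the heuristic in the commit message.
int CountNonEmptyByConstRef(const std::map<std::string, std::string>& attrs) {
  int count = 0;
  for (const auto& attr : attrs) {
    if (!attr.second.empty()) ++count;
  }
  return count;
}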