This CL optimizes C++11 range-based for loops where the loop variable is copied in each iteration even though obtaining it by const reference would suffice. The change is only applied to loop variables of types that are expensive to copy, i.e., types that are not trivially copyable or that have a non-trivial copy constructor or destructor.

To ensure that it is safe to replace the copy with a const reference, the following heuristic is employed — at least one of these conditions must hold:
  The loop variable is const-qualified; or
  The loop variable is not const, but only const methods or operators are invoked on it, or it is used only as a const reference or as a by-value argument in constructors or function calls.

PiperOrigin-RevId: 305073051
Change-Id: Icfb5c016ad6c4be49ea67f6344307d013b352d78
This commit is contained in:
A. Unique TensorFlower 2020-04-06 11:13:16 -07:00 committed by TensorFlower Gardener
parent 9343f8b298
commit da2d11092e
8 changed files with 9 additions and 9 deletions

View File

@ -999,7 +999,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(stream->parent()),
&algorithms));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,

View File

@ -1164,7 +1164,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(stream->parent()),
&algorithms));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,

View File

@ -1392,7 +1392,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
ProfileResult best_result;
ProfileResult best_result_no_scratch;
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
@ -1880,7 +1880,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
&algorithms));
ProfileResult best_result;
ProfileResult best_result_no_scratch;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,

View File

@ -1012,7 +1012,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
WrapRedzoneBestEffort(&rz_allocator, output_ptr));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
se::RedzoneAllocator rz_scratch_allocator(

View File

@ -338,7 +338,7 @@ struct LaunchConvOp<GPUDevice, T, OpKernelContext> {
"see if a warning log message was printed above."));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);

View File

@ -334,7 +334,7 @@ Status FindBestConvolveAlgorithm(const FusedConvParameters& params,
WrapRedzoneBestEffort(&rz_allocator, output_ptr));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
DnnScratchAllocator scratch_allocator(ConvolveScratchSize(), context);
se::RedzoneAllocator rz_scratch_allocator(
stream, &tf_allocator_adapter, se::GpuAsmOpts(),

View File

@ -191,7 +191,7 @@ CudaSolver::CudaSolver(OpKernelContext* context) : context_(context) {
}
CudaSolver::~CudaSolver() {
for (auto tensor_ref : scratch_tensor_refs_) {
for (const auto& tensor_ref : scratch_tensor_refs_) {
tensor_ref.Unref();
}
}

View File

@ -124,7 +124,7 @@ class UniqueOp : public OpKernel {
context->allocate_output(0, output_shape, &output));
auto Tout = output->flat<T>();
for (auto it : uniq) {
for (const auto& it : uniq) {
Tout(it.second) = it.first;
}
} else {