This CL optimizes C++11 range-based for loops where the loop variable is copied in each iteration even though obtaining it by const reference would suffice. The change is only applied to loop variables of types that are expensive to copy, i.e., types that are not trivially copyable or that have a non-trivial copy constructor or destructor.

To ensure that it is safe to replace the copy with a const reference, the following heuristic is employed — at least one of these conditions must hold:
  The loop variable is const-qualified; or
  The loop variable is not const, but only const methods or operators are invoked on it, or it is used only as a const reference or as a by-value argument in constructors or function calls.

PiperOrigin-RevId: 305073051
Change-Id: Icfb5c016ad6c4be49ea67f6344307d013b352d78
This commit is contained in:
A. Unique TensorFlower 2020-04-06 11:13:16 -07:00 committed by TensorFlower Gardener
parent 9343f8b298
commit da2d11092e
8 changed files with 9 additions and 9 deletions

View File

@ -999,7 +999,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(stream->parent()),
&algorithms));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,

View File

@ -1164,7 +1164,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(stream->parent()),
&algorithms));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,

View File

@ -1392,7 +1392,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
ProfileResult best_result;
ProfileResult best_result_no_scratch;
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
@ -1880,7 +1880,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
&algorithms));
ProfileResult best_result;
ProfileResult best_result_no_scratch;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,

View File

@ -1012,7 +1012,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
WrapRedzoneBestEffort(&rz_allocator, output_ptr));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
se::RedzoneAllocator rz_scratch_allocator(

View File

@ -338,7 +338,7 @@ struct LaunchConvOp<GPUDevice, T, OpKernelContext> {
"see if a warning log message was printed above."));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times to better
// accuracy.
DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);

View File

@ -334,7 +334,7 @@ Status FindBestConvolveAlgorithm(const FusedConvParameters& params,
WrapRedzoneBestEffort(&rz_allocator, output_ptr));
std::vector<tensorflow::AutotuneResult> results;
for (auto profile_algorithm : algorithms) {
for (const auto& profile_algorithm : algorithms) {
DnnScratchAllocator scratch_allocator(ConvolveScratchSize(), context);
se::RedzoneAllocator rz_scratch_allocator(
stream, &tf_allocator_adapter, se::GpuAsmOpts(),

View File

@ -191,7 +191,7 @@ CudaSolver::CudaSolver(OpKernelContext* context) : context_(context) {
}
CudaSolver::~CudaSolver() {
for (auto tensor_ref : scratch_tensor_refs_) {
for (const auto& tensor_ref : scratch_tensor_refs_) {
tensor_ref.Unref();
}
}

View File

@ -124,7 +124,7 @@ class UniqueOp : public OpKernel {
context->allocate_output(0, output_shape, &output));
auto Tout = output->flat<T>();
for (auto it : uniq) {
for (const auto& it : uniq) {
Tout(it.second) = it.first;
}
} else {