From 2002d5e2834fe0150ac8007b620e1c33f462655a Mon Sep 17 00:00:00 2001 From: leslie-fang-intel <leslie.fang@intel.com> Date: Wed, 20 Nov 2019 20:30:38 +0800 Subject: [PATCH] remove ParallelExecute and use d.parallelFor instead of Shard --- .../kernels/resize_nearest_neighbor_op.cc | 29 +++++-------------- .../core/kernels/resize_nearest_neighbor_op.h | 3 +- .../core/kernels/resize_op_benchmark_test.cc | 2 +- 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc index 998881defe0..71f964dc1f6 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc @@ -69,13 +69,13 @@ class ResizeNearestNeighborOp : public OpKernel { /*half_pixe_centers=*/true, /*align_corners=*/true>()( context->eigen_device<Device>(), input_data, st.height_scale, - st.width_scale, output_data, context); + st.width_scale, output_data); } else { status = functor::ResizeNearestNeighbor<Device, T, /*half_pixe_centers=*/true, /*align_corners=*/false>()( context->eigen_device<Device>(), input_data, st.height_scale, - st.width_scale, output_data, context); + st.width_scale, output_data); } } else { if (align_corners_) { @@ -83,13 +83,13 @@ class ResizeNearestNeighborOp : public OpKernel { /*half_pixe_centers=*/false, /*align_corners=*/true>()( context->eigen_device<Device>(), input_data, st.height_scale, - st.width_scale, output_data, context); + st.width_scale, output_data); } else { status = functor::ResizeNearestNeighbor<Device, T, /*half_pixe_centers=*/false, /*align_corners=*/false>()( context->eigen_device<Device>(), input_data, st.height_scale, - st.width_scale, output_data, context); + st.width_scale, output_data); } } if (!status) { @@ -131,13 +131,9 @@ struct BoolToScaler<false> { namespace functor { template <typename T, bool half_pixel_centers, bool align_corners> struct ResizeNearestNeighbor<CPUDevice, T, 
half_pixel_centers, align_corners> { - bool ParallelExecute(const CPUDevice& d, - typename TTypes<T, 4>::ConstTensor input, - const float height_scale, const float width_scale, - typename TTypes<T, 4>::Tensor output, - OpKernelContext* c) { - const DeviceBase::CpuWorkerThreads& worker_threads = - *(c->device()->tensorflow_cpu_worker_threads()); + bool operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor input, + const float height_scale, const float width_scale, + typename TTypes<T, 4>::Tensor output) { const Eigen::Index batch_size = input.dimension(0); const Eigen::Index in_height = input.dimension(1); const Eigen::Index in_width = input.dimension(2); @@ -170,18 +166,9 @@ struct ResizeNearestNeighbor<CPUDevice, T, half_pixel_centers, align_corners> { } }; Eigen::Index N = batch_size * out_height * out_width; - Shard(worker_threads.num_threads, worker_threads.workers, N, 1000.0, - ParallelResize); // TODO: Come up with a good cost estimate: - // 3500:26~27fps, 1000:27~28fps. 
+ d.parallelFor(N, Eigen::TensorOpCost(0, 0, 1000.0), ParallelResize); return true; } - bool operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor input, - const float height_scale, const float width_scale, - typename TTypes<T, 4>::Tensor output, - OpKernelContext* context) { - return ParallelExecute(d, input, height_scale, width_scale, output, - context); - } }; } // namespace functor diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.h b/tensorflow/core/kernels/resize_nearest_neighbor_op.h index 3f564bf23cc..46264790b34 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.h +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.h @@ -28,8 +28,7 @@ template <typename Device, typename T, bool half_pixel_centers, struct ResizeNearestNeighbor { bool operator()(const Device& d, typename TTypes<T, 4>::ConstTensor input, const float height_scale, const float width_scale, - typename TTypes<T, 4>::Tensor output, - OpKernelContext* context = NULL); + typename TTypes<T, 4>::Tensor output); }; template <typename Device, typename T, bool half_pixel_centers, diff --git a/tensorflow/core/kernels/resize_op_benchmark_test.cc b/tensorflow/core/kernels/resize_op_benchmark_test.cc index 81564c7d7e0..e77cbf1c20a 100644 --- a/tensorflow/core/kernels/resize_op_benchmark_test.cc +++ b/tensorflow/core/kernels/resize_op_benchmark_test.cc @@ -51,7 +51,7 @@ static Graph* BM_Resize(const char* algorithm, int batches, int width, BM_ResizeDev(cpu, ResizeNearestNeighbor, 10, 499, 499); BM_ResizeDev(cpu, ResizeBilinear, 10, 499, 499); -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM BM_ResizeDev(gpu, ResizeNearestNeighbor, 10, 499, 499); BM_ResizeDev(gpu, ResizeBilinear, 10, 499, 499); #endif