Merge pull request #34212 from Intel-tensorflow:Leslie_fang/resize_nearest_neighbor_op
PiperOrigin-RevId: 282052293 Change-Id: Icd8b3fc1abf0040dc303db0bd62120d2bdb6dd77
This commit is contained in:
commit
5023853fd7
tensorflow/core/kernels
@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/kernels/resize_nearest_neighbor_op.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
|
||||
#include "tensorflow/core/framework/op_kernel.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
@ -138,10 +139,13 @@ struct ResizeNearestNeighbor<CPUDevice, T, half_pixel_centers, align_corners> {
|
||||
const Eigen::Index in_height = input.dimension(1);
|
||||
const Eigen::Index in_width = input.dimension(2);
|
||||
const Eigen::Index channels = input.dimension(3);
|
||||
|
||||
const Eigen::Index out_height = output.dimension(1);
|
||||
const Eigen::Index out_width = output.dimension(2);
|
||||
|
||||
#ifdef PLATFORM_GOOGLE
|
||||
// The parallel version is significantly slower than the serial version
|
||||
// internally. Only call the serial version for now.
|
||||
// TODO(b/145019377): Make the parallel version work for PLATFORM_GOOGLE.
|
||||
for (Eigen::Index b = 0; b < batch_size; ++b) {
|
||||
for (Eigen::Index y = 0; y < out_height; ++y) {
|
||||
Eigen::Index in_y = std::min(
|
||||
@ -165,6 +169,41 @@ struct ResizeNearestNeighbor<CPUDevice, T, half_pixel_centers, align_corners> {
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
auto ParallelResize = [&](Eigen::Index start, Eigen::Index end) {
|
||||
for (Eigen::Index b = start; b < end; ++b) {
|
||||
Eigen::Index x = b % out_width;
|
||||
Eigen::Index y = (b / out_width) % out_height;
|
||||
Eigen::Index bs = (b / out_width) / out_height;
|
||||
Eigen::Index in_y = std::min(
|
||||
(align_corners)
|
||||
? static_cast<Eigen::Index>(roundf(scaler(y, height_scale)))
|
||||
: static_cast<Eigen::Index>(floorf(scaler(y, height_scale))),
|
||||
in_height - 1);
|
||||
if (half_pixel_centers) {
|
||||
in_y = std::max(static_cast<Eigen::Index>(0), in_y);
|
||||
}
|
||||
Eigen::Index in_x = std::min(
|
||||
(align_corners)
|
||||
? static_cast<Eigen::Index>(roundf(scaler(x, width_scale)))
|
||||
: static_cast<Eigen::Index>(floorf(scaler(x, width_scale))),
|
||||
in_width - 1);
|
||||
if (half_pixel_centers) {
|
||||
in_x = std::max(static_cast<Eigen::Index>(0), in_x);
|
||||
}
|
||||
std::copy_n(&input(bs, in_y, in_x, 0), channels, &output(bs, y, x, 0));
|
||||
}
|
||||
};
|
||||
Eigen::Index N = batch_size * out_height * out_width;
|
||||
const int input_bytes = channels * sizeof(T);
|
||||
const int output_bytes = channels * sizeof(T);
|
||||
const int compute_cycles = (Eigen::TensorOpCost::ModCost<T>() * 2 +
|
||||
Eigen::TensorOpCost::DivCost<T>() * 3 +
|
||||
Eigen::TensorOpCost::AddCost<T>() * 2 +
|
||||
Eigen::TensorOpCost::MulCost<T>() * 2);
|
||||
const Eigen::TensorOpCost cost(input_bytes, output_bytes, compute_cycles);
|
||||
d.parallelFor(N, cost, ParallelResize);
|
||||
#endif // PLATFORM_GOOGLE
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
@ -49,9 +49,11 @@ static Graph* BM_Resize(const char* algorithm, int batches, int width,
|
||||
BENCHMARK(BM_Resize_##ALGORITHM##_##DEVICE##_##B##_##W##_##H)
|
||||
|
||||
BM_ResizeDev(cpu, ResizeNearestNeighbor, 10, 499, 499);
|
||||
BM_ResizeDev(gpu, ResizeNearestNeighbor, 10, 499, 499);
|
||||
|
||||
BM_ResizeDev(cpu, ResizeBilinear, 10, 499, 499);
|
||||
|
||||
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
BM_ResizeDev(gpu, ResizeNearestNeighbor, 10, 499, 499);
|
||||
BM_ResizeDev(gpu, ResizeBilinear, 10, 499, 499);
|
||||
#endif
|
||||
|
||||
} // namespace tensorflow
|
||||
|
Loading…
Reference in New Issue
Block a user