Improve CombinedNonMaxSuppression (CNMS) performance by removing an unnecessary tensor allocation and copy.

2020-11-17 09:04:54 +08:00 · 2020-11-17 09:04:54 +08:00 · 8e6ee2cfc6
commit 8e6ee2cfc6
parent fa25e04b1f
1 changed files with 5 additions and 10 deletions
--- a/tensorflow/core/kernels/image/non_max_suppression_op.cc
+++ b/tensorflow/core/kernels/image/non_max_suppression_op.cc
@ -24,7 +24,6 @@ limitations under the License.
 #include <queue>
 #include <vector>
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
@ -33,6 +32,7 @@ limitations under the License.
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 namespace tensorflow {
 namespace {
@ -320,12 +320,6 @@ void DoNMSPerClass(int batch_idx, int class_idx, const float* boxes_data,
    }
  }
  // Copy class_boxes_data to a tensor
  TensorShape boxesShape({num_boxes, 4});
  Tensor boxes(DT_FLOAT, boxesShape);
  std::copy_n(class_boxes_data.begin(), class_boxes_data.size(),
              boxes.unaligned_flat<float>().data());
  // Do NMS, get the candidate indices of form vector<int>
  // Data structure for selection candidate in NMS.
  struct Candidate {
@ -347,9 +341,10 @@ void DoNMSPerClass(int batch_idx, int class_idx, const float* boxes_data,
  Candidate next_candidate;
  std::sort(candidate_vector.begin(), candidate_vector.end(), cmp);
-  const Tensor const_boxes = boxes;
+  // View class_boxes_data as a 2-D tensor in place (no allocation or copy)
-  typename TTypes<float, 2>::ConstTensor boxes_data_t =
+  Eigen::array<Eigen::DenseIndex, 2> boxesShape = {num_boxes, 4};
-      const_boxes.tensor<float, 2>();
+  typename TTypes<float, 2>::ConstTensor boxes_data_t(class_boxes_data.data(),
                                                      boxesShape);
  int candidate_idx = 0;
  float iou;
  while (selected.size() < size_per_class &&