Improve CNMS (combined non-max suppression) performance by removing an unnecessary Tensor allocation and copy in DoNMSPerClass.

This commit is contained in:
Teng Lu 2020-11-17 09:04:54 +08:00
parent fa25e04b1f
commit 8e6ee2cfc6

View File

@@ -24,7 +24,6 @@ limitations under the License.
#include <queue> #include <queue>
#include <vector> #include <vector>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/register_types.h"
@@ -33,6 +32,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/logging.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow { namespace tensorflow {
namespace { namespace {
@@ -320,12 +320,6 @@ void DoNMSPerClass(int batch_idx, int class_idx, const float* boxes_data,
} }
} }
// Copy class_boxes_data to a tensor
TensorShape boxesShape({num_boxes, 4});
Tensor boxes(DT_FLOAT, boxesShape);
std::copy_n(class_boxes_data.begin(), class_boxes_data.size(),
boxes.unaligned_flat<float>().data());
// Do NMS, get the candidate indices of form vector<int> // Do NMS, get the candidate indices of form vector<int>
// Data structure for selection candidate in NMS. // Data structure for selection candidate in NMS.
struct Candidate { struct Candidate {
@@ -347,9 +341,10 @@ void DoNMSPerClass(int batch_idx, int class_idx, const float* boxes_data,
Candidate next_candidate; Candidate next_candidate;
std::sort(candidate_vector.begin(), candidate_vector.end(), cmp); std::sort(candidate_vector.begin(), candidate_vector.end(), cmp);
const Tensor const_boxes = boxes; // Move class_boxes_data to a tensor
typename TTypes<float, 2>::ConstTensor boxes_data_t = Eigen::array<Eigen::DenseIndex, 2> boxesShape = {num_boxes, 4};
const_boxes.tensor<float, 2>(); typename TTypes<float, 2>::ConstTensor boxes_data_t(class_boxes_data.data(),
boxesShape);
int candidate_idx = 0; int candidate_idx = 0;
float iou; float iou;
while (selected.size() < size_per_class && while (selected.size() < size_per_class &&