301 lines
10 KiB
C++
301 lines
10 KiB
C++
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==============================================================================*/
|
|
|
|
#ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
|
|
#define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
|
|
|
|
#include <math.h>

#include <ostream>

#include "tensorflow/examples/android/jni/object_tracking/geom.h"
|
|
|
|
namespace tf_tracking {
|
|
|
|
// Type ids used to tag where a tracked keypoint came from. The numeric
// values themselves are arbitrary; they are spelled out explicitly so each id
// keeps its value regardless of declaration order.
enum KeypointType {
  KEYPOINT_TYPE_DEFAULT = 0,
  KEYPOINT_TYPE_FAST = 1,
  KEYPOINT_TYPE_INTEREST = 2,
};
|
|
|
|
// Wrapper around a detection score. Holding the score in a struct instead of
// a bare number leaves room to store richer detection results, while the
// relational operators defined after the struct keep scores comparable.
struct MatchScore {
  // Creates a score of 0.0.
  MatchScore() : value(0.0) {}

  // Creates a score holding `val`. Explicit so plain numbers are never
  // silently converted into scores.
  explicit MatchScore(double val) : value(val) {}

  // The raw score.
  double value;

  // Adds `rhs` into this score in place and returns this object.
  MatchScore& operator+=(const MatchScore& rhs) {
    value += rhs.value;
    return *this;
  }

  // Returns a new score holding the sum of both operands. Neither operand is
  // modified, so this is usable on const scores and inside comparisons
  // without side effects. Use operator+= for in-place accumulation.
  MatchScore operator+(const MatchScore& rhs) const {
    return MatchScore(value + rhs.value);
  }

  // Writes the raw numeric value to `stream` and returns the stream.
  friend std::ostream& operator<<(std::ostream& stream,
                                  const MatchScore& detection) {
    stream << detection.value;
    return stream;
  }
};

// Relational operators: scores are ordered purely by their `value` field.
inline bool operator<(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value < rhs.value;
}

inline bool operator>(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value > rhs.value;
}

inline bool operator>=(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value >= rhs.value;
}

inline bool operator<=(const MatchScore& lhs, const MatchScore& rhs) {
  return lhs.value <= rhs.value;
}
|
|
|
|
// Fixed seed shared by all random number generators.
static constexpr int kRandomNumberSeed = 11111;

// TODO(andrewharp): Migrate as many of these settings as possible into a
// settings object that Java can pass in at runtime.

// Selects ESM in place of LK flow when true.
static constexpr bool kUseEsm = false;

// Value added to the diagonal of the Hessian before the translation solve in
// 2dof ESM. It gives better behavior, particularly where strong texture is
// absent.
static constexpr int kEsmRegularizer = 20;

// When computing a keypoint patch's flow with ESM, should the patch be
// brightness-normalized?
static constexpr bool kDoBrightnessNormalize = true;

// Enables fixed-point interpolated pixel lookups in optical flow.
#define USE_FIXED_POINT_FLOW 1

// Enables intensity normalization of keypoint windows in LK optical flow.
// Kept as a preprocessor define for now because that keeps the code
// streamlined.
#define NORMALIZE 1
|
|
|
|
// How many keypoints are stored for each frame.
static constexpr int kMaxKeypoints = 76;

// Limit used during keypoint detection.
// NOTE(review): presumably the capacity for temporary candidate keypoints
// before they are narrowed down to kMaxKeypoints -- confirm against usage.
static constexpr int kMaxTempKeypoints = 1024;

// How many floats one keypoint occupies when exported to an array.
static constexpr int kKeypointStep = 7;

// How many frame deltas the circular queue retains.
static constexpr int kNumFrames = 512;

// Tracking iterations performed per keypoint at each pyramid level.
static constexpr int kNumIterations = 3;
|
|
|
|
// Each bin of the previous cache level is split into this many bins per side.
// Larger values cost performance through additional cache misses, but make
// cache hits more locally relevant.
static constexpr int kCacheBranchFactor = 2;

// How many levels the cache contains. Every level is a square grid of bins
// measuring branch_factor^(level - 1) bins along each side.
//
// This is allowed to exceed kNumPyramidLevels. A value of 0 disables caching.
static constexpr int kNumCacheLevels = 3;

// Level at which the cache pyramid is cut off and a matrix transform is used
// in its place, provided such a matrix has been given to the cache.
static constexpr int kCacheCutoff = 1;

// NOTE(review): undocumented in the original; presumably the number of levels
// in the image pyramid used for tracking -- confirm against usage.
static constexpr int kNumPyramidLevels = 4;

// Keypoint count associated with an object's area.
// NOTE(review): the name says "Max", yet the original comment described this
// as the minimum number of keypoints needed in an object's area -- confirm
// which meaning the callers rely on.
static constexpr int kMaxKeypointsForObject = 16;

// Fewest pyramid levels to run after obtaining a cached value. This permits
// fine-scale adjustment starting from the cached value, which comes from the
// center of the corresponding top cache level box.
// Valid range: [0, kNumPyramidLevels).
static constexpr int kMinNumPyramidLevelsToUseForAdjustment = 1;
|
|
|
|
// Size of the window integrated over when finding the local image derivative.
// The full window spans (2 * size + 1) pixels per side, see kFlowArraySize.
static constexpr int kFlowIntegrationWindowSize = 3;

// Total number of elements covered by one integration window.
static constexpr int kFlowArraySize =
    (2 * kFlowIntegrationWindowSize + 1) *
    (2 * kFlowIntegrationWindowSize + 1);

// Tracking aborts early once the error is at least this good.
static constexpr float kTrackingAbortThreshold = 0.03f;

// For region-based queries, a keypoint-correspondence delta further than this
// many deviations from the weighted average is thrown out.
static constexpr float kNumDeviations = 2.0f;

// Largest permitted difference between the forward and backward flow deltas,
// expressed relative to the length of the forward flow vector.
static constexpr float kMaxForwardBackwardErrorAllowed = 0.5f;
|
|
|
|
// Difference threshold above which pixels count as different.
static constexpr int kFastDiffAmount = 10;

// Distance from the frame edge at which the search for FAST keypoints stops.
static constexpr int kFastBorderBuffer = 10;

// Controls whether arbitrary (non-detected) keypoints get added to regions.
// Doing so helps when a region has no detected keypoints yet.
static constexpr bool kAddArbitraryKeypoints = true;

// Number of arbitrary keypoints added along each axis as candidates for each
// region.
static constexpr int kNumToAddAsCandidates = 1;

// How close together keypoints may be placed, expressed in terms of region
// dimensions.
static constexpr float kClosestPercent = 0.6f;

// Number of FAST-qualifying pixels that must be connected to a pixel before
// it is treated as a candidate keypoint for Harris filtering.
static constexpr int kMinNumConnectedForFastKeypoint = 8;

// Window size integrated over for Harris filtering.
// Compare to kFlowIntegrationWindowSize.
static constexpr int kHarrisWindowSize = 2;
|
|
|
|
|
|
// DETECTOR PARAMETERS
|
|
|
|
// Before relocalizing, make sure the new proposed position is better than
|
|
// the existing position by a small amount to prevent thrashing.
|
|
static const MatchScore kMatchScoreBuffer(0.01f);
|
|
|
|
// Minimum score a tracked object can have and still be considered a match.
|
|
// TODO(andrewharp): Make this a per detector thing.
|
|
static const MatchScore kMinimumMatchScore(0.5f);
|
|
|
|
static const float kMinimumCorrelationForTracking = 0.4f;
|
|
|
|
static const MatchScore kMatchScoreForImmediateTermination(0.0f);
|
|
|
|
// The detector runs once every this many frames.
static constexpr int kDetectEveryNFrames = 4;

// Number of features contained in each feature_set.
static constexpr int kFeaturesPerFeatureSet = 10;

// Number of FeatureSets the object detector manages.
// Adding FeatureSets can raise recall, at a cost in performance.
static constexpr int kNumFeatureSets = 7;

// Number of FeatureSets that must respond affirmatively before a candidate
// descriptor and position receive more thorough attention.
static constexpr int kNumFeatureSetsForCandidate = 2;

// Side length of the thumbnails used for correlation validation. Applies to
// both width and height.
static constexpr int kNormalizedThumbnailSize = 11;

// Bounding-box intersection-over-union value that tells whether this tracking
// has slipped enough to invalidate all unlocked examples.
static constexpr float kPositionOverlapThreshold = 0.6f;

// Detection failures tolerated before an object goes invisible.
// Tracking still continues, so an object that is in fact still being tracked
// and returns to a detectable position is likely to be found.
static constexpr int kMaxNumDetectionFailures = 4;
|
|
|
|
|
|
// Smallest square size scanned with the sliding window.
static constexpr float kScanMinSquareSize = 16.0f;

// Maximum square size to scan with sliding window.
static constexpr float kScanMaxSquareSize = 64.0f;

// Ratio between the scales of consecutive sliding-window scans.
// (Plain const: sqrtf is not guaranteed to be usable in a constant
// expression.)
static const float kScanScaleFactor = sqrtf(2.0f);

// Sliding-window step size.
static constexpr int kScanStepSize = 10;

// Packing tightness of the descriptor boxes for confirmed exemplars.
static const float kLockedScaleFactor = 1.0f / sqrtf(2.0f);

// Packing tightness of the descriptor boxes for unconfirmed exemplars.
static constexpr float kUnlockedScaleFactor = 1.0f / 2.0f;

// Packing tightness of the boxes scanned around the last known position.
static const float kLastKnownPositionScaleFactor = 1.0f / sqrtf(2.0f);

// Lower and upper bounds on how close a new object example must be to the
// existing object examples for a detection to be valid.
static constexpr float kMinCorrelationForNewExample = 0.75f;
static constexpr float kMaxCorrelationForNewExample = 0.99f;

// Safe tries granted to a freshly created exemplar before missed detections
// start counting against it.
static constexpr int kFreeTries = 5;

// Number of missed detections a single false positive is worth.
static constexpr int kFalsePositivePenalty = 5;
|
|
|
|
struct ObjectDetectorConfig {
|
|
const Size image_size;
|
|
|
|
explicit ObjectDetectorConfig(const Size& image_size)
|
|
: image_size(image_size) {}
|
|
virtual ~ObjectDetectorConfig() = default;
|
|
};
|
|
|
|
struct KeypointDetectorConfig {
|
|
const Size image_size;
|
|
|
|
bool detect_skin;
|
|
|
|
explicit KeypointDetectorConfig(const Size& image_size)
|
|
: image_size(image_size),
|
|
detect_skin(false) {}
|
|
};
|
|
|
|
|
|
struct OpticalFlowConfig {
|
|
const Size image_size;
|
|
|
|
explicit OpticalFlowConfig(const Size& image_size)
|
|
: image_size(image_size) {}
|
|
};
|
|
|
|
struct TrackerConfig {
|
|
const Size image_size;
|
|
KeypointDetectorConfig keypoint_detector_config;
|
|
OpticalFlowConfig flow_config;
|
|
bool always_track;
|
|
|
|
float object_box_scale_factor_for_features;
|
|
|
|
explicit TrackerConfig(const Size& image_size)
|
|
: image_size(image_size),
|
|
keypoint_detector_config(image_size),
|
|
flow_config(image_size),
|
|
always_track(false),
|
|
object_box_scale_factor_for_features(1.0f) {}
|
|
};
|
|
|
|
} // namespace tf_tracking
|
|
|
|
#endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_
|