Refactor test and add default ops type
This commit is contained in:
parent
2aabaa4b27
commit
51cd587741
tensorflow/core
@ -27,8 +27,6 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// typedef Eigen::ThreadPoolDevice CPUDevice;
|
||||
|
||||
template<typename T>
|
||||
class CTCLossOp : public OpKernel {
|
||||
typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic,
|
||||
|
@ -34,7 +34,7 @@ REGISTER_OP("CTCLoss")
|
||||
.Attr("ignore_longer_outputs_than_inputs: bool = false")
|
||||
.Output("loss: T")
|
||||
.Output("gradient: T")
|
||||
.Attr("T: {float, double}")
|
||||
.Attr("T: {float, double} = DT_FLOAT")
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
ShapeHandle inputs;
|
||||
ShapeHandle labels_indices;
|
||||
@ -70,7 +70,7 @@ REGISTER_OP("CTCGreedyDecoder")
|
||||
.Output("decoded_values: int64")
|
||||
.Output("decoded_shape: int64")
|
||||
.Output("log_probability: T")
|
||||
.Attr("T: {float, double}")
|
||||
.Attr("T: {float, double} = DT_FLOAT")
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
ShapeHandle inputs;
|
||||
ShapeHandle sequence_length;
|
||||
@ -101,7 +101,7 @@ REGISTER_OP("CTCBeamSearchDecoder")
|
||||
.Output("decoded_values: top_paths * int64")
|
||||
.Output("decoded_shape: top_paths * int64")
|
||||
.Output("log_probability: T")
|
||||
.Attr("T: {float, double}")
|
||||
.Attr("T: {float, double} = DT_FLOAT")
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
ShapeHandle inputs;
|
||||
ShapeHandle sequence_length;
|
||||
|
@ -59,8 +59,7 @@ tf_cc_tests(
|
||||
name = "ctc_beam_search_test",
|
||||
size = "small",
|
||||
srcs = [
|
||||
"ctc_beam_search_test_float.cc",
|
||||
"ctc_beam_search_test_double.cc",
|
||||
"ctc_beam_search_test.cc",
|
||||
],
|
||||
deps = [
|
||||
":ctc_beam_search_lib",
|
||||
|
@ -67,7 +67,7 @@ struct BeamEntry {
|
||||
inline bool Active() const { return newp.total != kLogZero<T>::val; }
|
||||
// Return the child at the given index, or construct a new one in-place if
|
||||
// none was found.
|
||||
BeamEntry<T>& GetChild(int ind) {
|
||||
BeamEntry<T, CTCBeamState>& GetChild(int ind) {
|
||||
auto entry = children.emplace(ind, nullptr);
|
||||
auto& child_entry = entry.first->second;
|
||||
// If this is a new child, populate the BeamEntry<CTCBeamState>*.
|
||||
@ -79,7 +79,7 @@ struct BeamEntry {
|
||||
std::vector<int> LabelSeq(bool merge_repeated) const {
|
||||
std::vector<int> labels;
|
||||
int prev_label = -1;
|
||||
const BeamEntry<T>* c = this;
|
||||
const BeamEntry<T, CTCBeamState>* c = this;
|
||||
while (c->parent != nullptr) { // Checking c->parent to skip root leaf.
|
||||
if (!merge_repeated || c->label != prev_label) {
|
||||
labels.push_back(c->label);
|
||||
|
@ -24,14 +24,18 @@ limitations under the License.
|
||||
|
||||
namespace {
|
||||
|
||||
typedef std::vector<std::vector<std::vector<double>>> TestData;
|
||||
typedef tensorflow::ctc::CTCBeamSearchDecoder<double> CTCBeamSearchDecoder
|
||||
typedef tensorflow::ctc::CTCDecoder<double> CTCDecoder;
|
||||
template <class T>
|
||||
using TestData = std::vector<std::vector<std::vector<T>>>;
|
||||
|
||||
//using tensorflow::ctc::CTCBeamSearchDecoder;
|
||||
//using tensorflow::ctc::CTCDecoder;
|
||||
using namespace tensorflow::ctc;
|
||||
|
||||
// The HistoryBeamState is used to keep track of the current candidate and
|
||||
// caches the expansion score (needed by the scorer).
|
||||
template <class T>
|
||||
struct HistoryBeamState {
|
||||
double score;
|
||||
T score;
|
||||
std::vector<int> labels;
|
||||
};
|
||||
|
||||
@ -40,32 +44,33 @@ struct HistoryBeamState {
|
||||
// a prefix of a dictionary word it gets a low probability at each step.
|
||||
//
|
||||
// The dictionary itself is hard-coded as a static const variable of the class.
|
||||
template <class T, class BeamState>
|
||||
class DictionaryBeamScorer
|
||||
: public tensorflow::ctc::BaseBeamScorer<HistoryBeamState> {
|
||||
: public tensorflow::ctc::BaseBeamScorer<T, BeamState > {
|
||||
public:
|
||||
void InitializeState(HistoryBeamState* root) const override {
|
||||
void InitializeState(BeamState* root) const override {
|
||||
root->score = 0;
|
||||
}
|
||||
|
||||
void ExpandState(const HistoryBeamState& from_state, int from_label,
|
||||
HistoryBeamState* to_state, int to_label) const override {
|
||||
void ExpandState(const BeamState& from_state, int from_label,
|
||||
BeamState* to_state, int to_label) const override {
|
||||
// Keep track of the current complete candidate by storing the labels along
|
||||
// the expansion path in the beam state.
|
||||
to_state->labels.push_back(to_label);
|
||||
SetStateScoreAccordingToDict(to_state);
|
||||
}
|
||||
|
||||
void ExpandStateEnd(HistoryBeamState* state) const override {
|
||||
void ExpandStateEnd(BeamState* state) const override {
|
||||
SetStateScoreAccordingToDict(state);
|
||||
}
|
||||
|
||||
double GetStateExpansionScore(const HistoryBeamState& state,
|
||||
double previous_score) const override {
|
||||
T GetStateExpansionScore(const BeamState& state,
|
||||
T previous_score) const override {
|
||||
return previous_score + state.score;
|
||||
}
|
||||
|
||||
double GetStateEndExpansionScore(
|
||||
const HistoryBeamState& state) const override {
|
||||
T GetStateEndExpansionScore(
|
||||
const BeamState& state) const override {
|
||||
return state.score;
|
||||
}
|
||||
|
||||
@ -74,14 +79,16 @@ class DictionaryBeamScorer
|
||||
static const std::vector<std::vector<int>> dictionary_;
|
||||
|
||||
private:
|
||||
void SetStateScoreAccordingToDict(HistoryBeamState* state) const;
|
||||
void SetStateScoreAccordingToDict(BeamState* state) const;
|
||||
};
|
||||
|
||||
const std::vector<std::vector<int>> DictionaryBeamScorer::dictionary_ = {
|
||||
template<class T, class BeamState>
|
||||
const std::vector<std::vector<int>> DictionaryBeamScorer<T, BeamState>::dictionary_ = {
|
||||
{3}, {3, 1}};
|
||||
|
||||
void DictionaryBeamScorer::SetStateScoreAccordingToDict(
|
||||
HistoryBeamState* state) const {
|
||||
template <class T, class BeamState>
|
||||
void DictionaryBeamScorer<T, BeamState>::SetStateScoreAccordingToDict(
|
||||
BeamState* state) const {
|
||||
// Check if the beam can still be a dictionary word (e.g. prefix of one).
|
||||
const std::vector<int>& candidate = state->labels;
|
||||
for (int w = 0; w < dictionary_.size(); ++w) {
|
||||
@ -92,32 +99,33 @@ void DictionaryBeamScorer::SetStateScoreAccordingToDict(
|
||||
}
|
||||
if (std::equal(word.begin(), word.begin() + candidate.size(),
|
||||
candidate.begin())) {
|
||||
state->score = std::log(1.0);
|
||||
state->score = std::log(T(1.0));
|
||||
return;
|
||||
}
|
||||
}
|
||||
// At this point, the candidate certainly can't be in the dictionary.
|
||||
state->score = std::log(0.01);
|
||||
state->score = std::log(T(0.01));
|
||||
}
|
||||
|
||||
TEST(CtcBeamSearch, DecodingWithAndWithoutDictionary) {
|
||||
template<class T>
|
||||
void ctc_beam_search_decoding_with_and_without_dictionary() {
|
||||
const int batch_size = 1;
|
||||
const int timesteps = 5;
|
||||
const int top_paths = 3;
|
||||
const int num_classes = 6;
|
||||
|
||||
// Plain decoder using hibernating beam search algorithm.
|
||||
CTCBeamSearchDecoder<>::DefaultBeamScorer default_scorer;
|
||||
CTCBeamSearchDecoder<> decoder(num_classes, 10 * top_paths, &default_scorer);
|
||||
typename CTCBeamSearchDecoder<T>::DefaultBeamScorer default_scorer;
|
||||
CTCBeamSearchDecoder<T> decoder(num_classes, 10 * top_paths, &default_scorer);
|
||||
|
||||
// Dictionary decoder, allowing only two dictionary words : {3}, {3, 1}.
|
||||
DictionaryBeamScorer dictionary_scorer;
|
||||
CTCBeamSearchDecoder<HistoryBeamState> dictionary_decoder(
|
||||
DictionaryBeamScorer<T, HistoryBeamState<T>> dictionary_scorer;
|
||||
CTCBeamSearchDecoder<T, HistoryBeamState<T> > dictionary_decoder(
|
||||
num_classes, top_paths, &dictionary_scorer);
|
||||
|
||||
// Raw data containers (arrays of floating-point values, ints, etc.).
|
||||
int sequence_lengths[batch_size] = {timesteps};
|
||||
double input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
T input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
{{0, 0.6, 0, 0.4, 0, 0}},
|
||||
{{0, 0.5, 0, 0.5, 0, 0}},
|
||||
{{0, 0.4, 0, 0.6, 0, 0}},
|
||||
@ -134,14 +142,14 @@ TEST(CtcBeamSearch, DecodingWithAndWithoutDictionary) {
|
||||
}
|
||||
|
||||
// Plain output, without any additional scoring.
|
||||
std::vector<CTCDecoder::Output> expected_output = {
|
||||
std::vector<typename CTCDecoder<T>::Output> expected_output = {
|
||||
{{1, 3}, {1, 3, 1}, {3, 1, 3}},
|
||||
};
|
||||
|
||||
// Dictionary outputs: preference for dictionary candidates. The
|
||||
// second-candidate is there, despite it not being a dictionary word, due to
|
||||
// stronger probability in the input to the decoder.
|
||||
std::vector<CTCDecoder::Output> expected_dict_output = {
|
||||
std::vector<typename CTCDecoder<T>::Output> expected_dict_output = {
|
||||
{{3}, {1, 3}, {3, 1}},
|
||||
};
|
||||
|
||||
@ -149,19 +157,19 @@ TEST(CtcBeamSearch, DecodingWithAndWithoutDictionary) {
|
||||
// mapping the memory from the container to an Eigen::ArrayXi,::MatrixXf,
|
||||
// using Eigen::Map.
|
||||
Eigen::Map<const Eigen::ArrayXi> seq_len(&sequence_lengths[0], batch_size);
|
||||
std::vector<Eigen::Map<const Eigen::MatrixXd>> inputs;
|
||||
std::vector<Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>> inputs;
|
||||
inputs.reserve(timesteps);
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
inputs.emplace_back(&input_data_mat[t][0][0], batch_size, num_classes);
|
||||
}
|
||||
|
||||
// Prepare containers for output and scores.
|
||||
std::vector<CTCDecoder::Output> outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : outputs) {
|
||||
std::vector<typename CTCDecoder<T>::Output> outputs(top_paths);
|
||||
for (typename CTCDecoder<T>::Output& output : outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
double score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::MatrixXd> scores(&score[0][0], batch_size, top_paths);
|
||||
T score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> scores(&score[0][0], batch_size, top_paths);
|
||||
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
@ -169,8 +177,8 @@ TEST(CtcBeamSearch, DecodingWithAndWithoutDictionary) {
|
||||
}
|
||||
|
||||
// Prepare dictionary outputs.
|
||||
std::vector<CTCDecoder::Output> dict_outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : dict_outputs) {
|
||||
std::vector<typename CTCDecoder<T>::Output> dict_outputs(top_paths);
|
||||
for (typename CTCDecoder<T>::Output& output : dict_outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
EXPECT_TRUE(
|
||||
@ -180,38 +188,39 @@ TEST(CtcBeamSearch, DecodingWithAndWithoutDictionary) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CtcBeamSearch, AllBeamElementsHaveFiniteScores) {
|
||||
template<class T>
|
||||
void ctc_beam_search_decoding_all_beam_elements_have_finite_scores() {
|
||||
const int batch_size = 1;
|
||||
const int timesteps = 1;
|
||||
const int top_paths = 3;
|
||||
const int num_classes = 6;
|
||||
|
||||
// Plain decoder using hibernating beam search algorithm.
|
||||
CTCBeamSearchDecoder<>::DefaultBeamScorer default_scorer;
|
||||
CTCBeamSearchDecoder<> decoder(num_classes, top_paths, &default_scorer);
|
||||
typename CTCBeamSearchDecoder<T>::DefaultBeamScorer default_scorer;
|
||||
CTCBeamSearchDecoder<T> decoder(num_classes, top_paths, &default_scorer);
|
||||
|
||||
// Raw data containers (arrays of floating-point values, ints, etc.).
|
||||
int sequence_lengths[batch_size] = {timesteps};
|
||||
double input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
T input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
{{0.4, 0.3, 0, 0, 0, 0.5}}};
|
||||
|
||||
// Convert data containers to the format accepted by the decoder, simply
|
||||
// mapping the memory from the container to an Eigen::ArrayXi,::MatrixXf,
|
||||
// using Eigen::Map.
|
||||
Eigen::Map<const Eigen::ArrayXi> seq_len(&sequence_lengths[0], batch_size);
|
||||
std::vector<Eigen::Map<const Eigen::MatrixXd>> inputs;
|
||||
std::vector<Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>> inputs;
|
||||
inputs.reserve(timesteps);
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
inputs.emplace_back(&input_data_mat[t][0][0], batch_size, num_classes);
|
||||
}
|
||||
|
||||
// Prepare containers for output and scores.
|
||||
std::vector<CTCDecoder::Output> outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : outputs) {
|
||||
std::vector<typename CTCDecoder<T>::Output> outputs(top_paths);
|
||||
for (typename CTCDecoder<T>::Output& output : outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
double score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::MatrixXd> scores(&score[0][0], batch_size, top_paths);
|
||||
T score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> scores(&score[0][0], batch_size, top_paths);
|
||||
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
// Make sure all scores are finite.
|
||||
@ -226,8 +235,9 @@ TEST(CtcBeamSearch, AllBeamElementsHaveFiniteScores) {
|
||||
|
||||
typedef int LabelState; // The state is simply the final label.
|
||||
|
||||
template <class T>
|
||||
class RapidlyDroppingLabelScorer
|
||||
: public tensorflow::ctc::BaseBeamScorer<LabelState> {
|
||||
: public tensorflow::ctc::BaseBeamScorer<T, LabelState> {
|
||||
public:
|
||||
void InitializeState(LabelState* root) const override {}
|
||||
|
||||
@ -238,27 +248,28 @@ class RapidlyDroppingLabelScorer
|
||||
|
||||
void ExpandStateEnd(LabelState* state) const override {}
|
||||
|
||||
double GetStateExpansionScore(const LabelState& state,
|
||||
double previous_score) const override {
|
||||
T GetStateExpansionScore(const LabelState& state,
|
||||
T previous_score) const override {
|
||||
// Drop off rapidly for later labels.
|
||||
const double kRapidly = 100;
|
||||
const T kRapidly = 100;
|
||||
return previous_score - kRapidly * state;
|
||||
}
|
||||
|
||||
double GetStateEndExpansionScore(const LabelState& state) const override {
|
||||
return 0;
|
||||
T GetStateEndExpansionScore(const LabelState& state) const override {
|
||||
return T(0);
|
||||
}
|
||||
};
|
||||
|
||||
TEST(CtcBeamSearch, LabelSelection) {
|
||||
template<class T>
|
||||
void ctc_beam_search_label_selection() {
|
||||
const int batch_size = 1;
|
||||
const int timesteps = 3;
|
||||
const int top_paths = 5;
|
||||
const int num_classes = 6;
|
||||
|
||||
// Decoder which drops off log-probabilities for labels 0 >> 1 >> 2 >> 3.
|
||||
RapidlyDroppingLabelScorer scorer;
|
||||
CTCBeamSearchDecoder<LabelState> decoder(num_classes, top_paths, &scorer);
|
||||
RapidlyDroppingLabelScorer<T> scorer;
|
||||
CTCBeamSearchDecoder<T, LabelState> decoder(num_classes, top_paths, &scorer);
|
||||
|
||||
// Raw data containers (arrays of floating-point values, ints, etc.).
|
||||
int sequence_lengths[batch_size] = {timesteps};
|
||||
@ -267,26 +278,26 @@ TEST(CtcBeamSearch, LabelSelection) {
|
||||
// The last one is empty label, and for simplicity we give it an extremely
|
||||
// high cost to ignore it. We also use the first label to break up the
|
||||
// repeated label sequence.
|
||||
double input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
T input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
{{-1e6, 1, 2, 3, 4, -1e6}},
|
||||
{{1e6, 0, 0, 0, 0, -1e6}}, // force label 0 to break up repeated
|
||||
{{-1e6, 1.1, 2.2, 3.3, 4.4, -1e6}},
|
||||
};
|
||||
|
||||
// Expected output without label selection
|
||||
std::vector<CTCDecoder::Output> expected_default_output = {
|
||||
std::vector<typename CTCDecoder<T>::Output> expected_default_output = {
|
||||
{{1, 0, 1}, {1, 0, 2}, {2, 0, 1}, {1, 0, 3}, {2, 0, 2}},
|
||||
};
|
||||
|
||||
// Expected output with label selection limiting to 2 items
|
||||
// this is suboptimal because only labels 3 and 4 were allowed to be seen.
|
||||
std::vector<CTCDecoder::Output> expected_output_size2 = {
|
||||
std::vector<typename CTCDecoder<T>::Output> expected_output_size2 = {
|
||||
{{3, 0, 3}, {3, 0, 4}, {4, 0, 3}, {4, 0, 4}, {3}},
|
||||
};
|
||||
|
||||
// Expected output with label width of 2.0. This would permit three labels at
|
||||
// the first timestep, but only two at the last.
|
||||
std::vector<CTCDecoder::Output> expected_output_width2 = {
|
||||
std::vector<typename CTCDecoder<T>::Output> expected_output_width2 = {
|
||||
{{2, 0, 3}, {2, 0, 4}, {3, 0, 3}, {3, 0, 4}, {4, 0, 3}},
|
||||
};
|
||||
|
||||
@ -294,19 +305,19 @@ TEST(CtcBeamSearch, LabelSelection) {
|
||||
// mapping the memory from the container to an Eigen::ArrayXi,::MatrixXf,
|
||||
// using Eigen::Map.
|
||||
Eigen::Map<const Eigen::ArrayXi> seq_len(&sequence_lengths[0], batch_size);
|
||||
std::vector<Eigen::Map<const Eigen::MatrixXd>> inputs;
|
||||
std::vector<Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>> inputs;
|
||||
inputs.reserve(timesteps);
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
inputs.emplace_back(&input_data_mat[t][0][0], batch_size, num_classes);
|
||||
}
|
||||
|
||||
// Prepare containers for output and scores.
|
||||
std::vector<CTCDecoder::Output> outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : outputs) {
|
||||
std::vector<typename CTCDecoder<T>::Output> outputs(top_paths);
|
||||
for (typename CTCDecoder<T>::Output& output : outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
double score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::MatrixXd> scores(&score[0][0], batch_size, top_paths);
|
||||
T score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> scores(&score[0][0], batch_size, top_paths);
|
||||
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
@ -314,14 +325,14 @@ TEST(CtcBeamSearch, LabelSelection) {
|
||||
}
|
||||
|
||||
// Try label selection size 2
|
||||
decoder.SetLabelSelectionParameters(2, -1);
|
||||
decoder.SetLabelSelectionParameters(2, T(-1));
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output_size2[0][path]);
|
||||
}
|
||||
|
||||
// Try label selection width 2.0
|
||||
decoder.SetLabelSelectionParameters(0, 2.0);
|
||||
decoder.SetLabelSelectionParameters(0, T(2.0));
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output_width2[0][path]);
|
||||
@ -329,18 +340,44 @@ TEST(CtcBeamSearch, LabelSelection) {
|
||||
|
||||
// Try both size 2 and width 2.0: the former is more constraining, so
|
||||
// it's equivalent to that.
|
||||
decoder.SetLabelSelectionParameters(2, 2.0);
|
||||
decoder.SetLabelSelectionParameters(2, T(2.0));
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output_size2[0][path]);
|
||||
}
|
||||
|
||||
// Size 4 and width > 3.3 are equivalent to no label selection
|
||||
decoder.SetLabelSelectionParameters(4, 3.3001);
|
||||
decoder.SetLabelSelectionParameters(4, T(3.3001));
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_default_output[0][path]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST(CtcBeamSearch, FloatDecodingWithAndWithoutDictionary) {
|
||||
ctc_beam_search_decoding_with_and_without_dictionary<float>();
|
||||
}
|
||||
|
||||
TEST(CtcBeamSearch, DoubleDecodingWithAndWithoutDictionary) {
|
||||
ctc_beam_search_decoding_with_and_without_dictionary<double>();
|
||||
}
|
||||
|
||||
|
||||
TEST(CtcBeamSearch, FloatAllBeamElementsHaveFiniteScores) {
|
||||
ctc_beam_search_decoding_all_beam_elements_have_finite_scores<float>();
|
||||
};
|
||||
|
||||
TEST(CtcBeamSearch, DoubleAllBeamElementsHaveFiniteScores) {
|
||||
ctc_beam_search_decoding_all_beam_elements_have_finite_scores<double>();
|
||||
};
|
||||
|
||||
TEST(CtcBeamSearch, FloatLabelSelection) {
|
||||
ctc_beam_search_label_selection<float>();
|
||||
}
|
||||
|
||||
TEST(CtcBeamSearch, DoubleLabelSelection) {
|
||||
ctc_beam_search_label_selection<double>();
|
||||
}
|
||||
|
||||
} // namespace
|
@ -1,346 +0,0 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// This test illustrates how to make use of the CTCBeamSearchDecoder using a
|
||||
// custom BeamScorer and BeamState based on a dictionary with a few artificial
|
||||
// words.
|
||||
#include "tensorflow/core/util/ctc/ctc_beam_search.h"
|
||||
|
||||
#include <cmath>
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
namespace {
|
||||
|
||||
typedef std::vector<std::vector<std::vector<float>>> TestData;
|
||||
typedef tensorflow::ctc::CTCBeamSearchDecoder<float> CTCBeamSearchDecoder;
|
||||
using tensorflow::ctc::CTCDecoder<double> CTCDecoder;
|
||||
|
||||
// The HistoryBeamState is used to keep track of the current candidate and
|
||||
// caches the expansion score (needed by the scorer).
|
||||
struct HistoryBeamState {
|
||||
float score;
|
||||
std::vector<int> labels;
|
||||
};
|
||||
|
||||
// DictionaryBeamScorer essentially favors candidates that can still become
|
||||
// dictionary words. As soon as a beam candidate is not a dictionary word or
|
||||
// a prefix of a dictionary word it gets a low probability at each step.
|
||||
//
|
||||
// The dictionary itself is hard-coded as a static const variable of the class.
|
||||
class DictionaryBeamScorer
|
||||
: public tensorflow::ctc::BaseBeamScorer<HistoryBeamState> {
|
||||
public:
|
||||
void InitializeState(HistoryBeamState* root) const override {
|
||||
root->score = 0;
|
||||
}
|
||||
|
||||
void ExpandState(const HistoryBeamState& from_state, int from_label,
|
||||
HistoryBeamState* to_state, int to_label) const override {
|
||||
// Keep track of the current complete candidate by storing the labels along
|
||||
// the expansion path in the beam state.
|
||||
to_state->labels.push_back(to_label);
|
||||
SetStateScoreAccordingToDict(to_state);
|
||||
}
|
||||
|
||||
void ExpandStateEnd(HistoryBeamState* state) const override {
|
||||
SetStateScoreAccordingToDict(state);
|
||||
}
|
||||
|
||||
float GetStateExpansionScore(const HistoryBeamState& state,
|
||||
float previous_score) const override {
|
||||
return previous_score + state.score;
|
||||
}
|
||||
|
||||
float GetStateEndExpansionScore(
|
||||
const HistoryBeamState& state) const override {
|
||||
return state.score;
|
||||
}
|
||||
|
||||
// Simple dictionary used when scoring the beams to check if they are prefixes
|
||||
// of dictionary words (see SetStateScoreAccordingToDict below).
|
||||
static const std::vector<std::vector<int>> dictionary_;
|
||||
|
||||
private:
|
||||
void SetStateScoreAccordingToDict(HistoryBeamState* state) const;
|
||||
};
|
||||
|
||||
const std::vector<std::vector<int>> DictionaryBeamScorer::dictionary_ = {
|
||||
{3}, {3, 1}};
|
||||
|
||||
void DictionaryBeamScorer::SetStateScoreAccordingToDict(
|
||||
HistoryBeamState* state) const {
|
||||
// Check if the beam can still be a dictionary word (e.g. prefix of one).
|
||||
const std::vector<int>& candidate = state->labels;
|
||||
for (int w = 0; w < dictionary_.size(); ++w) {
|
||||
const std::vector<int>& word = dictionary_[w];
|
||||
// If the length of the current beam is already larger, skip.
|
||||
if (candidate.size() > word.size()) {
|
||||
continue;
|
||||
}
|
||||
if (std::equal(word.begin(), word.begin() + candidate.size(),
|
||||
candidate.begin())) {
|
||||
state->score = std::log(1.0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// At this point, the candidate certainly can't be in the dictionary.
|
||||
state->score = std::log(0.01);
|
||||
}
|
||||
|
||||
TEST(CtcBeamSearch, DecodingWithAndWithoutDictionary) {
|
||||
const int batch_size = 1;
|
||||
const int timesteps = 5;
|
||||
const int top_paths = 3;
|
||||
const int num_classes = 6;
|
||||
|
||||
// Plain decoder using hibernating beam search algorithm.
|
||||
CTCBeamSearchDecoder<>::DefaultBeamScorer default_scorer;
|
||||
CTCBeamSearchDecoder<> decoder(num_classes, 10 * top_paths, &default_scorer);
|
||||
|
||||
// Dictionary decoder, allowing only two dictionary words : {3}, {3, 1}.
|
||||
DictionaryBeamScorer dictionary_scorer;
|
||||
CTCBeamSearchDecoder<HistoryBeamState> dictionary_decoder(
|
||||
num_classes, top_paths, &dictionary_scorer);
|
||||
|
||||
// Raw data containers (arrays of floats64, ints, etc.).
|
||||
int sequence_lengths[batch_size] = {timesteps};
|
||||
float input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
{{0, 0.6, 0, 0.4, 0, 0}},
|
||||
{{0, 0.5, 0, 0.5, 0, 0}},
|
||||
{{0, 0.4, 0, 0.6, 0, 0}},
|
||||
{{0, 0.4, 0, 0.6, 0, 0}},
|
||||
{{0, 0.4, 0, 0.6, 0, 0}}};
|
||||
|
||||
// The CTCDecoder works with log-probs.
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
for (int b = 0; b < batch_size; ++b) {
|
||||
for (int c = 0; c < num_classes; ++c) {
|
||||
input_data_mat[t][b][c] = std::log(input_data_mat[t][b][c]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Plain output, without any additional scoring.
|
||||
std::vector<CTCDecoder::Output> expected_output = {
|
||||
{{1, 3}, {1, 3, 1}, {3, 1, 3}},
|
||||
};
|
||||
|
||||
// Dictionary outputs: preference for dictionary candidates. The
|
||||
// second-candidate is there, despite it not being a dictionary word, due to
|
||||
// stronger probability in the input to the decoder.
|
||||
std::vector<CTCDecoder::Output> expected_dict_output = {
|
||||
{{3}, {1, 3}, {3, 1}},
|
||||
};
|
||||
|
||||
// Convert data containers to the format accepted by the decoder, simply
|
||||
// mapping the memory from the container to an Eigen::ArrayXi,::MatrixXf,
|
||||
// using Eigen::Map.
|
||||
Eigen::Map<const Eigen::ArrayXi> seq_len(&sequence_lengths[0], batch_size);
|
||||
std::vector<Eigen::Map<const Eigen::MatrixXd>> inputs;
|
||||
inputs.reserve(timesteps);
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
inputs.emplace_back(&input_data_mat[t][0][0], batch_size, num_classes);
|
||||
}
|
||||
|
||||
// Prepare containers for output and scores.
|
||||
std::vector<CTCDecoder::Output> outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
float score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::MatrixXd> scores(&score[0][0], batch_size, top_paths);
|
||||
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output[0][path]);
|
||||
}
|
||||
|
||||
// Prepare dictionary outputs.
|
||||
std::vector<CTCDecoder::Output> dict_outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : dict_outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
EXPECT_TRUE(
|
||||
dictionary_decoder.Decode(seq_len, inputs, &dict_outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(dict_outputs[path][0], expected_dict_output[0][path]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CtcBeamSearch, AllBeamElementsHaveFiniteScores) {
|
||||
const int batch_size = 1;
|
||||
const int timesteps = 1;
|
||||
const int top_paths = 3;
|
||||
const int num_classes = 6;
|
||||
|
||||
// Plain decoder using hibernating beam search algorithm.
|
||||
CTCBeamSearchDecoder<>::DefaultBeamScorer default_scorer;
|
||||
CTCBeamSearchDecoder<> decoder(num_classes, top_paths, &default_scorer);
|
||||
|
||||
// Raw data containers (arrays of floats64, ints, etc.).
|
||||
int sequence_lengths[batch_size] = {timesteps};
|
||||
float input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
{{0.4, 0.3, 0, 0, 0, 0.5}}};
|
||||
|
||||
// Convert data containers to the format accepted by the decoder, simply
|
||||
// mapping the memory from the container to an Eigen::ArrayXi,::MatrixXf,
|
||||
// using Eigen::Map.
|
||||
Eigen::Map<const Eigen::ArrayXi> seq_len(&sequence_lengths[0], batch_size);
|
||||
std::vector<Eigen::Map<const Eigen::MatrixXd>> inputs;
|
||||
inputs.reserve(timesteps);
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
inputs.emplace_back(&input_data_mat[t][0][0], batch_size, num_classes);
|
||||
}
|
||||
|
||||
// Prepare containers for output and scores.
|
||||
std::vector<CTCDecoder::Output> outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
float score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::MatrixXd> scores(&score[0][0], batch_size, top_paths);
|
||||
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
// Make sure all scores are finite.
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
LOG(INFO) << "path " << path;
|
||||
EXPECT_FALSE(std::isinf(score[0][path]));
|
||||
}
|
||||
}
|
||||
|
||||
// A beam decoder to test label selection. It simply models N labels with
|
||||
// rapidly dropping off log-probability.
|
||||
|
||||
typedef int LabelState; // The state is simply the final label.
|
||||
|
||||
class RapidlyDroppingLabelScorer
|
||||
: public tensorflow::ctc::BaseBeamScorer<LabelState> {
|
||||
public:
|
||||
void InitializeState(LabelState* root) const override {}
|
||||
|
||||
void ExpandState(const LabelState& from_state, int from_label,
|
||||
LabelState* to_state, int to_label) const override {
|
||||
*to_state = to_label;
|
||||
}
|
||||
|
||||
void ExpandStateEnd(LabelState* state) const override {}
|
||||
|
||||
float GetStateExpansionScore(const LabelState& state,
|
||||
float previous_score) const override {
|
||||
// Drop off rapidly for later labels.
|
||||
const float kRapidly = 100;
|
||||
return previous_score - kRapidly * state;
|
||||
}
|
||||
|
||||
float GetStateEndExpansionScore(const LabelState& state) const override {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
TEST(CtcBeamSearch, LabelSelection) {
|
||||
const int batch_size = 1;
|
||||
const int timesteps = 3;
|
||||
const int top_paths = 5;
|
||||
const int num_classes = 6;
|
||||
|
||||
// Decoder which drops off log-probabilities for labels 0 >> 1 >> 2 >> 3.
|
||||
RapidlyDroppingLabelScorer scorer;
|
||||
CTCBeamSearchDecoder<LabelState> decoder(num_classes, top_paths, &scorer);
|
||||
|
||||
// Raw data containers (arrays of floats64, ints, etc.).
|
||||
int sequence_lengths[batch_size] = {timesteps};
|
||||
// Log probabilities, slightly preferring later labels, this decision
|
||||
// should be overridden by the scorer which strongly prefers earlier labels.
|
||||
// The last one is empty label, and for simplicity we give it an extremely
|
||||
// high cost to ignore it. We also use the first label to break up the
|
||||
// repeated label sequence.
|
||||
float input_data_mat[timesteps][batch_size][num_classes] = {
|
||||
{{-1e6, 1, 2, 3, 4, -1e6}},
|
||||
{{1e6, 0, 0, 0, 0, -1e6}}, // force label 0 to break up repeated
|
||||
{{-1e6, 1.1, 2.2, 3.3, 4.4, -1e6}},
|
||||
};
|
||||
|
||||
// Expected output without label selection
|
||||
std::vector<CTCDecoder::Output> expected_default_output = {
|
||||
{{1, 0, 1}, {1, 0, 2}, {2, 0, 1}, {1, 0, 3}, {2, 0, 2}},
|
||||
};
|
||||
|
||||
// Expected output with label selection limiting to 2 items
|
||||
// this is suboptimal because only labels 3 and 4 were allowed to be seen.
|
||||
std::vector<CTCDecoder::Output> expected_output_size2 = {
|
||||
{{3, 0, 3}, {3, 0, 4}, {4, 0, 3}, {4, 0, 4}, {3}},
|
||||
};
|
||||
|
||||
// Expected output with label width of 2.0. This would permit three labels at
|
||||
// the first timestep, but only two at the last.
|
||||
std::vector<CTCDecoder::Output> expected_output_width2 = {
|
||||
{{2, 0, 3}, {2, 0, 4}, {3, 0, 3}, {3, 0, 4}, {4, 0, 3}},
|
||||
};
|
||||
|
||||
// Convert data containers to the format accepted by the decoder, simply
|
||||
// mapping the memory from the container to an Eigen::ArrayXi,::MatrixXf,
|
||||
// using Eigen::Map.
|
||||
Eigen::Map<const Eigen::ArrayXi> seq_len(&sequence_lengths[0], batch_size);
|
||||
std::vector<Eigen::Map<const Eigen::MatrixXd>> inputs;
|
||||
inputs.reserve(timesteps);
|
||||
for (int t = 0; t < timesteps; ++t) {
|
||||
inputs.emplace_back(&input_data_mat[t][0][0], batch_size, num_classes);
|
||||
}
|
||||
|
||||
// Prepare containers for output and scores.
|
||||
std::vector<CTCDecoder::Output> outputs(top_paths);
|
||||
for (CTCDecoder::Output& output : outputs) {
|
||||
output.resize(batch_size);
|
||||
}
|
||||
float score[batch_size][top_paths] = {{0.0}};
|
||||
Eigen::Map<Eigen::MatrixXd> scores(&score[0][0], batch_size, top_paths);
|
||||
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_default_output[0][path]);
|
||||
}
|
||||
|
||||
// Try label selection size 2
|
||||
decoder.SetLabelSelectionParameters(2, -1);
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output_size2[0][path]);
|
||||
}
|
||||
|
||||
// Try label selection width 2.0
|
||||
decoder.SetLabelSelectionParameters(0, 2.0);
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output_width2[0][path]);
|
||||
}
|
||||
|
||||
// Try both size 2 and width 2.0: the former is more constraining, so
|
||||
// it's equivalent to that.
|
||||
decoder.SetLabelSelectionParameters(2, 2.0);
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_output_size2[0][path]);
|
||||
}
|
||||
|
||||
// Size 4 and width > 3.3 are equivalent to no label selection
|
||||
decoder.SetLabelSelectionParameters(4, 3.3001);
|
||||
EXPECT_TRUE(decoder.Decode(seq_len, inputs, &outputs, &scores).ok());
|
||||
for (int path = 0; path < top_paths; ++path) {
|
||||
EXPECT_EQ(outputs[path][0], expected_default_output[0][path]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
Loading…
Reference in New Issue
Block a user