2019-06-02 01:42:23 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "ctcdecode/ctc_beam_search_decoder.h"
|
|
|
|
|
|
|
|
#include "modelstate.h"
|
|
|
|
|
|
|
|
using std::vector;
|
|
|
|
|
|
|
|
// Default constructor: every numeric configuration field listed below starts
// out as -1. NOTE(review): -1 presumably acts as a "not yet configured"
// sentinel that a later initialisation step overwrites -- confirm with callers.
ModelState::ModelState()
  : beam_width_(-1),
    n_steps_(-1),
    n_context_(-1),
    n_features_(-1),
    mfcc_feats_per_timestep_(-1),
    sample_rate_(-1),
    audio_win_len_(-1),
    audio_win_step_(-1),
    state_size_(-1)
{
}
|
|
|
|
|
|
|
|
// Nothing to release explicitly here; members clean up through their own
// destructors.
ModelState::~ModelState() = default;
|
|
|
|
|
|
|
|
int
|
2020-01-27 15:03:03 +00:00
|
|
|
ModelState::init(const char* model_path)
|
2019-06-02 01:42:23 +00:00
|
|
|
{
|
|
|
|
return DS_ERR_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
char*
|
2020-02-25 10:15:45 +00:00
|
|
|
ModelState::decode(const DecoderState& state) const
|
2019-06-02 01:42:23 +00:00
|
|
|
{
|
2020-02-05 07:55:15 +00:00
|
|
|
vector<Output> out = state.decode(1);
|
2019-08-22 19:17:23 +00:00
|
|
|
return strdup(alphabet_.LabelsToString(out[0].tokens).c_str());
|
2019-06-02 01:42:23 +00:00
|
|
|
}
|
|
|
|
|
2020-02-05 07:55:15 +00:00
|
|
|
// Decodes up to `top_paths` candidate transcriptions from `state` and turns
// each one into a heap-allocated Metadata record.
//
// For every candidate, one MetadataItem is filled in per token with:
//   - character:  strdup()'ed text of the token label,
//   - timestep:   the decoder timestep recorded for that token,
//   - start_time: timestep * (audio_win_step_ / sample_rate_), clamped so it
//                 is never negative. NOTE(review): presumably seconds, given
//                 a window step in samples and a rate in Hz -- confirm.
//
// Ownership of every returned Metadata (allocated with new), its `items`
// array (new[]) and each `character` string (strdup) passes to the caller.
//
// @param state      Decoder state to read candidate paths from.
// @param top_paths  Maximum number of candidates to return.
// @return One Metadata* per candidate, at most `top_paths` entries.
vector<Metadata*>
ModelState::decode_metadata(const DecoderState& state,
                            size_t top_paths)
{
  vector<Output> out = state.decode(top_paths);

  // Never read past what the decoder actually produced.
  const size_t max_results = std::min(top_paths, out.size());

  vector<Metadata*> meta_out;
  // Reserving up front guarantees the push_back below cannot reallocate (and
  // therefore cannot throw) after ownership has left the unique_ptr.
  meta_out.reserve(max_results);

  // Loop-invariant conversion factor from a timestep index to a time offset.
  const float time_per_timestep =
      static_cast<float>(audio_win_step_) / sample_rate_;

  for (size_t j = 0; j < max_results; ++j) {
    const Output& path = out[j];
    const size_t num_items = path.tokens.size();

    std::unique_ptr<Metadata> metadata(new Metadata());
    metadata->num_items = num_items;
    metadata->confidence = path.confidence;

    // Value-initialised so every item starts zeroed before being filled in.
    std::unique_ptr<MetadataItem[]> items(new MetadataItem[num_items]());

    // Loop through each character
    for (size_t i = 0; i < num_items; ++i) {
      items[i].character = strdup(alphabet_.StringFromLabel(path.tokens[i]).c_str());
      items[i].timestep = path.timesteps[i];
      items[i].start_time = path.timesteps[i] * time_per_timestep;

      if (items[i].start_time < 0) {
        items[i].start_time = 0;
      }
    }

    metadata->items = items.release();
    meta_out.push_back(metadata.release());
  }

  return meta_out;
}
|