Merge pull request #2350 from mozilla/breaking-api-cleanup
[BREAKING] API cleanup
commit 6b7ebf47f2
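This PR removes the graph-geometry arguments (`aNCep`, `aNContext`) and the redundant alphabet path from the inference API, and renames the lifecycle functions for consistency:

- `DS_CreateModel` drops `aNCep`/`aNContext`
- `DS_EnableDecoderWithLM` drops `aAlphabetConfigPath`
- `DS_SetupStream` → `DS_CreateStream`
- `DS_DiscardStream` → `DS_FreeStream`
- `DS_DestroyModel` → `DS_FreeModel`

All bindings (C, C#, Java/Android, Node.js, Python) and examples are updated, and a new `deepspeech_compat.h` keeps old call sites building. As a minimal sketch of a migrated C call site (the paths and hyperparameter values below are illustrative, not taken from this diff):

```c
#include <stdio.h>
#include "deepspeech.h"

/* Sketch: model setup before and after this PR. The old extra arguments
 * (N_CEP, N_CONTEXT, and the alphabet path for the decoder) are dropped. */
int load_model(ModelState **ctx)
{
    /* Old: DS_CreateModel("output_graph.pbmm", 26, 9, "alphabet.txt", 500, ctx); */
    int status = DS_CreateModel("output_graph.pbmm", "alphabet.txt", 500, ctx);
    if (status != DS_ERR_OK) {
        fprintf(stderr, "Could not create model.\n");
        return status;
    }
    /* Old: DS_EnableDecoderWithLM(*ctx, "alphabet.txt", "lm.binary", "trie", 0.75f, 1.85f); */
    return DS_EnableDecoderWithLM(*ctx, "lm.binary", "trie", 0.75f, 1.85f);
}
```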
@@ -29,13 +29,11 @@ Then run with a TF Lite model, alphabet, LM/trie and a CSV test file
 BEAM_WIDTH = 500
 LM_ALPHA = 0.75
 LM_BETA = 1.85
-N_FEATURES = 26
-N_CONTEXT = 9

 def tflite_worker(model, alphabet, lm, trie, queue_in, queue_out, gpu_mask):
     os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_mask)
-    ds = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
-    ds.enableDecoderWithLM(alphabet, lm, trie, LM_ALPHA, LM_BETA)
+    ds = Model(model, alphabet, BEAM_WIDTH)
+    ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)

     while True:
         msg = queue_in.get()
@@ -22,8 +22,6 @@ namespace DeepSpeechWPF
 {
     private readonly IDeepSpeech _sttClient;

-    private const uint N_CEP = 26;
-    private const uint N_CONTEXT = 9;
     private const uint BEAM_WIDTH = 500;
     private const float LM_ALPHA = 0.75f;
     private const float LM_BETA = 1.85f;
@@ -79,7 +77,7 @@ namespace DeepSpeechWPF
 {
     try
     {
-        _sttClient.CreateModel("output_graph.pbmm", N_CEP, N_CONTEXT, "alphabet.txt", BEAM_WIDTH);
+        _sttClient.CreateModel("output_graph.pbmm", "alphabet.txt", BEAM_WIDTH);
         Dispatcher.Invoke(() => { EnableControls(); });
     }
     catch (Exception ex)
@@ -155,7 +153,7 @@ namespace DeepSpeechWPF
 {
     try
     {
-        _sttClient.EnableDecoderWithLM("alphabet.txt", "lm.binary", "trie", LM_ALPHA, LM_BETA);
+        _sttClient.EnableDecoderWithLM("lm.binary", "trie", LM_ALPHA, LM_BETA);
         Dispatcher.Invoke(() => lblStatus.Content = "LM loaded.");
     }
     catch (Exception ex)
@@ -198,7 +196,7 @@ namespace DeepSpeechWPF
     _soundInSource.Dispose();
     _convertedSource.Dispose();
     _audioCapture.DataAvailable -= _capture_DataAvailable;
-    _sttClient.DiscardStream(); //this a good example of discardstream, the user changed the audio input, so we no longer need the current stream
+    _sttClient.FreeStream(); //this a good example of FreeStream, the user changed the audio input, so we no longer need the current stream
 }
 if (_audioCaptureDevices!=null)
 {
@@ -252,7 +250,7 @@ namespace DeepSpeechWPF

 private void BtnStartRecording_Click(object sender, RoutedEventArgs e)
 {
-    _sttClient.SetupStream(16000);
+    _sttClient.CreateStream(16000);
     _audioCapture.Start();
     btnStartRecording.IsEnabled = false;
     btnStopRecording.IsEnabled = true;
@@ -33,8 +33,6 @@
 #include "deepspeech.h"
 #include "args.h"

-#define N_CEP 26
-#define N_CONTEXT 9
 #define BEAM_WIDTH 500
 #define LM_ALPHA 0.75f
 #define LM_BETA 1.85f
@@ -72,7 +70,7 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
     DS_FreeMetadata(metadata);
   } else if (stream_size > 0) {
     StreamingState* ctx;
-    int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
+    int status = DS_CreateStream(aCtx, aSampleRate, &ctx);
     if (status != DS_ERR_OK) {
       res.string = strdup("");
       return res;
@@ -377,7 +375,7 @@ main(int argc, char **argv)

   // Initialise DeepSpeech
   ModelState* ctx;
-  int status = DS_CreateModel(model, N_CEP, N_CONTEXT, alphabet, BEAM_WIDTH, &ctx);
+  int status = DS_CreateModel(model, alphabet, BEAM_WIDTH, &ctx);
   if (status != 0) {
     fprintf(stderr, "Could not create model.\n");
     return 1;
@@ -385,7 +383,6 @@ main(int argc, char **argv)

   if (lm && (trie || load_without_trie)) {
     int status = DS_EnableDecoderWithLM(ctx,
-                                        alphabet,
                                         lm,
                                         trie,
                                         LM_ALPHA,
@@ -449,7 +446,7 @@ main(int argc, char **argv)
   sox_quit();
 #endif // NO_SOX

-  DS_DestroyModel(ctx);
+  DS_FreeModel(ctx);

   return 0;
 }
@@ -257,8 +257,6 @@ StreamingState::processBatch(const vector<float>& buf, unsigned int n_steps)

 int
 DS_CreateModel(const char* aModelPath,
-               unsigned int aNCep,
-               unsigned int aNContext,
                const char* aAlphabetConfigPath,
                unsigned int aBeamWidth,
                ModelState** retval)
@@ -285,7 +283,7 @@ DS_CreateModel(const char* aModelPath,
     return DS_ERR_FAIL_CREATE_MODEL;
   }

-  int err = model->init(aModelPath, aNCep, aNContext, aAlphabetConfigPath, aBeamWidth);
+  int err = model->init(aModelPath, aAlphabetConfigPath, aBeamWidth);
   if (err != DS_ERR_OK) {
     return err;
   }
@@ -295,14 +293,13 @@ DS_CreateModel(const char* aModelPath,
 }

 void
-DS_DestroyModel(ModelState* ctx)
+DS_FreeModel(ModelState* ctx)
 {
   delete ctx;
 }

 int
 DS_EnableDecoderWithLM(ModelState* aCtx,
-                       const char* aAlphabetConfigPath,
                        const char* aLMPath,
                        const char* aTriePath,
                        float aLMAlpha,
@@ -320,7 +317,7 @@ DS_EnableDecoderWithLM(ModelState* aCtx,
 }

 int
-DS_SetupStream(ModelState* aCtx,
+DS_CreateStream(ModelState* aCtx,
                unsigned int aSampleRate,
                StreamingState** retval)
 {
@@ -371,7 +368,7 @@ char*
 DS_FinishStream(StreamingState* aSctx)
 {
   char* str = aSctx->finishStream();
-  DS_DiscardStream(aSctx);
+  DS_FreeStream(aSctx);
   return str;
 }

@@ -379,18 +376,18 @@ Metadata*
 DS_FinishStreamWithMetadata(StreamingState* aSctx)
 {
   Metadata* metadata = aSctx->finishStreamWithMetadata();
-  DS_DiscardStream(aSctx);
+  DS_FreeStream(aSctx);
   return metadata;
 }

 StreamingState*
-SetupStreamAndFeedAudioContent(ModelState* aCtx,
+CreateStreamAndFeedAudioContent(ModelState* aCtx,
                                const short* aBuffer,
                                unsigned int aBufferSize,
                                unsigned int aSampleRate)
 {
   StreamingState* ctx;
-  int status = DS_SetupStream(aCtx, aSampleRate, &ctx);
+  int status = DS_CreateStream(aCtx, aSampleRate, &ctx);
   if (status != DS_ERR_OK) {
     return nullptr;
   }
@@ -404,7 +401,7 @@ DS_SpeechToText(ModelState* aCtx,
                 unsigned int aBufferSize,
                 unsigned int aSampleRate)
 {
-  StreamingState* ctx = SetupStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize, aSampleRate);
+  StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize, aSampleRate);
   return DS_FinishStream(ctx);
 }

@@ -414,12 +411,12 @@ DS_SpeechToTextWithMetadata(ModelState* aCtx,
                             unsigned int aBufferSize,
                             unsigned int aSampleRate)
 {
-  StreamingState* ctx = SetupStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize, aSampleRate);
+  StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize, aSampleRate);
   return DS_FinishStreamWithMetadata(ctx);
 }

 void
-DS_DiscardStream(StreamingState* aSctx)
+DS_FreeStream(StreamingState* aSctx)
 {
   delete aSctx;
 }
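The deepspeech.cc hunks above only change names and signatures; the one-shot recognition path is otherwise untouched. A minimal lifecycle sketch with the renamed API (audio acquisition is elided, and the 16 kHz sample rate and beam width of 500 are illustrative assumptions):

```c
#include <stddef.h>
#include "deepspeech.h"

/* Lifecycle sketch: create, transcribe, free. `audio` is assumed to hold
 * 16-bit mono samples obtained elsewhere (e.g. from a decoded WAV file). */
int run_stt(const char *model_path, const char *alphabet_path,
            const short *audio, unsigned int n_samples)
{
    ModelState *ctx = NULL;
    if (DS_CreateModel(model_path, alphabet_path, 500, &ctx) != DS_ERR_OK) {
        return 1;
    }
    char *text = DS_SpeechToText(ctx, audio, n_samples, 16000);
    if (text != NULL) {
        /* ... consume the transcript ... */
        DS_FreeString(text);
    }
    DS_FreeModel(ctx); /* formerly DS_DestroyModel */
    return 0;
}
```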
@@ -63,8 +63,6 @@ enum DeepSpeech_Error_Codes
 * @brief An object providing an interface to a trained DeepSpeech model.
 *
 * @param aModelPath The path to the frozen model graph.
-* @param aNCep The number of cepstrum the model was trained with.
-* @param aNContext The context window the model was trained with.
 * @param aAlphabetConfigPath The path to the configuration file specifying
 * the alphabet used by the network. See alphabet.h.
 * @param aBeamWidth The beam width used by the decoder. A larger beam
@@ -76,8 +74,6 @@ enum DeepSpeech_Error_Codes
 */
 DEEPSPEECH_EXPORT
 int DS_CreateModel(const char* aModelPath,
-                   unsigned int aNCep,
-                   unsigned int aNContext,
                    const char* aAlphabetConfigPath,
                    unsigned int aBeamWidth,
                    ModelState** retval);
@@ -86,7 +82,7 @@ int DS_CreateModel(const char* aModelPath,
 * @brief Frees associated resources and destroys model object.
 */
 DEEPSPEECH_EXPORT
-void DS_DestroyModel(ModelState* ctx);
+void DS_FreeModel(ModelState* ctx);

 /**
 * @brief Enable decoding using beam scoring with a KenLM language model.
@@ -106,7 +102,6 @@ void DS_DestroyModel(ModelState* ctx);
 */
 DEEPSPEECH_EXPORT
 int DS_EnableDecoderWithLM(ModelState* aCtx,
-                           const char* aAlphabetConfigPath,
                            const char* aLMPath,
                            const char* aTriePath,
                            float aLMAlpha,
@@ -162,14 +157,14 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
 * @return Zero for success, non-zero on failure.
 */
 DEEPSPEECH_EXPORT
-int DS_SetupStream(ModelState* aCtx,
+int DS_CreateStream(ModelState* aCtx,
                    unsigned int aSampleRate,
                    StreamingState** retval);

 /**
 * @brief Feed audio samples to an ongoing streaming inference.
 *
-* @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
+* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 * @param aBuffer An array of 16-bit, mono raw audio samples at the
 * appropriate sample rate.
 * @param aBufferSize The number of samples in @p aBuffer.
@@ -185,7 +180,7 @@ void DS_FeedAudioContent(StreamingState* aSctx,
 * currently capable of streaming, so it always starts from the beginning
 * of the audio.
 *
-* @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
+* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 *
 * @return The STT intermediate result. The user is responsible for freeing the
 * string using {@link DS_FreeString()}.
@@ -197,7 +192,7 @@ char* DS_IntermediateDecode(StreamingState* aSctx);
 * @brief Signal the end of an audio signal to an ongoing streaming
 * inference, returns the STT result over the whole audio signal.
 *
-* @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
+* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 *
 * @return The STT result. The user is responsible for freeing the string using
 * {@link DS_FreeString()}.
@@ -211,7 +206,7 @@ char* DS_FinishStream(StreamingState* aSctx);
 * @brief Signal the end of an audio signal to an ongoing streaming
 * inference, returns per-letter metadata.
 *
-* @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
+* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 *
 * @return Outputs a struct of individual letters along with their timing information.
 * The user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. Returns NULL on error.
@@ -226,12 +221,12 @@ Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx);
 * can be used if you no longer need the result of an ongoing streaming
 * inference and don't want to perform a costly decode operation.
 *
-* @param aSctx A streaming state pointer returned by {@link DS_SetupStream()}.
+* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
 *
 * @note This method will free the state pointer (@p aSctx).
 */
 DEEPSPEECH_EXPORT
-void DS_DiscardStream(StreamingState* aSctx);
+void DS_FreeStream(StreamingState* aSctx);

 /**
 * @brief Free memory allocated for metadata information.
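The header changes are name and documentation updates only; the streaming protocol itself (create, feed, finish) is unchanged. A sketch of the renamed streaming flow, where `next_chunk()` is a hypothetical audio source used purely for illustration:

```c
#include "deepspeech.h"

/* Hypothetical audio source: fills `buf` and returns the number of samples
 * written, or 0 when the input is exhausted. */
extern unsigned int next_chunk(short *buf, unsigned int max_samples);

char *stream_stt(ModelState *model)
{
    StreamingState *sctx = NULL;
    if (DS_CreateStream(model, 16000, &sctx) != DS_ERR_OK) { /* was DS_SetupStream */
        return NULL;
    }

    short chunk[320];
    unsigned int n;
    while ((n = next_chunk(chunk, 320)) > 0) {
        DS_FeedAudioContent(sctx, chunk, n);
    }

    /* DS_FinishStream frees the streaming state internally; to abandon a
     * stream without decoding, call DS_FreeStream (was DS_DiscardStream). */
    return DS_FinishStream(sctx); /* caller frees with DS_FreeString() */
}
```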
@@ -0,0 +1,101 @@
+#ifndef DEEPSPEECH_COMPAT_H
+#define DEEPSPEECH_COMPAT_H
+
+#include "deepspeech.h"
+
+#warning This header is a convenience wrapper for compatibility with \
+the previous API, it has deprecated function names and arguments. \
+If possible, update your code instead of using this header.
+
+/**
+ * @brief An object providing an interface to a trained DeepSpeech model.
+ *
+ * @param aModelPath The path to the frozen model graph.
+ * @param aNCep UNUSED, DEPRECATED.
+ * @param aNContext UNUSED, DEPRECATED.
+ * @param aAlphabetConfigPath The path to the configuration file specifying
+ * the alphabet used by the network. See alphabet.h.
+ * @param aBeamWidth The beam width used by the decoder. A larger beam
+ * width generates better results at the cost of decoding
+ * time.
+ * @param[out] retval a ModelState pointer
+ *
+ * @return Zero on success, non-zero on failure.
+ */
+int DS_CreateModel(const char* aModelPath,
+                   unsigned int /*aNCep*/,
+                   unsigned int /*aNContext*/,
+                   const char* aAlphabetConfigPath,
+                   unsigned int aBeamWidth,
+                   ModelState** retval)
+{
+  return DS_CreateModel(aModelPath, aAlphabetConfigPath, aBeamWidth, retval);
+}
+
+/**
+ * @brief Frees associated resources and destroys model object.
+ */
+void DS_DestroyModel(ModelState* ctx)
+{
+  return DS_FreeModel(ctx);
+}
+
+/**
+ * @brief Enable decoding using beam scoring with a KenLM language model.
+ *
+ * @param aCtx The ModelState pointer for the model being changed.
+ * @param aAlphabetConfigPath UNUSED, DEPRECATED.
+ * @param aLMPath The path to the language model binary file.
+ * @param aTriePath The path to the trie file build from the same vocabu-
+ * lary as the language model binary.
+ * @param aLMAlpha The alpha hyperparameter of the CTC decoder. Language Model
+ * weight.
+ * @param aLMBeta The beta hyperparameter of the CTC decoder. Word insertion
+ * weight.
+ *
+ * @return Zero on success, non-zero on failure (invalid arguments).
+ */
+int DS_EnableDecoderWithLM(ModelState* aCtx,
+                           const char* /*aAlphabetConfigPath*/,
+                           const char* aLMPath,
+                           const char* aTriePath,
+                           float aLMAlpha,
+                           float aLMBeta)
+{
+  return DS_EnableDecoderWithLM(aCtx, aLMPath, aTriePath, aLMAlpha, aLMBeta);
+}
+
+/**
+ * @brief Create a new streaming inference state. The streaming state returned
+ * by this function can then be passed to {@link DS_FeedAudioContent()}
+ * and {@link DS_FinishStream()}.
+ *
+ * @param aCtx The ModelState pointer for the model to use.
+ * @param aSampleRate The sample-rate of the audio signal.
+ * @param[out] retval an opaque pointer that represents the streaming state. Can
+ * be NULL if an error occurs.
+ *
+ * @return Zero for success, non-zero on failure.
+ */
+int DS_SetupStream(ModelState* aCtx,
+                   unsigned int aSampleRate,
+                   StreamingState** retval)
+{
+  return DS_CreateStream(aCtx, aSampleRate, retval);
+}
+
+/**
+ * @brief Destroy a streaming state without decoding the computed logits. This
+ * can be used if you no longer need the result of an ongoing streaming
+ * inference and don't want to perform a costly decode operation.
+ *
+ * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
+ *
+ * @note This method will free the state pointer (@p aSctx).
+ */
+void DS_DiscardStream(StreamingState* aSctx)
+{
+  return DS_FreeStream(aSctx);
+}
+
+#endif /* DEEPSPEECH_COMPAT_H */
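For callers that cannot migrate immediately, including `deepspeech_compat.h` keeps the old spellings compiling (with a compile-time `#warning`) by forwarding to the new functions; since the wrappers above reuse the new names with extra arguments, this relies on overloading and so targets C++ translation units. A sketch of a legacy call site left untouched, with illustrative values:

```c
/* Legacy code path: old names and extra arguments still compile via the
 * compatibility header; the aNCep/aNContext values (26, 9 here) are
 * accepted and ignored. */
#include "deepspeech_compat.h"

int legacy_load(ModelState **ctx)
{
    /* Forwards to DS_CreateModel(path, alphabet, beam_width, retval). */
    int status = DS_CreateModel("output_graph.pbmm", 26, 9,
                                "alphabet.txt", 500, ctx);
    if (status != 0) {
        return status;
    }
    /* Old streaming names DS_SetupStream/DS_DiscardStream also forward. */
    DS_DestroyModel(*ctx); /* forwards to DS_FreeModel */
    return 0;
}
```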
@@ -32,13 +32,11 @@ namespace DeepSpeechClient
 /// Create an object providing an interface to a trained DeepSpeech model.
 /// </summary>
 /// <param name="aModelPath">The path to the frozen model graph.</param>
-/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
-/// <param name="aNContext">The context window the model was trained with.</param>
 /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
 /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
 /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-public unsafe void CreateModel(string aModelPath, uint aNCep,
-    uint aNContext, string aAlphabetConfigPath, uint aBeamWidth)
+public unsafe void CreateModel(string aModelPath,
+    string aAlphabetConfigPath, uint aBeamWidth)
 {
     string exceptionMessage = null;
     if (string.IsNullOrWhiteSpace(aModelPath))
@@ -63,8 +61,6 @@ namespace DeepSpeechClient
     throw new FileNotFoundException(exceptionMessage);
 }
 var resultCode = NativeImp.DS_CreateModel(aModelPath,
-                aNCep,
-                aNContext,
                 aAlphabetConfigPath,
                 aBeamWidth,
                 ref _modelStatePP);
@@ -116,20 +112,18 @@ namespace DeepSpeechClient
 /// </summary>
 public unsafe void Dispose()
 {
-    NativeImp.DS_DestroyModel(_modelStatePP);
+    NativeImp.DS_FreeModel(_modelStatePP);
 }

 /// <summary>
 /// Enable decoding using beam scoring with a KenLM language model.
 /// </summary>
-/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
 /// <param name="aLMPath">The path to the language model binary file.</param>
 /// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
 /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
 /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
 /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
-public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
-    string aLMPath, string aTriePath,
+public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
     float aLMAlpha, float aLMBeta)
 {
     string exceptionMessage = null;
@@ -148,7 +142,6 @@ namespace DeepSpeechClient
 }

 var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
-                aAlphabetConfigPath,
                 aLMPath,
                 aTriePath,
                 aLMAlpha,
@@ -206,9 +199,9 @@ namespace DeepSpeechClient
 /// </summary>
 /// <param name="aSampleRate">The sample-rate of the audio signal</param>
 /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-public unsafe void SetupStream(uint aSampleRate)
+public unsafe void CreateStream(uint aSampleRate)
 {
-    var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
+    var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
     EvaluateResultCode(resultCode);
 }

@@ -217,9 +210,9 @@ namespace DeepSpeechClient
 /// This can be used if you no longer need the result of an ongoing streaming
 /// inference and don't want to perform a costly decode operation.
 /// </summary>
-public unsafe void DiscardStream()
+public unsafe void FreeStream()
 {
-    NativeImp.DS_DiscardStream(ref _streamingStatePP);
+    NativeImp.DS_FreeStream(ref _streamingStatePP);
 }

 /// <summary>
@@ -17,27 +17,22 @@ namespace DeepSpeechClient.Interfaces
 /// Create an object providing an interface to a trained DeepSpeech model.
 /// </summary>
 /// <param name="aModelPath">The path to the frozen model graph.</param>
-/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
-/// <param name="aNContext">The context window the model was trained with.</param>
 /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
 /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
 /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-unsafe void CreateModel(string aModelPath, uint aNCep,
-    uint aNContext,
+unsafe void CreateModel(string aModelPath,
     string aAlphabetConfigPath,
     uint aBeamWidth);

 /// <summary>
 /// Enable decoding using beam scoring with a KenLM language model.
 /// </summary>
-/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
 /// <param name="aLMPath">The path to the language model binary file.</param>
 /// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
 /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
 /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
 /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
-unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
-    string aLMPath,
+unsafe void EnableDecoderWithLM(string aLMPath,
     string aTriePath,
     float aLMAlpha,
     float aLMBeta);
@@ -69,7 +64,7 @@ namespace DeepSpeechClient.Interfaces
 /// This can be used if you no longer need the result of an ongoing streaming
 /// inference and don't want to perform a costly decode operation.
 /// </summary>
-unsafe void DiscardStream();
+unsafe void FreeStream();

 /// <summary>
 /// Free a DeepSpeech allocated string
@@ -86,7 +81,7 @@ namespace DeepSpeechClient.Interfaces
 /// </summary>
 /// <param name="aSampleRate">The sample-rate of the audio signal</param>
 /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-unsafe void SetupStream(uint aSampleRate);
+unsafe void CreateStream(uint aSampleRate);

 /// <summary>
 /// Feeds audio samples to an ongoing streaming inference.
@@ -17,15 +17,12 @@ namespace DeepSpeechClient

 [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
 internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
-                uint aNCep,
-                uint aNContext,
                 string aAlphabetConfigPath,
                 uint aBeamWidth,
                 ref ModelState** pint);

 [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
 internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
-                string aAlphabetConfigPath,
                 string aLMPath,
                 string aTriePath,
                 float aLMAlpha,
@@ -45,14 +42,14 @@ namespace DeepSpeechClient
                 uint aSampleRate);

 [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-internal static unsafe extern void DS_DestroyModel(ModelState** aCtx);
+internal static unsafe extern void DS_FreeModel(ModelState** aCtx);

 [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
+internal static unsafe extern ErrorCodes DS_CreateStream(ModelState** aCtx,
                 uint aSampleRate, ref StreamingState** retval);

 [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-internal static unsafe extern void DS_DiscardStream(ref StreamingState** aSctx);
+internal static unsafe extern void DS_FreeStream(ref StreamingState** aSctx);

 [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
 internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);
@@ -7,6 +7,8 @@ using GraphDef = System.IntPtr;

 namespace DeepSpeechClient.Structs
 {
+    //FIXME: ModelState is an opaque pointer to the API, why is this code reverse
+    // engineering its contents?
     [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
     public unsafe struct ModelState
     {
@@ -50,8 +50,6 @@ namespace CSharpExamples
     extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
 }

-const uint N_CEP = 26;
-const uint N_CONTEXT = 9;
 const uint BEAM_WIDTH = 500;
 const float LM_ALPHA = 0.75f;
 const float LM_BETA = 1.85f;
@@ -66,7 +64,6 @@ namespace CSharpExamples
 stopwatch.Start();
 sttClient.CreateModel(
     model ?? "output_graph.pbmm",
-    N_CEP, N_CONTEXT,
     alphabet ?? "alphabet.txt",
     BEAM_WIDTH);
 stopwatch.Stop();
@@ -77,7 +74,6 @@ namespace CSharpExamples
 {
     Console.WriteLine("Loadin LM...");
     sttClient.EnableDecoderWithLM(
-        alphabet ?? "alphabet.txt",
         lm ?? "lm.binary",
         trie ?? "trie",
         LM_ALPHA, LM_BETA);
@@ -31,8 +31,6 @@ public class DeepSpeechActivity extends AppCompatActivity {

 Button _startInference;

-final int N_CEP = 26;
-final int N_CONTEXT = 9;
 final int BEAM_WIDTH = 50;
 final float LM_ALPHA = 0.75f;
 final float LM_BETA = 1.85f;
@@ -54,7 +52,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
 private void newModel(String tfliteModel, String alphabet) {
     this._tfliteStatus.setText("Creating model");
     if (this._m == null) {
-        this._m = new DeepSpeechModel(tfliteModel, N_CEP, N_CONTEXT, alphabet, BEAM_WIDTH);
+        this._m = new DeepSpeechModel(tfliteModel, alphabet, BEAM_WIDTH);
     }
 }

@@ -167,7 +165,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
     super.onDestroy();

     if (this._m != null) {
-        this._m.destroyModel();
+        this._m.freeModel();
     }
 }
 }
@@ -35,8 +35,6 @@ public class BasicTest {
 public static final String trieFile = "/data/local/tmp/test/trie";
 public static final String wavFile = "/data/local/tmp/test/LDC93S1.wav";

-public static final int N_CEP = 26;
-public static final int N_CONTEXT = 9;
 public static final int BEAM_WIDTH = 50;

 public static final float LM_ALPHA = 0.75f;
@@ -66,8 +64,8 @@ public class BasicTest {

 @Test
 public void loadDeepSpeech_basic() {
-    DeepSpeechModel m = new DeepSpeechModel(modelFile, N_CEP, N_CONTEXT, alphabetFile, BEAM_WIDTH);
-    m.destroyModel();
+    DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+    m.freeModel();
 }

 private String metadataToString(Metadata m) {
@@ -123,39 +121,39 @@ public class BasicTest {

 @Test
 public void loadDeepSpeech_stt_noLM() {
-    DeepSpeechModel m = new DeepSpeechModel(modelFile, N_CEP, N_CONTEXT, alphabetFile, BEAM_WIDTH);
+    DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);

     String decoded = doSTT(m, false);
     assertEquals("she had your dark suit in greasy wash water all year", decoded);
-    m.destroyModel();
+    m.freeModel();
 }

 @Test
 public void loadDeepSpeech_stt_withLM() {
-    DeepSpeechModel m = new DeepSpeechModel(modelFile, N_CEP, N_CONTEXT, alphabetFile, BEAM_WIDTH);
-    m.enableDecoderWihLM(alphabetFile, lmFile, trieFile, LM_ALPHA, LM_BETA);
+    DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+    m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);

     String decoded = doSTT(m, false);
     assertEquals("she had your dark suit in greasy wash water all year", decoded);
-    m.destroyModel();
+    m.freeModel();
 }

 @Test
 public void loadDeepSpeech_sttWithMetadata_noLM() {
-    DeepSpeechModel m = new DeepSpeechModel(modelFile, N_CEP, N_CONTEXT, alphabetFile, BEAM_WIDTH);
+    DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);

     String decoded = doSTT(m, true);
     assertEquals("she had your dark suit in greasy wash water all year", decoded);
-    m.destroyModel();
+    m.freeModel();
 }

 @Test
 public void loadDeepSpeech_sttWithMetadata_withLM() {
-    DeepSpeechModel m = new DeepSpeechModel(modelFile, N_CEP, N_CONTEXT, alphabetFile, BEAM_WIDTH);
-    m.enableDecoderWihLM(alphabetFile, lmFile, trieFile, LM_ALPHA, LM_BETA);
+    DeepSpeechModel m = new DeepSpeechModel(modelFile, alphabetFile, BEAM_WIDTH);
+    m.enableDecoderWihLM(lmFile, trieFile, LM_ALPHA, LM_BETA);

     String decoded = doSTT(m, true);
     assertEquals("she had your dark suit in greasy wash water all year", decoded);
-    m.destroyModel();
+    m.freeModel();
 }
 }
@@ -11,18 +11,18 @@ public class DeepSpeechModel {
 SWIGTYPE_p_p_ModelState _mspp;
 SWIGTYPE_p_ModelState _msp;

-public DeepSpeechModel(String modelPath, int n_cep, int n_context, String alphabetPath, int beam_width) {
+public DeepSpeechModel(String modelPath, String alphabetPath, int beam_width) {
     this._mspp = impl.new_modelstatep();
-    impl.CreateModel(modelPath, n_cep, n_context, alphabetPath, beam_width, this._mspp);
+    impl.CreateModel(modelPath, alphabetPath, beam_width, this._mspp);
     this._msp = impl.modelstatep_value(this._mspp);
 }

-public void destroyModel() {
-    impl.DestroyModel(this._msp);
+public void freeModel() {
+    impl.FreeModel(this._msp);
 }

-public void enableDecoderWihLM(String alphabet, String lm, String trie, float lm_alpha, float lm_beta) {
-    impl.EnableDecoderWithLM(this._msp, alphabet, lm, trie, lm_alpha, lm_beta);
+public void enableDecoderWihLM(String lm, String trie, float lm_alpha, float lm_beta) {
+    impl.EnableDecoderWithLM(this._msp, lm, trie, lm_alpha, lm_beta);
 }

 public String stt(short[] buffer, int buffer_size, int sample_rate) {
@@ -33,9 +33,9 @@ public class DeepSpeechModel {
     return impl.SpeechToTextWithMetadata(this._msp, buffer, buffer_size, sample_rate);
 }

-public DeepSpeechStreamingState setupStream(int sample_rate) {
+public DeepSpeechStreamingState createStream(int sample_rate) {
     SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
-    impl.SetupStream(this._msp, sample_rate, ssp);
+    impl.CreateStream(this._msp, sample_rate, ssp);
     return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
 }

@@ -22,16 +22,6 @@ const LM_ALPHA = 0.75;
 const LM_BETA = 1.85;


-// These constants are tied to the shape of the graph used (changing them changes
-// the geometry of the first layer), so make sure you use the same constants that
-// were used during training
-
-// Number of MFCC features to use
-const N_FEATURES = 26;
-
-// Size of the context window used for producing timesteps in the input vector
-const N_CONTEXT = 9;
-
 var VersionAction = function VersionAction(options) {
   options = options || {};
   options.nargs = 0;
@@ -109,15 +99,14 @@ audioStream.on('finish', () => {

 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-var model = new Ds.Model(args['model'], N_FEATURES, N_CONTEXT, args['alphabet'], BEAM_WIDTH);
+var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));

 if (args['lm'] && args['trie']) {
   console.error('Loading language model from files %s %s', args['lm'], args['trie']);
   const lm_load_start = process.hrtime();
-  model.enableDecoderWithLM(args['alphabet'], args['lm'], args['trie'],
-                            LM_ALPHA, LM_BETA);
+  model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
   const lm_load_end = process.hrtime(lm_load_start);
   console.error('Loaded language model in %ds.', totalTime(lm_load_end));
 }
@@ -135,6 +124,6 @@ audioStream.on('finish', () => {
 }
 const inference_stop = process.hrtime(inference_start);
 console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4));
-Ds.DestroyModel(model);
+Ds.FreeModel(model);
 process.exit(0);
 });
@@ -47,7 +47,7 @@ using namespace node;
 }


-// convert double pointer retval in SetupStream to an output
+// convert double pointer retval in CreateStream to an output
 %typemap(in, numinputs=0) StreamingState **retval (StreamingState *ret) {
   ret = NULL;
   $1 = &ret;
@@ -48,13 +48,13 @@ Model.prototype.sttWithMetadata = function() {
     return binding.SpeechToTextWithMetadata.apply(null, args);
 }

-Model.prototype.setupStream = function() {
+Model.prototype.createStream = function() {
     const args = [this._impl].concat(Array.prototype.slice.call(arguments));
-    const rets = binding.SetupStream.apply(null, args);
+    const rets = binding.CreateStream.apply(null, args);
     const status = rets[0];
     const ctx = rets[1];
     if (status !== 0) {
-        throw "SetupStream failed with error code " + status;
+        throw "CreateStream failed with error code " + status;
     }
     return ctx;
 }
@@ -75,13 +75,14 @@ Model.prototype.finishStreamWithMetadata = function() {
     return binding.FinishStreamWithMetadata.apply(null, arguments);
 }

-function DestroyModel(model) {
-    return binding.DestroyModel(model._impl);
+function FreeModel(model) {
+    return binding.FreeModel(model._impl);
 }

 module.exports = {
     Model: Model,
     printVersions: binding.PrintVersions,
-    DestroyModel: DestroyModel,
+    FreeModel: FreeModel,
+    FreeStream: binding.FreeStream,
     FreeMetadata: binding.FreeMetadata
 };
@@ -25,13 +25,9 @@ ModelState::~ModelState()

 int
 ModelState::init(const char* model_path,
-                 unsigned int n_features,
-                 unsigned int n_context,
                  const char* alphabet_path,
                  unsigned int beam_width)
 {
-  n_features_ = n_features;
-  n_context_ = n_context;
   if (alphabet_.init(alphabet_path)) {
     return DS_ERR_INVALID_ALPHABET;
   }
@@ -35,8 +35,6 @@ struct ModelState {
 virtual ~ModelState();

 virtual int init(const char* model_path,
-                 unsigned int n_features,
-                 unsigned int n_context,
                  const char* alphabet_path,
                  unsigned int beam_width);

@@ -1,6 +1,9 @@
 import os
 import platform

+#The API is not snake case which triggers linter errors
+#pylint: disable=invalid-name
+
 # On Windows, we can't rely on RPATH being set to $ORIGIN/lib/ or on
 # @loader_path/lib but we can change the PATH to include the proper directory
 # for the dynamic linker
@@ -12,6 +15,7 @@ import deepspeech

 # rename for backwards compatibility
 from deepspeech.impl import PrintVersions as printVersions
+from deepspeech.impl import FreeStream as freeStream

 class Model(object):
     def __init__(self, *args, **kwargs):
@@ -25,7 +29,7 @@ class Model(object):

     def __del__(self):
         if self._impl:
-            deepspeech.impl.DestroyModel(self._impl)
+            deepspeech.impl.FreeModel(self._impl)
             self._impl = None

     def enableDecoderWithLM(self, *args, **kwargs):
@@ -37,11 +41,11 @@ class Model(object):
     def sttWithMetadata(self, *args, **kwargs):
         return deepspeech.impl.SpeechToTextWithMetadata(self._impl, *args, **kwargs)

-    def setupStream(self, sample_rate=16000):
-        status, ctx = deepspeech.impl.SetupStream(self._impl,
+    def createStream(self, sample_rate=16000):
+        status, ctx = deepspeech.impl.CreateStream(self._impl,
                                                   aSampleRate=sample_rate)
         if status != 0:
-            raise RuntimeError("SetupStream failed with error code {}".format(status))
+            raise RuntimeError("CreateStream failed with error code {}".format(status))
         return ctx

     def feedAudioContent(self, *args, **kwargs):
@@ -32,17 +32,6 @@ LM_ALPHA = 0.75
 LM_BETA = 1.85


-# These constants are tied to the shape of the graph used (changing them changes
-# the geometry of the first layer), so make sure you use the same constants that
-# were used during training
-
-# Number of MFCC features to use
-N_FEATURES = 26
-
-# Size of the context window used for producing timesteps in the input vector
-N_CONTEXT = 9
-
-
 def convert_samplerate(audio_path):
     sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate {} --encoding signed-integer --endian little --compression 0.0 --no-dither - '.format(quote(audio_path), SAMPLE_RATE)
     try:
@@ -88,14 +77,14 @@ def main():

     print('Loading model from file {}'.format(args.model), file=sys.stderr)
     model_load_start = timer()
-    ds = Model(args.model, N_FEATURES, N_CONTEXT, args.alphabet, BEAM_WIDTH)
+    ds = Model(args.model, args.alphabet, BEAM_WIDTH)
     model_load_end = timer() - model_load_start
     print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

     if args.lm and args.trie:
         print('Loading language model from files {} {}'.format(args.lm, args.trie), file=sys.stderr)
         lm_load_start = timer()
-        ds.enableDecoderWithLM(args.alphabet, args.lm, args.trie, LM_ALPHA, LM_BETA)
+        ds.enableDecoderWithLM(args.lm, args.trie, LM_ALPHA, LM_BETA)
         lm_load_end = timer() - lm_load_start
         print('Loaded language model in {:.3}s.'.format(lm_load_end), file=sys.stderr)

@@ -19,7 +19,7 @@ import_array();
 }

 %typemap(argout) ModelState **retval {
-  // not owned, Python wrapper in __init__.py calls DS_DestroyModel
+  // not owned, Python wrapper in __init__.py calls DS_FreeModel
   %append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0));
 }

@@ -21,17 +21,6 @@ LM_ALPHA = 0.75
 LM_BETA = 1.85


-# These constants are tied to the shape of the graph used (changing them changes
-# the geometry of the first layer), so make sure you use the same constants that
-# were used during training
-
-# Number of MFCC features to use
-N_FEATURES = 26
-
-# Size of the context window used for producing timesteps in the input vector
-N_CONTEXT = 9
-
-
 def main():
     parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
     parser.add_argument('--model', required=True,
@@ -48,10 +37,10 @@ def main():
                         help='Second audio file to use in interleaved streams')
     args = parser.parse_args()

-    ds = Model(args.model, N_FEATURES, N_CONTEXT, args.alphabet, BEAM_WIDTH)
+    ds = Model(args.model, args.alphabet, BEAM_WIDTH)

     if args.lm and args.trie:
-        ds.enableDecoderWithLM(args.alphabet, args.lm, args.trie, LM_ALPHA, LM_BETA)
+        ds.enableDecoderWithLM(args.lm, args.trie, LM_ALPHA, LM_BETA)

     fin = wave.open(args.audio1, 'rb')
     fs1 = fin.getframerate()
@@ -63,8 +52,8 @@ def main():
     audio2 = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
     fin.close()

-    stream1 = ds.setupStream(sample_rate=fs1)
-    stream2 = ds.setupStream(sample_rate=fs2)
+    stream1 = ds.createStream(sample_rate=fs1)
+    stream2 = ds.createStream(sample_rate=fs2)

     splits1 = np.array_split(audio1, 10)
     splits2 = np.array_split(audio2, 10)
@@ -89,12 +89,10 @@ TFLiteModelState::~TFLiteModelState()

 int
 TFLiteModelState::init(const char* model_path,
-                       unsigned int n_features,
-                       unsigned int n_context,
                        const char* alphabet_path,
                        unsigned int beam_width)
 {
-  int err = ModelState::init(model_path, n_features, n_context, alphabet_path, beam_width);
+  int err = ModelState::init(model_path, alphabet_path, beam_width);
   if (err != DS_ERR_OK) {
     return err;
   }
@@ -31,8 +31,6 @@ struct TFLiteModelState : public ModelState
 virtual ~TFLiteModelState();

 virtual int init(const char* model_path,
-                 unsigned int n_features,
-                 unsigned int n_context,
                  const char* alphabet_path,
                  unsigned int beam_width) override;

@@ -25,12 +25,10 @@ TFModelState::~TFModelState()

 int
 TFModelState::init(const char* model_path,
-                   unsigned int n_features,
-                   unsigned int n_context,
                    const char* alphabet_path,
                    unsigned int beam_width)
 {
-  int err = ModelState::init(model_path, n_features, n_context, alphabet_path, beam_width);
+  int err = ModelState::init(model_path, alphabet_path, beam_width);
   if (err != DS_ERR_OK) {
     return err;
   }
@@ -19,8 +19,6 @@ struct TFModelState : public ModelState
 virtual ~TFModelState();

 virtual int init(const char* model_path,
-                 unsigned int n_features,
-                 unsigned int n_context,
                  const char* alphabet_path,
                  unsigned int beam_width) override;
