Add libdeepspeech python bindings
This commit is contained in:
parent
c9cd4ff6f6
commit
10068fc40e
|
@ -1,7 +1,14 @@
|
|||
.ipynb_checkpoints
|
||||
*.pyc
|
||||
*.swp
|
||||
*.DS_Store
|
||||
/werlog.js
|
||||
/data
|
||||
/logs
|
||||
/exports
|
||||
/native_client/setup.cfg
|
||||
/native_client/build
|
||||
/native_client/deepspeech.egg-info
|
||||
/native_client/dist
|
||||
/native_client/python/deepspeech.py
|
||||
/native_client/python/deepspeech_wrap.cpp
|
||||
|
|
|
@ -9,9 +9,12 @@
|
|||
### $ make -C native_client/ TARGET=rpi3 TFDIR=../../tensorflow/tensorflow/
|
||||
###
|
||||
|
||||
# Command-only targets: none of these produce a file with the target's name,
# so declare them phony to keep a stray file from masking them.
.PHONY: default clean run bindings install uninstall
|
||||
|
||||
TARGET ?= host
|
||||
TFDIR ?= ../../tensorflow
|
||||
CXX ?= c++
|
||||
PREFIX ?= /usr/local
|
||||
|
||||
ifeq ($(TARGET),host)
|
||||
TOOLCHAIN :=
|
||||
|
@ -38,10 +41,30 @@ endif
|
|||
default: deepspeech
|
||||
|
||||
clean:
|
||||
rm -f deepspeech
|
||||
rm -rf build dist deepspeech.egg-info
|
||||
rm -f deepspeech setup.cfg python/deepspeech_wrap.cpp python/deepspeech.py
|
||||
|
||||
deepspeech: client.cc
|
||||
$(TOOLCHAIN)$(CXX) -o deepspeech $(CFLAGS) client.cc $(LDFLAGS)
|
||||
|
||||
setup.cfg: setup.cfg.in
|
||||
sed -e 's:@LIBDIRS@:${TFDIR}/bazel-bin/tensorflow\:${TFDIR}/bazel-bin/native_client:g' setup.cfg.in > setup.cfg
|
||||
|
||||
bindings: setup.cfg
|
||||
python ./setup.py bdist_wheel
|
||||
|
||||
run: deepspeech
|
||||
${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/tensorflow:${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS}
|
||||
|
||||
install:
|
||||
install -d ${PREFIX}/lib
|
||||
install -m 0644 ${TFDIR}/bazel-bin/tensorflow/libtensorflow.so ${PREFIX}/lib/
|
||||
install -m 0644 ${TFDIR}/bazel-bin/native_client/libkissfft.so ${PREFIX}/lib/
|
||||
install -m 0644 ${TFDIR}/bazel-bin/native_client/libc_speech_features.so ${PREFIX}/lib/
|
||||
install -m 0644 ${TFDIR}/bazel-bin/native_client/libdeepspeech.so ${PREFIX}/lib/
|
||||
|
||||
uninstall:
|
||||
rm -f ${PREFIX}/lib/libtensorflow.so
|
||||
rm -f ${PREFIX}/lib/libkissfft.so
|
||||
rm -f ${PREFIX}/lib/libc_speech_features.so
|
||||
rm -f ${PREFIX}/lib/libdeepspeech.so
|
||||
|
|
|
@ -46,3 +46,15 @@ The client can be run via the `Makefile`. The client will accept audio of any fo
|
|||
```
|
||||
ARGS="/path/to/output_graph.pb /path/to/audio/file.ogg" make run
|
||||
```
|
||||
|
||||
## Python bindings
|
||||
|
||||
A set of generated Python bindings is included. After following the build instructions above, install them by running the following commands (or the equivalent on your system):
|
||||
|
||||
```
|
||||
PREFIX=/usr/local make install
|
||||
make bindings
|
||||
sudo pip install dist/deepspeech*
|
||||
```
|
||||
|
||||
These commands assume that `$PREFIX/lib` is on your library search path; if it is not, you may need to adjust your environment. The Python API mirrors the C++ API and is demonstrated in [client.py](client.py). Refer to [deepspeech.h](deepspeech.h) for documentation.
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
import sys
|
||||
import scipy.io.wavfile as wav
|
||||
from deepspeech import DeepSpeech
|
||||
|
||||
ds = DeepSpeech(sys.argv[1], 26, 9)
|
||||
fs, audio = wav.read(sys.argv[2])
|
||||
# Print the string returned by stt(); the call form of print works on both
# Python 2 and Python 3, unlike the print statement.
print(ds.stt(audio, fs))
|
|
@ -36,14 +36,14 @@ DeepSpeech::DeepSpeech(const char* aModelPath, int aNCep, int aNContext)
|
|||
status = ReadBinaryProto(Env::Default(), aModelPath, &mPriv->graph_def);
|
||||
if (!status.ok()) {
|
||||
mPriv->session->Close();
|
||||
mPriv->session = nullptr;
|
||||
mPriv->session = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
status = mPriv->session->Create(mPriv->graph_def);
|
||||
if (!status.ok()) {
|
||||
mPriv->session->Close();
|
||||
mPriv->session = nullptr;
|
||||
mPriv->session = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ DeepSpeech::getMfccFrames(const short* aBuffer, unsigned int aBufferSize,
|
|||
*aNFrames = ds_input_length;
|
||||
}
|
||||
if (aFrameLen) {
|
||||
*aFrameLen = contextSize;
|
||||
*aFrameLen = frameSize;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -144,7 +144,7 @@ char*
|
|||
DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
|
||||
{
|
||||
if (!mPriv->session) {
|
||||
return nullptr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const int frameSize = mPriv->ncep + (2 * mPriv->ncep * mPriv->ncontext);
|
||||
|
@ -153,7 +153,7 @@ DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
|
|||
} else if (aFrameLen < frameSize) {
|
||||
std::cerr << "mfcc features array is too small (expected " <<
|
||||
frameSize << ", got " << aFrameLen << ")\n";
|
||||
return nullptr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Tensor input(DT_FLOAT, TensorShape({1, aNFrames, frameSize}));
|
||||
|
@ -175,7 +175,7 @@ DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
|
|||
{"output_node"}, {}, &outputs);
|
||||
if (!status.ok()) {
|
||||
std::cerr << "Error running session: " << status.ToString() << "\n";
|
||||
return nullptr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Output is an array of shape (1, n_results, result_length).
|
||||
|
@ -199,7 +199,7 @@ DeepSpeech::stt(const short* aBuffer, unsigned int aBufferSize, int aSampleRate)
|
|||
char* string;
|
||||
int n_frames;
|
||||
|
||||
getMfccFrames(aBuffer, aBufferSize, aSampleRate, &mfcc, &n_frames, nullptr);
|
||||
getMfccFrames(aBuffer, aBufferSize, aSampleRate, &mfcc, &n_frames, NULL);
|
||||
string = infer(mfcc, n_frames);
|
||||
free(mfcc);
|
||||
return string;
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
#ifndef __DEEPSPEECH_H__
|
||||
#define __DEEPSPEECH_H__
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
typedef struct _DeepSpeechPrivate DeepSpeechPrivate;
|
||||
|
||||
class DeepSpeech {
|
||||
|
@ -27,7 +29,6 @@ class DeepSpeech {
|
|||
* Extracts MFCC features from a given audio signal and adds the appropriate
|
||||
* amount of context to run inference with the given DeepSpeech context.
|
||||
*
|
||||
* @param aCtx A DeepSpeech context.
|
||||
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
|
||||
* rate.
|
||||
* @param aBufferSize The sample-length of the audio signal.
|
||||
|
@ -43,8 +44,8 @@ class DeepSpeech {
|
|||
unsigned int aBufferSize,
|
||||
int aSampleRate,
|
||||
float** aMfcc,
|
||||
int* aNFrames = nullptr,
|
||||
int* aFrameLen = nullptr);
|
||||
int* aNFrames = NULL,
|
||||
int* aFrameLen = NULL);
|
||||
|
||||
/**
|
||||
* @brief Run inference on the given audio.
|
||||
|
@ -52,7 +53,6 @@ class DeepSpeech {
|
|||
* Runs inference on the given MFCC audio features with the given DeepSpeech
|
||||
context. See getMfccFrames().
|
||||
*
|
||||
* @param aCtx A DeepSpeech context.
|
||||
* @param aMfcc MFCC features with the appropriate amount of context per
|
||||
* frame.
|
||||
* @param aNFrames The number of frames in @p aMfcc.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
from .deepspeech import *
|
|
@ -0,0 +1,17 @@
|
|||
%module deepspeech
|
||||
|
||||
%{
|
||||
#define SWIG_FILE_WITH_INIT
|
||||
#include "deepspeech.h"
|
||||
%}
|
||||
|
||||
%include "numpy.i"
|
||||
%init %{
|
||||
import_array();
|
||||
%}
|
||||
|
||||
%apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)};
|
||||
%apply (float** ARGOUTVIEWM_ARRAY2, int* DIM1, int* DIM2) {(float** aMfcc, int* aNFrames, int* aFrameLen)};
|
||||
%apply (float* IN_ARRAY2, int DIM1, int DIM2) {(float* aMfcc, int aNFrames, int aFrameLen)};
|
||||
|
||||
%include "../deepspeech.h"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,4 @@
|
|||
[build_ext]
|
||||
include-dirs=./
|
||||
library-dirs=@LIBDIRS@
|
||||
swig-opts=-c++ -keyword
|
|
@ -0,0 +1,36 @@
|
|||
#! /usr/bin/env python
|
||||
|
||||
from setuptools import setup, Extension
|
||||
from distutils.command.build import build
|
||||
|
||||
import os
|
||||
import numpy
|
||||
import subprocess
|
||||
|
||||
try:
|
||||
numpy_include = numpy.get_include()
|
||||
except AttributeError:
|
||||
numpy_include = numpy.get_numpy_include()
|
||||
|
||||
class BuildExtFirst(build):
|
||||
sub_commands = [('build_ext', build.has_ext_modules),
|
||||
('build_py', build.has_pure_modules),
|
||||
('build_clib', build.has_c_libraries),
|
||||
('build_scripts', build.has_scripts)]
|
||||
|
||||
deepspeech = Extension('_deepspeech',
|
||||
['python/deepspeech.i'],
|
||||
include_dirs = [numpy_include],
|
||||
libraries = ['tensorflow', 'deepspeech', 'c_speech_features', 'kissfft'])
|
||||
|
||||
setup(name = 'deepspeech',
|
||||
description = 'A library for running inference on a DeepSpeech model',
|
||||
author = 'Chris Lord',
|
||||
author_email='chrislord.net@gmail.com',
|
||||
version = '0.0.1',
|
||||
package_dir = {'deepspeech': 'python'},
|
||||
packages = [ 'deepspeech' ],
|
||||
cmdclass = { 'build': BuildExtFirst },
|
||||
license = 'MPL-2.0',
|
||||
url = 'https://github.com/mozilla/DeepSpeech',
|
||||
ext_modules = [deepspeech])
|
Loading…
Reference in New Issue