Add libdeepspeech python bindings

This commit is contained in:
Chris Lord 2017-05-02 10:41:59 +01:00
parent c9cd4ff6f6
commit 10068fc40e
11 changed files with 3285 additions and 12 deletions

7
.gitignore vendored
View File

@ -1,7 +1,14 @@
.ipynb_checkpoints .ipynb_checkpoints
*.pyc *.pyc
*.swp
*.DS_Store *.DS_Store
/werlog.js /werlog.js
/data /data
/logs /logs
/exports /exports
/native_client/setup.cfg
/native_client/build
/native_client/deepspeech.egg-info
/native_client/dist
/native_client/python/deepspeech.py
/native_client/python/deepspeech_wrap.cpp

View File

@ -9,9 +9,12 @@
### $ make -C native_client/ TARGET=rpi3 TFDIR=../../tensorflow/tensorflow/ ### $ make -C native_client/ TARGET=rpi3 TFDIR=../../tensorflow/tensorflow/
### ###
.PHONY: clean run bindings
TARGET ?= host TARGET ?= host
TFDIR ?= ../../tensorflow TFDIR ?= ../../tensorflow
CXX ?= c++ CXX ?= c++
PREFIX ?= /usr/local
ifeq ($(TARGET),host) ifeq ($(TARGET),host)
TOOLCHAIN := TOOLCHAIN :=
@ -38,10 +41,30 @@ endif
default: deepspeech default: deepspeech
clean: clean:
rm -f deepspeech rm -rf build dist deepspeech.egg-info
rm -f deepspeech setup.cfg python/deepspeech_wrap.cpp python/deepspeech.py
deepspeech: client.cc deepspeech: client.cc
$(TOOLCHAIN)$(CXX) -o deepspeech $(CFLAGS) client.cc $(LDFLAGS) $(TOOLCHAIN)$(CXX) -o deepspeech $(CFLAGS) client.cc $(LDFLAGS)
setup.cfg: setup.cfg.in
sed -e 's:@LIBDIRS@:${TFDIR}/bazel-bin/tensorflow\:${TFDIR}/bazel-bin/native_client:g' setup.cfg.in > setup.cfg
bindings: setup.cfg
python ./setup.py bdist_wheel
run: deepspeech run: deepspeech
${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/tensorflow:${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS} ${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/tensorflow:${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS}
install:
install -d ${PREFIX}/lib
install -m 0644 ${TFDIR}/bazel-bin/tensorflow/libtensorflow.so ${PREFIX}/lib/
install -m 0644 ${TFDIR}/bazel-bin/native_client/libkissfft.so ${PREFIX}/lib/
install -m 0644 ${TFDIR}/bazel-bin/native_client/libc_speech_features.so ${PREFIX}/lib/
install -m 0644 ${TFDIR}/bazel-bin/native_client/libdeepspeech.so ${PREFIX}/lib/
uninstall:
rm -f ${PREFIX}/lib/libtensorflow.so
rm -f ${PREFIX}/lib/libkissfft.so
rm -f ${PREFIX}/lib/libc_speech_features.so
rm -f ${PREFIX}/lib/libdeepspeech.so

View File

@ -46,3 +46,15 @@ The client can be run via the `Makefile`. The client will accept audio of any fo
``` ```
ARGS="/path/to/output_graph.pb /path/to/audio/file.ogg" make run ARGS="/path/to/output_graph.pb /path/to/audio/file.ogg" make run
``` ```
## Python bindings
Included is a set of generated Python bindings. After following the above build instructions, they can be installed by executing the following commands (or their equivalents on your system):
```
PREFIX=/usr/local make install
make bindings
sudo pip install dist/deepspeech*
```
It is assumed that `$PREFIX/lib` exists in the library path, otherwise you may need to alter your environment. The API mirrors the C++ API and is demonstrated in [client.py](client.py). Refer to [deepspeech.h](deepspeech.h) for documentation.

7
native_client/client.py Normal file
View File

@ -0,0 +1,7 @@
import sys
import scipy.io.wavfile as wav
from deepspeech import DeepSpeech

# Simple demo client for the Python bindings:
#   client.py <model.pb> <audio.wav>
# Fail with a usage message instead of a bare IndexError when arguments
# are missing.
if len(sys.argv) < 3:
    sys.stderr.write('Usage: %s <model.pb> <audio.wav>\n' % sys.argv[0])
    sys.exit(1)

# Load the frozen TensorFlow graph. 26 and 9 are presumably the cepstrum
# size and context-window size the model was trained with -- confirm
# against the training configuration if the model changes.
ds = DeepSpeech(sys.argv[1], 26, 9)

# wav.read returns (sample_rate, sample_array).
fs, audio = wav.read(sys.argv[2])

# print as a function call so the demo runs under both Python 2 and 3.
print(ds.stt(audio, fs))

View File

@ -36,14 +36,14 @@ DeepSpeech::DeepSpeech(const char* aModelPath, int aNCep, int aNContext)
status = ReadBinaryProto(Env::Default(), aModelPath, &mPriv->graph_def); status = ReadBinaryProto(Env::Default(), aModelPath, &mPriv->graph_def);
if (!status.ok()) { if (!status.ok()) {
mPriv->session->Close(); mPriv->session->Close();
mPriv->session = nullptr; mPriv->session = NULL;
return; return;
} }
status = mPriv->session->Create(mPriv->graph_def); status = mPriv->session->Create(mPriv->graph_def);
if (!status.ok()) { if (!status.ok()) {
mPriv->session->Close(); mPriv->session->Close();
mPriv->session = nullptr; mPriv->session = NULL;
return; return;
} }
@ -136,7 +136,7 @@ DeepSpeech::getMfccFrames(const short* aBuffer, unsigned int aBufferSize,
*aNFrames = ds_input_length; *aNFrames = ds_input_length;
} }
if (aFrameLen) { if (aFrameLen) {
*aFrameLen = contextSize; *aFrameLen = frameSize;
} }
} }
@ -144,7 +144,7 @@ char*
DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen) DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
{ {
if (!mPriv->session) { if (!mPriv->session) {
return nullptr; return NULL;
} }
const int frameSize = mPriv->ncep + (2 * mPriv->ncep * mPriv->ncontext); const int frameSize = mPriv->ncep + (2 * mPriv->ncep * mPriv->ncontext);
@ -153,7 +153,7 @@ DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
} else if (aFrameLen < frameSize) { } else if (aFrameLen < frameSize) {
std::cerr << "mfcc features array is too small (expected " << std::cerr << "mfcc features array is too small (expected " <<
frameSize << ", got " << aFrameLen << ")\n"; frameSize << ", got " << aFrameLen << ")\n";
return nullptr; return NULL;
} }
Tensor input(DT_FLOAT, TensorShape({1, aNFrames, frameSize})); Tensor input(DT_FLOAT, TensorShape({1, aNFrames, frameSize}));
@ -175,7 +175,7 @@ DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
{"output_node"}, {}, &outputs); {"output_node"}, {}, &outputs);
if (!status.ok()) { if (!status.ok()) {
std::cerr << "Error running session: " << status.ToString() << "\n"; std::cerr << "Error running session: " << status.ToString() << "\n";
return nullptr; return NULL;
} }
// Output is an array of shape (1, n_results, result_length). // Output is an array of shape (1, n_results, result_length).
@ -199,7 +199,7 @@ DeepSpeech::stt(const short* aBuffer, unsigned int aBufferSize, int aSampleRate)
char* string; char* string;
int n_frames; int n_frames;
getMfccFrames(aBuffer, aBufferSize, aSampleRate, &mfcc, &n_frames, nullptr); getMfccFrames(aBuffer, aBufferSize, aSampleRate, &mfcc, &n_frames, NULL);
string = infer(mfcc, n_frames); string = infer(mfcc, n_frames);
free(mfcc); free(mfcc);
return string; return string;

View File

@ -2,6 +2,8 @@
#ifndef __DEEPSPEECH_H__ #ifndef __DEEPSPEECH_H__
#define __DEEPSPEECH_H__ #define __DEEPSPEECH_H__
#include <cstddef>
typedef struct _DeepSpeechPrivate DeepSpeechPrivate; typedef struct _DeepSpeechPrivate DeepSpeechPrivate;
class DeepSpeech { class DeepSpeech {
@ -27,7 +29,6 @@ class DeepSpeech {
* Extracts MFCC features from a given audio signal and adds the appropriate * Extracts MFCC features from a given audio signal and adds the appropriate
* amount of context to run inference with the given DeepSpeech context. * amount of context to run inference with the given DeepSpeech context.
* *
* @param aCtx A DeepSpeech context.
* @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
* rate. * rate.
* @param aBufferSize The sample-length of the audio signal. * @param aBufferSize The sample-length of the audio signal.
@ -43,8 +44,8 @@ class DeepSpeech {
unsigned int aBufferSize, unsigned int aBufferSize,
int aSampleRate, int aSampleRate,
float** aMfcc, float** aMfcc,
int* aNFrames = nullptr, int* aNFrames = NULL,
int* aFrameLen = nullptr); int* aFrameLen = NULL);
/** /**
* @brief Run inference on the given audio. * @brief Run inference on the given audio.
@ -52,7 +53,6 @@ class DeepSpeech {
* Runs inference on the given MFCC audio features with the given DeepSpeech * Runs inference on the given MFCC audio features with the given DeepSpeech
* context. See DsGetMfccFrames(). * context. See DsGetMfccFrames().
* *
* @param aCtx A DeepSpeech context.
* @param aMfcc MFCC features with the appropriate amount of context per * @param aMfcc MFCC features with the appropriate amount of context per
* frame. * frame.
* @param aNFrames The number of frames in @p aMfcc. * @param aNFrames The number of frames in @p aMfcc.

View File

@ -0,0 +1 @@
# Re-export the SWIG-generated deepspeech module's public names at package level.
from .deepspeech import *

View File

@ -0,0 +1,17 @@
// SWIG interface definition for the deepspeech Python extension module.
%module deepspeech
// Code copied verbatim into the generated wrapper; SWIG_FILE_WITH_INIT
// tells numpy.i that this wrapper owns module initialisation.
%{
#define SWIG_FILE_WITH_INIT
#include "deepspeech.h"
%}
// numpy.i supplies the typemaps used in the %apply directives below.
%include "numpy.i"
// Run numpy's C-API initialisation when the module is imported.
%init %{
import_array();
%}
// Map the (buffer, size) C argument pairs onto single numpy-array
// Python arguments, matching the signatures in deepspeech.h:
// - stt() takes a 16-bit sample buffer plus its length.
%apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)};
// - getMfccFrames() returns a caller-owned 2-D float array via out-params.
%apply (float** ARGOUTVIEWM_ARRAY2, int* DIM1, int* DIM2) {(float** aMfcc, int* aNFrames, int* aFrameLen)};
// - infer() consumes a 2-D float feature array.
%apply (float* IN_ARRAY2, int DIM1, int DIM2) {(float* aMfcc, int aNFrames, int aFrameLen)};
// Wrap everything declared in the public header.
%include "../deepspeech.h"

3166
native_client/python/numpy.i Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,4 @@
# Options consumed by the build_ext command when setup.py compiles the
# SWIG extension (this file is generated from setup.cfg.in by the Makefile).
[build_ext]
# deepspeech.h lives in the native_client directory itself.
include-dirs=./
# @LIBDIRS@ is replaced by the Makefile's setup.cfg rule with the
# bazel-bin paths containing libtensorflow/libdeepspeech and friends.
library-dirs=@LIBDIRS@
# -c++ wraps the header as C++; -keyword enables Python keyword arguments.
swig-opts=-c++ -keyword

36
native_client/setup.py Executable file
View File

@ -0,0 +1,36 @@
#! /usr/bin/env python
"""setuptools build script for the libdeepspeech Python bindings.

Builds the SWIG-generated `_deepspeech` extension and packages the
pure-Python wrapper from the python/ directory.
"""
from setuptools import setup, Extension
from distutils.command.build import build
import numpy

# Locate numpy's C headers; very old numpy releases spelled the
# accessor get_numpy_include() instead of get_include().
try:
    numpy_include = numpy.get_include()
except AttributeError:
    numpy_include = numpy.get_numpy_include()

class BuildExtFirst(build):
    """Run build_ext before build_py.

    The SWIG step of build_ext generates python/deepspeech.py, which
    build_py must see, so the default command order is rearranged to
    put build_ext first.
    """
    sub_commands = [('build_ext', build.has_ext_modules),
                    ('build_py', build.has_pure_modules),
                    ('build_clib', build.has_c_libraries),
                    ('build_scripts', build.has_scripts)]

# The extension is driven by the SWIG interface file; include/library
# directories come from setup.cfg (generated by the Makefile).
deepspeech = Extension('_deepspeech',
                       ['python/deepspeech.i'],
                       include_dirs = [numpy_include],
                       libraries = ['tensorflow', 'deepspeech', 'c_speech_features', 'kissfft'])

setup(name = 'deepspeech',
      description = 'A library for running inference on a DeepSpeech model',
      author = 'Chris Lord',
      author_email='chrislord.net@gmail.com',
      version = '0.0.1',
      package_dir = {'deepspeech': 'python'},
      packages = [ 'deepspeech' ],
      cmdclass = { 'build': BuildExtFirst },
      license = 'MPL-2.0',
      url = 'https://github.com/mozilla/DeepSpeech',
      ext_modules = [deepspeech])