Add libdeepspeech python bindings

2017-05-02 10:41:59 +01:00 · 2017-05-02 10:41:59 +01:00 · 10068fc40e
commit 10068fc40e
parent c9cd4ff6f6
11 changed files with 3285 additions and 12 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,7 +1,14 @@
 .ipynb_checkpoints
 *.pyc
+*.swp
 *.DS_Store
 /werlog.js
 /data
 /logs
 /exports
+/native_client/setup.cfg
+/native_client/build
+/native_client/deepspeech.egg-info
+/native_client/dist
+/native_client/python/deepspeech.py
+/native_client/python/deepspeech_wrap.cpp
--- a/native_client/Makefile
+++ b/native_client/Makefile
@ -9,9 +9,12 @@
 ### $ make -C native_client/ TARGET=rpi3 TFDIR=../../tensorflow/tensorflow/
 ###

+.PHONY: default clean run bindings install uninstall
+
 TARGET ?= host
 TFDIR  ?= ../../tensorflow
 CXX    ?= c++
+PREFIX ?= /usr/local

 ifeq ($(TARGET),host)
 TOOLCHAIN :=
@ -38,10 +41,30 @@ endif
 default: deepspeech

 clean:
-	rm -f deepspeech
+	rm -rf build dist deepspeech.egg-info
+	rm -f deepspeech setup.cfg python/deepspeech_wrap.cpp python/deepspeech.py

 deepspeech: client.cc
 	$(TOOLCHAIN)$(CXX) -o deepspeech $(CFLAGS) client.cc $(LDFLAGS)

+setup.cfg: setup.cfg.in
+	sed -e 's:@LIBDIRS@:${TFDIR}/bazel-bin/tensorflow\:${TFDIR}/bazel-bin/native_client:g' setup.cfg.in > setup.cfg
+
+bindings: setup.cfg
+	python ./setup.py bdist_wheel
+
 run: deepspeech
 	${META_LD_LIBRARY_PATH}=${TFDIR}/bazel-bin/tensorflow:${TFDIR}/bazel-bin/native_client:${${META_LD_LIBRARY_PATH}} ./deepspeech ${ARGS}
+
+install:
+	install -d ${PREFIX}/lib
+	install -m 0644 ${TFDIR}/bazel-bin/tensorflow/libtensorflow.so ${PREFIX}/lib/
+	install -m 0644 ${TFDIR}/bazel-bin/native_client/libkissfft.so ${PREFIX}/lib/
+	install -m 0644 ${TFDIR}/bazel-bin/native_client/libc_speech_features.so ${PREFIX}/lib/
+	install -m 0644 ${TFDIR}/bazel-bin/native_client/libdeepspeech.so ${PREFIX}/lib/
+
+uninstall:
+	rm -f ${PREFIX}/lib/libtensorflow.so
+	rm -f ${PREFIX}/lib/libkissfft.so
+	rm -f ${PREFIX}/lib/libc_speech_features.so
+	rm -f ${PREFIX}/lib/libdeepspeech.so
--- a/native_client/README.md
+++ b/native_client/README.md
@ -46,3 +46,15 @@ The client can be run via the `Makefile`. The client will accept audio of any fo
 ```
 ARGS="/path/to/output_graph.pb /path/to/audio/file.ogg" make run
 ```
+
+## Python bindings
+
+A set of generated Python bindings is included. After following the build instructions above, they can be installed by executing the following commands (or the equivalent on your system):
+
+```
+PREFIX=/usr/local make install
+make bindings
+sudo pip install dist/deepspeech*
+```
+
+This assumes that `$PREFIX/lib` is on your library search path; otherwise, you may need to alter your environment. The API mirrors the C++ API and is demonstrated in [client.py](client.py). Refer to [deepspeech.h](deepspeech.h) for documentation.
--- a/native_client/client.py
+++ b/native_client/client.py
@ -0,0 +1,7 @@
+import sys
+import scipy.io.wavfile as wav
+from deepspeech import DeepSpeech
+
+ds = DeepSpeech(sys.argv[1], 26, 9)  # args: model path, aNCep=26, aNContext=9
+fs, audio = wav.read(sys.argv[2])  # wav.read returns (sample rate, sample array)
+print(ds.stt(audio, fs))  # call form of print works on both Python 2 and 3
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@ -36,14 +36,14 @@ DeepSpeech::DeepSpeech(const char* aModelPath, int aNCep, int aNContext)
  status = ReadBinaryProto(Env::Default(), aModelPath, &mPriv->graph_def);
  if (!status.ok()) {
    mPriv->session->Close();
-    mPriv->session = nullptr;
+    mPriv->session = NULL;
    return;
  }

  status = mPriv->session->Create(mPriv->graph_def);
  if (!status.ok()) {
    mPriv->session->Close();
-    mPriv->session = nullptr;
+    mPriv->session = NULL;
    return;
  }

@ -136,7 +136,7 @@ DeepSpeech::getMfccFrames(const short* aBuffer, unsigned int aBufferSize,
    *aNFrames = ds_input_length;
  }
  if (aFrameLen) {
-    *aFrameLen = contextSize;
+    *aFrameLen = frameSize;
  }
 }

@ -144,7 +144,7 @@ char*
 DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
 {
  if (!mPriv->session) {
-    return nullptr;
+    return NULL;
  }

  const int frameSize = mPriv->ncep + (2 * mPriv->ncep * mPriv->ncontext);
@ -153,7 +153,7 @@ DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
  } else if (aFrameLen < frameSize) {
    std::cerr << "mfcc features array is too small (expected " <<
      frameSize << ", got " << aFrameLen << ")\n";
-    return nullptr;
+    return NULL;
  }

  Tensor input(DT_FLOAT, TensorShape({1, aNFrames, frameSize}));
@ -175,7 +175,7 @@ DeepSpeech::infer(float* aMfcc, int aNFrames, int aFrameLen)
    {"output_node"}, {}, &outputs);
  if (!status.ok()) {
    std::cerr << "Error running session: " << status.ToString() << "\n";
-    return nullptr;
+    return NULL;
  }

  // Output is an array of shape (1, n_results, result_length).
@ -199,7 +199,7 @@ DeepSpeech::stt(const short* aBuffer, unsigned int aBufferSize, int aSampleRate)
  char* string;
  int n_frames;

-  getMfccFrames(aBuffer, aBufferSize, aSampleRate, &mfcc, &n_frames, nullptr);
+  getMfccFrames(aBuffer, aBufferSize, aSampleRate, &mfcc, &n_frames, NULL);
  string = infer(mfcc, n_frames);
  free(mfcc);
  return string;
--- a/native_client/deepspeech.h
+++ b/native_client/deepspeech.h
@ -2,6 +2,8 @@
 #ifndef __DEEPSPEECH_H__
 #define __DEEPSPEECH_H__

+#include <cstddef>
+
 typedef struct _DeepSpeechPrivate DeepSpeechPrivate;

 class DeepSpeech {
@ -27,7 +29,6 @@ class DeepSpeech {
     * Extracts MFCC features from a given audio signal and adds the appropriate
     * amount of context to run inference with the given DeepSpeech context.
     *
-     * @param aCtx A DeepSpeech context.
     * @param aBuffer A 16-bit, mono raw audio signal at the appropriate sample
     *                rate.
     * @param aBufferSize The sample-length of the audio signal.
@ -43,8 +44,8 @@ class DeepSpeech {
                       unsigned int aBufferSize,
                       int aSampleRate,
                       float** aMfcc,
-                       int* aNFrames = nullptr,
-                       int* aFrameLen = nullptr);
+                       int* aNFrames = NULL,
+                       int* aFrameLen = NULL);

    /**
     * @brief Run inference on the given audio.
@ -52,7 +53,6 @@ class DeepSpeech {
     * Runs inference on the given MFCC audio features with the given DeepSpeech
     * context. See DsGetMfccFrames().
     *
-     * @param aCtx A DeepSpeech context.
     * @param aMfcc MFCC features with the appropriate amount of context per
     *              frame.
     * @param aNFrames The number of frames in @p aMfcc.
--- a/native_client/python/__init__.py
+++ b/native_client/python/__init__.py
@ -0,0 +1 @@
+from .deepspeech import *
--- a/native_client/python/deepspeech.i
+++ b/native_client/python/deepspeech.i
@ -0,0 +1,17 @@
+%module deepspeech
+
+%{
+#define SWIG_FILE_WITH_INIT
+#include "deepspeech.h"
+%}
+
+%include "numpy.i"
+%init %{
+import_array();
+%}
+
+%apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)};
+%apply (float** ARGOUTVIEWM_ARRAY2, int* DIM1, int* DIM2) {(float** aMfcc, int* aNFrames, int* aFrameLen)};
+%apply (float* IN_ARRAY2, int DIM1, int DIM2) {(float* aMfcc, int aNFrames, int aFrameLen)};
+
+%include "../deepspeech.h"
--- a/native_client/python/numpy.i
+++ b/native_client/python/numpy.i
--- a/native_client/setup.cfg.in
+++ b/native_client/setup.cfg.in
@ -0,0 +1,4 @@
+[build_ext]
+include-dirs=./
+library-dirs=@LIBDIRS@
+swig-opts=-c++ -keyword
--- a/native_client/setup.py
+++ b/native_client/setup.py
@ -0,0 +1,36 @@
+#! /usr/bin/env python
+
+from setuptools import setup, Extension
+from distutils.command.build import build
+
+import os
+import numpy
+import subprocess
+
+try:
+    numpy_include = numpy.get_include()
+except AttributeError:
+    numpy_include = numpy.get_numpy_include()
+
+class BuildExtFirst(build):
+    sub_commands = [('build_ext', build.has_ext_modules),
+                    ('build_py', build.has_pure_modules),
+                    ('build_clib', build.has_c_libraries),
+                    ('build_scripts', build.has_scripts)]
+
+deepspeech = Extension('_deepspeech',
+        ['python/deepspeech.i'],
+        include_dirs = [numpy_include],
+        libraries = ['tensorflow', 'deepspeech', 'c_speech_features', 'kissfft'])
+
+setup(name = 'deepspeech',
+      description = 'A library for running inference on a DeepSpeech model',
+      author = 'Chris Lord',
+      author_email='chrislord.net@gmail.com',
+      version = '0.0.1',
+      package_dir = {'deepspeech': 'python'},
+      packages = [ 'deepspeech' ],
+      cmdclass = { 'build': BuildExtFirst },
+      license = 'MPL-2.0',
+      url = 'https://github.com/mozilla/DeepSpeech',
+      ext_modules = [deepspeech])