Cache common objects in decoder build

This commit is contained in:
Reuben Morais 2018-11-12 13:49:22 -02:00
parent 0dcb1f87c5
commit cfed8ccd4f
6 changed files with 113 additions and 58 deletions

View File

@ -11,11 +11,14 @@ endif
all: bindings
clean:
clean-keep-common:
rm -rf dist temp_build ds_ctcdecoder.egg-info
rm -f swigwrapper_wrap.cpp swigwrapper.py
bindings: clean
clean: clean-keep-common
rm -f common.a
bindings: clean-keep-common
pip install --quiet $(PYTHON_PACKAGES) wheel==0.31.0 setuptools==39.1.0
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
find temp_build -type f -name "*.o" -delete

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function
import glob
import os
import shlex
import subprocess
import sys
from multiprocessing.dummy import Pool
# Compiler flags passed on every compile command line.
ARGS = [
    '-O3',
    '-DNDEBUG',
    '-DKENLM_MAX_ORDER=6',
    '-std=c++11',
    '-Wno-unused-local-typedef',
    '-Wno-sign-compare',
]

# Header search directories; each one is handed to the compiler as -I<dir>.
INCLUDES = [
    '..',
    '../kenlm',
    'third_party/openfst-1.6.7/src/include',
    'third_party/ThreadPool',
]

# Glob patterns for the library sources, in the order they are collected.
_LIBRARY_GLOBS = (
    '../kenlm/util/*.cc',
    '../kenlm/lm/*.cc',
    '../kenlm/util/double-conversion/*.cc',
    'third_party/openfst-1.6.7/src/lib/*.cc',
)

# Library files with these name endings are left out of the source list.
_SKIPPED_ENDINGS = ('main.cc', 'test.cc', 'unittest.cc')

COMMON_FILES = []
for _library_glob in _LIBRARY_GLOBS:
    COMMON_FILES.extend(glob.glob(_library_glob))
COMMON_FILES = [fn for fn in COMMON_FILES if not fn.endswith(_SKIPPED_ENDINGS)]

# The *.cpp files of the current directory are appended after the filter,
# so the exclusion above never applies to them.
COMMON_FILES.extend(glob.glob('*.cpp'))
def build_common(out_name='common.a', build_dir='temp_build/temp_build', num_parallel=1):
    """Compile COMMON_FILES and bundle the object files into a static archive.

    Each source is compiled to ``<build_dir>/<source without extension>.o``.
    A source whose object file already exists is not recompiled, but the
    existing object is still added to the archive.

    Tools and flags are read from the environment: ``CXX`` (default ``g++``),
    ``AR`` (default ``ar``), ``LIBTOOL`` (default ``libtool``), ``CFLAGS`` and
    ``CXXFLAGS``.

    Args:
        out_name: Path of the archive to create.
        build_dir: Directory under which the object files are written.
        num_parallel: Number of worker threads running compiler commands.

    Raises:
        subprocess.CalledProcessError: If a compiler or archiver command
            exits with a non-zero status.
    """
    compiler = os.environ.get('CXX', 'g++')
    ar = os.environ.get('AR', 'ar')
    libtool = os.environ.get('LIBTOOL', 'libtool')

    # Join with a space: plain concatenation would fuse the last CFLAGS
    # option with the first CXXFLAGS option when both variables are set.
    cflags = ' '.join(
        flags
        for flags in (os.environ.get('CFLAGS', ''), os.environ.get('CXXFLAGS', ''))
        if flags
    )

    def object_path(source):
        """Return the object file path that corresponds to *source*."""
        return os.path.join(build_dir, os.path.splitext(source)[0] + '.o')

    # Create the output directories up front, before any worker thread runs,
    # so that concurrent compiles never race on directory creation.
    for source in COMMON_FILES:
        outdir = os.path.dirname(object_path(source))
        if not os.path.exists(outdir):
            print('mkdir', outdir)
            os.makedirs(outdir)

    def build_one(source):
        """Compile *source* unless its object exists; return the object path."""
        outfile = object_path(source)
        if os.path.exists(outfile):
            # Still report the cached object: returning None here would drop
            # it from the archive and make the ' '.join() below fail.
            return outfile

        cmd = '{cc} -fPIC -c {cflags} {args} {includes} {infile} -o {outfile}'.format(
            cc=compiler,
            cflags=cflags,
            args=' '.join(ARGS),
            includes=' '.join('-I' + i for i in INCLUDES),
            infile=source,
            outfile=outfile,
        )
        print(cmd)
        subprocess.check_call(shlex.split(cmd))
        return outfile

    pool = Pool(num_parallel)
    try:
        # list() forces evaluation and re-raises any compile failure here.
        obj_files = list(pool.imap_unordered(build_one, COMMON_FILES))
    finally:
        # Always release the worker threads, including after a failed compile.
        pool.terminate()
        pool.join()

    if sys.platform.startswith('darwin'):
        cmd = '{libtool} -static -o {outfile} {infiles}'.format(
            libtool=libtool,
            outfile=out_name,
            infiles=' '.join(obj_files),
        )
    else:
        cmd = '{ar} rcs {outfile} {infiles}'.format(
            ar=ar,
            outfile=out_name,
            infiles=' '.join(obj_files),
        )
    print(cmd)
    subprocess.check_call(shlex.split(cmd))
if __name__ == '__main__':
    # Run as a script: build the archive with every default argument.
    build_common()

View File

@ -5,12 +5,13 @@ from distutils.command.build import build
from setuptools import setup, Extension, distutils
import argparse
import glob
import multiprocessing.pool
import os
import platform
import sys
from build_common import *
try:
import numpy
try:
@ -40,66 +41,25 @@ def read(fname):
project_version = read('../../VERSION').strip()
# monkey-patch for parallel compilation
# See: https://stackoverflow.com/a/13176803
def parallelCCompile(self,
sources,
output_dir=None,
macros=None,
include_dirs=None,
debug=0,
extra_preargs=None,
extra_postargs=None,
depends=None):
# those lines are copied from distutils.ccompiler.CCompiler directly
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
output_dir, macros, include_dirs, sources, depends, extra_postargs)
cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
build_dir = 'temp_build/temp_build'
common_build = 'common.a'
# parallel code
def _single_compile(obj):
try:
src, ext = build[obj]
except KeyError:
return
self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
if not os.path.exists(common_build):
if not os.path.exists(build_dir):
os.makedirs(build_dir)
# convert to list, imap is evaluated on-demand
thread_pool = multiprocessing.pool.ThreadPool(args[0].num_processes)
list(thread_pool.imap(_single_compile, objects))
return objects
# hack compile to support parallel compiling
distutils.ccompiler.CCompiler.compile = parallelCCompile
FILES = glob.glob('../kenlm/util/*.cc') \
+ glob.glob('../kenlm/lm/*.cc') \
+ glob.glob('../kenlm/util/double-conversion/*.cc')
FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc')
FILES = [
fn for fn in FILES
if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith(
'unittest.cc'))
]
ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11',
'-Wno-unused-local-typedef', '-Wno-sign-compare']
build_common(out_name='common.a',
build_dir=build_dir,
num_parallel=args[0].num_processes)
decoder_module = Extension(
name='ds_ctcdecoder._swigwrapper',
sources=['swigwrapper.i'] + FILES + glob.glob('*.cpp'),
sources=['swigwrapper.i'],
swig_opts=['-c++', '-extranative'],
language='c++',
include_dirs=[
numpy_include,
'..',
'../kenlm',
'third_party/openfst-1.6.7/src/include',
'third_party/ThreadPool',
],
extra_compile_args=ARGS
include_dirs=INCLUDES + [numpy_include],
extra_compile_args=ARGS,
extra_link_args=[common_build],
)
class BuildExtFirst(build):

View File

@ -19,6 +19,11 @@ do_bazel_build
do_deepspeech_binary_build
# Decoder packages for Linux are built in a separate task
if [ "${OS}" = "Darwin" ]; then
do_deepspeech_decoder_build
fi
do_deepspeech_python_build
do_deepspeech_nodejs_build

View File

@ -562,7 +562,7 @@ do_deepspeech_decoder_build()
cp native_client/ctcdecode/dist/*.whl wheels
make -C native_client/ctcdecode clean
make -C native_client/ctcdecode clean-keep-common
unset NUMPY_BUILD_VERSION
unset NUMPY_DEP_VERSION

View File

@ -73,7 +73,7 @@ if __name__ == '__main__':
parser.add_argument('--target', required=False,
help='Where to put the native client binary files')
parser.add_argument('--arch', required=False,
help='Which architecture to download binaries for. "arm" for ARM 7 (32-bit), "gpu" for CUDA enabled x86_64 binaries, "cpu" for CPU-only x86_64 binaries, "osx" for CPU-only x86_64 OSX binaries. Optional ("cpu" by default)')
help='Which architecture to download binaries for. "arm" for ARM 7 (32-bit), "arm64" for ARM64, "gpu" for CUDA enabled x86_64 binaries, "cpu" for CPU-only x86_64 binaries, "osx" for CPU-only x86_64 OSX binaries. Optional ("cpu" by default)')
parser.add_argument('--artifact', required=False,
default='native_client.tar.xz',
help='Name of the artifact to download. Defaults to "native_client.tar.xz"')