diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile
index d4b9bf4c..962e917f 100644
--- a/native_client/ctcdecode/Makefile
+++ b/native_client/ctcdecode/Makefile
@@ -11,11 +11,14 @@ endif
 
 all: bindings
 
-clean:
+clean-keep-common:
 	rm -rf dist temp_build ds_ctcdecoder.egg-info
 	rm -f swigwrapper_wrap.cpp swigwrapper.py
 
-bindings: clean
+clean: clean-keep-common
+	rm -f common.a
+
+bindings: clean-keep-common
 	pip install --quiet $(PYTHON_PACKAGES) wheel==0.31.0 setuptools==39.1.0
 	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	find temp_build -type f -name "*.o" -delete
diff --git a/native_client/ctcdecode/build_common.py b/native_client/ctcdecode/build_common.py
new file mode 100644
index 00000000..be190721
--- /dev/null
+++ b/native_client/ctcdecode/build_common.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+from __future__ import absolute_import, division, print_function
+
+import glob
+import os
+import shlex
+import subprocess
+import sys
+
+from multiprocessing.dummy import Pool
+
+ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11',
+        '-Wno-unused-local-typedef', '-Wno-sign-compare']
+
+INCLUDES = [
+    '..',
+    '../kenlm',
+    'third_party/openfst-1.6.7/src/include',
+    'third_party/ThreadPool'
+]
+
+COMMON_FILES = (glob.glob('../kenlm/util/*.cc')
+                + glob.glob('../kenlm/lm/*.cc')
+                + glob.glob('../kenlm/util/double-conversion/*.cc'))
+
+COMMON_FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc')
+
+COMMON_FILES = [
+    fn for fn in COMMON_FILES
+    if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith(
+        'unittest.cc'))
+]
+
+COMMON_FILES += glob.glob('*.cpp')
+
+def build_common(out_name='common.a', build_dir='temp_build/temp_build', num_parallel=1):
+    """Compile every file in COMMON_FILES and archive the objects into out_name.
+
+    The compiler, archiver and flags are read from the CXX, AR, LIBTOOL,
+    CFLAGS and CXXFLAGS environment variables. Object files that already
+    exist under build_dir are reused instead of being recompiled.
+
+    out_name -- path of the static archive to create
+    build_dir -- directory the object file paths are joined onto
+    num_parallel -- number of worker threads running the compiler
+    """
+    compiler = os.environ.get('CXX', 'g++')
+    ar = os.environ.get('AR', 'ar')
+    libtool = os.environ.get('LIBTOOL', 'libtool')
+    # Join with a space so the last CFLAGS flag and the first CXXFLAGS flag
+    # do not fuse into a single bogus argument.
+    cflags = os.environ.get('CFLAGS', '') + ' ' + os.environ.get('CXXFLAGS', '')
+
+    # Create the output directories up front, before the worker threads start.
+    for src in COMMON_FILES:
+        outfile = os.path.join(build_dir, os.path.splitext(src)[0] + '.o')
+        outdir = os.path.dirname(outfile)
+        if not os.path.exists(outdir):
+            print('mkdir', outdir)
+            os.makedirs(outdir)
+
+    def build_one(src):
+        outfile = os.path.join(build_dir, os.path.splitext(src)[0] + '.o')
+        # An object left over from an earlier run is reused, but its path must
+        # still be returned so that it ends up in the archive.
+        if os.path.exists(outfile):
+            return outfile
+
+        cmd = '{cc} -fPIC -c {cflags} {args} {includes} {infile} -o {outfile}'.format(
+            cc=compiler,
+            cflags=cflags,
+            args=' '.join(ARGS),
+            includes=' '.join('-I' + i for i in INCLUDES),
+            infile=src,
+            outfile=outfile,
+        )
+        print(cmd)
+        subprocess.check_call(shlex.split(cmd))
+        return outfile
+
+    pool = Pool(num_parallel)
+    obj_files = list(pool.imap_unordered(build_one, COMMON_FILES))
+    pool.close()
+    pool.join()
+
+    # macOS builds the archive with libtool; every other platform uses ar.
+    if sys.platform.startswith('darwin'):
+        cmd = '{libtool} -static -o {outfile} {infiles}'.format(
+            libtool=libtool,
+            outfile=out_name,
+            infiles=' '.join(obj_files),
+        )
+    else:
+        cmd = '{ar} rcs {outfile} {infiles}'.format(
+            ar=ar,
+            outfile=out_name,
+            infiles=' '.join(obj_files),
+        )
+    print(cmd)
+    subprocess.check_call(shlex.split(cmd))
+
+if __name__ == '__main__':
+    build_common()
diff --git a/native_client/ctcdecode/setup.py b/native_client/ctcdecode/setup.py
index 74306a0e..c6c256cc 100644
--- a/native_client/ctcdecode/setup.py
+++ b/native_client/ctcdecode/setup.py
@@ -5,12 +5,13 @@ from distutils.command.build import build
 from setuptools import setup, Extension, distutils
 
 import argparse
-import glob
 import multiprocessing.pool
 import os
 import platform
 import sys
 
+from build_common import *
+
 try:
     import numpy
     try:
@@ -40,66 +41,25 @@ def read(fname):
 
 project_version = read('../../VERSION').strip()
 
-# monkey-patch for parallel compilation
-# See: https://stackoverflow.com/a/13176803
-def parallelCCompile(self,
-                     sources,
-                     output_dir=None,
-                     macros=None,
-                     include_dirs=None,
-                     debug=0,
-                     extra_preargs=None,
-                     extra_postargs=None,
-                     depends=None):
-    # those lines are copied from distutils.ccompiler.CCompiler directly
-    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
-        output_dir, macros, include_dirs, sources, depends, extra_postargs)
-    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
+build_dir = 'temp_build/temp_build'
+common_build = 'common.a'
 
-    # parallel code
-    def _single_compile(obj):
-        try:
-            src, ext = build[obj]
-        except KeyError:
-            return
-        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+if not os.path.exists(common_build):
+    if not os.path.exists(build_dir):
+        os.makedirs(build_dir)
 
-    # convert to list, imap is evaluated on-demand
-    thread_pool = multiprocessing.pool.ThreadPool(args[0].num_processes)
-    list(thread_pool.imap(_single_compile, objects))
-    return objects
-
-# hack compile to support parallel compiling
-distutils.ccompiler.CCompiler.compile = parallelCCompile
-
-FILES = glob.glob('../kenlm/util/*.cc') \
-        + glob.glob('../kenlm/lm/*.cc') \
-        + glob.glob('../kenlm/util/double-conversion/*.cc')
-
-FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc')
-
-FILES = [
-    fn for fn in FILES
-    if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith(
-        'unittest.cc'))
-]
-
-ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11',
-        '-Wno-unused-local-typedef', '-Wno-sign-compare']
+    build_common(out_name=common_build,
+                 build_dir=build_dir,
+                 num_parallel=args[0].num_processes)
 
 decoder_module = Extension(
     name='ds_ctcdecoder._swigwrapper',
-    sources=['swigwrapper.i'] + FILES + glob.glob('*.cpp'),
+    sources=['swigwrapper.i'],
     swig_opts=['-c++', '-extranative'],
     language='c++',
-    include_dirs=[
-        numpy_include,
-        '..',
-        '../kenlm',
-        'third_party/openfst-1.6.7/src/include',
-        'third_party/ThreadPool',
-    ],
-    extra_compile_args=ARGS
+    include_dirs=INCLUDES + [numpy_include],
+    extra_compile_args=ARGS,
+    extra_link_args=[common_build],
 )
 
 class BuildExtFirst(build):
diff --git a/taskcluster/host-build.sh b/taskcluster/host-build.sh
index d6de260e..3b3f3cd9 100755
--- a/taskcluster/host-build.sh
+++ b/taskcluster/host-build.sh
@@ -19,6 +19,11 @@ do_bazel_build
 
 do_deepspeech_binary_build
 
+# Decoder packages for Linux are built in a separate task
+if [ "${OS}" = "Darwin" ]; then
+  do_deepspeech_decoder_build
+fi
+
 do_deepspeech_python_build
 
 do_deepspeech_nodejs_build
diff --git a/tc-tests-utils.sh b/tc-tests-utils.sh
index e3ed29ba..ba40617b 100755
--- a/tc-tests-utils.sh
+++ b/tc-tests-utils.sh
@@ -562,7 +562,7 @@ do_deepspeech_decoder_build()
 
   cp native_client/ctcdecode/dist/*.whl wheels
 
-  make -C native_client/ctcdecode clean
+  make -C native_client/ctcdecode clean-keep-common
 
   unset NUMPY_BUILD_VERSION
   unset NUMPY_DEP_VERSION
diff --git a/util/taskcluster.py b/util/taskcluster.py
index d2f6c14b..4b6929d7 100644
--- a/util/taskcluster.py
+++ b/util/taskcluster.py
@@ -73,7 +73,7 @@ if __name__ == '__main__':
     parser.add_argument('--target', required=False,
                         help='Where to put the native client binary files')
     parser.add_argument('--arch', required=False,
-                        help='Which architecture to download binaries for. "arm" for ARM 7 (32-bit), "gpu" for CUDA enabled x86_64 binaries, "cpu" for CPU-only x86_64 binaries, "osx" for CPU-only x86_64 OSX binaries. Optional ("cpu" by default)')
+                        help='Which architecture to download binaries for. "arm" for ARM 7 (32-bit), "arm64" for ARM64, "gpu" for CUDA enabled x86_64 binaries, "cpu" for CPU-only x86_64 binaries, "osx" for CPU-only x86_64 OSX binaries. Optional ("cpu" by default)')
     parser.add_argument('--artifact', required=False,
                         default='native_client.tar.xz',
                         help='Name of the artifact to download. Defaults to "native_client.tar.xz"')