From 0add08e30d22a1d5d25896e1f28dfa1c3afd5feb Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Fri, 14 Feb 2020 14:51:04 +0100 Subject: [PATCH] Ensure Python builds use all resources --- native_client/ctcdecode/Makefile | 12 +++---- .../{build_common.py => build_archive.py} | 22 ++++++++---- native_client/ctcdecode/setup.py | 35 ++++++++++--------- taskcluster/tc-all-vars.sh | 3 ++ taskcluster/tc-build-utils.sh | 3 +- taskcluster/tc-py-utils.sh | 7 +++- 6 files changed, 50 insertions(+), 32 deletions(-) rename native_client/ctcdecode/{build_common.py => build_archive.py} (82%) diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile index eff83f38..fb3ca6a4 100644 --- a/native_client/ctcdecode/Makefile +++ b/native_client/ctcdecode/Makefile @@ -17,12 +17,12 @@ endif all: bindings -clean-keep-common: +clean-keep-third-party: rm -rf dist temp_build ds_ctcdecoder.egg-info - rm -f swigwrapper_wrap.cpp swigwrapper.py + rm -f swigwrapper_wrap.cpp swigwrapper.py first_party.a -clean: clean-keep-common - rm -f common.a +clean: clean-keep-third-party + rm -f third_party.a rm workspace_status.cc rm -fr bazel-out/ @@ -31,14 +31,14 @@ workspace_status.cc: ../bazel_workspace_status_cmd.sh > bazel-out/stable-status.txt && \ ../gen_workspace_status.sh > $@ -bindings: clean-keep-common workspace_status.cc +bindings: clean-keep-third-party workspace_status.cc pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) find temp_build -type f -name "*.o" -delete AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) rm -rf temp_build -bindings-debug: clean-keep-common 
workspace_status.cc +bindings-debug: clean-keep-third-party workspace_status.cc pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) $(GENERATE_DEBUG_SYMS) diff --git a/native_client/ctcdecode/build_common.py b/native_client/ctcdecode/build_archive.py similarity index 82% rename from native_client/ctcdecode/build_common.py rename to native_client/ctcdecode/build_archive.py index 2388236d..6b36ea45 100644 --- a/native_client/ctcdecode/build_common.py +++ b/native_client/ctcdecode/build_archive.py @@ -20,26 +20,34 @@ INCLUDES = [ 'third_party/ThreadPool' ] -COMMON_FILES = (glob.glob('../kenlm/util/*.cc') +KENLM_FILES = (glob.glob('../kenlm/util/*.cc') + glob.glob('../kenlm/lm/*.cc') + glob.glob('../kenlm/util/double-conversion/*.cc')) -COMMON_FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc') +KENLM_FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc') -COMMON_FILES = [ - fn for fn in COMMON_FILES +KENLM_FILES = [ + fn for fn in KENLM_FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith( 'unittest.cc')) ] -def build_common(out_name='common.a', build_dir='temp_build/temp_build', debug=False, num_parallel=1): +CTC_DECODER_FILES = [ + 'ctc_beam_search_decoder.cpp', + 'scorer.cpp', + 'path_trie.cpp', + 'decoder_utils.cpp', + 'workspace_status.cc' +] + +def build_archive(srcs=[], out_name='', build_dir='temp_build/temp_build', debug=False, num_parallel=1): compiler = os.environ.get('CXX', 'g++') ar = os.environ.get('AR', 'ar') libtool = os.environ.get('LIBTOOL', 'libtool') cflags = os.environ.get('CFLAGS', '') + os.environ.get('CXXFLAGS', '') args = ARGS + (DBG_ARGS if debug else OPT_ARGS) - for file in COMMON_FILES: + for file in srcs: outfile = os.path.join(build_dir, 
os.path.splitext(file)[0] + '.o') outdir = os.path.dirname(outfile) if not os.path.exists(outdir): @@ -64,7 +72,7 @@ def build_common(out_name='common.a', build_dir='temp_build/temp_build', debug=F return outfile pool = Pool(num_parallel) - obj_files = list(pool.imap_unordered(build_one, COMMON_FILES)) + obj_files = list(pool.imap_unordered(build_one, srcs)) if sys.platform.startswith('darwin'): cmd = '{libtool} -static -o {outfile} {infiles}'.format( diff --git a/native_client/ctcdecode/setup.py b/native_client/ctcdecode/setup.py index 8a77ac31..fb5a7114 100644 --- a/native_client/ctcdecode/setup.py +++ b/native_client/ctcdecode/setup.py @@ -10,7 +10,7 @@ import os import platform import sys -from build_common import * +from build_archive import * try: import numpy @@ -40,33 +40,34 @@ sys.argv = [sys.argv[0]] + unknown_args def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() +def maybe_rebuild(srcs, out_name, build_dir): + if not os.path.exists(out_name): + if not os.path.exists(build_dir): + os.makedirs(build_dir) + + build_archive(srcs=srcs, + out_name=out_name, + build_dir=build_dir, + num_parallel=known_args.num_processes, + debug=debug) + project_version = read('../../VERSION').strip() build_dir = 'temp_build/temp_build' -common_build = 'common.a' +third_party_build = 'third_party.a' +ctc_decoder_build = 'first_party.a' -if not os.path.exists(common_build): - if not os.path.exists(build_dir): - os.makedirs(build_dir) - - build_common(out_name=common_build, - build_dir=build_dir, - num_parallel=known_args.num_processes, - debug=debug) +maybe_rebuild(KENLM_FILES, third_party_build, build_dir) +maybe_rebuild(CTC_DECODER_FILES, ctc_decoder_build, build_dir) decoder_module = Extension( name='ds_ctcdecoder._swigwrapper', - sources=['swigwrapper.i', - 'ctc_beam_search_decoder.cpp', - 'scorer.cpp', - 'path_trie.cpp', - 'decoder_utils.cpp', - 'workspace_status.cc'], + sources=['swigwrapper.i'], swig_opts=['-c++', '-extranative'], 
language='c++', include_dirs=INCLUDES + [numpy_include], extra_compile_args=ARGS + (DBG_ARGS if debug else OPT_ARGS), - extra_link_args=[common_build], + extra_link_args=[ctc_decoder_build, third_party_build], ) class BuildExtFirst(build): diff --git a/taskcluster/tc-all-vars.sh b/taskcluster/tc-all-vars.sh index 199e2eea..4bbadf25 100755 --- a/taskcluster/tc-all-vars.sh +++ b/taskcluster/tc-all-vars.sh @@ -6,17 +6,20 @@ export OS=$(uname) if [ "${OS}" = "Linux" ]; then export DS_ROOT_TASK=${HOME} export SWIG_ROOT="${HOME}/ds-swig" + export DS_CPU_COUNT=$(nproc) fi; if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR} export SWIG_ROOT="$(cygpath ${USERPROFILE})/ds-swig" export PLATFORM_EXE_SUFFIX=.exe + export DS_CPU_COUNT=$(nproc) fi; if [ "${OS}" = "Darwin" ]; then export SWIG_ROOT="${TASKCLUSTER_ORIG_TASKDIR}/ds-swig" export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR} + export DS_CPU_COUNT=$(sysctl hw.ncpu |cut -d' ' -f2) # It seems chaining |export DYLD_LIBRARY_PATH=...| does not work, maybe # because of SIP? Who knows ... 
diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 7c30555a..88b8f29d 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -126,11 +126,12 @@ do_deepspeech_decoder_build() TARGET=${SYSTEM_TARGET} \ RASPBIAN=${SYSTEM_RASPBIAN} \ TFDIR=${DS_TFDIR} \ + NUM_PROCESSES=${DS_CPU_COUNT} \ bindings cp native_client/ctcdecode/dist/*.whl wheels - make -C native_client/ctcdecode clean-keep-common + make -C native_client/ctcdecode clean-keep-third-party unset NUMPY_BUILD_VERSION unset NUMPY_DEP_VERSION diff --git a/taskcluster/tc-py-utils.sh b/taskcluster/tc-py-utils.sh index 8df81811..586124bf 100755 --- a/taskcluster/tc-py-utils.sh +++ b/taskcluster/tc-py-utils.sh @@ -142,6 +142,11 @@ pyenv_install() exit 1; fi; + if [ -z "${DS_CPU_COUNT}" ]; then + echo "No idea of parallelism"; + exit 1; + fi; + if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then PATH=$(cygpath ${ChocolateyInstall})/bin:$PATH nuget install python -Version ${version} -OutputDirectory ${PYENV_ROOT}/versions/ @@ -159,7 +164,7 @@ pyenv_install() # otherwise, pyenv install will force-rebuild ls -hal "${PYENV_ROOT}/versions/${version_alias}/" || true if [ ! -d "${PYENV_ROOT}/versions/${version_alias}/" ]; then - VERSION_ALIAS=${version_alias} pyenv install ${version} + VERSION_ALIAS=${version_alias} MAKEOPTS=-j${DS_CPU_COUNT} pyenv install ${version} fi; fi }