Merge pull request #2757 from lissyx/speed-python-builds

Ensure Python builds use all resources
This commit is contained in:
lissyx 2020-02-14 20:03:55 +01:00 committed by GitHub
commit 22b518f8fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 50 additions and 32 deletions

View File

@ -17,12 +17,12 @@ endif
all: bindings
clean-keep-common:
clean-keep-third-party:
rm -rf dist temp_build ds_ctcdecoder.egg-info
rm -f swigwrapper_wrap.cpp swigwrapper.py
rm -f swigwrapper_wrap.cpp swigwrapper.py first_party.a
clean: clean-keep-common
rm -f common.a
clean: clean-keep-third-party
rm -f third_party.a
rm workspace_status.cc
rm -fr bazel-out/
@ -31,14 +31,14 @@ workspace_status.cc:
../bazel_workspace_status_cmd.sh > bazel-out/stable-status.txt && \
../gen_workspace_status.sh > $@
bindings: clean-keep-common workspace_status.cc
bindings: clean-keep-third-party workspace_status.cc
pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
find temp_build -type f -name "*.o" -delete
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
rm -rf temp_build
bindings-debug: clean-keep-common workspace_status.cc
bindings-debug: clean-keep-third-party workspace_status.cc
pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0
AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
$(GENERATE_DEBUG_SYMS)

View File

@ -20,26 +20,34 @@ INCLUDES = [
'third_party/ThreadPool'
]
COMMON_FILES = (glob.glob('../kenlm/util/*.cc')
KENLM_FILES = (glob.glob('../kenlm/util/*.cc')
+ glob.glob('../kenlm/lm/*.cc')
+ glob.glob('../kenlm/util/double-conversion/*.cc'))
COMMON_FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc')
KENLM_FILES += glob.glob('third_party/openfst-1.6.7/src/lib/*.cc')
COMMON_FILES = [
fn for fn in COMMON_FILES
KENLM_FILES = [
fn for fn in KENLM_FILES
if not (fn.endswith('main.cc') or fn.endswith('test.cc') or fn.endswith(
'unittest.cc'))
]
def build_common(out_name='common.a', build_dir='temp_build/temp_build', debug=False, num_parallel=1):
CTC_DECODER_FILES = [
'ctc_beam_search_decoder.cpp',
'scorer.cpp',
'path_trie.cpp',
'decoder_utils.cpp',
'workspace_status.cc'
]
def build_archive(srcs=[], out_name='', build_dir='temp_build/temp_build', debug=False, num_parallel=1):
compiler = os.environ.get('CXX', 'g++')
ar = os.environ.get('AR', 'ar')
libtool = os.environ.get('LIBTOOL', 'libtool')
cflags = os.environ.get('CFLAGS', '') + os.environ.get('CXXFLAGS', '')
args = ARGS + (DBG_ARGS if debug else OPT_ARGS)
for file in COMMON_FILES:
for file in srcs:
outfile = os.path.join(build_dir, os.path.splitext(file)[0] + '.o')
outdir = os.path.dirname(outfile)
if not os.path.exists(outdir):
@ -64,7 +72,7 @@ def build_common(out_name='common.a', build_dir='temp_build/temp_build', debug=F
return outfile
pool = Pool(num_parallel)
obj_files = list(pool.imap_unordered(build_one, COMMON_FILES))
obj_files = list(pool.imap_unordered(build_one, srcs))
if sys.platform.startswith('darwin'):
cmd = '{libtool} -static -o {outfile} {infiles}'.format(

View File

@ -10,7 +10,7 @@ import os
import platform
import sys
from build_common import *
from build_archive import *
try:
import numpy
@ -40,33 +40,34 @@ sys.argv = [sys.argv[0]] + unknown_args
def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read()
project_version = read('../../VERSION').strip()
build_dir = 'temp_build/temp_build'
common_build = 'common.a'
if not os.path.exists(common_build):
def maybe_rebuild(srcs, out_name, build_dir):
if not os.path.exists(out_name):
if not os.path.exists(build_dir):
os.makedirs(build_dir)
build_common(out_name=common_build,
build_archive(srcs=srcs,
out_name=out_name,
build_dir=build_dir,
num_parallel=known_args.num_processes,
debug=debug)
project_version = read('../../VERSION').strip()
build_dir = 'temp_build/temp_build'
third_party_build = 'third_party.a'
ctc_decoder_build = 'first_party.a'
maybe_rebuild(KENLM_FILES, third_party_build, build_dir)
maybe_rebuild(CTC_DECODER_FILES, ctc_decoder_build, build_dir)
decoder_module = Extension(
name='ds_ctcdecoder._swigwrapper',
sources=['swigwrapper.i',
'ctc_beam_search_decoder.cpp',
'scorer.cpp',
'path_trie.cpp',
'decoder_utils.cpp',
'workspace_status.cc'],
sources=['swigwrapper.i'],
swig_opts=['-c++', '-extranative'],
language='c++',
include_dirs=INCLUDES + [numpy_include],
extra_compile_args=ARGS + (DBG_ARGS if debug else OPT_ARGS),
extra_link_args=[common_build],
extra_link_args=[ctc_decoder_build, third_party_build],
)
class BuildExtFirst(build):

View File

@ -6,17 +6,20 @@ export OS=$(uname)
if [ "${OS}" = "Linux" ]; then
export DS_ROOT_TASK=${HOME}
export SWIG_ROOT="${HOME}/ds-swig"
export DS_CPU_COUNT=$(nproc)
fi;
if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
export SWIG_ROOT="$(cygpath ${USERPROFILE})/ds-swig"
export PLATFORM_EXE_SUFFIX=.exe
export DS_CPU_COUNT=$(nproc)
fi;
if [ "${OS}" = "Darwin" ]; then
export SWIG_ROOT="${TASKCLUSTER_ORIG_TASKDIR}/ds-swig"
export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
export DS_CPU_COUNT=$(sysctl hw.ncpu |cut -d' ' -f2)
# It seems chaining |export DYLD_LIBRARY_PATH=...| does not work, maybe
# because of SIP? Who knows ...

View File

@ -126,11 +126,12 @@ do_deepspeech_decoder_build()
TARGET=${SYSTEM_TARGET} \
RASPBIAN=${SYSTEM_RASPBIAN} \
TFDIR=${DS_TFDIR} \
NUM_PROCESSES=${DS_CPU_COUNT} \
bindings
cp native_client/ctcdecode/dist/*.whl wheels
make -C native_client/ctcdecode clean-keep-common
make -C native_client/ctcdecode clean-keep-third-party
unset NUMPY_BUILD_VERSION
unset NUMPY_DEP_VERSION

View File

@ -142,6 +142,11 @@ pyenv_install()
exit 1;
fi;
if [ -z "${DS_CPU_COUNT}" ]; then
echo "No idea of parallelism";
exit 1;
fi;
if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then
PATH=$(cygpath ${ChocolateyInstall})/bin:$PATH nuget install python -Version ${version} -OutputDirectory ${PYENV_ROOT}/versions/
@ -159,7 +164,7 @@ pyenv_install()
# otherwise, pyenv install will force-rebuild
ls -hal "${PYENV_ROOT}/versions/${version_alias}/" || true
if [ ! -d "${PYENV_ROOT}/versions/${version_alias}/" ]; then
VERSION_ALIAS=${version_alias} pyenv install ${version}
VERSION_ALIAS=${version_alias} MAKEOPTS=-j${DS_CPU_COUNT} pyenv install ${version}
fi;
fi
}