Merge pull request #2871 from mozilla/decoder-as-dep
Automatically install ds_ctcdecoder in setup.py
This commit is contained in:
commit
7c3c9d0b8d
|
@ -46,7 +46,8 @@ Install the required dependencies using ``pip3``\ :
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
cd DeepSpeech
|
cd DeepSpeech
|
||||||
pip3 install -e .
|
pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
|
||||||
|
pip3 install --upgrade --force-reinstall -e .
|
||||||
|
|
||||||
The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules:
|
The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules:
|
||||||
|
|
||||||
|
@ -54,14 +55,6 @@ The ``webrtcvad`` Python package might require you to ensure you have proper too
|
||||||
|
|
||||||
sudo apt-get install python3-dev
|
sudo apt-get install python3-dev
|
||||||
|
|
||||||
You'll also need to install the ``ds_ctcdecoder`` Python package. ``ds_ctcdecoder`` is required for decoding the outputs of the ``deepspeech`` acoustic model into text. You can use ``util/taskcluster.py`` with the ``--decoder`` flag to get a URL to a binary of the decoder package appropriate for your platform and Python version:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
pip3 install $(python3 util/taskcluster.py --decoder)
|
|
||||||
|
|
||||||
This command will download and install the ``ds_ctcdecoder`` package. You can override the platform with ``--arch`` if you want the package for ARM7 (\ ``--arch arm``\ ) or ARM64 (\ ``--arch arm64``\ ). If you prefer building the ``ds_ctcdecoder`` package from source, see the :github:`native_client README file <native_client/README.rst>`.
|
|
||||||
|
|
||||||
Recommendations
|
Recommendations
|
||||||
^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
|
101
setup.py
101
setup.py
|
@ -1,13 +1,95 @@
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pkg_resources import parse_version
|
||||||
from setuptools import find_packages, setup
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
|
||||||
|
def get_decoder_pkg_url(version, artifacts_root=None):
    """Build the direct-reference requirement string for ``ds_ctcdecoder``.

    The wheel file name is derived from the running interpreter and the host
    platform, so the returned requirement points at the binary wheel matching
    the environment the package is being installed into.

    Args:
        version: DeepSpeech version string (the content of the ``VERSION``
            file). It is used for the wheel version and, prefixed with ``v``,
            as the branch name in the default artifacts URL.
        artifacts_root: Base URL hosting the wheel. When falsy, the
            TaskCluster index URL for ``v<version>`` and the detected
            architecture is used instead.

    Returns:
        A string of the form ``ds_ctcdecoder @ <artifacts_root>/<wheel>``
        that can be appended to ``install_requires``.
    """
    machine = platform.machine()

    # 64-bit ARM Linux reports 'aarch64', which does not contain 'arm'; without
    # the explicit check such hosts would be pointed at the x86 'cpu-ctc' build.
    is_arm = 'arm' in machine or machine == 'aarch64'
    is_mac = 'darwin' in sys.platform
    is_64bit = sys.maxsize > (2**31 - 1)

    if is_arm:
        tc_arch = 'arm64-ctc' if is_64bit else 'arm-ctc'
    elif is_mac:
        tc_arch = 'osx-ctc'
    else:
        tc_arch = 'cpu-ctc'

    ds_version = parse_version(version)
    branch = "v{}".format(version)

    plat = platform.system().lower()
    arch = machine

    if plat == 'linux' and arch == 'x86_64':
        plat = 'manylinux1'

    if plat == 'darwin':
        plat = 'macosx_10_10'

    # CPython 3.8 removed the 'm' (pymalloc) ABI flag, so wheels built for
    # 3.8+ are tagged e.g. 'cp38-cp38' rather than 'cp38-cp38m'.
    if sys.version_info >= (3, 8):
        m_or_mu = ''
    else:
        is_ucs2 = sys.maxunicode < 0x10ffff
        m_or_mu = 'mu' if is_ucs2 else 'm'

    pyver = ''.join(str(i) for i in sys.version_info[0:2])

    if not artifacts_root:
        artifacts_root = 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.deepspeech.native_client.{branch_name}.{tc_arch_string}/artifacts/public'.format(
            branch_name=branch,
            tc_arch_string=tc_arch)

    return 'ds_ctcdecoder @ {artifacts_root}/ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl'.format(
        artifacts_root=artifacts_root,
        ds_version=ds_version,
        pyver=pyver,
        m_or_mu=m_or_mu,
        platform=plat,
        arch=arch,
    )
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
version_file = Path(__file__).parent / 'VERSION'
|
version_file = Path(__file__).parent / 'VERSION'
|
||||||
with open(str(version_file)) as fin:
|
with open(str(version_file)) as fin:
|
||||||
version = fin.read().strip()
|
version = fin.read().strip()
|
||||||
|
|
||||||
|
decoder_pkg_url = get_decoder_pkg_url(version)
|
||||||
|
|
||||||
|
install_requires_base = [
|
||||||
|
'tensorflow == 1.15.2',
|
||||||
|
'numpy == 1.18.1',
|
||||||
|
'progressbar2',
|
||||||
|
'six',
|
||||||
|
'pyxdg',
|
||||||
|
'attrdict',
|
||||||
|
'absl-py',
|
||||||
|
'semver',
|
||||||
|
'opuslib == 2.0.0',
|
||||||
|
'optuna',
|
||||||
|
'sox',
|
||||||
|
'bs4',
|
||||||
|
'pandas',
|
||||||
|
'requests',
|
||||||
|
'librosa',
|
||||||
|
'soundfile',
|
||||||
|
]
|
||||||
|
|
||||||
|
# Due to pip craziness environment variables are the only consistent way to
|
||||||
|
# get options into this script when doing `pip install`.
|
||||||
|
tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '')
|
||||||
|
if tc_decoder_artifacts_root:
|
||||||
|
# We're running inside the TaskCluster environment, override the decoder
|
||||||
|
# package URL with the one we just built.
|
||||||
|
decoder_pkg_url = get_decoder_pkg_url(version, tc_decoder_artifacts_root)
|
||||||
|
install_requires = install_requires_base + [decoder_pkg_url]
|
||||||
|
elif os.environ.get('DS_NODECODER', ''):
|
||||||
|
install_requires = install_requires_base
|
||||||
|
else:
|
||||||
|
install_requires = install_requires_base + [decoder_pkg_url]
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='deepspeech_training',
|
name='deepspeech_training',
|
||||||
version=version,
|
version=version,
|
||||||
|
@ -28,24 +110,7 @@ def main():
|
||||||
package_dir={'': 'training'},
|
package_dir={'': 'training'},
|
||||||
packages=find_packages(where='training'),
|
packages=find_packages(where='training'),
|
||||||
python_requires='>=3.5, <4',
|
python_requires='>=3.5, <4',
|
||||||
install_requires=[
|
install_requires=install_requires,
|
||||||
'tensorflow == 1.15.2',
|
|
||||||
'numpy == 1.18.1',
|
|
||||||
'progressbar2',
|
|
||||||
'six',
|
|
||||||
'pyxdg',
|
|
||||||
'attrdict',
|
|
||||||
'absl-py',
|
|
||||||
'semver',
|
|
||||||
'opuslib == 2.0.0',
|
|
||||||
'optuna',
|
|
||||||
'sox',
|
|
||||||
'bs4',
|
|
||||||
'pandas',
|
|
||||||
'requests',
|
|
||||||
'librosa',
|
|
||||||
'soundfile',
|
|
||||||
],
|
|
||||||
# If there are data files included in your packages that need to be
|
# If there are data files included in your packages that need to be
|
||||||
# installed, specify them here.
|
# installed, specify them here.
|
||||||
package_data={
|
package_data={
|
||||||
|
|
|
@ -122,25 +122,3 @@ verify_bazel_rebuild()
|
||||||
exit 1
|
exit 1
|
||||||
fi;
|
fi;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Checks that util/taskcluster.py --decoder prints URLs containing the expected
# index namespace for the default branch, an explicit tag and master.
# Should be called from context where Python virtualenv is set
verify_ctcdecoder_url()
{
  # No --branch given: the URL is expected to name v${DS_VERSION}.
  default_url=$(python util/taskcluster.py --decoder)
  echo "${default_url}" | grep -F "deepspeech.native_client.v${DS_VERSION}"
  rc_default_url=$?

  # Explicit tag.
  tag_url=$(python util/taskcluster.py --decoder --branch 'v1.2.3')
  echo "${tag_url}" | grep -F "deepspeech.native_client.v1.2.3"
  rc_tag_url=$?

  # Explicit master branch.
  master_url=$(python util/taskcluster.py --decoder --branch 'master')
  echo "${master_url}" | grep -F "deepspeech.native_client.master"
  rc_master_url=$?

  # Fail as soon as any of the three lookups did not match.
  if [ ${rc_default_url} -ne 0 ] || [ ${rc_tag_url} -ne 0 ] || [ ${rc_master_url} -ne 0 ]; then
    return 1
  fi;

  return 0
}
|
|
||||||
|
|
|
@ -22,9 +22,6 @@ pushd ${HOME}/DeepSpeech/ds
|
||||||
popd
|
popd
|
||||||
set +o pipefail
|
set +o pipefail
|
||||||
|
|
||||||
decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}")
|
|
||||||
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: --upgrade ${decoder_pkg_url} | cat
|
|
||||||
|
|
||||||
pushd ${HOME}/DeepSpeech/ds/
|
pushd ${HOME}/DeepSpeech/ds/
|
||||||
time ./bin/run-tc-ldc93s1_singleshotinference.sh
|
time ./bin/run-tc-ldc93s1_singleshotinference.sh
|
||||||
popd
|
popd
|
||||||
|
|
|
@ -21,13 +21,6 @@ pushd ${HOME}/DeepSpeech/ds
|
||||||
popd
|
popd
|
||||||
set +o pipefail
|
set +o pipefail
|
||||||
|
|
||||||
pushd ${HOME}/DeepSpeech/ds/
|
|
||||||
verify_ctcdecoder_url
|
|
||||||
popd
|
|
||||||
|
|
||||||
decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}")
|
|
||||||
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat
|
|
||||||
|
|
||||||
# Prepare correct arguments for training
|
# Prepare correct arguments for training
|
||||||
case "${bitrate}" in
|
case "${bitrate}" in
|
||||||
8k)
|
8k)
|
||||||
|
|
|
@ -15,17 +15,10 @@ virtualenv_activate "${pyalias}" "deepspeech"
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
|
pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
|
||||||
pushd ${HOME}/DeepSpeech/ds
|
pushd ${HOME}/DeepSpeech/ds
|
||||||
pip install --upgrade . | cat
|
DS_NODECODER=1 pip install --upgrade . | cat
|
||||||
popd
|
popd
|
||||||
set +o pipefail
|
set +o pipefail
|
||||||
|
|
||||||
pushd ${HOME}/DeepSpeech/ds/
|
|
||||||
verify_ctcdecoder_url
|
|
||||||
popd
|
|
||||||
|
|
||||||
decoder_pkg_url=$(get_python_pkg_url ${pyver_pkg} ${py_unicode_type} "ds_ctcdecoder" "${DECODER_ARTIFACTS_ROOT}")
|
|
||||||
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} ${decoder_pkg_url} | cat
|
|
||||||
|
|
||||||
pushd ${HOME}/DeepSpeech/ds/
|
pushd ${HOME}/DeepSpeech/ds/
|
||||||
time ./bin/run-tc-transfer.sh
|
time ./bin/run-tc-transfer.sh
|
||||||
popd
|
popd
|
||||||
|
|
|
@ -90,8 +90,6 @@ def main():
|
||||||
help='Name of the TaskCluster scheme to use.')
|
help='Name of the TaskCluster scheme to use.')
|
||||||
parser.add_argument('--branch', required=False,
|
parser.add_argument('--branch', required=False,
|
||||||
help='Branch name to use. Defaulting to current content of VERSION file.')
|
help='Branch name to use. Defaulting to current content of VERSION file.')
|
||||||
parser.add_argument('--decoder', action='store_true',
|
|
||||||
help='Get URL to ds_ctcdecoder Python package.')
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
@ -119,32 +117,6 @@ def main():
|
||||||
else:
|
else:
|
||||||
ds_version = parse_version(args.branch)
|
ds_version = parse_version(args.branch)
|
||||||
|
|
||||||
if args.decoder:
|
|
||||||
plat = platform.system().lower()
|
|
||||||
arch = platform.machine()
|
|
||||||
|
|
||||||
if plat == 'linux' and arch == 'x86_64':
|
|
||||||
plat = 'manylinux1'
|
|
||||||
|
|
||||||
if plat == 'darwin':
|
|
||||||
plat = 'macosx_10_10'
|
|
||||||
|
|
||||||
m_or_mu = 'mu' if is_ucs2 else 'm'
|
|
||||||
pyver = ''.join(map(str, sys.version_info[0:2]))
|
|
||||||
|
|
||||||
artifact = "ds_ctcdecoder-{ds_version}-cp{pyver}-cp{pyver}{m_or_mu}-{platform}_{arch}.whl".format(
|
|
||||||
ds_version=ds_version,
|
|
||||||
pyver=pyver,
|
|
||||||
m_or_mu=m_or_mu,
|
|
||||||
platform=plat,
|
|
||||||
arch=arch
|
|
||||||
)
|
|
||||||
|
|
||||||
ctc_arch = args.arch + '-ctc'
|
|
||||||
|
|
||||||
print(get_tc_url(ctc_arch, artifact, args.branch))
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
if args.source is not None:
|
if args.source is not None:
|
||||||
if args.source in DEFAULT_SCHEMES:
|
if args.source in DEFAULT_SCHEMES:
|
||||||
global TASKCLUSTER_SCHEME
|
global TASKCLUSTER_SCHEME
|
||||||
|
|
Loading…
Reference in New Issue