Compare commits


39 Commits

Author | SHA1 | Message | Date
lissyx | c119aa6b0e | Merge pull request #3513 from lissyx/fix-decision-task-r0.9 (Fix decision task) | 2021-01-25 10:46:57 +01:00
Alexandre Lissy | 9dde5726cd | Fix decision task | 2021-01-25 10:40:46 +01:00
Reuben Morais | f2e9c85880 | Bump VERSION to 0.9.3 | 2020-12-10 17:56:44 +02:00
Reuben Morais | 056f5a4c6f | Merge branch 'master' into r0.9 | 2020-12-09 14:00:16 +02:00
lissyx | 504e55b2d8 | Merge pull request #3461 from lissyx/pr-3414-r0.9 (Hotword support for .NET client tests) | 2020-12-08 14:48:38 +01:00
imrahul3610 | 948a232ae2 | Hotword support for .NET client tests | 2020-12-08 13:43:23 +01:00
Reuben Morais | b2920c7557 | Bump VERSION to 0.9.2 | 2020-12-03 18:35:16 +02:00
Catalin Voss | cf2d7e636a | Make variables consistent | 2020-12-03 18:27:19 +02:00
Catalin Voss | c5410fc550 | Call the logits probs in create_inference_graph after they go thru softmax | 2020-12-03 18:25:32 +02:00
lissyx | 4270e22fe0 | Merge pull request #3445 from lissyx/doc-cuda-r0.9 (Fix #3443: Link to upstream Dockerfile for lack of correct TensorFlow…) | 2020-11-27 12:37:48 +01:00
Alexandre Lissy | 29fa2dd405 | Fix #3443: Link to upstream Dockerfile for lack of correct TensorFlow GPU deps doc. | 2020-11-27 12:36:43 +01:00
lissyx | a8abca0841 | Merge pull request #3442 from mozilla/fix-tcyml-branch (Fix wrong branch for TaskCluster) | 2020-11-26 20:37:26 +01:00
lissyx | 82f84c5853 | Fix wrong branch for TaskCluster | 2020-11-26 19:10:50 +01:00
lissyx | 67a9e35028 | Merge pull request #3441 from lissyx/electronjs_11-r0.9 (Adding support for ElectronJS v11.0) | 2020-11-26 16:08:50 +01:00
Alexandre Lissy | 440e30c097 | Adding support for ElectronJS v11.0 | 2020-11-26 13:48:38 +01:00
lissyx | 9c65ddadbf | Merge pull request #3438 from lissyx/import-ccef-r0.9 (Importer for dataset from Centre de Conférences Pierre Mendès-France) | 2020-11-24 09:51:33 +01:00
Alexandre Lissy | f252de4a8d | Importer for dataset from Centre de Conférences Pierre Mendès-France, released by Ministère de l'Economie, des Finances, et de la Relance | 2020-11-24 09:50:33 +01:00
lissyx | d7d7ea7db1 | Merge pull request #3431 from lissyx/fix-tc-gzip-r0.9 (Fix #3429: TaskCluster behavioral change wrt compression of artifacts) | 2020-11-19 14:32:58 +01:00
Alexandre Lissy | b65186907f | Fix #3429: TaskCluster behavioral change wrt compression of artifacts | 2020-11-19 13:22:26 +01:00
lissyx | 36a06634a3 | Merge pull request #3410 from imrahul361/r0.9 (Run test On Java Client) | 2020-11-05 20:04:09 +01:00
lissyx | b0f2d37d6f | Merge pull request #3409 from lissyx/py39-r0.9 (initial commit for py39 support) | 2020-11-05 13:13:48 +01:00
dag7dev | 965f209665 | initial commit for py39 support | 2020-11-05 09:51:25 +01:00
Reuben Morais | ab8bd3e11c | Bump VERSION to 0.9.1 | 2020-11-04 13:42:40 +01:00
lissyx | 7cdb5b1ed9 | Merge pull request #3399 from lissyx/fix-rtd-r0.9 (Force npm install on RTD and set appropriate PATH value) | 2020-11-03 14:38:37 +01:00
Alexandre Lissy | 617ce141d0 | Force npm install on RTD and set appropriate PATH value | 2020-11-03 14:37:38 +01:00
Reuben Morais | 2d04fbe049 | Bump VERSION to 0.9.0 | 2020-11-02 13:53:19 +01:00
Reuben Morais | 0ae425525d | Bump VERSION to 0.9.0-alpha.12 | 2020-10-30 17:31:56 +01:00
Reuben Morais | f7e816c014 | Merge branch 'master' into r0.9 | 2020-10-30 17:31:35 +01:00
lissyx | 2368fca0f1 | Merge pull request #3388 from ftyers/node15-r0.9 (Node15 r0.9) | 2020-10-27 10:08:58 +01:00
Francis Tyers | 719fedbd93 | update for NodeJS 15 | 2020-10-26 17:10:08 +00:00
Reuben Morais | a1d2d4181b | Merge pull request #3376 from reuben/pr-3375-rebase (PR #3375 rebased - JS Binding fixes and CI testing for hot word boosting) | 2020-10-13 16:35:29 +02:00
imrahul3610 | 3eaa44b358 | Fix JavaScript binding calls for Hot Words | 2020-10-13 14:57:49 +02:00
imrahul3610 | aee7fc502c | Run Tests on CI for JS Client | 2020-10-13 14:57:49 +02:00
imrahul3610 | 9210a97d48 | JS Binding Fix | 2020-10-13 14:57:49 +02:00
lissyx | 64fad81e10 | Merge pull request #3371 from nmstoker/patch-1 (Tiny fix to addHotWord doc string parameters) | 2020-10-12 21:39:40 +02:00
Neil Stoker | 7056241f37 | Tiny fix to addHotWord doc string parameters (already applied to master, applying to r0.9 as requested) | 2020-10-12 12:51:57 +01:00
Reuben Morais | 065c8a6cdf | Bump VERSION to 0.9.0-alpha.11 | 2020-10-09 14:37:38 +02:00
Reuben Morais | 636b7133a1 | Merge pull request #3367 from reuben/create-r0.9 (Add r0.9 branch) | 2020-10-08 10:37:55 +02:00
Reuben Morais | ed09fd3610 | Add r0.9 branch | 2020-10-07 18:58:56 +02:00
540 changed files with 8023 additions and 60119 deletions

.circleci/config.yml (new file, +117)

@@ -0,0 +1,117 @@
# These environment variables must be set in CircleCI UI
#
# DOCKERHUB_REPO - docker hub repo, format: <username>/<repo>
# DOCKER_USER - login info for docker hub
# DOCKER_PASS
#
version: 2
jobs:
  build:
    docker:
      - image: docker:stable-git
    working_directory: /dockerflow
    steps:
      - checkout
      - setup_remote_docker
      - run:
          name: os-release
          command: |
            cat /etc/os-release
      - run:
          name: install make
          command: |
            apk add make
      - run:
          name: Create a Dockerfile.train
          command: |
            make Dockerfile.train \
              DEEPSPEECH_REPO="https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME" \
              DEEPSPEECH_SHA=$CIRCLE_SHA1
      - run:
          name: Build Docker image
          command: docker build -t app:build -f Dockerfile.train .
      # save the built docker container into CircleCI's cache. This is
      # required since Workflows do not have the same remote docker instance.
      - run:
          name: docker save app:build
          command: mkdir -p /cache; docker save -o /cache/docker.tar "app:build"
      - save_cache:
          key: v1-{{ .Branch }}-{{epoch}}
          paths:
            - /cache/docker.tar
  deploy:
    docker:
      - image: docker:18.02.0-ce
    steps:
      - setup_remote_docker
      - restore_cache:
          key: v1-{{.Branch}}
      - run:
          name: Restore Docker image cache
          command: docker load -i /cache/docker.tar
      - run:
          name: Deploy to Dockerhub
          command: |
            echo $DOCKER_PASS | docker login -u $DOCKER_USER --password-stdin
            # deploy master
            if [ "${CIRCLE_BRANCH}" == "master" ]; then
              docker tag app:build ${DOCKERHUB_REPO}:latest
              docker push ${DOCKERHUB_REPO}:latest
            elif [ ! -z "${CIRCLE_TAG}" ]; then
              # deploy a release tag...
              echo "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
              docker tag app:build "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
              docker images
              docker push "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
            fi
  lint:
    docker:
      - image: circleci/python:3.7.9
    steps:
      - checkout
      - run:
          name: Install dependencies
          command: |
            pip install --upgrade cardboardlint pylint
      - run:
          name: Run linter
          command: |
            set -ex
            # Check if branch can be merged with master (if failing, script will stop due to set -e)
            git config user.email "you@example.com"
            git config user.name "Your Name"
            git merge --no-commit --no-ff origin/master
            # Undo merge changes if any
            git reset --hard $CIRCLE_BRANCH
            # Lint differences against master
            cardboardlinter --refspec origin/master -n auto;
workflows:
  version: 2
  build-deploy:
    jobs:
      - build:
          filters:
            tags:
              only: /.*/
      - deploy:
          requires:
            - build
          filters:
            tags:
              only: /.*/
  lint:
    jobs:
      - lint

(deleted file)

@@ -1,5 +0,0 @@
.git/lfs
native_client/ds-swig
native_client/python/dist/*.whl
native_client/ctcdecode/*.a
native_client/javascript/build/

.gitattributes (vendored, 1 line changed)

@@ -1,2 +1 @@
 data/lm/kenlm.scorer filter=lfs diff=lfs merge=lfs -text
-.github/actions/check_artifact_exists/dist/index.js binary

(deleted file)

@@ -1,40 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: 'Bug: '
labels: bug
assignees: ''
---
Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!
This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.
If you've found a bug, please provide the following information:
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Run the following command '...'
2. ...
3. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Environment (please complete the following information):**
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
- **TensorFlow installed from (our builds, or upstream TensorFlow)**:
- **TensorFlow version (use command below)**:
- **Python version**:
- **Bazel version (if compiling from source)**:
- **GCC/Compiler version (if compiling from source)**:
- **CUDA/cuDNN version**:
- **GPU model and memory**:
- **Exact command to reproduce**:
**Additional context**
Add any other context about the problem here.

(deleted file)

@@ -1,8 +0,0 @@
blank_issues_enabled: false
contact_links:
  - name: Coqui STT GitHub Discussions
    url: https://github.com/coqui-ai/STT/discussions
    about: Please ask and answer questions here.
  - name: Coqui Security issue disclosure
    url: mailto:info@coqui.ai
    about: Please report security vulnerabilities here.

(deleted file)

@@ -1,26 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: 'Feature request: '
labels: enhancement
assignees: ''
---
Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!
This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.
If you have a feature request, then please provide the following information:
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

(deleted file)

@@ -1,11 +0,0 @@
name: "Build TensorFlow"
description: "Build TensorFlow Build"
inputs:
  flavor:
    description: "Build flavor"
    required: true
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/tf-build.sh ${{ inputs.flavor }}
      shell: bash

(deleted file)

@@ -1,43 +0,0 @@
Building and using a TensorFlow cache:
======================================
The present action will check the existence of an artifact in the list of the
repo artifacts. Since we don't always want to download the artifact, we can't
rely on the official download-artifact action.
Rationale:
----------
Because of the amount of code required to build TensorFlow, the library build
is split into two main parts to make it much faster to run PRs:
- a TensorFlow prebuild cache
- actual code of the library
The TensorFlow prebuild cache exists because building TensorFlow (even just
`libtensorflow_cpp.so`) involves a huge amount of code and can take several
hours even on decent systems. So we perform a cached build of it, because the
TensorFlow version does not change that often.
However, each PR might have changes to the actual library code, so we rebuild
this part every time.
The `tensorflow_opt-macOS` job checks whether such a build cache already exists.
Those caches are stored as artifacts because [GitHub Actions
cache](https://docs.github.com/en/actions/guides/caching-dependencies-to-speed-up-workflows)
has size limitations.
The `build-tensorflow-macOS` job depends on the cache check to know whether it
needs to run an actual build or not.
Hacking:
--------
For hacking into the action, please follow the [GitHub JavaScript
Actions](https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action#commit-tag-and-push-your-action-to-github)
and specifically the usage of `ncc`.
```
$ npm install
$ npx ncc build main.js --license licenses.txt
$ git add dist/
```
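For orientation, here is a minimal workflow sketch of how this cache check might be wired up, assuming the action lives at `.github/actions/check_artifact_exists` (the path referenced in `.gitattributes`). The job names follow the convention described above; the runner label, artifact name, and flavor argument are illustrative assumptions:
```
jobs:
  # Checks whether a prebuilt TensorFlow cache already exists as a release asset.
  tensorflow_opt-macOS:
    runs-on: macos-10.15          # assumed runner label
    outputs:
      status: ${{ steps.check.outputs.status }}
    steps:
      - uses: actions/checkout@v2
      - id: check
        uses: ./.github/actions/check_artifact_exists
        with:
          name: tensorflow-macOS.tar.xz   # illustrative artifact name

  # Only performs the expensive TensorFlow build when the cache is missing.
  build-tensorflow-macOS:
    needs: tensorflow_opt-macOS
    if: needs.tensorflow_opt-macOS.outputs.status == 'missing'
    runs-on: macos-10.15
    steps:
      - uses: actions/checkout@v2
        with:
          submodules: recursive
      - run: ./ci_scripts/tf-build.sh "--macOS"   # flavor argument is an assumption
```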

(deleted file)

@@ -1,32 +0,0 @@
name: "check/download artifacts"
description: "Check that an artifact exists, and optionally download it"
inputs:
  name:
    description: "Artifact name"
    required: true
  github_token:
    description: "GitHub token"
    required: false
    default: ${{ github.token }}
  download:
    description: "Should we download?"
    required: false
    default: false
  path:
    description: "Where to unpack the artifact"
    required: false
    default: "./"
  repo:
    description: "Repository name with owner (like actions/checkout)"
    required: false
    default: ${{ github.repository }}
  release-tag:
    description: "Tag of release to check artifacts under"
    required: false
    default: "v0.10.0-alpha.7"
outputs:
  status:
    description: "Status string of the artifact: 'missing' or 'found'"
runs:
  using: "node12"
  main: "dist/index.js"

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

(deleted file)

@@ -1,132 +0,0 @@
const core = require('@actions/core');
const github = require('@actions/github');
const AdmZip = require('adm-zip');
const filesize = require('filesize');
const pathname = require('path');
const fs = require('fs');
const { throttling } = require('@octokit/plugin-throttling');
const { GitHub } = require('@actions/github/lib/utils');
const Download = require('download');
const Util = require('util');
const Stream = require('stream');

const Pipeline = Util.promisify(Stream.pipeline);

async function getGoodArtifacts(client, owner, repo, releaseId, name) {
  console.log(`==> GET /repos/${owner}/${repo}/releases/${releaseId}/assets`);
  const goodRepoArtifacts = await client.paginate(
    "GET /repos/{owner}/{repo}/releases/{release_id}/assets",
    {
      owner: owner,
      repo: repo,
      release_id: releaseId,
      per_page: 100,
    },
    (releaseAssets, done) => {
      console.log(" ==> releaseAssets", releaseAssets);
      const goodAssets = releaseAssets.data.filter((a) => {
        console.log("==> Asset check", a);
        return a.name == name;
      });
      // Stop paginating as soon as a matching asset has been found
      if (goodAssets.length > 0) {
        done();
      }
      return goodAssets;
    }
  );
  console.log("==> maybe goodRepoArtifacts:", goodRepoArtifacts);
  return goodRepoArtifacts;
}

async function main() {
  try {
    const token = core.getInput("github_token", { required: true });
    const [owner, repo] = core.getInput("repo", { required: true }).split("/");
    const path = core.getInput("path", { required: true });
    const name = core.getInput("name");
    const download = core.getInput("download");
    const releaseTag = core.getInput("release-tag");

    const OctokitWithThrottling = GitHub.plugin(throttling);
    const client = new OctokitWithThrottling({
      auth: token,
      throttle: {
        onRateLimit: (retryAfter, options) => {
          console.log(
            `Request quota exhausted for request ${options.method} ${options.url}`
          );
          // Retry twice after hitting a rate limit error, then give up
          if (options.request.retryCount <= 2) {
            console.log(`Retrying after ${retryAfter} seconds!`);
            return true;
          } else {
            console.log("Exhausted 2 retries");
            core.setFailed("Exhausted 2 retries");
          }
        },
        onAbuseLimit: (retryAfter, options) => {
          // does not retry, only logs a warning
          console.log(
            `Abuse detected for request ${options.method} ${options.url}`
          );
          core.setFailed(`GitHub REST API Abuse detected for request ${options.method} ${options.url}`);
        },
      },
    });
    console.log("==> Repo:", owner + "/" + repo);
    const releaseInfo = await client.repos.getReleaseByTag({
      owner,
      repo,
      tag: releaseTag,
    });
    console.log(`==> Release info for tag ${releaseTag} = ${JSON.stringify(releaseInfo.data, null, 2)}`);
    const releaseId = releaseInfo.data.id;
    const goodArtifacts = await getGoodArtifacts(client, owner, repo, releaseId, name);
    console.log("==> goodArtifacts:", goodArtifacts);

    const artifactStatus = goodArtifacts.length === 0 ? "missing" : "found";
    console.log("==> Artifact", name, artifactStatus);
    console.log("==> download", download);
    core.setOutput("status", artifactStatus);

    if (artifactStatus === "found" && download == "true") {
      console.log("==> # artifacts:", goodArtifacts.length);
      const artifact = goodArtifacts[0];
      console.log("==> Artifact:", artifact.id);
      const size = filesize(artifact.size, { base: 10 });
      console.log(`==> Downloading: ${artifact.name} (${size}) to path: ${path}`);
      const dir = pathname.dirname(path);
      console.log(`==> Creating containing dir if needed: ${dir}`);
      fs.mkdirSync(dir, { recursive: true });
      await Pipeline(
        Download(artifact.url, {
          headers: {
            "Accept": "application/octet-stream",
            "Authorization": `token ${token}`,
          },
        }),
        fs.createWriteStream(path)
      );
    }

    if (artifactStatus === "missing" && download == "true") {
      // core.setFailed takes a single message string
      core.setFailed(`Required artifact ${name} is missing`);
    }
    return;
  } catch (err) {
    console.error(err.stack);
    core.setFailed(err.message);
  }
}

main();

File diff suppressed because it is too large

(deleted file)

@@ -1,13 +0,0 @@
{
  "name": "check_artifact_exists",
  "main": "main.js",
  "devDependencies": {
    "@actions/core": "^1.2.6",
    "@actions/github": "^4.0.0",
    "@octokit/plugin-throttling": "^3.4.1",
    "@vercel/ncc": "^0.27.0",
    "adm-zip": "^0.5.2",
    "download": "^8.0.0",
    "filesize": "^6.1.0"
  }
}

(deleted file)

@@ -1,29 +0,0 @@
name: "chroot bind mount"
description: "Bind mount into chroot"
inputs:
  mounts:
    description: "Path to consider"
    required: true
runs:
  using: "composite"
  steps:
    - id: install_qemu
      run: |
        sudo apt-get update -y
        sudo apt-get install -y --no-install-recommends qemu-user-static
      shell: bash
    - id: bind_mount_chroot
      run: |
        set -xe
        # Bind-mount so that we have the same tree inside the chroot
        for dev in ${{ github.workspace }} ${{ inputs.mounts }};
        do
          sudo mount -o bind ${dev} ${{ env.SYSTEM_RASPBIAN }}${dev}
        done;
        for dev in ${{ inputs.mounts }};
        do
          sudo mount -o bind /${dev} ${{ env.SYSTEM_RASPBIAN }}/${dev}
        done;
      shell: bash
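A hedged sketch of how this composite action might be invoked from a job. The action directory name is hypothetical, `SYSTEM_RASPBIAN` must point at an existing chroot (for instance one created by the multistrap action further down), the mount path is illustrative, and the target directories are assumed to already exist inside the chroot:
```
jobs:
  test-armv7:
    runs-on: ubuntu-20.04
    env:
      SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-armv7   # assumed chroot location
    steps:
      - uses: actions/checkout@v2
      # Make the workspace and the listed paths visible inside the chroot.
      - uses: ./.github/actions/chroot-bind-mount   # hypothetical path
        with:
          mounts: "/run/shm"   # illustrative, space-separated list of paths
```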

(deleted file)

@@ -1,15 +0,0 @@
GitHub Action to compute cache key
==================================
It is intended to work in harmony with `check_artifact_exists`:
- compute a stable cache key
- be as simple to use as possible (fewer parameters)
It expects to run in a GitHub Actions job whose name follows the
`SUBMODULE_FLAVOR-PLATFORM` convention:
- it uses the `SUBMODULE` part to look up the current SHA1 of that git submodule.
- the `FLAVOR` part distinguishes e.g., opt/dbg builds
- the `PLATFORM` part defines an os/arch couple
It also allows an `extras` field for further customization, like forcing a
re-build.

(deleted file)

@@ -1,34 +0,0 @@
name: "get cache key for submodule"
description: "Compute a cache key based on git submodule"
inputs:
  extras:
    description: "Extra cache key value"
    required: true
  osarch:
    description: "Override automatic OSARCH value"
    required: false
outputs:
  key:
    description: "Computed cache key name"
    value: ${{ steps.compute_cache_key.outputs.key }}
runs:
  using: "composite"
  steps:
    - id: compute_cache_key
      run: |
        set -xe
        JOB=${{ github.job }}
        SUBMODULE=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f1)
        FLAVOR=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f2)
        if [ -z "${{ inputs.osarch }}" ]; then
          OSARCH=$(echo $JOB | cut -d'-' -f2)
        else
          OSARCH=${{ inputs.osarch }}
        fi
        SHA=$(git submodule status ${SUBMODULE} | sed -e 's/^-//g' -e 's/^+//g' -e 's/^U//g' | awk '{ print $1 }')
        KEY=${SUBMODULE}-${FLAVOR}_${OSARCH}_${SHA}_${{ inputs.extras }}
        echo "::set-output name=key::${KEY}"
      shell: bash
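Following the `SUBMODULE_FLAVOR-PLATFORM` job-naming convention described in the README above, a hypothetical job could combine this action with `check_artifact_exists`; the `get_cache_key` directory name, runner label, and `.tar.xz` suffix are assumptions:
```
jobs:
  tensorflow_opt-macOS:   # parsed as SUBMODULE=tensorflow, FLAVOR=opt, OSARCH=macOS
    runs-on: macos-10.15  # assumed runner label
    steps:
      - uses: actions/checkout@v2   # `git submodule status` reads the recorded SHA
      - id: get_cache_key
        uses: ./.github/actions/get_cache_key   # hypothetical path
        with:
          extras: "1"   # bump this value to force a rebuild
      - uses: ./.github/actions/check_artifact_exists
        with:
          name: ${{ steps.get_cache_key.outputs.key }}.tar.xz   # suffix assumed
```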

(deleted file)

@@ -1,30 +0,0 @@
name: "Install Python"
description: "Installing an upstream python release"
inputs:
  version:
    description: "Python version"
    required: true
runs:
  using: "composite"
  steps:
    - shell: bash
      run: |
        set -xe
        curl https://www.python.org/ftp/python/${{ inputs.version }}/python-${{ inputs.version }}-macosx10.9.pkg -o "python.pkg"
    - shell: bash
      run: ls -hal .
    - shell: bash
      run: |
        set -xe
        sudo installer -verbose -pkg python.pkg -target /
    - shell: bash
      run: |
        set -xe
        which python3
        python3 --version
        python3 -c "import sysconfig; print(sysconfig.get_config_var('MACOSX_DEPLOYMENT_TARGET'))"
    - shell: bash
      name: Set up venv with upstream Python
      run: |
        python3 -m venv /tmp/venv
        echo "/tmp/venv/bin" >> $GITHUB_PATH

(deleted file)

@@ -1,18 +0,0 @@
name: "xldd install"
description: "Install xldd"
inputs:
  target:
    description: "System target"
    required: true
runs:
  using: "composite"
  steps:
    - id: install_xldd
      run: |
        source ./ci_scripts/all-vars.sh
        # -s required to avoid the noisy output like "Entering / Leaving directories"
        toolchain=$(make -s -C ${DS_DSDIR}/native_client/ TARGET=${{ inputs.target }} TFDIR=${DS_TFDIR} print-toolchain)
        if [ ! -x "${toolchain}ldd" ]; then
          cp "${DS_DSDIR}/native_client/xldd" "${toolchain}ldd" && chmod +x "${toolchain}ldd"
        fi
      shell: bash

(deleted file)

@@ -1,12 +0,0 @@
name: "Build libstt.so"
description: "Build libstt.so"
inputs:
  arch:
    description: "Target arch for loading script (host/armv7/aarch64)"
    required: false
    default: "host"
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/${{ inputs.arch }}-build.sh
      shell: bash
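The corresponding invocation is a one-liner; the directory name is an assumption, while the `arch` values come from the input description above:
```
steps:
  - uses: ./.github/actions/libstt-build   # hypothetical path
    with:
      arch: armv7   # one of: host / armv7 / aarch64
```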

(deleted file)

@@ -1,67 +0,0 @@
name: "multistrap install"
description: "Install a system root using multistrap"
inputs:
  arch:
    description: "Target arch"
    required: true
  packages:
    description: "Extra packages to install"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - id: install_multistrap
      run: |
        sudo apt-get update -y
        sudo apt-get install -y --no-install-recommends multistrap qemu-user-static
      shell: bash
    - id: create_chroot
      run: |
        set -xe
        multistrap_conf=""
        if [ "${{ inputs.arch }}" = "armv7" ]; then
          multistrap_conf=multistrap_raspbian_buster.conf
          wget http://archive.raspbian.org/raspbian/pool/main/r/raspbian-archive-keyring/raspbian-archive-keyring_20120528.2_all.deb && sudo dpkg -i raspbian-archive-keyring_20120528.2_all.deb
        fi
        if [ "${{ inputs.arch }}" = "aarch64" ]; then
          multistrap_conf=multistrap_armbian64_buster.conf
        fi
        multistrap -d ${{ env.SYSTEM_RASPBIAN }} -f ${{ github.workspace }}/native_client/${multistrap_conf}
        if [ ! -z "${{ inputs.packages }}" ]; then
          TO_MOUNT=${{ github.workspace }}
          # Prepare target directory to bind-mount the github tree
          mkdir -p ${{ env.SYSTEM_RASPBIAN }}/${{ github.workspace }}
          # Bind-mount so that we have the same tree inside the chroot
          for dev in ${TO_MOUNT};
          do
            sudo mount -o bind ${dev} ${{ env.SYSTEM_RASPBIAN }}${dev}
          done;
          # Copy some host data:
          # resolv.conf: for getting DNS working
          # passwd, group, shadow: to have user accounts and apt-get install working
          for ff in resolv.conf passwd group shadow;
          do
            sudo cp /etc/${ff} ${{ env.SYSTEM_RASPBIAN }}/etc/
          done;
          # Perform apt steps.
          # Preserving the env is required
          sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get update -y
          sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get install -y --no-install-recommends ${{ inputs.packages }}
          # Cleanup apt info to save space
          sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ rm -fr /var/cache/apt/* /var/lib/apt/lists/*
          # Unmount what has been mounted
          for dev in ${TO_MOUNT};
          do
            sudo umount ${{ env.SYSTEM_RASPBIAN }}${dev}
          done;
        fi
      shell: bash
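A usage sketch, assuming the action is checked in under `.github/actions/multistrap` (hypothetical) and that `SYSTEM_RASPBIAN` is set at the job level as the steps above expect; the extra package is illustrative:
```
env:
  SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-armv7   # assumed chroot location

steps:
  - uses: actions/checkout@v2   # provides native_client/multistrap_*.conf
  - uses: ./.github/actions/multistrap   # hypothetical path
    with:
      arch: armv7
      packages: "libatlas3-base"   # illustrative extra package
```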

(deleted file)

@@ -1,77 +0,0 @@
name: "NodeJS binding"
description: "Binding a nodejs binding"
inputs:
  nodejs_versions:
    description: "NodeJS versions supported"
    required: true
  electronjs_versions:
    description: "ElectronJS versions supported"
    required: true
  local_cflags:
    description: "CFLAGS for NodeJS package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for NodeJS package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for NodeJS package"
    required: false
    default: ""
  target:
    description: "TARGET value"
    required: false
    default: "host"
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        node --version
        npm --version
      shell: bash
    - run: |
        npm update
      shell: bash
    - run: |
        mkdir -p tmp/headers/nodejs tmp/headers/electronjs
      shell: bash
    - run: |
        for node in ${{ inputs.nodejs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
          make -C native_client/javascript \
            TARGET=${{ inputs.target }} \
            RASPBIAN=${{ inputs.chroot }} \
            NODE_ABI_TARGET=--target=${node} \
            NODE_DEVDIR=--devdir=headers/nodejs \
            clean node-wrapper
        done;
      shell: bash
    - run: |
        for electron in ${{ inputs.electronjs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
          make -C native_client/javascript \
            TARGET=${{ inputs.target }} \
            RASPBIAN=${{ inputs.chroot }} \
            NODE_ABI_TARGET=--target=${electron} \
            NODE_DIST_URL=--disturl=https://electronjs.org/headers \
            NODE_RUNTIME=--runtime=electron \
            NODE_DEVDIR=--devdir=headers/electronjs \
            clean node-wrapper
        done;
      shell: bash
    - run: |
        make -C native_client/javascript clean npm-pack
      shell: bash
    - run: |
        tar -czf native_client/javascript/wrapper.tar.gz \
          -C native_client/javascript/ lib/
      shell: bash
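A sketch of how this binding action might be called for a host build; the directory name is hypothetical and the version lists are illustrative (ElectronJS 11.0 support is added by one of the commits in this comparison):
```
steps:
  - uses: ./.github/actions/node-build   # hypothetical path
    with:
      nodejs_versions: "10.20.1 12.17.0 14.3.0 15.0.0"   # illustrative list
      electronjs_versions: "9.2.0 10.1.0 11.0.0"         # illustrative list
```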

(deleted file)

@@ -1,22 +0,0 @@
name: "nodejs install"
description: "Install nodejs in a chroot"
inputs:
  node:
    description: "NodeJS version"
    required: true
runs:
  using: "composite"
  steps:
    - id: add_apt_source
      run: |
        set -ex
        (echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > ${{ env.SYSTEM_RASPBIAN }}/etc/apt/preferences
        echo "deb http://deb.nodesource.com/node_${{ inputs.node }}.x buster main" > ${{ env.SYSTEM_RASPBIAN }}/etc/apt/sources.list.d/nodesource.list
        wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-key add -
      shell: bash
    - id: install_nodejs
      run: |
        set -ex
        sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get update -y
        sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get install -y nodejs
      shell: bash

(deleted file)

@@ -1,14 +0,0 @@
GitHub Action to set NumPy versions
===================================
This action aims at computing correct values for NumPy dependencies:
- `NUMPY_BUILD_VERSION`: range of accepted versions at Python binding build time
- `NUMPY_DEP_VERSION`: range of accepted versions for execution time
Versions are set considering several factors:
- API and ABI compatibility; otherwise the binding wrapper can throw errors
  like "Illegal instruction", or compute wrong values because of a changed
  memory layout
- Wheels availability: for CI and end users, we want to avoid having to
rebuild numpy so we stick to versions where there is an existing upstream
`wheel` file

(deleted file)

@@ -1,93 +0,0 @@
name: "get numpy versions"
description: "Get proper NumPy build and runtime versions dependencies range"
inputs:
  pyver:
    description: "Python version"
    required: true
outputs:
  build_version:
    description: "NumPy build dependency"
    value: ${{ steps.numpy.outputs.build }}
  dep_version:
    description: "NumPy runtime dependency"
    value: ${{ steps.numpy.outputs.dep }}
runs:
  using: "composite"
  steps:
    - id: numpy
      run: |
        set -ex
        NUMPY_BUILD_VERSION="==1.7.0"
        NUMPY_DEP_VERSION=">=1.7.0"
        OS=$(uname -s)
        ARCH=$(uname -m)
        case "${OS}:${ARCH}" in
          Linux:x86_64)
            case "${{ inputs.pyver }}" in
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
          Darwin:*)
            case "${{ inputs.pyver }}" in
              3.6*)
                NUMPY_BUILD_VERSION="==1.9.0"
                NUMPY_DEP_VERSION=">=1.9.0"
              ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
          ${CI_MSYS_VERSION}:x86_64)
            case "${{ inputs.pyver }}" in
              3.5*)
                NUMPY_BUILD_VERSION="==1.11.0"
                NUMPY_DEP_VERSION=">=1.11.0,<1.12.0"
              ;;
              3.6*)
                NUMPY_BUILD_VERSION="==1.12.0"
                NUMPY_DEP_VERSION=">=1.12.0,<1.14.5"
              ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
        esac
        echo "::set-output name=build::${NUMPY_BUILD_VERSION}"
        echo "::set-output name=dep::${NUMPY_DEP_VERSION}"
      shell: bash
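The outputs are meant to be consumed when building the Python binding; a sketch under the assumption that the action lives at `.github/actions/numpy_versions` (hypothetical path):
```
steps:
  - id: numpy
    uses: ./.github/actions/numpy_versions   # hypothetical path
    with:
      pyver: "3.8"
  # Pin the build to the exact version; the runtime range goes into the wheel.
  - run: pip install "numpy${{ steps.numpy.outputs.build_version }}"
```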

(deleted file)

@@ -1,7 +0,0 @@
name: "Package TensorFlow"
description: "Package TensorFlow Build"
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/tf-package.sh
      shell: bash

(deleted file)

@@ -1,7 +0,0 @@
name: "Package lib"
description: "Package of lib"
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/package.sh
      shell: bash

(deleted file)

@@ -1,58 +0,0 @@
name: "Python binding"
description: "Binding a python binding"
inputs:
  numpy_build:
    description: "NumPy build dependency"
    required: true
  numpy_dep:
    description: "NumPy runtime dependency"
    required: true
  local_cflags:
    description: "CFLAGS for Python package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for Python package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for Python package"
    required: false
    default: ""
  target:
    description: "TARGET value"
    required: false
    default: "host"
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        python3 --version
        pip3 --version
      shell: bash
    - run: |
        set -xe
        PROJECT_NAME="stt"
        OS=$(uname)
        if [ "${OS}" = "Linux" -a "${{ inputs.target }}" != "host" ]; then
          python3 -m venv stt-build
          source stt-build/bin/activate
        fi
        NUMPY_BUILD_VERSION="${{ inputs.numpy_build }}" \
        NUMPY_DEP_VERSION="${{ inputs.numpy_dep }}" \
        EXTRA_CFLAGS=${{ inputs.local_cflags }} \
        EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
        EXTRA_LIBS=${{ inputs.local_libs }} \
        make -C native_client/python/ \
          TARGET=${{ inputs.target }} \
          RASPBIAN=${{ inputs.chroot }} \
          SETUP_FLAGS="--project_name ${PROJECT_NAME}" \
          bindings-clean bindings
      shell: bash

(deleted file)

@@ -1,35 +0,0 @@
name: "Tests execution"
description: "Running tests"
inputs:
  runtime:
    description: "Runtime to use for running test"
    required: true
  model-kind:
    description: "Running against CI baked or production model"
    required: true
  bitrate:
    description: "Bitrate for testing"
    required: true
  chroot:
    description: "Run using a chroot"
    required: false
runs:
  using: "composite"
  steps:
    - run: |
        set -xe
        build="_tflite"
        model_kind=""
        if [ "${{ inputs.model-kind }}" = "prod" ]; then
          model_kind="-prod"
        fi
        prefix="."
        if [ ! -z "${{ inputs.chroot }}" ]; then
          prefix="${{ inputs.chroot }}"
        fi
        ${prefix}/ci_scripts/${{ inputs.runtime }}${build}-tests${model_kind}.sh ${{ inputs.bitrate }}
      shell: bash
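A hedged invocation sketch; the directory name, runtime value, and bitrate are assumptions that must match an existing `ci_scripts/<runtime>_tflite-tests[-prod].sh` script, per the path built in the step above:
```
steps:
  - uses: ./.github/actions/run-tests   # hypothetical path
    with:
      runtime: node        # resolves to ci_scripts/node_tflite-tests-prod.sh (assumed)
      model-kind: prod
      bitrate: 16k         # illustrative value
```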

(deleted file)

@@ -1,11 +0,0 @@
name: "Select XCode version"
description: "Select XCode version"
inputs:
  version:
    description: "XCode version"
    required: true
runs:
  using: "composite"
  steps:
    - run: sudo xcode-select --switch /Applications/Xcode_${{ inputs.version }}.app
      shell: bash

(deleted file)

@@ -1,12 +0,0 @@
name: "Setup TensorFlow"
description: "Setup TensorFlow Build"
inputs:
  flavor:
    description: "Target flavor for setup script (empty/android-armv7/android-arm64)"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/tf-setup.sh ${{ inputs.flavor }}
      shell: bash

(deleted file)

@@ -1,89 +0,0 @@
name: "Upload cache asset to release"
description: "Upload a build cache asset to a release"
inputs:
  name:
    description: "Artifact name"
    required: true
  path:
    description: "Path of file to upload"
    required: true
  token:
    description: "GitHub token"
    required: false
    default: ${{ github.token }}
  repo:
    description: "Repository name with owner (like actions/checkout)"
    required: false
    default: ${{ github.repository }}
  release-tag:
    description: "Tag of release to check artifacts under"
    required: false
    default: "v0.10.0-alpha.7"
runs:
  using: "composite"
  steps:
    - run: |
        set -xe
        asset_name="${{ inputs.name }}"
        filenames="${{ inputs.path }}"
        if [ $(compgen -G "$filenames" | wc -l) -gt 1 -a -n "$asset_name" ]; then
          echo "Error: multiple input files specified, but also specified an asset_name."
          echo "When uploading multiple files leave asset_name empty to use the file names as asset names."
          exit 1
        fi
        # Check input
        for file in $filenames; do
          if [[ ! -f $file ]]; then
            echo "Error: Input file (${file}) missing"
            exit 1;
          fi
        done
        AUTH="Authorization: token ${{ inputs.token }}"
        owner=$(echo "${{inputs.repo}}" | cut -f1 -d/)
        repo=$(echo "${{inputs.repo}}" | cut -f2 -d/)
        tag="${{ inputs.release-tag }}"
        GH_REPO="https://api.github.com/repos/${owner}/${repo}"
        # Check token
        curl -o /dev/null -sH "$AUTH" $GH_REPO || {
          echo "Error: Invalid repo, token or network issue!"
          exit 1
        }
        # Check if tag exists
        response=$(curl -sH "$AUTH" "${GH_REPO}/git/refs/tags/${tag}")
        eval $(echo "$response" | grep -m 1 "sha.:" | grep -w sha | tr : = | tr -cd '[[:alnum:]]=')
        [ "$sha" ] || {
          echo "Error: Tag does not exist: $tag"
          echo "$response" | awk 'length($0)<100' >&2
          exit 1
        }
        # Get ID of the release based on given tag name
        GH_TAGS="${GH_REPO}/releases/tags/${tag}"
        response=$(curl -sH "$AUTH" $GH_TAGS)
        eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
        [ "$id" ] || {
          echo "Error: Could not find release for tag: $tag"
          echo "$response" | awk 'length($0)<100' >&2
          exit 1
        }
        # Upload assets
        for file in $filenames; do
          if [ -z $asset_name ]; then
            asset=$(basename $file)
          else
            asset=$asset_name
          fi
          echo "Uploading asset with name: $asset from file: $file"
          GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset}"
          curl -T $file -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET
        done
      shell: bash
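Usage sketch, assuming a hypothetical `.github/actions/upload_cache_asset` path; per the logic above, `name` must stay empty when `path` matches several files, so the file names become the asset names:
```
steps:
  - uses: ./.github/actions/upload_cache_asset   # hypothetical path
    with:
      name: ""                      # empty: one asset per matched file
      path: "artifacts/*.tar.xz"    # illustrative glob
      release-tag: "v0.10.0-alpha.7"
```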

(deleted file)

@@ -1,12 +0,0 @@
name: "Install SoX and add to PATH"
description: "Install SoX and add to PATH"
runs:
  using: "composite"
  steps:
    - run: |
        set -ex
        curl -sSLO https://github.com/coqui-ai/STT/releases/download/v0.10.0-alpha.7/sox-14.4.2-win32.zip
        "C:/Program Files/7-Zip/7z.exe" x -o`pwd`/bin/ -tzip -aoa sox-14.4.2-win32.zip
        rm sox-*zip
        echo "`pwd`/bin/sox-14.4.2/" >> $GITHUB_PATH
      shell: bash

(deleted file)

@@ -1,77 +0,0 @@
name: "NodeJS binding"
description: "Binding a nodejs binding"
inputs:
  nodejs_versions:
    description: "NodeJS versions supported"
    required: true
  electronjs_versions:
    description: "ElectronJS versions supported"
    required: true
  local_cflags:
    description: "CFLAGS for NodeJS package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for NodeJS package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for NodeJS package"
    required: false
    default: ""
  target:
    description: "TARGET value"
    required: false
    default: "host"
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        node --version
        npm --version
      shell: msys2 {0}
    - run: |
        npm update
      shell: msys2 {0}
    - run: |
        mkdir -p tmp/headers/nodejs tmp/headers/electronjs
      shell: msys2 {0}
    - run: |
        for node in ${{ inputs.nodejs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
          make -C native_client/javascript \
            TARGET=${{ inputs.target }} \
            RASPBIAN=${{ inputs.chroot }} \
            NODE_ABI_TARGET=--target=${node} \
            NODE_DEVDIR=--devdir=headers/nodejs \
            clean node-wrapper
        done;
      shell: msys2 {0}
    - run: |
        for electron in ${{ inputs.electronjs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
          make -C native_client/javascript \
            TARGET=${{ inputs.target }} \
            RASPBIAN=${{ inputs.chroot }} \
            NODE_ABI_TARGET=--target=${electron} \
            NODE_DIST_URL=--disturl=https://electronjs.org/headers \
            NODE_RUNTIME=--runtime=electron \
            NODE_DEVDIR=--devdir=headers/electronjs \
            clean node-wrapper
        done;
      shell: msys2 {0}
    - run: |
        make -C native_client/javascript clean npm-pack
      shell: msys2 {0}
    - run: |
        tar -czf native_client/javascript/wrapper.tar.gz \
          -C native_client/javascript/ lib/
      shell: msys2 {0}

(deleted file)

@@ -1,14 +0,0 @@
GitHub Action to set NumPy versions
===================================
This action aims at computing correct values for NumPy dependencies:
- `NUMPY_BUILD_VERSION`: range of accepted versions at Python binding build time
- `NUMPY_DEP_VERSION`: range of accepted versions for execution time
Versions are set considering several factors:
- API and ABI compatibility; otherwise the binding wrapper can throw errors
  like "Illegal instruction", or compute wrong values because of a changed
  memory layout
- Wheels availability: for CI and end users, we want to avoid having to
rebuild numpy so we stick to versions where there is an existing upstream
`wheel` file

(deleted file)

@@ -1,93 +0,0 @@
name: "get numpy versions"
description: "Get proper NumPy build and runtime versions dependencies range"
inputs:
  pyver:
    description: "Python version"
    required: true
outputs:
  build_version:
    description: "NumPy build dependency"
    value: ${{ steps.numpy.outputs.build }}
  dep_version:
    description: "NumPy runtime dependency"
    value: ${{ steps.numpy.outputs.dep }}
runs:
  using: "composite"
  steps:
    - id: numpy
      run: |
        set -ex
        NUMPY_BUILD_VERSION="==1.7.0"
        NUMPY_DEP_VERSION=">=1.7.0"
        OS=$(uname -s)
        ARCH=$(uname -m)
        case "${OS}:${ARCH}" in
          Linux:x86_64)
            case "${{ inputs.pyver }}" in
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
          Darwin:*)
            case "${{ inputs.pyver }}" in
              3.6*)
                NUMPY_BUILD_VERSION="==1.9.0"
                NUMPY_DEP_VERSION=">=1.9.0"
              ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
          ${CI_MSYS_VERSION}:x86_64)
            case "${{ inputs.pyver }}" in
              3.5*)
                NUMPY_BUILD_VERSION="==1.11.0"
                NUMPY_DEP_VERSION=">=1.11.0,<1.12.0"
              ;;
              3.6*)
                NUMPY_BUILD_VERSION="==1.12.0"
                NUMPY_DEP_VERSION=">=1.12.0,<1.14.5"
              ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
        esac
        echo "::set-output name=build::${NUMPY_BUILD_VERSION}"
        echo "::set-output name=dep::${NUMPY_DEP_VERSION}"
      shell: msys2 {0}

(deleted file)

@@ -1,31 +0,0 @@
name: "Python binding"
description: "Binding a python binding"
inputs:
  numpy_build:
    description: "NumPy build dependency"
    required: true
  numpy_dep:
    description: "NumPy runtime dependency"
    required: true
runs:
  using: "composite"
  steps:
    - run: |
        set -xe
        python3 --version
        pip3 --version
        PROJECT_NAME="stt"
        NUMPY_BUILD_VERSION="${{ inputs.numpy_build }}" \
        NUMPY_DEP_VERSION="${{ inputs.numpy_dep }}" \
        EXTRA_CFLAGS=${{ inputs.local_cflags }} \
        EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
        EXTRA_LIBS=${{ inputs.local_libs }} \
        make -C native_client/python/ \
          TARGET=${{ inputs.target }} \
          RASPBIAN=${{ inputs.chroot }} \
          SETUP_FLAGS="--project_name ${PROJECT_NAME}" \
          bindings-clean bindings
      shell: msys2 {0}

(deleted file)

@@ -1,35 +0,0 @@
name: "Tests execution"
description: "Running tests"
inputs:
  runtime:
    description: "Runtime to use for running test"
    required: true
  model-kind:
    description: "Running against CI baked or production model"
    required: true
  bitrate:
    description: "Bitrate for testing"
    required: true
  chroot:
    description: "Run using a chroot"
    required: false
runs:
  using: "composite"
  steps:
    - run: |
        set -xe
        build="_tflite"
        model_kind=""
        if [ "${{ inputs.model-kind }}" = "prod" ]; then
          model_kind="-prod"
        fi
        prefix="."
        if [ ! -z "${{ inputs.chroot }}" ]; then
          prefix="${{ inputs.chroot }}"
        fi
        ${prefix}/ci_scripts/${{ inputs.runtime }}${build}-tests${model_kind}.sh ${{ inputs.bitrate }}
      shell: msys2 {0}

(deleted file)

@@ -1,15 +0,0 @@
# Pull request guidelines
Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!
This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.
In order to make a good pull request, please see our [CONTRIBUTING.rst](CONTRIBUTING.rst) file; in particular, make sure you have set up and run the pre-commit hook to check your changes for code style violations.
Before accepting your pull request, you will be asked to sign a [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT).
This [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT):
- Protects you, Coqui, and the users of the code.
- Does not change your rights to use your contributions for any purpose.
- Does not change the license of the 🐸STT project. It just makes the terms of your contribution clearer and lets us know you are OK to contribute.

File diff suppressed because it is too large

(deleted file)

@@ -1,32 +0,0 @@
name: "Lints"
on:
  pull_request:
defaults:
  run:
    shell: bash
jobs:
  training-unittests:
    name: "Lin|Training unittests"
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        pyver: [3.6, 3.7]
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.pyver }}
      - name: Run training unittests
        run: |
          ./ci_scripts/train-unittests.sh
  pre-commit-checks:
    name: "Lin|Pre-commit checks"
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Run pre-commit checks
        run: |
          python .pre-commit-2.11.1.pyz run --all-files

.gitignore (vendored, 3 lines changed)

@@ -32,9 +32,10 @@
/doc/.build/
/doc/xml-c/
/doc/xml-java/
Dockerfile.build
Dockerfile.train
doc/xml-c
doc/xml-java
doc/xml-dotnet
convert_graphdef_memmapped_format
native_client/swift/deepspeech_ios.framework/deepspeech_ios
.github/actions/check_artifact_exists/node_modules/

.gitmodules (vendored, 4 lines changed)

@@ -1,10 +1,10 @@
 [submodule "doc/examples"]
 	path = doc/examples
-	url = https://github.com/coqui-ai/STT-examples.git
+	url = https://github.com/mozilla/DeepSpeech-examples.git
 	branch = master
 [submodule "tensorflow"]
 	path = tensorflow
-	url = https://bics.ga/experiments/STT-tensorflow.git
+	url = https://github.com/mozilla/tensorflow.git
 [submodule "kenlm"]
 	path = kenlm
 	url = https://github.com/kpu/kenlm

(modified file)

@@ -1,2 +1,4 @@
 [settings]
-profile=black
+line_length=80
+multi_line_output=3
+default_section=FIRSTPARTY

Binary file not shown.

(deleted file)

@@ -1,24 +0,0 @@
exclude: '^(taskcluster|.github|native_client/kenlm|native_client/ctcdecode/third_party|tensorflow|kenlm|doc/examples|data/alphabet.txt|data/smoke_test)'
repos:
  - repo: 'https://github.com/pre-commit/pre-commit-hooks'
    rev: v2.3.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: 'https://github.com/psf/black'
    rev: 20.8b1
    hooks:
      - id: black
        language_version: python3
  # - repo: https://github.com/pycqa/isort
  #   rev: 5.8.0
  #   hooks:
  #     - id: isort
  #       name: isort (python)
  #     - id: isort
  #       name: isort (cython)
  #       types: [cython]
  #     - id: isort
  #       name: isort (pyi)
  #       types: [pyi]
.pylintrc (155 lines changed)

@@ -3,22 +3,14 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-allow-list=
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
# for backward compatibility.)
extension-pkg-whitelist=
# Specify a score threshold to be exceeded before program exits with error.
fail-under=10.0
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=native_client/kenlm
# Files or directories to be skipped. They should be base names, not paths.
ignore=CVS
# Files or directories matching the regex patterns are skipped. The regex
# matches against base names, not paths.
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
# Python code to execute, usually for sys.path manipulation such as
@@ -34,13 +26,16 @@ jobs=1
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
# Specify a configuration file.
#rcfile=
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
@@ -65,7 +60,16 @@ confidence=
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=print-statement,
disable=missing-docstring,
line-too-long,
wrong-import-order,
ungrouped-imports,
wrong-import-position,
import-error,
no-name-in-module,
no-member,
unsubscriptable-object,
print-statement,
parameter-unpacking,
unpacking-in-except,
old-raise-syntax,
@@ -83,6 +87,12 @@ disable=print-statement,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
useless-object-inheritance,
too-few-public-methods,
too-many-branches,
too-many-arguments,
too-many-locals,
too-many-statements,
apply-builtin,
basestring-builtin,
buffer-builtin,
@@ -143,8 +153,7 @@ disable=print-statement,
xreadlines-attribute,
deprecated-sys-function,
exception-escape,
comprehension-escape,
format
comprehension-escape
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
@@ -155,11 +164,11 @@ enable=c-extension-no-member
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
# which contain the number of messages in each category, as well as 'statement'
# which is the total number of statements analyzed. This score is used by the
# global evaluation report (RP0004).
# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables errors warning, statement which
# respectively contain the number of errors / warnings messages and the total
# number of statements analyzed. This is used by the global evaluation report
# (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Template used to display messages. This is a python new-style format string
@@ -187,13 +196,13 @@ max-nested-blocks=5
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit,argparse.parse_error
never-returning-functions=sys.exit
[LOGGING]
# The type of string formatting that logging methods do. `old` means using %
# formatting, `new` is for `{}` formatting.
# Format style used to check logging format string. `old` means using %
# formatting, while `new` is for `{}` formatting.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
@@ -206,22 +215,18 @@ logging-modules=logging
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the 'python-enchant' package.
# Spelling dictionary name. Available dictionaries: none. To make it working
# install python-enchant package..
spelling-dict=
# List of comma separated words that should be considered directives if they
# appear and the beginning of a comment and should not be checked.
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
# A path to a file that contains private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
# Tells whether to store unknown words to indicated private dictionary in
# --spelling-private-dict-file option instead of raising a message.
spelling-store-unknown-words=no
@@ -232,9 +237,6 @@ notes=FIXME,
XXX,
TODO
# Regular expression of note tags to take in consideration.
#notes-rgx=
[TYPECHECK]
@@ -271,7 +273,7 @@ ignored-classes=optparse.Values,thread._local,_thread._local
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
@@ -287,9 +289,6 @@ missing-member-hint-distance=1
# showing a hint for a missing member.
missing-member-max-choices=1
# List of decorators that change the signature of a decorated function.
signature-mutators=
[VARIABLES]
@@ -300,9 +299,6 @@ additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of names allowed to shadow builtins
allowed-redefined-builtins=
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
@@ -345,6 +341,13 @@ max-line-length=100
# Maximum number of lines in a module.
max-module-lines=1000
# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,
dict-separator
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
@@ -376,7 +379,7 @@ argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style.
#argument-rgx=
argument-rgx=[a-z_][a-z0-9_]{0,30}$
# Naming style matching correct attribute names.
attr-naming-style=snake_case
@@ -386,16 +389,7 @@ attr-naming-style=snake_case
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
bar,
baz,
toto,
tutu,
tata
# Bad variable names regexes, separated by a comma. If names match any regex,
# they will always be refused
bad-names-rgxs=
bad-names=
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
@@ -404,13 +398,6 @@ class-attribute-naming-style=any
# attribute-naming-style.
#class-attribute-rgx=
# Naming style matching correct class constant names.
class-const-naming-style=UPPER_CASE
# Regular expression matching correct class constant names. Overrides class-
# const-naming-style.
#class-const-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
@@ -440,14 +427,11 @@ function-naming-style=snake_case
good-names=i,
j,
k,
x,
ex,
Run,
_
# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
@@ -490,26 +474,19 @@ variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style.
#variable-rgx=
variable-rgx=[a-z_][a-z0-9_]{0,30}$
[STRING]
# This flag controls whether inconsistent-quotes generates a warning when the
# character used as a quote delimiter is used inconsistently within a module.
check-quote-consistency=no
# This flag controls whether the implicit-str-concat should generate a warning
# on implicit string concatenation in sequences defined over several lines.
# This flag controls whether the implicit-str-concat-in-sequence should
# generate a warning on implicit string concatenation in sequences defined over
# several lines.
check-str-concat-over-line-jumps=no
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
@@ -521,17 +498,16 @@ analyse-fallback-blocks=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix
# Output a graph (.gv or any supported image format) of external dependencies
# to the given file (report RP0402 must not be disabled).
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=
# Output a graph (.gv or any supported image format) of all (i.e. internal and
# external) dependencies to the given file (report RP0402 must not be
# disabled).
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=
# Output a graph (.gv or any supported image format) of internal dependencies
# to the given file (report RP0402 must not be disabled).
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
@@ -541,20 +517,13 @@ known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[CLASSES]
# Warn about protected attribute access inside special methods
check-protected-access-in-special-methods=no
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
__new__,
setUp,
__post_init__
setUp
# List of member names, which should be excluded from the protected access
# warning.
@@ -579,7 +548,7 @@ max-args=5
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
# Maximum number of boolean expressions in an if statement.
max-bool-expr=5
# Maximum number of branch for function / method body.

(modified file)

@@ -14,4 +14,4 @@ sphinx:
 python:
   version: 3.7
   install:
-    - requirements: doc/requirements.txt
+    - requirements: taskcluster/docs-requirements.txt

.taskcluster.yml (new file, +65)

@@ -0,0 +1,65 @@
# The version is always required
version: 0
# Top level metadata is always required
metadata:
  name: "DeepSpeech"
  description: "DeepSpeech builds"
  owner: "{{ event.head.user.email }}" # the user who sent the pr/push e-mail will be inserted here
  source: "{{ event.head.repo.url }}"  # the repo where the pr came from will be inserted here
tasks:
  - provisionerId: "proj-deepspeech"
    workerType: "ci-decision-task"
    extra:
      github:
        env: true
        events:
          - pull_request.opened
          - pull_request.synchronize
          - pull_request.reopened
          - push
          - tag
        branches:
          - r0.9
    scopes: [
      "queue:create-task:highest:proj-deepspeech/*",
      "queue:route:index.project.deepspeech.*",
      "index:insert-task:project.deepspeech.*",
      "queue:scheduler-id:taskcluster-github",
      "generic-worker:cache:deepspeech-macos-pyenv",
      "docker-worker:capability:device:kvm"
    ]
    payload:
      maxRunTime: 600
      image: "ubuntu:18.04"
      features:
        taskclusterProxy: true
      env:
        TC_DECISION_SHA: ef67832e6657f43e139a10f37eb326a7d9d96dad
      command:
        - "/bin/bash"
        - "--login"
        - "-cxe"
        - >
          echo "deb http://archive.ubuntu.com/ubuntu/ bionic-updates main" > /etc/apt/sources.list.d/bionic-updates.list &&
          apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
          adduser --system --home /home/build-user build-user &&
          cd /home/build-user/ &&
          echo -e "#!/bin/bash\nset -xe\nenv && id && mkdir ~/DeepSpeech/ && git clone --quiet {{event.head.repo.url}} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet {{event.head.sha}}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
          sudo -H -u build-user /bin/bash /tmp/clone.sh &&
          sudo -H -u build-user --preserve-env /bin/bash /home/build-user/DeepSpeech/ds/taskcluster/tc-schedule.sh
      artifacts:
        "public":
          type: "directory"
          path: "/tmp/artifacts/"
          expires: "{{ '7 days' | $fromNow }}"
    # Each task also requires explicit metadata
    metadata:
      name: "DeepSpeech Decision Task"
      description: "DeepSpeech Decision Task: triggers everything."
      owner: "{{ event.head.user.email }}"
      source: "{{ event.head.repo.url }}"

(deleted file)

@@ -1,102 +0,0 @@
version: 1
policy:
  pullRequests: collaborators_quiet
tasks:
  $let:
    metadata:
      task_id: {$eval: as_slugid("decision_task")}
      github:
        $if: 'tasks_for == "github-pull-request"'
        then:
          action: "pull_request.${event.action}"
          login: ${event.pull_request.user.login}
          ref: ${event.pull_request.head.ref}
          branch: ${event.pull_request.head.ref}
          tag: ""
          sha: ${event.pull_request.head.sha}
          clone_url: ${event.pull_request.head.repo.clone_url}
        else:
          action:
            $if: 'event.ref[:10] == "refs/tags/"'
            then: "tag"
            else: "push"
          login: ${event.pusher.name}
          ref: ${event.ref}
          branch:
            $if: 'event.ref[:11] == "refs/heads/"'
            then: ${event.ref[11:]}
            else: ""
          tag:
            $if: 'event.ref[:10] == "refs/tags/"'
            then: ${event.ref[10:]}
            else: ""
          sha: ${event.after}
          clone_url: ${event.repository.clone_url}
  in:
    $let:
      decision_task:
        taskId: ${metadata.task_id}
        created: {$fromNow: ''}
        deadline: {$fromNow: '60 minutes'}
        provisionerId: "proj-deepspeech"
        workerType: "ci-decision-task"
        scopes: [
          "queue:create-task:highest:proj-deepspeech/*",
          "queue:route:index.project.deepspeech.*",
          "index:insert-task:project.deepspeech.*",
          "queue:scheduler-id:taskcluster-github",
          "generic-worker:cache:deepspeech-macos-pyenv",
          "docker-worker:capability:device:kvm"
        ]
        payload:
          maxRunTime: 600
          image: "ubuntu:18.04"
          features:
            taskclusterProxy: true
          env:
            TASK_ID: ${metadata.task_id}
            GITHUB_HEAD_USER_LOGIN: ${metadata.github.login}
            GITHUB_HEAD_USER_EMAIL: ${metadata.github.login}@users.noreply.github.com
            GITHUB_EVENT: ${metadata.github.action}
            GITHUB_HEAD_REPO_URL: ${metadata.github.clone_url}
            GITHUB_HEAD_BRANCH: ${metadata.github.branch}
            GITHUB_HEAD_TAG: ${metadata.github.tag}
            GITHUB_HEAD_REF: ${metadata.github.ref}
            GITHUB_HEAD_SHA: ${metadata.github.sha}
          command:
            - "/bin/bash"
            - "--login"
            - "-cxe"
            - >
              echo "deb http://archive.ubuntu.com/ubuntu/ bionic-updates main" > /etc/apt/sources.list.d/bionic-updates.list &&
              apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
              adduser --system --home /home/build-user build-user &&
              cd /home/build-user/ &&
              echo -e "#!/bin/bash\nset -xe\nenv && id && mkdir ~/DeepSpeech/ && git clone --quiet ${metadata.github.clone_url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${metadata.github.ref}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
              sudo -H -u build-user /bin/bash /tmp/clone.sh &&
              sudo -H -u build-user --preserve-env /bin/bash /home/build-user/DeepSpeech/ds/taskcluster/tc-schedule.sh
          artifacts:
            "public":
              type: "directory"
              path: "/tmp/artifacts/"
              expires: {$fromNow: '7 days'}
        metadata:
          name: "DeepSpeech decision task"
          description: "DeepSpeech decision task"
          owner: "${metadata.github.login}@users.noreply.github.com"
          source: "${metadata.github.clone_url}"
    in:
      $flattenDeep:
        - $if: 'tasks_for == "github-pull-request" && event["action"] in ["opened", "reopened", "synchronize"]'
          then: {$eval: decision_task}
        - $if: 'tasks_for == "github-push" && event.ref == "refs/heads/master"'
          then: {$eval: decision_task}
        - $if: 'tasks_for == "github-push" && event.ref[:10] == "refs/tags/"'
          then: {$eval: decision_task}

View File

@@ -1,18 +1,19 @@
This file contains a list of papers in chronological order that have been published using 🐸STT.
This file contains a list of papers in chronological order that have been published
using DeepSpeech.
To appear
==========
* Raghuveer Peri, Haoqi Li, Krishna Somandepalli, Arindam Jati, Shrikanth Narayanan (2020) "An empirical analysis of information encoded in disentangled neural speaker representations".
* Raghuveer Peri, Haoqi Li, Krishna Somandepalli, Arindam Jati, Shrikanth Narayanan (2020) "An empirical analysis of information encoded in disentangled neural speaker representations".
* Rosana Ardila, Megan Branson, Kelly Davis, Michael Henretty, Michael Kohler, Josh Meyer, Reuben Morais, Lindsay Saunders, Francis M. Tyers, and Gregor Weber (2020) "Common Voice: A Massively-Multilingual Speech Corpus".
Published
Published
==========
2020
----------
* Nils Hjortnaes, Niko Partanen, Michael Rießler and Francis M. Tyers (2020)
* Nils Hjortnaes, Niko Partanen, Michael Rießler and Francis M. Tyers (2020)
"Towards a Speech Recognizer for Komi, an Endangered and Low-Resource Uralic Language". *Proceedings of the 6th International Workshop on Computational Linguistics of Uralic Languages*.
```
@@ -72,5 +73,5 @@ Published
booktitle = {2018 IEEE/ACM Machine Learning in HPC Environments (MLHPC)},
doi = {https://doi.org/10.1109/MLHPC.2018.8638637},
year = 2018
}
}
```

View File

@@ -1,132 +1,15 @@
# Contributor Covenant Code of Conduct
# Community Participation Guidelines
## Our Pledge
This repository is governed by Mozilla's code of conduct and etiquette guidelines.
For more details, please read the
[Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual identity
and orientation.
## How to Report
For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
<!--
## Project Specific Etiquette
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement by emailing
[coc-report@coqui.ai](mailto:coc-report@coqui.ai).
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available
at [https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
In some cases, there will be additional project etiquette, e.g. https://bugzilla.mozilla.org/page.cgi?id=etiquette.html.
Please update for your project.
-->

View File

@@ -1,22 +1,14 @@
Coqui STT code owners / governance system
=========================================
DeepSpeech code owners
======================
🐸STT is run under a governance system inspired by (and partially copied from) the `Mozilla module ownership system <https://www.mozilla.org/about/governance/policies/module-ownership/>`_. The project is roughly divided into modules, and each module has its own owners, who are responsible for reviewing pull requests and deciding on technical direction for their modules. Module ownership authority is given to people who have worked extensively on areas of the project.
This file describes reviewers who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.
Module owners also have the authority of naming other module owners or appointing module peers, which are people with authority to review pull requests in that module. They can also sub-divide their module into sub-modules with their own owners.
There's overlap in the areas of expertise of each reviewer, and in particular the sets of files covered by each area overlap a lot. Don't worry about getting it exactly right when requesting review; any code owner will be happy to redirect the request to a more appropriate person.
Module owners are not tyrants. They are chartered to make decisions with input from the community and in the best interests of the community. Module owners are not required to make code changes or additions solely because the community wants them to do so. (Like anyone else, the module owners may write code because they want to, because their employers want them to, because the community wants them to, or for some other reason.) Module owners do need to pay attention to patches submitted to that module. However, “pay attention” does not mean agreeing to every patch. Some patches may not make sense for the 🐸STT project; some may be poorly implemented. Module owners have the authority to decline a patch; this is a necessary part of the role. We ask the module owners to describe in the relevant issue their reasons for wanting changes to a patch, for declining it altogether, or for postponing review for some period. We don't ask or expect them to rewrite patches to make them acceptable. Similarly, module owners may need to delay review of a promising patch due to an upcoming deadline. For example, a patch may be of interest, but not for the next milestone. In such a case it may make sense for the module owner to postpone review of a patch until after matters needed for a milestone have been finalized. Again, we expect this to be described in the relevant issue. And of course, it shouldn't go on very often or for very long, or escalation and review is likely.
The work of the various module owners and peers is overseen by the global owners, who are responsible for making final decisions in case there's conflict between owners, as well as for setting the direction of the project as a whole.
This file describes module owners who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.
There's overlap in the areas of expertise of each owner, and in particular the sets of files covered by each area overlap a lot. Don't worry about getting it exactly right when requesting review; any code owner will be happy to redirect the request to a more appropriate person.
Global owners
Global reviewers
----------------
These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other owners, global owners will make a final decision.
These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other reviewers, global reviewers will make a final decision.
- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)
@@ -46,7 +38,7 @@ Testing & CI
Native inference client
-----------------------
Everything that goes into libstt.so and is not specifically covered in another area fits here.
Everything that goes into libdeepspeech.so and is not specifically covered in another area fits here.
- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)
@@ -110,7 +102,7 @@ Documentation
- Alexandre Lissy (@lissyx)
- Reuben Morais (@reuben)
.. Third party bindings
--------------------
Third party bindings
--------------------
Hosted externally and owned by the individual authors. See the `list of third-party bindings <https://stt.readthedocs.io/en/latest/USING.html#third-party-bindings>`_ for more info.
Hosted externally and owned by the individual authors. See the `list of third-party bindings <https://deepspeech.readthedocs.io/en/master/USING.html#third-party-bindings>`_ for more info.

View File

@@ -1,32 +1,37 @@
Contribution guidelines
=======================
Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!
Welcome to the DeepSpeech project! We are excited to see your interest, and appreciate your support!
This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the `CODE_OF_CONDUCT.md <CODE_OF_CONDUCT.md>`_.
This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the `Mozilla Community Participation Guidelines <https://www.mozilla.org/about/governance/policies/participation/>`_.
How to Make a Good Pull Request
-------------------------------
Here are some guidelines on how to make a good PR to 🐸STT.
Here are some guidelines on how to make a good PR to DeepSpeech.
Bug-fix PR
^^^^^^^^^^
You've found a bug and you were able to squash it! Great job! Please write a short but clear commit message describing the bug, and how you fixed it. This makes review much easier. Also, please name your branch something related to the bug-fix.
Documentation PR
^^^^^^^^^^^^^^^^
If you're just making updates or changes to the documentation, there's no need to run all of DeepSpeech's tests for Continuous Integration (i.e. Taskcluster tests). In this case, at the end of your short but clear commit message, you should add **X-DeepSpeech: NOBUILD**. This will make CI skip the build and test tasks for your PR, saving both time and compute.
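For example, a documentation-only commit could look like the following minimal sketch (the subject line is illustrative; the **X-DeepSpeech: NOBUILD** trailer is the part that matters):
.. code-block:: bash
git commit -m "Fix typo in TRAINING docs" -m "X-DeepSpeech: NOBUILD"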
New Feature PR
^^^^^^^^^^^^^^
You've made some core changes to 🐸STT, and you would like to share them back with the community -- great! First things first: if you're planning to add a feature (not just fix a bug or docs) let the 🐸STT team know ahead of time and get some feedback early. A quick check-in with the team can save time during code-review, and also ensure that your new feature fits into the project.
You've made some core changes to DeepSpeech, and you would like to share them back with the community -- great! First things first: if you're planning to add a feature (not just fix a bug or docs) let the DeepSpeech team know ahead of time and get some feedback early. A quick check-in with the team can save time during code-review, and also ensure that your new feature fits into the project.
The 🐸STT codebase is made of many connected parts. There is Python code for training 🐸STT, core C++ code for running inference on trained models, and multiple language bindings to the C++ core so you can use 🐸STT in your favorite language.
The DeepSpeech codebase is made of many connected parts. There is Python code for training DeepSpeech, core C++ code for running inference on trained models, and multiple language bindings to the C++ core so you can use DeepSpeech in your favorite language.
Whenever you add a new feature to 🐸STT and want to contribute that feature back to the project, here are some things to keep in mind:
Whenever you add a new feature to DeepSpeech and want to contribute that feature back to the project, here are some things to keep in mind:
1. You've made changes to the core C++ code. Core changes can have downstream effects on all parts of the 🐸STT project, so keep that in mind. You should minimally also make necessary changes to the C client (i.e. **args.h** and **client.cc**). The bindings for Python, Java, and Javascript are SWIG generated, and in the best-case scenario you won't have to worry about them. However, if you've added a whole new feature, you may need to make custom tweaks to those bindings, because SWIG may not automagically work with your new feature, especially if you've exposed new arguments. The bindings for .NET and Swift are not generated automatically. It would be best if you also made the necessary manual changes to these bindings as well. It is best to communicate with the core 🐸STT team and come to an understanding of where you will likely need to work with the bindings. They can't predict all the bugs you will run into, but they will have a good idea of how to plan for some obvious challenges.
1. You've made changes to the core C++ code. Core changes can have downstream effects on all parts of the DeepSpeech project, so keep that in mind. You should minimally also make necessary changes to the C client (i.e. **args.h** and **client.cc**). The bindings for Python, Java, and Javascript are SWIG generated, and in the best-case scenario you won't have to worry about them. However, if you've added a whole new feature, you may need to make custom tweaks to those bindings, because SWIG may not automagically work with your new feature, especially if you've exposed new arguments. The bindings for .NET and Swift are not generated automatically. It would be best if you also made the necessary manual changes to these bindings as well. It is best to communicate with the core DeepSpeech team and come to an understanding of where you will likely need to work with the bindings. They can't predict all the bugs you will run into, but they will have a good idea of how to plan for some obvious challenges.
2. You've made changes to the Python code. Make sure you run a linter (described below).
3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the 🐸STT decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use what is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool. You can use sample WAV files for testing, which are provided in the `STT/data/` directory.
3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the DeepSpeech decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use what is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool (see the sketch after this list). You can use sample WAV files for testing, which are provided in the `DeepSpeech/data/` directory.
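As a minimal sketch of such a local check, assuming a built `deepspeech` client and the v0.9.3 release model files in the working directory (file names illustrative), you could time a single inference on one of the bundled samples:
.. code-block:: bash
# Wall-clock time and CPU counters for one inference run
perf stat ./deepspeech --model deepspeech-0.9.3-models.pbmm --scorer deepspeech-0.9.3-models.scorer --audio data/smoke_test/LDC93S1.wav
Comparing the numbers before and after your change gives a rough latency signal; for memory usage, a tool like `valgrind --tool=massif` can serve the same purpose.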
Requesting review on your PR
----------------------------
@@ -34,14 +39,54 @@ Requesting review on your PR
Generally, a code owner will be notified of your pull request and will either review it or ask some other code owner for their review. If you'd like to proactively request review as you open the PR, see the CODE_OWNERS.rst file, which describes who's an appropriate reviewer depending on which parts of the code you're changing.
Code linting
------------
Python Linter
-------------
We use `pre-commit <https://pre-commit.com/>`_ to manage pre-commit hooks that take care of checking your changes for code style violations. Before committing changes, make sure you have the hook installed in your setup by running, in the virtual environment you use for running the code:
Before making a Pull Request for Python code changes, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter set up in this repository, so, for example, if you've made some changes and would like to run the linter on just the changed code, you can use the following command:
.. code-block:: bash
cd STT
python .pre-commit-2.11.1.pyz install
pip install pylint cardboardlint
cardboardlinter --refspec master
This will compare the code against master and run the linter on all the changes. We plan to introduce more linter checks (e.g. for C++) in the future. To run it automatically as a git pre-commit hook, do the following:
.. code-block:: bash
cat <<\EOF > .git/hooks/pre-commit
#!/bin/bash
if [ ! -x "$(command -v cardboardlinter)" ]; then
exit 0
fi
# First, stash index and work dir, keeping only the
# to-be-committed changes in the working directory.
echo "Stashing working tree changes..." 1>&2
old_stash=$(git rev-parse -q --verify refs/stash)
git stash save -q --keep-index
new_stash=$(git rev-parse -q --verify refs/stash)
# If there were no changes (e.g., `--amend` or `--allow-empty`)
# then nothing was stashed, and we should skip everything,
# including the tests themselves. (Presumably the tests passed
# on the previous commit, so there is no need to re-run them.)
if [ "$old_stash" = "$new_stash" ]; then
echo "No changes, skipping lint." 1>&2
exit 0
fi
# Run tests
cardboardlinter --refspec HEAD -n auto
status=$?
# Restore changes
echo "Restoring working tree changes..." 1>&2
git reset --hard -q && git stash apply --index -q && git stash drop -q
# Exit with status from test-run: nonzero prevents commit
exit $status
EOF
chmod +x .git/hooks/pre-commit
This will run the linters on just the changes made in your commit.
This will install a git pre-commit hook which will check your commits and let you know about any style violations that need fixing.

12
DeepSpeech.py Executable file
View File

@@ -0,0 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
if __name__ == '__main__':
try:
from deepspeech_training import train as ds_train
except ImportError:
print('Training package is not installed. See training documentation.')
raise
ds_train.run_script()
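For context, a typical invocation of this entry point mirrors what `bin/run-ldc93s1.sh` does (flag values illustrative, not a recommended configuration):
```bash
python3 DeepSpeech.py --train_files data/ldc93s1/ldc93s1.csv \
  --dev_files data/ldc93s1/ldc93s1.csv \
  --test_files data/ldc93s1/ldc93s1.csv \
  --n_hidden 100 --epochs 200
```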

View File

@@ -1,8 +1,11 @@
# Please refer to the USING documentation, "Dockerfile for building from source"
# Need devel version because we need /usr/include/cudnn.h
# Need devel version because we need /usr/include/cudnn.h
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
# >> START Install base software
# Get basic packages
@@ -61,7 +64,7 @@ ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.1
ENV TF_CUDNN_VERSION 7.6
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.8
ENV TF_NCCL_VERSION 2.7
# Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU
@@ -109,11 +112,16 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
# << END Configure Bazel
WORKDIR /
COPY . /STT/
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech
WORKDIR /DeepSpeech
RUN git checkout $DEEPSPEECH_SHA
RUN git submodule sync tensorflow/
RUN git submodule update --init tensorflow/
# >> START Build and bind
WORKDIR /STT/tensorflow
WORKDIR /DeepSpeech/tensorflow
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
RUN ./configure
@@ -124,12 +132,14 @@ RUN ./configure
# passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
# Build STT
# Build DeepSpeech
RUN bazel build \
--verbose_failures \
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
--config=monolithic \
--config=cuda \
-c opt \
--copt=-O3 \
--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
--copt=-mtune=generic \
--copt=-march=x86-64 \
--copt=-msse \
@@ -138,26 +148,24 @@ RUN bazel build \
--copt=-msse4.1 \
--copt=-msse4.2 \
--copt=-mavx \
--config=noaws \
--config=nogcp \
--config=nohdfs \
--config=nonccl \
//native_client:libstt.so
--copt=-fvisibility=hidden \
//native_client:libdeepspeech.so \
--verbose_failures \
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
# Copy built libs to /STT/native_client
RUN cp bazel-bin/native_client/libstt.so /STT/native_client/
# Copy built libs to /DeepSpeech/native_client
RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
# Build client.cc and install Python client and decoder bindings
ENV TFDIR /STT/tensorflow
ENV TFDIR /DeepSpeech/tensorflow
RUN nproc
WORKDIR /STT/native_client
RUN make NUM_PROCESSES=$(nproc) stt
WORKDIR /DeepSpeech/native_client
RUN make NUM_PROCESSES=$(nproc) deepspeech
WORKDIR /STT
WORKDIR /DeepSpeech
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install -U pip setuptools wheel
RUN pip3 install --upgrade native_client/python/dist/*.whl
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
@@ -168,8 +176,8 @@ RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
# Allow Python printing utf-8
ENV PYTHONIOENCODING UTF-8
# Build KenLM in /STT/native_client/kenlm folder
WORKDIR /STT/native_client
# Build KenLM in /DeepSpeech/native_client/kenlm folder
WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm && \
git clone https://github.com/kpu/kenlm && \
cd kenlm && \
@@ -180,4 +188,4 @@ RUN rm -rf kenlm && \
make -j $(nproc)
# Done
WORKDIR /STT
WORKDIR /DeepSpeech

View File

@@ -1,97 +0,0 @@
# This is a Dockerfile useful for training models with Coqui STT.
# You can train "acoustic models" with audio + Tensorflow, and
# you can create "scorers" with text + KenLM.
FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3 AS kenlm-build
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential cmake libboost-system-dev \
libboost-thread-dev libboost-program-options-dev \
libboost-test-dev libeigen3-dev zlib1g-dev \
libbz2-dev liblzma-dev && \
rm -rf /var/lib/apt/lists/*
# Build KenLM to generate new scorers
WORKDIR /code
COPY kenlm /code/kenlm
RUN cd /code/kenlm && \
mkdir -p build && \
cd build && \
cmake .. && \
make -j $(nproc) || \
( echo "ERROR: Failed to build KenLM."; \
echo "ERROR: Make sure you update the kenlm submodule on host before building this Dockerfile."; \
echo "ERROR: $ cd STT; git submodule update --init kenlm"; \
exit 1; )
FROM ubuntu:20.04 AS wget-binaries
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends wget unzip xz-utils && \
rm -rf /var/lib/apt/lists/*
# Tool to convert output graph for inference
RUN wget --no-check-certificate https://github.com/coqui-ai/STT/releases/download/v0.9.3/convert_graphdef_memmapped_format.linux.amd64.zip -O temp.zip && \
unzip temp.zip && \
rm temp.zip
RUN wget --no-check-certificate https://github.com/reuben/STT/releases/download/v0.10.0-alpha.1/native_client.tar.xz -O temp.tar.xz && \
tar -xf temp.tar.xz && \
rm temp.tar.xz
FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3
ENV DEBIAN_FRONTEND=noninteractive
# We need to purge python3-xdg because
# it's breaking STT install later with
# errors about setuptools
#
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
wget \
libopus0 \
libopusfile0 \
libsndfile1 \
sox \
libsox-fmt-mp3 && \
apt-get purge -y python3-xdg && \
rm -rf /var/lib/apt/lists/*
# Make sure pip and its dependencies are up-to-date
RUN pip3 install --upgrade pip wheel setuptools
WORKDIR /code
COPY native_client /code/native_client
COPY .git /code/.git
COPY training/coqui_stt_training/VERSION /code/training/coqui_stt_training/VERSION
COPY training/coqui_stt_training/GRAPH_VERSION /code/training/coqui_stt_training/GRAPH_VERSION
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
COPY setup.py /code/setup.py
COPY VERSION /code/VERSION
COPY training /code/training
# Copy files from previous build stages
RUN mkdir -p /code/kenlm/build/
COPY --from=kenlm-build /code/kenlm/build/bin /code/kenlm/build/bin
COPY --from=wget-binaries /convert_graphdef_memmapped_format /code/convert_graphdef_memmapped_format
COPY --from=wget-binaries /generate_scorer_package /code/generate_scorer_package
# Install STT
# No need for the decoder since we did it earlier
# TensorFlow GPU should already be installed on the base image,
# and we don't want to break that
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
# Copy rest of the code and test training
COPY . /code
RUN ./bin/run-ldc93s1.sh && rm -rf ~/.local/share/stt

View File

@@ -1,10 +0,0 @@
.git/lfs
tensorflow
.git/modules/tensorflow
native_client/ds-swig
native_client/libstt.so
native_client/stt
native_client/ctcdecode/dist/
native_client/ctcdecode/temp_build
native_client/ctcdecode/third_party.a
native_client/ctcdecode/workspace_status.cc

View File

@@ -1,12 +0,0 @@
# This is a Dockerfile useful for training models with Coqui STT in Jupyter notebooks
FROM ghcr.io/coqui-ai/stt-train:latest
WORKDIR /code/notebooks
RUN python3 -m pip install --no-cache-dir jupyter jupyter_http_over_ws
RUN jupyter serverextension enable --py jupyter_http_over_ws
EXPOSE 8888
CMD ["bash", "-c", "jupyter notebook --notebook-dir=/code/notebooks --ip 0.0.0.0 --no-browser --allow-root"]

68
Dockerfile.train.tmpl Normal file
View File

@@ -0,0 +1,68 @@
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
FROM tensorflow/tensorflow:1.15.4-gpu-py3
ENV DEBIAN_FRONTEND=noninteractive
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
bash-completion \
build-essential \
cmake \
curl \
git \
libboost-all-dev \
libbz2-dev \
locales \
python3-venv \
unzip \
wget
# We need to remove it because it's breaking deepspeech install later with
# weird errors about setuptools
RUN apt-get purge -y python3-xdg
# Install dependencies for audio augmentation
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
# Try and free some space
RUN rm -rf /var/lib/apt/lists/*
WORKDIR /
RUN git clone $DEEPSPEECH_REPO DeepSpeech
WORKDIR /DeepSpeech
RUN git checkout $DEEPSPEECH_SHA
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
# Prepare deps
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0
# Install DeepSpeech
# - No need for the decoder since we did it earlier
# - There is already correct TensorFlow GPU installed on the base image,
# we don't want to break that
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
# Tool to convert output graph for inference
RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
--artifact convert_graphdef_memmapped_format --target .
# Build KenLM to generate new scorers
WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm && \
git clone https://github.com/kpu/kenlm && \
cd kenlm && \
git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
mkdir -p build && \
cd build && \
cmake .. && \
make -j $(nproc)
WORKDIR /DeepSpeech
RUN ./bin/run-ldc93s1.sh
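Note that this file is a template, not a buildable Dockerfile as-is: the `#DEEPSPEECH_REPO#` and `#DEEPSPEECH_SHA#` placeholders must be substituted first, which the Makefile shown further down handles. A sketch of the full flow (image tag illustrative):
```bash
make Dockerfile.train   # renders Dockerfile.train from Dockerfile.train.tmpl
docker build -f Dockerfile.train -t deepspeech-train .
```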

View File

@@ -1 +1 @@
training/coqui_stt_training/GRAPH_VERSION
training/deepspeech_training/GRAPH_VERSION

24
ISSUE_TEMPLATE.md Normal file
View File

@@ -0,0 +1,24 @@
For support and discussions, please use our [Discourse forums](https://discourse.mozilla.org/c/deep-speech).
If you've found a bug, or have a feature request, then please create an issue with the following information:
- **Have I written custom code (as opposed to running examples on an unmodified clone of the repository)**:
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
- **TensorFlow installed from (our builds, or upstream TensorFlow)**:
- **TensorFlow version (use command below)**:
- **Python version**:
- **Bazel version (if compiling from source)**:
- **GCC/Compiler version (if compiling from source)**:
- **CUDA/cuDNN version**:
- **GPU model and memory**:
- **Exact command to reproduce**:
You can obtain the TensorFlow version with
```bash
python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
```
Please describe the problem clearly. Be sure to convey here why it's a bug or a feature request.
Include any logs or source code that would be helpful to diagnose the problem. For larger logs, link to a Gist, not a screenshot. If including tracebacks, please include the full traceback. Try to provide a reproducible test case.

View File

@@ -1,2 +0,0 @@
include training/coqui_stt_training/VERSION
include training/coqui_stt_training/GRAPH_VERSION

View File

@@ -1,8 +1,8 @@
STT_REPO ?= https://github.com/coqui-ai/STT.git
STT_SHA ?= origin/main
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
DEEPSPEECH_SHA ?= origin/master
Dockerfile%: Dockerfile%.tmpl
sed \
-e "s|#STT_REPO#|$(STT_REPO)|g" \
-e "s|#STT_SHA#|$(STT_SHA)|g" \
-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
< $< > $@
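Because the rule substitutes plain `make` variables, pointing the generated Dockerfile at a fork or branch is just a matter of overriding them on the command line (repository URL and ref are illustrative):
```bash
make Dockerfile.train \
  DEEPSPEECH_REPO=https://github.com/yourname/DeepSpeech.git \
  DEEPSPEECH_SHA=origin/my-feature-branch
```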

View File

@@ -1,69 +1,23 @@
.. image:: images/coqui-STT-logo-green.png
:alt: Coqui STT logo
Project DeepSpeech
==================
.. |doc-img| image:: https://readthedocs.org/projects/stt/badge/?version=latest
:target: https://stt.readthedocs.io/?badge=latest
.. image:: https://readthedocs.org/projects/deepspeech/badge/?version=latest
:target: https://deepspeech.readthedocs.io/?badge=latest
:alt: Documentation
.. |covenant-img| image:: https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg
:target: CODE_OF_CONDUCT.md
:alt: Contributor Covenant
.. |gitter-img| image:: https://badges.gitter.im/coqui-ai/STT.svg
:target: https://gitter.im/coqui-ai/STT?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
:alt: Gitter Room
.. |doi| image:: https://zenodo.org/badge/344354127.svg
:target: https://zenodo.org/badge/latestdoi/344354127
|doc-img| |covenant-img| |gitter-img| |doi|
`👉 Subscribe to 🐸Coqui's Newsletter <https://coqui.ai/?subscription=true>`_
**Coqui STT** (🐸STT) is a fast, open-source, multi-platform, deep-learning toolkit for training and deploying speech-to-text models. 🐸STT is battle tested in both production and research 🚀
🐸STT features
---------------
* High-quality pre-trained STT model.
* Efficient training pipeline with Multi-GPU support.
* Streaming inference.
* Multiple possible transcripts, each with an associated confidence score.
* Real-time inference.
* Small-footprint acoustic model.
* Bindings for various programming languages.
Where to Ask Questions
----------------------
.. list-table::
:widths: 25 25
:header-rows: 1
* - Type
- Link
* - 🚨 **Bug Reports**
- `Github Issue Tracker <https://github.com/coqui-ai/STT/issues/>`_
* - 🎁 **Feature Requests & Ideas**
- `Github Issue Tracker <https://github.com/coqui-ai/STT/issues/>`_
* - ❔ **Questions**
- `Github Discussions <https://github.com/coqui-ai/stt/discussions/>`_
* - 💬 **General Discussion**
- `Github Discussions <https://github.com/coqui-ai/stt/discussions/>`_ or `Gitter Room <https://gitter.im/coqui-ai/STT?utm_source=share-link&utm_medium=link&utm_campaign=share-link>`_
.. image:: https://community-tc.services.mozilla.com/api/github/v1/repository/mozilla/DeepSpeech/master/badge.svg
:target: https://community-tc.services.mozilla.com/api/github/v1/repository/mozilla/DeepSpeech/master/latest
:alt: Task Status
Links & Resources
-----------------
.. list-table::
:widths: 25 25
:header-rows: 1
DeepSpeech is an open-source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper <https://arxiv.org/abs/1412.5567>`_. Project DeepSpeech uses Google's `TensorFlow <https://www.tensorflow.org/>`_ to make the implementation easier.
* - Type
- Link
* - 📰 **Documentation**
- `stt.readthedocs.io <https://stt.readthedocs.io/>`_
* - 🚀 **Latest release with pre-trained models**
- `see the latest release on GitHub <https://github.com/coqui-ai/STT/releases/latest>`_
* - 🤝 **Contribution Guidelines**
- `CONTRIBUTING.rst <CONTRIBUTING.rst>`_
Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io <https://deepspeech.readthedocs.io/?badge=latest>`_.
For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
For contribution guidelines, see `CONTRIBUTING.rst <CONTRIBUTING.rst>`_.
For contact and support information, see `SUPPORT.rst <SUPPORT.rst>`_.

12
RELEASE.rst Normal file
View File

@@ -0,0 +1,12 @@
Making a (new) release of the codebase
======================================
* Update version in VERSION file, commit
* Open PR, ensure all tests are passing properly
* Merge the PR
* Fetch the new master, tag it with (hopefully) the same version as in VERSION
* Push that to Github
* New build should be triggered and new packages should be made
* TaskCluster should schedule a merge build **including** a "DeepSpeech Packages" task
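As a sketch of the tagging steps above (the version number is illustrative):
.. code-block:: bash
git checkout master && git pull
git tag v0.9.3   # should match the contents of the VERSION file
git push origin v0.9.3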

View File

@@ -1,95 +0,0 @@
# General
This is the 1.0.0 release for Coqui STT, the deep learning toolkit for speech-to-text. In accordance with [semantic versioning](https://semver.org/), this version is not completely backwards compatible with previous versions. The compatibility guarantees of our semantic versioning cover the inference APIs: the C API and all the official language bindings: Python, Node.JS/ElectronJS and Android. You can get started today with Coqui STT 1.0.0 by following the steps in our [documentation](https://stt.readthedocs.io/).
This release includes pre-trained English models, available in the Coqui Model Zoo:
- [Coqui English STT v1.0.0-huge-vocab](https://coqui.ai/english/coqui/v1.0.0-huge-vocab)
- [Coqui English STT v1.0.0-yesno](https://coqui.ai/english/coqui/v1.0.0-yesno)
- [Coqui English STT v1.0.0-large-vocab](https://coqui.ai/english/coqui/v1.0.0-large-vocab)
- [Coqui English STT v1.0.0-digits](https://coqui.ai/english/coqui/v1.0.0-digits)
all under the Apache 2.0 license.
The acoustic models were trained on American English data with synthetic noise augmentation. The model achieves a 4.5% word error rate on the [LibriSpeech clean test corpus](http://www.openslr.org/12) and 13.6% word error rate on the [LibriSpeech other test corpus](http://www.openslr.org/12) with the largest release language model.
Note that the model currently performs best in low-noise environments with clear recordings. This does not mean the model cannot be used outside of these conditions, but that accuracy may be lower. Some users may need to further fine-tune the model to meet their intended use case.
We also include example audio files:
[audio-1.0.0.tar.gz](https://github.com/coqui-ai/STT/releases/download/v1.0.0/audio-1.0.0.tar.gz)
which can be used to test the engine, and checkpoint files for the English model:
[coqui-stt-1.0.0-checkpoint.tar.gz](https://github.com/coqui-ai/STT/releases/download/v1.0.0/coqui-stt-1.0.0-checkpoint.tar.gz)
which are under the Apache 2.0 license and can be used as the basis for further fine-tuning. Finally this release also includes a source code tarball:
[v1.0.0.tar.gz](https://github.com/coqui-ai/STT/archive/v1.0.0.tar.gz)
under the [MPL-2.0 license](https://www.mozilla.org/en-US/MPL/2.0/). Note that this tarball is for archival purposes only, since GitHub does not include submodules in the automatic tarballs. For usage and development with the source code, clone the repository using Git, following our [documentation](https://stt.readthedocs.io/).
# Notable changes
- Removed support for protocol buffer input in native client and consolidated all packages under a single "STT" name accepting TFLite inputs
- Added programmatic interface to training code and example Jupyter Notebooks, including how to train with Common Voice data
- Added transparent handling of mixed sample rates and stereo audio in training inputs
- Moved CI setup to GitHub Actions, making code contributions easier to test
- Added configuration management via Coqpit, providing a more flexible config interface that's compatible with Coqui TTS
- Handle Opus audio files transparently in training inputs
- Added support for automatic dataset subset splitting
- Added support for automatic alphabet generation and loading
- Started publishing the training code CI for a faster notebook setup
- Refactor training code into self-contained modules and deprecate train.py as universal entry point for training
# Training Regimen + Hyperparameters for fine-tuning
The hyperparameters used to train the model are useful for fine-tuning. Thus, we document them here along with the training regimen and the hardware used (a server with 8 NVIDIA A100 GPUs, each with 40GB of VRAM). The full training configuration in JSON format is available [here](https://gist.github.com/reuben/6ced6a8b41e3d0849dafb7cae301e905).
The datasets used were:
- Common Voice 7.0 (with custom train/dev/test splits)
- Multilingual LibriSpeech (English, Opus)
- LibriSpeech
The optimal `lm_alpha` and `lm_beta` values with respect to Common Voice 7.0 (custom Coqui splits) and a large vocabulary language model are:
- lm_alpha: 0.5891777425167632
- lm_beta: 0.6619145283338659
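As a hedged sketch, these values map to the `lm_alpha`/`lm_beta` decoder settings, so an evaluation run might pass them like this (the entry point and file names are assumptions based on the training package layout):
```bash
python -m coqui_stt_training.evaluate \
  --test_files cv7-test.csv \
  --checkpoint_dir coqui-stt-1.0.0-checkpoint \
  --scorer_path large-vocab.scorer \
  --lm_alpha 0.5891777425167632 \
  --lm_beta 0.6619145283338659
```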
# Documentation
Documentation is available on [stt.readthedocs.io](https://stt.readthedocs.io/).
# Contact/Getting Help
1. [GitHub Discussions](https://github.com/coqui-ai/STT/discussions/) - best place to ask questions, get support, and discuss anything related to 🐸STT with other users.
2. [Gitter](https://gitter.im/coqui-ai/) - You can also join our Gitter chat.
3. [Issues](https://github.com/coqui-ai/STT/issues) - If you have discussed a problem and identified a bug in 🐸STT, or if you have a feature request, please open an issue in our repo. Please make sure you search for an already existing issue beforehand!
# Contributors to 1.0.0 release
- Alexandre Lissy
- Anon-Artist
- Anton Yaroshenko
- Catalin Voss
- CatalinVoss
- dag7dev
- Dustin Zubke
- Eren Gölge
- Erik Ziegler
- Francis Tyers
- Ideefixze
- Ilnar Salimzianov
- imrahul3610
- Jeremiah Rose
- Josh Meyer
- Kathy Reid
- Kelly Davis
- Kenneth Heafield
- NanoNabla
- Neil Stoker
- Reuben Morais
- zaptrem
We'd also like to thank all the members of our [Gitter chat room](https://gitter.im/coqui-ai/STT) who have been helping to shape this release!

12
SUPPORT.rst Normal file
View File

@@ -0,0 +1,12 @@
.. _support:
Contact/Getting Help
====================
There are several ways to contact us or to get help:
#. `Discourse Forums <https://discourse.mozilla.org/c/deep-speech>`_ - The `Deep Speech category on Discourse <https://discourse.mozilla.org/c/deep-speech>`_ is the first place to look. Search for keywords related to your question or problem to see if someone else has run into it already. If you can't find anything relevant there, search on our `issue tracker <https://github.com/mozilla/deepspeech/issues>`_ to see if there is an existing issue about your problem.
#. `Matrix chat <https://chat.mozilla.org/#/room/#machinelearning:mozilla.org>`_ - If your question is not addressed by either the `FAQ <https://github.com/mozilla/DeepSpeech/wiki#frequently-asked-questions>`_ or `Discourse Forums <https://discourse.mozilla.org/c/deep-speech>`_\ , you can contact us on the ``#machinelearning`` channel on `Mozilla Matrix <https://chat.mozilla.org/#/room/#machinelearning:mozilla.org>`_\ ; people there can try to answer/help
#. `Create a new issue <https://github.com/mozilla/deepspeech/issues>`_ - Finally, if you have a bug report or a feature request that isn't already covered by an existing issue, please open an issue in our repo and fill the appropriate information on your hardware and software setup.

View File

@@ -1 +1 @@
training/coqui_stt_training/VERSION
training/deepspeech_training/VERSION

View File

@@ -9,23 +9,23 @@ index c7aa4cb63..e084bc27c 100644
+import java.io.PrintWriter;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
@@ -73,6 +74,8 @@ public final class FileWriteAction extends AbstractFileWriteAction {
*/
private final CharSequence fileContents;
+ private final Artifact output;
+
/** Minimum length (in chars) for content to be eligible for compression. */
private static final int COMPRESS_CHARS_THRESHOLD = 256;
@@ -90,6 +93,7 @@ public final class FileWriteAction extends AbstractFileWriteAction {
fileContents = new CompressedString((String) fileContents);
}
this.fileContents = fileContents;
+ this.output = output;
}
/**
@@ -230,11 +234,32 @@ public final class FileWriteAction extends AbstractFileWriteAction {
*/
@@ -59,7 +59,7 @@ index c7aa4cb63..e084bc27c 100644
+ computeKeyDebugWriter.close();
+ return rv;
}
/**
diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java
index 580788160..26883eb92 100644
@@ -74,9 +74,9 @@ index 580788160..26883eb92 100644
import java.util.Collections;
import java.util.LinkedHashMap;
@@ -91,6 +92,9 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie
private final CommandLine argv;
+ private final Iterable<Artifact> inputs;
+ private final Iterable<Artifact> outputs;
+
@@ -91,10 +91,10 @@ index 580788160..26883eb92 100644
+ this.inputs = inputs;
+ this.outputs = outputs;
}
@Override
@@ -312,23 +319,89 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie
@Override
protected String computeKey() {
+ boolean genruleSetup = String.valueOf(Iterables.get(inputs, 0).getExecPath()).contains("genrule/genrule-setup.sh");
@@ -182,14 +182,14 @@ index 580788160..26883eb92 100644
+ }
+ return rv;
}
@Override
diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java
index 3559fffde..3ba39617c 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java
@@ -1111,10 +1111,30 @@ public class CppCompileAction extends AbstractAction
@Override
public String computeKey() {
+ // ".ckd" Compute Key Debug
@@ -216,7 +216,7 @@ index 3559fffde..3ba39617c 100644
+ for (Map.Entry<String, String> entry : executionInfo.entrySet()) {
+ computeKeyDebugWriter.println("EXECINFO: " + entry.getKey() + "=" + entry.getValue());
+ }
// For the argv part of the cache key, ignore all compiler flags that explicitly denote module
// file (.pcm) inputs. Depending on input discovery, some of the unused ones are removed from
@@ -1124,6 +1144,9 @@ public class CppCompileAction extends AbstractAction
@@ -226,7 +226,7 @@ index 3559fffde..3ba39617c 100644
+ for (String input : compileCommandLine.getArgv(getInternalOutputFile(), null)) {
+ computeKeyDebugWriter.println("COMMAND: " + input);
+ }
/*
* getArgv() above captures all changes which affect the compilation
@@ -1133,19 +1156,31 @@ public class CppCompileAction extends AbstractAction
@@ -260,5 +260,5 @@ index 3559fffde..3ba39617c 100644
+ computeKeyDebugWriter.close();
+ return rv;
}
@Override
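For context, this patch makes Bazel dump a ".ckd" ("compute key debug") file describing each action's cache key. A hedged sketch of how such dumps would be used, assuming they land next to the corresponding action outputs (paths illustrative): run the same build twice and diff the dumps to find the input or flag that breaks action caching.
```bash
# Compare the compute-key dumps of the same action from two build trees
diff /tmp/build-a/libdeepspeech.so.ckd /tmp/build-b/libdeepspeech.so.ckd
```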

View File

@@ -2,12 +2,11 @@
"""
Tool for comparing two wav samples
"""
import argparse
import sys
import argparse
import numpy as np
from coqui_stt_training.util.audio import AUDIO_TYPE_NP, mean_dbfs
from coqui_stt_training.util.sample_collections import load_sample
from deepspeech_training.util.audio import AUDIO_TYPE_NP, mean_dbfs
from deepspeech_training.util.sample_collections import load_sample
def fail(message):
@@ -19,29 +18,15 @@ def compare_samples():
sample1 = load_sample(CLI_ARGS.sample1).unpack()
sample2 = load_sample(CLI_ARGS.sample2).unpack()
if sample1.audio_format != sample2.audio_format:
fail(
"Samples differ on: audio-format ({} and {})".format(
sample1.audio_format, sample2.audio_format
)
)
if abs(sample1.duration - sample2.duration) > 0.001:
fail(
"Samples differ on: duration ({} and {})".format(
sample1.duration, sample2.duration
)
)
fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
if sample1.duration != sample2.duration:
fail('Samples differ on: duration ({} and {})'.format(sample1.duration, sample2.duration))
sample1.change_audio_type(AUDIO_TYPE_NP)
sample2.change_audio_type(AUDIO_TYPE_NP)
samples = [sample1, sample2]
largest = np.argmax([sample1.audio.shape[0], sample2.audio.shape[0]])
smallest = (largest + 1) % 2
samples[largest].audio = samples[largest].audio[: len(samples[smallest].audio)]
audio_diff = samples[largest].audio - samples[smallest].audio
audio_diff = sample1.audio - sample2.audio
diff_dbfs = mean_dbfs(audio_diff)
differ_msg = "Samples differ on: sample data ({:0.2f} dB difference) ".format(
diff_dbfs
)
equal_msg = "Samples are considered equal ({:0.2f} dB difference)".format(diff_dbfs)
differ_msg = 'Samples differ on: sample data ({:0.2f} dB difference) '.format(diff_dbfs)
equal_msg = 'Samples are considered equal ({:0.2f} dB difference)'.format(diff_dbfs)
if CLI_ARGS.if_differ:
if diff_dbfs <= CLI_ARGS.threshold:
fail(equal_msg)
@@ -60,17 +45,13 @@ )
)
parser.add_argument("sample1", help="Filename of sample 1 to compare")
parser.add_argument("sample2", help="Filename of sample 2 to compare")
parser.add_argument(
"--threshold",
type=float,
default=-60.0,
help="dB of sample deltas above which they are considered different",
)
parser.add_argument("--threshold", type=float, default=-60.0,
help="dB of sample deltas above which they are considered different")
parser.add_argument(
"--if-differ",
action="store_true",
help="If to succeed and return status code 0 on different signals and fail on equal ones (inverse check)."
"This will still fail on different formats or durations.",
"This will still fail on different formats or durations.",
)
parser.add_argument(
"--no-success-output",

View File

@@ -1,136 +1,121 @@
#!/usr/bin/env python
"""
'''
Tool for building a combined SDB or CSV sample-set from other sets
Use 'python3 data_set_tool.py -h' for help
"""
import argparse
'''
import sys
import argparse
import progressbar
from pathlib import Path
import progressbar
from coqui_stt_training.util.audio import (
AUDIO_TYPE_OPUS,
from deepspeech_training.util.audio import (
AUDIO_TYPE_PCM,
AUDIO_TYPE_OPUS,
AUDIO_TYPE_WAV,
change_audio_types,
)
from coqui_stt_training.util.augmentations import (
SampleAugmentation,
apply_sample_augmentations,
parse_augmentations,
)
from coqui_stt_training.util.downloader import SIMPLE_BAR
from coqui_stt_training.util.sample_collections import (
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.sample_collections import (
CSVWriter,
DirectSDBWriter,
TarWriter,
samples_from_sources,
)
from deepspeech_training.util.augmentations import (
parse_augmentations,
apply_sample_augmentations,
SampleAugmentation
)
AUDIO_TYPE_LOOKUP = {"wav": AUDIO_TYPE_WAV, "opus": AUDIO_TYPE_OPUS}
AUDIO_TYPE_LOOKUP = {'wav': AUDIO_TYPE_WAV, 'opus': AUDIO_TYPE_OPUS}
def build_data_set():
audio_type = AUDIO_TYPE_LOOKUP[CLI_ARGS.audio_type]
augmentations = parse_augmentations(CLI_ARGS.augment)
print(f"Parsed augmentations from flags: {augmentations}")
if any(not isinstance(a, SampleAugmentation) for a in augmentations):
print(
"Warning: Some of the specified augmentations will not get applied, as this tool only supports "
"overlay, codec, reverb, resample and volume."
)
print('Warning: Some of the specified augmentations will not get applied, as this tool only supports '
'overlay, codec, reverb, resample and volume.')
extension = Path(CLI_ARGS.target).suffix.lower()
labeled = not CLI_ARGS.unlabeled
if extension == ".csv":
writer = CSVWriter(
CLI_ARGS.target, absolute_paths=CLI_ARGS.absolute_paths, labeled=labeled
)
elif extension == ".sdb":
writer = DirectSDBWriter(
CLI_ARGS.target, audio_type=audio_type, labeled=labeled
)
elif extension == ".tar":
writer = TarWriter(
CLI_ARGS.target, labeled=labeled, gz=False, include=CLI_ARGS.include
)
elif extension == ".tgz" or CLI_ARGS.target.lower().endswith(".tar.gz"):
writer = TarWriter(
CLI_ARGS.target, labeled=labeled, gz=True, include=CLI_ARGS.include
)
if extension == '.csv':
writer = CSVWriter(CLI_ARGS.target, absolute_paths=CLI_ARGS.absolute_paths, labeled=labeled)
elif extension == '.sdb':
writer = DirectSDBWriter(CLI_ARGS.target, audio_type=audio_type, labeled=labeled)
elif extension == '.tar':
writer = TarWriter(CLI_ARGS.target, labeled=labeled, gz=False, include=CLI_ARGS.include)
elif extension == '.tgz' or CLI_ARGS.target.lower().endswith('.tar.gz'):
writer = TarWriter(CLI_ARGS.target, labeled=labeled, gz=True, include=CLI_ARGS.include)
else:
print(
"Unknown extension of target file - has to be either .csv, .sdb, .tar, .tar.gz or .tgz"
)
print('Unknown extension of target file - has to be either .csv, .sdb, .tar, .tar.gz or .tgz')
sys.exit(1)
with writer:
samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled)
num_samples = len(samples)
if augmentations:
samples = apply_sample_augmentations(
samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations
)
samples = apply_sample_augmentations(samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations)
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
for sample in bar(
change_audio_types(
for sample in bar(change_audio_types(
samples,
audio_type=audio_type,
bitrate=CLI_ARGS.bitrate,
processes=CLI_ARGS.workers,
)
):
processes=CLI_ARGS.workers)):
writer.add(sample)
def handle_args():
parser = argparse.ArgumentParser(
description="Tool for building a combined SDB or CSV sample-set from other sets"
description='Tool for building a combined SDB or CSV sample-set from other sets'
)
parser.add_argument(
"sources",
nargs="+",
help="Source CSV and/or SDB files - "
"Note: For getting a correctly ordered target set, source SDBs have to have their samples "
"already ordered from shortest to longest.",
'sources',
nargs='+',
help='Source CSV and/or SDB files - '
'Note: For getting a correctly ordered target set, source SDBs have to have their samples '
'already ordered from shortest to longest.',
)
parser.add_argument("target", help="SDB, CSV or TAR(.gz) file to create")
parser.add_argument(
"--audio-type",
default="opus",
'target',
help='SDB, CSV or TAR(.gz) file to create'
)
parser.add_argument(
'--audio-type',
default='opus',
choices=AUDIO_TYPE_LOOKUP.keys(),
help="Audio representation inside target SDB",
help='Audio representation inside target SDB',
)
parser.add_argument(
"--bitrate",
'--bitrate',
type=int,
help="Bitrate for lossy compressed SDB samples like in case of --audio-type opus",
help='Bitrate for lossy compressed SDB samples like in case of --audio-type opus',
)
parser.add_argument(
"--workers", type=int, default=None, help="Number of encoding SDB workers"
)
parser.add_argument(
"--unlabeled",
action="store_true",
help="If to build an data-set with unlabeled (audio only) samples - "
"typically used for building noise augmentation corpora",
'--unlabeled',
action='store_true',
help='If to build an data-set with unlabeled (audio only) samples - '
'typically used for building noise augmentation corpora',
)
parser.add_argument(
"--absolute-paths",
action="store_true",
help="If to reference samples by their absolute paths when writing CSV files",
'--absolute-paths',
action='store_true',
help='If to reference samples by their absolute paths when writing CSV files',
)
parser.add_argument(
"--augment",
action="append",
help="Add an augmentation operation",
)
parser.add_argument(
"--include",
action="append",
help="Adds a file to the root directory of .tar(.gz) targets",
'--include',
action='append',
help='Adds a file to the root directory of .tar(.gz) targets',
)
return parser.parse_args()
if __name__ == "__main__":
CLI_ARGS = handle_args()
build_data_set()
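For reference, the extension dispatch above picks a writer class per target suffix. A minimal standalone sketch of the same logic, assuming the deepspeech_training package is installed; make_writer is a hypothetical helper, not part of the tool:

from pathlib import Path

from deepspeech_training.util.sample_collections import (
    CSVWriter,
    DirectSDBWriter,
    TarWriter,
)

def make_writer(target, audio_type, labeled, include=None, absolute_paths=False):
    # Hypothetical helper mirroring build_data_set()'s extension dispatch
    suffix = Path(target).suffix.lower()
    if suffix == ".csv":
        return CSVWriter(target, absolute_paths=absolute_paths, labeled=labeled)
    if suffix == ".sdb":
        return DirectSDBWriter(target, audio_type=audio_type, labeled=labeled)
    if suffix == ".tar":
        return TarWriter(target, labeled=labeled, gz=False, include=include)
    if suffix == ".tgz" or target.lower().endswith(".tar.gz"):
        return TarWriter(target, labeled=labeled, gz=True, include=include)
    raise ValueError("target must be .csv, .sdb, .tar, .tar.gz or .tgz")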


@ -4,7 +4,8 @@ import os
import tarfile
import pandas
from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]


@ -4,7 +4,8 @@ import os
import tarfile
import pandas
from deepspeech_training.util.importers import get_importers_parser
COLUMNNAMES = ["wav_filename", "wav_filesize", "transcript"]


@ -5,21 +5,21 @@ Ministère de l'Économie, des Finances et de la Relance
"""
import csv
import decimal
import hashlib
import math
import os
import re
import subprocess
import sys
import unicodedata
import xml.etree.ElementTree as ET
import zipfile
from glob import glob
from multiprocessing import Pool
import progressbar
import sox
try:
from num2words import num2words
@ -27,19 +27,19 @@ except ImportError as ex:
print("pip install num2words")
sys.exit(1)
import json
import requests
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.helpers import secs_to_hours
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report,
)
from ds_ctcdecoder import Alphabet
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
@ -50,187 +50,58 @@ MIN_SECS = 0.85
DATASET_RELEASE_CSV = "https://data.economie.gouv.fr/explore/dataset/transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020/download/?format=csv&timezone=Europe/Berlin&lang=fr&use_labels_for_header=true&csv_separator=%3B"
DATASET_RELEASE_SHA = [
(
"863d39a06a388c6491c6ff2f6450b151f38f1b57",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001",
),
(
"2f3a0305aa04c61220bb00b5a4e553e45dbf12e1",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.002",
),
(
"5e55e9f1f844097349188ac875947e5a3d7fe9f1",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.003",
),
(
"8bf54842cf07948ca5915e27a8bd5fa5139c06ae",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.004",
),
(
"c8963504aadc015ac48f9af80058a0bb3440b94f",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.005",
),
(
"d95e225e908621d83ce4e9795fd108d9d310e244",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.006",
),
(
"de6ed9c2b0ee80ca879aae8ba7923cc93217d811",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.007",
),
(
"234283c47dacfcd4450d836c52c25f3e807fc5f2",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.008",
),
(
"4e6b67a688639bb72f8cd81782eaba604a8d32a6",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.009",
),
(
"4165a51389777c8af8e6253d87bdacb877e8b3b0",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.010",
),
(
"34322e7009780d97ef5bd02bf2f2c7a31f00baff",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.011",
),
(
"48c5be3b2ca9d6108d525da6a03e91d93a95dbac",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.012",
),
(
"87573172f506a189c2ebc633856fe11a2e9cd213",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.013",
),
(
"6ab2c9e508e9278d5129f023e018725c4a7c69e8",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.014",
),
(
"4f84df831ef46dce5d3ab3e21817687a2d8c12d0",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.015",
),
(
"e69bfb079885c299cb81080ef88b1b8b57158aa6",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.016",
),
(
"5f764ba788ee273981cf211b242c29b49ca22c5e",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.017",
),
(
"b6aa81a959525363223494830c1e7307d4c4bae6",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.018",
),
(
"91ddcf43c7bf113a6f2528b857c7ec22a50a148a",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.019",
),
(
"fa1b29273dd77b9a7494983a2f9ae52654b931d7",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.020",
),
(
"1113aef4f5e2be2f7fbf2d54b6c710c1c0e7135f",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.021",
),
(
"ce6420d5d0b6b5135ba559f83e1a82d4d615c470",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.022",
),
(
"d0976ed292ac24fcf1590d1ea195077c74b05471",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.023",
),
(
"ec746cd6af066f62d9bf8d3b2f89174783ff4e3c",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.024",
),
(
"570d9e1e84178e32fd867171d4b3aaecda1fd4fb",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.025",
),
(
"c29ccc7467a75b2cae3d7f2e9fbbb2ab276cb8ac",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.026",
),
(
"08406a51146d88e208704ce058c060a1e44efa50",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.027",
),
(
"199aedad733a78ea1e7d47def9c71c6fd5795e02",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.028",
),
(
"db856a068f92fb4f01f410bba42c7271de0f231a",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.029",
),
(
"e3c0135f16c6c9d25a09dcb4f99a685438a84740",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.030",
),
(
"e51b8bb9c0ae4339f98b4f21e6d29b825109f0ac",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.031",
),
(
"be5e80cbc49b59b31ae33c30576ef0e1a162d84e",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.032",
),
(
"501df58e3ff55fcfd75b93dab57566dc536948b8",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.033",
),
(
"1a114875811a8cdcb8d85a9f6dbee78be3e05131",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.034",
),
(
"465d824e7ee46448369182c0c28646d155a2249b",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.035",
),
(
"37f341b1b266d143eb73138c31cfff3201b9d619",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.036",
),
(
"9e7d8255987a8a77a90e0d4b55c8fd38b9fb5694",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.037",
),
(
"54886755630cb080a53098cb1b6c951c6714a143",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.038",
),
(
"4b7cbb0154697be795034f7a49712e882a97197a",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.039",
),
(
"c8e1e565a0e7a1f6ff1dbfcefe677aa74a41d2f2",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.040",
),
("863d39a06a388c6491c6ff2f6450b151f38f1b57", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001"),
("2f3a0305aa04c61220bb00b5a4e553e45dbf12e1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.002"),
("5e55e9f1f844097349188ac875947e5a3d7fe9f1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.003"),
("8bf54842cf07948ca5915e27a8bd5fa5139c06ae", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.004"),
("c8963504aadc015ac48f9af80058a0bb3440b94f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.005"),
("d95e225e908621d83ce4e9795fd108d9d310e244", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.006"),
("de6ed9c2b0ee80ca879aae8ba7923cc93217d811", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.007"),
("234283c47dacfcd4450d836c52c25f3e807fc5f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.008"),
("4e6b67a688639bb72f8cd81782eaba604a8d32a6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.009"),
("4165a51389777c8af8e6253d87bdacb877e8b3b0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.010"),
("34322e7009780d97ef5bd02bf2f2c7a31f00baff", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.011"),
("48c5be3b2ca9d6108d525da6a03e91d93a95dbac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.012"),
("87573172f506a189c2ebc633856fe11a2e9cd213", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.013"),
("6ab2c9e508e9278d5129f023e018725c4a7c69e8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.014"),
("4f84df831ef46dce5d3ab3e21817687a2d8c12d0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.015"),
("e69bfb079885c299cb81080ef88b1b8b57158aa6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.016"),
("5f764ba788ee273981cf211b242c29b49ca22c5e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.017"),
("b6aa81a959525363223494830c1e7307d4c4bae6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.018"),
("91ddcf43c7bf113a6f2528b857c7ec22a50a148a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.019"),
("fa1b29273dd77b9a7494983a2f9ae52654b931d7", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.020"),
("1113aef4f5e2be2f7fbf2d54b6c710c1c0e7135f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.021"),
("ce6420d5d0b6b5135ba559f83e1a82d4d615c470", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.022"),
("d0976ed292ac24fcf1590d1ea195077c74b05471", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.023"),
("ec746cd6af066f62d9bf8d3b2f89174783ff4e3c", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.024"),
("570d9e1e84178e32fd867171d4b3aaecda1fd4fb", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.025"),
("c29ccc7467a75b2cae3d7f2e9fbbb2ab276cb8ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.026"),
("08406a51146d88e208704ce058c060a1e44efa50", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.027"),
("199aedad733a78ea1e7d47def9c71c6fd5795e02", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.028"),
("db856a068f92fb4f01f410bba42c7271de0f231a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.029"),
("e3c0135f16c6c9d25a09dcb4f99a685438a84740", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.030"),
("e51b8bb9c0ae4339f98b4f21e6d29b825109f0ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.031"),
("be5e80cbc49b59b31ae33c30576ef0e1a162d84e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.032"),
("501df58e3ff55fcfd75b93dab57566dc536948b8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.033"),
("1a114875811a8cdcb8d85a9f6dbee78be3e05131", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.034"),
("465d824e7ee46448369182c0c28646d155a2249b", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.035"),
("37f341b1b266d143eb73138c31cfff3201b9d619", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.036"),
("9e7d8255987a8a77a90e0d4b55c8fd38b9fb5694", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.037"),
("54886755630cb080a53098cb1b6c951c6714a143", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.038"),
("4b7cbb0154697be795034f7a49712e882a97197a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.039"),
("c8e1e565a0e7a1f6ff1dbfcefe677aa74a41d2f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.040"),
]
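Each tuple above pairs a SHA-1 checksum with one part of the multi-part zip. A minimal sketch of the verification step, assuming the parts have already been downloaded into target_dir; sha1_of and verify_parts are illustrative helpers, not from the importer:

import hashlib
import os

def sha1_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so large archive parts never fully load into memory
    h = hashlib.sha1()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

def verify_parts(target_dir, release_sha):
    for sha1, filename in release_sha:
        path = os.path.join(target_dir, filename)
        assert sha1_of(path) == sha1, "checksum mismatch for " + filename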
def _download_and_preprocess_data(csv_url, target_dir):
dataset_sources = os.path.join(
target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "data.txt"
)
if os.path.exists(dataset_sources):
return dataset_sources
# Making path absolute
target_dir = os.path.abspath(target_dir)
csv_ref = requests.get(csv_url).text.split("\r\n")[1:-1]
for part in csv_ref:
part_filename = (
requests.head(part)
.headers.get("Content-Disposition")
.split(" ")[1]
.split("=")[1]
.replace('"', "")
)
if not os.path.exists(os.path.join(target_dir, part_filename)):
part_path = maybe_download(part_filename, target_dir, part)
@ -255,18 +126,10 @@ def _download_and_preprocess_data(csv_url, target_dir):
assert csum == sha1
# Conditionally extract data
_maybe_extract(
target_dir,
"transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020",
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip",
"transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020.zip",
)
# Produce source text for extraction / conversion
return _maybe_create_sources(
os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020")
)
def _maybe_extract(target_dir, extracted_data, archive, final):
# If target_dir/extracted_data does not exist, extract archive in target_dir
@ -284,10 +147,7 @@ def _maybe_extract(target_dir, extracted_data, archive, final):
subprocess.check_call(cmdline, shell=True, cwd=target_dir)
assert os.path.exists(archive_path)
print(
'No directory "%s" - extracting archive %s ...'
% (extracted_path, archive_path)
)
with zipfile.ZipFile(archive_path) as zip_f:
zip_f.extractall(extracted_path)
@ -296,7 +156,6 @@ def _maybe_extract(target_dir, extracted_data, archive, final):
else:
print('Found directory "%s" - not extracting it from archive.' % extracted_path)
def _maybe_create_sources(dir):
dataset_sources = os.path.join(dir, "data.txt")
MP3 = glob(os.path.join(dir, "**", "*.mp3"))
@ -309,8 +168,8 @@ def _maybe_create_sources(dir):
for f_xml in XML:
b_mp3 = os.path.splitext(os.path.basename(f_mp3))[0]
b_xml = os.path.splitext(os.path.basename(f_xml))[0]
a_mp3 = b_mp3.split("_")
a_xml = b_xml.split("_")
score = 0
date_mp3 = a_mp3[0]
date_xml = a_xml[0]
@ -319,7 +178,7 @@ def _maybe_create_sources(dir):
continue
for i in range(min(len(a_mp3), len(a_xml))):
if a_mp3[i] == a_xml[i]:
score += 1
if score >= 1:
@ -328,7 +187,7 @@ def _maybe_create_sources(dir):
# sort by score
MP3_XML_Scores.sort(key=lambda x: x[2], reverse=True)
for s_mp3, s_xml, score in MP3_XML_Scores:
# print(s_mp3, s_xml, score)
if score not in MP3_XML_Fin:
MP3_XML_Fin[score] = {}
@ -349,14 +208,13 @@ def _maybe_create_sources(dir):
if os.path.getsize(mp3) > 0 and os.path.getsize(xml) > 0:
mp3 = os.path.relpath(mp3, dir)
xml = os.path.relpath(xml, dir)
ds.write("{},{},{:0.2e}\n".format(xml, mp3, 2.5e-4))
else:
print("Empty file {} or {}".format(mp3, xml), file=sys.stderr)
print("Missing XML pairs:", MP3, file=sys.stderr)
return dataset_sources
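The pairing above scores each MP3/XML candidate by counting matching underscore-separated basename components, then keeps the best-scoring pairs. A self-contained illustration of that scoring; name_match_score is a hypothetical restatement:

def name_match_score(b_mp3, b_xml):
    # Count positions where the underscore-separated parts agree
    a_mp3 = b_mp3.split("_")
    a_xml = b_xml.split("_")
    return sum(1 for i in range(min(len(a_mp3), len(a_xml))) if a_mp3[i] == a_xml[i])

print(name_match_score("20181017_Innovation_1", "20181017_Innovation"))  # 2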
def maybe_normalize_for_digits(label):
# first, try to identify numbers like "50 000", "260 000"
if " " in label:
@ -376,44 +234,30 @@ def maybe_normalize_for_digits(label):
date_or_time = re.compile(r"(\d{1,2}):(\d{2}):?(\d{2})?")
maybe_date_or_time = date_or_time.findall(s)
if len(maybe_date_or_time) > 0:
maybe_hours = maybe_date_or_time[0][0]
maybe_minutes = maybe_date_or_time[0][1]
maybe_seconds = maybe_date_or_time[0][2]
if len(maybe_seconds) > 0:
label = label.replace(
"{}:{}:{}".format(
maybe_hours, maybe_minutes, maybe_seconds
),
"{} heures {} minutes et {} secondes".format(
maybe_hours, maybe_minutes, maybe_seconds
),
)
label = label.replace("{}:{}:{}".format(maybe_hours, maybe_minutes, maybe_seconds), "{} heures {} minutes et {} secondes".format(maybe_hours, maybe_minutes, maybe_seconds))
else:
label = label.replace(
"{}:{}".format(maybe_hours, maybe_minutes),
"{} heures et {} minutes".format(
maybe_hours, maybe_minutes
),
)
label = label.replace("{}:{}".format(maybe_hours, maybe_minutes), "{} heures et {} minutes".format(maybe_hours, maybe_minutes))
new_label = []
# pylint: disable=too-many-nested-blocks
for s in label.split(" "):
if any(i.isdigit() for i in s):
s = s.replace(",", ".") # num2words requires "." for floats
s = s.replace('"', "") # clean some data, num2words would choke on 1959"
s = s.replace(",", ".") # num2words requires "." for floats
s = s.replace("\"", "") # clean some data, num2words would choke on 1959"
last_c = s[-1]
if not last_c.isdigit(): # num2words will choke on "0.6.", "24 ?"
s = s[:-1]
if any(
i.isalpha() for i in s
): # So we have any(isdigit()) **and** any(isalpha), like "3D"
ns = []
for c in s:
nc = c
if c.isdigit(): # convert "3" to "trois-"
try:
nc = num2words(c, lang="fr") + "-"
except decimal.InvalidOperation as ex:
@ -430,36 +274,22 @@ def maybe_normalize_for_digits(label):
new_label.append(s)
return " ".join(new_label)
def maybe_normalize_for_specials_chars(label):
label = label.replace("%", "pourcents")
label = label.replace("/", ", ") # clean intervals like 2019/2022 to "2019 2022"
label = label.replace("-", ", ") # clean intervals like 70-80 to "70 80"
label = label.replace("+", " plus ") # clean + and make it speakable
label = label.replace("", " euros ") # clean euro symbol and make it speakable
label = label.replace(
"., ", ", "
) # clean some strange "4.0., " (20181017_Innovation.xml)
label = label.replace(
"°", " degré "
) # clean some strange "°5" (20181210_EtatsGeneraux-1000_fre_750_und.xml)
label = label.replace("...", ".") # remove ellipsis
label = label.replace("..", ".") # remove broken ellipsis
label = label.replace(
"", "mètre-carrés"
) # 20150616_Defi_Climat_3_wmv_0_fre_minefi.xml
label = label.replace(
"[end]", ""
) # broken tag in 20150123_Entretiens_Tresor_PGM_wmv_0_fre_minefi.xml
label = label.replace(
u"\xB8c", " ç"
) # strange cedilla in 20150417_Printemps_Economie_2_wmv_0_fre_minefi.xml
label = label.replace(
"C0²", "CO 2"
) # 20121016_Syteme_sante_copie_wmv_0_fre_minefi.xml
label = label.replace("/", ", ") # clean intervals like 2019/2022 to "2019 2022"
label = label.replace("-", ", ") # clean intervals like 70-80 to "70 80"
label = label.replace("+", " plus ") # clean + and make it speakable
label = label.replace("", " euros ") # clean euro symbol and make it speakable
label = label.replace("., ", ", ") # clean some strange "4.0., " (20181017_Innovation.xml)
label = label.replace("°", " degré ") # clean some strange "°5" (20181210_EtatsGeneraux-1000_fre_750_und.xml)
label = label.replace("...", ".") # remove ellipsis
label = label.replace("..", ".") # remove broken ellipsis
label = label.replace("", "mètre-carrés") # 20150616_Defi_Climat_3_wmv_0_fre_minefi.xml
label = label.replace("[end]", "") # broken tag in 20150123_Entretiens_Tresor_PGM_wmv_0_fre_minefi.xml
label = label.replace(u'\xB8c', " ç") # strange cedilla in 20150417_Printemps_Economie_2_wmv_0_fre_minefi.xml
label = label.replace("C0²", "CO 2") # 20121016_Syteme_sante_copie_wmv_0_fre_minefi.xml
return label
def maybe_normalize_for_anglicisms(label):
label = label.replace("B2B", "B to B")
label = label.replace("B2C", "B to C")
@ -467,14 +297,12 @@ def maybe_normalize_for_anglicisms(label):
label = label.replace("@", "at ")
return label
def maybe_normalize(label):
label = maybe_normalize_for_specials_chars(label)
label = maybe_normalize_for_anglicisms(label)
label = maybe_normalize_for_digits(label)
return label
def one_sample(sample):
file_size = -1
frames = 0
@ -488,33 +316,14 @@ def one_sample(sample):
label = label_filter_fun(sample[5])
sample_id = sample[6]
_wav_filename = os.path.basename(
audio_source.replace(".wav", "_{:06}.wav".format(sample_id))
)
wav_fullname = os.path.join(target_dir, dataset_basename, _wav_filename)
if not os.path.exists(wav_fullname):
subprocess.check_output(
[
"ffmpeg",
"-i",
audio_source,
"-ss",
str(start_time),
"-t",
str(duration),
"-c",
"copy",
wav_fullname,
],
stdin=subprocess.DEVNULL,
stderr=subprocess.STDOUT,
)
subprocess.check_output(["ffmpeg", "-i", audio_source, "-ss", str(start_time), "-t", str(duration), "-c", "copy", wav_fullname], stdin=subprocess.DEVNULL, stderr=subprocess.STDOUT)
file_size = os.path.getsize(wav_fullname)
frames = int(
subprocess.check_output(["soxi", "-s", wav_fullname], stderr=subprocess.STDOUT)
)
_counter = get_counter()
_rows = []
@ -525,13 +334,13 @@ def one_sample(sample):
elif label is None:
# Excluding samples that failed on label validation
_counter["invalid_label"] += 1
elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)):
# Excluding samples that are too short to fit the transcript
_counter["too_short"] += 1
elif frames / SAMPLE_RATE < MIN_SECS:
# Excluding samples that are too short
_counter["too_short"] += 1
elif frames / SAMPLE_RATE > MAX_SECS:
# Excluding very long samples to keep a reasonable batch-size
_counter["too_long"] += 1
else:
@ -543,71 +352,56 @@ def one_sample(sample):
return (_counter, _rows)
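The first too_short test above budgets roughly two 10 ms feature windows per transcript character; a worked check under that assumption, for a 2-second clip at 16 kHz:

SAMPLE_RATE = 16000
frames = 2 * SAMPLE_RATE  # a 2-second clip
# 2 s -> 2000 ms -> 200 windows of 10 ms -> room for about 100 characters
max_chars = int(frames / SAMPLE_RATE * 1000 / 10 / 2)
print(max_chars)  # 100: longer transcripts get counted as "too_short"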
def _maybe_import_data(xml_file, audio_source, target_dir, rel_tol=1e-1):
dataset_basename = os.path.splitext(os.path.split(xml_file)[1])[0]
wav_root = os.path.join(target_dir, dataset_basename)
if not os.path.exists(wav_root):
os.makedirs(wav_root)
source_frames = int(
subprocess.check_output(["soxi", "-s", audio_source], stderr=subprocess.STDOUT)
)
print("Source audio length: %s" % secs_to_hours(source_frames / SAMPLE_RATE))
# Get audiofile path and transcript for each sentence in tsv
samples = []
tree = ET.parse(xml_file)
root = tree.getroot()
seq_id = 0
this_time = 0.0
this_duration = 0.0
prev_time = 0.0
prev_duration = 0.0
this_text = ""
for child in root:
if child.tag == "row":
cur_time = float(child.attrib["timestamp"])
cur_time = float(child.attrib["timestamp"])
cur_duration = float(child.attrib["timedur"])
cur_text = child.text
cur_text = child.text
if this_time == 0.0:
this_time = cur_time
delta = cur_time - (prev_time + prev_duration)
# rel_tol value is made from trial/error to try and compromise between:
# - cutting enough to skip missing words
# - not too short, not too long sentences
is_close = math.isclose(
cur_time, this_time + this_duration, rel_tol=rel_tol
)
is_short = (this_duration + cur_duration + delta) < MAX_SECS
# when the previous element is close enough **and** this does not
# go over MAX_SECS, we append content
if is_close and is_short:
this_duration += cur_duration + delta
this_text += cur_text
else:
samples.append(
(
audio_source,
target_dir,
dataset_basename,
this_time,
this_duration,
this_text,
seq_id,
)
)
this_time = cur_time
this_duration = cur_duration
this_text = cur_text
seq_id += 1
prev_time = cur_time
prev_duration = cur_duration
# Keep track of how many samples are good vs. problematic
@ -631,27 +425,21 @@ def _maybe_import_data(xml_file, audio_source, target_dir, rel_tol=1e-1):
assert len(_rows) == imported_samples
print_import_report(_counter, SAMPLE_RATE, MAX_SECS)
print(
"Import efficiency: %.1f%%" % ((_counter["total_time"] / source_frames) * 100)
)
print("Import efficiency: %.1f%%" % ((_counter["total_time"] / source_frames)*100))
print("")
return _counter, _rows
def _maybe_convert_wav(mp3_filename, _wav_filename):
if not os.path.exists(_wav_filename):
print("Converting {} to WAV file: {}".format(mp3_filename, _wav_filename))
transformer = sox.Transformer()
transformer.convert(
samplerate=SAMPLE_RATE, n_channels=CHANNELS, bitdepth=BIT_DEPTH
)
try:
transformer.build(mp3_filename, _wav_filename)
except sox.core.SoxError:
pass
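The conversion above uses the pysox Transformer API; a minimal standalone sketch with placeholder filenames (the SoX binary must be installed on the system):

import sox

SAMPLE_RATE, CHANNELS, BIT_DEPTH = 16000, 1, 16

transformer = sox.Transformer()
transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS, bitdepth=BIT_DEPTH)
# Writes a 16 kHz mono 16-bit WAV next to the (placeholder) MP3 input
transformer.build("input.mp3", "output.wav")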
def write_general_csv(target_dir, _rows, _counter):
target_csv_template = os.path.join(target_dir, "ccpmf_{}.csv")
with open(target_csv_template.format("train"), "w") as train_csv_file: # 80%
@ -673,13 +461,7 @@ def write_general_csv(target_dir, _rows, _counter):
writer = dev_writer
else:
writer = train_writer
writer.writerow(
{
"wav_filename": item[0],
"wav_filesize": item[1],
"transcript": item[2],
}
)
writer.writerow({"wav_filename": item[0], "wav_filesize": item[1], "transcript": item[2]})
print("")
print("~~~~ FINAL STATISTICS ~~~~")
@ -687,21 +469,11 @@ def write_general_csv(target_dir, _rows, _counter):
print("~~~~ (FINAL STATISTICS) ~~~~")
print("")
if __name__ == "__main__":
PARSER = get_importers_parser(
description="Import XML from Conference Centre for Economics, France"
)
PARSER.add_argument("target_dir", help="Destination directory")
PARSER.add_argument(
"--filter_alphabet",
help="Exclude samples with characters not in provided alphabet",
)
PARSER.add_argument(
"--normalize",
action="store_true",
help="Converts diacritic characters to their base ones",
)
PARSER.add_argument("--filter_alphabet", help="Exclude samples with characters not in provided alphabet")
PARSER.add_argument("--normalize", action="store_true", help="Converts diacritic characters to their base ones")
PARAMS = PARSER.parse_args()
validate_label = get_validate_label(PARAMS)
@ -709,11 +481,9 @@ if __name__ == "__main__":
def label_filter_fun(label):
if PARAMS.normalize:
label = (
unicodedata.normalize("NFKD", label.strip())
.encode("ascii", "ignore")
.decode("ascii", "ignore")
)
label = maybe_normalize(label)
label = validate_label(label)
if ALPHABET and label:
@ -723,9 +493,7 @@ if __name__ == "__main__":
label = None
return label
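The --normalize branch above transliterates to ASCII by NFKD-decomposing and dropping the combining marks; for example:

import unicodedata

label = "Économie française"
ascii_label = (
    unicodedata.normalize("NFKD", label)
    .encode("ascii", "ignore")
    .decode("ascii", "ignore")
)
print(ascii_label)  # Economie francaise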
dataset_sources = _download_and_preprocess_data(
csv_url=DATASET_RELEASE_CSV, target_dir=PARAMS.target_dir
)
sources_root_dir = os.path.dirname(dataset_sources)
all_counter = get_counter()
all_rows = []
@ -736,14 +504,9 @@ if __name__ == "__main__":
this_mp3 = os.path.join(sources_root_dir, d[1])
this_rel = float(d[2])
wav_filename = os.path.join(
sources_root_dir,
os.path.splitext(os.path.basename(this_mp3))[0] + ".wav",
)
_maybe_convert_wav(this_mp3, wav_filename)
counter, rows = _maybe_import_data(
this_xml, wav_filename, sources_root_dir, this_rel
)
all_counter += counter
all_rows += rows


@ -1,21 +1,22 @@
#!/usr/bin/env python
import csv
import os
import subprocess
import sys
import tarfile
from glob import glob
from multiprocessing import Pool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
print_import_report,
)
from deepspeech_training.util.importers import validate_label_eng as validate_label
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
@ -34,7 +35,7 @@ def _download_and_preprocess_data(target_dir):
archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL)
# Conditionally extract common voice data
_maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
# Conditionally convert common voice CSV files and mp3 data to DeepSpeech CSVs and wav
_maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME)


@ -3,7 +3,7 @@
Broadly speaking, this script takes the audio downloaded from Common Voice
for a certain language, in addition to the *.tsv files output by CorporaCreator,
and the script formats the data and transcripts to be in a state usable by
DeepSpeech.py
Use "python3 import_cv2.py -h" for help
"""
import csv
@ -14,15 +14,16 @@ from multiprocessing import Pool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report,
)
from ds_ctcdecoder import Alphabet
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
@ -40,11 +41,7 @@ class LabelFilter:
def filter(self, label):
if self.normalize:
label = (
unicodedata.normalize("NFKD", label.strip())
.encode("ascii", "ignore")
.decode("ascii", "ignore")
)
label = unicodedata.normalize("NFKD", label.strip()).encode("ascii", "ignore").decode("ascii", "ignore")
label = self.validate_fun(label)
if self.alphabet and label and not self.alphabet.CanEncode(label):
label = None
@ -100,15 +97,7 @@ def one_sample(sample):
return (counter, rows)
def _maybe_convert_set(
dataset,
tsv_dir,
audio_dir,
filter_obj,
space_after_every_character=None,
rows=None,
exclude=None,
):
exclude_transcripts = set()
exclude_speakers = set()
if exclude is not None:
@ -127,13 +116,7 @@ def _maybe_convert_set(
with open(input_tsv, encoding="utf-8") as input_tsv_file:
reader = csv.DictReader(input_tsv_file, delimiter="\t")
for row in reader:
samples.append(
(
os.path.join(audio_dir, row["path"]),
row["sentence"],
row["client_id"],
)
)
counter = get_counter()
num_samples = len(samples)
@ -141,9 +124,7 @@ def _maybe_convert_set(
print("Importing mp3 files...")
pool = Pool(initializer=init_worker, initargs=(PARAMS,))
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
for i, processed in enumerate(
pool.imap_unordered(one_sample, samples), start=1
):
counter += processed[0]
rows += processed[1]
bar.update(i)
@ -157,9 +138,9 @@ def _maybe_convert_set(
print_import_report(counter, SAMPLE_RATE, MAX_SECS)
output_csv = os.path.join(os.path.abspath(audio_dir), dataset + ".csv")
print("Saving new Coqui STT-formatted CSV file to: ", output_csv)
print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)
with open(output_csv, "w", encoding="utf-8", newline="") as output_csv_file:
print("Writing CSV file for train.py as: ", output_csv)
print("Writing CSV file for DeepSpeech.py as: ", output_csv)
writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
writer.writeheader()
bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
@ -188,20 +169,12 @@ def _maybe_convert_set(
def _preprocess_data(tsv_dir, audio_dir, space_after_every_character=False):
exclude = []
for dataset in ["test", "dev", "train", "validated", "other"]:
set_samples = _maybe_convert_set(
dataset, tsv_dir, audio_dir, space_after_every_character
)
if dataset in ["test", "dev"]:
exclude += set_samples
if dataset == "validated":
_maybe_convert_set(
"train-all",
tsv_dir,
audio_dir,
space_after_every_character,
rows=set_samples,
exclude=exclude,
)
_maybe_convert_set("train-all", tsv_dir, audio_dir, space_after_every_character,
rows=set_samples, exclude=exclude)
def _maybe_convert_wav(mp3_filename, wav_filename):
@ -239,9 +212,7 @@ def parse_args():
def main():
audio_dir = (
PARAMS.audio_dir if PARAMS.audio_dir else os.path.join(PARAMS.tsv_dir, "clips")
)
_preprocess_data(PARAMS.tsv_dir, audio_dir, PARAMS.space_after_every_character)


@ -2,7 +2,6 @@
import codecs
import fnmatch
import os
import random
import subprocess
import sys
import unicodedata
@ -10,7 +9,8 @@ import unicodedata
import librosa
import pandas
import soundfile # <= Has an external dependency on libsndfile
from deepspeech_training.util.importers import validate_label_eng as validate_label
# Prerequisite: Having the sph2pipe tool in your PATH:
# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
@ -236,18 +236,14 @@ def _split_and_resample_wav(origAudio, start_time, stop_time, new_wav_file):
def _split_sets(filelist):
"""
randomply split the datasets into train, validation, and test sets where the size of the
validation and test sets are determined by the `get_sample_size` function.
"""
random.shuffle(filelist)
sample_size = get_sample_size(len(filelist))
# We initially split the entire set into 80% train and 20% test, then
# split the train set into 80% train and 20% validation.
train_beg = 0
train_end = len(filelist) - 2 * sample_size
train_end = int(0.8 * len(filelist))
dev_beg = train_end
dev_end = train_end + sample_size
dev_beg = int(0.8 * train_end)
dev_end = train_end
train_end = dev_beg
test_beg = dev_end
test_end = len(filelist)
@ -259,24 +255,5 @@ def _split_sets(filelist):
)
def get_sample_size(population_size):
"""calculates the sample size for a 99% confidence and 1% margin of error"""
margin_of_error = 0.01
fraction_picking = 0.50
z_score = 2.58 # Corresponds to confidence level 99%
numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
margin_of_error ** 2
)
sample_size = 0
for train_size in range(population_size, 0, -1):
denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
margin_of_error ** 2 * train_size
)
sample_size = int(numerator / denominator)
if 2 * sample_size + train_size <= population_size:
break
return sample_size
if __name__ == "__main__":
_download_and_preprocess_data(sys.argv[1])
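get_sample_size above is Cochran's sample-size formula with a finite-population correction, searched so that the train split plus two equal dev/test samples still fit the population. The fixed numerator works out as follows:

z, p, e = 2.58, 0.5, 0.01  # 99% confidence, worst-case proportion, 1% margin
n0 = (z ** 2 * p * (1 - p)) / (e ** 2)
print(round(n0))  # 16641: the infinite-population sample size
# The loop then applies n = n0 / (1 + n0 / train_size) and shrinks train_size
# until 2 * n + train_size fits inside the population.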


@ -5,7 +5,8 @@ import tarfile
import numpy as np
import pandas
from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]


@ -9,10 +9,11 @@ import urllib
from pathlib import Path
import pandas as pd
import swifter
from sox import Transformer
from deepspeech_training.util.importers import get_importers_parser, get_validate_label
__version__ = "0.1.0"
_logger = logging.getLogger(__name__)


@ -3,7 +3,8 @@ import os
import sys
import pandas
from deepspeech_training.util.downloader import maybe_download
def _download_and_preprocess_data(data_dir):


@ -9,10 +9,11 @@ import unicodedata
import pandas
import progressbar
from sox import Transformer
from tensorflow.python.platform import gfile
from deepspeech_training.util.downloader import maybe_download
SAMPLE_RATE = 16000


@ -11,15 +11,16 @@ from multiprocessing import Pool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report,
)
from ds_ctcdecoder import Alphabet
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
@ -136,15 +137,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
pool.close()
pool.join()
with open(
target_csv_template.format("train"), "w", encoding="utf-8", newline=""
) as train_csv_file: # 80%
with open(
target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
) as dev_csv_file: # 10%
with open(
target_csv_template.format("test"), "w", encoding="utf-8", newline=""
) as test_csv_file: # 10%
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
train_writer.writeheader()
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
@ -184,9 +179,7 @@ def _maybe_convert_sets(target_dir, extracted_data):
def _maybe_convert_wav(ogg_filename, wav_filename):
if not os.path.exists(wav_filename):
transformer = sox.Transformer()
transformer.convert(
samplerate=SAMPLE_RATE, n_channels=N_CHANNELS, bitdepth=BITDEPTH
)
try:
transformer.build(ogg_filename, wav_filename)
except sox.core.SoxError as ex:


@ -9,15 +9,16 @@ from glob import glob
from multiprocessing import Pool
import progressbar
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report,
)
from ds_ctcdecoder import Alphabet
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
@ -59,20 +60,9 @@ def one_sample(sample):
file_size = -1
frames = 0
if os.path.exists(wav_filename):
tmp_filename = os.path.splitext(wav_filename)[0] + ".tmp.wav"
subprocess.check_call(
[
"sox",
wav_filename,
"-r",
str(SAMPLE_RATE),
"-c",
"1",
"-b",
"16",
tmp_filename,
],
stderr=subprocess.STDOUT,
)
os.rename(tmp_filename, wav_filename)
file_size = os.path.getsize(wav_filename)
@ -148,15 +138,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
pool.close()
pool.join()
with open(
target_csv_template.format("train"), "w", encoding="utf-8", newline=""
) as train_csv_file: # 80%
with open(
target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
) as dev_csv_file: # 10%
with open(
target_csv_template.format("test"), "w", encoding="utf-8", newline=""
) as test_csv_file: # 10%
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
train_writer.writeheader()
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)


@ -5,7 +5,8 @@ import tarfile
import wave
import pandas
from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]


@ -1,99 +0,0 @@
#!/usr/bin/env python
import argparse
import ctypes
import os
from pathlib import Path
import pandas
import pyogg
from tqdm import tqdm
def read_ogg_opus_duration(ogg_file_path):
error = ctypes.c_int()
opusfile = pyogg.opus.op_open_file(
ogg_file_path.encode("utf-8"), ctypes.pointer(error)
)
if error.value != 0:
raise ValueError(
"Ogg/Opus file could not be read. Error code: {}".format(error.value)
)
pcm_buffer_size = pyogg.opus.op_pcm_total(opusfile, -1)
channel_count = pyogg.opus.op_channel_count(opusfile, -1)
sample_rate = 48000 # opus files are always 48kHz
sample_width = 2 # always 16-bit
pyogg.opus.op_free(opusfile)
return pcm_buffer_size / sample_rate
def main(root_dir):
for subset in (
"train",
"dev",
"test",
):
print("Processing {} subset...".format(subset))
with open(Path(root_dir) / subset / "transcripts.txt") as fin:
subset_entries = []
for i, line in tqdm(enumerate(fin)):
audio_id, transcript = line.split("\t")
audio_id_parts = audio_id.split("_")
# e.g. 4800_10003_000000 -> train/audio/4800/10003/4800_10003_000000.opus
audio_path = (
Path(root_dir)
/ subset
/ "audio"
/ audio_id_parts[0]
/ audio_id_parts[1]
/ "{}.opus".format(audio_id)
)
audio_duration = read_ogg_opus_duration(audio_path)
# TODO: support other languages
transcript = (
transcript.strip()
.replace("-", " ")
.replace("ñ", "n")
.replace(".", "")
.translate(
{
ord(ch): None
for ch in (
"а",
"в",
"е",
"и",
"к",
"м",
"н",
"о",
"п",
"р",
"т",
"ы",
"я",
)
}
)
)
subset_entries.append(
(
audio_path.relative_to(root_dir),
audio_duration,
transcript.strip(),
)
)
df = pandas.DataFrame(
columns=["wav_filename", "wav_filesize", "transcript"],
data=subset_entries,
)
csv_name = Path(root_dir) / "{}.csv".format(subset)
df.to_csv(csv_name, index=False)
print("Wrote {}".format(csv_name))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("root_dir", help="Path to the mls_english_opus directory.")
args = parser.parse_args()
main(args.root_dir)


@ -6,7 +6,8 @@ import tarfile
import numpy as np
import pandas
from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]


@ -8,15 +8,16 @@ from glob import glob
from multiprocessing import Pool
import progressbar
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report,
)
from ds_ctcdecoder import Alphabet
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
@ -156,15 +157,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
pool.close()
pool.join()
with open(
target_csv_template.format("train"), "w", encoding="utf-8", newline=""
) as train_csv_file: # 80%
with open(
target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
) as dev_csv_file: # 10%
with open(
target_csv_template.format("test"), "w", encoding="utf-8", newline=""
) as test_csv_file: # 10%
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
train_writer.writeheader()
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)


@ -1,11 +1,10 @@
#!/usr/bin/env python
# ensure that you have downloaded the LDC dataset LDC97S62 and tar exists in a folder e.g.
# ./data/swb/swb1_LDC97S62.tgz
# from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/
import codecs
import fnmatch
import os
import random
import subprocess
import sys
import tarfile
@ -16,7 +15,8 @@ import librosa
import pandas
import requests
import soundfile # <= Has an external dependency on libsndfile
from deepspeech_training.util.importers import validate_label_eng as validate_label
# ARCHIVE_NAME refers to ISIP alignments from 01/29/03
ARCHIVE_NAME = "switchboard_word_alignments.tar.gz"
@ -290,18 +290,14 @@ def _split_wav(origAudio, start_time, stop_time, new_wav_file):
def _split_sets(filelist):
"""
randomply split the datasets into train, validation, and test sets where the size of the
validation and test sets are determined by the `get_sample_size` function.
"""
random.shuffle(filelist)
sample_size = get_sample_size(len(filelist))
# We initially split the entire set into 80% train and 20% test, then
# split the train set into 80% train and 20% validation.
train_beg = 0
train_end = len(filelist) - 2 * sample_size
train_end = int(0.8 * len(filelist))
dev_beg = train_end
dev_end = train_end + sample_size
dev_beg = int(0.8 * train_end)
dev_end = train_end
train_end = dev_beg
test_beg = dev_end
test_end = len(filelist)
@ -313,25 +309,6 @@ def _split_sets(filelist):
)
def get_sample_size(population_size):
"""calculates the sample size for a 99% confidence and 1% margin of error"""
margin_of_error = 0.01
fraction_picking = 0.50
z_score = 2.58 # Corresponds to confidence level 99%
numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
margin_of_error ** 2
)
sample_size = 0
for train_size in range(population_size, 0, -1):
denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
margin_of_error ** 2 * train_size
)
sample_size = int(numerator / denominator)
if 2 * sample_size + train_size <= population_size:
break
return sample_size
def _read_data_set(
filelist,
thread_count,


@ -1,6 +1,6 @@
#!/usr/bin/env python
"""
Downloads and prepares (parts of) the "Spoken Wikipedia Corpora" for train.py
Downloads and prepares (parts of) the "Spoken Wikipedia Corpora" for DeepSpeech.py
Use "python3 import_swc.py -h" for help
"""
@ -21,9 +21,10 @@ from multiprocessing.pool import ThreadPool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import validate_label_eng as validate_label
from ds_ctcdecoder import Alphabet
SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
SWC_ARCHIVE = "SWC_{language}.tar"
@ -172,6 +173,7 @@ def in_alphabet(alphabet, c):
return alphabet.CanEncode(c) if alphabet else True
ALPHABETS = {}
@ -200,16 +202,8 @@ def label_filter(label, language):
dont_normalize = DONT_NORMALIZE[language] if language in DONT_NORMALIZE else ""
alphabet = get_alphabet(language)
for c in label:
if (
CLI_ARGS.normalize
and c not in dont_normalize
and not in_alphabet(alphabet, c)
):
c = (
unicodedata.normalize("NFKD", c)
.encode("ascii", "ignore")
.decode("ascii", "ignore")
)
for sc in c:
if not in_alphabet(alphabet, sc):
return None, "illegal character"


@ -7,11 +7,12 @@ from glob import glob
from os import makedirs, path, remove, rmdir
import pandas
from sox import Transformer
from tensorflow.python.platform import gfile
from deepspeech_training.util.downloader import maybe_download
from deepspeech_training.util.stm import parse_stm_file
def _download_and_preprocess_data(data_dir):
# Conditionally download data

bin/import_ts.py (new executable file, 214 lines)

@ -0,0 +1,214 @@
#!/usr/bin/env python3
import csv
import os
import re
import subprocess
import zipfile
from multiprocessing import Pool
import progressbar
import sox
import unidecode
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report,
)
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
SAMPLE_RATE = 16000
MAX_SECS = 15
ARCHIVE_NAME = "2019-04-11_fr_FR"
ARCHIVE_DIR_NAME = "ts_" + ARCHIVE_NAME
ARCHIVE_URL = (
"https://deepspeech-storage-mirror.s3.fr-par.scw.cloud/" + ARCHIVE_NAME + ".zip"
)
def _download_and_preprocess_data(target_dir, english_compatible=False):
# Making path absolute
target_dir = os.path.abspath(target_dir)
# Conditionally download data
archive_path = maybe_download(
"ts_" + ARCHIVE_NAME + ".zip", target_dir, ARCHIVE_URL
)
# Conditionally extract archive data
_maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
# Conditionally convert TrainingSpeech data to DeepSpeech CSVs and wav
_maybe_convert_sets(
target_dir, ARCHIVE_DIR_NAME, english_compatible=english_compatible
)
def _maybe_extract(target_dir, extracted_data, archive_path):
# If target_dir/extracted_data does not exist, extract archive in target_dir
extracted_path = os.path.join(target_dir, extracted_data)
if not os.path.exists(extracted_path):
print('No directory "%s" - extracting archive...' % extracted_path)
if not os.path.isdir(extracted_path):
os.mkdir(extracted_path)
with zipfile.ZipFile(archive_path) as zip_f:
zip_f.extractall(extracted_path)
else:
print('Found directory "%s" - not extracting it from archive.' % extracted_path)
def one_sample(sample):
""" Take a audio file, and optionally convert it to 16kHz WAV """
orig_filename = sample["path"]
# Storing wav files next to the original ones - just with a different suffix
wav_filename = os.path.splitext(orig_filename)[0] + ".converted.wav"
_maybe_convert_wav(orig_filename, wav_filename)
file_size = -1
frames = 0
if os.path.exists(wav_filename):
file_size = os.path.getsize(wav_filename)
frames = int(
subprocess.check_output(
["soxi", "-s", wav_filename], stderr=subprocess.STDOUT
)
)
label = sample["text"]
rows = []
# Keep track of how many samples are good vs. problematic
counter = get_counter()
if file_size == -1:
# Excluding samples that failed upon conversion
counter["failed"] += 1
elif label is None:
# Excluding samples that failed on label validation
counter["invalid_label"] += 1
elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)):
# Excluding samples that are too short to fit the transcript
counter["too_short"] += 1
elif frames / SAMPLE_RATE > MAX_SECS:
# Excluding very long samples to keep a reasonable batch-size
counter["too_long"] += 1
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames
return (counter, rows)
def _maybe_convert_sets(target_dir, extracted_data, english_compatible=False):
extracted_dir = os.path.join(target_dir, extracted_data)
# override existing CSV with normalized one
target_csv_template = os.path.join(target_dir, "ts_" + ARCHIVE_NAME + "_{}.csv")
if os.path.isfile(target_csv_template.format("train")):
return
path_to_original_csv = os.path.join(extracted_dir, "data.csv")
with open(path_to_original_csv) as csv_f:
data = [
d
for d in csv.DictReader(csv_f, delimiter=",")
if float(d["duration"]) <= MAX_SECS
]
for line in data:
line["path"] = os.path.join(extracted_dir, line["path"])
num_samples = len(data)
rows = []
counter = get_counter()
print("Importing {} wav files...".format(num_samples))
pool = Pool()
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
for i, processed in enumerate(pool.imap_unordered(one_sample, data), start=1):
counter += processed[0]
rows += processed[1]
bar.update(i)
bar.update(num_samples)
pool.close()
pool.join()
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
train_writer.writeheader()
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
dev_writer.writeheader()
test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES)
test_writer.writeheader()
for i, item in enumerate(rows):
transcript = validate_label(
cleanup_transcript(
item[2], english_compatible=english_compatible
)
)
if not transcript:
continue
wav_filename = os.path.join(target_dir, extracted_data, item[0])
i_mod = i % 10
if i_mod == 0:
writer = test_writer
elif i_mod == 1:
writer = dev_writer
else:
writer = train_writer
writer.writerow(
dict(
wav_filename=wav_filename,
wav_filesize=os.path.getsize(wav_filename),
transcript=transcript,
)
)
imported_samples = get_imported_samples(counter)
assert counter["all"] == num_samples
assert len(rows) == imported_samples
print_import_report(counter, SAMPLE_RATE, MAX_SECS)
def _maybe_convert_wav(orig_filename, wav_filename):
if not os.path.exists(wav_filename):
transformer = sox.Transformer()
transformer.convert(samplerate=SAMPLE_RATE)
try:
transformer.build(orig_filename, wav_filename)
except sox.core.SoxError as ex:
print("SoX processing error", ex, orig_filename, wav_filename)
PUNCTUATIONS_REG = re.compile(r"[\-,;!?.()\[\]*…—]")
MULTIPLE_SPACES_REG = re.compile(r"\s{2,}")
def cleanup_transcript(text, english_compatible=False):
text = text.replace("’", "'").replace("\u00A0", " ")
text = PUNCTUATIONS_REG.sub(" ", text)
text = MULTIPLE_SPACES_REG.sub(" ", text)
if english_compatible:
text = unidecode.unidecode(text)
return text.strip().lower()
def handle_args():
parser = get_importers_parser(description="Importer for TrainingSpeech dataset.")
parser.add_argument(dest="target_dir")
parser.add_argument(
"--english-compatible",
action="store_true",
dest="english_compatible",
help="Remove diactrics and other non-ascii chars.",
)
return parser.parse_args()
if __name__ == "__main__":
cli_args = handle_args()
validate_label = get_validate_label(cli_args)
_download_and_preprocess_data(cli_args.target_dir, cli_args.english_compatible)
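For a quick sanity check of the transcript cleanup, the function can be exercised standalone; this sketch restates it with the same regexes (unidecode is the PyPI package imported above):

import re
import unidecode

PUNCTUATIONS_REG = re.compile(r"[\-,;!?.()\[\]*…—]")
MULTIPLE_SPACES_REG = re.compile(r"\s{2,}")

def cleanup_transcript(text, english_compatible=False):
    text = text.replace("’", "'").replace("\u00A0", " ")
    text = PUNCTUATIONS_REG.sub(" ", text)
    text = MULTIPLE_SPACES_REG.sub(" ", text)
    if english_compatible:
        text = unidecode.unidecode(text)
    return text.strip().lower()

print(cleanup_transcript("Voilà… c’est prêt !", english_compatible=True))
# voila c'est pret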


@ -1,6 +1,6 @@
#!/usr/bin/env python
"""
Downloads and prepares (parts of) the "German Distant Speech" corpus (TUDA) for train.py
Downloads and prepares (parts of) the "German Distant Speech" corpus (TUDA) for DeepSpeech.py
Use "python3 import_tuda.py -h" for help
"""
import argparse
@@ -13,9 +13,10 @@ import xml.etree.ElementTree as ET
from collections import Counter
import progressbar
from coqui_stt_ctcdecoder import Alphabet
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
from coqui_stt_training.util.importers import validate_label_eng as validate_label
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import validate_label_eng as validate_label
from ds_ctcdecoder import Alphabet
TUDA_VERSION = "v2"
TUDA_PACKAGE = "german-speechdata-package-{}".format(TUDA_VERSION)
@@ -54,11 +55,7 @@ def check_and_prepare_sentence(sentence):
    chars = []
    for c in sentence:
        if CLI_ARGS.normalize and c not in "äöüß" and not in_alphabet(c):
            c = (
                unicodedata.normalize("NFKD", c)
                .encode("ascii", "ignore")
                .decode("ascii", "ignore")
            )
            c = unicodedata.normalize("NFKD", c).encode("ascii", "ignore").decode("ascii", "ignore")
        for sc in c:
            if not in_alphabet(c):
                return None
@@ -121,7 +118,7 @@ def write_csvs(extracted):
            sentence = list(meta.iter("cleaned_sentence"))[0].text
            sentence = check_and_prepare_sentence(sentence)
            if sentence is None:
                reasons["alphabet filter"] += 1
                reasons['alphabet filter'] += 1
                continue
            for wav_name in wav_names:
                sample_counter += 1

View File

@@ -10,8 +10,9 @@ from zipfile import ZipFile
import librosa
import progressbar
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
from coqui_stt_training.util.importers import (
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    print_import_report,
@@ -34,7 +35,7 @@ def _download_and_preprocess_data(target_dir):
    archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL)
    # Conditionally extract common voice data
    _maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
    # Conditionally convert common voice CSV files and mp3 data to Coqui STT CSVs and wav
    # Conditionally convert common voice CSV files and mp3 data to DeepSpeech CSVs and wav
    _maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME)

View File

@@ -13,8 +13,8 @@ from os import makedirs, path
import pandas
from bs4 import BeautifulSoup
from coqui_stt_training.util.downloader import maybe_download
from tensorflow.python.platform import gfile
from deepspeech_training.util.downloader import maybe_download
"""The number of jobs to run in parallel"""
NUM_PARALLEL = 8

View File

@@ -1,34 +1,22 @@
#!/usr/bin/env python
"""
Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and 🐸STT CSV files
Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and DeepSpeech CSV files
Use "python3 play.py -h" for help
"""
import argparse
import os
import random
import sys
import random
import argparse
from coqui_stt_training.util.audio import (
    AUDIO_TYPE_PCM,
    AUDIO_TYPE_WAV,
    get_loadable_audio_type_from_extension,
)
from coqui_stt_training.util.augmentations import (
    SampleAugmentation,
    apply_sample_augmentations,
    parse_augmentations,
)
from coqui_stt_training.util.sample_collections import (
    LabeledSample,
    SampleList,
    samples_from_source,
)
from deepspeech_training.util.audio import LOADABLE_AUDIO_EXTENSIONS, AUDIO_TYPE_PCM, AUDIO_TYPE_WAV
from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source
from deepspeech_training.util.augmentations import parse_augmentations, apply_sample_augmentations, SampleAugmentation
def get_samples_in_play_order():
    ext = os.path.splitext(CLI_ARGS.source)[1].lower()
    if get_loadable_audio_type_from_extension(ext):
    if ext in LOADABLE_AUDIO_EXTENSIONS:
        samples = SampleList([(CLI_ARGS.source, 0)], labeled=False)
    else:
        samples = samples_from_source(CLI_ARGS.source, buffering=0)
@@ -52,17 +40,14 @@ def get_samples_in_play_order():
def play_collection():
    augmentations = parse_augmentations(CLI_ARGS.augment)
    print(f"Parsed augmentations from flags: {augmentations}")
    if any(not isinstance(a, SampleAugmentation) for a in augmentations):
        print("Warning: Some of the augmentations cannot be simulated by this command.")
    samples = get_samples_in_play_order()
    samples = apply_sample_augmentations(
        samples,
        audio_type=AUDIO_TYPE_PCM,
        augmentations=augmentations,
        process_ahead=0,
        clock=CLI_ARGS.clock,
    )
    samples = apply_sample_augmentations(samples,
                                         audio_type=AUDIO_TYPE_PCM,
                                         augmentations=augmentations,
                                         process_ahead=0,
                                         clock=CLI_ARGS.clock)
    for sample in samples:
        if not CLI_ARGS.quiet:
            print('Sample "{}"'.format(sample.sample_id), file=sys.stderr)
@@ -72,12 +57,10 @@ def play_collection():
            sample.change_audio_type(AUDIO_TYPE_WAV)
            sys.stdout.buffer.write(sample.audio.getvalue())
            return
        wave_obj = simpleaudio.WaveObject(
            sample.audio,
            sample.audio_format.channels,
            sample.audio_format.width,
            sample.audio_format.rate,
        )
        wave_obj = simpleaudio.WaveObject(sample.audio,
                                          sample.audio_format.channels,
                                          sample.audio_format.width,
                                          sample.audio_format.rate)
        play_obj = wave_obj.play()
        play_obj.wait_done()
@@ -85,11 +68,9 @@ def play_collection():
def handle_args():
    parser = argparse.ArgumentParser(
        description="Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) "
        "and Coqui STT CSV files"
    )
    parser.add_argument(
        "source", help="Sample DB, CSV or WAV file to play samples from"
        "and DeepSpeech CSV files"
    )
    parser.add_argument("source", help="Sample DB, CSV or WAV file to play samples from")
    parser.add_argument(
        "--start",
        type=int,
@@ -109,7 +90,7 @@ def handle_args():
    )
    parser.add_argument(
        "--augment",
        action="append",
        action='append',
        help="Add an augmentation operation",
    )
    parser.add_argument(
@@ -117,8 +98,8 @@
        type=float,
        default=0.5,
        help="Simulates clock value used for augmentations during training."
        "Ranges from 0.0 (representing parameter start values) to"
        "1.0 (representing parameter end values)",
        "Ranges from 0.0 (representing parameter start values) to"
        "1.0 (representing parameter end values)",
    )
    parser.add_argument(
        "--pipe",
@@ -139,9 +120,7 @@ if __name__ == "__main__":
    try:
        import simpleaudio
    except ModuleNotFoundError:
        print(
            'Unless using the --pipe flag, play.py requires Python package "simpleaudio" for playing samples'
        )
        print('Unless using the --pipe flag, play.py requires Python package "simpleaudio" for playing samples')
        sys.exit(1)
    try:
        play_collection()

View File

@@ -1,25 +0,0 @@
#!/usr/bin/env python
import os
from import_ldc93s1 import _download_and_preprocess_data as download_ldc
from coqui_stt_training.util.config import initialize_globals_from_args
from coqui_stt_training.train import train
from coqui_stt_training.evaluate import test
# only one GPU for only one training sample
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
download_ldc("data/smoke_test")
initialize_globals_from_args(
load_train="init",
alphabet_config_path="data/alphabet.txt",
train_files=["data/smoke_test/ldc93s1.csv"],
dev_files=["data/smoke_test/ldc93s1.csv"],
test_files=["data/smoke_test/ldc93s1.csv"],
augment=["time_mask"],
n_hidden=100,
epochs=200,
)
train()
test()

View File

@@ -1,30 +1,28 @@
#!/bin/sh
set -xe
if [ ! -f train.py ]; then
    echo "Please make sure you run this from STT's top level directory."
if [ ! -f DeepSpeech.py ]; then
    echo "Please make sure you run this from DeepSpeech's top level directory."
    exit 1
fi;
if [ ! -f "data/smoke_test/ldc93s1.csv" ]; then
echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/smoke_test."
python -u bin/import_ldc93s1.py ./data/smoke_test
if [ ! -f "data/ldc93s1/ldc93s1.csv" ]; then
echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/ldc93s1."
python -u bin/import_ldc93s1.py ./data/ldc93s1
fi;
if [ -d "${COMPUTE_KEEP_DIR}" ]; then
    checkpoint_dir=$COMPUTE_KEEP_DIR
else
    checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("stt/ldc93s1"))')
    checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("deepspeech/ldc93s1"))')
fi
# Force only one visible device because we have a single-sample dataset
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0
python -m coqui_stt_training.train \
  --alphabet_config_path "data/alphabet.txt" \
  --show_progressbar false \
  --train_files data/smoke_test/ldc93s1.csv \
  --test_files data/smoke_test/ldc93s1.csv \
python -u DeepSpeech.py --noshow_progressbar \
  --train_files data/ldc93s1/ldc93s1.csv \
  --test_files data/ldc93s1/ldc93s1.csv \
  --train_batch_size 1 \
  --test_batch_size 1 \
  --n_hidden 100 \

View File

@@ -14,17 +14,16 @@ fi;
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0
python -u train.py --alphabet_config_path "data/alphabet.txt" \
  --show_progressbar false --early_stop false \
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files ${ldc93s1_csv} --train_batch_size 1 \
  --scorer "" \
  --augment dropout \
            pitch \
            tempo \
            warp \
            time_mask \
            frequency_mask \
            add \
            multiply \
  --augment pitch \
  --augment tempo \
  --augment warp \
  --augment time_mask \
  --augment frequency_mask \
  --augment add \
  --augment multiply \
  --n_hidden 100 \
  --epochs 1

View File

@@ -14,8 +14,7 @@ fi;
# and when trying to run on multiple devices (like GPUs), this will break
export CUDA_VISIBLE_DEVICES=0
python -u train.py --alphabet_config_path "data/alphabet.txt" \
  --show_progressbar false --early_stop false \
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
  --train_files ${ldc93s1_csv} --train_batch_size 1 \
  --dev_files ${ldc93s1_csv} --dev_batch_size 1 \
  --test_files ${ldc93s1_csv} --test_batch_size 1 \

Some files were not shown because too many files have changed in this diff.