Compare commits

39 commits:

- c119aa6b0e
- 9dde5726cd
- f2e9c85880
- 056f5a4c6f
- 504e55b2d8
- 948a232ae2
- b2920c7557
- cf2d7e636a
- c5410fc550
- 4270e22fe0
- 29fa2dd405
- a8abca0841
- 82f84c5853
- 67a9e35028
- 440e30c097
- 9c65ddadbf
- f252de4a8d
- d7d7ea7db1
- b65186907f
- 36a06634a3
- b0f2d37d6f
- 965f209665
- ab8bd3e11c
- 7cdb5b1ed9
- 617ce141d0
- 2d04fbe049
- 0ae425525d
- f7e816c014
- 2368fca0f1
- 719fedbd93
- a1d2d4181b
- 3eaa44b358
- aee7fc502c
- 9210a97d48
- 64fad81e10
- 7056241f37
- 065c8a6cdf
- 636b7133a1
- ed09fd3610
117 .circleci/config.yml Normal file
@@ -0,0 +1,117 @@
# These environment variables must be set in CircleCI UI
#
# DOCKERHUB_REPO - docker hub repo, format: <username>/<repo>
# DOCKER_USER - login info for docker hub
# DOCKER_PASS
#
version: 2
jobs:
  build:
    docker:
      - image: docker:stable-git
    working_directory: /dockerflow
    steps:
      - checkout
      - setup_remote_docker

      - run:
          name: os-release
          command: |
            cat /etc/os-release

      - run:
          name: install make
          command: |
            apk add make

      - run:
          name: Create a Dockerfile.train
          command: |
            make Dockerfile.train \
              DEEPSPEECH_REPO="https://github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME" \
              DEEPSPEECH_SHA=$CIRCLE_SHA1

      - run:
          name: Build Docker image
          command: docker build -t app:build -f Dockerfile.train .

      # save the built docker container into CircleCI's cache. This is
      # required since Workflows do not have the same remote docker instance.
      - run:
          name: docker save app:build
          command: mkdir -p /cache; docker save -o /cache/docker.tar "app:build"
      - save_cache:
          key: v1-{{ .Branch }}-{{epoch}}
          paths:
            - /cache/docker.tar

  deploy:
    docker:
      - image: docker:18.02.0-ce
    steps:
      - setup_remote_docker
      - restore_cache:
          key: v1-{{.Branch}}
      - run:
          name: Restore Docker image cache
          command: docker load -i /cache/docker.tar

      - run:
          name: Deploy to Dockerhub
          command: |
            echo $DOCKER_PASS | docker login -u $DOCKER_USER --password-stdin
            # deploy master
            if [ "${CIRCLE_BRANCH}" == "master" ]; then
              docker tag app:build ${DOCKERHUB_REPO}:latest
              docker push ${DOCKERHUB_REPO}:latest
            elif [ ! -z "${CIRCLE_TAG}" ]; then
              # deploy a release tag...
              echo "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
              docker tag app:build "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
              docker images
              docker push "${DOCKERHUB_REPO}:${CIRCLE_TAG}"
            fi

  lint:
    docker:
      - image: circleci/python:3.7.9
    steps:
      - checkout
      - run:
          name: Install dependencies
          command: |
            pip install --upgrade cardboardlint pylint
      - run:
          name: Run linter
          command: |
            set -ex
            # Check if branch can be merged with master (if failing script will stop due to set -e)
            git config user.email "you@example.com"
            git config user.name "Your Name"
            git merge --no-commit --no-ff origin/master

            # Undo merge changes if any
            git reset --hard $CIRCLE_BRANCH

            # Lint differences against master
            cardboardlinter --refspec origin/master -n auto;

workflows:
  version: 2
  build-deploy:
    jobs:
      - build:
          filters:
            tags:
              only: /.*/

      - deploy:
          requires:
            - build
          filters:
            tags:
              only: /.*/

  lint:
    jobs:
      - lint
@@ -1,5 +0,0 @@
.git/lfs
native_client/ds-swig
native_client/python/dist/*.whl
native_client/ctcdecode/*.a
native_client/javascript/build/
1 .gitattributes vendored
@@ -1,2 +1 @@
data/lm/kenlm.scorer filter=lfs diff=lfs merge=lfs -text
.github/actions/check_artifact_exists/dist/index.js binary
40 .github/ISSUE_TEMPLATE/bug_report.md vendored
@@ -1,40 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: 'Bug: '
labels: bug
assignees: ''

---

Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!

This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.

If you've found a bug, please provide the following information:

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Run the following command '...'
2. ...
3. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Environment (please complete the following information):**
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
- **TensorFlow installed from (our builds, or upstream TensorFlow)**:
- **TensorFlow version (use command below)**:
- **Python version**:
- **Bazel version (if compiling from source)**:
- **GCC/Compiler version (if compiling from source)**:
- **CUDA/cuDNN version**:
- **GPU model and memory**:
- **Exact command to reproduce**:

**Additional context**
Add any other context about the problem here.
8 .github/ISSUE_TEMPLATE/config.yml vendored
@@ -1,8 +0,0 @@
blank_issues_enabled: false
contact_links:
  - name: Coqui STT GitHub Discussions
    url: https://github.com/coqui-ai/STT/discussions
    about: Please ask and answer questions here.
  - name: Coqui Security issue disclosure
    url: mailto:info@coqui.ai
    about: Please report security vulnerabilities here.
26 .github/ISSUE_TEMPLATE/feature_request.md vendored
@@ -1,26 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: 'Feature request: '
labels: enhancement
assignees: ''

---

Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!

This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.

If you have a feature request, then please provide the following information:

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
11 .github/actions/build-tensorflow/action.yml vendored
@@ -1,11 +0,0 @@
name: "Build TensorFlow"
description: "Build TensorFlow Build"
inputs:
  flavor:
    description: "Build flavor"
    required: true
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/tf-build.sh ${{ inputs.flavor }}
      shell: bash
43 .github/actions/check_artifact_exists/README.md vendored
@@ -1,43 +0,0 @@
Building and using a TensorFlow cache:
======================================

The present action will check the existence of an artifact in the list of the
repo artifacts. Since we don't want always to download the artifact, we can't
rely on the official download-artifact action.

Rationale:
----------

Because of the amount of code required to build TensorFlow, the library build
is split into two main parts to make it much faster to run PRs:
 - a TensorFlow prebuild cache
 - actual code of the library

The TensorFlow prebuild cache exists because building tensorflow (even just the
`libtensorflow_cpp.so`) is a huge amount of code and it will take several hours
even on decent systems. So we perform a cache build of it, because the
tensorflow version does not change that often.

However, each PR might have changes to the actual library code, so we rebuild
this everytime.

The `tensorflow_opt-macOS` job checks whether such build cache exists alrady.
Those cache are stored as artifacts because [GitHub Actions
cache](https://docs.github.com/en/actions/guides/caching-dependencies-to-speed-up-workflows)
has size limitations.

The `build-tensorflow-macOS` job has a dependency against the cache check to
know whether it needs to run an actual build or not.

Hacking:
--------

For hacking into the action, please follow the [GitHub JavaScript
Actions](https://docs.github.com/en/actions/creating-actions/creating-a-javascript-action#commit-tag-and-push-your-action-to-github)
and specifically the usage of `ncc`.

```
$ npm install
$ npx ncc build main.js --license licenses.txt
$ git add dist/
```
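The workflow that wires these two jobs together (`build-and-test.yml`) is too large to render in this view. As a rough sketch only, a job can expose the action's `status` output and a downstream job can gate its build on it; job names, runner labels, and the artifact/flavor values below are illustrative, not taken from the suppressed workflow:

```yaml
jobs:
  tensorflow_opt-macOS:
    runs-on: macos-10.15            # illustrative runner
    outputs:
      status: ${{ steps.check_artifact_exists.outputs.status }}
    steps:
      - uses: actions/checkout@v2
      - id: check_artifact_exists
        uses: ./.github/actions/check_artifact_exists
        with:
          name: tensorflow_opt-macOS.tar.xz   # illustrative artifact name

  build-tensorflow-macOS:
    needs: tensorflow_opt-macOS
    if: needs.tensorflow_opt-macOS.outputs.status == 'missing'
    runs-on: macos-10.15
    steps:
      - uses: actions/checkout@v2
        with:
          submodules: recursive
      - uses: ./.github/actions/build-tensorflow
        with:
          flavor: "opt"             # illustrative flavor value
```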
32 .github/actions/check_artifact_exists/action.yml vendored
@@ -1,32 +0,0 @@
name: "check/download artifacts"
description: "Check and download that an artifact exists"
inputs:
  name:
    description: "Artifact name"
    required: true
  github_token:
    description: "GitHub token"
    required: false
    default: ${{ github.token }}
  download:
    description: "Should we download?"
    required: false
    default: false
  path:
    description: "Where to unpack the artifact"
    required: false
    default: "./"
  repo:
    description: "Repository name with owner (like actions/checkout)"
    required: false
    default: ${{ github.repository }}
  release-tag:
    description: "Tag of release to check artifacts under"
    required: false
    default: "v0.10.0-alpha.7"
outputs:
  status:
    description: "Status string of the artifact: 'missing' or 'found'"
runs:
  using: "node12"
  main: "dist/index.js"
30888 .github/actions/check_artifact_exists/dist/index.js vendored
File diff suppressed because one or more lines are too long
2540 .github/actions/check_artifact_exists/dist/licenses.txt vendored
File diff suppressed because it is too large
132 .github/actions/check_artifact_exists/main.js vendored
@@ -1,132 +0,0 @@
const core = require('@actions/core');
const github = require('@actions/github');
const AdmZip = require('adm-zip');
const filesize = require('filesize');
const pathname = require('path');
const fs = require('fs');
const { throttling } = require('@octokit/plugin-throttling');
const { GitHub } = require('@actions/github/lib/utils');
const Download = require('download');
const Util = require('util');
const Stream = require('stream');

const Pipeline = Util.promisify(Stream.pipeline);

async function getGoodArtifacts(client, owner, repo, releaseId, name) {
  console.log(`==> GET /repos/${owner}/${repo}/releases/${releaseId}/assets`);
  const goodRepoArtifacts = await client.paginate(
    "GET /repos/{owner}/{repo}/releases/{release_id}/assets",
    {
      owner: owner,
      repo: repo,
      release_id: releaseId,
      per_page: 100,
    },
    (releaseAssets, done) => {
      console.log(" ==> releaseAssets", releaseAssets);
      const goodAssets = releaseAssets.data.filter((a) => {
        console.log("==> Asset check", a);
        return a.name == name
      });
      if (goodAssets.length > 0) {
        done();
      }
      return goodAssets;
    }
  );

  console.log("==> maybe goodRepoArtifacts:", goodRepoArtifacts);
  return goodRepoArtifacts;
}

async function main() {
  try {
    const token = core.getInput("github_token", { required: true });
    const [owner, repo] = core.getInput("repo", { required: true }).split("/");
    const path = core.getInput("path", { required: true });
    const name = core.getInput("name");
    const download = core.getInput("download");
    const releaseTag = core.getInput("release-tag");
    const OctokitWithThrottling = GitHub.plugin(throttling);
    const client = new OctokitWithThrottling({
      auth: token,
      throttle: {
        onRateLimit: (retryAfter, options) => {
          console.log(
            `Request quota exhausted for request ${options.method} ${options.url}`
          );

          // Retry twice after hitting a rate limit error, then give up
          if (options.request.retryCount <= 2) {
            console.log(`Retrying after ${retryAfter} seconds!`);
            return true;
          } else {
            console.log("Exhausted 2 retries");
            core.setFailed("Exhausted 2 retries");
          }
        },
        onAbuseLimit: (retryAfter, options) => {
          // does not retry, only logs a warning
          console.log(
            `Abuse detected for request ${options.method} ${options.url}`
          );
          core.setFailed(`GitHub REST API Abuse detected for request ${options.method} ${options.url}`)
        },
      },
    });
    console.log("==> Repo:", owner + "/" + repo);

    const releaseInfo = await client.repos.getReleaseByTag({
      owner,
      repo,
      tag: releaseTag,
    });
    console.log(`==> Release info for tag ${releaseTag} = ${JSON.stringify(releaseInfo.data, null, 2)}`);
    const releaseId = releaseInfo.data.id;

    const goodArtifacts = await getGoodArtifacts(client, owner, repo, releaseId, name);
    console.log("==> goodArtifacts:", goodArtifacts);

    const artifactStatus = goodArtifacts.length === 0 ? "missing" : "found";

    console.log("==> Artifact", name, artifactStatus);
    console.log("==> download", download);

    core.setOutput("status", artifactStatus);

    if (artifactStatus === "found" && download == "true") {
      console.log("==> # artifacts:", goodArtifacts.length);

      const artifact = goodArtifacts[0];
      console.log("==> Artifact:", artifact.id)

      const size = filesize(artifact.size, { base: 10 })
      console.log(`==> Downloading: ${artifact.name} (${size}) to path: ${path}`)

      const dir = pathname.dirname(path)
      console.log(`==> Creating containing dir if needed: ${dir}`)
      fs.mkdirSync(dir, { recursive: true })

      await Pipeline(
        Download(artifact.url, {
          headers: {
            "Accept": "application/octet-stream",
            "Authorization": `token ${token}`,
          },
        }),
        fs.createWriteStream(path)
      )
    }

    if (artifactStatus === "missing" && download == "true") {
      core.setFailed("Required", name, "that is missing");
    }

    return;
  } catch (err) {
    console.error(err.stack);
    core.setFailed(err.message);
  }
}

main();
1139 .github/actions/check_artifact_exists/package-lock.json generated vendored
File diff suppressed because it is too large
@@ -1,13 +0,0 @@
{
  "name": "check_artifact_exists",
  "main": "main.js",
  "devDependencies": {
    "@actions/core": "^1.2.6",
    "@actions/github": "^4.0.0",
    "@octokit/plugin-throttling": "^3.4.1",
    "@vercel/ncc": "^0.27.0",
    "adm-zip": "^0.5.2",
    "download": "^8.0.0",
    "filesize": "^6.1.0"
  }
}
29 .github/actions/chroot-bind-mount/action.yml vendored
@@ -1,29 +0,0 @@
name: "chroot bind mount"
description: "Bind mount into chroot"
inputs:
  mounts:
    description: "Path to consider"
    required: true
runs:
  using: "composite"
  steps:
    - id: install_qemu
      run: |
        sudo apt-get update -y
        sudo apt-get install -y --no-install-recommends qemu-user-static
      shell: bash
    - id: bind_mount_chroot
      run: |
        set -xe

        # Bind-mount so that we have the same tree inside the chroot
        for dev in ${{ github.workspace }} ${{ inputs.mounts }};
        do
          sudo mount -o bind ${dev} ${{ env.SYSTEM_RASPBIAN }}${dev}
        done;

        for dev in ${{ inputs.mounts }};
        do
          sudo mount -o bind /${dev} ${{ env.SYSTEM_RASPBIAN }}/${dev}
        done;
      shell: bash
15 .github/actions/get_cache_key/README.md vendored
@@ -1,15 +0,0 @@
GitHub Action to compute cache key
==================================

It is intended to work in harmony with `check_artifact_exists`:
 - compute a stable cache key
 - as simple to use as possible (less parameters)

It will expect to be ran in a GitHub Action job that follows
`SUBMODULE_FLAVOR-PLATFORM`:
 - it will use the `SUBMODULE` part to check what is the current SHA1 of this git submodule.
 - the `FLAVOR` allows to distringuish e.g., opt/dbg builds
 - the PLATFORM permits defining an os/arch couple

It allows for an `extras` field for extensive customization, like forcing a
re-build.
34 .github/actions/get_cache_key/action.yml vendored
@@ -1,34 +0,0 @@
name: "get cache key for submodule"
description: "Compute a cache key based on git submodule"
inputs:
  extras:
    description: "Extra cache key value"
    required: true
  osarch:
    description: "Override automatic OSARCH value"
    required: false
outputs:
  key:
    description: "Computed cache key name"
    value: ${{ steps.compute_cache_key.outputs.key }}
runs:
  using: "composite"
  steps:
    - id: compute_cache_key
      run: |
        set -xe
        JOB=${{ github.job }}
        SUBMODULE=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f1)
        FLAVOR=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f2)

        if [ -z "${{ inputs.osarch }}" ]; then
          OSARCH=$(echo $JOB | cut -d'-' -f2)
        else
          OSARCH=${{ inputs.osarch }}
        fi

        SHA=$(git submodule status ${SUBMODULE} | sed -e 's/^-//g' -e 's/^+//g' -e 's/^U//g' | awk '{ print $1 }')

        KEY=${SUBMODULE}-${FLAVOR}_${OSARCH}_${SHA}_${{ inputs.extras }}
        echo "::set-output name=key::${KEY}"
      shell: bash
@@ -1,30 +0,0 @@
name: "Install Python"
description: "Installing an upstream python release"
inputs:
  version:
    description: "Python version"
    required: true
runs:
  using: "composite"
  steps:
    - shell: bash
      run: |
        set -xe
        curl https://www.python.org/ftp/python/${{ inputs.version }}/python-${{ inputs.version }}-macosx10.9.pkg -o "python.pkg"
    - shell: bash
      run: ls -hal .
    - shell: bash
      run: |
        set -xe
        sudo installer -verbose -pkg python.pkg -target /
    - shell: bash
      run: |
        set -xe
        which python3
        python3 --version
        python3 -c "import sysconfig; print(sysconfig.get_config_var('MACOSX_DEPLOYMENT_TARGET'))"
    - shell: bash
      name: Set up venv with upstream Python
      run: |
        python3 -m venv /tmp/venv
        echo "/tmp/venv/bin" >> $GITHUB_PATH
18 .github/actions/install-xldd/action.yml vendored
@@ -1,18 +0,0 @@
name: "xldd install"
description: "Install xldd"
inputs:
  target:
    description: "System target"
    required: true
runs:
  using: "composite"
  steps:
    - id: install_xldd
      run: |
        source ./ci_scripts/all-vars.sh
        # -s required to avoid the noisy output like "Entering / Leaving directories"
        toolchain=$(make -s -C ${DS_DSDIR}/native_client/ TARGET=${{ inputs.target }} TFDIR=${DS_TFDIR} print-toolchain)
        if [ ! -x "${toolchain}ldd" ]; then
          cp "${DS_DSDIR}/native_client/xldd" "${toolchain}ldd" && chmod +x "${toolchain}ldd"
        fi
      shell: bash
12 .github/actions/libstt-build/action.yml vendored
@@ -1,12 +0,0 @@
name: "Build libstt.so"
description: "Build libstt.so"
inputs:
  arch:
    description: "Target arch for loading script (host/armv7/aarch64)"
    required: false
    default: "host"
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/${{ inputs.arch }}-build.sh
      shell: bash
67 .github/actions/multistrap/action.yml vendored
@@ -1,67 +0,0 @@
name: "multistrap install"
description: "Install a system root using multistrap"
inputs:
  arch:
    description: "Target arch"
    required: true
  packages:
    description: "Extra packages to install"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - id: install_multistrap
      run: |
        sudo apt-get update -y
        sudo apt-get install -y --no-install-recommends multistrap qemu-user-static
      shell: bash
    - id: create_chroot
      run: |
        set -xe

        multistrap_conf=""
        if [ "${{ inputs.arch }}" = "armv7" ]; then
          multistrap_conf=multistrap_raspbian_buster.conf
          wget http://archive.raspbian.org/raspbian/pool/main/r/raspbian-archive-keyring/raspbian-archive-keyring_20120528.2_all.deb && sudo dpkg -i raspbian-archive-keyring_20120528.2_all.deb
        fi
        if [ "${{ inputs.arch }}" = "aarch64" ]; then
          multistrap_conf=multistrap_armbian64_buster.conf
        fi

        multistrap -d ${{ env.SYSTEM_RASPBIAN }} -f ${{ github.workspace }}/native_client/${multistrap_conf}

        if [ ! -z "${{ inputs.packages }}" ]; then
          TO_MOUNT=${{ github.workspace }}
          # Prepare target directory to bind-mount the github tree
          mkdir -p ${{ env.SYSTEM_RASPBIAN }}/${{ github.workspace }}

          # Bind-mount so that we have the same tree inside the chroot
          for dev in ${TO_MOUNT};
          do
            sudo mount -o bind ${dev} ${{ env.SYSTEM_RASPBIAN }}${dev}
          done;

          # Copy some host data:
          # resolv.conf: for getting DNS working
          # passwd, group, shadow: to have user accounts and apt-get install working
          for ff in resolv.conf passwd group shadow;
          do
            sudo cp /etc/${ff} ${{ env.SYSTEM_RASPBIAN }}/etc/
          done;

          # Perform apt steps.
          # Preserving the env is required
          sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get update -y
          sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get install -y --no-install-recommends ${{ inputs.packages }}

          # Cleanup apt info to save space
          sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ rm -fr /var/cache/apt/* /var/lib/apt/lists/*

          # Unmount what has been mounted
          for dev in ${TO_MOUNT};
          do
            sudo umount ${{ env.SYSTEM_RASPBIAN }}${dev}
          done;
        fi
      shell: bash
77 .github/actions/node-build/action.yml vendored
@@ -1,77 +0,0 @@
name: "NodeJS binding"
description: "Binding a nodejs binding"
inputs:
  nodejs_versions:
    description: "NodeJS versions supported"
    required: true
  electronjs_versions:
    description: "ElectronJS versions supported"
    required: true
  local_cflags:
    description: "CFLAGS for NodeJS package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for NodeJS package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for NodeJS package"
    required: false
    default: ""
  target:
    description: "TARGET value"
    required: false
    default: "host"
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        node --version
        npm --version
      shell: bash
    - run: |
        npm update
      shell: bash
    - run: |
        mkdir -p tmp/headers/nodejs tmp/headers/electronjs
      shell: bash
    - run: |
        for node in ${{ inputs.nodejs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
            make -C native_client/javascript \
              TARGET=${{ inputs.target }} \
              RASPBIAN=${{ inputs.chroot }} \
              NODE_ABI_TARGET=--target=${node} \
              NODE_DEVDIR=--devdir=headers/nodejs \
              clean node-wrapper
        done;
      shell: bash
    - run: |
        for electron in ${{ inputs.electronjs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
            make -C native_client/javascript \
              TARGET=${{ inputs.target }} \
              RASPBIAN=${{ inputs.chroot }} \
              NODE_ABI_TARGET=--target=${electron} \
              NODE_DIST_URL=--disturl=https://electronjs.org/headers \
              NODE_RUNTIME=--runtime=electron \
              NODE_DEVDIR=--devdir=headers/electronjs \
              clean node-wrapper
        done;
      shell: bash
    - run: |
        make -C native_client/javascript clean npm-pack
      shell: bash
    - run: |
        tar -czf native_client/javascript/wrapper.tar.gz \
          -C native_client/javascript/ lib/
      shell: bash
22 .github/actions/node-install/action.yml vendored
@@ -1,22 +0,0 @@
name: "nodejs install"
description: "Install nodejs in a chroot"
inputs:
  node:
    description: "NodeJS version"
    required: true
runs:
  using: "composite"
  steps:
    - id: add_apt_source
      run: |
        set -ex
        (echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > ${{ env.SYSTEM_RASPBIAN }}/etc/apt/preferences
        echo "deb http://deb.nodesource.com/node_${{ inputs.node }}.x buster main" > ${{ env.SYSTEM_RASPBIAN }}/etc/apt/sources.list.d/nodesource.list
        wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-key add -
      shell: bash
    - id: install_nodejs
      run: |
        set -ex
        sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get update -y
        sudo --preserve-env chroot ${{ env.SYSTEM_RASPBIAN }}/ apt-get install -y nodejs
      shell: bash
14 .github/actions/numpy_vers/README.md vendored
@@ -1,14 +0,0 @@
GitHub Action to set NumPy versions
===================================

This actions aims at computing correct values for NumPy dependencies:
 - `NUMPY_BUILD_VERSION`: range of accepted versions at Python binding build time
 - `NUMPY_DEP_VERSION`: range of accepted versions for execution time

Versions are set considering several factors:
 - API and ABI compatibility ; otherwise we can have the binding wrapper
   throwing errors like "Illegal instruction", or computing wrong values
   because of changed memory layout
 - Wheels availability: for CI and end users, we want to avoid having to
   rebuild numpy so we stick to versions where there is an existing upstream
   `wheel` file
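As a minimal sketch (the consuming workflow itself is suppressed in this diff, so step ids and the Python version are illustrative), a workflow step can take the action's `build_version`/`dep_version` outputs and feed them to the Python binding build:

```yaml
steps:
  - id: numpy
    uses: ./.github/actions/numpy_vers
    with:
      pyver: 3.8                     # illustrative Python version
  - uses: ./.github/actions/python-build
    with:
      numpy_build: ${{ steps.numpy.outputs.build_version }}
      numpy_dep: ${{ steps.numpy.outputs.dep_version }}
```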
93 .github/actions/numpy_vers/action.yml vendored
@@ -1,93 +0,0 @@
name: "get numpy versions"
description: "Get proper NumPy build and runtime versions dependencies range"
inputs:
  pyver:
    description: "Python version"
    required: true
outputs:
  build_version:
    description: "NumPy build dependency"
    value: ${{ steps.numpy.outputs.build }}
  dep_version:
    description: "NumPy runtime dependency"
    value: ${{ steps.numpy.outputs.dep }}
runs:
  using: "composite"
  steps:
    - id: numpy
      run: |
        set -ex
        NUMPY_BUILD_VERSION="==1.7.0"
        NUMPY_DEP_VERSION=">=1.7.0"

        OS=$(uname -s)
        ARCH=$(uname -m)

        case "${OS}:${ARCH}" in
          Linux:x86_64)
            case "${{ inputs.pyver }}" in
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4"
                ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4"
                ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
                ;;
            esac
            ;;

          Darwin:*)
            case "${{ inputs.pyver }}" in
              3.6*)
                NUMPY_BUILD_VERSION="==1.9.0"
                NUMPY_DEP_VERSION=">=1.9.0"
                ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
                ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
                ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
                ;;
            esac
            ;;

          ${CI_MSYS_VERSION}:x86_64)
            case "${{ inputs.pyver }}" in
              3.5*)
                NUMPY_BUILD_VERSION="==1.11.0"
                NUMPY_DEP_VERSION=">=1.11.0,<1.12.0"
                ;;
              3.6*)
                NUMPY_BUILD_VERSION="==1.12.0"
                NUMPY_DEP_VERSION=">=1.12.0,<1.14.5"
                ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
                ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
                ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
                ;;
            esac
            ;;
        esac

        echo "::set-output name=build::${NUMPY_BUILD_VERSION}"
        echo "::set-output name=dep::${NUMPY_DEP_VERSION}"
      shell: bash
@@ -1,7 +0,0 @@
name: "Package TensorFlow"
description: "Package TensorFlow Build"
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/tf-package.sh
      shell: bash
7 .github/actions/package/action.yml vendored
@@ -1,7 +0,0 @@
name: "Package lib"
description: "Package of lib"
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/package.sh
      shell: bash
58 .github/actions/python-build/action.yml vendored
@@ -1,58 +0,0 @@
name: "Python binding"
description: "Binding a python binding"
inputs:
  numpy_build:
    description: "NumPy build dependecy"
    required: true
  numpy_dep:
    description: "NumPy runtime dependecy"
    required: true
  local_cflags:
    description: "CFLAGS for Python package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for Python package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for Python package"
    required: false
    default: ""
  target:
    description: "TARGET value"
    required: false
    default: "host"
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        python3 --version
        pip3 --version
      shell: bash
    - run: |
        set -xe

        PROJECT_NAME="stt"

        OS=$(uname)
        if [ "${OS}" = "Linux" -a "${{ inputs.target }}" != "host" ]; then
          python3 -m venv stt-build
          source stt-build/bin/activate
        fi

        NUMPY_BUILD_VERSION="${{ inputs.numpy_build }}" \
        NUMPY_DEP_VERSION="${{ inputs.numpy_dep }}" \
        EXTRA_CFLAGS=${{ inputs.local_cflags }} \
        EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
        EXTRA_LIBS=${{ inputs.local_libs }} \
          make -C native_client/python/ \
            TARGET=${{ inputs.target }} \
            RASPBIAN=${{ inputs.chroot }} \
            SETUP_FLAGS="--project_name ${PROJECT_NAME}" \
            bindings-clean bindings
      shell: bash
35 .github/actions/run-tests/action.yml vendored
@@ -1,35 +0,0 @@
name: "Tests execution"
description: "Running tests"
inputs:
  runtime:
    description: "Runtime to use for running test"
    required: true
  model-kind:
    description: "Running against CI baked or production model"
    required: true
  bitrate:
    description: "Bitrate for testing"
    required: true
  chroot:
    description: "Run using a chroot"
    required: false
runs:
  using: "composite"
  steps:
    - run: |
        set -xe

        build="_tflite"

        model_kind=""
        if [ "${{ inputs.model-kind }}" = "prod" ]; then
          model_kind="-prod"
        fi

        prefix="."
        if [ ! -z "${{ inputs.chroot }}" ]; then
          prefix="${{ inputs.chroot }}"
        fi

        ${prefix}/ci_scripts/${{ inputs.runtime }}${build}-tests${model_kind}.sh ${{ inputs.bitrate }}
      shell: bash
11 .github/actions/select-xcode/action.yml vendored
@@ -1,11 +0,0 @@
name: "Select XCode version"
description: "Select XCode version"
inputs:
  version:
    description: "XCode version"
    required: true
runs:
  using: "composite"
  steps:
    - run: sudo xcode-select --switch /Applications/Xcode_${{ inputs.version }}.app
      shell: bash
12 .github/actions/setup-tensorflow/action.yml vendored
@@ -1,12 +0,0 @@
name: "Setup TensorFlow"
description: "Setup TensorFlow Build"
inputs:
  flavor:
    description: "Target flavor for setup script (empty/android-armv7/android-arm64)"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: ./ci_scripts/tf-setup.sh ${{ inputs.flavor }}
      shell: bash
89 .github/actions/upload-release-asset/action.yml vendored
@@ -1,89 +0,0 @@
name: "Upload cache asset to release"
description: "Upload a build cache asset to a release"
inputs:
  name:
    description: "Artifact name"
    required: true
  path:
    description: "Path of file to upload"
    required: true
  token:
    description: "GitHub token"
    required: false
    default: ${{ github.token }}
  repo:
    description: "Repository name with owner (like actions/checkout)"
    required: false
    default: ${{ github.repository }}
  release-tag:
    description: "Tag of release to check artifacts under"
    required: false
    default: "v0.10.0-alpha.7"
runs:
  using: "composite"
  steps:
    - run: |
        set -xe

        asset_name="${{ inputs.name }}"
        filenames="${{ inputs.path }}"

        if [ $(compgen -G "$filenames" | wc -l) -gt 1 -a -n "$asset_name" ]; then
          echo "Error: multiple input files specified, but also specified an asset_name."
          echo "When uploading multiple files leave asset_name empty to use the file names as asset names."
          exit 1
        fi

        # Check input
        for file in $filenames; do
          if [[ ! -f $file ]]; then
            echo "Error: Input file (${filename}) missing"
            exit 1;
          fi
        done

        AUTH="Authorization: token ${{ inputs.token }}"

        owner=$(echo "${{inputs.repo}}" | cut -f1 -d/)
        repo=$(echo "${{inputs.repo}}" | cut -f2 -d/)
        tag="${{ inputs.release-tag }}"

        GH_REPO="https://api.github.com/repos/${owner}/${repo}"

        # Check token
        curl -o /dev/null -sH "$AUTH" $GH_REPO || {
          echo "Error: Invalid repo, token or network issue!"
          exit 1
        }

        # Check if tag exists
        response=$(curl -sH "$AUTH" "${GH_REPO}/git/refs/tags/${tag}")
        eval $(echo "$response" | grep -m 1 "sha.:" | grep -w sha | tr : = | tr -cd '[[:alnum:]]=')
        [ "$sha" ] || {
          echo "Error: Tag does not exist: $tag"
          echo "$response" | awk 'length($0)<100' >&2
          exit 1
        }

        # Get ID of the release based on given tag name
        GH_TAGS="${GH_REPO}/releases/tags/${tag}"
        response=$(curl -sH "$AUTH" $GH_TAGS)
        eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
        [ "$id" ] || {
          echo "Error: Could not find release for tag: $tag"
          echo "$response" | awk 'length($0)<100' >&2
          exit 1
        }

        # Upload assets
        for file in $filenames; do
          if [ -z $asset_name ]; then
            asset=$(basename $file)
          else
            asset=$asset_name
          fi
          echo "Uploading asset with name: $asset from file: $file"
          GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset}"
          curl -T $file -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET
        done
      shell: bash
12 .github/actions/win-install-sox/action.yml vendored
@@ -1,12 +0,0 @@
name: "Install SoX and add to PATH"
description: "Install SoX and add to PATH"
runs:
  using: "composite"
  steps:
    - run: |
        set -ex
        curl -sSLO https://github.com/coqui-ai/STT/releases/download/v0.10.0-alpha.7/sox-14.4.2-win32.zip
        "C:/Program Files/7-Zip/7z.exe" x -o`pwd`/bin/ -tzip -aoa sox-14.4.2-win32.zip
        rm sox-*zip
        echo "`pwd`/bin/sox-14.4.2/" >> $GITHUB_PATH
      shell: bash
77 .github/actions/win-node-build/action.yml vendored
@@ -1,77 +0,0 @@
name: "NodeJS binding"
description: "Binding a nodejs binding"
inputs:
  nodejs_versions:
    description: "NodeJS versions supported"
    required: true
  electronjs_versions:
    description: "ElectronJS versions supported"
    required: true
  local_cflags:
    description: "CFLAGS for NodeJS package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for NodeJS package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for NodeJS package"
    required: false
    default: ""
  target:
    description: "TARGET value"
    required: false
    default: "host"
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        node --version
        npm --version
      shell: msys2 {0}
    - run: |
        npm update
      shell: msys2 {0}
    - run: |
        mkdir -p tmp/headers/nodejs tmp/headers/electronjs
      shell: msys2 {0}
    - run: |
        for node in ${{ inputs.nodejs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
            make -C native_client/javascript \
              TARGET=${{ inputs.target }} \
              RASPBIAN=${{ inputs.chroot }} \
              NODE_ABI_TARGET=--target=${node} \
              NODE_DEVDIR=--devdir=headers/nodejs \
              clean node-wrapper
        done;
      shell: msys2 {0}
    - run: |
        for electron in ${{ inputs.electronjs_versions }}; do
          EXTRA_CFLAGS=${{ inputs.local_cflags }} \
          EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
          EXTRA_LIBS=${{ inputs.local_libs }} \
            make -C native_client/javascript \
              TARGET=${{ inputs.target }} \
              RASPBIAN=${{ inputs.chroot }} \
              NODE_ABI_TARGET=--target=${electron} \
              NODE_DIST_URL=--disturl=https://electronjs.org/headers \
              NODE_RUNTIME=--runtime=electron \
              NODE_DEVDIR=--devdir=headers/electronjs \
              clean node-wrapper
        done;
      shell: msys2 {0}
    - run: |
        make -C native_client/javascript clean npm-pack
      shell: msys2 {0}
    - run: |
        tar -czf native_client/javascript/wrapper.tar.gz \
          -C native_client/javascript/ lib/
      shell: msys2 {0}
14 .github/actions/win-numpy-vers/README.md vendored
@@ -1,14 +0,0 @@
GitHub Action to set NumPy versions
===================================

This actions aims at computing correct values for NumPy dependencies:
 - `NUMPY_BUILD_VERSION`: range of accepted versions at Python binding build time
 - `NUMPY_DEP_VERSION`: range of accepted versions for execution time

Versions are set considering several factors:
 - API and ABI compatibility ; otherwise we can have the binding wrapper
   throwing errors like "Illegal instruction", or computing wrong values
   because of changed memory layout
 - Wheels availability: for CI and end users, we want to avoid having to
   rebuild numpy so we stick to versions where there is an existing upstream
   `wheel` file
93 .github/actions/win-numpy-vers/action.yml vendored
@@ -1,93 +0,0 @@
name: "get numpy versions"
description: "Get proper NumPy build and runtime versions dependencies range"
inputs:
  pyver:
    description: "Python version"
    required: true
outputs:
  build_version:
    description: "NumPy build dependency"
    value: ${{ steps.numpy.outputs.build }}
  dep_version:
    description: "NumPy runtime dependency"
    value: ${{ steps.numpy.outputs.dep }}
runs:
  using: "composite"
  steps:
    - id: numpy
      run: |
        set -ex
        NUMPY_BUILD_VERSION="==1.7.0"
        NUMPY_DEP_VERSION=">=1.7.0"

        OS=$(uname -s)
        ARCH=$(uname -m)

        case "${OS}:${ARCH}" in
          Linux:x86_64)
            case "${{ inputs.pyver }}" in
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4"
                ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4"
                ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
                ;;
            esac
            ;;

          Darwin:*)
            case "${{ inputs.pyver }}" in
              3.6*)
                NUMPY_BUILD_VERSION="==1.9.0"
                NUMPY_DEP_VERSION=">=1.9.0"
                ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
                ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
                ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
                ;;
            esac
            ;;

          ${CI_MSYS_VERSION}:x86_64)
            case "${{ inputs.pyver }}" in
              3.5*)
                NUMPY_BUILD_VERSION="==1.11.0"
                NUMPY_DEP_VERSION=">=1.11.0,<1.12.0"
                ;;
              3.6*)
                NUMPY_BUILD_VERSION="==1.12.0"
                NUMPY_DEP_VERSION=">=1.12.0,<1.14.5"
                ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
                ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
                ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
                ;;
            esac
            ;;
        esac

        echo "::set-output name=build::${NUMPY_BUILD_VERSION}"
        echo "::set-output name=dep::${NUMPY_DEP_VERSION}"
      shell: msys2 {0}
31 .github/actions/win-python-build/action.yml vendored
@@ -1,31 +0,0 @@
name: "Python binding"
description: "Binding a python binding"
inputs:
  numpy_build:
    description: "NumPy build dependecy"
    required: true
  numpy_dep:
    description: "NumPy runtime dependecy"
    required: true
runs:
  using: "composite"
  steps:
    - run: |
        set -xe

        python3 --version
        pip3 --version

        PROJECT_NAME="stt"

        NUMPY_BUILD_VERSION="${{ inputs.numpy_build }}" \
        NUMPY_DEP_VERSION="${{ inputs.numpy_dep }}" \
        EXTRA_CFLAGS=${{ inputs.local_cflags }} \
        EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
        EXTRA_LIBS=${{ inputs.local_libs }} \
          make -C native_client/python/ \
            TARGET=${{ inputs.target }} \
            RASPBIAN=${{ inputs.chroot }} \
            SETUP_FLAGS="--project_name ${PROJECT_NAME}" \
            bindings-clean bindings
      shell: msys2 {0}
35 .github/actions/win-run-tests/action.yml vendored
@@ -1,35 +0,0 @@
name: "Tests execution"
description: "Running tests"
inputs:
  runtime:
    description: "Runtime to use for running test"
    required: true
  model-kind:
    description: "Running against CI baked or production model"
    required: true
  bitrate:
    description: "Bitrate for testing"
    required: true
  chroot:
    description: "Run using a chroot"
    required: false
runs:
  using: "composite"
  steps:
    - run: |
        set -xe

        build="_tflite"

        model_kind=""
        if [ "${{ inputs.model-kind }}" = "prod" ]; then
          model_kind="-prod"
        fi

        prefix="."
        if [ ! -z "${{ inputs.chroot }}" ]; then
          prefix="${{ inputs.chroot }}"
        fi

        ${prefix}/ci_scripts/${{ inputs.runtime }}${build}-tests${model_kind}.sh ${{ inputs.bitrate }}
      shell: msys2 {0}
15 .github/pull_request_template.md vendored
@@ -1,15 +0,0 @@
# Pull request guidelines

Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!

This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.

In order to make a good pull request, please see our [CONTRIBUTING.rst](CONTRIBUTING.rst) file, in particular make sure you have set-up and run the pre-commit hook to check your changes for code style violations.

Before accepting your pull request, you will be asked to sign a [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT).

This [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT):

- Protects you, Coqui, and the users of the code.
- Does not change your rights to use your contributions for any purpose.
- Does not change the license of the 🐸STT project. It just makes the terms of your contribution clearer and lets us know you are OK to contribute.
3590 .github/workflows/build-and-test.yml vendored
File diff suppressed because it is too large
32 .github/workflows/lint.yml vendored
@@ -1,32 +0,0 @@
name: "Lints"
on:
  pull_request:
defaults:
  run:
    shell: bash
jobs:
  training-unittests:
    name: "Lin|Training unittests"
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        pyver: [3.6, 3.7]
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.pyver }}
      - name: Run training unittests
        run: |
          ./ci_scripts/train-unittests.sh
  pre-commit-checks:
    name: "Lin|Pre-commit checks"
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Run pre-comit checks
        run: |
          python .pre-commit-2.11.1.pyz run --all-files
3 .gitignore vendored
@@ -32,9 +32,10 @@
/doc/.build/
/doc/xml-c/
/doc/xml-java/
Dockerfile.build
Dockerfile.train
doc/xml-c
doc/xml-java
doc/xml-dotnet
convert_graphdef_memmapped_format
native_client/swift/deepspeech_ios.framework/deepspeech_ios
.github/actions/check_artifact_exists/node_modules/
4 .gitmodules vendored
@@ -1,10 +1,10 @@
[submodule "doc/examples"]
	path = doc/examples
	url = https://github.com/coqui-ai/STT-examples.git
	url = https://github.com/mozilla/DeepSpeech-examples.git
	branch = master
[submodule "tensorflow"]
	path = tensorflow
	url = https://bics.ga/experiments/STT-tensorflow.git
	url = https://github.com/mozilla/tensorflow.git
[submodule "kenlm"]
	path = kenlm
	url = https://github.com/kpu/kenlm
@@ -1,2 +1,4 @@
[settings]
profile=black
line_length=80
multi_line_output=3
default_section=FIRSTPARTY
Binary file not shown.
@@ -1,24 +0,0 @@
exclude: '^(taskcluster|.github|native_client/kenlm|native_client/ctcdecode/third_party|tensorflow|kenlm|doc/examples|data/alphabet.txt|data/smoke_test)'
repos:
  - repo: 'https://github.com/pre-commit/pre-commit-hooks'
    rev: v2.3.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: 'https://github.com/psf/black'
    rev: 20.8b1
    hooks:
      - id: black
        language_version: python3
# - repo: https://github.com/pycqa/isort
#   rev: 5.8.0
#   hooks:
#     - id: isort
#       name: isort (python)
#     - id: isort
#       name: isort (cython)
#       types: [cython]
#     - id: isort
#       name: isort (pyi)
#       types: [pyi]
155
.pylintrc
155
.pylintrc
@ -3,22 +3,14 @@
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loading into the active Python interpreter and may
|
||||
# run arbitrary code.
|
||||
extension-pkg-allow-list=
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loading into the active Python interpreter and may
|
||||
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
|
||||
# for backward compatibility.)
|
||||
extension-pkg-whitelist=
|
||||
|
||||
# Specify a score threshold to be exceeded before program exits with error.
|
||||
fail-under=10.0
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=native_client/kenlm
|
||||
|
||||
# Files or directories to be skipped. They should be base names, not paths.
|
||||
ignore=CVS
|
||||
|
||||
# Files or directories matching the regex patterns are skipped. The regex
|
||||
# matches against base names, not paths.
|
||||
# Add files or directories matching the regex patterns to the blacklist. The
|
||||
# regex matches against base names, not paths.
|
||||
ignore-patterns=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
@ -34,13 +26,16 @@ jobs=1
|
||||
# complex, nested conditions.
|
||||
limit-inference-results=100
|
||||
|
||||
# List of plugins (as comma separated values of python module names) to load,
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||
# user-friendly hints instead of false-positive error messages.
|
||||
suggestion-mode=yes
|
||||
@ -65,7 +60,16 @@ confidence=
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||
# --disable=W".
|
||||
disable=print-statement,
|
||||
disable=missing-docstring,
|
||||
line-too-long,
|
||||
wrong-import-order,
|
||||
ungrouped-imports,
|
||||
wrong-import-position,
|
||||
import-error,
|
||||
no-name-in-module,
|
||||
no-member,
|
||||
unsubscriptable-object,
|
||||
print-statement,
|
||||
parameter-unpacking,
|
||||
unpacking-in-except,
|
||||
old-raise-syntax,
|
||||
@ -83,6 +87,12 @@ disable=print-statement,
|
||||
useless-suppression,
|
||||
deprecated-pragma,
|
||||
use-symbolic-message-instead,
|
||||
useless-object-inheritance,
|
||||
too-few-public-methods,
|
||||
too-many-branches,
|
||||
too-many-arguments,
|
||||
too-many-locals,
|
||||
too-many-statements,
|
||||
apply-builtin,
|
||||
basestring-builtin,
|
||||
buffer-builtin,
|
||||
@ -143,8 +153,7 @@ disable=print-statement,
|
||||
xreadlines-attribute,
|
||||
deprecated-sys-function,
|
||||
exception-escape,
|
||||
comprehension-escape,
|
||||
format
|
||||
comprehension-escape
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifier separated by comma (,) or put this option
|
||||
@ -155,11 +164,11 @@ enable=c-extension-no-member
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Python expression which should return a score less than or equal to 10. You
|
||||
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
|
||||
# which contain the number of messages in each category, as well as 'statement'
|
||||
# which is the total number of statements analyzed. This score is used by the
|
||||
# global evaluation report (RP0004).
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables errors warning, statement which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Template used to display messages. This is a python new-style format string
|
||||
@ -187,13 +196,13 @@ max-nested-blocks=5
|
||||
# inconsistent-return-statements if a never returning function is called then
|
||||
# it will be considered as an explicit return statement and no message will be
|
||||
# printed.
|
||||
never-returning-functions=sys.exit,argparse.parse_error
|
||||
never-returning-functions=sys.exit
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# The type of string formatting that logging methods do. `old` means using %
|
||||
# formatting, `new` is for `{}` formatting.
|
||||
# Format style used to check logging format string. `old` means using %
|
||||
# formatting, while `new` is for `{}` formatting.
|
||||
logging-format-style=old
|
||||
|
||||
# Logging modules to check that the string format arguments are in logging
|
||||
@ -206,22 +215,18 @@ logging-modules=logging
|
||||
# Limits count of emitted suggestions for spelling mistakes.
|
||||
max-spelling-suggestions=4
|
||||
|
||||
# Spelling dictionary name. Available dictionaries: none. To make it work,
|
||||
# install the 'python-enchant' package.
|
||||
# Spelling dictionary name. Available dictionaries: none. To make it working
|
||||
# install python-enchant package..
|
||||
spelling-dict=
|
||||
|
||||
# List of comma separated words that should be considered directives if they
|
||||
# appear at the beginning of a comment and should not be checked.
|
||||
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
|
||||
|
||||
# List of comma separated words that should not be checked.
|
||||
spelling-ignore-words=
|
||||
|
||||
# A path to a file that contains the private dictionary; one word per line.
|
||||
# A path to a file that contains private dictionary; one word per line.
|
||||
spelling-private-dict-file=
|
||||
|
||||
# Tells whether to store unknown words to the private dictionary (see the
|
||||
# --spelling-private-dict-file option) instead of raising a message.
|
||||
# Tells whether to store unknown words to indicated private dictionary in
|
||||
# --spelling-private-dict-file option instead of raising a message.
|
||||
spelling-store-unknown-words=no
|
||||
|
||||
|
||||
@ -232,9 +237,6 @@ notes=FIXME,
|
||||
XXX,
|
||||
TODO
|
||||
|
||||
# Regular expression of note tags to take in consideration.
|
||||
#notes-rgx=
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
@ -271,7 +273,7 @@ ignored-classes=optparse.Values,thread._local,_thread._local
|
||||
|
||||
# List of module names for which member attributes should not be checked
|
||||
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||
# and thus existing member attributes cannot be deduced by static analysis). It
|
||||
# and thus existing member attributes cannot be deduced by static analysis. It
|
||||
# supports qualified module names, as well as Unix pattern matching.
|
||||
ignored-modules=
|
||||
|
||||
@ -287,9 +289,6 @@ missing-member-hint-distance=1
|
||||
# showing a hint for a missing member.
|
||||
missing-member-max-choices=1
|
||||
|
||||
# List of decorators that change the signature of a decorated function.
|
||||
signature-mutators=
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
@ -300,9 +299,6 @@ additional-builtins=
|
||||
# Tells whether unused global variables should be treated as a violation.
|
||||
allow-global-unused-variables=yes
|
||||
|
||||
# List of names allowed to shadow builtins
|
||||
allowed-redefined-builtins=
|
||||
|
||||
# List of strings which can identify a callback function by name. A callback
|
||||
# name must start or end with one of those strings.
|
||||
callbacks=cb_,
|
||||
@ -345,6 +341,13 @@ max-line-length=100
|
||||
# Maximum number of lines in a module.
|
||||
max-module-lines=1000
|
||||
|
||||
# List of optional constructs for which whitespace checking is disabled. `dict-
|
||||
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
|
||||
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
|
||||
# `empty-line` allows space-only lines.
|
||||
no-space-check=trailing-comma,
|
||||
dict-separator
|
||||
|
||||
# Allow the body of a class to be on the same line as the declaration if body
|
||||
# contains single statement.
|
||||
single-line-class-stmt=no
|
||||
@ -376,7 +379,7 @@ argument-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct argument names. Overrides argument-
|
||||
# naming-style.
|
||||
#argument-rgx=
|
||||
argument-rgx=[a-z_][a-z0-9_]{0,30}$
|
||||
|
||||
# Naming style matching correct attribute names.
|
||||
attr-naming-style=snake_case
|
||||
@ -386,16 +389,7 @@ attr-naming-style=snake_case
|
||||
#attr-rgx=
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma.
|
||||
bad-names=foo,
|
||||
bar,
|
||||
baz,
|
||||
toto,
|
||||
tutu,
|
||||
tata
|
||||
|
||||
# Bad variable names regexes, separated by a comma. If names match any regex,
|
||||
# they will always be refused
|
||||
bad-names-rgxs=
|
||||
bad-names=
|
||||
|
||||
# Naming style matching correct class attribute names.
|
||||
class-attribute-naming-style=any
|
||||
@ -404,13 +398,6 @@ class-attribute-naming-style=any
|
||||
# attribute-naming-style.
|
||||
#class-attribute-rgx=
|
||||
|
||||
# Naming style matching correct class constant names.
|
||||
class-const-naming-style=UPPER_CASE
|
||||
|
||||
# Regular expression matching correct class constant names. Overrides class-
|
||||
# const-naming-style.
|
||||
#class-const-rgx=
|
||||
|
||||
# Naming style matching correct class names.
|
||||
class-naming-style=PascalCase
|
||||
|
||||
@ -440,14 +427,11 @@ function-naming-style=snake_case
|
||||
good-names=i,
|
||||
j,
|
||||
k,
|
||||
x,
|
||||
ex,
|
||||
Run,
|
||||
_
|
||||
|
||||
# Good variable names regexes, separated by a comma. If names match any regex,
|
||||
# they will always be accepted
|
||||
good-names-rgxs=
|
||||
|
||||
# Include a hint for the correct naming format with invalid-name.
|
||||
include-naming-hint=no
|
||||
|
||||
@ -490,26 +474,19 @@ variable-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct variable names. Overrides variable-
|
||||
# naming-style.
|
||||
#variable-rgx=
|
||||
variable-rgx=[a-z_][a-z0-9_]{0,30}$
|
||||
|
||||
|
||||
[STRING]
|
||||
|
||||
# This flag controls whether inconsistent-quotes generates a warning when the
|
||||
# character used as a quote delimiter is used inconsistently within a module.
|
||||
check-quote-consistency=no
|
||||
|
||||
# This flag controls whether the implicit-str-concat should generate a warning
|
||||
# on implicit string concatenation in sequences defined over several lines.
|
||||
# This flag controls whether the implicit-str-concat-in-sequence should
|
||||
# generate a warning on implicit string concatenation in sequences defined over
|
||||
# several lines.
|
||||
check-str-concat-over-line-jumps=no
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# List of modules that can be imported at any level, not just the top level
|
||||
# one.
|
||||
allow-any-import-level=
|
||||
|
||||
# Allow wildcard imports from modules that define __all__.
|
||||
allow-wildcard-with-all=no
|
||||
|
||||
@ -521,17 +498,16 @@ analyse-fallback-blocks=no
|
||||
# Deprecated modules which should not be used, separated by a comma.
|
||||
deprecated-modules=optparse,tkinter.tix
|
||||
|
||||
# Output a graph (.gv or any supported image format) of external dependencies
|
||||
# to the given file (report RP0402 must not be disabled).
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled).
|
||||
ext-import-graph=
|
||||
|
||||
# Output a graph (.gv or any supported image format) of all (i.e. internal and
|
||||
# external) dependencies to the given file (report RP0402 must not be
|
||||
# disabled).
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled).
|
||||
import-graph=
|
||||
|
||||
# Output a graph (.gv or any supported image format) of internal dependencies
|
||||
# to the given file (report RP0402 must not be disabled).
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled).
|
||||
int-import-graph=
|
||||
|
||||
# Force import order to recognize a module as part of the standard
|
||||
@ -541,20 +517,13 @@ known-standard-library=
|
||||
# Force import order to recognize a module as part of a third party library.
|
||||
known-third-party=enchant
|
||||
|
||||
# Couples of modules and preferred modules, separated by a comma.
|
||||
preferred-modules=
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# Warn about protected attribute access inside special methods
|
||||
check-protected-access-in-special-methods=no
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,
|
||||
__new__,
|
||||
setUp,
|
||||
__post_init__
|
||||
setUp
|
||||
|
||||
# List of member names, which should be excluded from the protected access
|
||||
# warning.
|
||||
@ -579,7 +548,7 @@ max-args=5
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Maximum number of boolean expressions in an if statement (see R0916).
|
||||
# Maximum number of boolean expressions in an if statement.
|
||||
max-bool-expr=5
|
||||
|
||||
# Maximum number of branch for function / method body.
|
||||
|
@ -14,4 +14,4 @@ sphinx:
|
||||
python:
|
||||
version: 3.7
|
||||
install:
|
||||
- requirements: doc/requirements.txt
|
||||
- requirements: taskcluster/docs-requirements.txt
|
||||
|
65
.taskcluster.yml
Normal file
@ -0,0 +1,65 @@
|
||||
# The version is always required
|
||||
version: 0
|
||||
# Top level metadata is always required
|
||||
metadata:
|
||||
name: "DeepSpeech"
|
||||
description: "DeepSpeech builds"
|
||||
owner: "{{ event.head.user.email }}" # the user who sent the pr/push e-mail will be inserted here
|
||||
source: "{{ event.head.repo.url }}" # the repo where the pr came from will be inserted here
|
||||
tasks:
|
||||
- provisionerId: "proj-deepspeech"
|
||||
workerType: "ci-decision-task"
|
||||
extra:
|
||||
github:
|
||||
env: true
|
||||
events:
|
||||
- pull_request.opened
|
||||
- pull_request.synchronize
|
||||
- pull_request.reopened
|
||||
- push
|
||||
- tag
|
||||
branches:
|
||||
- r0.9
|
||||
|
||||
scopes: [
|
||||
"queue:create-task:highest:proj-deepspeech/*",
|
||||
"queue:route:index.project.deepspeech.*",
|
||||
"index:insert-task:project.deepspeech.*",
|
||||
"queue:scheduler-id:taskcluster-github",
|
||||
"generic-worker:cache:deepspeech-macos-pyenv",
|
||||
"docker-worker:capability:device:kvm"
|
||||
]
|
||||
|
||||
payload:
|
||||
maxRunTime: 600
|
||||
image: "ubuntu:18.04"
|
||||
|
||||
features:
|
||||
taskclusterProxy: true
|
||||
|
||||
env:
|
||||
TC_DECISION_SHA: ef67832e6657f43e139a10f37eb326a7d9d96dad
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
- "--login"
|
||||
- "-cxe"
|
||||
- >
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ bionic-updates main" > /etc/apt/sources.list.d/bionic-updates.list &&
|
||||
apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
|
||||
adduser --system --home /home/build-user build-user &&
|
||||
cd /home/build-user/ &&
|
||||
echo -e "#!/bin/bash\nset -xe\nenv && id && mkdir ~/DeepSpeech/ && git clone --quiet {{event.head.repo.url}} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet {{event.head.sha}}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
|
||||
sudo -H -u build-user /bin/bash /tmp/clone.sh &&
|
||||
sudo -H -u build-user --preserve-env /bin/bash /home/build-user/DeepSpeech/ds/taskcluster/tc-schedule.sh
|
||||
artifacts:
|
||||
"public":
|
||||
type: "directory"
|
||||
path: "/tmp/artifacts/"
|
||||
expires: "{{ '7 days' | $fromNow }}"
|
||||
# Each task also requires explicit metadata
|
||||
metadata:
|
||||
name: "DeepSpeech Decision Task"
|
||||
description: "DeepSpeech Decision Task: triggers everything."
|
||||
owner: "{{ event.head.user.email }}"
|
||||
source: "{{ event.head.repo.url }}"
|
@ -1,102 +0,0 @@
|
||||
version: 1
|
||||
policy:
|
||||
pullRequests: collaborators_quiet
|
||||
tasks:
|
||||
$let:
|
||||
metadata:
|
||||
task_id: {$eval: as_slugid("decision_task")}
|
||||
github:
|
||||
$if: 'tasks_for == "github-pull-request"'
|
||||
then:
|
||||
action: "pull_request.${event.action}"
|
||||
login: ${event.pull_request.user.login}
|
||||
ref: ${event.pull_request.head.ref}
|
||||
branch: ${event.pull_request.head.ref}
|
||||
tag: ""
|
||||
sha: ${event.pull_request.head.sha}
|
||||
clone_url: ${event.pull_request.head.repo.clone_url}
|
||||
else:
|
||||
action:
|
||||
$if: 'event.ref[:10] == "refs/tags/"'
|
||||
then: "tag"
|
||||
else: "push"
|
||||
login: ${event.pusher.name}
|
||||
ref: ${event.ref}
|
||||
branch:
|
||||
$if: 'event.ref[:11] == "refs/heads/"'
|
||||
then: ${event.ref[11:]}
|
||||
else: ""
|
||||
tag:
|
||||
$if: 'event.ref[:10] == "refs/tags/"'
|
||||
then: ${event.ref[10:]}
|
||||
else: ""
|
||||
sha: ${event.after}
|
||||
clone_url: ${event.repository.clone_url}
|
||||
in:
|
||||
$let:
|
||||
decision_task:
|
||||
taskId: ${metadata.task_id}
|
||||
created: {$fromNow: ''}
|
||||
deadline: {$fromNow: '60 minutes'}
|
||||
|
||||
provisionerId: "proj-deepspeech"
|
||||
workerType: "ci-decision-task"
|
||||
|
||||
scopes: [
|
||||
"queue:create-task:highest:proj-deepspeech/*",
|
||||
"queue:route:index.project.deepspeech.*",
|
||||
"index:insert-task:project.deepspeech.*",
|
||||
"queue:scheduler-id:taskcluster-github",
|
||||
"generic-worker:cache:deepspeech-macos-pyenv",
|
||||
"docker-worker:capability:device:kvm"
|
||||
]
|
||||
|
||||
payload:
|
||||
maxRunTime: 600
|
||||
image: "ubuntu:18.04"
|
||||
|
||||
features:
|
||||
taskclusterProxy: true
|
||||
|
||||
env:
|
||||
TASK_ID: ${metadata.task_id}
|
||||
GITHUB_HEAD_USER_LOGIN: ${metadata.github.login}
|
||||
GITHUB_HEAD_USER_EMAIL: ${metadata.github.login}@users.noreply.github.com
|
||||
GITHUB_EVENT: ${metadata.github.action}
|
||||
GITHUB_HEAD_REPO_URL: ${metadata.github.clone_url}
|
||||
GITHUB_HEAD_BRANCH: ${metadata.github.branch}
|
||||
GITHUB_HEAD_TAG: ${metadata.github.tag}
|
||||
GITHUB_HEAD_REF: ${metadata.github.ref}
|
||||
GITHUB_HEAD_SHA: ${metadata.github.sha}
|
||||
|
||||
command:
|
||||
- "/bin/bash"
|
||||
- "--login"
|
||||
- "-cxe"
|
||||
- >
|
||||
echo "deb http://archive.ubuntu.com/ubuntu/ bionic-updates main" > /etc/apt/sources.list.d/bionic-updates.list &&
|
||||
apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
|
||||
adduser --system --home /home/build-user build-user &&
|
||||
cd /home/build-user/ &&
|
||||
echo -e "#!/bin/bash\nset -xe\nenv && id && mkdir ~/DeepSpeech/ && git clone --quiet ${metadata.github.clone_url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${metadata.github.ref}" > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
|
||||
sudo -H -u build-user /bin/bash /tmp/clone.sh &&
|
||||
sudo -H -u build-user --preserve-env /bin/bash /home/build-user/DeepSpeech/ds/taskcluster/tc-schedule.sh
|
||||
artifacts:
|
||||
"public":
|
||||
type: "directory"
|
||||
path: "/tmp/artifacts/"
|
||||
expires: {$fromNow: '7 days'}
|
||||
|
||||
metadata:
|
||||
name: "DeepSpeech decision task"
|
||||
description: "DeepSpeech decision task"
|
||||
owner: "${metadata.github.login}@users.noreply.github.com"
|
||||
source: "${metadata.github.clone_url}"
|
||||
in:
|
||||
$flattenDeep:
|
||||
- $if: 'tasks_for == "github-pull-request" && event["action"] in ["opened", "reopened", "synchronize"]'
|
||||
then: {$eval: decision_task}
|
||||
- $if: 'tasks_for == "github-push" && event.ref == "refs/heads/master"'
|
||||
then: {$eval: decision_task}
|
||||
- $if: 'tasks_for == "github-push" && event.ref[:10] == "refs/tags/"'
|
||||
then: {$eval: decision_task}
|
@ -1,18 +1,19 @@
|
||||
This file contains a list of papers in chronological order that have been published using 🐸STT.
|
||||
This file contains a list of papers in chronological order that have been published
|
||||
using DeepSpeech.
|
||||
|
||||
To appear
|
||||
==========
|
||||
|
||||
* Raghuveer Peri, Haoqi Li, Krishna Somandepalli, Arindam Jati, Shrikanth Narayanan (2020) "An empirical analysis of information encoded in disentangled neural speaker representations".
|
||||
* Raghuveer Peri, Haoqi Li, Krishna Somandepalli, Arindam Jati, Shrikanth Narayanan (2020) "An empirical analysis of information encoded in disentangled neural speaker representations".
|
||||
* Rosana Ardila, Megan Branson, Kelly Davis, Michael Henretty, Michael Kohler, Josh Meyer, Reuben Morais, Lindsay Saunders, Francis M. Tyers, and Gregor Weber (2020) "Common Voice: A Massively-Multilingual Speech Corpus".
|
||||
|
||||
Published
|
||||
Published
|
||||
==========
|
||||
|
||||
2020
|
||||
----------
|
||||
|
||||
* Nils Hjortnaes, Niko Partanen, Michael Rießler and Francis M. Tyers (2020)
|
||||
* Nils Hjortnaes, Niko Partanen, Michael Rießler and Francis M. Tyers (2020)
|
||||
"Towards a Speech Recognizer for Komi, an Endangered and Low-Resource Uralic Language". *Proceedings of the 6th International Workshop on Computational Linguistics of Uralic Languages*.
|
||||
|
||||
```
|
||||
@ -72,5 +73,5 @@ Published
|
||||
booktitle = {2018 IEEE/ACM Machine Learning in HPC Environments (MLHPC)},
|
||||
doi = {https://doi.org/10.1109/MLHPC.2018.8638637}
|
||||
year = 2018
|
||||
}
|
||||
}
|
||||
```
|
||||
|
@ -1,132 +1,15 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
# Community Participation Guidelines
|
||||
|
||||
## Our Pledge
|
||||
This repository is governed by Mozilla's code of conduct and etiquette guidelines.
|
||||
For more details, please read the
|
||||
[Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual identity
|
||||
and orientation.
|
||||
## How to Report
|
||||
For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
<!--
|
||||
## Project Specific Etiquette
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the
|
||||
overall community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or
|
||||
advances of any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email
|
||||
address, without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official e-mail address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement by emailing
|
||||
[coc-report@coqui.ai](mailto:coc-report@coqui.ai).
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series
|
||||
of actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or
|
||||
permanent ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within
|
||||
the community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.0, available at
|
||||
[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available
|
||||
at [https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
In some cases, there will be additional project etiquette, e.g. https://bugzilla.mozilla.org/page.cgi?id=etiquette.html.
|
||||
Please update for your project.
|
||||
-->
|
||||
|
@ -1,22 +1,14 @@
|
||||
Coqui STT code owners / governance system
|
||||
=========================================
|
||||
DeepSpeech code owners
|
||||
======================
|
||||
|
||||
🐸STT is run under a governance system inspired (and partially copied from) by the `Mozilla module ownership system <https://www.mozilla.org/about/governance/policies/module-ownership/>`_. The project is roughly divided into modules, and each module has its own owners, which are responsible for reviewing pull requests and deciding on technical direction for their modules. Module ownership authority is given to people who have worked extensively on areas of the project.
|
||||
This file describes reviewers who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.
|
||||
|
||||
Module owners also have the authority of naming other module owners or appointing module peers, which are people with authority to review pull requests in that module. They can also sub-divide their module into sub-modules with their own owners.
|
||||
There's overlap in the areas of expertise of each reviewer, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review, any code owner will be happy to redirect the request to a more appropriate question.
|
||||
|
||||
Module owners are not tyrants. They are chartered to make decisions with input from the community and in the best interests of the community. Module owners are not required to make code changes or additions solely because the community wants them to do so. (Like anyone else, the module owners may write code because they want to, because their employers want them to, because the community wants them to, or for some other reason.) Module owners do need to pay attention to patches submitted to that module. However “pay attention” does not mean agreeing to every patch. Some patches may not make sense for the WebThings project; some may be poorly implemented. Module owners have the authority to decline a patch; this is a necessary part of the role. We ask the module owners to describe in the relevant issue their reasons for wanting changes to a patch, for declining it altogether, or for postponing review for some period. We don’t ask or expect them to rewrite patches to make them acceptable. Similarly, module owners may need to delay review of a promising patch due to an upcoming deadline. For example, a patch may be of interest, but not for the next milestone. In such a case it may make sense for the module owner to postpone review of a patch until after matters needed for a milestone have been finalized. Again, we expect this to be described in the relevant issue. And of course, it shouldn’t go on very often or for very long or escalation and review is likely.
|
||||
|
||||
The work of the various module owners and peers is overseen by the global owners, which are responsible for making final decisions in case there's conflict between owners as well as set the direction for the project as a whole.
|
||||
|
||||
This file describes module owners who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.
|
||||
|
||||
There's overlap in the areas of expertise of each owner, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review, any code owner will be happy to redirect the request to a more appropriate person.
|
||||
|
||||
Global owners
|
||||
Global reviewers
|
||||
----------------
|
||||
|
||||
These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other owners, global owners will make a final decision.
|
||||
These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other reviewers, global reviewers will make a final decision.
|
||||
|
||||
- Alexandre Lissy (@lissyx)
|
||||
- Reuben Morais (@reuben)
|
||||
@ -46,7 +38,7 @@ Testing & CI
|
||||
Native inference client
|
||||
-----------------------
|
||||
|
||||
Everything that goes into libstt.so and is not specifically covered in another area fits here.
|
||||
Everything that goes into libdeepspeech.so and is not specifically covered in another area fits here.
|
||||
|
||||
- Alexandre Lissy (@lissyx)
|
||||
- Reuben Morais (@reuben)
|
||||
@ -110,7 +102,7 @@ Documentation
|
||||
- Alexandre Lissy (@lissyx)
|
||||
- Reuben Morais (@reuben)
|
||||
|
||||
.. Third party bindings
|
||||
--------------------
|
||||
Third party bindings
|
||||
--------------------
|
||||
|
||||
Hosted externally and owned by the individual authors. See the `list of third-party bindings <https://stt.readthedocs.io/en/latest/USING.html#third-party-bindings>`_ for more info.
|
||||
Hosted externally and owned by the individual authors. See the `list of third-party bindings <https://deepspeech.readthedocs.io/en/master/USING.html#third-party-bindings>`_ for more info.
|
||||
|
@ -1,32 +1,37 @@
|
||||
Contribution guidelines
|
||||
=======================
|
||||
|
||||
Welcome to the 🐸STT project! We are excited to see your interest, and appreciate your support!
|
||||
Welcome to the DeepSpeech project! We are excited to see your interest, and appreciate your support!
|
||||
|
||||
This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the `CODE_OF_CONDUCT.md <CODE_OF_CONDUCT.md>`_.
|
||||
This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the `Mozilla Community Participation Guidelines <https://www.mozilla.org/about/governance/policies/participation/>`_.
|
||||
|
||||
How to Make a Good Pull Request
|
||||
-------------------------------
|
||||
|
||||
Here are some guidelines on how to make a good PR to 🐸STT.
|
||||
Here are some guidelines on how to make a good PR to DeepSpeech.
|
||||
|
||||
Bug-fix PR
|
||||
^^^^^^^^^^
|
||||
|
||||
You've found a bug and you were able to squash it! Great job! Please write a short but clear commit message describing the bug, and how you fixed it. This makes review much easier. Also, please name your branch something related to the bug-fix.
|
||||
|
||||
Documentation PR
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
If you're just making updates or changes to the documentation, there's no need to run all of DeepSpeech's tests for Continuous Integration (i.e. Taskcluster tests). In this case, at the end of your short but clear commit message, you should add **X-DeepSpeech: NOBUILD**. This will trigger the CI tests to skip your PR, saving both time and compute.
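As a sketch (the file path and commit message here are made up), such a docs-only commit could look like:

.. code-block:: bash

   # The second -m adds the trailer that tells Taskcluster CI to skip builds for this PR
   git add doc/TRAINING.rst
   git commit -m "Clarify checkpoint directory layout in training docs" \
              -m "X-DeepSpeech: NOBUILD"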
|
||||
|
||||
New Feature PR
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
You've made some core changes to 🐸STT, and you would like to share them back with the community -- great! First things first: if you're planning to add a feature (not just fix a bug or docs) let the 🐸STT team know ahead of time and get some feedback early. A quick check-in with the team can save time during code-review, and also ensure that your new feature fits into the project.
|
||||
You've made some core changes to DeepSpeech, and you would like to share them back with the community -- great! First things first: if you're planning to add a feature (not just fix a bug or docs) let the DeepSpeech team know ahead of time and get some feedback early. A quick check-in with the team can save time during code-review, and also ensure that your new feature fits into the project.
|
||||
|
||||
The 🐸STT codebase is made of many connected parts. There is Python code for training 🐸STT, core C++ code for running inference on trained models, and multiple language bindings to the C++ core so you can use 🐸STT in your favorite language.
|
||||
The DeepSpeech codebase is made of many connected parts. There is Python code for training DeepSpeech, core C++ code for running inference on trained models, and multiple language bindings to the C++ core so you can use DeepSpeech in your favorite language.
|
||||
|
||||
Whenever you add a new feature to 🐸STT and want to contribute that feature back to the project, here are some things to keep in mind:
|
||||
Whenever you add a new feature to DeepSpeech and want to contribute that feature back to the project, here are some things to keep in mind:
|
||||
|
||||
1. You've made changes to the core C++ code. Core changes can have downstream effects on all parts of the 🐸STT project, so keep that in mind. You should minimally also make necessary changes to the C client (i.e. **args.h** and **client.cc**). The bindings for Python, Java, and Javascript are SWIG generated, and in the best-case scenario you won't have to worry about them. However, if you've added a whole new feature, you may need to make custom tweaks to those bindings, because SWIG may not automagically work with your new feature, especially if you've exposed new arguments. The bindings for .NET and Swift are not generated automatically. It would be best if you also made the necessary manual changes to these bindings as well. It is best to communicate with the core 🐸STT team and come to an understanding of where you will likely need to work with the bindings. They can't predict all the bugs you will run into, but they will have a good idea of how to plan for some obvious challenges.
|
||||
1. You've made changes to the core C++ code. Core changes can have downstream effects on all parts of the DeepSpeech project, so keep that in mind. You should minimally also make necessary changes to the C client (i.e. **args.h** and **client.cc**). The bindings for Python, Java, and Javascript are SWIG generated, and in the best-case scenario you won't have to worry about them. However, if you've added a whole new feature, you may need to make custom tweaks to those bindings, because SWIG may not automagically work with your new feature, especially if you've exposed new arguments. The bindings for .NET and Swift are not generated automatically. It would be best if you also made the necessary manual changes to these bindings as well. It is best to communicate with the core DeepSpeech team and come to an understanding of where you will likely need to work with the bindings. They can't predict all the bugs you will run into, but they will have a good idea of how to plan for some obvious challenges.
|
||||
2. You've made changes to the Python code. Make sure you run a linter (described below).
|
||||
3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the 🐸STT decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use what is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool. You can use sample WAV files for testing which are provided in the `STT/data/` directory.
|
||||
3. Make sure your new feature doesn't regress the project. If you've added a significant feature or amount of code, you want to be sure your new feature doesn't create performance issues. For example, if you've made a change to the DeepSpeech decoder, you should know that inference performance doesn't drop in terms of latency, accuracy, or memory usage. Unless you're proposing a new decoding algorithm, you probably don't have to worry about affecting accuracy. However, it's very possible you've affected latency or memory usage. You should run local performance tests to make sure no bugs have crept in. There are lots of tools to check latency and memory usage, and you should use what is most comfortable for you and gets the job done. If you're on Linux, you might find `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_ to be a useful tool. You can use sample WAV files for testing which are provided in the `DeepSpeech/data/` directory.
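As a purely illustrative sketch (the model, scorer, and audio paths below are placeholders for your local files), a quick latency check of the native client on a sample clip might look like:

.. code-block:: bash

   # Run the client five times under perf and report averaged counters
   perf stat -r 5 -- \
     deepspeech --model deepspeech-0.9.3-models.pbmm \
                --scorer deepspeech-0.9.3-models.scorer \
                --audio data/smoke_test/LDC93S1.wav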
|
||||
|
||||
Requesting review on your PR
|
||||
----------------------------
|
||||
@ -34,14 +39,54 @@ Requesting review on your PR
|
||||
Generally, a code owner will be notified of your pull request and will either review it or ask some other code owner for their review. If you'd like to proactively request review as you open the PR, see the CODE_OWNERS.rst file which describes who's an appropriate reviewer depending on which parts of the code you're changing.
|
||||
|
||||
|
||||
Code linting
|
||||
------------
|
||||
Python Linter
|
||||
-------------
|
||||
|
||||
We use `pre-commit <https://pre-commit.com/>`_ to manage pre-commit hooks that take care of checking your changes for code style violations. Before committing changes, make sure you have the hook installed in your setup by running, in the virtual environment you use for running the code:
|
||||
Before making a Pull Request for Python code changes, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter set up in this repository, so for example, if you've made some changes and would like to run the linter on just the changed code, you can use the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd STT
|
||||
python .pre-commit-2.11.1.pyz install
|
||||
pip install pylint cardboardlint
|
||||
cardboardlinter --refspec master
|
||||
|
||||
This will compare the code against master and run the linter on all the changes. We plan to introduce more linter checks (e.g. for C++) in the future. To run it automatically as a git pre-commit hook, do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cat <<\EOF > .git/hooks/pre-commit
|
||||
#!/bin/bash
|
||||
if [ ! -x "$(command -v cardboardlinter)" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# First, stash index and work dir, keeping only the
|
||||
# to-be-committed changes in the working directory.
|
||||
echo "Stashing working tree changes..." 1>&2
|
||||
old_stash=$(git rev-parse -q --verify refs/stash)
|
||||
git stash save -q --keep-index
|
||||
new_stash=$(git rev-parse -q --verify refs/stash)
|
||||
|
||||
# If there were no changes (e.g., `--amend` or `--allow-empty`)
|
||||
# then nothing was stashed, and we should skip everything,
|
||||
# including the tests themselves. (Presumably the tests passed
|
||||
# on the previous commit, so there is no need to re-run them.)
|
||||
if [ "$old_stash" = "$new_stash" ]; then
|
||||
echo "No changes, skipping lint." 1>&2
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Run tests
|
||||
cardboardlinter --refspec HEAD -n auto
|
||||
status=$?
|
||||
|
||||
# Restore changes
|
||||
echo "Restoring working tree changes..." 1>&2
|
||||
git reset --hard -q && git stash apply --index -q && git stash drop -q
|
||||
|
||||
# Exit with status from test-run: nonzero prevents commit
|
||||
exit $status
|
||||
EOF
|
||||
chmod +x .git/hooks/pre-commit
|
||||
|
||||
This will run the linters on just the changes made in your commit.
|
||||
|
||||
This will install a git pre-commit hook which will check your commits and let you know about any style violations that need fixing.
|
||||
|
12
DeepSpeech.py
Executable file
@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
from deepspeech_training import train as ds_train
|
||||
except ImportError:
|
||||
print('Training package is not installed. See training documentation.')
|
||||
raise
|
||||
|
||||
ds_train.run_script()
|
@ -1,8 +1,11 @@
|
||||
# Please refer to the USING documentation, "Dockerfile for building from source"
|
||||
|
||||
# Need devel version cause we need /usr/include/cudnn.h
|
||||
# Need devel version cause we need /usr/include/cudnn.h
|
||||
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
||||
|
||||
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
|
||||
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
||||
|
||||
# >> START Install base software
|
||||
|
||||
# Get basic packages
|
||||
@ -61,7 +64,7 @@ ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
|
||||
ENV TF_CUDA_VERSION 10.1
|
||||
ENV TF_CUDNN_VERSION 7.6
|
||||
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
|
||||
ENV TF_NCCL_VERSION 2.8
|
||||
ENV TF_NCCL_VERSION 2.7
|
||||
|
||||
# Common Environment Setup
|
||||
ENV TF_BUILD_CONTAINER_TYPE GPU
|
||||
@ -109,11 +112,16 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
|
||||
# << END Configure Bazel
|
||||
|
||||
WORKDIR /
|
||||
COPY . /STT/
|
||||
|
||||
RUN git clone --recursive $DEEPSPEECH_REPO DeepSpeech
|
||||
WORKDIR /DeepSpeech
|
||||
RUN git checkout $DEEPSPEECH_SHA
|
||||
RUN git submodule sync tensorflow/
|
||||
RUN git submodule update --init tensorflow/
|
||||
|
||||
# >> START Build and bind
|
||||
|
||||
WORKDIR /STT/tensorflow
|
||||
WORKDIR /DeepSpeech/tensorflow
|
||||
|
||||
# Fix for not found script https://github.com/tensorflow/tensorflow/issues/471
|
||||
RUN ./configure
|
||||
@ -124,12 +132,14 @@ RUN ./configure
|
||||
|
||||
# passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
|
||||
|
||||
# Build STT
|
||||
|
||||
# Build DeepSpeech
|
||||
RUN bazel build \
|
||||
--verbose_failures \
|
||||
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
|
||||
--config=monolithic \
|
||||
--config=cuda \
|
||||
-c opt \
|
||||
--copt=-O3 \
|
||||
--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
|
||||
--copt=-mtune=generic \
|
||||
--copt=-march=x86-64 \
|
||||
--copt=-msse \
|
||||
@ -138,26 +148,24 @@ RUN bazel build \
|
||||
--copt=-msse4.1 \
|
||||
--copt=-msse4.2 \
|
||||
--copt=-mavx \
|
||||
--config=noaws \
|
||||
--config=nogcp \
|
||||
--config=nohdfs \
|
||||
--config=nonccl \
|
||||
//native_client:libstt.so
|
||||
--copt=-fvisibility=hidden \
|
||||
//native_client:libdeepspeech.so \
|
||||
--verbose_failures \
|
||||
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
|
||||
|
||||
# Copy built libs to /STT/native_client
|
||||
RUN cp bazel-bin/native_client/libstt.so /STT/native_client/
|
||||
# Copy built libs to /DeepSpeech/native_client
|
||||
RUN cp bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
|
||||
|
||||
# Build client.cc and install Python client and decoder bindings
|
||||
ENV TFDIR /STT/tensorflow
|
||||
ENV TFDIR /DeepSpeech/tensorflow
|
||||
|
||||
RUN nproc
|
||||
|
||||
WORKDIR /STT/native_client
|
||||
RUN make NUM_PROCESSES=$(nproc) stt
|
||||
WORKDIR /DeepSpeech/native_client
|
||||
RUN make NUM_PROCESSES=$(nproc) deepspeech
|
||||
|
||||
WORKDIR /STT
|
||||
WORKDIR /DeepSpeech
|
||||
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install -U pip setuptools wheel
|
||||
RUN pip3 install --upgrade native_client/python/dist/*.whl
|
||||
|
||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||
@ -168,8 +176,8 @@ RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
||||
# Allow Python printing utf-8
|
||||
ENV PYTHONIOENCODING UTF-8
|
||||
|
||||
# Build KenLM in /STT/native_client/kenlm folder
|
||||
WORKDIR /STT/native_client
|
||||
# Build KenLM in /DeepSpeech/native_client/kenlm folder
|
||||
WORKDIR /DeepSpeech/native_client
|
||||
RUN rm -rf kenlm && \
|
||||
git clone https://github.com/kpu/kenlm && \
|
||||
cd kenlm && \
|
||||
@ -180,4 +188,4 @@ RUN rm -rf kenlm && \
|
||||
make -j $(nproc)
|
||||
|
||||
# Done
|
||||
WORKDIR /STT
|
||||
WORKDIR /DeepSpeech
|
@ -1,97 +0,0 @@
|
||||
# This is a Dockerfile useful for training models with Coqui STT.
|
||||
# You can train "acoustic models" with audio + Tensorflow, and
|
||||
# you can create "scorers" with text + KenLM.
|
||||
|
||||
FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3 AS kenlm-build
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential cmake libboost-system-dev \
|
||||
libboost-thread-dev libboost-program-options-dev \
|
||||
libboost-test-dev libeigen3-dev zlib1g-dev \
|
||||
libbz2-dev liblzma-dev && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Build KenLM to generate new scorers
|
||||
WORKDIR /code
|
||||
COPY kenlm /code/kenlm
|
||||
RUN cd /code/kenlm && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake .. && \
|
||||
make -j $(nproc) || \
|
||||
( echo "ERROR: Failed to build KenLM."; \
|
||||
echo "ERROR: Make sure you update the kenlm submodule on host before building this Dockerfile."; \
|
||||
echo "ERROR: $ cd STT; git submodule update --init kenlm"; \
|
||||
exit 1; )
|
||||
|
||||
|
||||
FROM ubuntu:20.04 AS wget-binaries
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends wget unzip xz-utils && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Tool to convert output graph for inference
|
||||
RUN wget --no-check-certificate https://github.com/coqui-ai/STT/releases/download/v0.9.3/convert_graphdef_memmapped_format.linux.amd64.zip -O temp.zip && \
|
||||
unzip temp.zip && \
|
||||
rm temp.zip
|
||||
|
||||
RUN wget --no-check-certificate https://github.com/reuben/STT/releases/download/v0.10.0-alpha.1/native_client.tar.xz -O temp.tar.xz && \
|
||||
tar -xf temp.tar.xz && \
|
||||
rm temp.tar.xz
|
||||
|
||||
|
||||
FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# We need to purge python3-xdg because
|
||||
# it's breaking STT install later with
|
||||
# errors about setuptools
|
||||
#
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
git \
|
||||
wget \
|
||||
libopus0 \
|
||||
libopusfile0 \
|
||||
libsndfile1 \
|
||||
sox \
|
||||
libsox-fmt-mp3 && \
|
||||
apt-get purge -y python3-xdg && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Make sure pip and its dependencies are up-to-date
|
||||
RUN pip3 install --upgrade pip wheel setuptools
|
||||
|
||||
WORKDIR /code
|
||||
|
||||
COPY native_client /code/native_client
|
||||
COPY .git /code/.git
|
||||
COPY training/coqui_stt_training/VERSION /code/training/coqui_stt_training/VERSION
|
||||
COPY training/coqui_stt_training/GRAPH_VERSION /code/training/coqui_stt_training/GRAPH_VERSION
|
||||
|
||||
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
|
||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
||||
|
||||
COPY setup.py /code/setup.py
|
||||
COPY VERSION /code/VERSION
|
||||
COPY training /code/training
|
||||
# Copy files from previous build stages
|
||||
RUN mkdir -p /code/kenlm/build/
|
||||
COPY --from=kenlm-build /code/kenlm/build/bin /code/kenlm/build/bin
|
||||
COPY --from=wget-binaries /convert_graphdef_memmapped_format /code/convert_graphdef_memmapped_format
|
||||
COPY --from=wget-binaries /generate_scorer_package /code/generate_scorer_package
|
||||
|
||||
# Install STT
|
||||
# No need for the decoder since we did it earlier
|
||||
# TensorFlow GPU should already be installed on the base image,
|
||||
# and we don't want to break that
|
||||
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
|
||||
|
||||
# Copy rest of the code and test training
|
||||
COPY . /code
|
||||
RUN ./bin/run-ldc93s1.sh && rm -rf ~/.local/share/stt
|
@ -1,10 +0,0 @@
|
||||
.git/lfs
|
||||
tensorflow
|
||||
.git/modules/tensorflow
|
||||
native_client/ds-swig
|
||||
native_client/libstt.so
|
||||
native_client/stt
|
||||
native_client/ctcdecode/dist/
|
||||
native_client/ctcdecode/temp_build
|
||||
native_client/ctcdecode/third_party.a
|
||||
native_client/ctcdecode/workspace_status.cc
|
@ -1,12 +0,0 @@
|
||||
# This is a Dockerfile useful for training models with Coqui STT in Jupyter notebooks
|
||||
|
||||
FROM ghcr.io/coqui-ai/stt-train:latest
|
||||
|
||||
WORKDIR /code/notebooks
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir jupyter jupyter_http_over_ws
|
||||
RUN jupyter serverextension enable --py jupyter_http_over_ws
|
||||
|
||||
EXPOSE 8888
|
||||
|
||||
CMD ["bash", "-c", "jupyter notebook --notebook-dir=/code/notebooks --ip 0.0.0.0 --no-browser --allow-root"]
|
68
Dockerfile.train.tmpl
Normal file
@ -0,0 +1,68 @@
|
||||
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
|
||||
|
||||
FROM tensorflow/tensorflow:1.15.4-gpu-py3
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
|
||||
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
apt-utils \
|
||||
bash-completion \
|
||||
build-essential \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
libboost-all-dev \
|
||||
libbz2-dev \
|
||||
locales \
|
||||
python3-venv \
|
||||
unzip \
|
||||
wget
|
||||
|
||||
# We need to remove it because it's breaking deepspeech install later with
|
||||
# weird errors about setuptools
|
||||
RUN apt-get purge -y python3-xdg
|
||||
|
||||
# Install dependencies for audio augmentation
|
||||
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
|
||||
|
||||
# Try and free some space
|
||||
RUN rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /
|
||||
RUN git clone $DEEPSPEECH_REPO DeepSpeech
|
||||
|
||||
WORKDIR /DeepSpeech
|
||||
RUN git checkout $DEEPSPEECH_SHA
|
||||
|
||||
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
|
||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
||||
|
||||
# Prepare deps
|
||||
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0
|
||||
|
||||
# Install DeepSpeech
|
||||
# - No need for the decoder since we did it earlier
|
||||
# - There is already correct TensorFlow GPU installed on the base image,
|
||||
# we don't want to break that
|
||||
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
|
||||
|
||||
# Tool to convert output graph for inference
|
||||
RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
|
||||
--artifact convert_graphdef_memmapped_format --target .
|
||||
|
||||
# Build KenLM to generate new scorers
|
||||
WORKDIR /DeepSpeech/native_client
|
||||
RUN rm -rf kenlm && \
|
||||
git clone https://github.com/kpu/kenlm && \
|
||||
cd kenlm && \
|
||||
git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
|
||||
mkdir -p build && \
|
||||
cd build && \
|
||||
cmake .. && \
|
||||
make -j $(nproc)
|
||||
WORKDIR /DeepSpeech
|
||||
|
||||
RUN ./bin/run-ldc93s1.sh
|
@ -1 +1 @@
|
||||
training/coqui_stt_training/GRAPH_VERSION
|
||||
training/deepspeech_training/GRAPH_VERSION
|
24
ISSUE_TEMPLATE.md
Normal file
@ -0,0 +1,24 @@
|
||||
For support and discussions, please use our [Discourse forums](https://discourse.mozilla.org/c/deep-speech).
|
||||
|
||||
If you've found a bug, or have a feature request, then please create an issue with the following information:
|
||||
|
||||
- **Have I written custom code (as opposed to running examples on an unmodified clone of the repository)**:
|
||||
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
|
||||
- **TensorFlow installed from (our builds, or upstream TensorFlow)**:
|
||||
- **TensorFlow version (use command below)**:
|
||||
- **Python version**:
|
||||
- **Bazel version (if compiling from source)**:
|
||||
- **GCC/Compiler version (if compiling from source)**:
|
||||
- **CUDA/cuDNN version**:
|
||||
- **GPU model and memory**:
|
||||
- **Exact command to reproduce**:
|
||||
|
||||
You can obtain the TensorFlow version with
|
||||
|
||||
```bash
|
||||
python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"
|
||||
```
|
||||
|
||||
Please describe the problem clearly. Be sure to convey here why it's a bug or a feature request.
|
||||
|
||||
Include any logs or source code that would be helpful to diagnose the problem. For larger logs, link to a Gist, not a screenshot. If including tracebacks, please include the full traceback. Try to provide a reproducible test case.
|
@ -1,2 +0,0 @@
|
||||
include training/coqui_stt_training/VERSION
|
||||
include training/coqui_stt_training/GRAPH_VERSION
|
8
Makefile
@ -1,8 +1,8 @@
|
||||
STT_REPO ?= https://github.com/coqui-ai/STT.git
|
||||
STT_SHA ?= origin/main
|
||||
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
|
||||
DEEPSPEECH_SHA ?= origin/master
|
||||
|
||||
Dockerfile%: Dockerfile%.tmpl
|
||||
sed \
|
||||
-e "s|#STT_REPO#|$(STT_REPO)|g" \
|
||||
-e "s|#STT_SHA#|$(STT_SHA)|g" \
|
||||
-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
|
||||
-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
|
||||
< $< > $@
|
||||
|
76
README.rst
@ -1,69 +1,23 @@
|
||||
.. image:: images/coqui-STT-logo-green.png
|
||||
:alt: Coqui STT logo
|
||||
Project DeepSpeech
|
||||
==================
|
||||
|
||||
|
||||
.. |doc-img| image:: https://readthedocs.org/projects/stt/badge/?version=latest
|
||||
:target: https://stt.readthedocs.io/?badge=latest
|
||||
.. image:: https://readthedocs.org/projects/deepspeech/badge/?version=latest
|
||||
:target: https://deepspeech.readthedocs.io/?badge=latest
|
||||
:alt: Documentation
|
||||
|
||||
.. |covenant-img| image:: https://img.shields.io/badge/Contributor%20Covenant-2.0-4baaaa.svg
|
||||
:target: CODE_OF_CONDUCT.md
|
||||
:alt: Contributor Covenant
|
||||
|
||||
.. |gitter-img| image:: https://badges.gitter.im/coqui-ai/STT.svg
|
||||
:target: https://gitter.im/coqui-ai/STT?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
|
||||
:alt: Gitter Room
|
||||
|
||||
.. |doi| image:: https://zenodo.org/badge/344354127.svg
|
||||
:target: https://zenodo.org/badge/latestdoi/344354127
|
||||
|
||||
|doc-img| |covenant-img| |gitter-img| |doi|
|
||||
|
||||
`👉 Subscribe to 🐸Coqui's Newsletter <https://coqui.ai/?subscription=true>`_
|
||||
|
||||
**Coqui STT** (🐸STT) is a fast, open-source, multi-platform, deep-learning toolkit for training and deploying speech-to-text models. 🐸STT is battle tested in both production and research 🚀
|
||||
|
||||
🐸STT features
|
||||
---------------
|
||||
|
||||
* High-quality pre-trained STT model.
|
||||
* Efficient training pipeline with Multi-GPU support.
|
||||
* Streaming inference.
|
||||
* Multiple possible transcripts, each with an associated confidence score.
|
||||
* Real-time inference.
|
||||
* Small-footprint acoustic model.
|
||||
* Bindings for various programming languages.
|
||||
|
||||
Where to Ask Questions
|
||||
----------------------
|
||||
|
||||
.. list-table::
|
||||
:widths: 25 25
|
||||
:header-rows: 1
|
||||
|
||||
* - Type
|
||||
- Link
|
||||
* - 🚨 **Bug Reports**
|
||||
- `Github Issue Tracker <https://github.com/coqui-ai/STT/issues/>`_
|
||||
* - 🎁 **Feature Requests & Ideas**
|
||||
- `Github Issue Tracker <https://github.com/coqui-ai/STT/issues/>`_
|
||||
* - ❔ **Questions**
|
||||
- `Github Discussions <https://github.com/coqui-ai/stt/discussions/>`_
|
||||
* - 💬 **General Discussion**
|
||||
- `Github Discussions <https://github.com/coqui-ai/stt/discussions/>`_ or `Gitter Room <https://gitter.im/coqui-ai/STT?utm_source=share-link&utm_medium=link&utm_campaign=share-link>`_
|
||||
.. image:: https://community-tc.services.mozilla.com/api/github/v1/repository/mozilla/DeepSpeech/master/badge.svg
|
||||
:target: https://community-tc.services.mozilla.com/api/github/v1/repository/mozilla/DeepSpeech/master/latest
|
||||
:alt: Task Status
|
||||
|
||||
|
||||
Links & Resources
|
||||
-----------------
|
||||
.. list-table::
|
||||
:widths: 25 25
|
||||
:header-rows: 1
|
||||
DeepSpeech is an open-source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper <https://arxiv.org/abs/1412.5567>`_. Project DeepSpeech uses Google's `TensorFlow <https://www.tensorflow.org/>`_ to make the implementation easier.
|
||||
|
||||
* - Type
|
||||
- Link
|
||||
* - 📰 **Documentation**
|
||||
- `stt.readthedocs.io <https://stt.readthedocs.io/>`_
|
||||
* - 🚀 **Latest release with pre-trained models**
|
||||
- `see the latest release on GitHub <https://github.com/coqui-ai/STT/releases/latest>`_
|
||||
* - 🤝 **Contribution Guidelines**
|
||||
- `CONTRIBUTING.rst <CONTRIBUTING.rst>`_
|
||||
Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io <https://deepspeech.readthedocs.io/?badge=latest>`_.
|
||||
|
||||
For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub <https://github.com/mozilla/DeepSpeech/releases/latest>`_.
|
||||
|
||||
For contribution guidelines, see `CONTRIBUTING.rst <CONTRIBUTING.rst>`_.
|
||||
|
||||
For contact and support information, see `SUPPORT.rst <SUPPORT.rst>`_.
|
||||
|
12
RELEASE.rst
Normal file
12
RELEASE.rst
Normal file
@ -0,0 +1,12 @@
|
||||
|
||||
Making a (new) release of the codebase
|
||||
======================================
|
||||
|
||||
|
||||
* Update version in VERSION file, commit
|
||||
* Open PR, ensure all tests are passing properly
|
||||
* Merge the PR
|
||||
* Fetch the new master, tag it with (hopefully) the same version as in VERSION (a sketch of this step follows the list)
|
||||
* Push that to Github
|
||||
* New build should be triggered and new packages should be made
|
||||
* TaskCluster should schedule a merge build **including** a "DeepSpeech Packages" task
|
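A hedged sketch of the tag-and-push step from the checklist above, assuming a plain git checkout with the VERSION file at the repository root; the `v` prefix on the tag name is an assumption, not something documented here:

```python
# Illustrative only: reads VERSION and tags the freshly fetched master with it.
import subprocess
from pathlib import Path

version = Path("VERSION").read_text().strip()
tag = f"v{version}"  # tag naming convention assumed

subprocess.run(["git", "fetch", "origin"], check=True)
subprocess.run(["git", "tag", tag, "origin/master"], check=True)
subprocess.run(["git", "push", "origin", tag], check=True)
```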
@ -1,95 +0,0 @@
|
||||
# General
|
||||
|
||||
This is the 1.0.0 release for Coqui STT, the deep learning toolkit for speech-to-text. In accordance with [semantic versioning](https://semver.org/), this version is not completely backwards compatible with previous versions. The compatibility guarantees of our semantic versioning cover the inference APIs: the C API and all the official language bindings: Python, Node.JS/ElectronJS and Android. You can get started today with Coqui STT 1.0.0 by following the steps in our [documentation](https://stt.readthedocs.io/).
|
||||
|
||||
This release includes pre-trained English models, available in the Coqui Model Zoo:
|
||||
|
||||
- [Coqui English STT v1.0.0-huge-vocab](https://coqui.ai/english/coqui/v1.0.0-huge-vocab)
|
||||
- [Coqui English STT v1.0.0-yesno](https://coqui.ai/english/coqui/v1.0.0-yesno)
|
||||
- [Coqui English STT v1.0.0-large-vocab](https://coqui.ai/english/coqui/v1.0.0-large-vocab)
|
||||
- [Coqui English STT v1.0.0-digits](https://coqui.ai/english/coqui/v1.0.0-digits)
|
||||
|
||||
all under the Apache 2.0 license.
|
||||
|
||||
The acoustic models were trained on American English data with synthetic noise augmentation. The model achieves a 4.5% word error rate on the [LibriSpeech clean test corpus](http://www.openslr.org/12) and 13.6% word error rate on the [LibriSpeech other test corpus](http://www.openslr.org/12) with the largest release language model.
|
||||
|
||||
Note that the model currently performs best in low-noise environments with clear recordings. This does not mean the model cannot be used outside of these conditions, but that accuracy may be lower. Some users may need to further fine tune the model to meet their intended use-case.
|
||||
|
||||
We also include example audio files:
|
||||
|
||||
[audio-1.0.0.tar.gz](https://github.com/coqui-ai/STT/releases/download/v1.0.0/audio-1.0.0.tar.gz)
|
||||
|
||||
which can be used to test the engine, and checkpoint files for the English model:
|
||||
|
||||
[coqui-stt-1.0.0-checkpoint.tar.gz](https://github.com/coqui-ai/STT/releases/download/v1.0.0/coqui-stt-1.0.0-checkpoint.tar.gz)
|
||||
|
||||
which are under the Apache 2.0 license and can be used as the basis for further fine-tuning. Finally, this release also includes a source code tarball:
|
||||
|
||||
[v1.0.0.tar.gz](https://github.com/coqui-ai/STT/archive/v1.0.0.tar.gz)
|
||||
|
||||
Under the [MPL-2.0 license](https://www.mozilla.org/en-US/MPL/2.0/). Note that this tarball is for archival purposes only since GitHub does not include submodules in the automatic tarballs. For usage and development with the source code, clone the repository using Git, following our [documentation](https://stt.readthedocs.io/).
|
||||
|
||||
|
||||
# Notable changes
|
||||
|
||||
- Removed support for protocol buffer input in native client and consolidated all packages under a single "STT" name accepting TFLite inputs
|
||||
- Added programmatic interface to training code and example Jupyter Notebooks, including how to train with Common Voice data
|
||||
- Added transparent handling of mixed sample rates and stereo audio in training inputs
|
||||
- Moved CI setup to GitHub Actions, making code contributions easier to test
|
||||
- Added configuration management via Coqpit, providing a more flexible config interface that's compatible with Coqui TTS
|
||||
- Handled Opus audio files transparently in training inputs
|
||||
- Added support for automatic dataset subset splitting
|
||||
- Added support for automatic alphabet generation and loading
|
||||
- Started publishing the training code CI for a faster notebook setup
|
||||
- Refactored training code into self-contained modules and deprecated train.py as the universal entry point for training
|
||||
|
||||
# Training Regimen + Hyperparameters for fine-tuning
|
||||
|
||||
The hyperparameters used to train the model are useful for fine tuning. Thus, we document them here, along with the training regimen and the hardware used (a server with 8 NVIDIA A100 GPUs, each with 40GB of VRAM). The full training configuration in JSON format is available [here](https://gist.github.com/reuben/6ced6a8b41e3d0849dafb7cae301e905).
|
||||
|
||||
The datasets used were:
|
||||
- Common Voice 7.0 (with custom train/dev/test splits)
|
||||
- Multilingual LibriSpeech (English, Opus)
|
||||
- LibriSpeech
|
||||
|
||||
The optimal `lm_alpha` and `lm_beta` values, determined with respect to Common Voice 7.0 (custom Coqui splits) and a large vocabulary language model, were as follows (a usage sketch follows the list):
|
||||
|
||||
- lm_alpha: 0.5891777425167632
|
||||
- lm_beta: 0.6619145283338659
|
||||
|
||||
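A usage sketch for these decoder weights, assuming the released acoustic model and scorer files (placeholder paths below) and the Python inference bindings' `enableExternalScorer`/`setScorerAlphaBeta` calls:

```python
# Sketch: apply the documented lm_alpha / lm_beta at inference time.
from stt import Model

LM_ALPHA = 0.5891777425167632
LM_BETA = 0.6619145283338659

model = Model("model.tflite")                     # placeholder model path
model.enableExternalScorer("large-vocab.scorer")  # placeholder scorer path
model.setScorerAlphaBeta(LM_ALPHA, LM_BETA)
```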
# Documentation
|
||||
|
||||
Documentation is available on [stt.readthedocs.io](https://stt.readthedocs.io/).
|
||||
|
||||
# Contact/Getting Help
|
||||
|
||||
1. [GitHub Discussions](https://github.com/coqui-ai/STT/discussions/) - best place to ask questions, get support, and discuss anything related to 🐸STT with other users.
|
||||
2. [Gitter](https://gitter.im/coqui-ai/) - You can also join our Gitter chat.
|
||||
3. [Issues](https://github.com/coqui-ai/STT/issues) - If you have discussed a problem and identified a bug in 🐸STT, or if you have a feature request, please open an issue in our repo. Please make sure you search for an already existing issue beforehand!
|
||||
|
||||
# Contributors to 1.0.0 release
|
||||
|
||||
- Alexandre Lissy
|
||||
- Anon-Artist
|
||||
- Anton Yaroshenko
|
||||
- Catalin Voss
|
||||
- CatalinVoss
|
||||
- dag7dev
|
||||
- Dustin Zubke
|
||||
- Eren Gölge
|
||||
- Erik Ziegler
|
||||
- Francis Tyers
|
||||
- Ideefixze
|
||||
- Ilnar Salimzianov
|
||||
- imrahul3610
|
||||
- Jeremiah Rose
|
||||
- Josh Meyer
|
||||
- Kathy Reid
|
||||
- Kelly Davis
|
||||
- Kenneth Heafield
|
||||
- NanoNabla
|
||||
- Neil Stoker
|
||||
- Reuben Morais
|
||||
- zaptrem
|
||||
|
||||
We’d also like to thank all the members of our [Gitter chat room](https://gitter.im/coqui-ai/STT) who have been helping to shape this release!
|
12
SUPPORT.rst
Normal file
12
SUPPORT.rst
Normal file
@ -0,0 +1,12 @@
|
||||
.. _support:
|
||||
|
||||
Contact/Getting Help
|
||||
====================
|
||||
|
||||
There are several ways to contact us or to get help:
|
||||
|
||||
#. `Discourse Forums <https://discourse.mozilla.org/c/deep-speech>`_ - The `Deep Speech category on Discourse <https://discourse.mozilla.org/c/deep-speech>`_ is the first place to look. Search for keywords related to your question or problem to see if someone else has run into it already. If you can't find anything relevant there, search on our `issue tracker <https://github.com/mozilla/deepspeech/issues>`_ to see if there is an existing issue about your problem.
|
||||
|
||||
#. `Matrix chat <https://chat.mozilla.org/#/room/#machinelearning:mozilla.org>`_ - If your question is not addressed by either the `FAQ <https://github.com/mozilla/DeepSpeech/wiki#frequently-asked-questions>`_ or `Discourse Forums <https://discourse.mozilla.org/c/deep-speech>`_\ , you can contact us on the ``#machinelearning`` channel on `Mozilla Matrix <https://chat.mozilla.org/#/room/#machinelearning:mozilla.org>`_\ ; people there can try to answer your question or help.
|
||||
|
||||
#. `Create a new issue <https://github.com/mozilla/deepspeech/issues>`_ - Finally, if you have a bug report or a feature request that isn't already covered by an existing issue, please open an issue in our repo and fill the appropriate information on your hardware and software setup.
|
2
VERSION
2
VERSION
@ -1 +1 @@
|
||||
training/coqui_stt_training/VERSION
|
||||
training/deepspeech_training/VERSION
|
28
bazel.patch
28
bazel.patch
@ -9,23 +9,23 @@ index c7aa4cb63..e084bc27c 100644
|
||||
+import java.io.PrintWriter;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
|
||||
@@ -73,6 +74,8 @@ public final class FileWriteAction extends AbstractFileWriteAction {
|
||||
*/
|
||||
private final CharSequence fileContents;
|
||||
|
||||
|
||||
+ private final Artifact output;
|
||||
+
|
||||
/** Minimum length (in chars) for content to be eligible for compression. */
|
||||
private static final int COMPRESS_CHARS_THRESHOLD = 256;
|
||||
|
||||
|
||||
@@ -90,6 +93,7 @@ public final class FileWriteAction extends AbstractFileWriteAction {
|
||||
fileContents = new CompressedString((String) fileContents);
|
||||
}
|
||||
this.fileContents = fileContents;
|
||||
+ this.output = output;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@@ -230,11 +234,32 @@ public final class FileWriteAction extends AbstractFileWriteAction {
|
||||
*/
|
||||
@ -59,7 +59,7 @@ index c7aa4cb63..e084bc27c 100644
|
||||
+ computeKeyDebugWriter.close();
|
||||
+ return rv;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/SpawnAction.java
|
||||
index 580788160..26883eb92 100644
|
||||
@ -74,9 +74,9 @@ index 580788160..26883eb92 100644
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
@@ -91,6 +92,9 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie
|
||||
|
||||
|
||||
private final CommandLine argv;
|
||||
|
||||
|
||||
+ private final Iterable<Artifact> inputs;
|
||||
+ private final Iterable<Artifact> outputs;
|
||||
+
|
||||
@ -91,10 +91,10 @@ index 580788160..26883eb92 100644
|
||||
+ this.inputs = inputs;
|
||||
+ this.outputs = outputs;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
@@ -312,23 +319,89 @@ public class SpawnAction extends AbstractAction implements ExecutionInfoSpecifie
|
||||
|
||||
|
||||
@Override
|
||||
protected String computeKey() {
|
||||
+ boolean genruleSetup = String.valueOf(Iterables.get(inputs, 0).getExecPath()).contains("genrule/genrule-setup.sh");
|
||||
@ -182,14 +182,14 @@ index 580788160..26883eb92 100644
|
||||
+ }
|
||||
+ return rv;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java
|
||||
index 3559fffde..3ba39617c 100644
|
||||
--- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java
|
||||
+++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java
|
||||
@@ -1111,10 +1111,30 @@ public class CppCompileAction extends AbstractAction
|
||||
|
||||
|
||||
@Override
|
||||
public String computeKey() {
|
||||
+ // ".ckd" Compute Key Debug
|
||||
@ -216,7 +216,7 @@ index 3559fffde..3ba39617c 100644
|
||||
+ for (Map.Entry<String, String> entry : executionInfo.entrySet()) {
|
||||
+ computeKeyDebugWriter.println("EXECINFO: " + entry.getKey() + "=" + entry.getValue());
|
||||
+ }
|
||||
|
||||
|
||||
// For the argv part of the cache key, ignore all compiler flags that explicitly denote module
|
||||
// file (.pcm) inputs. Depending on input discovery, some of the unused ones are removed from
|
||||
@@ -1124,6 +1144,9 @@ public class CppCompileAction extends AbstractAction
|
||||
@ -226,7 +226,7 @@ index 3559fffde..3ba39617c 100644
|
||||
+ for (String input : compileCommandLine.getArgv(getInternalOutputFile(), null)) {
|
||||
+ computeKeyDebugWriter.println("COMMAND: " + input);
|
||||
+ }
|
||||
|
||||
|
||||
/*
|
||||
* getArgv() above captures all changes which affect the compilation
|
||||
@@ -1133,19 +1156,31 @@ public class CppCompileAction extends AbstractAction
|
||||
@ -260,5 +260,5 @@ index 3559fffde..3ba39617c 100644
|
||||
+ computeKeyDebugWriter.close();
|
||||
+ return rv;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
|
@ -2,12 +2,11 @@
|
||||
"""
|
||||
Tool for comparing two wav samples
|
||||
"""
|
||||
import argparse
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
from coqui_stt_training.util.audio import AUDIO_TYPE_NP, mean_dbfs
|
||||
from coqui_stt_training.util.sample_collections import load_sample
|
||||
from deepspeech_training.util.audio import AUDIO_TYPE_NP, mean_dbfs
|
||||
from deepspeech_training.util.sample_collections import load_sample
|
||||
|
||||
|
||||
def fail(message):
|
||||
@ -19,29 +18,15 @@ def compare_samples():
|
||||
sample1 = load_sample(CLI_ARGS.sample1).unpack()
|
||||
sample2 = load_sample(CLI_ARGS.sample2).unpack()
|
||||
if sample1.audio_format != sample2.audio_format:
|
||||
fail(
|
||||
"Samples differ on: audio-format ({} and {})".format(
|
||||
sample1.audio_format, sample2.audio_format
|
||||
)
|
||||
)
|
||||
if abs(sample1.duration - sample2.duration) > 0.001:
|
||||
fail(
|
||||
"Samples differ on: duration ({} and {})".format(
|
||||
sample1.duration, sample2.duration
|
||||
)
|
||||
)
|
||||
fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
|
||||
if sample1.duration != sample2.duration:
|
||||
fail('Samples differ on: duration ({} and {})'.format(sample1.duration, sample2.duration))
|
||||
sample1.change_audio_type(AUDIO_TYPE_NP)
|
||||
sample2.change_audio_type(AUDIO_TYPE_NP)
|
||||
samples = [sample1, sample2]
|
||||
largest = np.argmax([sample1.audio.shape[0], sample2.audio.shape[0]])
|
||||
smallest = (largest + 1) % 2
|
||||
samples[largest].audio = samples[largest].audio[: len(samples[smallest].audio)]
|
||||
audio_diff = samples[largest].audio - samples[smallest].audio
|
||||
audio_diff = sample1.audio - sample2.audio
|
||||
diff_dbfs = mean_dbfs(audio_diff)
|
||||
differ_msg = "Samples differ on: sample data ({:0.2f} dB difference) ".format(
|
||||
diff_dbfs
|
||||
)
|
||||
equal_msg = "Samples are considered equal ({:0.2f} dB difference)".format(diff_dbfs)
|
||||
differ_msg = 'Samples differ on: sample data ({:0.2f} dB difference) '.format(diff_dbfs)
|
||||
equal_msg = 'Samples are considered equal ({:0.2f} dB difference)'.format(diff_dbfs)
|
||||
if CLI_ARGS.if_differ:
|
||||
if diff_dbfs <= CLI_ARGS.threshold:
|
||||
fail(equal_msg)
|
||||
@ -60,17 +45,13 @@ def handle_args():
|
||||
)
|
||||
parser.add_argument("sample1", help="Filename of sample 1 to compare")
|
||||
parser.add_argument("sample2", help="Filename of sample 2 to compare")
|
||||
parser.add_argument(
|
||||
"--threshold",
|
||||
type=float,
|
||||
default=-60.0,
|
||||
help="dB of sample deltas above which they are considered different",
|
||||
)
|
||||
parser.add_argument("--threshold", type=float, default=-60.0,
|
||||
help="dB of sample deltas above which they are considered different")
|
||||
parser.add_argument(
|
||||
"--if-differ",
|
||||
action="store_true",
|
||||
help="If to succeed and return status code 0 on different signals and fail on equal ones (inverse check)."
|
||||
"This will still fail on different formats or durations.",
|
||||
"This will still fail on different formats or durations.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-success-output",
|
||||
|
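The core of the comparison above, restated as a standalone sketch: trim both signals to the shorter length, subtract them, and express the residual energy in dBFS. `mean_dbfs` below is a simplified stand-in for the training util of the same name:

```python
import numpy as np

def mean_dbfs(samples: np.ndarray) -> float:
    # dB relative to a full-scale amplitude of 1.0 (simplified)
    rms = np.sqrt(np.mean(np.square(samples, dtype=np.float64)))
    return 20.0 * float(np.log10(max(rms, 1e-10)))

def db_difference(a: np.ndarray, b: np.ndarray) -> float:
    n = min(len(a), len(b))  # the tool truncates the longer sample
    return mean_dbfs(a[:n] - b[:n])

a = np.random.uniform(-1.0, 1.0, 16000).astype(np.float32)
b = a + np.random.normal(0.0, 1e-4, 16000).astype(np.float32)
print(db_difference(a, b))   # strongly negative, i.e. "considered equal"
```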
@ -1,136 +1,121 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
'''
|
||||
Tool for building a combined SDB or CSV sample-set from other sets
|
||||
Use 'python3 data_set_tool.py -h' for help
|
||||
"""
|
||||
import argparse
|
||||
'''
|
||||
import sys
|
||||
import argparse
|
||||
import progressbar
|
||||
from pathlib import Path
|
||||
|
||||
import progressbar
|
||||
from coqui_stt_training.util.audio import (
|
||||
AUDIO_TYPE_OPUS,
|
||||
from deepspeech_training.util.audio import (
|
||||
AUDIO_TYPE_PCM,
|
||||
AUDIO_TYPE_OPUS,
|
||||
AUDIO_TYPE_WAV,
|
||||
change_audio_types,
|
||||
)
|
||||
from coqui_stt_training.util.augmentations import (
|
||||
SampleAugmentation,
|
||||
apply_sample_augmentations,
|
||||
parse_augmentations,
|
||||
)
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR
|
||||
from coqui_stt_training.util.sample_collections import (
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.sample_collections import (
|
||||
CSVWriter,
|
||||
DirectSDBWriter,
|
||||
TarWriter,
|
||||
samples_from_sources,
|
||||
)
|
||||
from deepspeech_training.util.augmentations import (
|
||||
parse_augmentations,
|
||||
apply_sample_augmentations,
|
||||
SampleAugmentation
|
||||
)
|
||||
|
||||
AUDIO_TYPE_LOOKUP = {"wav": AUDIO_TYPE_WAV, "opus": AUDIO_TYPE_OPUS}
|
||||
AUDIO_TYPE_LOOKUP = {'wav': AUDIO_TYPE_WAV, 'opus': AUDIO_TYPE_OPUS}
|
||||
|
||||
|
||||
def build_data_set():
|
||||
audio_type = AUDIO_TYPE_LOOKUP[CLI_ARGS.audio_type]
|
||||
augmentations = parse_augmentations(CLI_ARGS.augment)
|
||||
print(f"Parsed augmentations from flags: {augmentations}")
|
||||
if any(not isinstance(a, SampleAugmentation) for a in augmentations):
|
||||
print(
|
||||
"Warning: Some of the specified augmentations will not get applied, as this tool only supports "
|
||||
"overlay, codec, reverb, resample and volume."
|
||||
)
|
||||
print('Warning: Some of the specified augmentations will not get applied, as this tool only supports '
|
||||
'overlay, codec, reverb, resample and volume.')
|
||||
extension = Path(CLI_ARGS.target).suffix.lower()
|
||||
labeled = not CLI_ARGS.unlabeled
|
||||
if extension == ".csv":
|
||||
writer = CSVWriter(
|
||||
CLI_ARGS.target, absolute_paths=CLI_ARGS.absolute_paths, labeled=labeled
|
||||
)
|
||||
elif extension == ".sdb":
|
||||
writer = DirectSDBWriter(
|
||||
CLI_ARGS.target, audio_type=audio_type, labeled=labeled
|
||||
)
|
||||
elif extension == ".tar":
|
||||
writer = TarWriter(
|
||||
CLI_ARGS.target, labeled=labeled, gz=False, include=CLI_ARGS.include
|
||||
)
|
||||
elif extension == ".tgz" or CLI_ARGS.target.lower().endswith(".tar.gz"):
|
||||
writer = TarWriter(
|
||||
CLI_ARGS.target, labeled=labeled, gz=True, include=CLI_ARGS.include
|
||||
)
|
||||
if extension == '.csv':
|
||||
writer = CSVWriter(CLI_ARGS.target, absolute_paths=CLI_ARGS.absolute_paths, labeled=labeled)
|
||||
elif extension == '.sdb':
|
||||
writer = DirectSDBWriter(CLI_ARGS.target, audio_type=audio_type, labeled=labeled)
|
||||
elif extension == '.tar':
|
||||
writer = TarWriter(CLI_ARGS.target, labeled=labeled, gz=False, include=CLI_ARGS.include)
|
||||
elif extension == '.tgz' or CLI_ARGS.target.lower().endswith('.tar.gz'):
|
||||
writer = TarWriter(CLI_ARGS.target, labeled=labeled, gz=True, include=CLI_ARGS.include)
|
||||
else:
|
||||
print(
|
||||
"Unknown extension of target file - has to be either .csv, .sdb, .tar, .tar.gz or .tgz"
|
||||
)
|
||||
print('Unknown extension of target file - has to be either .csv, .sdb, .tar, .tar.gz or .tgz')
|
||||
sys.exit(1)
|
||||
with writer:
|
||||
samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled)
|
||||
num_samples = len(samples)
|
||||
if augmentations:
|
||||
samples = apply_sample_augmentations(
|
||||
samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations
|
||||
)
|
||||
samples = apply_sample_augmentations(samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations)
|
||||
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
|
||||
for sample in bar(
|
||||
change_audio_types(
|
||||
for sample in bar(change_audio_types(
|
||||
samples,
|
||||
audio_type=audio_type,
|
||||
bitrate=CLI_ARGS.bitrate,
|
||||
processes=CLI_ARGS.workers,
|
||||
)
|
||||
):
|
||||
processes=CLI_ARGS.workers)):
|
||||
writer.add(sample)
|
||||
|
||||
|
||||
def handle_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Tool for building a combined SDB or CSV sample-set from other sets"
|
||||
description='Tool for building a combined SDB or CSV sample-set from other sets'
|
||||
)
|
||||
parser.add_argument(
|
||||
"sources",
|
||||
nargs="+",
|
||||
help="Source CSV and/or SDB files - "
|
||||
"Note: For getting a correctly ordered target set, source SDBs have to have their samples "
|
||||
"already ordered from shortest to longest.",
|
||||
'sources',
|
||||
nargs='+',
|
||||
help='Source CSV and/or SDB files - '
|
||||
'Note: For getting a correctly ordered target set, source SDBs have to have their samples '
|
||||
'already ordered from shortest to longest.',
|
||||
)
|
||||
parser.add_argument("target", help="SDB, CSV or TAR(.gz) file to create")
|
||||
parser.add_argument(
|
||||
"--audio-type",
|
||||
default="opus",
|
||||
'target',
|
||||
help='SDB, CSV or TAR(.gz) file to create'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--audio-type',
|
||||
default='opus',
|
||||
choices=AUDIO_TYPE_LOOKUP.keys(),
|
||||
help="Audio representation inside target SDB",
|
||||
help='Audio representation inside target SDB',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bitrate",
|
||||
'--bitrate',
|
||||
type=int,
|
||||
help="Bitrate for lossy compressed SDB samples like in case of --audio-type opus",
|
||||
help='Bitrate for lossy compressed SDB samples like in case of --audio-type opus',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workers", type=int, default=None, help="Number of encoding SDB workers"
|
||||
'--workers', type=int, default=None, help='Number of encoding SDB workers'
|
||||
)
|
||||
parser.add_argument(
|
||||
"--unlabeled",
|
||||
action="store_true",
|
||||
help="If to build an data-set with unlabeled (audio only) samples - "
|
||||
"typically used for building noise augmentation corpora",
|
||||
'--unlabeled',
|
||||
action='store_true',
|
||||
help='If to build a data-set with unlabeled (audio only) samples - '
|
||||
'typically used for building noise augmentation corpora',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--absolute-paths",
|
||||
action="store_true",
|
||||
help="If to reference samples by their absolute paths when writing CSV files",
|
||||
'--absolute-paths',
|
||||
action='store_true',
|
||||
help='If to reference samples by their absolute paths when writing CSV files',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--augment",
|
||||
action="append",
|
||||
help="Add an augmentation operation",
|
||||
'--augment',
|
||||
action='append',
|
||||
help='Add an augmentation operation',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include",
|
||||
action="append",
|
||||
help="Adds a file to the root directory of .tar(.gz) targets",
|
||||
'--include',
|
||||
action='append',
|
||||
help='Adds a file to the root directory of .tar(.gz) targets',
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
CLI_ARGS = handle_args()
|
||||
build_data_set()
|
||||
|
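The extension-based writer dispatch in `build_data_set` above, condensed into a sketch; the writer classes and keyword arguments are assumed to follow the calls shown in the diff:

```python
from pathlib import Path

from coqui_stt_training.util.audio import AUDIO_TYPE_OPUS
from coqui_stt_training.util.sample_collections import (
    CSVWriter,
    DirectSDBWriter,
    TarWriter,
)

def make_writer(target: str, audio_type=AUDIO_TYPE_OPUS, labeled: bool = True):
    # Pick a writer from the target file extension, as the tool above does.
    ext = Path(target).suffix.lower()
    if ext == ".csv":
        return CSVWriter(target, labeled=labeled)
    if ext == ".sdb":
        return DirectSDBWriter(target, audio_type=audio_type, labeled=labeled)
    if ext == ".tar":
        return TarWriter(target, labeled=labeled, gz=False)
    if ext == ".tgz" or target.lower().endswith(".tar.gz"):
        return TarWriter(target, labeled=labeled, gz=True)
    raise ValueError("target must end in .csv, .sdb, .tar, .tar.gz or .tgz")
```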
@ -4,7 +4,8 @@ import os
|
||||
import tarfile
|
||||
|
||||
import pandas
|
||||
from coqui_stt_training.util.importers import get_importers_parser
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
|
||||
|
@ -4,7 +4,8 @@ import os
|
||||
import tarfile
|
||||
|
||||
import pandas
|
||||
from coqui_stt_training.util.importers import get_importers_parser
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMNNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
|
||||
|
@ -5,21 +5,21 @@ Ministère de l'Économie, des Finances et de la Relance
|
||||
"""
|
||||
|
||||
import csv
|
||||
import decimal
|
||||
import hashlib
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import unicodedata
|
||||
import xml.etree.ElementTree as ET
|
||||
import os
|
||||
import progressbar
|
||||
import subprocess
|
||||
import zipfile
|
||||
from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import hashlib
|
||||
import decimal
|
||||
import math
|
||||
import unicodedata
|
||||
import re
|
||||
import sox
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
try:
|
||||
from num2words import num2words
|
||||
@ -27,19 +27,19 @@ except ImportError as ex:
|
||||
print("pip install num2words")
|
||||
sys.exit(1)
|
||||
|
||||
import requests
|
||||
import json
|
||||
|
||||
import requests
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.helpers import secs_to_hours
|
||||
from coqui_stt_training.util.importers import (
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.helpers import secs_to_hours
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_imported_samples,
|
||||
get_validate_label,
|
||||
print_import_report,
|
||||
)
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
@ -50,187 +50,58 @@ MIN_SECS = 0.85
|
||||
|
||||
DATASET_RELEASE_CSV = "https://data.economie.gouv.fr/explore/dataset/transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020/download/?format=csv&timezone=Europe/Berlin&lang=fr&use_labels_for_header=true&csv_separator=%3B"
|
||||
DATASET_RELEASE_SHA = [
|
||||
(
|
||||
"863d39a06a388c6491c6ff2f6450b151f38f1b57",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001",
|
||||
),
|
||||
(
|
||||
"2f3a0305aa04c61220bb00b5a4e553e45dbf12e1",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.002",
|
||||
),
|
||||
(
|
||||
"5e55e9f1f844097349188ac875947e5a3d7fe9f1",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.003",
|
||||
),
|
||||
(
|
||||
"8bf54842cf07948ca5915e27a8bd5fa5139c06ae",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.004",
|
||||
),
|
||||
(
|
||||
"c8963504aadc015ac48f9af80058a0bb3440b94f",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.005",
|
||||
),
|
||||
(
|
||||
"d95e225e908621d83ce4e9795fd108d9d310e244",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.006",
|
||||
),
|
||||
(
|
||||
"de6ed9c2b0ee80ca879aae8ba7923cc93217d811",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.007",
|
||||
),
|
||||
(
|
||||
"234283c47dacfcd4450d836c52c25f3e807fc5f2",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.008",
|
||||
),
|
||||
(
|
||||
"4e6b67a688639bb72f8cd81782eaba604a8d32a6",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.009",
|
||||
),
|
||||
(
|
||||
"4165a51389777c8af8e6253d87bdacb877e8b3b0",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.010",
|
||||
),
|
||||
(
|
||||
"34322e7009780d97ef5bd02bf2f2c7a31f00baff",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.011",
|
||||
),
|
||||
(
|
||||
"48c5be3b2ca9d6108d525da6a03e91d93a95dbac",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.012",
|
||||
),
|
||||
(
|
||||
"87573172f506a189c2ebc633856fe11a2e9cd213",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.013",
|
||||
),
|
||||
(
|
||||
"6ab2c9e508e9278d5129f023e018725c4a7c69e8",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.014",
|
||||
),
|
||||
(
|
||||
"4f84df831ef46dce5d3ab3e21817687a2d8c12d0",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.015",
|
||||
),
|
||||
(
|
||||
"e69bfb079885c299cb81080ef88b1b8b57158aa6",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.016",
|
||||
),
|
||||
(
|
||||
"5f764ba788ee273981cf211b242c29b49ca22c5e",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.017",
|
||||
),
|
||||
(
|
||||
"b6aa81a959525363223494830c1e7307d4c4bae6",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.018",
|
||||
),
|
||||
(
|
||||
"91ddcf43c7bf113a6f2528b857c7ec22a50a148a",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.019",
|
||||
),
|
||||
(
|
||||
"fa1b29273dd77b9a7494983a2f9ae52654b931d7",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.020",
|
||||
),
|
||||
(
|
||||
"1113aef4f5e2be2f7fbf2d54b6c710c1c0e7135f",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.021",
|
||||
),
|
||||
(
|
||||
"ce6420d5d0b6b5135ba559f83e1a82d4d615c470",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.022",
|
||||
),
|
||||
(
|
||||
"d0976ed292ac24fcf1590d1ea195077c74b05471",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.023",
|
||||
),
|
||||
(
|
||||
"ec746cd6af066f62d9bf8d3b2f89174783ff4e3c",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.024",
|
||||
),
|
||||
(
|
||||
"570d9e1e84178e32fd867171d4b3aaecda1fd4fb",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.025",
|
||||
),
|
||||
(
|
||||
"c29ccc7467a75b2cae3d7f2e9fbbb2ab276cb8ac",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.026",
|
||||
),
|
||||
(
|
||||
"08406a51146d88e208704ce058c060a1e44efa50",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.027",
|
||||
),
|
||||
(
|
||||
"199aedad733a78ea1e7d47def9c71c6fd5795e02",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.028",
|
||||
),
|
||||
(
|
||||
"db856a068f92fb4f01f410bba42c7271de0f231a",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.029",
|
||||
),
|
||||
(
|
||||
"e3c0135f16c6c9d25a09dcb4f99a685438a84740",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.030",
|
||||
),
|
||||
(
|
||||
"e51b8bb9c0ae4339f98b4f21e6d29b825109f0ac",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.031",
|
||||
),
|
||||
(
|
||||
"be5e80cbc49b59b31ae33c30576ef0e1a162d84e",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.032",
|
||||
),
|
||||
(
|
||||
"501df58e3ff55fcfd75b93dab57566dc536948b8",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.033",
|
||||
),
|
||||
(
|
||||
"1a114875811a8cdcb8d85a9f6dbee78be3e05131",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.034",
|
||||
),
|
||||
(
|
||||
"465d824e7ee46448369182c0c28646d155a2249b",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.035",
|
||||
),
|
||||
(
|
||||
"37f341b1b266d143eb73138c31cfff3201b9d619",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.036",
|
||||
),
|
||||
(
|
||||
"9e7d8255987a8a77a90e0d4b55c8fd38b9fb5694",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.037",
|
||||
),
|
||||
(
|
||||
"54886755630cb080a53098cb1b6c951c6714a143",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.038",
|
||||
),
|
||||
(
|
||||
"4b7cbb0154697be795034f7a49712e882a97197a",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.039",
|
||||
),
|
||||
(
|
||||
"c8e1e565a0e7a1f6ff1dbfcefe677aa74a41d2f2",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.040",
|
||||
),
|
||||
("863d39a06a388c6491c6ff2f6450b151f38f1b57", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001"),
|
||||
("2f3a0305aa04c61220bb00b5a4e553e45dbf12e1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.002"),
|
||||
("5e55e9f1f844097349188ac875947e5a3d7fe9f1", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.003"),
|
||||
("8bf54842cf07948ca5915e27a8bd5fa5139c06ae", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.004"),
|
||||
("c8963504aadc015ac48f9af80058a0bb3440b94f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.005"),
|
||||
("d95e225e908621d83ce4e9795fd108d9d310e244", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.006"),
|
||||
("de6ed9c2b0ee80ca879aae8ba7923cc93217d811", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.007"),
|
||||
("234283c47dacfcd4450d836c52c25f3e807fc5f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.008"),
|
||||
("4e6b67a688639bb72f8cd81782eaba604a8d32a6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.009"),
|
||||
("4165a51389777c8af8e6253d87bdacb877e8b3b0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.010"),
|
||||
("34322e7009780d97ef5bd02bf2f2c7a31f00baff", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.011"),
|
||||
("48c5be3b2ca9d6108d525da6a03e91d93a95dbac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.012"),
|
||||
("87573172f506a189c2ebc633856fe11a2e9cd213", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.013"),
|
||||
("6ab2c9e508e9278d5129f023e018725c4a7c69e8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.014"),
|
||||
("4f84df831ef46dce5d3ab3e21817687a2d8c12d0", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.015"),
|
||||
("e69bfb079885c299cb81080ef88b1b8b57158aa6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.016"),
|
||||
("5f764ba788ee273981cf211b242c29b49ca22c5e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.017"),
|
||||
("b6aa81a959525363223494830c1e7307d4c4bae6", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.018"),
|
||||
("91ddcf43c7bf113a6f2528b857c7ec22a50a148a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.019"),
|
||||
("fa1b29273dd77b9a7494983a2f9ae52654b931d7", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.020"),
|
||||
("1113aef4f5e2be2f7fbf2d54b6c710c1c0e7135f", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.021"),
|
||||
("ce6420d5d0b6b5135ba559f83e1a82d4d615c470", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.022"),
|
||||
("d0976ed292ac24fcf1590d1ea195077c74b05471", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.023"),
|
||||
("ec746cd6af066f62d9bf8d3b2f89174783ff4e3c", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.024"),
|
||||
("570d9e1e84178e32fd867171d4b3aaecda1fd4fb", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.025"),
|
||||
("c29ccc7467a75b2cae3d7f2e9fbbb2ab276cb8ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.026"),
|
||||
("08406a51146d88e208704ce058c060a1e44efa50", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.027"),
|
||||
("199aedad733a78ea1e7d47def9c71c6fd5795e02", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.028"),
|
||||
("db856a068f92fb4f01f410bba42c7271de0f231a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.029"),
|
||||
("e3c0135f16c6c9d25a09dcb4f99a685438a84740", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.030"),
|
||||
("e51b8bb9c0ae4339f98b4f21e6d29b825109f0ac", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.031"),
|
||||
("be5e80cbc49b59b31ae33c30576ef0e1a162d84e", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.032"),
|
||||
("501df58e3ff55fcfd75b93dab57566dc536948b8", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.033"),
|
||||
("1a114875811a8cdcb8d85a9f6dbee78be3e05131", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.034"),
|
||||
("465d824e7ee46448369182c0c28646d155a2249b", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.035"),
|
||||
("37f341b1b266d143eb73138c31cfff3201b9d619", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.036"),
|
||||
("9e7d8255987a8a77a90e0d4b55c8fd38b9fb5694", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.037"),
|
||||
("54886755630cb080a53098cb1b6c951c6714a143", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.038"),
|
||||
("4b7cbb0154697be795034f7a49712e882a97197a", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.039"),
|
||||
("c8e1e565a0e7a1f6ff1dbfcefe677aa74a41d2f2", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.040"),
|
||||
]
|
||||
|
||||
|
||||
def _download_and_preprocess_data(csv_url, target_dir):
|
||||
dataset_sources = os.path.join(
|
||||
target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "data.txt"
|
||||
)
|
||||
dataset_sources = os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "data.txt")
|
||||
if os.path.exists(dataset_sources):
|
||||
return dataset_sources
|
||||
|
||||
# Making path absolute
|
||||
target_dir = os.path.abspath(target_dir)
|
||||
csv_ref = requests.get(csv_url).text.split("\r\n")[1:-1]
|
||||
csv_ref = requests.get(csv_url).text.split('\r\n')[1:-1]
|
||||
for part in csv_ref:
|
||||
part_filename = (
|
||||
requests.head(part)
|
||||
.headers.get("Content-Disposition")
|
||||
.split(" ")[1]
|
||||
.split("=")[1]
|
||||
.replace('"', "")
|
||||
)
|
||||
part_filename = requests.head(part).headers.get("Content-Disposition").split(" ")[1].split("=")[1].replace('"', "")
|
||||
if not os.path.exists(os.path.join(target_dir, part_filename)):
|
||||
part_path = maybe_download(part_filename, target_dir, part)
|
||||
|
||||
@ -255,18 +126,10 @@ def _download_and_preprocess_data(csv_url, target_dir):
|
||||
assert csum == sha1
|
||||
|
||||
# Conditionally extract data
|
||||
_maybe_extract(
|
||||
target_dir,
|
||||
"transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020",
|
||||
"transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip",
|
||||
"transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020.zip",
|
||||
)
|
||||
_maybe_extract(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020", "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip", "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020.zip")
|
||||
|
||||
# Produce source text for extraction / conversion
|
||||
return _maybe_create_sources(
|
||||
os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020")
|
||||
)
|
||||
|
||||
return _maybe_create_sources(os.path.join(target_dir, "transcriptionsXML_audioMP3_MEFR_CCPMF_2012-2020"))
|
||||
|
||||
def _maybe_extract(target_dir, extracted_data, archive, final):
|
||||
# If target_dir/extracted_data does not exist, extract archive in target_dir
|
||||
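The checksum assertion earlier in this hunk (`assert csum == sha1`) checks each downloaded part against an entry of `DATASET_RELEASE_SHA`. A standalone sketch of that check using only the standard library:

```python
import hashlib

def sha1_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha1()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Example pair taken from DATASET_RELEASE_SHA above.
expected = "863d39a06a388c6491c6ff2f6450b151f38f1b57"
part = "transcriptionsxml_audiomp3_mefr_ccpmf_2012-2020_2.zip.001"
assert sha1_of(part) == expected, f"checksum mismatch for {part}"
```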
@ -284,10 +147,7 @@ def _maybe_extract(target_dir, extracted_data, archive, final):
|
||||
subprocess.check_call(cmdline, shell=True, cwd=target_dir)
|
||||
assert os.path.exists(archive_path)
|
||||
|
||||
print(
|
||||
'No directory "%s" - extracting archive %s ...'
|
||||
% (extracted_path, archive_path)
|
||||
)
|
||||
print('No directory "%s" - extracting archive %s ...' % (extracted_path, archive_path))
|
||||
with zipfile.ZipFile(archive_path) as zip_f:
|
||||
zip_f.extractall(extracted_path)
|
||||
|
||||
@ -296,7 +156,6 @@ def _maybe_extract(target_dir, extracted_data, archive, final):
|
||||
else:
|
||||
print('Found directory "%s" - not extracting it from archive.' % extracted_path)
|
||||
|
||||
|
||||
def _maybe_create_sources(dir):
|
||||
dataset_sources = os.path.join(dir, "data.txt")
|
||||
MP3 = glob(os.path.join(dir, "**", "*.mp3"))
|
||||
@ -309,8 +168,8 @@ def _maybe_create_sources(dir):
|
||||
for f_xml in XML:
|
||||
b_mp3 = os.path.splitext(os.path.basename(f_mp3))[0]
|
||||
b_xml = os.path.splitext(os.path.basename(f_xml))[0]
|
||||
a_mp3 = b_mp3.split("_")
|
||||
a_xml = b_xml.split("_")
|
||||
a_mp3 = b_mp3.split('_')
|
||||
a_xml = b_xml.split('_')
|
||||
score = 0
|
||||
date_mp3 = a_mp3[0]
|
||||
date_xml = a_xml[0]
|
||||
@ -319,7 +178,7 @@ def _maybe_create_sources(dir):
|
||||
continue
|
||||
|
||||
for i in range(min(len(a_mp3), len(a_xml))):
|
||||
if a_mp3[i] == a_xml[i]:
|
||||
if (a_mp3[i] == a_xml[i]):
|
||||
score += 1
|
||||
|
||||
if score >= 1:
|
||||
@ -328,7 +187,7 @@ def _maybe_create_sources(dir):
|
||||
# sort by score
|
||||
MP3_XML_Scores.sort(key=lambda x: x[2], reverse=True)
|
||||
for s_mp3, s_xml, score in MP3_XML_Scores:
|
||||
# print(s_mp3, s_xml, score)
|
||||
#print(s_mp3, s_xml, score)
|
||||
if score not in MP3_XML_Fin:
|
||||
MP3_XML_Fin[score] = {}
|
||||
|
||||
@ -349,14 +208,13 @@ def _maybe_create_sources(dir):
|
||||
if os.path.getsize(mp3) > 0 and os.path.getsize(xml) > 0:
|
||||
mp3 = os.path.relpath(mp3, dir)
|
||||
xml = os.path.relpath(xml, dir)
|
||||
ds.write("{},{},{:0.2e}\n".format(xml, mp3, 2.5e-4))
|
||||
ds.write('{},{},{:0.2e}\n'.format(xml, mp3, 2.5e-4))
|
||||
else:
|
||||
print("Empty file {} or {}".format(mp3, xml), file=sys.stderr)
|
||||
|
||||
print("Missing XML pairs:", MP3, file=sys.stderr)
|
||||
return dataset_sources
|
||||
|
||||
|
||||
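The MP3/XML pairing performed above can be restated compactly: split both basenames on `_`, require the leading date token to match, and score pairs by the number of position-wise equal tokens. A sketch (not the exact importer code):

```python
import os

def match_score(mp3_path: str, xml_path: str) -> int:
    a_mp3 = os.path.splitext(os.path.basename(mp3_path))[0].split("_")
    a_xml = os.path.splitext(os.path.basename(xml_path))[0].split("_")
    if a_mp3[0] != a_xml[0]:  # different date prefix: never a pair
        return 0
    return sum(1 for i in range(min(len(a_mp3), len(a_xml))) if a_mp3[i] == a_xml[i])

print(match_score("20181017_Innovation.mp3", "20181017_Innovation.xml"))  # -> 2
```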
def maybe_normalize_for_digits(label):
|
||||
# first, try to identify numbers like "50 000", "260 000"
|
||||
if " " in label:
|
||||
@ -376,44 +234,30 @@ def maybe_normalize_for_digits(label):
|
||||
date_or_time = re.compile(r"(\d{1,2}):(\d{2}):?(\d{2})?")
|
||||
maybe_date_or_time = date_or_time.findall(s)
|
||||
if len(maybe_date_or_time) > 0:
|
||||
maybe_hours = maybe_date_or_time[0][0]
|
||||
maybe_hours = maybe_date_or_time[0][0]
|
||||
maybe_minutes = maybe_date_or_time[0][1]
|
||||
maybe_seconds = maybe_date_or_time[0][2]
|
||||
if len(maybe_seconds) > 0:
|
||||
label = label.replace(
|
||||
"{}:{}:{}".format(
|
||||
maybe_hours, maybe_minutes, maybe_seconds
|
||||
),
|
||||
"{} heures {} minutes et {} secondes".format(
|
||||
maybe_hours, maybe_minutes, maybe_seconds
|
||||
),
|
||||
)
|
||||
label = label.replace("{}:{}:{}".format(maybe_hours, maybe_minutes, maybe_seconds), "{} heures {} minutes et {} secondes".format(maybe_hours, maybe_minutes, maybe_seconds))
|
||||
else:
|
||||
label = label.replace(
|
||||
"{}:{}".format(maybe_hours, maybe_minutes),
|
||||
"{} heures et {} minutes".format(
|
||||
maybe_hours, maybe_minutes
|
||||
),
|
||||
)
|
||||
label = label.replace("{}:{}".format(maybe_hours, maybe_minutes), "{} heures et {} minutes".format(maybe_hours, maybe_minutes))
|
||||
|
||||
new_label = []
|
||||
# pylint: disable=too-many-nested-blocks
|
||||
for s in label.split(" "):
|
||||
if any(i.isdigit() for i in s):
|
||||
s = s.replace(",", ".") # num2words requires "." for floats
|
||||
s = s.replace('"', "") # clean some data, num2words would choke on 1959"
|
||||
s = s.replace(",", ".") # num2words requires "." for floats
|
||||
s = s.replace("\"", "") # clean some data, num2words would choke on 1959"
|
||||
|
||||
last_c = s[-1]
|
||||
if not last_c.isdigit(): # num2words will choke on "0.6.", "24 ?"
|
||||
if not last_c.isdigit(): # num2words will choke on "0.6.", "24 ?"
|
||||
s = s[:-1]
|
||||
|
||||
if any(
|
||||
i.isalpha() for i in s
|
||||
): # So we have any(isdigit()) **and** any(sialpha), like "3D"
|
||||
if any(i.isalpha() for i in s): # So we have any(isdigit()) **and** any(sialpha), like "3D"
|
||||
ns = []
|
||||
for c in s:
|
||||
nc = c
|
||||
if c.isdigit(): # convert "3" to "trois-"
|
||||
if c.isdigit(): # convert "3" to "trois-"
|
||||
try:
|
||||
nc = num2words(c, lang="fr") + "-"
|
||||
except decimal.InvalidOperation as ex:
|
||||
@ -430,36 +274,22 @@ def maybe_normalize_for_digits(label):
|
||||
new_label.append(s)
|
||||
return " ".join(new_label)
|
||||
|
||||
|
||||
def maybe_normalize_for_specials_chars(label):
|
||||
label = label.replace("%", "pourcents")
|
||||
label = label.replace("/", ", ") # clean intervals like 2019/2022 to "2019 2022"
|
||||
label = label.replace("-", ", ") # clean intervals like 70-80 to "70 80"
|
||||
label = label.replace("+", " plus ") # clean + and make it speakable
|
||||
label = label.replace("€", " euros ") # clean euro symbol and make it speakable
|
||||
label = label.replace(
|
||||
"., ", ", "
|
||||
) # clean some strange "4.0., " (20181017_Innovation.xml)
|
||||
label = label.replace(
|
||||
"°", " degré "
|
||||
) # clean some strange "°5" (20181210_EtatsGeneraux-1000_fre_750_und.xml)
|
||||
label = label.replace("...", ".") # remove ellipsis
|
||||
label = label.replace("..", ".") # remove broken ellipsis
|
||||
label = label.replace(
|
||||
"m²", "mètre-carrés"
|
||||
) # 20150616_Defi_Climat_3_wmv_0_fre_minefi.xml
|
||||
label = label.replace(
|
||||
"[end]", ""
|
||||
) # broken tag in 20150123_Entretiens_Tresor_PGM_wmv_0_fre_minefi.xml
|
||||
label = label.replace(
|
||||
u"\xB8c", " ç"
|
||||
) # strange cedilla in 20150417_Printemps_Economie_2_wmv_0_fre_minefi.xml
|
||||
label = label.replace(
|
||||
"C0²", "CO 2"
|
||||
) # 20121016_Syteme_sante_copie_wmv_0_fre_minefi.xml
|
||||
label = label.replace("/", ", ") # clean intervals like 2019/2022 to "2019 2022"
|
||||
label = label.replace("-", ", ") # clean intervals like 70-80 to "70 80"
|
||||
label = label.replace("+", " plus ") # clean + and make it speakable
|
||||
label = label.replace("€", " euros ") # clean euro symbol and make it speakable
|
||||
label = label.replace("., ", ", ") # clean some strange "4.0., " (20181017_Innovation.xml)
|
||||
label = label.replace("°", " degré ") # clean some strange "°5" (20181210_EtatsGeneraux-1000_fre_750_und.xml)
|
||||
label = label.replace("...", ".") # remove ellipsis
|
||||
label = label.replace("..", ".") # remove broken ellipsis
|
||||
label = label.replace("m²", "mètre-carrés") # 20150616_Defi_Climat_3_wmv_0_fre_minefi.xml
|
||||
label = label.replace("[end]", "") # broken tag in 20150123_Entretiens_Tresor_PGM_wmv_0_fre_minefi.xml
|
||||
label = label.replace(u'\xB8c', " ç") # strange cedilla in 20150417_Printemps_Economie_2_wmv_0_fre_minefi.xml
|
||||
label = label.replace("C0²", "CO 2") # 20121016_Syteme_sante_copie_wmv_0_fre_minefi.xml
|
||||
return label
|
||||
|
||||
|
||||
def maybe_normalize_for_anglicisms(label):
|
||||
label = label.replace("B2B", "B to B")
|
||||
label = label.replace("B2C", "B to C")
|
||||
@ -467,14 +297,12 @@ def maybe_normalize_for_anglicisms(label):
|
||||
label = label.replace("@", "at ")
|
||||
return label
|
||||
|
||||
|
||||
def maybe_normalize(label):
|
||||
label = maybe_normalize_for_specials_chars(label)
|
||||
label = maybe_normalize_for_anglicisms(label)
|
||||
label = maybe_normalize_for_digits(label)
|
||||
return label
|
||||
|
||||
|
||||
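A tiny self-contained illustration of the normalization passes above: speakable replacements for special characters followed by digit expansion with `num2words(lang="fr")`. The replacement strings here are simplified examples, not the exact ones used by the importer:

```python
from num2words import num2words

def normalize_specials(label: str) -> str:
    # simplified speakable replacements (the importer handles many more cases)
    return label.replace("%", " pourcents").replace("€", " euros ").replace("+", " plus ")

def normalize_digits(label: str) -> str:
    out = []
    for token in label.split(" "):
        out.append(num2words(int(token), lang="fr") if token.isdigit() else token)
    return " ".join(out)

print(normalize_digits(normalize_specials("50 € + 3 %")))
# -> "cinquante  euros   plus  trois  pourcents" (whitespace left un-collapsed)
```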
def one_sample(sample):
|
||||
file_size = -1
|
||||
frames = 0
|
||||
@ -488,33 +316,14 @@ def one_sample(sample):
|
||||
label = label_filter_fun(sample[5])
|
||||
sample_id = sample[6]
|
||||
|
||||
_wav_filename = os.path.basename(
|
||||
audio_source.replace(".wav", "_{:06}.wav".format(sample_id))
|
||||
)
|
||||
_wav_filename = os.path.basename(audio_source.replace(".wav", "_{:06}.wav".format(sample_id)))
|
||||
wav_fullname = os.path.join(target_dir, dataset_basename, _wav_filename)
|
||||
|
||||
if not os.path.exists(wav_fullname):
|
||||
subprocess.check_output(
|
||||
[
|
||||
"ffmpeg",
|
||||
"-i",
|
||||
audio_source,
|
||||
"-ss",
|
||||
str(start_time),
|
||||
"-t",
|
||||
str(duration),
|
||||
"-c",
|
||||
"copy",
|
||||
wav_fullname,
|
||||
],
|
||||
stdin=subprocess.DEVNULL,
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
subprocess.check_output(["ffmpeg", "-i", audio_source, "-ss", str(start_time), "-t", str(duration), "-c", "copy", wav_fullname], stdin=subprocess.DEVNULL, stderr=subprocess.STDOUT)
|
||||
|
||||
file_size = os.path.getsize(wav_fullname)
|
||||
frames = int(
|
||||
subprocess.check_output(["soxi", "-s", wav_fullname], stderr=subprocess.STDOUT)
|
||||
)
|
||||
frames = int(subprocess.check_output(["soxi", "-s", wav_fullname], stderr=subprocess.STDOUT))
|
||||
|
||||
_counter = get_counter()
|
||||
_rows = []
|
||||
@ -525,13 +334,13 @@ def one_sample(sample):
|
||||
elif label is None:
|
||||
# Excluding samples that failed on label validation
|
||||
_counter["invalid_label"] += 1
|
||||
elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)):
|
||||
elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
|
||||
# Excluding samples that are too short to fit the transcript
|
||||
_counter["too_short"] += 1
|
||||
elif frames / SAMPLE_RATE < MIN_SECS:
|
||||
elif frames/SAMPLE_RATE < MIN_SECS:
|
||||
# Excluding samples that are too short
|
||||
_counter["too_short"] += 1
|
||||
elif frames / SAMPLE_RATE > MAX_SECS:
|
||||
elif frames/SAMPLE_RATE > MAX_SECS:
|
||||
# Excluding very long samples to keep a reasonable batch-size
|
||||
_counter["too_long"] += 1
|
||||
else:
|
||||
@ -543,71 +352,56 @@ def one_sample(sample):
|
||||
|
||||
return (_counter, _rows)
|
||||
|
||||
|
||||
def _maybe_import_data(xml_file, audio_source, target_dir, rel_tol=1e-1):
|
||||
dataset_basename = os.path.splitext(os.path.split(xml_file)[1])[0]
|
||||
wav_root = os.path.join(target_dir, dataset_basename)
|
||||
if not os.path.exists(wav_root):
|
||||
os.makedirs(wav_root)
|
||||
|
||||
source_frames = int(
|
||||
subprocess.check_output(["soxi", "-s", audio_source], stderr=subprocess.STDOUT)
|
||||
)
|
||||
source_frames = int(subprocess.check_output(["soxi", "-s", audio_source], stderr=subprocess.STDOUT))
|
||||
print("Source audio length: %s" % secs_to_hours(source_frames / SAMPLE_RATE))
|
||||
|
||||
# Get audiofile path and transcript for each sentence in tsv
|
||||
samples = []
|
||||
tree = ET.parse(xml_file)
|
||||
root = tree.getroot()
|
||||
seq_id = 0
|
||||
this_time = 0.0
|
||||
seq_id = 0
|
||||
this_time = 0.0
|
||||
this_duration = 0.0
|
||||
prev_time = 0.0
|
||||
prev_time = 0.0
|
||||
prev_duration = 0.0
|
||||
this_text = ""
|
||||
this_text = ""
|
||||
for child in root:
|
||||
if child.tag == "row":
|
||||
cur_time = float(child.attrib["timestamp"])
|
||||
cur_time = float(child.attrib["timestamp"])
|
||||
cur_duration = float(child.attrib["timedur"])
|
||||
cur_text = child.text
|
||||
cur_text = child.text
|
||||
|
||||
if this_time == 0.0:
|
||||
this_time = cur_time
|
||||
|
||||
delta = cur_time - (prev_time + prev_duration)
|
||||
delta = cur_time - (prev_time + prev_duration)
|
||||
# rel_tol value is made from trial/error to try and compromise between:
|
||||
# - cutting enough to skip missing words
|
||||
# - not too short, not too long sentences
|
||||
is_close = math.isclose(
|
||||
cur_time, this_time + this_duration, rel_tol=rel_tol
|
||||
)
|
||||
is_short = (this_duration + cur_duration + delta) < MAX_SECS
|
||||
is_close = math.isclose(cur_time, this_time + this_duration, rel_tol=rel_tol)
|
||||
is_short = ((this_duration + cur_duration + delta) < MAX_SECS)
|
||||
|
||||
# when the previous element is close enough **and** this does not
|
||||
# go over MAX_SECS, we append content
|
||||
if is_close and is_short:
|
||||
if (is_close and is_short):
|
||||
this_duration += cur_duration + delta
|
||||
this_text += cur_text
|
||||
this_text += cur_text
|
||||
else:
|
||||
samples.append(
|
||||
(
|
||||
audio_source,
|
||||
target_dir,
|
||||
dataset_basename,
|
||||
this_time,
|
||||
this_duration,
|
||||
this_text,
|
||||
seq_id,
|
||||
)
|
||||
)
|
||||
samples.append((audio_source, target_dir, dataset_basename, this_time, this_duration, this_text, seq_id))
|
||||
|
||||
this_time = cur_time
|
||||
this_time = cur_time
|
||||
this_duration = cur_duration
|
||||
this_text = cur_text
|
||||
this_text = cur_text
|
||||
|
||||
seq_id += 1
|
||||
|
||||
prev_time = cur_time
|
||||
prev_time = cur_time
|
||||
prev_duration = cur_duration
|
||||
|
||||
# Keep track of how many samples are good vs. problematic
|
||||
@ -631,27 +425,21 @@ def _maybe_import_data(xml_file, audio_source, target_dir, rel_tol=1e-1):
|
||||
assert len(_rows) == imported_samples
|
||||
|
||||
print_import_report(_counter, SAMPLE_RATE, MAX_SECS)
|
||||
print(
|
||||
"Import efficiency: %.1f%%" % ((_counter["total_time"] / source_frames) * 100)
|
||||
)
|
||||
print("Import efficiency: %.1f%%" % ((_counter["total_time"] / source_frames)*100))
|
||||
print("")
|
||||
|
||||
return _counter, _rows
|
||||
|
||||
|
||||
def _maybe_convert_wav(mp3_filename, _wav_filename):
|
||||
if not os.path.exists(_wav_filename):
|
||||
print("Converting {} to WAV file: {}".format(mp3_filename, _wav_filename))
|
||||
transformer = sox.Transformer()
|
||||
transformer.convert(
|
||||
samplerate=SAMPLE_RATE, n_channels=CHANNELS, bitdepth=BIT_DEPTH
|
||||
)
|
||||
transformer.convert(samplerate=SAMPLE_RATE, n_channels=CHANNELS, bitdepth=BIT_DEPTH)
|
||||
try:
|
||||
transformer.build(mp3_filename, _wav_filename)
|
||||
except sox.core.SoxError:
|
||||
pass
|
||||
|
||||
|
||||
def write_general_csv(target_dir, _rows, _counter):
|
||||
target_csv_template = os.path.join(target_dir, "ccpmf_{}.csv")
|
||||
with open(target_csv_template.format("train"), "w") as train_csv_file: # 80%
|
||||
@ -673,13 +461,7 @@ def write_general_csv(target_dir, _rows, _counter):
|
||||
writer = dev_writer
|
||||
else:
|
||||
writer = train_writer
|
||||
writer.writerow(
|
||||
{
|
||||
"wav_filename": item[0],
|
||||
"wav_filesize": item[1],
|
||||
"transcript": item[2],
|
||||
}
|
||||
)
|
||||
writer.writerow({"wav_filename": item[0], "wav_filesize": item[1], "transcript": item[2]})
|
||||
|
||||
print("")
|
||||
print("~~~~ FINAL STATISTICS ~~~~")
|
||||
@ -687,21 +469,11 @@ def write_general_csv(target_dir, _rows, _counter):
|
||||
print("~~~~ (FINAL STATISTICS) ~~~~")
|
||||
print("")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
PARSER = get_importers_parser(
|
||||
description="Import XML from Conference Centre for Economics, France"
|
||||
)
|
||||
PARSER = get_importers_parser(description="Import XML from Conference Centre for Economics, France")
|
||||
PARSER.add_argument("target_dir", help="Destination directory")
|
||||
PARSER.add_argument(
|
||||
"--filter_alphabet",
|
||||
help="Exclude samples with characters not in provided alphabet",
|
||||
)
|
||||
PARSER.add_argument(
|
||||
"--normalize",
|
||||
action="store_true",
|
||||
help="Converts diacritic characters to their base ones",
|
||||
)
|
||||
PARSER.add_argument("--filter_alphabet", help="Exclude samples with characters not in provided alphabet")
|
||||
PARSER.add_argument("--normalize", action="store_true", help="Converts diacritic characters to their base ones")
|
||||
|
||||
PARAMS = PARSER.parse_args()
|
||||
validate_label = get_validate_label(PARAMS)
|
||||
@ -709,11 +481,9 @@ if __name__ == "__main__":
|
||||
|
||||
def label_filter_fun(label):
|
||||
if PARAMS.normalize:
|
||||
label = (
|
||||
unicodedata.normalize("NFKD", label.strip())
|
||||
.encode("ascii", "ignore")
|
||||
label = unicodedata.normalize("NFKD", label.strip()) \
|
||||
.encode("ascii", "ignore") \
|
||||
.decode("ascii", "ignore")
|
||||
)
|
||||
label = maybe_normalize(label)
|
||||
label = validate_label(label)
|
||||
if ALPHABET and label:
|
||||
@ -723,9 +493,7 @@ if __name__ == "__main__":
|
||||
label = None
|
||||
return label
|
||||
|
||||
dataset_sources = _download_and_preprocess_data(
|
||||
csv_url=DATASET_RELEASE_CSV, target_dir=PARAMS.target_dir
|
||||
)
|
||||
dataset_sources = _download_and_preprocess_data(csv_url=DATASET_RELEASE_CSV, target_dir=PARAMS.target_dir)
|
||||
sources_root_dir = os.path.dirname(dataset_sources)
|
||||
all_counter = get_counter()
|
||||
all_rows = []
|
||||
@ -736,14 +504,9 @@ if __name__ == "__main__":
|
||||
this_mp3 = os.path.join(sources_root_dir, d[1])
|
||||
this_rel = float(d[2])
|
||||
|
||||
wav_filename = os.path.join(
|
||||
sources_root_dir,
|
||||
os.path.splitext(os.path.basename(this_mp3))[0] + ".wav",
|
||||
)
|
||||
wav_filename = os.path.join(sources_root_dir, os.path.splitext(os.path.basename(this_mp3))[0] + ".wav")
|
||||
_maybe_convert_wav(this_mp3, wav_filename)
|
||||
counter, rows = _maybe_import_data(
|
||||
this_xml, wav_filename, sources_root_dir, this_rel
|
||||
)
|
||||
counter, rows = _maybe_import_data(this_xml, wav_filename, sources_root_dir, this_rel)
|
||||
|
||||
all_counter += counter
|
||||
all_rows += rows
|
||||
|
@ -1,21 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
import csv
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import subprocess
|
||||
import tarfile
|
||||
from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import (
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
print_import_report,
|
||||
)
|
||||
from coqui_stt_training.util.importers import validate_label_eng as validate_label
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
@ -34,7 +35,7 @@ def _download_and_preprocess_data(target_dir):
|
||||
archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL)
|
||||
# Conditionally extract common voice data
|
||||
_maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
|
||||
# Conditionally convert common voice CSV files and mp3 data to Coqui STT CSVs and wav
|
||||
# Conditionally convert common voice CSV files and mp3 data to DeepSpeech CSVs and wav
|
||||
_maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME)
|
||||
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
Broadly speaking, this script takes the audio downloaded from Common Voice
|
||||
for a certain language, in addition to the *.tsv files output by CorporaCreator,
|
||||
and the script formats the data and transcripts to be in a state usable by
|
||||
train.py
|
||||
DeepSpeech.py
|
||||
Use "python3 import_cv2.py -h" for help
|
||||
"""
|
||||
import csv
|
||||
@ -14,15 +14,16 @@ from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR
|
||||
from coqui_stt_training.util.importers import (
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report,
|
||||
)
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
@ -40,11 +41,7 @@ class LabelFilter:
|
||||
|
||||
def filter(self, label):
|
||||
if self.normalize:
|
||||
label = (
|
||||
unicodedata.normalize("NFKD", label.strip())
|
||||
.encode("ascii", "ignore")
|
||||
.decode("ascii", "ignore")
|
||||
)
|
||||
label = unicodedata.normalize("NFKD", label.strip()).encode("ascii", "ignore").decode("ascii", "ignore")
|
||||
label = self.validate_fun(label)
|
||||
if self.alphabet and label and not self.alphabet.CanEncode(label):
|
||||
label = None
|
||||
@ -100,15 +97,7 @@ def one_sample(sample):
|
||||
return (counter, rows)
|
||||
|
||||
|
||||
def _maybe_convert_set(
|
||||
dataset,
|
||||
tsv_dir,
|
||||
audio_dir,
|
||||
filter_obj,
|
||||
space_after_every_character=None,
|
||||
rows=None,
|
||||
exclude=None,
|
||||
):
|
||||
def _maybe_convert_set(dataset, tsv_dir, audio_dir, filter_obj, space_after_every_character=None, rows=None, exclude=None):
|
||||
exclude_transcripts = set()
|
||||
exclude_speakers = set()
|
||||
if exclude is not None:
|
||||
@ -127,13 +116,7 @@ def _maybe_convert_set(
|
||||
with open(input_tsv, encoding="utf-8") as input_tsv_file:
|
||||
reader = csv.DictReader(input_tsv_file, delimiter="\t")
|
||||
for row in reader:
|
||||
samples.append(
|
||||
(
|
||||
os.path.join(audio_dir, row["path"]),
|
||||
row["sentence"],
|
||||
row["client_id"],
|
||||
)
|
||||
)
|
||||
samples.append((os.path.join(audio_dir, row["path"]), row["sentence"], row["client_id"]))
|
||||
|
||||
counter = get_counter()
|
||||
num_samples = len(samples)
|
||||
@ -141,9 +124,7 @@ def _maybe_convert_set(
|
||||
print("Importing mp3 files...")
|
||||
pool = Pool(initializer=init_worker, initargs=(PARAMS,))
|
||||
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
|
||||
for i, processed in enumerate(
|
||||
pool.imap_unordered(one_sample, samples), start=1
|
||||
):
|
||||
for i, processed in enumerate(pool.imap_unordered(one_sample, samples), start=1):
|
||||
counter += processed[0]
|
||||
rows += processed[1]
|
||||
bar.update(i)
|
||||
@ -157,9 +138,9 @@ def _maybe_convert_set(
|
||||
print_import_report(counter, SAMPLE_RATE, MAX_SECS)
|
||||
|
||||
output_csv = os.path.join(os.path.abspath(audio_dir), dataset + ".csv")
|
||||
print("Saving new Coqui STT-formatted CSV file to: ", output_csv)
|
||||
print("Saving new DeepSpeech-formatted CSV file to: ", output_csv)
|
||||
with open(output_csv, "w", encoding="utf-8", newline="") as output_csv_file:
|
||||
print("Writing CSV file for train.py as: ", output_csv)
|
||||
print("Writing CSV file for DeepSpeech.py as: ", output_csv)
|
||||
writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
|
||||
writer.writeheader()
|
||||
bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
|
||||
@ -188,20 +169,12 @@ def _maybe_convert_set(
|
||||
def _preprocess_data(tsv_dir, audio_dir, space_after_every_character=False):
|
||||
exclude = []
|
||||
for dataset in ["test", "dev", "train", "validated", "other"]:
|
||||
set_samples = _maybe_convert_set(
|
||||
dataset, tsv_dir, audio_dir, space_after_every_character
|
||||
)
|
||||
set_samples = _maybe_convert_set(dataset, tsv_dir, audio_dir, space_after_every_character)
|
||||
if dataset in ["test", "dev"]:
|
||||
exclude += set_samples
|
||||
if dataset == "validated":
|
||||
_maybe_convert_set(
|
||||
"train-all",
|
||||
tsv_dir,
|
||||
audio_dir,
|
||||
space_after_every_character,
|
||||
rows=set_samples,
|
||||
exclude=exclude,
|
||||
)
|
||||
_maybe_convert_set("train-all", tsv_dir, audio_dir, space_after_every_character,
|
||||
rows=set_samples, exclude=exclude)
|
||||
|
||||
|
||||
def _maybe_convert_wav(mp3_filename, wav_filename):
|
||||
@ -239,9 +212,7 @@ def parse_args():
|
||||
|
||||
|
||||
def main():
|
||||
audio_dir = (
|
||||
PARAMS.audio_dir if PARAMS.audio_dir else os.path.join(PARAMS.tsv_dir, "clips")
|
||||
)
|
||||
audio_dir = PARAMS.audio_dir if PARAMS.audio_dir else os.path.join(PARAMS.tsv_dir, "clips")
|
||||
_preprocess_data(PARAMS.tsv_dir, audio_dir, PARAMS.space_after_every_character)
|
||||
|
||||
|
||||
|
@ -2,7 +2,6 @@
|
||||
import codecs
|
||||
import fnmatch
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import sys
|
||||
import unicodedata
|
||||
@ -10,7 +9,8 @@ import unicodedata
|
||||
import librosa
|
||||
import pandas
|
||||
import soundfile # <= Has an external dependency on libsndfile
|
||||
from coqui_stt_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
# Prerequisite: Having the sph2pipe tool in your PATH:
|
||||
# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
|
||||
@ -236,18 +236,14 @@ def _split_and_resample_wav(origAudio, start_time, stop_time, new_wav_file):
|
||||
|
||||
|
||||
def _split_sets(filelist):
"""
randomly split the dataset into train, validation, and test sets, where the sizes of the
validation and test sets are determined by the `get_sample_size` function.
"""
random.shuffle(filelist)
sample_size = get_sample_size(len(filelist))

# We initially split the entire set into 80% train and 20% test, then
# split the train set into 80% train and 20% validation.
train_beg = 0
train_end = len(filelist) - 2 * sample_size
train_end = int(0.8 * len(filelist))

dev_beg = train_end
dev_end = train_end + sample_size
dev_beg = int(0.8 * train_end)
dev_end = train_end
train_end = dev_beg

test_beg = dev_end
test_end = len(filelist)
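With the 0.8-based variant shown above, the boundaries work out to roughly a 64/16/20 split: the first 80% of the list is split off, and the first 80% of that slice becomes the train set. A worked example, assuming a shuffled filelist of 1,000 entries:

n = 1000
train_end = int(0.8 * n)        # 800
dev_beg = int(0.8 * train_end)  # 640
dev_end = train_end             # 800
train_end = dev_beg             # 640
# train: indices 0-639, dev: 640-799, test: 800-999
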
@ -259,24 +255,5 @@ def _split_sets(filelist):
|
||||
)
|
||||
|
||||
|
||||
def get_sample_size(population_size):
"""calculates the sample size for a 99% confidence and 1% margin of error"""
margin_of_error = 0.01
fraction_picking = 0.50
z_score = 2.58 # Corresponds to confidence level 99%
numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
margin_of_error ** 2
)
sample_size = 0
for train_size in range(population_size, 0, -1):
denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
margin_of_error ** 2 * train_size
)
sample_size = int(numerator / denominator)
if 2 * sample_size + train_size <= population_size:
break
return sample_size

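This is Cochran's sample-size formula with a finite-population correction, iterated until train plus two such samples (dev and test) fit inside the population. A compact restatement under the same constants (z = 2.58 for 99% confidence, 1% margin of error, p = 0.5); the helper name is made up for illustration:

def cochran_sample_size(train_size, z=2.58, e=0.01, p=0.50):
    n0 = (z ** 2 * p * (1 - p)) / e ** 2    # ~16641 samples for an infinite population
    return int(n0 / (1 + n0 / train_size))  # finite-population correction
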
if __name__ == "__main__":
|
||||
_download_and_preprocess_data(sys.argv[1])
|
||||
|
@ -5,7 +5,8 @@ import tarfile
|
||||
|
||||
import numpy as np
|
||||
import pandas
|
||||
from coqui_stt_training.util.importers import get_importers_parser
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
|
||||
|
@ -9,10 +9,11 @@ import urllib
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import swifter
|
||||
from coqui_stt_training.util.importers import get_importers_parser, get_validate_label
|
||||
from sox import Transformer
|
||||
|
||||
import swifter
|
||||
from deepspeech_training.util.importers import get_importers_parser, get_validate_label
|
||||
|
||||
__version__ = "0.1.0"
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -3,7 +3,8 @@ import os
|
||||
import sys
|
||||
|
||||
import pandas
|
||||
from coqui_stt_training.util.downloader import maybe_download
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
|
||||
|
||||
def _download_and_preprocess_data(data_dir):
|
||||
|
@ -9,10 +9,11 @@ import unicodedata
|
||||
|
||||
import pandas
|
||||
import progressbar
|
||||
from coqui_stt_training.util.downloader import maybe_download
|
||||
from sox import Transformer
|
||||
from tensorflow.python.platform import gfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
|
||||
SAMPLE_RATE = 16000
|
||||
|
||||
|
||||
|
@ -11,15 +11,16 @@ from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import (
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report,
|
||||
)
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
@ -136,15 +137,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
with open(
|
||||
target_csv_template.format("train"), "w", encoding="utf-8", newline=""
|
||||
) as train_csv_file: # 80%
|
||||
with open(
|
||||
target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
|
||||
) as dev_csv_file: # 10%
|
||||
with open(
|
||||
target_csv_template.format("test"), "w", encoding="utf-8", newline=""
|
||||
) as test_csv_file: # 10%
|
||||
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
|
||||
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
|
||||
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
|
||||
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
|
||||
train_writer.writeheader()
|
||||
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
|
||||
@ -184,9 +179,7 @@ def _maybe_convert_sets(target_dir, extracted_data):
|
||||
def _maybe_convert_wav(ogg_filename, wav_filename):
|
||||
if not os.path.exists(wav_filename):
|
||||
transformer = sox.Transformer()
|
||||
transformer.convert(
|
||||
samplerate=SAMPLE_RATE, n_channels=N_CHANNELS, bitdepth=BITDEPTH
|
||||
)
|
||||
transformer.convert(samplerate=SAMPLE_RATE, n_channels=N_CHANNELS, bitdepth=BITDEPTH)
|
||||
try:
|
||||
transformer.build(ogg_filename, wav_filename)
|
||||
except sox.core.SoxError as ex:
|
||||
|
@ -9,15 +9,16 @@ from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import (
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report,
|
||||
)
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
@ -59,20 +60,9 @@ def one_sample(sample):
|
||||
file_size = -1
|
||||
frames = 0
|
||||
if os.path.exists(wav_filename):
|
||||
tmp_filename = os.path.splitext(wav_filename)[0] + ".tmp.wav"
|
||||
tmp_filename = os.path.splitext(wav_filename)[0]+'.tmp.wav'
|
||||
subprocess.check_call(
|
||||
[
|
||||
"sox",
|
||||
wav_filename,
|
||||
"-r",
|
||||
str(SAMPLE_RATE),
|
||||
"-c",
|
||||
"1",
|
||||
"-b",
|
||||
"16",
|
||||
tmp_filename,
|
||||
],
|
||||
stderr=subprocess.STDOUT,
|
||||
['sox', wav_filename, '-r', str(SAMPLE_RATE), '-c', '1', '-b', '16', tmp_filename], stderr=subprocess.STDOUT
|
||||
)
|
||||
os.rename(tmp_filename, wav_filename)
|
||||
file_size = os.path.getsize(wav_filename)
|
||||
@ -148,15 +138,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
with open(
|
||||
target_csv_template.format("train"), "w", encoding="utf-8", newline=""
|
||||
) as train_csv_file: # 80%
|
||||
with open(
|
||||
target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
|
||||
) as dev_csv_file: # 10%
|
||||
with open(
|
||||
target_csv_template.format("test"), "w", encoding="utf-8", newline=""
|
||||
) as test_csv_file: # 10%
|
||||
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
|
||||
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
|
||||
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
|
||||
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
|
||||
train_writer.writeheader()
|
||||
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
|
||||
|
@ -5,7 +5,8 @@ import tarfile
|
||||
import wave
|
||||
|
||||
import pandas
|
||||
from coqui_stt_training.util.importers import get_importers_parser
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
|
||||
|
@ -1,99 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
import argparse
|
||||
import ctypes
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pandas
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def read_ogg_opus_duration(ogg_file_path):
error = ctypes.c_int()
opusfile = pyogg.opus.op_open_file(
ogg_file_path.encode("utf-8"), ctypes.pointer(error)
)

if error.value != 0:
raise ValueError(
("Ogg/Opus file could not be read. " "Error code: {}").format(error.value)
)

pcm_buffer_size = pyogg.opus.op_pcm_total(opusfile, -1)
channel_count = pyogg.opus.op_channel_count(opusfile, -1)
sample_rate = 48000 # opus files are always 48kHz
sample_width = 2 # always 16-bit
pyogg.opus.op_free(opusfile)
return pcm_buffer_size / sample_rate
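Since Opus always decodes at 48 kHz, the duration follows directly from the per-channel PCM total reported by op_pcm_total; a small illustration with made-up numbers:

pcm_buffer_size = 144000                      # decoded samples per channel
duration_seconds = pcm_buffer_size / 48000    # 3.0 s, independent of channel count
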
|
||||
|
||||
def main(root_dir):
|
||||
for subset in (
|
||||
"train",
|
||||
"dev",
|
||||
"test",
|
||||
):
|
||||
print("Processing {} subset...".format(subset))
|
||||
with open(Path(root_dir) / subset / "transcripts.txt") as fin:
|
||||
subset_entries = []
|
||||
for i, line in tqdm(enumerate(fin)):
|
||||
audio_id, transcript = line.split("\t")
|
||||
audio_id_parts = audio_id.split("_")
|
||||
# e.g. 4800_10003_000000 -> train/audio/4800/10003/4800_10003_000000.opus
|
||||
audio_path = (
|
||||
Path(root_dir)
|
||||
/ subset
|
||||
/ "audio"
|
||||
/ audio_id_parts[0]
|
||||
/ audio_id_parts[1]
|
||||
/ "{}.opus".format(audio_id)
|
||||
)
|
||||
audio_duration = read_ogg_opus_duration(audio_path)
|
||||
# TODO: support other languages
|
||||
transcript = (
|
||||
transcript.strip()
|
||||
.replace("-", " ")
|
||||
.replace("ñ", "n")
|
||||
.replace(".", "")
|
||||
.translate(
|
||||
{
|
||||
ord(ch): None
|
||||
for ch in (
|
||||
"а",
|
||||
"в",
|
||||
"е",
|
||||
"и",
|
||||
"к",
|
||||
"м",
|
||||
"н",
|
||||
"о",
|
||||
"п",
|
||||
"р",
|
||||
"т",
|
||||
"ы",
|
||||
"я",
|
||||
)
|
||||
}
|
||||
)
|
||||
)
|
||||
subset_entries.append(
|
||||
(
|
||||
audio_path.relative_to(root_dir),
|
||||
audio_duration,
|
||||
transcript.strip(),
|
||||
)
|
||||
)
|
||||
df = pandas.DataFrame(
|
||||
columns=["wav_filename", "wav_filesize", "transcript"],
|
||||
data=subset_entries,
|
||||
)
|
||||
csv_name = Path(root_dir) / "{}.csv".format(subset)
|
||||
df.to_csv(csv_name, index=False)
|
||||
print("Wrote {}".format(csv_name))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("root_dir", help="Path to the mls_english_opus directory.")
|
||||
args = parser.parse_args()
|
||||
main(args.root_dir)
|
@ -6,7 +6,8 @@ import tarfile
|
||||
|
||||
import numpy as np
|
||||
import pandas
|
||||
from coqui_stt_training.util.importers import get_importers_parser
|
||||
|
||||
from deepspeech_training.util.importers import get_importers_parser
|
||||
|
||||
COLUMN_NAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
|
||||
|
@ -8,15 +8,16 @@ from glob import glob
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import (
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report,
|
||||
)
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
@ -156,15 +157,9 @@ def _maybe_convert_sets(target_dir, extracted_data):
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
with open(
|
||||
target_csv_template.format("train"), "w", encoding="utf-8", newline=""
|
||||
) as train_csv_file: # 80%
|
||||
with open(
|
||||
target_csv_template.format("dev"), "w", encoding="utf-8", newline=""
|
||||
) as dev_csv_file: # 10%
|
||||
with open(
|
||||
target_csv_template.format("test"), "w", encoding="utf-8", newline=""
|
||||
) as test_csv_file: # 10%
|
||||
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
|
||||
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
|
||||
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
|
||||
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
|
||||
train_writer.writeheader()
|
||||
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
|
||||
|
@ -1,11 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
# ensure that you have downloaded the LDC dataset LDC97S62 and tar exists in a folder e.g.
|
||||
# ./data/swb/swb1_LDC97S62.tgz
|
||||
# from the Coqui STT directory run with: ./bin/import_swb.py ./data/swb/
|
||||
# from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/
|
||||
import codecs
|
||||
import fnmatch
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
@ -16,7 +15,8 @@ import librosa
|
||||
import pandas
|
||||
import requests
|
||||
import soundfile # <= Has an external dependency on libsndfile
|
||||
from coqui_stt_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
# ARCHIVE_NAME refers to ISIP alignments from 01/29/03
|
||||
ARCHIVE_NAME = "switchboard_word_alignments.tar.gz"
|
||||
@ -290,18 +290,14 @@ def _split_wav(origAudio, start_time, stop_time, new_wav_file):
|
||||
|
||||
|
||||
def _split_sets(filelist):
|
||||
"""
|
||||
randomly split the dataset into train, validation, and test sets, where the sizes of the
|
||||
validation and test sets are determined by the `get_sample_size` function.
|
||||
"""
|
||||
random.shuffle(filelist)
|
||||
sample_size = get_sample_size(len(filelist))
|
||||
|
||||
# We initially split the entire set into 80% train and 20% test, then
|
||||
# split the train set into 80% train and 20% validation.
|
||||
train_beg = 0
|
||||
train_end = len(filelist) - 2 * sample_size
|
||||
train_end = int(0.8 * len(filelist))
|
||||
|
||||
dev_beg = train_end
|
||||
dev_end = train_end + sample_size
|
||||
dev_beg = int(0.8 * train_end)
|
||||
dev_end = train_end
|
||||
train_end = dev_beg
|
||||
|
||||
test_beg = dev_end
|
||||
test_end = len(filelist)
|
||||
@ -313,25 +309,6 @@ def _split_sets(filelist):
|
||||
)
|
||||
|
||||
|
||||
def get_sample_size(population_size):
|
||||
"""calculates the sample size for a 99% confidence and 1% margin of error"""
|
||||
margin_of_error = 0.01
|
||||
fraction_picking = 0.50
|
||||
z_score = 2.58 # Corresponds to confidence level 99%
|
||||
numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
|
||||
margin_of_error ** 2
|
||||
)
|
||||
sample_size = 0
|
||||
for train_size in range(population_size, 0, -1):
|
||||
denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / (
|
||||
margin_of_error ** 2 * train_size
|
||||
)
|
||||
sample_size = int(numerator / denominator)
|
||||
if 2 * sample_size + train_size <= population_size:
|
||||
break
|
||||
return sample_size
|
||||
|
||||
|
||||
def _read_data_set(
|
||||
filelist,
|
||||
thread_count,
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Downloads and prepares (parts of) the "Spoken Wikipedia Corpora" for train.py
|
||||
Downloads and prepares (parts of) the "Spoken Wikipedia Corpora" for DeepSpeech.py
|
||||
Use "python3 import_swc.py -h" for help
|
||||
"""
|
||||
|
||||
@ -21,9 +21,10 @@ from multiprocessing.pool import ThreadPool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
|
||||
SWC_ARCHIVE = "SWC_{language}.tar"
|
||||
@ -172,6 +173,7 @@ def in_alphabet(alphabet, c):
|
||||
return alphabet.CanEncode(c) if alphabet else True
|
||||
|
||||
|
||||
|
||||
ALPHABETS = {}
|
||||
|
||||
|
||||
@ -200,16 +202,8 @@ def label_filter(label, language):
|
||||
dont_normalize = DONT_NORMALIZE[language] if language in DONT_NORMALIZE else ""
|
||||
alphabet = get_alphabet(language)
|
||||
for c in label:
|
||||
if (
|
||||
CLI_ARGS.normalize
|
||||
and c not in dont_normalize
|
||||
and not in_alphabet(alphabet, c)
|
||||
):
|
||||
c = (
|
||||
unicodedata.normalize("NFKD", c)
|
||||
.encode("ascii", "ignore")
|
||||
.decode("ascii", "ignore")
|
||||
)
|
||||
if CLI_ARGS.normalize and c not in dont_normalize and not in_alphabet(alphabet, c):
|
||||
c = unicodedata.normalize("NFKD", c).encode("ascii", "ignore").decode("ascii", "ignore")
|
||||
for sc in c:
|
||||
if not in_alphabet(alphabet, sc):
|
||||
return None, "illegal character"
|
||||
|
@ -7,11 +7,12 @@ from glob import glob
|
||||
from os import makedirs, path, remove, rmdir
|
||||
|
||||
import pandas
|
||||
from coqui_stt_training.util.downloader import maybe_download
|
||||
from coqui_stt_training.util.stm import parse_stm_file
|
||||
from sox import Transformer
|
||||
from tensorflow.python.platform import gfile
|
||||
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
from deepspeech_training.util.stm import parse_stm_file
|
||||
|
||||
|
||||
def _download_and_preprocess_data(data_dir):
|
||||
# Conditionally download data
|
||||
|
214
bin/import_ts.py
Executable file
@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import zipfile
|
||||
from multiprocessing import Pool
|
||||
|
||||
import progressbar
|
||||
import sox
|
||||
|
||||
import unidecode
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
get_importers_parser,
|
||||
get_validate_label,
|
||||
print_import_report,
|
||||
)
|
||||
|
||||
FIELDNAMES = ["wav_filename", "wav_filesize", "transcript"]
|
||||
SAMPLE_RATE = 16000
|
||||
MAX_SECS = 15
|
||||
ARCHIVE_NAME = "2019-04-11_fr_FR"
|
||||
ARCHIVE_DIR_NAME = "ts_" + ARCHIVE_NAME
|
||||
ARCHIVE_URL = (
|
||||
"https://deepspeech-storage-mirror.s3.fr-par.scw.cloud/" + ARCHIVE_NAME + ".zip"
|
||||
)
|
||||
|
||||
|
||||
def _download_and_preprocess_data(target_dir, english_compatible=False):
|
||||
# Making path absolute
|
||||
target_dir = os.path.abspath(target_dir)
|
||||
# Conditionally download data
|
||||
archive_path = maybe_download(
|
||||
"ts_" + ARCHIVE_NAME + ".zip", target_dir, ARCHIVE_URL
|
||||
)
|
||||
# Conditionally extract archive data
|
||||
_maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
|
||||
# Conditionally convert TrainingSpeech data to DeepSpeech CSVs and wav
|
||||
_maybe_convert_sets(
|
||||
target_dir, ARCHIVE_DIR_NAME, english_compatible=english_compatible
|
||||
)
|
||||
|
||||
|
||||
def _maybe_extract(target_dir, extracted_data, archive_path):
|
||||
# If target_dir/extracted_data does not exist, extract archive in target_dir
|
||||
extracted_path = os.path.join(target_dir, extracted_data)
|
||||
if not os.path.exists(extracted_path):
|
||||
print('No directory "%s" - extracting archive...' % extracted_path)
|
||||
if not os.path.isdir(extracted_path):
|
||||
os.mkdir(extracted_path)
|
||||
with zipfile.ZipFile(archive_path) as zip_f:
|
||||
zip_f.extractall(extracted_path)
|
||||
else:
|
||||
print('Found directory "%s" - not extracting it from archive.' % extracted_path)
|
||||
|
||||
|
||||
def one_sample(sample):
|
||||
""" Take a audio file, and optionally convert it to 16kHz WAV """
|
||||
orig_filename = sample["path"]
|
||||
# Storing wav files next to the wav ones - just with a different suffix
|
||||
wav_filename = os.path.splitext(orig_filename)[0] + ".converted.wav"
|
||||
_maybe_convert_wav(orig_filename, wav_filename)
|
||||
file_size = -1
|
||||
frames = 0
|
||||
if os.path.exists(wav_filename):
|
||||
file_size = os.path.getsize(wav_filename)
|
||||
frames = int(
|
||||
subprocess.check_output(
|
||||
["soxi", "-s", wav_filename], stderr=subprocess.STDOUT
|
||||
)
|
||||
)
|
||||
label = sample["text"]
|
||||
|
||||
rows = []
|
||||
|
||||
# Keep track of how many samples are good vs. problematic
counter = get_counter()
if file_size == -1:
# Excluding samples that failed upon conversion
counter["failed"] += 1
elif label is None:
# Excluding samples that failed on label validation
counter["invalid_label"] += 1
elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)):
# Excluding samples that are too short to fit the transcript
counter["too_short"] += 1
elif frames / SAMPLE_RATE > MAX_SECS:
# Excluding very long samples to keep a reasonable batch-size
counter["too_long"] += 1
else:
# This one is good - keep it for the target CSV
rows.append((wav_filename, file_size, label))
counter["imported_time"] += frames
counter["all"] += 1
counter["total_time"] += frames

return (counter, rows)
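The too_short test above converts the clip length into 10 ms windows and requires at least two windows per transcript character. A worked example, assuming a 2-second clip at the 16 kHz SAMPLE_RATE:

frames = 32000                          # 2 s at 16 kHz
windows = frames / 16000 * 1000 / 10    # 200 windows of 10 ms each
max_chars = int(windows / 2)            # 100 -> longer transcripts are counted as too_short
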
|
||||
|
||||
def _maybe_convert_sets(target_dir, extracted_data, english_compatible=False):
|
||||
extracted_dir = os.path.join(target_dir, extracted_data)
|
||||
# override existing CSV with normalized one
|
||||
target_csv_template = os.path.join(target_dir, "ts_" + ARCHIVE_NAME + "_{}.csv")
|
||||
if os.path.isfile(target_csv_template):
|
||||
return
|
||||
path_to_original_csv = os.path.join(extracted_dir, "data.csv")
|
||||
with open(path_to_original_csv) as csv_f:
|
||||
data = [
|
||||
d
|
||||
for d in csv.DictReader(csv_f, delimiter=",")
|
||||
if float(d["duration"]) <= MAX_SECS
|
||||
]
|
||||
|
||||
for line in data:
|
||||
line["path"] = os.path.join(extracted_dir, line["path"])
|
||||
|
||||
num_samples = len(data)
|
||||
rows = []
|
||||
counter = get_counter()
|
||||
|
||||
print("Importing {} wav files...".format(num_samples))
|
||||
pool = Pool()
|
||||
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
|
||||
for i, processed in enumerate(pool.imap_unordered(one_sample, data), start=1):
|
||||
counter += processed[0]
|
||||
rows += processed[1]
|
||||
bar.update(i)
|
||||
bar.update(num_samples)
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
with open(target_csv_template.format("train"), "w", encoding="utf-8", newline="") as train_csv_file: # 80%
|
||||
with open(target_csv_template.format("dev"), "w", encoding="utf-8", newline="") as dev_csv_file: # 10%
|
||||
with open(target_csv_template.format("test"), "w", encoding="utf-8", newline="") as test_csv_file: # 10%
|
||||
train_writer = csv.DictWriter(train_csv_file, fieldnames=FIELDNAMES)
|
||||
train_writer.writeheader()
|
||||
dev_writer = csv.DictWriter(dev_csv_file, fieldnames=FIELDNAMES)
|
||||
dev_writer.writeheader()
|
||||
test_writer = csv.DictWriter(test_csv_file, fieldnames=FIELDNAMES)
|
||||
test_writer.writeheader()
|
||||
|
||||
for i, item in enumerate(rows):
|
||||
transcript = validate_label(
|
||||
cleanup_transcript(
|
||||
item[2], english_compatible=english_compatible
|
||||
)
|
||||
)
|
||||
if not transcript:
|
||||
continue
|
||||
wav_filename = os.path.join(target_dir, extracted_data, item[0])
|
||||
i_mod = i % 10
|
||||
if i_mod == 0:
|
||||
writer = test_writer
|
||||
elif i_mod == 1:
|
||||
writer = dev_writer
|
||||
else:
|
||||
writer = train_writer
|
||||
writer.writerow(
|
||||
dict(
|
||||
wav_filename=wav_filename,
|
||||
wav_filesize=os.path.getsize(wav_filename),
|
||||
transcript=transcript,
|
||||
)
|
||||
)
|
||||
|
||||
imported_samples = get_imported_samples(counter)
|
||||
assert counter["all"] == num_samples
|
||||
assert len(rows) == imported_samples
|
||||
|
||||
print_import_report(counter, SAMPLE_RATE, MAX_SECS)
|
||||
|
||||
|
||||
def _maybe_convert_wav(orig_filename, wav_filename):
|
||||
if not os.path.exists(wav_filename):
|
||||
transformer = sox.Transformer()
|
||||
transformer.convert(samplerate=SAMPLE_RATE)
|
||||
try:
|
||||
transformer.build(orig_filename, wav_filename)
|
||||
except sox.core.SoxError as ex:
|
||||
print("SoX processing error", ex, orig_filename, wav_filename)
|
||||
|
||||
|
||||
PUNCTUATIONS_REG = re.compile(r"[°\-,;!?.()\[\]*…—]")
MULTIPLE_SPACES_REG = re.compile(r"\s{2,}")


def cleanup_transcript(text, english_compatible=False):
text = text.replace("’", "'").replace("\u00A0", " ")
text = PUNCTUATIONS_REG.sub(" ", text)
text = MULTIPLE_SPACES_REG.sub(" ", text)
if english_compatible:
text = unidecode.unidecode(text)
return text.strip().lower()
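A quick illustration of cleanup_transcript on a French sample (the input string is made up for the example):

cleanup_transcript("Élève, viens-ici !")                            # -> "élève viens ici"
cleanup_transcript("Élève, viens-ici !", english_compatible=True)   # -> "eleve viens ici"
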
|
||||
|
||||
def handle_args():
|
||||
parser = get_importers_parser(description="Importer for TrainingSpeech dataset.")
|
||||
parser.add_argument(dest="target_dir")
|
||||
parser.add_argument(
|
||||
"--english-compatible",
|
||||
action="store_true",
|
||||
dest="english_compatible",
|
||||
help="Remove diactrics and other non-ascii chars.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli_args = handle_args()
|
||||
validate_label = get_validate_label(cli_args)
|
||||
_download_and_preprocess_data(cli_args.target_dir, cli_args.english_compatible)
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Downloads and prepares (parts of) the "German Distant Speech" corpus (TUDA) for train.py
|
||||
Downloads and prepares (parts of) the "German Distant Speech" corpus (TUDA) for DeepSpeech.py
|
||||
Use "python3 import_tuda.py -h" for help
|
||||
"""
|
||||
import argparse
|
||||
@ -13,9 +13,10 @@ import xml.etree.ElementTree as ET
|
||||
from collections import Counter
|
||||
|
||||
import progressbar
|
||||
from coqui_stt_ctcdecoder import Alphabet
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import validate_label_eng as validate_label
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import validate_label_eng as validate_label
|
||||
from ds_ctcdecoder import Alphabet
|
||||
|
||||
TUDA_VERSION = "v2"
|
||||
TUDA_PACKAGE = "german-speechdata-package-{}".format(TUDA_VERSION)
|
||||
@ -54,11 +55,7 @@ def check_and_prepare_sentence(sentence):
|
||||
chars = []
|
||||
for c in sentence:
|
||||
if CLI_ARGS.normalize and c not in "äöüß" and not in_alphabet(c):
|
||||
c = (
|
||||
unicodedata.normalize("NFKD", c)
|
||||
.encode("ascii", "ignore")
|
||||
.decode("ascii", "ignore")
|
||||
)
|
||||
c = unicodedata.normalize("NFKD", c).encode("ascii", "ignore").decode("ascii", "ignore")
|
||||
for sc in c:
|
||||
if not in_alphabet(c):
|
||||
return None
|
||||
@ -121,7 +118,7 @@ def write_csvs(extracted):
|
||||
sentence = list(meta.iter("cleaned_sentence"))[0].text
|
||||
sentence = check_and_prepare_sentence(sentence)
|
||||
if sentence is None:
|
||||
reasons["alphabet filter"] += 1
|
||||
reasons['alphabet filter'] += 1
|
||||
continue
|
||||
for wav_name in wav_names:
|
||||
sample_counter += 1
|
||||
|
@ -10,8 +10,9 @@ from zipfile import ZipFile
|
||||
|
||||
import librosa
|
||||
import progressbar
|
||||
from coqui_stt_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from coqui_stt_training.util.importers import (
|
||||
|
||||
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
|
||||
from deepspeech_training.util.importers import (
|
||||
get_counter,
|
||||
get_imported_samples,
|
||||
print_import_report,
|
||||
@ -34,7 +35,7 @@ def _download_and_preprocess_data(target_dir):
|
||||
archive_path = maybe_download(ARCHIVE_NAME, target_dir, ARCHIVE_URL)
|
||||
# Conditionally extract common voice data
|
||||
_maybe_extract(target_dir, ARCHIVE_DIR_NAME, archive_path)
|
||||
# Conditionally convert common voice CSV files and mp3 data to Coqui STT CSVs and wav
|
||||
# Conditionally convert common voice CSV files and mp3 data to DeepSpeech CSVs and wav
|
||||
_maybe_convert_sets(target_dir, ARCHIVE_DIR_NAME)
|
||||
|
||||
|
||||
|
@ -13,8 +13,8 @@ from os import makedirs, path
|
||||
|
||||
import pandas
|
||||
from bs4 import BeautifulSoup
|
||||
from coqui_stt_training.util.downloader import maybe_download
|
||||
from tensorflow.python.platform import gfile
|
||||
from deepspeech_training.util.downloader import maybe_download
|
||||
|
||||
"""The number of jobs to run in parallel"""
|
||||
NUM_PARALLEL = 8
|
||||
|
65
bin/play.py
@ -1,34 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and 🐸STT CSV files
|
||||
Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and DeepSpeech CSV files
|
||||
Use "python3 play.py -h" for help
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import random
|
||||
import argparse
|
||||
|
||||
from coqui_stt_training.util.audio import (
|
||||
AUDIO_TYPE_PCM,
|
||||
AUDIO_TYPE_WAV,
|
||||
get_loadable_audio_type_from_extension,
|
||||
)
|
||||
from coqui_stt_training.util.augmentations import (
|
||||
SampleAugmentation,
|
||||
apply_sample_augmentations,
|
||||
parse_augmentations,
|
||||
)
|
||||
from coqui_stt_training.util.sample_collections import (
|
||||
LabeledSample,
|
||||
SampleList,
|
||||
samples_from_source,
|
||||
)
|
||||
from deepspeech_training.util.audio import LOADABLE_AUDIO_EXTENSIONS, AUDIO_TYPE_PCM, AUDIO_TYPE_WAV
|
||||
from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source
|
||||
from deepspeech_training.util.augmentations import parse_augmentations, apply_sample_augmentations, SampleAugmentation
|
||||
|
||||
|
||||
def get_samples_in_play_order():
|
||||
ext = os.path.splitext(CLI_ARGS.source)[1].lower()
|
||||
if get_loadable_audio_type_from_extension(ext):
|
||||
if ext in LOADABLE_AUDIO_EXTENSIONS:
|
||||
samples = SampleList([(CLI_ARGS.source, 0)], labeled=False)
|
||||
else:
|
||||
samples = samples_from_source(CLI_ARGS.source, buffering=0)
|
||||
@ -52,17 +40,14 @@ def get_samples_in_play_order():
|
||||
|
||||
def play_collection():
|
||||
augmentations = parse_augmentations(CLI_ARGS.augment)
|
||||
print(f"Parsed augmentations from flags: {augmentations}")
|
||||
if any(not isinstance(a, SampleAugmentation) for a in augmentations):
|
||||
print("Warning: Some of the augmentations cannot be simulated by this command.")
|
||||
samples = get_samples_in_play_order()
|
||||
samples = apply_sample_augmentations(
|
||||
samples,
|
||||
audio_type=AUDIO_TYPE_PCM,
|
||||
augmentations=augmentations,
|
||||
process_ahead=0,
|
||||
clock=CLI_ARGS.clock,
|
||||
)
|
||||
samples = apply_sample_augmentations(samples,
|
||||
audio_type=AUDIO_TYPE_PCM,
|
||||
augmentations=augmentations,
|
||||
process_ahead=0,
|
||||
clock=CLI_ARGS.clock)
|
||||
for sample in samples:
|
||||
if not CLI_ARGS.quiet:
|
||||
print('Sample "{}"'.format(sample.sample_id), file=sys.stderr)
|
||||
@ -72,12 +57,10 @@ def play_collection():
|
||||
sample.change_audio_type(AUDIO_TYPE_WAV)
|
||||
sys.stdout.buffer.write(sample.audio.getvalue())
|
||||
return
|
||||
wave_obj = simpleaudio.WaveObject(
|
||||
sample.audio,
|
||||
sample.audio_format.channels,
|
||||
sample.audio_format.width,
|
||||
sample.audio_format.rate,
|
||||
)
|
||||
wave_obj = simpleaudio.WaveObject(sample.audio,
|
||||
sample.audio_format.channels,
|
||||
sample.audio_format.width,
|
||||
sample.audio_format.rate)
|
||||
play_obj = wave_obj.play()
|
||||
play_obj.wait_done()
|
||||
|
||||
@ -85,11 +68,9 @@ def play_collection():
|
||||
def handle_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) "
|
||||
"and Coqui STT CSV files"
|
||||
)
|
||||
parser.add_argument(
|
||||
"source", help="Sample DB, CSV or WAV file to play samples from"
|
||||
"and DeepSpeech CSV files"
|
||||
)
|
||||
parser.add_argument("source", help="Sample DB, CSV or WAV file to play samples from")
|
||||
parser.add_argument(
|
||||
"--start",
|
||||
type=int,
|
||||
@ -109,7 +90,7 @@ def handle_args():
|
||||
)
|
||||
parser.add_argument(
|
||||
"--augment",
|
||||
action="append",
|
||||
action='append',
|
||||
help="Add an augmentation operation",
|
||||
)
|
||||
parser.add_argument(
|
||||
@ -117,8 +98,8 @@ def handle_args():
|
||||
type=float,
|
||||
default=0.5,
|
||||
help="Simulates clock value used for augmentations during training."
|
||||
"Ranges from 0.0 (representing parameter start values) to"
|
||||
"1.0 (representing parameter end values)",
|
||||
"Ranges from 0.0 (representing parameter start values) to"
|
||||
"1.0 (representing parameter end values)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pipe",
|
||||
@ -139,9 +120,7 @@ if __name__ == "__main__":
|
||||
try:
|
||||
import simpleaudio
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
'Unless using the --pipe flag, play.py requires Python package "simpleaudio" for playing samples'
|
||||
)
|
||||
print('Unless using the --pipe flag, play.py requires Python package "simpleaudio" for playing samples')
|
||||
sys.exit(1)
|
||||
try:
|
||||
play_collection()
|
||||
|
@ -1,25 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
import os
|
||||
from import_ldc93s1 import _download_and_preprocess_data as download_ldc
|
||||
from coqui_stt_training.util.config import initialize_globals_from_args
|
||||
from coqui_stt_training.train import train
|
||||
from coqui_stt_training.evaluate import test
|
||||
|
||||
# only one GPU for only one training sample
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
|
||||
download_ldc("data/smoke_test")
|
||||
|
||||
initialize_globals_from_args(
|
||||
load_train="init",
|
||||
alphabet_config_path="data/alphabet.txt",
|
||||
train_files=["data/smoke_test/ldc93s1.csv"],
|
||||
dev_files=["data/smoke_test/ldc93s1.csv"],
|
||||
test_files=["data/smoke_test/ldc93s1.csv"],
|
||||
augment=["time_mask"],
|
||||
n_hidden=100,
|
||||
epochs=200,
|
||||
)
|
||||
|
||||
train()
|
||||
test()
|
@ -1,30 +1,28 @@
|
||||
#!/bin/sh
|
||||
set -xe
|
||||
if [ ! -f train.py ]; then
|
||||
echo "Please make sure you run this from STT's top level directory."
|
||||
if [ ! -f DeepSpeech.py ]; then
|
||||
echo "Please make sure you run this from DeepSpeech's top level directory."
|
||||
exit 1
|
||||
fi;
|
||||
|
||||
if [ ! -f "data/smoke_test/ldc93s1.csv" ]; then
|
||||
echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/smoke_test."
|
||||
python -u bin/import_ldc93s1.py ./data/smoke_test
|
||||
if [ ! -f "data/ldc93s1/ldc93s1.csv" ]; then
|
||||
echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/ldc93s1."
|
||||
python -u bin/import_ldc93s1.py ./data/ldc93s1
|
||||
fi;
|
||||
|
||||
if [ -d "${COMPUTE_KEEP_DIR}" ]; then
|
||||
checkpoint_dir=$COMPUTE_KEEP_DIR
|
||||
else
|
||||
checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("stt/ldc93s1"))')
|
||||
checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("deepspeech/ldc93s1"))')
|
||||
fi
|
||||
|
||||
# Force only one visible device because we have a single-sample dataset
|
||||
# and when trying to run on multiple devices (like GPUs), this will break
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
python -m coqui_stt_training.train \
|
||||
--alphabet_config_path "data/alphabet.txt" \
|
||||
--show_progressbar false \
|
||||
--train_files data/smoke_test/ldc93s1.csv \
|
||||
--test_files data/smoke_test/ldc93s1.csv \
|
||||
python -u DeepSpeech.py --noshow_progressbar \
|
||||
--train_files data/ldc93s1/ldc93s1.csv \
|
||||
--test_files data/ldc93s1/ldc93s1.csv \
|
||||
--train_batch_size 1 \
|
||||
--test_batch_size 1 \
|
||||
--n_hidden 100 \
|
||||
|
@ -14,17 +14,16 @@ fi;
|
||||
# and when trying to run on multiple devices (like GPUs), this will break
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
--show_progressbar false --early_stop false \
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--train_files ${ldc93s1_csv} --train_batch_size 1 \
|
||||
--scorer "" \
|
||||
--augment dropout \
|
||||
pitch \
|
||||
tempo \
|
||||
warp \
|
||||
time_mask \
|
||||
frequency_mask \
|
||||
add \
|
||||
multiply \
|
||||
--augment pitch \
|
||||
--augment tempo \
|
||||
--augment warp \
|
||||
--augment time_mask \
|
||||
--augment frequency_mask \
|
||||
--augment add \
|
||||
--augment multiply \
|
||||
--n_hidden 100 \
|
||||
--epochs 1
|
@ -14,8 +14,7 @@ fi;
|
||||
# and when trying to run on multiple devices (like GPUs), this will break
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
--show_progressbar false --early_stop false \
|
||||
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
|
||||
--train_files ${ldc93s1_csv} --train_batch_size 1 \
|
||||
--dev_files ${ldc93s1_csv} --dev_batch_size 1 \
|
||||
--test_files ${ldc93s1_csv} --test_batch_size 1 \
|
Some files were not shown because too many files have changed in this diff.