Compare commits
204 Commits
v0.10.0-al
...
rei/fork
Author | SHA1 | Date | |
---|---|---|---|
003b399253 | |||
f008d10c49 | |||
0f698133aa | |||
8cea2cbfec | |||
|
dbd38c3a89 | ||
|
b43e710959 | ||
|
ff24a8b917 | ||
|
479d963155 | ||
|
d90bb60506 | ||
|
5cefd7069c | ||
|
154a67fb2c | ||
|
d6456ae4aa | ||
|
3020949075 | ||
|
efdaa61e2c | ||
|
419b15b72a | ||
|
6a9bd1e6b6 | ||
|
922d668155 | ||
|
8ed0a827de | ||
|
11c2edb068 | ||
|
e7c28ca3c9 | ||
|
2af6f8da89 | ||
|
a5c981bb48 | ||
|
23af8bd095 | ||
|
2b955fc70f | ||
|
90feb63894 | ||
|
91f1307de4 | ||
|
3d1e3ed3ba | ||
|
9a2c2028c7 | ||
|
6ef733be54 | ||
|
a61180aeae | ||
|
391036643c | ||
|
04f62ac9f7 | ||
|
755fb81a62 | ||
|
5f2ff85fe8 | ||
|
489e49f698 | ||
|
65e66117e2 | ||
|
a726351341 | ||
|
d753431d11 | ||
|
83b40b2532 | ||
|
1f7b43f94e | ||
|
5ff8d11393 | ||
|
157ce340b6 | ||
|
27584037f8 | ||
|
29e980473f | ||
|
0b36745338 | ||
|
c6a91dad2a | ||
|
1233fc7b71 | ||
|
bd45ecf56e | ||
|
a4faa4475a | ||
|
18812376dc | ||
|
62effd9acb | ||
|
8a64ed2a1e | ||
|
178cdacf5e | ||
|
0b60e4dbbb | ||
|
fab1bbad73 | ||
|
5691d4e053 | ||
|
c536d1bd01 | ||
|
d4091badf9 | ||
|
1d75af5ab4 | ||
|
8bd5dac837 | ||
|
46bae2f3fc | ||
|
df67678220 | ||
|
fd719ac013 | ||
|
4861557a03 | ||
|
835d657648 | ||
|
72599be9d4 | ||
|
7cbe879fc6 | ||
|
8cfc1163e2 | ||
|
c78f98a7bc | ||
|
90d4e43c58 | ||
|
be7500c8b7 | ||
|
1a55ce8078 | ||
|
242d2eff2c | ||
|
56d1282642 | ||
|
bd7809421d | ||
|
5e1e810102 | ||
|
6405bd1758 | ||
|
8a3cea8b6d | ||
|
f6a64e7dd8 | ||
|
cbd3db9d28 | ||
|
2729da33a8 | ||
|
feeb2a222d | ||
|
76267ebdff | ||
|
0e8920ed63 | ||
|
903c2b4aca | ||
|
5201c2a10c | ||
|
7085fd3ed3 | ||
|
ef8825f5f6 | ||
|
4744d0c9e4 | ||
|
93e743d171 | ||
|
e0e5b0391c | ||
|
7a20c9bd90 | ||
|
473d1a8e4f | ||
|
92aff6a8ef | ||
|
220cc8ab15 | ||
|
39e57b522a | ||
|
2e588bd0b8 | ||
|
abc0399fdb | ||
|
26b578c1c7 | ||
|
6f4a3c1200 | ||
|
e8a5e91151 | ||
|
810164d679 | ||
|
aed43cc988 | ||
|
d437ecc69f | ||
|
ba581501f4 | ||
|
638874e925 | ||
|
b5e8ebb943 | ||
|
822019bf05 | ||
|
01c992caef | ||
|
97a2cb21ee | ||
|
e6d5a0ca8d | ||
|
738874fb6f | ||
|
28f107fb96 | ||
|
0917206827 | ||
|
909b343ce0 | ||
|
a51cc78a3b | ||
|
083a9e1ecc | ||
|
6635668eb3 | ||
|
d85187aa44 | ||
|
186bb63b57 | ||
|
6214816e26 | ||
|
eb19d271fd | ||
|
f94d16bcc3 | ||
|
33c2190015 | ||
|
497c828dd7 | ||
|
412de47623 | ||
|
8458352255 | ||
|
b62fa678e6 | ||
|
07ed417627 | ||
|
66b8a56454 | ||
|
02adea2d50 | ||
|
2b5a844c05 | ||
|
87f0a371b1 | ||
|
5afe3c6e59 | ||
|
2fd98de56f | ||
|
71da178138 | ||
|
3dff38ab3d | ||
|
80a109b04e | ||
|
fb2691ad70 | ||
|
4c3537952a | ||
|
f9556d2236 | ||
|
f90408d3ab | ||
|
ad7335db0e | ||
|
392f4dbb25 | ||
|
3995ec62c5 | ||
|
2936c72c08 | ||
|
32b44c5447 | ||
|
4fc60bf5e9 | ||
|
f71e32735f | ||
|
3cff3dd0de | ||
|
285b524299 | ||
|
c3cc7aae2e | ||
|
b5db9b2f41 | ||
|
f3df9b16d5 | ||
|
d2bcbcc6b7 | ||
|
800ddae12f | ||
|
5a5db45c7e | ||
|
9d44e2f506 | ||
|
1a423a4c8d | ||
|
8f0b759103 | ||
|
90a067df49 | ||
|
b77d33a108 | ||
|
256af35a61 | ||
|
da23122cca | ||
|
fb2d99e9e0 | ||
|
df26eca4d2 | ||
|
1e79b8703d | ||
|
979dccdf58 | ||
|
aeeb2549b1 | ||
|
6c26f616ba | ||
|
4eb5d7814a | ||
|
bc1839baf4 | ||
|
4f14420a25 | ||
|
f4f0c1dba9 | ||
|
e95c8fe0b0 | ||
|
73f7b765ef | ||
|
8c5c35a0ad | ||
|
58a8e813e4 | ||
|
d119957586 | ||
|
c69735e3b6 | ||
|
42ebbf9120 | ||
|
2020f1b15a | ||
|
4cd1a1cec4 | ||
|
c9840e59b1 | ||
|
4b4f00da56 | ||
|
1c7539e9c9 | ||
|
4b2af9ce6b | ||
|
3a695f9c1c | ||
|
36923c1e93 | ||
|
8a40ff086d | ||
|
cbbdcbf246 | ||
|
7846f4602e | ||
|
b7428d114e | ||
|
579c36c98c | ||
|
3119911657 | ||
|
7d40d5d686 | ||
|
ea82ab4cb8 | ||
|
9f7fda14cb | ||
|
d1b0aadfbc | ||
|
59e32556a4 | ||
|
a37ca2ec27 | ||
|
d0f8eb96cd | ||
|
649bc53536 | ||
|
f062f75e17 |
5
.dockerignore
Normal file
5
.dockerignore
Normal file
@ -0,0 +1,5 @@
|
||||
.git/lfs
|
||||
native_client/ds-swig
|
||||
native_client/python/dist/*.whl
|
||||
native_client/ctcdecode/*.a
|
||||
native_client/javascript/build/
|
@ -14,6 +14,10 @@ const fs = __nccwpck_require__(5747);
|
||||
const { throttling } = __nccwpck_require__(9968);
|
||||
const { GitHub } = __nccwpck_require__(3030);
|
||||
const Download = __nccwpck_require__(7490);
|
||||
const Util = __nccwpck_require__(1669);
|
||||
const Stream = __nccwpck_require__(2413);
|
||||
|
||||
const Pipeline = Util.promisify(Stream.pipeline);
|
||||
|
||||
async function getGoodArtifacts(client, owner, repo, releaseId, name) {
|
||||
console.log(`==> GET /repos/${owner}/${repo}/releases/${releaseId}/assets`);
|
||||
@ -44,6 +48,7 @@ async function getGoodArtifacts(client, owner, repo, releaseId, name) {
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const token = core.getInput("github_token", { required: true });
|
||||
const [owner, repo] = core.getInput("repo", { required: true }).split("/");
|
||||
const path = core.getInput("path", { required: true });
|
||||
const name = core.getInput("name");
|
||||
@ -51,6 +56,7 @@ async function main() {
|
||||
const releaseTag = core.getInput("release-tag");
|
||||
const OctokitWithThrottling = GitHub.plugin(throttling);
|
||||
const client = new OctokitWithThrottling({
|
||||
auth: token,
|
||||
throttle: {
|
||||
onRateLimit: (retryAfter, options) => {
|
||||
console.log(
|
||||
@ -61,6 +67,9 @@ async function main() {
|
||||
if (options.request.retryCount <= 2) {
|
||||
console.log(`Retrying after ${retryAfter} seconds!`);
|
||||
return true;
|
||||
} else {
|
||||
console.log("Exhausted 2 retries");
|
||||
core.setFailed("Exhausted 2 retries");
|
||||
}
|
||||
},
|
||||
onAbuseLimit: (retryAfter, options) => {
|
||||
@ -68,6 +77,7 @@ async function main() {
|
||||
console.log(
|
||||
`Abuse detected for request ${options.method} ${options.url}`
|
||||
);
|
||||
core.setFailed(`GitHub REST API Abuse detected for request ${options.method} ${options.url}`)
|
||||
},
|
||||
},
|
||||
});
|
||||
@ -95,21 +105,24 @@ async function main() {
|
||||
console.log("==> # artifacts:", goodArtifacts.length);
|
||||
|
||||
const artifact = goodArtifacts[0];
|
||||
|
||||
console.log("==> Artifact:", artifact.id)
|
||||
|
||||
const size = filesize(artifact.size, { base: 10 })
|
||||
console.log(`==> Downloading: ${artifact.name} (${size}) to path: ${path}`)
|
||||
|
||||
console.log("==> Downloading:", artifact.name, `(${size})`)
|
||||
|
||||
const dir = name ? path : pathname.join(path, artifact.name)
|
||||
const dir = pathname.dirname(path)
|
||||
console.log(`==> Creating containing dir if needed: ${dir}`)
|
||||
fs.mkdirSync(dir, { recursive: true })
|
||||
|
||||
await Download(artifact.url, dir, {
|
||||
headers: {
|
||||
"Accept": "application/octet-stream",
|
||||
},
|
||||
});
|
||||
await Pipeline(
|
||||
Download(artifact.url, {
|
||||
headers: {
|
||||
"Accept": "application/octet-stream",
|
||||
"Authorization": `token ${token}`,
|
||||
},
|
||||
}),
|
||||
fs.createWriteStream(path)
|
||||
)
|
||||
}
|
||||
|
||||
if (artifactStatus === "missing" && download == "true") {
|
||||
@ -30660,7 +30673,7 @@ module.exports = eval("require")("original-fs");
|
||||
/***/ ((module) => {
|
||||
|
||||
"use strict";
|
||||
module.exports = JSON.parse("{\"_from\":\"got@^8.3.1\",\"_id\":\"got@8.3.2\",\"_inBundle\":false,\"_integrity\":\"sha512-qjUJ5U/hawxosMryILofZCkm3C84PLJS/0grRIpjAwu+Lkxxj5cxeCU25BG0/3mDSpXKTyZr8oh8wIgLaH0QCw==\",\"_location\":\"/got\",\"_phantomChildren\":{},\"_requested\":{\"type\":\"range\",\"registry\":true,\"raw\":\"got@^8.3.1\",\"name\":\"got\",\"escapedName\":\"got\",\"rawSpec\":\"^8.3.1\",\"saveSpec\":null,\"fetchSpec\":\"^8.3.1\"},\"_requiredBy\":[\"/download\"],\"_resolved\":\"https://registry.npmjs.org/got/-/got-8.3.2.tgz\",\"_shasum\":\"1d23f64390e97f776cac52e5b936e5f514d2e937\",\"_spec\":\"got@^8.3.1\",\"_where\":\"/Users/reubenmorais/Development/STT/.github/actions/check_artifact_exists/node_modules/download\",\"ava\":{\"concurrency\":4},\"browser\":{\"decompress-response\":false,\"electron\":false},\"bugs\":{\"url\":\"https://github.com/sindresorhus/got/issues\"},\"bundleDependencies\":false,\"dependencies\":{\"@sindresorhus/is\":\"^0.7.0\",\"cacheable-request\":\"^2.1.1\",\"decompress-response\":\"^3.3.0\",\"duplexer3\":\"^0.1.4\",\"get-stream\":\"^3.0.0\",\"into-stream\":\"^3.1.0\",\"is-retry-allowed\":\"^1.1.0\",\"isurl\":\"^1.0.0-alpha5\",\"lowercase-keys\":\"^1.0.0\",\"mimic-response\":\"^1.0.0\",\"p-cancelable\":\"^0.4.0\",\"p-timeout\":\"^2.0.1\",\"pify\":\"^3.0.0\",\"safe-buffer\":\"^5.1.1\",\"timed-out\":\"^4.0.1\",\"url-parse-lax\":\"^3.0.0\",\"url-to-options\":\"^1.0.1\"},\"deprecated\":false,\"description\":\"Simplified HTTP 
requests\",\"devDependencies\":{\"ava\":\"^0.25.0\",\"coveralls\":\"^3.0.0\",\"form-data\":\"^2.1.1\",\"get-port\":\"^3.0.0\",\"nyc\":\"^11.0.2\",\"p-event\":\"^1.3.0\",\"pem\":\"^1.4.4\",\"proxyquire\":\"^1.8.0\",\"sinon\":\"^4.0.0\",\"slow-stream\":\"0.0.4\",\"tempfile\":\"^2.0.0\",\"tempy\":\"^0.2.1\",\"universal-url\":\"1.0.0-alpha\",\"xo\":\"^0.20.0\"},\"engines\":{\"node\":\">=4\"},\"files\":[\"index.js\",\"errors.js\"],\"homepage\":\"https://github.com/sindresorhus/got#readme\",\"keywords\":[\"http\",\"https\",\"get\",\"got\",\"url\",\"uri\",\"request\",\"util\",\"utility\",\"simple\",\"curl\",\"wget\",\"fetch\",\"net\",\"network\",\"electron\"],\"license\":\"MIT\",\"maintainers\":[{\"name\":\"Sindre Sorhus\",\"email\":\"sindresorhus@gmail.com\",\"url\":\"sindresorhus.com\"},{\"name\":\"Vsevolod Strukchinsky\",\"email\":\"floatdrop@gmail.com\",\"url\":\"github.com/floatdrop\"},{\"name\":\"Alexander Tesfamichael\",\"email\":\"alex.tesfamichael@gmail.com\",\"url\":\"alextes.me\"}],\"name\":\"got\",\"repository\":{\"type\":\"git\",\"url\":\"git+https://github.com/sindresorhus/got.git\"},\"scripts\":{\"coveralls\":\"nyc report --reporter=text-lcov | coveralls\",\"test\":\"xo && nyc ava\"},\"version\":\"8.3.2\"}");
|
||||
module.exports = JSON.parse("{\"_args\":[[\"got@8.3.2\",\"/Users/reubenmorais/Development/STT/.github/actions/check_artifact_exists\"]],\"_development\":true,\"_from\":\"got@8.3.2\",\"_id\":\"got@8.3.2\",\"_inBundle\":false,\"_integrity\":\"sha512-qjUJ5U/hawxosMryILofZCkm3C84PLJS/0grRIpjAwu+Lkxxj5cxeCU25BG0/3mDSpXKTyZr8oh8wIgLaH0QCw==\",\"_location\":\"/got\",\"_phantomChildren\":{},\"_requested\":{\"type\":\"version\",\"registry\":true,\"raw\":\"got@8.3.2\",\"name\":\"got\",\"escapedName\":\"got\",\"rawSpec\":\"8.3.2\",\"saveSpec\":null,\"fetchSpec\":\"8.3.2\"},\"_requiredBy\":[\"/download\"],\"_resolved\":\"https://registry.npmjs.org/got/-/got-8.3.2.tgz\",\"_spec\":\"8.3.2\",\"_where\":\"/Users/reubenmorais/Development/STT/.github/actions/check_artifact_exists\",\"ava\":{\"concurrency\":4},\"browser\":{\"decompress-response\":false,\"electron\":false},\"bugs\":{\"url\":\"https://github.com/sindresorhus/got/issues\"},\"dependencies\":{\"@sindresorhus/is\":\"^0.7.0\",\"cacheable-request\":\"^2.1.1\",\"decompress-response\":\"^3.3.0\",\"duplexer3\":\"^0.1.4\",\"get-stream\":\"^3.0.0\",\"into-stream\":\"^3.1.0\",\"is-retry-allowed\":\"^1.1.0\",\"isurl\":\"^1.0.0-alpha5\",\"lowercase-keys\":\"^1.0.0\",\"mimic-response\":\"^1.0.0\",\"p-cancelable\":\"^0.4.0\",\"p-timeout\":\"^2.0.1\",\"pify\":\"^3.0.0\",\"safe-buffer\":\"^5.1.1\",\"timed-out\":\"^4.0.1\",\"url-parse-lax\":\"^3.0.0\",\"url-to-options\":\"^1.0.1\"},\"description\":\"Simplified HTTP 
requests\",\"devDependencies\":{\"ava\":\"^0.25.0\",\"coveralls\":\"^3.0.0\",\"form-data\":\"^2.1.1\",\"get-port\":\"^3.0.0\",\"nyc\":\"^11.0.2\",\"p-event\":\"^1.3.0\",\"pem\":\"^1.4.4\",\"proxyquire\":\"^1.8.0\",\"sinon\":\"^4.0.0\",\"slow-stream\":\"0.0.4\",\"tempfile\":\"^2.0.0\",\"tempy\":\"^0.2.1\",\"universal-url\":\"1.0.0-alpha\",\"xo\":\"^0.20.0\"},\"engines\":{\"node\":\">=4\"},\"files\":[\"index.js\",\"errors.js\"],\"homepage\":\"https://github.com/sindresorhus/got#readme\",\"keywords\":[\"http\",\"https\",\"get\",\"got\",\"url\",\"uri\",\"request\",\"util\",\"utility\",\"simple\",\"curl\",\"wget\",\"fetch\",\"net\",\"network\",\"electron\"],\"license\":\"MIT\",\"maintainers\":[{\"name\":\"Sindre Sorhus\",\"email\":\"sindresorhus@gmail.com\",\"url\":\"sindresorhus.com\"},{\"name\":\"Vsevolod Strukchinsky\",\"email\":\"floatdrop@gmail.com\",\"url\":\"github.com/floatdrop\"},{\"name\":\"Alexander Tesfamichael\",\"email\":\"alex.tesfamichael@gmail.com\",\"url\":\"alextes.me\"}],\"name\":\"got\",\"repository\":{\"type\":\"git\",\"url\":\"git+https://github.com/sindresorhus/got.git\"},\"scripts\":{\"coveralls\":\"nyc report --reporter=text-lcov | coveralls\",\"test\":\"xo && nyc ava\"},\"version\":\"8.3.2\"}");
|
||||
|
||||
/***/ }),
|
||||
|
||||
@ -30676,7 +30689,7 @@ module.exports = JSON.parse("{\"application/1d-interleaved-parityfec\":{\"source
|
||||
/***/ ((module) => {
|
||||
|
||||
"use strict";
|
||||
module.exports = JSON.parse("{\"_from\":\"seek-bzip@^1.0.5\",\"_id\":\"seek-bzip@1.0.6\",\"_inBundle\":false,\"_integrity\":\"sha512-e1QtP3YL5tWww8uKaOCQ18UxIT2laNBXHjV/S2WYCiK4udiv8lkG89KRIoCjUagnAmCBurjF4zEVX2ByBbnCjQ==\",\"_location\":\"/seek-bzip\",\"_phantomChildren\":{},\"_requested\":{\"type\":\"range\",\"registry\":true,\"raw\":\"seek-bzip@^1.0.5\",\"name\":\"seek-bzip\",\"escapedName\":\"seek-bzip\",\"rawSpec\":\"^1.0.5\",\"saveSpec\":null,\"fetchSpec\":\"^1.0.5\"},\"_requiredBy\":[\"/decompress-tarbz2\"],\"_resolved\":\"https://registry.npmjs.org/seek-bzip/-/seek-bzip-1.0.6.tgz\",\"_shasum\":\"35c4171f55a680916b52a07859ecf3b5857f21c4\",\"_spec\":\"seek-bzip@^1.0.5\",\"_where\":\"/Users/reubenmorais/Development/STT/.github/actions/check_artifact_exists/node_modules/decompress-tarbz2\",\"bin\":{\"seek-bunzip\":\"bin/seek-bunzip\",\"seek-table\":\"bin/seek-bzip-table\"},\"bugs\":{\"url\":\"https://github.com/cscott/seek-bzip/issues\"},\"bundleDependencies\":false,\"contributors\":[{\"name\":\"C. Scott Ananian\",\"url\":\"http://cscott.net\"},{\"name\":\"Eli Skeggs\"},{\"name\":\"Kevin Kwok\"},{\"name\":\"Rob Landley\",\"url\":\"http://landley.net\"}],\"dependencies\":{\"commander\":\"^2.8.1\"},\"deprecated\":false,\"description\":\"a pure-JavaScript Node.JS module for random-access decoding bzip2 data\",\"devDependencies\":{\"fibers\":\"~1.0.6\",\"mocha\":\"~2.2.5\"},\"directories\":{\"test\":\"test\"},\"homepage\":\"https://github.com/cscott/seek-bzip#readme\",\"license\":\"MIT\",\"main\":\"./lib/index.js\",\"name\":\"seek-bzip\",\"repository\":{\"type\":\"git\",\"url\":\"git+https://github.com/cscott/seek-bzip.git\"},\"scripts\":{\"test\":\"mocha\"},\"version\":\"1.0.6\"}");
|
||||
module.exports = JSON.parse("{\"_args\":[[\"seek-bzip@1.0.6\",\"/Users/reubenmorais/Development/STT/.github/actions/check_artifact_exists\"]],\"_development\":true,\"_from\":\"seek-bzip@1.0.6\",\"_id\":\"seek-bzip@1.0.6\",\"_inBundle\":false,\"_integrity\":\"sha512-e1QtP3YL5tWww8uKaOCQ18UxIT2laNBXHjV/S2WYCiK4udiv8lkG89KRIoCjUagnAmCBurjF4zEVX2ByBbnCjQ==\",\"_location\":\"/seek-bzip\",\"_phantomChildren\":{},\"_requested\":{\"type\":\"version\",\"registry\":true,\"raw\":\"seek-bzip@1.0.6\",\"name\":\"seek-bzip\",\"escapedName\":\"seek-bzip\",\"rawSpec\":\"1.0.6\",\"saveSpec\":null,\"fetchSpec\":\"1.0.6\"},\"_requiredBy\":[\"/decompress-tarbz2\"],\"_resolved\":\"https://registry.npmjs.org/seek-bzip/-/seek-bzip-1.0.6.tgz\",\"_spec\":\"1.0.6\",\"_where\":\"/Users/reubenmorais/Development/STT/.github/actions/check_artifact_exists\",\"bin\":{\"seek-bunzip\":\"bin/seek-bunzip\",\"seek-table\":\"bin/seek-bzip-table\"},\"bugs\":{\"url\":\"https://github.com/cscott/seek-bzip/issues\"},\"contributors\":[{\"name\":\"C. Scott Ananian\",\"url\":\"http://cscott.net\"},{\"name\":\"Eli Skeggs\"},{\"name\":\"Kevin Kwok\"},{\"name\":\"Rob Landley\",\"url\":\"http://landley.net\"}],\"dependencies\":{\"commander\":\"^2.8.1\"},\"description\":\"a pure-JavaScript Node.JS module for random-access decoding bzip2 data\",\"devDependencies\":{\"fibers\":\"~1.0.6\",\"mocha\":\"~2.2.5\"},\"directories\":{\"test\":\"test\"},\"homepage\":\"https://github.com/cscott/seek-bzip#readme\",\"license\":\"MIT\",\"main\":\"./lib/index.js\",\"name\":\"seek-bzip\",\"repository\":{\"type\":\"git\",\"url\":\"git+https://github.com/cscott/seek-bzip.git\"},\"scripts\":{\"test\":\"mocha\"},\"version\":\"1.0.6\"}");
|
||||
|
||||
/***/ }),
|
||||
|
||||
|
31
.github/actions/check_artifact_exists/main.js
vendored
31
.github/actions/check_artifact_exists/main.js
vendored
@ -7,6 +7,10 @@ const fs = require('fs');
|
||||
const { throttling } = require('@octokit/plugin-throttling');
|
||||
const { GitHub } = require('@actions/github/lib/utils');
|
||||
const Download = require('download');
|
||||
const Util = require('util');
|
||||
const Stream = require('stream');
|
||||
|
||||
const Pipeline = Util.promisify(Stream.pipeline);
|
||||
|
||||
async function getGoodArtifacts(client, owner, repo, releaseId, name) {
|
||||
console.log(`==> GET /repos/${owner}/${repo}/releases/${releaseId}/assets`);
|
||||
@ -37,6 +41,7 @@ async function getGoodArtifacts(client, owner, repo, releaseId, name) {
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const token = core.getInput("github_token", { required: true });
|
||||
const [owner, repo] = core.getInput("repo", { required: true }).split("/");
|
||||
const path = core.getInput("path", { required: true });
|
||||
const name = core.getInput("name");
|
||||
@ -44,6 +49,7 @@ async function main() {
|
||||
const releaseTag = core.getInput("release-tag");
|
||||
const OctokitWithThrottling = GitHub.plugin(throttling);
|
||||
const client = new OctokitWithThrottling({
|
||||
auth: token,
|
||||
throttle: {
|
||||
onRateLimit: (retryAfter, options) => {
|
||||
console.log(
|
||||
@ -54,6 +60,9 @@ async function main() {
|
||||
if (options.request.retryCount <= 2) {
|
||||
console.log(`Retrying after ${retryAfter} seconds!`);
|
||||
return true;
|
||||
} else {
|
||||
console.log("Exhausted 2 retries");
|
||||
core.setFailed("Exhausted 2 retries");
|
||||
}
|
||||
},
|
||||
onAbuseLimit: (retryAfter, options) => {
|
||||
@ -61,6 +70,7 @@ async function main() {
|
||||
console.log(
|
||||
`Abuse detected for request ${options.method} ${options.url}`
|
||||
);
|
||||
core.setFailed(`GitHub REST API Abuse detected for request ${options.method} ${options.url}`)
|
||||
},
|
||||
},
|
||||
});
|
||||
@ -88,21 +98,24 @@ async function main() {
|
||||
console.log("==> # artifacts:", goodArtifacts.length);
|
||||
|
||||
const artifact = goodArtifacts[0];
|
||||
|
||||
console.log("==> Artifact:", artifact.id)
|
||||
|
||||
const size = filesize(artifact.size, { base: 10 })
|
||||
console.log(`==> Downloading: ${artifact.name} (${size}) to path: ${path}`)
|
||||
|
||||
console.log("==> Downloading:", artifact.name, `(${size})`)
|
||||
|
||||
const dir = name ? path : pathname.join(path, artifact.name)
|
||||
const dir = pathname.dirname(path)
|
||||
console.log(`==> Creating containing dir if needed: ${dir}`)
|
||||
fs.mkdirSync(dir, { recursive: true })
|
||||
|
||||
await Download(artifact.url, dir, {
|
||||
headers: {
|
||||
"Accept": "application/octet-stream",
|
||||
},
|
||||
});
|
||||
await Pipeline(
|
||||
Download(artifact.url, {
|
||||
headers: {
|
||||
"Accept": "application/octet-stream",
|
||||
"Authorization": `token ${token}`,
|
||||
},
|
||||
}),
|
||||
fs.createWriteStream(path)
|
||||
)
|
||||
}
|
||||
|
||||
if (artifactStatus === "missing" && download == "true") {
|
||||
|
1
.github/actions/get_cache_key/action.yml
vendored
1
.github/actions/get_cache_key/action.yml
vendored
@ -16,6 +16,7 @@ runs:
|
||||
steps:
|
||||
- id: compute_cache_key
|
||||
run: |
|
||||
set -xe
|
||||
JOB=${{ github.job }}
|
||||
SUBMODULE=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f1)
|
||||
FLAVOR=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f2)
|
||||
|
15
.github/actions/host-build/action.yml
vendored
15
.github/actions/host-build/action.yml
vendored
@ -1,15 +0,0 @@
|
||||
name: "Run build lib"
|
||||
description: "Run build of lib"
|
||||
inputs:
|
||||
arch:
|
||||
description: "Target arch for loading script (host/armv7/aarch64)"
|
||||
required: false
|
||||
default: "host"
|
||||
flavor:
|
||||
description: "Build flavor"
|
||||
required: true
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- run: ./ci_scripts/${{ inputs.arch }}-build.sh ${{ inputs.flavor }}
|
||||
shell: bash
|
12
.github/actions/libstt-build/action.yml
vendored
Normal file
12
.github/actions/libstt-build/action.yml
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
name: "Build libstt.so"
|
||||
description: "Build libstt.so"
|
||||
inputs:
|
||||
arch:
|
||||
description: "Target arch for loading script (host/armv7/aarch64)"
|
||||
required: false
|
||||
default: "host"
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- run: ./ci_scripts/${{ inputs.arch }}-build.sh
|
||||
shell: bash
|
10
.github/actions/numpy_vers/action.yml
vendored
10
.github/actions/numpy_vers/action.yml
vendored
@ -28,15 +28,15 @@ runs:
|
||||
case "${{ inputs.pyver }}" in
|
||||
3.7*)
|
||||
NUMPY_BUILD_VERSION="==1.14.5"
|
||||
NUMPY_DEP_VERSION=">=1.14.5"
|
||||
NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4"
|
||||
;;
|
||||
3.8*)
|
||||
NUMPY_BUILD_VERSION="==1.17.3"
|
||||
NUMPY_DEP_VERSION=">=1.17.3"
|
||||
NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4"
|
||||
;;
|
||||
3.9*)
|
||||
NUMPY_BUILD_VERSION="==1.19.4"
|
||||
NUMPY_DEP_VERSION=">=1.19.4"
|
||||
NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
@ -57,7 +57,7 @@ runs:
|
||||
;;
|
||||
3.9*)
|
||||
NUMPY_BUILD_VERSION="==1.19.4"
|
||||
NUMPY_DEP_VERSION=">=1.19.4"
|
||||
NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
@ -82,7 +82,7 @@ runs:
|
||||
;;
|
||||
3.9*)
|
||||
NUMPY_BUILD_VERSION="==1.19.4"
|
||||
NUMPY_DEP_VERSION=">=1.19.4"
|
||||
NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
|
24
.github/actions/python-build/action.yml
vendored
24
.github/actions/python-build/action.yml
vendored
@ -1,9 +1,6 @@
|
||||
name: "Python binding"
|
||||
description: "Binding a python binding"
|
||||
inputs:
|
||||
build_flavor:
|
||||
description: "Python package name"
|
||||
required: true
|
||||
numpy_build:
|
||||
description: "NumPy build dependecy"
|
||||
required: true
|
||||
@ -36,22 +33,15 @@ runs:
|
||||
- run: |
|
||||
python3 --version
|
||||
pip3 --version
|
||||
python3 -m pip install virtualenv
|
||||
python3 -m virtualenv stt-build
|
||||
shell: bash
|
||||
- run: |
|
||||
mkdir -p wheels
|
||||
shell: bash
|
||||
- run: |
|
||||
set -xe
|
||||
|
||||
PROJECT_NAME="stt"
|
||||
if [ "${{ inputs.build_flavor }}" = "tflite" ]; then
|
||||
PROJECT_NAME="stt-tflite"
|
||||
fi
|
||||
|
||||
OS=$(uname)
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
if [ "${OS}" = "Linux" -a "${{ inputs.target }}" != "host" ]; then
|
||||
python3 -m venv stt-build
|
||||
source stt-build/bin/activate
|
||||
fi
|
||||
|
||||
@ -65,14 +55,4 @@ runs:
|
||||
RASPBIAN=${{ inputs.chroot }} \
|
||||
SETUP_FLAGS="--project_name ${PROJECT_NAME}" \
|
||||
bindings-clean bindings
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
deactivate
|
||||
fi
|
||||
shell: bash
|
||||
- run: |
|
||||
cp native_client/python/dist/*.whl wheels
|
||||
shell: bash
|
||||
- run: |
|
||||
make -C native_client/python/ bindings-clean
|
||||
shell: bash
|
||||
|
8
.github/actions/run-tests/action.yml
vendored
8
.github/actions/run-tests/action.yml
vendored
@ -4,9 +4,6 @@ inputs:
|
||||
runtime:
|
||||
description: "Runtime to use for running test"
|
||||
required: true
|
||||
build-flavor:
|
||||
description: "Running against TF or TFLite"
|
||||
required: true
|
||||
model-kind:
|
||||
description: "Running against CI baked or production model"
|
||||
required: true
|
||||
@ -22,10 +19,7 @@ runs:
|
||||
- run: |
|
||||
set -xe
|
||||
|
||||
build=""
|
||||
if [ "${{ inputs.build-flavor }}" = "tflite" ]; then
|
||||
build="_tflite"
|
||||
fi
|
||||
build="_tflite"
|
||||
|
||||
model_kind=""
|
||||
if [ "${{ inputs.model-kind }}" = "prod" ]; then
|
||||
|
7
.github/actions/setup-tensorflow/action.yml
vendored
7
.github/actions/setup-tensorflow/action.yml
vendored
@ -1,7 +1,12 @@
|
||||
name: "Setup TensorFlow"
|
||||
description: "Setup TensorFlow Build"
|
||||
inputs:
|
||||
flavor:
|
||||
description: "Target flavor for setup script (empty/android-armv7/android-arm64)"
|
||||
required: false
|
||||
default: ""
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- run: ./ci_scripts/tf-setup.sh
|
||||
- run: ./ci_scripts/tf-setup.sh ${{ inputs.flavor }}
|
||||
shell: bash
|
||||
|
58
.github/actions/upload-cache-asset/action.yml
vendored
58
.github/actions/upload-cache-asset/action.yml
vendored
@ -1,58 +0,0 @@
|
||||
name: "Upload cache asset to release"
|
||||
description: "Upload a build cache asset to a release"
|
||||
inputs:
|
||||
name:
|
||||
description: "Artifact name"
|
||||
required: true
|
||||
path:
|
||||
description: "Path of file to upload"
|
||||
required: true
|
||||
token:
|
||||
description: "GitHub token"
|
||||
required: false
|
||||
default: ${{ github.token }}
|
||||
repo:
|
||||
description: "Repository name with owner (like actions/checkout)"
|
||||
required: false
|
||||
default: ${{ github.repository }}
|
||||
release-tag:
|
||||
description: "Tag of release to check artifacts under"
|
||||
required: false
|
||||
default: "v0.10.0-alpha.7"
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- run: |
|
||||
set -xe
|
||||
|
||||
asset_name="${{ inputs.name }}"
|
||||
filename="${{ inputs.path }}"
|
||||
|
||||
# Check input
|
||||
if [[ ! -f "${filename}" ]]; then
|
||||
echo "Error: Input file (${filename}) missing"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
AUTH="Authorization: token ${{ inputs.token }}"
|
||||
|
||||
owner=$(echo "${{inputs.repo}}" | cut -f1 -d/)
|
||||
repo=$(echo "${{inputs.repo}}" | cut -f2 -d/)
|
||||
tag="${{ inputs.release-tag }}"
|
||||
|
||||
GH_REPO="https://api.github.com/repos/${owner}/${repo}"
|
||||
|
||||
# Check token
|
||||
curl -o /dev/null -sH "$AUTH" $GH_REPO || { echo "Error: Invalid repo, token or network issue!"; exit 1; }
|
||||
|
||||
# Get ID of the release based on given tag name
|
||||
GH_TAGS="${GH_REPO}/releases/tags/${tag}"
|
||||
response=$(curl -sH "$AUTH" $GH_TAGS)
|
||||
eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
|
||||
[ "$id" ] || { echo "Error: Failed to get release id for tag: $tag"; echo "$response" | awk 'length($0)<100' >&2; exit 1; }
|
||||
|
||||
# Upload asset
|
||||
echo "Uploading asset..."
|
||||
GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset_name}"
|
||||
curl -T "${filename}" -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET
|
||||
shell: bash
|
89
.github/actions/upload-release-asset/action.yml
vendored
Normal file
89
.github/actions/upload-release-asset/action.yml
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
name: "Upload cache asset to release"
|
||||
description: "Upload a build cache asset to a release"
|
||||
inputs:
|
||||
name:
|
||||
description: "Artifact name"
|
||||
required: true
|
||||
path:
|
||||
description: "Path of file to upload"
|
||||
required: true
|
||||
token:
|
||||
description: "GitHub token"
|
||||
required: false
|
||||
default: ${{ github.token }}
|
||||
repo:
|
||||
description: "Repository name with owner (like actions/checkout)"
|
||||
required: false
|
||||
default: ${{ github.repository }}
|
||||
release-tag:
|
||||
description: "Tag of release to check artifacts under"
|
||||
required: false
|
||||
default: "v0.10.0-alpha.7"
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- run: |
|
||||
set -xe
|
||||
|
||||
asset_name="${{ inputs.name }}"
|
||||
filenames="${{ inputs.path }}"
|
||||
|
||||
if [ $(compgen -G "$filenames" | wc -l) -gt 1 -a -n "$asset_name" ]; then
|
||||
echo "Error: multiple input files specified, but also specified an asset_name."
|
||||
echo "When uploading multiple files leave asset_name empty to use the file names as asset names."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check input
|
||||
for file in $filenames; do
|
||||
if [[ ! -f $file ]]; then
|
||||
echo "Error: Input file (${filename}) missing"
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
AUTH="Authorization: token ${{ inputs.token }}"
|
||||
|
||||
owner=$(echo "${{inputs.repo}}" | cut -f1 -d/)
|
||||
repo=$(echo "${{inputs.repo}}" | cut -f2 -d/)
|
||||
tag="${{ inputs.release-tag }}"
|
||||
|
||||
GH_REPO="https://api.github.com/repos/${owner}/${repo}"
|
||||
|
||||
# Check token
|
||||
curl -o /dev/null -sH "$AUTH" $GH_REPO || {
|
||||
echo "Error: Invalid repo, token or network issue!"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Check if tag exists
|
||||
response=$(curl -sH "$AUTH" "${GH_REPO}/git/refs/tags/${tag}")
|
||||
eval $(echo "$response" | grep -m 1 "sha.:" | grep -w sha | tr : = | tr -cd '[[:alnum:]]=')
|
||||
[ "$sha" ] || {
|
||||
echo "Error: Tag does not exist: $tag"
|
||||
echo "$response" | awk 'length($0)<100' >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Get ID of the release based on given tag name
|
||||
GH_TAGS="${GH_REPO}/releases/tags/${tag}"
|
||||
response=$(curl -sH "$AUTH" $GH_TAGS)
|
||||
eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=')
|
||||
[ "$id" ] || {
|
||||
echo "Error: Could not find release for tag: $tag"
|
||||
echo "$response" | awk 'length($0)<100' >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Upload assets
|
||||
for file in $filenames; do
|
||||
if [ -z $asset_name ]; then
|
||||
asset=$(basename $file)
|
||||
else
|
||||
asset=$asset_name
|
||||
fi
|
||||
echo "Uploading asset with name: $asset from file: $file"
|
||||
GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset}"
|
||||
curl -T $file -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET
|
||||
done
|
||||
shell: bash
|
2
.github/actions/win-install-sox/action.yml
vendored
2
.github/actions/win-install-sox/action.yml
vendored
@ -5,7 +5,7 @@ runs:
|
||||
steps:
|
||||
- run: |
|
||||
set -ex
|
||||
wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2-win32.zip/download -O sox-14.4.2-win32.zip
|
||||
curl -sSLO https://github.com/coqui-ai/STT/releases/download/v0.10.0-alpha.7/sox-14.4.2-win32.zip
|
||||
"C:/Program Files/7-Zip/7z.exe" x -o`pwd`/bin/ -tzip -aoa sox-14.4.2-win32.zip
|
||||
rm sox-*zip
|
||||
echo "`pwd`/bin/sox-14.4.2/" >> $GITHUB_PATH
|
||||
|
77
.github/actions/win-node-build/action.yml
vendored
Normal file
77
.github/actions/win-node-build/action.yml
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
name: "NodeJS binding"
|
||||
description: "Binding a nodejs binding"
|
||||
inputs:
|
||||
nodejs_versions:
|
||||
description: "NodeJS versions supported"
|
||||
required: true
|
||||
electronjs_versions:
|
||||
description: "ElectronJS versions supported"
|
||||
required: true
|
||||
local_cflags:
|
||||
description: "CFLAGS for NodeJS package"
|
||||
required: false
|
||||
default: ""
|
||||
local_ldflags:
|
||||
description: "LDFLAGS for NodeJS package"
|
||||
required: false
|
||||
default: ""
|
||||
local_libs:
|
||||
description: "LIBS for NodeJS package"
|
||||
required: false
|
||||
default: ""
|
||||
target:
|
||||
description: "TARGET value"
|
||||
required: false
|
||||
default: "host"
|
||||
chroot:
|
||||
description: "RASPBIAN value"
|
||||
required: false
|
||||
default: ""
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- run: |
|
||||
node --version
|
||||
npm --version
|
||||
shell: msys2 {0}
|
||||
- run: |
|
||||
npm update
|
||||
shell: msys2 {0}
|
||||
- run: |
|
||||
mkdir -p tmp/headers/nodejs tmp/headers/electronjs
|
||||
shell: msys2 {0}
|
||||
- run: |
|
||||
for node in ${{ inputs.nodejs_versions }}; do
|
||||
EXTRA_CFLAGS=${{ inputs.local_cflags }} \
|
||||
EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
|
||||
EXTRA_LIBS=${{ inputs.local_libs }} \
|
||||
make -C native_client/javascript \
|
||||
TARGET=${{ inputs.target }} \
|
||||
RASPBIAN=${{ inputs.chroot }} \
|
||||
NODE_ABI_TARGET=--target=${node} \
|
||||
NODE_DEVDIR=--devdir=headers/nodejs \
|
||||
clean node-wrapper
|
||||
done;
|
||||
shell: msys2 {0}
|
||||
- run: |
|
||||
for electron in ${{ inputs.electronjs_versions }}; do
|
||||
EXTRA_CFLAGS=${{ inputs.local_cflags }} \
|
||||
EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
|
||||
EXTRA_LIBS=${{ inputs.local_libs }} \
|
||||
make -C native_client/javascript \
|
||||
TARGET=${{ inputs.target }} \
|
||||
RASPBIAN=${{ inputs.chroot }} \
|
||||
NODE_ABI_TARGET=--target=${electron} \
|
||||
NODE_DIST_URL=--disturl=https://electronjs.org/headers \
|
||||
NODE_RUNTIME=--runtime=electron \
|
||||
NODE_DEVDIR=--devdir=headers/electronjs \
|
||||
clean node-wrapper
|
||||
done;
|
||||
shell: msys2 {0}
|
||||
- run: |
|
||||
make -C native_client/javascript clean npm-pack
|
||||
shell: msys2 {0}
|
||||
- run: |
|
||||
tar -czf native_client/javascript/wrapper.tar.gz \
|
||||
-C native_client/javascript/ lib/
|
||||
shell: msys2 {0}
|
14
.github/actions/win-numpy-vers/README.md
vendored
Normal file
14
.github/actions/win-numpy-vers/README.md
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
GitHub Action to set NumPy versions
|
||||
===================================
|
||||
|
||||
This action aims to compute correct values for NumPy dependencies:
|
||||
- `NUMPY_BUILD_VERSION`: range of accepted versions at Python binding build time
|
||||
- `NUMPY_DEP_VERSION`: range of accepted versions for execution time
|
||||
|
||||
Versions are set considering several factors:
|
||||
- API and ABI compatibility; otherwise we can have the binding wrapper
|
||||
throwing errors like "Illegal instruction", or computing wrong values
|
||||
because of changed memory layout
|
||||
- Wheels availability: for CI and end users, we want to avoid having to
|
||||
rebuild numpy so we stick to versions where there is an existing upstream
|
||||
`wheel` file
|
93
.github/actions/win-numpy-vers/action.yml
vendored
Normal file
93
.github/actions/win-numpy-vers/action.yml
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
# Composite action that selects the NumPy version pins for the Python
# bindings: one pin used at build time and one range used at run time.
# The selection is keyed on `uname -s`, `uname -m` and the `pyver` input.
name: "get numpy versions"
description: "Get proper NumPy build and runtime versions dependencies range"
inputs:
  pyver:
    description: "Python version"
    required: true
outputs:
  build_version:
    description: "NumPy build dependency"
    value: ${{ steps.numpy.outputs.build }}
  dep_version:
    description: "NumPy runtime dependency"
    value: ${{ steps.numpy.outputs.dep }}
runs:
  using: "composite"
  steps:
    - id: numpy
      run: |
        set -ex

        # Fallback pins, kept when no OS/arch/Python combination below matches.
        NUMPY_BUILD_VERSION="==1.7.0"
        NUMPY_DEP_VERSION=">=1.7.0"

        OS=$(uname -s)
        ARCH=$(uname -m)

        case "${OS}:${ARCH}" in
          Linux:x86_64)
            case "${{ inputs.pyver }}" in
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;

          Darwin:*)
            case "${{ inputs.pyver }}" in
              3.6*)
                NUMPY_BUILD_VERSION="==1.9.0"
                NUMPY_DEP_VERSION=">=1.9.0"
              ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;

          # NOTE(review): CI_MSYS_VERSION is not set in this action; it is
          # presumably exported by the calling workflow and expected to equal
          # the `uname -s` string of the MSYS2 runtime - confirm. If it is
          # unset, this branch never matches and the fallback pins are used.
          ${CI_MSYS_VERSION}:x86_64)
            case "${{ inputs.pyver }}" in
              3.5*)
                NUMPY_BUILD_VERSION="==1.11.0"
                NUMPY_DEP_VERSION=">=1.11.0,<1.12.0"
              ;;
              3.6*)
                NUMPY_BUILD_VERSION="==1.12.0"
                NUMPY_DEP_VERSION=">=1.12.0,<1.14.5"
              ;;
              3.7*)
                NUMPY_BUILD_VERSION="==1.14.5"
                NUMPY_DEP_VERSION=">=1.14.5,<=1.17.0"
              ;;
              3.8*)
                NUMPY_BUILD_VERSION="==1.17.3"
                NUMPY_DEP_VERSION=">=1.17.3,<=1.17.3"
              ;;
              3.9*)
                NUMPY_BUILD_VERSION="==1.19.4"
                NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4"
              ;;
            esac
          ;;
        esac

        # Publish the step outputs. The `::set-output` workflow command is
        # deprecated in favour of appending `name=value` lines to the file
        # named by GITHUB_OUTPUT, so prefer that file when the runner provides
        # it and keep the legacy command for runners that do not.
        if [ -n "${GITHUB_OUTPUT:-}" ]; then
          {
            echo "build=${NUMPY_BUILD_VERSION}"
            echo "dep=${NUMPY_DEP_VERSION}"
          } >> "${GITHUB_OUTPUT}"
        else
          echo "::set-output name=build::${NUMPY_BUILD_VERSION}"
          echo "::set-output name=dep::${NUMPY_DEP_VERSION}"
        fi
      shell: msys2 {0}
|
31
.github/actions/win-python-build/action.yml
vendored
Normal file
31
.github/actions/win-python-build/action.yml
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
# Composite action that builds the Python binding by running the
# `bindings-clean bindings` make targets in native_client/python/ from an
# MSYS2 shell.
name: "Python binding"
description: "Binding a python binding"
inputs:
  numpy_build:
    description: "NumPy build dependecy"
    required: true
  numpy_dep:
    description: "NumPy runtime dependecy"
    required: true
  # The inputs below are read by the build step. They were previously
  # referenced without being declared, so callers could not supply them.
  # They are optional and default to the empty string, which keeps the make
  # command line identical for callers that pass only the NumPy pins.
  local_cflags:
    description: "CFLAGS for Python package"
    required: false
    default: ""
  local_ldflags:
    description: "LDFLAGS for Python package"
    required: false
    default: ""
  local_libs:
    description: "LIBS for Python package"
    required: false
    default: ""
  # NOTE(review): the sibling win-node-build action defaults `target` to
  # "host"; confirm whether the same default is wanted here.
  target:
    description: "TARGET value"
    required: false
    default: ""
  chroot:
    description: "RASPBIAN value"
    required: false
    default: ""
runs:
  using: "composite"
  steps:
    - run: |
        set -xe

        # Log the interpreter and pip versions used for the build.
        python3 --version
        pip3 --version

        PROJECT_NAME="stt"

        # The NumPy pins and extra compiler/linker flags are handed to make
        # through its environment; TARGET/RASPBIAN/SETUP_FLAGS are passed as
        # make variables on the command line.
        NUMPY_BUILD_VERSION="${{ inputs.numpy_build }}" \
        NUMPY_DEP_VERSION="${{ inputs.numpy_dep }}" \
        EXTRA_CFLAGS=${{ inputs.local_cflags }} \
        EXTRA_LDFLAGS=${{ inputs.local_ldflags }} \
        EXTRA_LIBS=${{ inputs.local_libs }} \
        make -C native_client/python/ \
          TARGET=${{ inputs.target }} \
          RASPBIAN=${{ inputs.chroot }} \
          SETUP_FLAGS="--project_name ${PROJECT_NAME}" \
          bindings-clean bindings
      shell: msys2 {0}
|
35
.github/actions/win-run-tests/action.yml
vendored
Normal file
35
.github/actions/win-run-tests/action.yml
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
# Composite action that runs one of the ci_scripts/*-tests*.sh scripts from an
# MSYS2 shell. The script name is assembled from the `runtime` and
# `model-kind` inputs; `bitrate` is forwarded as its only argument.
name: "Tests execution"
description: "Running tests"
inputs:
  runtime:
    description: "Runtime to use for running test"
    required: true
  model-kind:
    description: "Running against CI baked or production model"
    required: true
  bitrate:
    description: "Bitrate for testing"
    required: true
  chroot:
    description: "Run using a chroot"
    required: false
runs:
  using: "composite"
  steps:
    - run: |
        set -xe

        # Fixed infix of the test script name.
        suffix="_tflite"

        # Only the "prod" model kind adds a tag to the script name.
        case "${{ inputs.model-kind }}" in
          prod) kind_tag="-prod" ;;
          *)    kind_tag="" ;;
        esac

        # Scripts are looked up under the chroot when one is given,
        # otherwise relative to the current directory.
        root="${{ inputs.chroot }}"
        root="${root:-.}"

        ${root}/ci_scripts/${{ inputs.runtime }}${suffix}-tests${kind_tag}.sh ${{ inputs.bitrate }}
      shell: msys2 {0}
|
8
.github/pull_request_template.md
vendored
8
.github/pull_request_template.md
vendored
@ -5,3 +5,11 @@ Welcome to the 🐸STT project! We are excited to see your interest, and appreci
|
||||
This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.
|
||||
|
||||
In order to make a good pull request, please see our [CONTRIBUTING.rst](CONTRIBUTING.rst) file. In particular, make sure you have set up and run the pre-commit hook to check your changes for code style violations.
|
||||
|
||||
Before accepting your pull request, you will be asked to sign a [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT).
|
||||
|
||||
This [Contributor License Agreement](https://cla-assistant.io/coqui-ai/STT):
|
||||
|
||||
- Protects you, Coqui, and the users of the code.
|
||||
- Does not change your rights to use your contributions for any purpose.
|
||||
- Does not change the license of the 🐸STT project. It just makes the terms of your contribution clearer and lets us know you are OK to contribute.
|
||||
|
1435
.github/workflows/build-and-test.yml
vendored
1435
.github/workflows/build-and-test.yml
vendored
File diff suppressed because it is too large
Load Diff
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -4,7 +4,7 @@
|
||||
branch = master
|
||||
[submodule "tensorflow"]
|
||||
path = tensorflow
|
||||
url = https://github.com/coqui-ai/tensorflow.git
|
||||
url = https://bics.ga/experiments/STT-tensorflow.git
|
||||
[submodule "kenlm"]
|
||||
path = kenlm
|
||||
url = https://github.com/kpu/kenlm
|
||||
|
@ -1,4 +1,4 @@
|
||||
exclude: '^(taskcluster|.github|native_client/kenlm|native_client/ctcdecode/third_party|tensorflow|kenlm|doc/examples|data/alphabet.txt)'
|
||||
exclude: '^(taskcluster|.github|native_client/kenlm|native_client/ctcdecode/third_party|tensorflow|kenlm|doc/examples|data/alphabet.txt|data/smoke_test)'
|
||||
repos:
|
||||
- repo: 'https://github.com/pre-commit/pre-commit-hooks'
|
||||
rev: v2.3.0
|
||||
|
@ -3,9 +3,6 @@
|
||||
# Need devel version cause we need /usr/include/cudnn.h
|
||||
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
||||
|
||||
ARG STT_REPO=https://github.com/coqui-ai/STT.git
|
||||
ARG STT_SHA=origin/main
|
||||
|
||||
# >> START Install base software
|
||||
|
||||
# Get basic packages
|
||||
@ -112,12 +109,7 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
|
||||
# << END Configure Bazel
|
||||
|
||||
WORKDIR /
|
||||
|
||||
RUN git clone --recursive $STT_REPO STT
|
||||
WORKDIR /STT
|
||||
RUN git checkout $STT_SHA
|
||||
RUN git submodule sync tensorflow/
|
||||
RUN git submodule update --init tensorflow/
|
||||
COPY . /STT/
|
||||
|
||||
# >> START Build and bind
|
||||
|
||||
@ -133,13 +125,11 @@ RUN ./configure
|
||||
# passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment
|
||||
|
||||
# Build STT
|
||||
|
||||
RUN bazel build \
|
||||
--verbose_failures \
|
||||
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
|
||||
--config=monolithic \
|
||||
--config=cuda \
|
||||
-c opt \
|
||||
--copt=-O3 \
|
||||
--copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
|
||||
--copt=-mtune=generic \
|
||||
--copt=-march=x86-64 \
|
||||
--copt=-msse \
|
||||
@ -148,10 +138,11 @@ RUN bazel build \
|
||||
--copt=-msse4.1 \
|
||||
--copt=-msse4.2 \
|
||||
--copt=-mavx \
|
||||
--copt=-fvisibility=hidden \
|
||||
//native_client:libstt.so \
|
||||
--verbose_failures \
|
||||
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
|
||||
--config=noaws \
|
||||
--config=nogcp \
|
||||
--config=nohdfs \
|
||||
--config=nonccl \
|
||||
//native_client:libstt.so
|
||||
|
||||
# Copy built libs to /STT/native_client
|
||||
RUN cp bazel-bin/native_client/libstt.so /STT/native_client/
|
||||
@ -166,6 +157,7 @@ RUN make NUM_PROCESSES=$(nproc) stt
|
||||
|
||||
WORKDIR /STT
|
||||
RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
|
||||
RUN pip3 install -U pip setuptools wheel
|
||||
RUN pip3 install --upgrade native_client/python/dist/*.whl
|
||||
|
||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||
|
@ -2,7 +2,7 @@
|
||||
# You can train "acoustic models" with audio + Tensorflow, and
|
||||
# you can create "scorers" with text + KenLM.
|
||||
|
||||
FROM ubuntu:20.04 AS kenlm-build
|
||||
FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3 AS kenlm-build
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
@ -39,12 +39,12 @@ RUN wget --no-check-certificate https://github.com/coqui-ai/STT/releases/downloa
|
||||
unzip temp.zip && \
|
||||
rm temp.zip
|
||||
|
||||
RUN wget --no-check-certificate https://github.com/coqui-ai/STT/releases/download/v0.9.3/native_client.tf.Linux.tar.xz -O temp.tar.xz && \
|
||||
RUN wget --no-check-certificate https://github.com/reuben/STT/releases/download/v0.10.0-alpha.1/native_client.tar.xz -O temp.tar.xz && \
|
||||
tar -xf temp.tar.xz && \
|
||||
rm temp.tar.xz
|
||||
|
||||
|
||||
FROM nvcr.io/nvidia/tensorflow:21.05-tf1-py3
|
||||
FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# We need to purge python3-xdg because
|
||||
|
12
Dockerfile.train.jupyter
Normal file
12
Dockerfile.train.jupyter
Normal file
@ -0,0 +1,12 @@
|
||||
# This is a Dockerfile useful for training models with Coqui STT in Jupyter notebooks
# It layers Jupyter on top of the published stt-train image and starts a
# notebook server as the container's default command.
|
||||
|
||||
FROM ghcr.io/coqui-ai/stt-train:latest
|
||||
|
||||
# Same directory that is passed to the server as --notebook-dir in CMD below.
WORKDIR /code/notebooks
|
||||
|
||||
# Install Jupyter plus the jupyter_http_over_ws package; --no-cache-dir keeps
# pip's download cache out of the image layer.
RUN python3 -m pip install --no-cache-dir jupyter jupyter_http_over_ws
|
||||
# Enable jupyter_http_over_ws as a Jupyter server extension.
RUN jupyter serverextension enable --py jupyter_http_over_ws
|
||||
|
||||
# NOTE(review): CMD does not pass --port, so this presumably relies on the
# notebook server's default port matching 8888 - confirm.
EXPOSE 8888
|
||||
|
||||
# Start the notebook server bound to all interfaces (--ip 0.0.0.0), without
# opening a browser, and permitted to run as root (--allow-root).
CMD ["bash", "-c", "jupyter notebook --notebook-dir=/code/notebooks --ip 0.0.0.0 --no-browser --allow-root"]
|
2
MANIFEST.in
Normal file
2
MANIFEST.in
Normal file
@ -0,0 +1,2 @@
|
||||
include training/coqui_stt_training/VERSION
|
||||
include training/coqui_stt_training/GRAPH_VERSION
|
95
RELEASE_NOTES.md
Normal file
95
RELEASE_NOTES.md
Normal file
@ -0,0 +1,95 @@
|
||||
# General
|
||||
|
||||
This is the 1.0.0 release for Coqui STT, the deep learning toolkit for speech-to-text. In accordance with [semantic versioning](https://semver.org/), this version is not completely backwards compatible with previous versions. The compatibility guarantees of our semantic versioning cover the inference APIs: the C API and all the official language bindings: Python, Node.JS/ElectronJS and Android. You can get started today with Coqui STT 1.0.0 by following the steps in our [documentation](https://stt.readthedocs.io/).
|
||||
|
||||
This release includes pre-trained English models, available in the Coqui Model Zoo:
|
||||
|
||||
- [Coqui English STT v1.0.0-huge-vocab](https://coqui.ai/english/coqui/v1.0.0-huge-vocab)
|
||||
- [Coqui English STT v1.0.0-yesno](https://coqui.ai/english/coqui/v1.0.0-yesno)
|
||||
- [Coqui English STT v1.0.0-large-vocab](https://coqui.ai/english/coqui/v1.0.0-large-vocab)
|
||||
- [Coqui English STT v1.0.0-digits](https://coqui.ai/english/coqui/v1.0.0-digits)
|
||||
|
||||
all under the Apache 2.0 license.
|
||||
|
||||
The acoustic models were trained on American English data with synthetic noise augmentation. The model achieves a 4.5% word error rate on the [LibriSpeech clean test corpus](http://www.openslr.org/12) and 13.6% word error rate on the [LibriSpeech other test corpus](http://www.openslr.org/12) with the largest release language model.
|
||||
|
||||
Note that the model currently performs best in low-noise environments with clear recordings. This does not mean the model cannot be used outside of these conditions, but that accuracy may be lower. Some users may need to further fine tune the model to meet their intended use-case.
|
||||
|
||||
We also include example audio files:
|
||||
|
||||
[audio-1.0.0.tar.gz](https://github.com/coqui-ai/STT/releases/download/v1.0.0/audio-1.0.0.tar.gz)
|
||||
|
||||
which can be used to test the engine, and checkpoint files for the English model:
|
||||
|
||||
[coqui-stt-1.0.0-checkpoint.tar.gz](https://github.com/coqui-ai/STT/releases/download/v1.0.0/coqui-stt-1.0.0-checkpoint.tar.gz)
|
||||
|
||||
which are under the Apache 2.0 license and can be used as the basis for further fine-tuning. Finally, this release also includes a source code tarball:
|
||||
|
||||
[v1.0.0.tar.gz](https://github.com/coqui-ai/STT/archive/v1.0.0.tar.gz)
|
||||
|
||||
under the [MPL-2.0 license](https://www.mozilla.org/en-US/MPL/2.0/). Note that this tarball is for archival purposes only, since GitHub does not include submodules in the automatic tarballs. For usage and development with the source code, clone the repository using Git, following our [documentation](https://stt.readthedocs.io/).
|
||||
|
||||
|
||||
# Notable changes
|
||||
|
||||
- Removed support for protocol buffer input in native client and consolidated all packages under a single "STT" name accepting TFLite inputs
|
||||
- Added programmatic interface to training code and example Jupyter Notebooks, including how to train with Common Voice data
|
||||
- Added transparent handling of mixed sample rates and stereo audio in training inputs
|
||||
- Moved CI setup to GitHub Actions, making code contributions easier to test
|
||||
- Added configuration management via Coqpit, providing a more flexible config interface that's compatible with Coqui TTS
|
||||
- Handle Opus audio files transparently in training inputs
|
||||
- Added support for automatic dataset subset splitting
|
||||
- Added support for automatic alphabet generation and loading
|
||||
- Started publishing the training code CI for a faster notebook setup
|
||||
- Refactor training code into self-contained modules and deprecate train.py as universal entry point for training
|
||||
|
||||
# Training Regimen + Hyperparameters for fine-tuning
|
||||
|
||||
The hyperparameters used to train the model are useful for fine-tuning. Thus, we document them here, along with the training regimen and the hardware used (a server with 8 NVIDIA A100 GPUs, each with 40GB of VRAM). The full training configuration in JSON format is available [here](https://gist.github.com/reuben/6ced6a8b41e3d0849dafb7cae301e905).
|
||||
|
||||
The datasets used were:
|
||||
- Common Voice 7.0 (with custom train/dev/test splits)
|
||||
- Multilingual LibriSpeech (English, Opus)
|
||||
- LibriSpeech
|
||||
|
||||
The optimal `lm_alpha` and `lm_beta` values with respect to Common Voice 7.0 (custom Coqui splits) and a large vocabulary language model are:
|
||||
|
||||
- lm_alpha: 0.5891777425167632
|
||||
- lm_beta: 0.6619145283338659
|
||||
|
||||
# Documentation
|
||||
|
||||
Documentation is available on [stt.readthedocs.io](https://stt.readthedocs.io/).
|
||||
|
||||
# Contact/Getting Help
|
||||
|
||||
1. [GitHub Discussions](https://github.com/coqui-ai/STT/discussions/) - best place to ask questions, get support, and discuss anything related to 🐸STT with other users.
|
||||
2. [Gitter](https://gitter.im/coqui-ai/) - You can also join our Gitter chat.
|
||||
3. [Issues](https://github.com/coqui-ai/STT/issues) - If you have discussed a problem and identified a bug in 🐸STT, or if you have a feature request, please open an issue in our repo. Please make sure you search for an already existing issue beforehand!
|
||||
|
||||
# Contributors to 1.0.0 release
|
||||
|
||||
- Alexandre Lissy
|
||||
- Anon-Artist
|
||||
- Anton Yaroshenko
|
||||
- Catalin Voss
|
||||
- CatalinVoss
|
||||
- dag7dev
|
||||
- Dustin Zubke
|
||||
- Eren Gölge
|
||||
- Erik Ziegler
|
||||
- Francis Tyers
|
||||
- Ideefixze
|
||||
- Ilnar Salimzianov
|
||||
- imrahul3610
|
||||
- Jeremiah Rose
|
||||
- Josh Meyer
|
||||
- Kathy Reid
|
||||
- Kelly Davis
|
||||
- Kenneth Heafield
|
||||
- NanoNabla
|
||||
- Neil Stoker
|
||||
- Reuben Morais
|
||||
- zaptrem
|
||||
|
||||
We’d also like to thank all the members of our [Gitter chat room](https://gitter.im/coqui-ai/STT) who have been helping to shape this release!
|
@ -27,4 +27,5 @@ python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
--max_to_keep 1 --checkpoint_dir '/tmp/ckpt' \
|
||||
--learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train' \
|
||||
--scorer_path 'data/smoke_test/pruned_lm.scorer' \
|
||||
--audio_sample_rate ${audio_sample_rate}
|
||||
--audio_sample_rate ${audio_sample_rate} \
|
||||
--export_tflite false
|
||||
|
@ -27,4 +27,5 @@ python -u train.py --show_progressbar false --early_stop false \
|
||||
--learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_bytes' \
|
||||
--scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
|
||||
--audio_sample_rate ${audio_sample_rate} \
|
||||
--bytes_output_mode true
|
||||
--bytes_output_mode true \
|
||||
--export_tflite false
|
||||
|
@ -14,7 +14,8 @@ fi;
|
||||
# and when trying to run on multiple devices (like GPUs), this will break
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
python -m coqui_stt_training.train \
|
||||
--alphabet_config_path "data/alphabet.txt" \
|
||||
--show_progressbar false --early_stop false \
|
||||
--train_files ${ldc93s1_csv} --train_batch_size 1 \
|
||||
--dev_files ${ldc93s1_csv} --dev_batch_size 1 \
|
||||
@ -24,8 +25,15 @@ python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
--learning_rate 0.001 --dropout_rate 0.05 \
|
||||
--scorer_path 'data/smoke_test/pruned_lm.scorer'
|
||||
|
||||
python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
python -m coqui_stt_training.training_graph_inference \
|
||||
--n_hidden 100 \
|
||||
--checkpoint_dir '/tmp/ckpt' \
|
||||
--scorer_path 'data/smoke_test/pruned_lm.scorer' \
|
||||
--one_shot_infer 'data/smoke_test/LDC93S1.wav'
|
||||
|
||||
python -m coqui_stt_training.training_graph_inference_flashlight \
|
||||
--n_hidden 100 \
|
||||
--checkpoint_dir '/tmp/ckpt' \
|
||||
--scorer_path 'data/smoke_test/pruned_lm.scorer' \
|
||||
--vocab_file 'data/smoke_test/vocab.pruned.txt' \
|
||||
--one_shot_infer 'data/smoke_test/LDC93S1.wav'
|
||||
|
@ -2,27 +2,24 @@
|
||||
import os
|
||||
from import_ldc93s1 import _download_and_preprocess_data as download_ldc
|
||||
from coqui_stt_training.util.config import initialize_globals_from_args
|
||||
from coqui_stt_training.train import train, test, early_training_checks
|
||||
import tensorflow.compat.v1 as tfv1
|
||||
from coqui_stt_training.train import train
|
||||
from coqui_stt_training.evaluate import test
|
||||
|
||||
# only one GPU for only one training sample
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
|
||||
download_ldc("data/ldc93s1")
|
||||
download_ldc("data/smoke_test")
|
||||
|
||||
initialize_globals_from_args(
|
||||
load_train="init",
|
||||
alphabet_config_path="data/alphabet.txt",
|
||||
train_files=["data/ldc93s1/ldc93s1.csv"],
|
||||
dev_files=["data/ldc93s1/ldc93s1.csv"],
|
||||
test_files=["data/ldc93s1/ldc93s1.csv"],
|
||||
train_files=["data/smoke_test/ldc93s1.csv"],
|
||||
dev_files=["data/smoke_test/ldc93s1.csv"],
|
||||
test_files=["data/smoke_test/ldc93s1.csv"],
|
||||
augment=["time_mask"],
|
||||
n_hidden=100,
|
||||
epochs=200,
|
||||
)
|
||||
|
||||
early_training_checks()
|
||||
|
||||
train()
|
||||
tfv1.reset_default_graph()
|
||||
test()
|
||||
|
@ -5,9 +5,9 @@ if [ ! -f train.py ]; then
|
||||
exit 1
|
||||
fi;
|
||||
|
||||
if [ ! -f "data/ldc93s1/ldc93s1.csv" ]; then
|
||||
echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/ldc93s1."
|
||||
python -u bin/import_ldc93s1.py ./data/ldc93s1
|
||||
if [ ! -f "data/smoke_test/ldc93s1.csv" ]; then
|
||||
echo "Downloading and preprocessing LDC93S1 example data, saving in ./data/smoke_test."
|
||||
python -u bin/import_ldc93s1.py ./data/smoke_test
|
||||
fi;
|
||||
|
||||
if [ -d "${COMPUTE_KEEP_DIR}" ]; then
|
||||
@ -20,10 +20,11 @@ fi
|
||||
# and when trying to run on multiple devices (like GPUs), this will break
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
|
||||
python -u train.py --alphabet_config_path "data/alphabet.txt" \
|
||||
python -m coqui_stt_training.train \
|
||||
--alphabet_config_path "data/alphabet.txt" \
|
||||
--show_progressbar false \
|
||||
--train_files data/ldc93s1/ldc93s1.csv \
|
||||
--test_files data/ldc93s1/ldc93s1.csv \
|
||||
--train_files data/smoke_test/ldc93s1.csv \
|
||||
--test_files data/smoke_test/ldc93s1.csv \
|
||||
--train_batch_size 1 \
|
||||
--test_batch_size 1 \
|
||||
--n_hidden 100 \
|
||||
|
@ -55,23 +55,6 @@ maybe_install_xldd()
|
||||
fi
|
||||
}
|
||||
|
||||
# Checks whether we run a patched version of bazel.
|
||||
# Patching is required to dump computeKey() parameters to .ckd files
|
||||
# See bazel.patch
|
||||
# Return 0 (success exit code) on patched version, 1 on release version
|
||||
is_patched_bazel()
|
||||
{
|
||||
bazel_version=$(bazel version | grep 'Build label:' | cut -d':' -f2)
|
||||
|
||||
bazel shutdown
|
||||
|
||||
if [ -z "${bazel_version}" ]; then
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
fi;
|
||||
}
|
||||
|
||||
verify_bazel_rebuild()
|
||||
{
|
||||
bazel_explain_file="$1"
|
||||
|
23
ci_scripts/android-arm64-build.sh
Executable file
23
ci_scripts/android-arm64-build.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Builds the native client for Android arm64-v8a: first the Bazel targets,
# then the NDK build of the client.

set -xe

# Helper scripts live next to this one. The directory is quoted so that a
# checkout path containing whitespace does not get word-split.
source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/build-utils.sh"

source "$(dirname "$0")/tf-vars.sh"

BAZEL_TARGETS="
//native_client:libstt.so
//native_client:generate_scorer_package
"

# BAZEL_ANDROID_ARM64_FLAGS and BAZEL_EXTRA_FLAGS are not defined in this
# script; presumably they come from the sourced *-vars.sh files.
BAZEL_BUILD_FLAGS="${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}"
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=
SYSTEM_RASPBIAN=

do_bazel_build

do_stt_ndk_build "arm64-v8a"
|
23
ci_scripts/android-armv7-build.sh
Executable file
23
ci_scripts/android-armv7-build.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Builds the native client for Android armeabi-v7a: first the Bazel targets,
# then the NDK build of the client.

set -xe

# Helper scripts live next to this one. The directory is quoted so that a
# checkout path containing whitespace does not get word-split.
source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/build-utils.sh"

source "$(dirname "$0")/tf-vars.sh"

BAZEL_TARGETS="
//native_client:libstt.so
//native_client:generate_scorer_package
"

# BAZEL_ANDROID_ARM_FLAGS and BAZEL_EXTRA_FLAGS are not defined in this
# script; presumably they come from the sourced *-vars.sh files.
BAZEL_BUILD_FLAGS="${BAZEL_ANDROID_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS}"
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=
SYSTEM_RASPBIAN=

do_bazel_build

do_stt_ndk_build "armeabi-v7a"
|
14
ci_scripts/android-package.sh
Executable file
14
ci_scripts/android-package.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/bin/bash
# Collects the Bazel logs into the CI artifacts directory and packages the
# NDK build of the native client.
#
# Usage: android-package.sh <arm_flavor>
#   arm_flavor is forwarded unchanged to package_native_client_ndk.

set -xe

# Helper scripts live next to this one. The directory is quoted so that a
# checkout path containing whitespace does not get word-split.
source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/package-utils.sh"

# Best effort: a failure to create the directory is deliberately ignored here.
mkdir -p "${CI_ARTIFACTS_DIR}" || true

# The glob stays outside the quotes so it is still expanded by the shell.
cp "${DS_DSDIR}"/tensorflow/bazel*.log "${CI_ARTIFACTS_DIR}/"

arm_flavor="$1"

package_native_client_ndk "native_client.tar.xz" "${arm_flavor}"
|
23
ci_scripts/android-x86_64-build.sh
Executable file
23
ci_scripts/android-x86_64-build.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Builds the native client for Android x86_64: first the Bazel targets,
# then the NDK build of the client.

set -xe

# Helper scripts live next to this one. The directory is quoted so that a
# checkout path containing whitespace does not get word-split.
source "$(dirname "$0")/all-vars.sh"
source "$(dirname "$0")/all-utils.sh"
source "$(dirname "$0")/build-utils.sh"

source "$(dirname "$0")/tf-vars.sh"

BAZEL_TARGETS="
//native_client:libstt.so
//native_client:generate_scorer_package
"

# BAZEL_ANDROID_X86_64_FLAGS and BAZEL_EXTRA_FLAGS are not defined in this
# script; presumably they come from the sourced *-vars.sh files.
BAZEL_BUILD_FLAGS="${BAZEL_ANDROID_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS}"
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=
SYSTEM_RASPBIAN=

do_bazel_build

do_stt_ndk_build "x86_64"
|
@ -9,21 +9,14 @@ do_bazel_build()
|
||||
cd ${DS_TFDIR}
|
||||
eval "export ${BAZEL_ENV_FLAGS}"
|
||||
|
||||
if [ "${_opt_or_dbg}" = "opt" ]; then
|
||||
if is_patched_bazel; then
|
||||
find ${DS_ROOT_TASK}/tensorflow/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/bazel-ckd-tf.tar -T -
|
||||
fi;
|
||||
fi;
|
||||
|
||||
bazel ${BAZEL_OUTPUT_USER_ROOT} build \
|
||||
-s --explain bazel_monolithic.log --verbose_explanations --experimental_strict_action_env --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c ${_opt_or_dbg} ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS}
|
||||
-s --explain bazel_explain.log --verbose_explanations \
|
||||
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
|
||||
-c ${_opt_or_dbg} ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS}
|
||||
|
||||
if [ "${_opt_or_dbg}" = "opt" ]; then
|
||||
if is_patched_bazel; then
|
||||
find ${DS_ROOT_TASK}/tensorflow/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/bazel-ckd-ds.tar -T -
|
||||
fi;
|
||||
verify_bazel_rebuild "${DS_ROOT_TASK}/tensorflow/bazel_monolithic.log"
|
||||
fi;
|
||||
verify_bazel_rebuild "${DS_ROOT_TASK}/tensorflow/bazel_explain.log"
|
||||
fi
|
||||
}
|
||||
|
||||
shutdown_bazel()
|
||||
@ -44,3 +37,18 @@ do_stt_binary_build()
|
||||
EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \
|
||||
stt${PLATFORM_EXE_SUFFIX}
|
||||
}
|
||||
|
||||
# Runs ndk-build for the native client against a single Android ABI.
#
# Arguments:
#   $1 - ABI name passed to ndk-build as TARGET_ARCH_ABI
#        (callers in this repository pass e.g. "arm64-v8a").
# Reads DS_DSDIR, ANDROID_NDK_HOME and DS_TFDIR from the environment.
# Side effect: changes the current directory to ${DS_DSDIR}/native_client/.
do_stt_ndk_build()
{
  arch_abi="$1"

  cd "${DS_DSDIR}/native_client/"

  # Every path is quoted so that directories containing whitespace are
  # passed to ndk-build as a single argument.
  "${ANDROID_NDK_HOME}/ndk-build" \
    APP_PLATFORM=android-21 \
    APP_BUILD_SCRIPT="$(pwd)/Android.mk" \
    NDK_PROJECT_PATH="$(pwd)" \
    APP_STL=c++_shared \
    TFDIR="${DS_TFDIR}" \
    TARGET_ARCH_ABI="${arch_abi}"
}
|
||||
|
@ -1,26 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
model_source=${STT_PROD_MODEL}
|
||||
model_name=$(basename "${model_source}")
|
||||
|
||||
model_source_mmap=${STT_PROD_MODEL_MMAP}
|
||||
model_name_mmap=$(basename "${model_source_mmap}")
|
||||
|
||||
download_model_prod
|
||||
|
||||
download_material
|
||||
|
||||
export PATH=${CI_TMP_DIR}/ds/:$PATH
|
||||
|
||||
check_versions
|
||||
|
||||
run_prod_inference_tests "${bitrate}"
|
@ -1,24 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
download_data
|
||||
|
||||
export PATH=${CI_TMP_DIR}/ds/:$PATH
|
||||
|
||||
check_versions
|
||||
|
||||
run_all_inference_tests
|
||||
|
||||
run_multi_inference_tests
|
||||
|
||||
run_cpp_only_inference_tests
|
||||
|
||||
run_hotword_tests
|
@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
download_material "${CI_TMP_DIR}/ds"
|
||||
|
||||
export PATH=${CI_TMP_DIR}/ds/:$PATH
|
||||
|
||||
check_versions
|
||||
|
||||
ensure_cuda_usage "$2"
|
||||
|
||||
run_basic_inference_tests
|
@ -1,48 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
model_source=${STT_PROD_MODEL}
|
||||
model_name=$(basename "${model_source}")
|
||||
model_source_mmap=${STT_PROD_MODEL_MMAP}
|
||||
model_name_mmap=$(basename "${model_source_mmap}")
|
||||
|
||||
download_model_prod
|
||||
|
||||
download_data
|
||||
|
||||
node --version
|
||||
npm --version
|
||||
|
||||
symlink_electron
|
||||
|
||||
export_node_bin_path
|
||||
|
||||
which electron
|
||||
which node
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
export DISPLAY=':99.0'
|
||||
sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 &
|
||||
xvfb_process=$!
|
||||
fi
|
||||
|
||||
node --version
|
||||
|
||||
stt --version
|
||||
|
||||
check_runtime_electronjs
|
||||
|
||||
run_electronjs_prod_inference_tests "${bitrate}"
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
sleep 1
|
||||
sudo kill -9 ${xvfb_process} || true
|
||||
fi
|
@ -1,41 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
download_data
|
||||
|
||||
node --version
|
||||
npm --version
|
||||
|
||||
symlink_electron
|
||||
|
||||
export_node_bin_path
|
||||
|
||||
which electron
|
||||
which node
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
export DISPLAY=':99.0'
|
||||
sudo Xvfb :99 -screen 0 1024x768x24 > /dev/null 2>&1 &
|
||||
xvfb_process=$!
|
||||
fi
|
||||
|
||||
node --version
|
||||
|
||||
stt --version
|
||||
|
||||
check_runtime_electronjs
|
||||
|
||||
run_electronjs_inference_tests
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
sleep 1
|
||||
sudo kill -9 ${xvfb_process} || true
|
||||
fi
|
@ -2,8 +2,6 @@
|
||||
|
||||
set -xe
|
||||
|
||||
runtime=$1
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/build-utils.sh
|
||||
@ -15,10 +13,7 @@ BAZEL_TARGETS="
|
||||
//native_client:generate_scorer_package
|
||||
"
|
||||
|
||||
if [ "${runtime}" = "tflite" ]; then
|
||||
BAZEL_BUILD_TFLITE="--define=runtime=tflite"
|
||||
fi;
|
||||
BAZEL_BUILD_FLAGS="${BAZEL_BUILD_TFLITE} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}"
|
||||
BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}"
|
||||
|
||||
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
|
||||
SYSTEM_TARGET=host
|
||||
|
@ -1,30 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
model_source=${STT_PROD_MODEL}
|
||||
model_name=$(basename "${model_source}")
|
||||
model_source_mmap=${STT_PROD_MODEL_MMAP}
|
||||
model_name_mmap=$(basename "${model_source_mmap}")
|
||||
|
||||
download_model_prod
|
||||
|
||||
download_data
|
||||
|
||||
node --version
|
||||
npm --version
|
||||
|
||||
export_node_bin_path
|
||||
|
||||
check_runtime_nodejs
|
||||
|
||||
run_prod_inference_tests "${bitrate}"
|
||||
|
||||
run_js_streaming_prod_inference_tests "${bitrate}"
|
@ -1,25 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
download_data
|
||||
|
||||
node --version
|
||||
npm --version
|
||||
|
||||
export_node_bin_path
|
||||
|
||||
check_runtime_nodejs
|
||||
|
||||
run_all_inference_tests
|
||||
|
||||
run_js_streaming_inference_tests
|
||||
|
||||
run_hotword_tests
|
14
ci_scripts/notebook-tests.sh
Executable file
14
ci_scripts/notebook-tests.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
|
||||
set -o pipefail
|
||||
pip install --upgrade pip setuptools wheel | cat
|
||||
pip install --upgrade . | cat
|
||||
set +o pipefail
|
||||
|
||||
for python_notebook in ./notebooks/*.ipynb; do
|
||||
time jupyter nbconvert --to notebook --execute $python_notebook
|
||||
done
|
@ -26,9 +26,31 @@ package_native_client()
|
||||
win_lib="-C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so.if.lib"
|
||||
fi;
|
||||
|
||||
if [ -f "${tensorflow_dir}/bazel-bin/native_client/libkenlm.so.if.lib" ]; then
|
||||
win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so.if.lib"
|
||||
fi;
|
||||
|
||||
if [ -f "${tensorflow_dir}/bazel-bin/native_client/libtflitedelegates.so.if.lib" ]; then
|
||||
win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/native_client/ libtflitedelegates.so.if.lib"
|
||||
fi;
|
||||
|
||||
if [ -f "${tensorflow_dir}/bazel-bin/tensorflow/lite/libtensorflowlite.so.if.lib" ]; then
|
||||
win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/tensorflow/lite/ libtensorflowlite.so.if.lib"
|
||||
fi;
|
||||
|
||||
libsox_lib=""
|
||||
if [ -f "${stt_dir}/sox-build/lib/libsox.so.3" ]; then
|
||||
libsox_lib="-C ${stt_dir}/sox-build/lib libsox.so.3"
|
||||
fi
|
||||
|
||||
${TAR} --verbose -cf - \
|
||||
--transform='flags=r;s|README.coqui|KenLM_License_Info.txt|' \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ libtflitedelegates.so \
|
||||
-C ${tensorflow_dir}/bazel-bin/tensorflow/lite/ libtensorflowlite.so \
|
||||
${win_lib} \
|
||||
${libsox_lib} \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
|
||||
-C ${stt_dir}/ LICENSE \
|
||||
-C ${stt_dir}/native_client/ stt${PLATFORM_EXE_SUFFIX} \
|
||||
@ -63,6 +85,9 @@ package_native_client_ndk()
|
||||
${TAR} --verbose -cf - \
|
||||
-C ${stt_dir}/native_client/libs/${arch_abi}/ stt \
|
||||
-C ${stt_dir}/native_client/libs/${arch_abi}/ libstt.so \
|
||||
-C ${stt_dir}/native_client/libs/${arch_abi}/ libkenlm.so \
|
||||
-C ${stt_dir}/native_client/libs/${arch_abi}/ libtflitedelegates.so \
|
||||
-C ${stt_dir}/native_client/libs/${arch_abi}/ libtensorflowlite.so \
|
||||
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_scorer_package \
|
||||
-C ${stt_dir}/native_client/libs/${arch_abi}/ libc++_shared.so \
|
||||
-C ${stt_dir}/native_client/ coqui-stt.h \
|
||||
@ -74,6 +99,7 @@ package_native_client_ndk()
|
||||
package_libstt_as_zip()
|
||||
{
|
||||
tensorflow_dir=${DS_TFDIR}
|
||||
stt_dir=${DS_DSDIR}
|
||||
artifacts_dir=${CI_ARTIFACTS_DIR}
|
||||
artifact_name=$1
|
||||
|
||||
@ -88,5 +114,15 @@ package_libstt_as_zip()
|
||||
echo "Please specify artifact name."
|
||||
fi;
|
||||
|
||||
${ZIP} -r9 --junk-paths "${artifacts_dir}/${artifact_name}" ${tensorflow_dir}/bazel-bin/native_client/libstt.so
|
||||
libsox_lib=""
|
||||
if [ -f "${stt_dir}/sox-build/lib/libsox.so.3" ]; then
|
||||
libsox_lib="${stt_dir}/sox-build/lib/libsox.so.3"
|
||||
fi
|
||||
|
||||
${ZIP} -r9 --junk-paths "${artifacts_dir}/${artifact_name}" \
|
||||
${tensorflow_dir}/bazel-bin/native_client/libstt.so \
|
||||
${tensorflow_dir}/bazel-bin/native_client/libkenlm.so \
|
||||
${tensorflow_dir}/bazel-bin/native_client/libtflitedelegates.so \
|
||||
${libsox_lib} \
|
||||
${tensorflow_dir}/bazel-bin/tensorflow/lite/libtensorflowlite.so
|
||||
}
|
||||
|
@ -1,29 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
model_source=${STT_PROD_MODEL}
|
||||
model_name=$(basename "${model_source}")
|
||||
|
||||
model_source_mmap=${STT_PROD_MODEL_MMAP}
|
||||
model_name_mmap=$(basename "${model_source_mmap}")
|
||||
|
||||
download_model_prod
|
||||
|
||||
download_material
|
||||
|
||||
export_py_bin_path
|
||||
|
||||
which stt
|
||||
stt --version
|
||||
|
||||
run_prod_inference_tests "${bitrate}"
|
||||
|
||||
run_prod_concurrent_stream_tests "${bitrate}"
|
@ -1,21 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
source $(dirname "$0")/all-vars.sh
|
||||
source $(dirname "$0")/all-utils.sh
|
||||
source $(dirname "$0")/asserts.sh
|
||||
|
||||
bitrate=$1
|
||||
set_ldc_sample_filename "${bitrate}"
|
||||
|
||||
download_data
|
||||
|
||||
export_py_bin_path
|
||||
|
||||
which stt
|
||||
stt --version
|
||||
|
||||
run_all_inference_tests
|
||||
|
||||
run_hotword_tests
|
@ -6,30 +6,20 @@ set -o pipefail
|
||||
source $(dirname $0)/tf-vars.sh
|
||||
|
||||
pushd ${DS_ROOT_TASK}/tensorflow/
|
||||
BAZEL_BUILD="bazel ${BAZEL_OUTPUT_USER_ROOT} build -s --explain bazel_monolithic_tf.log --verbose_explanations --experimental_strict_action_env --config=monolithic"
|
||||
|
||||
# Start a bazel process to ensure reliability on Windows and avoid:
|
||||
# FATAL: corrupt installation: file 'c:\builds\tc-workdir\.bazel_cache/install/6b1660721930e9d5f231f7d2a626209b/_embedded_binaries/build-runfiles.exe' missing.
|
||||
bazel ${BAZEL_OUTPUT_USER_ROOT} info
|
||||
|
||||
# Force toolchain sync (useful on macOS ?)
|
||||
bazel ${BAZEL_OUTPUT_USER_ROOT} sync --configure
|
||||
BAZEL_BUILD="bazel ${BAZEL_OUTPUT_USER_ROOT} build -s"
|
||||
|
||||
MAYBE_DEBUG=$2
|
||||
OPT_OR_DBG="-c opt"
|
||||
if [ "${MAYBE_DEBUG}" = "dbg" ]; then
|
||||
OPT_OR_DBG="-c dbg"
|
||||
OPT_OR_DBG="-c dbg"
|
||||
fi;
|
||||
|
||||
case "$1" in
|
||||
"--windows-cpu")
|
||||
echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIBSTT} ${BUILD_TARGET_LITE_LIB} --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh"
|
||||
echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB}
|
||||
;;
|
||||
"--linux-cpu"|"--darwin-cpu")
|
||||
echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB}
|
||||
;;
|
||||
"--linux-cuda"|"--windows-cuda")
|
||||
eval "export ${TF_CUDA_FLAGS}" && (echo "" | TF_NEED_CUDA=1 ./configure) && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS} ${BUILD_TARGET_LIB_CPP_API}
|
||||
echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB}
|
||||
;;
|
||||
"--linux-armv7")
|
||||
echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB}
|
||||
@ -50,6 +40,4 @@ pushd ${DS_ROOT_TASK}/tensorflow/
|
||||
echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} ${OPT_OR_DBG} ${BAZEL_IOS_X86_64_FLAGS} ${BUILD_TARGET_LITE_LIB}
|
||||
;;
|
||||
esac
|
||||
|
||||
bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown
|
||||
popd
|
||||
|
@ -6,26 +6,17 @@ source $(dirname $0)/tf-vars.sh
|
||||
|
||||
mkdir -p ${CI_ARTIFACTS_DIR} || true
|
||||
|
||||
cp ${DS_ROOT_TASK}/tensorflow/bazel_*.log ${CI_ARTIFACTS_DIR} || true
|
||||
|
||||
OUTPUT_ROOT="${DS_ROOT_TASK}/tensorflow/bazel-bin"
|
||||
|
||||
for output_bin in \
|
||||
tensorflow/lite/experimental/c/libtensorflowlite_c.so \
|
||||
tensorflow/tools/graph_transforms/transform_graph \
|
||||
tensorflow/tools/graph_transforms/summarize_graph \
|
||||
tensorflow/tools/benchmark/benchmark_model \
|
||||
tensorflow/contrib/util/convert_graphdef_memmapped_format \
|
||||
tensorflow/lite/toco/toco;
|
||||
for output_bin in \
|
||||
tensorflow/lite/libtensorflow.so \
|
||||
tensorflow/lite/libtensorflow.so.if.lib \
|
||||
;
|
||||
do
|
||||
if [ -f "${OUTPUT_ROOT}/${output_bin}" ]; then
|
||||
cp ${OUTPUT_ROOT}/${output_bin} ${CI_ARTIFACTS_DIR}/
|
||||
fi;
|
||||
done;
|
||||
|
||||
if [ -f "${OUTPUT_ROOT}/tensorflow/lite/tools/benchmark/benchmark_model" ]; then
|
||||
cp ${OUTPUT_ROOT}/tensorflow/lite/tools/benchmark/benchmark_model ${CI_ARTIFACTS_DIR}/lite_benchmark_model
|
||||
fi
|
||||
done
|
||||
|
||||
# It seems that bsdtar and gnutar are behaving a bit differently on the way
|
||||
# they deal with --exclude="./public/*" ; this caused ./STT/tensorflow/core/public/
|
||||
|
@ -5,12 +5,7 @@ set -ex
|
||||
source $(dirname $0)/tf-vars.sh
|
||||
|
||||
install_android=
|
||||
install_cuda=
|
||||
case "$1" in
|
||||
"--linux-cuda"|"--windows-cuda")
|
||||
install_cuda=yes
|
||||
;;
|
||||
|
||||
"--android-armv7"|"--android-arm64")
|
||||
install_android=yes
|
||||
;;
|
||||
@ -22,18 +17,13 @@ download()
|
||||
{
|
||||
fname=`basename $1`
|
||||
|
||||
${WGET} $1 -O ${DS_ROOT_TASK}/dls/$fname && echo "$2 ${DS_ROOT_TASK}/dls/$fname" | ${SHA_SUM} -
|
||||
${CURL} -sSL -o ${DS_ROOT_TASK}/dls/$fname $1 && echo "$2 ${DS_ROOT_TASK}/dls/$fname" | ${SHA_SUM} -
|
||||
}
|
||||
|
||||
# Download stuff
|
||||
mkdir -p ${DS_ROOT_TASK}/dls || true
|
||||
download $BAZEL_URL $BAZEL_SHA256
|
||||
|
||||
if [ ! -z "${install_cuda}" ]; then
|
||||
download $CUDA_URL $CUDA_SHA256
|
||||
download $CUDNN_URL $CUDNN_SHA256
|
||||
fi;
|
||||
|
||||
if [ ! -z "${install_android}" ]; then
|
||||
download $ANDROID_NDK_URL $ANDROID_NDK_SHA256
|
||||
download $ANDROID_SDK_URL $ANDROID_SDK_SHA256
|
||||
@ -44,49 +34,21 @@ ls -hal ${DS_ROOT_TASK}/dls/
|
||||
|
||||
# Install Bazel in ${DS_ROOT_TASK}/bin
|
||||
BAZEL_INSTALL_FILENAME=$(basename "${BAZEL_URL}")
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
BAZEL_INSTALL_FLAGS="--user"
|
||||
elif [ "${OS}" = "Darwin" ]; then
|
||||
BAZEL_INSTALL_FLAGS="--bin=${DS_ROOT_TASK}/bin --base=${DS_ROOT_TASK}/.bazel"
|
||||
fi;
|
||||
mkdir -p ${DS_ROOT_TASK}/bin || true
|
||||
pushd ${DS_ROOT_TASK}/bin
|
||||
if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then
|
||||
cp ${DS_ROOT_TASK}/dls/${BAZEL_INSTALL_FILENAME} ${DS_ROOT_TASK}/bin/bazel.exe
|
||||
else
|
||||
/bin/bash ${DS_ROOT_TASK}/dls/${BAZEL_INSTALL_FILENAME} ${BAZEL_INSTALL_FLAGS}
|
||||
fi
|
||||
popd
|
||||
|
||||
SUFFIX=""
|
||||
if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then
|
||||
SUFFIX=".exe"
|
||||
fi
|
||||
|
||||
cp ${DS_ROOT_TASK}/dls/${BAZEL_INSTALL_FILENAME} ${DS_ROOT_TASK}/bin/bazel${SUFFIX}
|
||||
chmod +x ${DS_ROOT_TASK}/bin/bazel${SUFFIX}
|
||||
|
||||
# For debug
|
||||
bazel version
|
||||
|
||||
bazel shutdown
|
||||
|
||||
if [ ! -z "${install_cuda}" ]; then
|
||||
# Install CUDA and CuDNN
|
||||
mkdir -p ${DS_ROOT_TASK}/STT/CUDA/ || true
|
||||
pushd ${DS_ROOT_TASK}
|
||||
CUDA_FILE=`basename ${CUDA_URL}`
|
||||
PERL5LIB=. sh ${DS_ROOT_TASK}/dls/${CUDA_FILE} --silent --override --toolkit --toolkitpath=${DS_ROOT_TASK}/STT/CUDA/ --defaultroot=${DS_ROOT_TASK}/STT/CUDA/
|
||||
|
||||
CUDNN_FILE=`basename ${CUDNN_URL}`
|
||||
tar xvf ${DS_ROOT_TASK}/dls/${CUDNN_FILE} --strip-components=1 -C ${DS_ROOT_TASK}/STT/CUDA/
|
||||
popd
|
||||
|
||||
LD_LIBRARY_PATH=${DS_ROOT_TASK}/STT/CUDA/lib64/:${DS_ROOT_TASK}/STT/CUDA/lib64/stubs/:$LD_LIBRARY_PATH
|
||||
export LD_LIBRARY_PATH
|
||||
|
||||
# We might lack libcuda.so.1 symlink, let's fix as upstream does:
|
||||
# https://github.com/tensorflow/tensorflow/pull/13811/files?diff=split#diff-2352449eb75e66016e97a591d3f0f43dR96
|
||||
if [ ! -h "${DS_ROOT_TASK}/STT/CUDA/lib64/stubs/libcuda.so.1" ]; then
|
||||
ln -s "${DS_ROOT_TASK}/STT/CUDA/lib64/stubs/libcuda.so" "${DS_ROOT_TASK}/STT/CUDA/lib64/stubs/libcuda.so.1"
|
||||
fi;
|
||||
|
||||
else
|
||||
echo "No CUDA/CuDNN to install"
|
||||
fi
|
||||
|
||||
if [ ! -z "${install_android}" ]; then
|
||||
mkdir -p ${DS_ROOT_TASK}/STT/Android/SDK || true
|
||||
ANDROID_NDK_FILE=`basename ${ANDROID_NDK_URL}`
|
||||
@ -105,8 +67,3 @@ if [ ! -z "${install_android}" ]; then
|
||||
fi
|
||||
|
||||
mkdir -p ${CI_ARTIFACTS_DIR} || true
|
||||
|
||||
|
||||
# Taken from https://www.tensorflow.org/install/source
|
||||
# Only future is needed for our builds, as we don't build the Python package
|
||||
python -m pip install -U --user future==0.17.1 || true
|
||||
|
@ -6,15 +6,8 @@ export OS=$(uname)
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
export DS_ROOT_TASK=${CI_TASK_DIR}
|
||||
|
||||
BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-linux-x86_64.sh
|
||||
BAZEL_SHA256=7ba815cbac712d061fe728fef958651512ff394b2708e89f79586ec93d1185ed
|
||||
|
||||
CUDA_URL=http://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.243_418.87.00_linux.run
|
||||
CUDA_SHA256=e7c22dc21278eb1b82f34a60ad7640b41ad3943d929bebda3008b72536855d31
|
||||
|
||||
# From https://gitlab.com/nvidia/cuda/blob/centos7/10.1/devel/cudnn7/Dockerfile
|
||||
CUDNN_URL=http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.0/cudnn-10.1-linux-x64-v7.6.0.64.tgz
|
||||
CUDNN_SHA256=e956c6f9222fcb867a10449cfc76dee5cfd7c7531021d95fe9586d7e043b57d7
|
||||
BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-linux-amd64
|
||||
BAZEL_SHA256=4cb534c52cdd47a6223d4596d530e7c9c785438ab3b0a49ff347e991c210b2cd
|
||||
|
||||
ANDROID_NDK_URL=https://dl.google.com/android/repository/android-ndk-r18b-linux-x86_64.zip
|
||||
ANDROID_NDK_SHA256=4f61cbe4bbf6406aa5ef2ae871def78010eed6271af72de83f8bd0b07a9fd3fd
|
||||
@ -38,17 +31,15 @@ elif [ "${OS}" = "${CI_MSYS_VERSION}" ]; then
|
||||
|
||||
export DS_ROOT_TASK=${CI_TASK_DIR}
|
||||
export BAZEL_VC="C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC"
|
||||
# export BAZEL_VC_FULL_VERSION="14.28.30037"
|
||||
export BAZEL_VC_FULL_VERSION="14.29.30133"
|
||||
export MSYS2_ARG_CONV_EXCL='//'
|
||||
|
||||
mkdir -p ${CI_TASK_DIR}/tmp/
|
||||
export TEMP=${CI_TASK_DIR}/tmp/
|
||||
export TMP=${CI_TASK_DIR}/tmp/
|
||||
|
||||
BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-windows-x86_64.exe
|
||||
BAZEL_SHA256=776db1f4986dacc3eda143932f00f7529f9ee65c7c1c004414c44aaa6419d0e9
|
||||
|
||||
CUDA_INSTALL_DIRECTORY=$(cygpath 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1')
|
||||
BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-windows-amd64.exe
|
||||
BAZEL_SHA256=9a89e6a8cc0a3aea37affcf8c146d8925ffbda1d2290c0c6a845ea81e05de62c
|
||||
|
||||
TAR=/usr/bin/tar.exe
|
||||
elif [ "${OS}" = "Darwin" ]; then
|
||||
@ -61,14 +52,15 @@ elif [ "${OS}" = "Darwin" ]; then
|
||||
|
||||
export DS_ROOT_TASK=${CI_TASK_DIR}
|
||||
|
||||
BAZEL_URL=https://github.com/bazelbuild/bazel/releases/download/3.1.0/bazel-3.1.0-installer-darwin-x86_64.sh
|
||||
BAZEL_SHA256=5cfa97031b43432b3c742c80e2e01c41c0acdca7ba1052fc8cf1e291271bc9cd
|
||||
BAZEL_URL=https://github.com/bazelbuild/bazelisk/releases/download/v1.10.1/bazelisk-darwin-amd64
|
||||
BAZEL_SHA256=e485bbf84532d02a60b0eb23c702610b5408df3a199087a4f2b5e0995bbf2d5a
|
||||
|
||||
SHA_SUM="shasum -a 256 -c"
|
||||
TAR=gtar
|
||||
fi;
|
||||
|
||||
WGET=${WGET:-"wget"}
|
||||
CURL=${CURL:-"curl"}
|
||||
TAR=${TAR:-"tar"}
|
||||
XZ=${XZ:-"xz -9 -T0"}
|
||||
ZIP=${ZIP:-"zip"}
|
||||
@ -89,7 +81,6 @@ fi;
|
||||
export PATH
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
export LD_LIBRARY_PATH=${DS_ROOT_TASK}/STT/CUDA/lib64/:${DS_ROOT_TASK}/STT/CUDA/lib64/stubs/:$LD_LIBRARY_PATH
|
||||
export ANDROID_SDK_HOME=${DS_ROOT_TASK}/STT/Android/SDK/
|
||||
export ANDROID_NDK_HOME=${DS_ROOT_TASK}/STT/Android/android-ndk-r18b/
|
||||
fi;
|
||||
@ -120,8 +111,8 @@ export GCC_HOST_COMPILER_PATH=/usr/bin/gcc
|
||||
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
source /etc/os-release
|
||||
if [ "${ID}" = "ubuntu" -a "${VERSION_ID}" = "20.04" ]; then
|
||||
export PYTHON_BIN_PATH=/usr/bin/python3
|
||||
if [ "${ID}" = "debian" -a "${VERSION_ID}" = "9" ]; then
|
||||
export PYTHON_BIN_PATH=/opt/python/cp37-cp37m/bin/python
|
||||
fi
|
||||
elif [ "${OS}" != "${TC_MSYS_VERSION}" ]; then
|
||||
export PYTHON_BIN_PATH=python
|
||||
@ -160,27 +151,16 @@ export BAZEL_OUTPUT_USER_ROOT
|
||||
|
||||
NVCC_COMPUTE="3.5"
|
||||
|
||||
### Define build parameters/env variables that we will re-use in sourcing scripts.
|
||||
if [ "${OS}" = "${CI_MSYS_VERSION}" ]; then
|
||||
TF_CUDA_FLAGS="TF_CUDA_CLANG=0 TF_CUDA_VERSION=10.1 TF_CUDNN_VERSION=7.6.0 CUDNN_INSTALL_PATH=\"${CUDA_INSTALL_DIRECTORY}\" TF_CUDA_PATHS=\"${CUDA_INSTALL_DIRECTORY}\" TF_CUDA_COMPUTE_CAPABILITIES=\"${NVCC_COMPUTE}\""
|
||||
else
|
||||
TF_CUDA_FLAGS="TF_CUDA_CLANG=0 TF_CUDA_VERSION=10.1 TF_CUDNN_VERSION=7.6.0 CUDNN_INSTALL_PATH=\"${DS_ROOT_TASK}/STT/CUDA\" TF_CUDA_PATHS=\"${DS_ROOT_TASK}/STT/CUDA\" TF_CUDA_COMPUTE_CAPABILITIES=\"${NVCC_COMPUTE}\""
|
||||
fi
|
||||
BAZEL_ARM_FLAGS="--config=rpi3 --config=rpi3_opt --copt=-DTFLITE_WITH_RUY_GEMV"
|
||||
BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_WITH_RUY_GEMV"
|
||||
BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV"
|
||||
BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV"
|
||||
BAZEL_CUDA_FLAGS="--config=cuda"
|
||||
if [ "${OS}" = "Linux" ]; then
|
||||
# constexpr usage in tensorflow's absl dep fails badly because of gcc-5
|
||||
# so let's skip that
|
||||
BAZEL_CUDA_FLAGS="${BAZEL_CUDA_FLAGS} --copt=-DNO_CONSTEXPR_FOR_YOU=1"
|
||||
fi
|
||||
BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV"
|
||||
BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV"
|
||||
BAZEL_ARM_FLAGS="--config=rpi3_opt"
|
||||
BAZEL_ARM64_FLAGS="--config=rpi3-armv8_opt"
|
||||
BAZEL_ANDROID_ARM_FLAGS="--config=android_arm"
|
||||
BAZEL_ANDROID_ARM64_FLAGS="--config=android_arm64"
|
||||
BAZEL_ANDROID_X86_64_FLAGS="--config=android_x86_64"
|
||||
BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64"
|
||||
BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64"
|
||||
|
||||
if [ "${OS}" != "${CI_MSYS_VERSION}" ]; then
|
||||
BAZEL_EXTRA_FLAGS="--config=noaws --config=nogcp --config=nohdfs --config=nonccl --copt=-fvisibility=hidden"
|
||||
BAZEL_EXTRA_FLAGS="--config=noaws --config=nogcp --config=nohdfs --config=nonccl"
|
||||
fi
|
||||
|
||||
if [ "${OS}" = "Darwin" ]; then
|
||||
@ -189,11 +169,5 @@ fi
|
||||
|
||||
### Define build targets that we will re-use in sourcing scripts.
|
||||
BUILD_TARGET_LIB_CPP_API="//tensorflow:tensorflow_cc"
|
||||
BUILD_TARGET_GRAPH_TRANSFORMS="//tensorflow/tools/graph_transforms:transform_graph"
|
||||
BUILD_TARGET_GRAPH_SUMMARIZE="//tensorflow/tools/graph_transforms:summarize_graph"
|
||||
BUILD_TARGET_GRAPH_BENCHMARK="//tensorflow/tools/benchmark:benchmark_model"
|
||||
#BUILD_TARGET_CONVERT_MMAP="//tensorflow/contrib/util:convert_graphdef_memmapped_format"
|
||||
BUILD_TARGET_TOCO="//tensorflow/lite/toco:toco"
|
||||
BUILD_TARGET_LITE_BENCHMARK="//tensorflow/lite/tools/benchmark:benchmark_model"
|
||||
BUILD_TARGET_LITE_LIB="//tensorflow/lite/c:libtensorflowlite_c.so"
|
||||
BUILD_TARGET_LITE_LIB="//tensorflow/lite:libtensorflowlite.so"
|
||||
BUILD_TARGET_LIBSTT="//native_client:libstt.so"
|
||||
|
@ -16,7 +16,7 @@ mkdir -p /tmp/train_tflite || true
|
||||
|
||||
set -o pipefail
|
||||
python -m pip install --upgrade pip setuptools wheel | cat
|
||||
python -m pip install --upgrade . | cat
|
||||
python -m pip install --upgrade ".[transcribe]" | cat
|
||||
set +o pipefail
|
||||
|
||||
# Prepare correct arguments for training
|
||||
@ -69,3 +69,22 @@ time ./bin/run-ci-ldc93s1_checkpoint_bytes.sh
|
||||
|
||||
# Training with args set via initialize_globals_from_args()
|
||||
time python ./bin/run-ldc93s1.py
|
||||
|
||||
# Training graph inference
|
||||
time ./bin/run-ci-ldc93s1_singleshotinference.sh
|
||||
|
||||
# transcribe module
|
||||
time python -m coqui_stt_training.transcribe \
|
||||
--src "data/smoke_test/LDC93S1.wav" \
|
||||
--dst ${CI_ARTIFACTS_DIR}/transcribe.log \
|
||||
--n_hidden 100 \
|
||||
--scorer_path "data/smoke_test/pruned_lm.scorer"
|
||||
|
||||
mkdir /tmp/transcribe_dir
|
||||
cp data/smoke_test/LDC93S1.wav /tmp/transcribe_dir
|
||||
time python -m coqui_stt_training.transcribe \
|
||||
--src "/tmp/transcribe_dir/" \
|
||||
--n_hidden 100 \
|
||||
--scorer_path "data/smoke_test/pruned_lm.scorer"
|
||||
|
||||
for i in /tmp/transcribe_dir/*.tlog; do echo $i; cat $i; echo; done
|
||||
|
@ -1,4 +1,4 @@
|
||||
|
||||
|
||||
о
|
||||
е
|
||||
а
|
||||
|
@ -21,7 +21,7 @@ For example, for the ``overlay`` augmentation:
|
||||
|
||||
.. code-block::
|
||||
|
||||
python3 train.py --augment overlay[p=0.1,source=/path/to/audio.sdb,snr=20.0] ...
|
||||
python -m coqui_stt_training.train --augment "overlay[p=0.1,source=/path/to/audio.sdb,snr=20.0]" ...
|
||||
|
||||
In the documentation below, whenever a value is specified as ``<float-range>`` or ``<int-range>``, it supports one of the following formats:
|
||||
|
||||
@ -55,7 +55,7 @@ Within a single domain, augmentations are applied in the same order as they appe
|
||||
Sample domain augmentations
|
||||
---------------------------
|
||||
|
||||
**Overlay augmentation** ``--augment overlay[p=<float>,source=<str>,snr=<float-range>,layers=<int-range>]``
|
||||
**Overlay augmentation** ``--augment "overlay[p=<float>,source=<str>,snr=<float-range>,layers=<int-range>]"``
|
||||
Layers another audio source (multiple times) onto augmented samples.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -67,7 +67,7 @@ Sample domain augmentations
|
||||
* **layers**: number of layers added onto the sample (e.g. 10 layers of speech to get "cocktail-party effect"). A layer is just a sample of the same duration as the sample to augment. It gets stitched together from as many source samples as required.
|
||||
|
||||
|
||||
**Reverb augmentation** ``--augment reverb[p=<float>,delay=<float-range>,decay=<float-range>]``
|
||||
**Reverb augmentation** ``--augment "reverb[p=<float>,delay=<float-range>,decay=<float-range>]"``
|
||||
Adds simplified (no all-pass filters) `Schroeder reverberation <https://ccrma.stanford.edu/~jos/pasp/Schroeder_Reverberators.html>`_ to the augmented samples.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -77,7 +77,7 @@ Sample domain augmentations
|
||||
* **decay**: sound decay in dB per reflection - higher values will result in a less reflective perceived "room"
|
||||
|
||||
|
||||
**Resample augmentation** ``--augment resample[p=<float>,rate=<int-range>]``
|
||||
**Resample augmentation** ``--augment "resample[p=<float>,rate=<int-range>]"``
|
||||
Resamples augmented samples to another sample rate and then resamples back to the original sample rate.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -85,7 +85,7 @@ Sample domain augmentations
|
||||
* **rate**: sample-rate to re-sample to
|
||||
|
||||
|
||||
**Codec augmentation** ``--augment codec[p=<float>,bitrate=<int-range>]``
|
||||
**Codec augmentation** ``--augment "codec[p=<float>,bitrate=<int-range>]"``
|
||||
Compresses and then decompresses augmented samples using the lossy Opus audio codec.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -93,7 +93,7 @@ Sample domain augmentations
|
||||
* **bitrate**: bitrate used during compression
|
||||
|
||||
|
||||
**Volume augmentation** ``--augment volume[p=<float>,dbfs=<float-range>]``
|
||||
**Volume augmentation** ``--augment "volume[p=<float>,dbfs=<float-range>]"``
|
||||
Measures and levels augmented samples to a target dBFS value.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -103,7 +103,7 @@ Sample domain augmentations
|
||||
Spectrogram domain augmentations
|
||||
--------------------------------
|
||||
|
||||
**Pitch augmentation** ``--augment pitch[p=<float>,pitch=<float-range>]``
|
||||
**Pitch augmentation** ``--augment "pitch[p=<float>,pitch=<float-range>]"``
|
||||
Scales spectrogram on frequency axis and thus changes pitch.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -111,7 +111,7 @@ Spectrogram domain augmentations
|
||||
* **pitch**: pitch factor by which the frequency axis is scaled (e.g. a value of 2.0 will raise audio frequency by one octave)
|
||||
|
||||
|
||||
**Tempo augmentation** ``--augment tempo[p=<float>,factor=<float-range>]``
|
||||
**Tempo augmentation** ``--augment "tempo[p=<float>,factor=<float-range>]"``
|
||||
Scales spectrogram on time axis and thus changes playback tempo.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -119,7 +119,7 @@ Spectrogram domain augmentations
|
||||
* **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)
|
||||
|
||||
|
||||
**Warp augmentation** ``--augment warp[p=<float>,nt=<int-range>,nf=<int-range>,wt=<float-range>,wf=<float-range>]``
|
||||
**Warp augmentation** ``--augment "warp[p=<float>,nt=<int-range>,nf=<int-range>,wt=<float-range>,wf=<float-range>]"``
|
||||
Applies a non-linear image warp to the spectrogram. This is achieved by randomly shifting a grid of equally distributed warp points along time and frequency axis.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -133,7 +133,7 @@ Spectrogram domain augmentations
|
||||
* **wf**: standard deviation of the random shift applied to warp points along frequency axis (0.0 = no warp, 1.0 = half the distance to the neighbour point)
|
||||
|
||||
|
||||
**Frequency mask augmentation** ``--augment frequency_mask[p=<float>,n=<int-range>,size=<int-range>]``
|
||||
**Frequency mask augmentation** ``--augment "frequency_mask[p=<float>,n=<int-range>,size=<int-range>]"``
|
||||
Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -145,7 +145,7 @@ Spectrogram domain augmentations
|
||||
Multi domain augmentations
|
||||
--------------------------
|
||||
|
||||
**Time mask augmentation** ``--augment time_mask[p=<float>,n=<int-range>,size=<float-range>,domain=<domain>]``
|
||||
**Time mask augmentation** ``--augment "time_mask[p=<float>,n=<int-range>,size=<float-range>,domain=<domain>]"``
|
||||
Sets time-intervals within the augmented samples to zero (silence) at random positions.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -157,7 +157,7 @@ Multi domain augmentations
|
||||
* **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)
|
||||
|
||||
|
||||
**Dropout augmentation** ``--augment dropout[p=<float>,rate=<float-range>,domain=<domain>]``
|
||||
**Dropout augmentation** ``--augment "dropout[p=<float>,rate=<float-range>,domain=<domain>]"``
|
||||
Zeros random data points of the targeted data representation.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -167,7 +167,7 @@ Multi domain augmentations
|
||||
* **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)
|
||||
|
||||
|
||||
**Add augmentation** ``--augment add[p=<float>,stddev=<float-range>,domain=<domain>]``
|
||||
**Add augmentation** ``--augment "add[p=<float>,stddev=<float-range>,domain=<domain>]"``
|
||||
Adds random values picked from a normal distribution (with a mean of 0.0) to all data points of the targeted data representation.
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -177,7 +177,7 @@ Multi domain augmentations
|
||||
* **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram"
|
||||
|
||||
|
||||
**Multiply augmentation** ``--augment multiply[p=<float>,stddev=<float-range>,domain=<domain>]``
|
||||
**Multiply augmentation** ``--augment "multiply[p=<float>,stddev=<float-range>,domain=<domain>]"``
|
||||
Multiplies all data points of the targeted data representation with random values picked from a normal distribution (with a mean of 1.0).
|
||||
|
||||
* **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
|
||||
@ -191,24 +191,22 @@ Example training with all augmentations:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python -u train.py \
|
||||
python -m coqui_stt_training.train \
|
||||
--train_files "train.sdb" \
|
||||
--feature_cache ./feature.cache \
|
||||
--cache_for_epochs 10 \
|
||||
--epochs 100 \
|
||||
--augment overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10] \
|
||||
--augment reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0] \
|
||||
--augment resample[p=0.1,rate=12000:8000~4000] \
|
||||
--augment codec[p=0.1,bitrate=48000:16000] \
|
||||
--augment volume[p=0.1,dbfs=-10:-40] \
|
||||
--augment pitch[p=0.1,pitch=1~0.2] \
|
||||
--augment tempo[p=0.1,factor=1~0.5] \
|
||||
--augment warp[p=0.1,nt=4,nf=1,wt=0.5:1.0,wf=0.1:0.2] \
|
||||
--augment frequency_mask[p=0.1,n=1:3,size=1:5] \
|
||||
--augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \
|
||||
--augment dropout[p=0.1,rate=0.05] \
|
||||
--augment add[p=0.1,domain=signal,stddev=0~0.5] \
|
||||
--augment multiply[p=0.1,domain=features,stddev=0~0.5] \
|
||||
--augment "overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10]" \
|
||||
--augment "reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0]" \
|
||||
--augment "resample[p=0.1,rate=12000:8000~4000]" \
|
||||
--augment "codec[p=0.1,bitrate=48000:16000]" \
|
||||
--augment "volume[p=0.1,dbfs=-10:-40]" \
|
||||
--augment "pitch[p=0.1,pitch=1~0.2]" \
|
||||
--augment "tempo[p=0.1,factor=1~0.5]" \
|
||||
--augment "warp[p=0.1,nt=4,nf=1,wt=0.5:1.0,wf=0.1:0.2]" \
|
||||
--augment "frequency_mask[p=0.1,n=1:3,size=1:5]" \
|
||||
--augment "time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40]" \
|
||||
--augment "dropout[p=0.1,rate=0.05]" \
|
||||
--augment "add[p=0.1,domain=signal,stddev=0~0.5]" \
|
||||
--augment "multiply[p=0.1,domain=features,stddev=0~0.5]" \
|
||||
[...]
|
||||
|
||||
|
||||
@ -218,20 +216,20 @@ Example of playing all samples with reverberation and maximized volume:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
bin/play.py --augment reverb[p=0.1,delay=50.0,decay=2.0] --augment volume --random test.sdb
|
||||
bin/play.py --augment "reverb[p=0.1,delay=50.0,decay=2.0]" --augment volume --random test.sdb
|
||||
|
||||
Example simulation of the codec augmentation of a wav-file first at the beginning and then at the end of an epoch:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 0.0 test.wav
|
||||
bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 1.0 test.wav
|
||||
bin/play.py --augment "codec[p=0.1,bitrate=48000:16000]" --clock 0.0 test.wav
|
||||
bin/play.py --augment "codec[p=0.1,bitrate=48000:16000]" --clock 1.0 test.wav
|
||||
|
||||
Example of creating a pre-augmented test set:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
bin/data_set_tool.py \
|
||||
--augment overlay[source=noise.sdb,layers=1,snr=20~10] \
|
||||
--augment resample[rate=12000:8000~4000] \
|
||||
--augment "overlay[source=noise.sdb,layers=1,snr=20~10]" \
|
||||
--augment "resample[rate=12000:8000~4000]" \
|
||||
test.sdb test-augmented.sdb
|
||||
|
@ -76,7 +76,7 @@ You can now use Bazel to build the main 🐸STT library, ``libstt.so``. Add ``--
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libstt.so
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt="-D_GLIBCXX_USE_CXX11_ABI=0" //native_client:libstt.so
|
||||
|
||||
The generated binaries will be saved to ``bazel-bin/native_client/``.
|
||||
|
||||
@ -90,7 +90,7 @@ Using the example from above you can build the library and that binary at the sa
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libstt.so //native_client:generate_scorer_package
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt="-D_GLIBCXX_USE_CXX11_ABI=0" //native_client:libstt.so //native_client:generate_scorer_package
|
||||
|
||||
The generated binaries will be saved to ``bazel-bin/native_client/``.
|
||||
|
||||
@ -126,7 +126,7 @@ Included are a set of generated Python bindings. After following the above build
|
||||
make bindings
|
||||
pip install dist/stt-*
|
||||
|
||||
The API mirrors the C++ API and is demonstrated in `client.py <python/client.py>`_. Refer to `coqui-stt.h <coqui-stt.h>`_ for documentation.
|
||||
`Reference documentation <python-api>`_ is available for the Python bindings, as well as examples in the `STT-examples repository <https://github.com/coqui-ai/STT-examples>`_ and the `source code for the CLI tool installed alongside the Python bindings <py-api-example>`_.
|
||||
|
||||
Install NodeJS / ElectronJS bindings
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -186,22 +186,22 @@ Cross-building
|
||||
RPi3 ARMv7 and LePotato ARM64
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
We do support cross-compilation. Please refer to our ``coqui-ai/tensorflow`` fork, where we define the following ``--config`` flags:
|
||||
We support cross-compilation from Linux hosts. The following ``--config`` flags can be specified when building with bazel:
|
||||
|
||||
* ``--config=rpi3`` and ``--config=rpi3_opt`` for Raspbian / ARMv7
|
||||
* ``--config=rpi3-armv8`` and ``--config=rpi3-armv8_opt`` for ARMBian / ARM64
|
||||
* ``--config=rpi3_opt`` for Raspbian / ARMv7
|
||||
* ``--config=rpi3-armv8_opt`` for ARMBian / ARM64
|
||||
|
||||
So your command line for ``RPi3`` and ``ARMv7`` should look like:
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libstt.so
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --config=rpi3_opt //native_client:libstt.so
|
||||
|
||||
And your command line for ``LePotato`` and ``ARM64`` should look like:
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libstt.so
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --config=rpi3-armv8_opt //native_client:libstt.so
|
||||
|
||||
While we test only on RPi3 Raspbian Buster and LePotato ARMBian Buster, anything compatible with ``armv7-a cortex-a53`` or ``armv8-a cortex-a53`` should be fine.
|
||||
|
||||
@ -213,63 +213,61 @@ The path of the system tree can be overridden from the default values defined in
|
||||
cd ../STT/native_client
|
||||
make TARGET=<system> stt
|
||||
|
||||
Android devices support
|
||||
-----------------------
|
||||
RPi4 ARMv8 (Ubuntu 21.10)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
We have support for Android relying on TensorFlow Lite, with Java and JNI bindings. For more details on how to experiment with those, please refer to the section below.
|
||||
We support cross-compilation from Linux hosts. The following ``--config`` flags can be specified when building with bazel:
|
||||
|
||||
Please refer to TensorFlow documentation on how to setup the environment to build for Android (SDK and NDK required).
|
||||
* ``--config=rpi4ub-armv8_opt`` for Ubuntu / ARM64
|
||||
|
||||
Using the library from Android project
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Due to the discontinuation of Bintray JCenter we do not have pre-built Android packages published for now. We are working to move to Maven Central and will update this section when it's available.
|
||||
|
||||
.. We provide uptodate and tested ``libstt`` usable as an ``AAR`` package,
|
||||
for Android versions starting with 7.0 to 11.0. The package is published on
|
||||
`JCenter <https://bintray.com/coqui/ai.coqui.stt/libstt>`_,
|
||||
and the ``JCenter`` repository should be available by default in any Android
|
||||
project. Please make sure your project is setup to pull from this repository.
|
||||
You can then include the library by just adding this line to your
|
||||
``gradle.build``, adjusting ``VERSION`` to the version you need:
|
||||
|
||||
.. code-block::
|
||||
|
||||
implementation 'stt.coqui.ai:libstt:VERSION@aar'
|
||||
|
||||
Building ``libstt.so`` for Android
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can build the ``libstt.so`` using (ARMv7):
|
||||
Your command line should look like:
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libstt.so
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --config=rpi4ub-armv8_opt //native_client:libstt.so
|
||||
|
||||
The ``stt`` binary can also be cross-built, with ``TARGET=rpi4ub-armv8``. This might require you to setup a system tree using the tool ``multistrap`` and the multistrap configuration file: ``native_client/multistrap-ubuntu64-impish.conf``.
|
||||
The path of the system tree can be overridden from the default values defined in ``definitions.mk`` through the ``RASPBIAN`` ``make`` variable.
|
||||
|
||||
.. code-block::
|
||||
|
||||
cd ../STT/native_client
|
||||
make TARGET=rpi4ub-armv8 stt
|
||||
|
||||
Building ``libstt.so`` for Android
|
||||
----------------------------------
|
||||
|
||||
Prerequisites
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Beyond the general prerequisites listed above, you'll also need the Android-specific dependencies for TensorFlow, namely you'll need to install the `Android SDK <https://developer.android.com>`_ and the `Android NDK version r18b <https://github.com/android/ndk/wiki/Unsupported-Downloads#r18b>`_. After that's done, export the environment variables ``ANDROID_SDK_HOME`` and ``ANDROID_NDK_HOME`` to the corresponding folders where the SDK and NDK were installed. Finally, configure the TensorFlow build and make sure you answer yes when the script asks if you want to set-up an Android build.
|
||||
|
||||
Then, you can build the ``libstt.so`` using (ARMv7):
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 //native_client:libstt.so
|
||||
|
||||
Or (ARM64):
|
||||
|
||||
.. code-block::
|
||||
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libstt.so
|
||||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 //native_client:libstt.so
|
||||
|
||||
Building ``libstt.aar``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In the unlikely event you have to rebuild the JNI bindings, source code is
|
||||
available under the ``libstt`` subdirectory. Building depends on shared
|
||||
object: please ensure to place ``libstt.so`` into the
|
||||
``libstt/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` matching subdirectories.
|
||||
In order to build the JNI bindings, source code is available under the ``native_client/java/libstt`` directory. Building the AAR package requires having previously built ``libstt.so`` for all desired architectures and placed the corresponding binaries into the ``native_client/java/libstt/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` subdirectories. If you don't want to build the AAR package for all of ARM64, ARMv7 and x86_64, you can edit the ``native_client/java/libstt/gradle.properties`` file to remove unneeded architectures.
|
||||
|
||||
Building the bindings is managed by ``gradle`` and should be limited to issuing
|
||||
``./gradlew libstt:build``, producing an ``AAR`` package in
|
||||
``./libstt/build/outputs/aar/``.
|
||||
Building the bindings is managed by ``gradle`` and can be done by calling ``./gradlew libstt:build`` inside the ``native_client/java`` folder, producing an ``AAR`` package in
|
||||
``native_client/java/libstt/build/outputs/aar/``.
|
||||
|
||||
Please note that you might have to copy the file to a local Maven repository
|
||||
and adapt file naming (when missing, the error message should state what
|
||||
filename it expects and where).
|
||||
|
||||
Building C++ ``stt`` binary
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Building C++ ``stt`` binary for Android
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Building the ``stt`` binary will happen through ``ndk-build`` (ARMv7):
|
||||
|
||||
|
@ -1,8 +0,0 @@
|
||||
.. _byte-output-mode:
|
||||
|
||||
Training in byte output mode
|
||||
=============================
|
||||
|
||||
🐸STT includes a ``byte output mode`` which can be useful when working with languages with very large alphabets, such as Mandarin Chinese.
|
||||
|
||||
This training mode is experimental, and has only been used for Mandarin Chinese.
|
@ -1,4 +1,4 @@
|
||||
.. _c-usage:
|
||||
.. _c-api:
|
||||
|
||||
C API
|
||||
=====
|
||||
|
@ -13,8 +13,8 @@ Creating a model instance and loading model
|
||||
:start-after: sphinx-doc: c_ref_model_start
|
||||
:end-before: sphinx-doc: c_ref_model_stop
|
||||
|
||||
Deploying trained model
|
||||
-----------------------
|
||||
Transcribing audio with the loaded model
|
||||
----------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/client.cc
|
||||
:language: c
|
||||
|
@ -32,13 +32,13 @@ The CSV files contain the following fields:
|
||||
* ``wav_filesize`` - samples size given in bytes, used for sorting the data before training. Expects integer
|
||||
* ``transcript`` - transcription target for the sample
|
||||
|
||||
To use Common Voice data for training, validation and testing, you should pass the ``CSV`` filenames to ``train.py`` via ``--train_files``, ``--dev_files``, ``--test_files``.
|
||||
To use Common Voice data for training, validation and testing, you should pass the ``CSV`` filenames via ``--train_files``, ``--dev_files``, ``--test_files``.
|
||||
|
||||
For example, if you downloaded, extracted, and imported the French language data from Common Voice, you will have a new local directory named ``fr``. You can train STT with this new French data as such:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 train.py \
|
||||
--train_files fr/clips/train.csv \
|
||||
--dev_files fr/clips/dev.csv \
|
||||
--test_files fr/clips/test.csv
|
||||
$ python -m coqui_stt_training.train \
|
||||
--train_files fr/clips/train.csv \
|
||||
--dev_files fr/clips/dev.csv \
|
||||
--test_files fr/clips/test.csv
|
||||
|
79
doc/Checkpoint-Inference.rst
Normal file
79
doc/Checkpoint-Inference.rst
Normal file
@ -0,0 +1,79 @@
|
||||
.. _checkpoint-inference:
|
||||
|
||||
Inference tools in the training package
|
||||
=======================================
|
||||
|
||||
The standard deployment options for 🐸STT use highly optimized packages for deployment in real time, single-stream, low latency use cases. They take as input exported models which are also optimized, leading to further space and runtime gains. On the other hand, for the development of new features, it might be easier to use the training code for prototyping, which will allow you to test your changes without needing to recompile source code.
|
||||
|
||||
The training package contains options for performing inference directly from a checkpoint (and optionally a scorer), without needing to export a model. They are documented below, and all require a working :ref:`training environment <intro-training-docs>` before they can be used. Additionally, they require the Python ``webrtcvad`` package to be installed. This can either be done by specifying the "transcribe" extra when installing the training package, or by installing it manually in your training environment:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install webrtcvad
|
||||
|
||||
Note that if your goal is to evaluate a trained model and obtain accuracy metrics, you should use the evaluation module: ``python -m coqui_stt_training.evaluate``, which calculates character and word error rates from a properly formatted CSV file (specified with the ``--test_files`` flag). See the :ref:`training docs <intro-training-docs>` for more information.
|
||||
|
||||
Single file (aka one-shot) inference
|
||||
------------------------------------
|
||||
|
||||
This is the simplest way to perform inference from a checkpoint. It takes a single WAV file as input with the ``--one_shot_infer`` flag, and outputs the predicted transcription for that file.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m coqui_stt_training.training_graph_inference --checkpoint_dir coqui-stt-1.0.0-checkpoint --scorer_path huge-vocabulary.scorer --n_hidden 2048 --one_shot_infer audio/2830-3980-0043.wav
|
||||
I --alphabet_config_path not specified, but found an alphabet file alongside specified checkpoint (coqui-stt-1.0.0-checkpoint/alphabet.txt). Will use this alphabet file for this run.
|
||||
I Loading best validating checkpoint from coqui-stt-1.0.0-checkpoint/best_dev-3663881
|
||||
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/bias
|
||||
I Loading variable from checkpoint: cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel
|
||||
I Loading variable from checkpoint: layer_1/bias
|
||||
I Loading variable from checkpoint: layer_1/weights
|
||||
I Loading variable from checkpoint: layer_2/bias
|
||||
I Loading variable from checkpoint: layer_2/weights
|
||||
I Loading variable from checkpoint: layer_3/bias
|
||||
I Loading variable from checkpoint: layer_3/weights
|
||||
I Loading variable from checkpoint: layer_5/bias
|
||||
I Loading variable from checkpoint: layer_5/weights
|
||||
I Loading variable from checkpoint: layer_6/bias
|
||||
I Loading variable from checkpoint: layer_6/weights
|
||||
experience proves this
|
||||
|
||||
Transcription of longer audio files
|
||||
-----------------------------------
|
||||
|
||||
If you have longer audio files to transcribe, we offer a script which uses Voice Activity Detection (VAD) to split audio files into chunks and perform batched inference on said files. This can speed up transcription significantly. The transcription script will also output the results in JSON format, allowing for easier programmatic usage of the outputs.
|
||||
|
||||
There are two main usage modes: transcribing a single file, or scanning a directory for audio files and transcribing all of them.
|
||||
|
||||
Transcribing a single file
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
For a single audio file, you can specify it directly in the ``--src`` flag of the ``python -m coqui_stt_training.transcribe`` script:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m coqui_stt_training.transcribe --checkpoint_dir coqui-stt-1.0.0-checkpoint --n_hidden 2048 --scorer_path huge-vocabulary.scorer --vad_aggressiveness 0 --src audio/2830-3980-0043.wav
|
||||
[1]: "audio/2830-3980-0043.wav" -> "audio/2830-3980-0043.tlog"
|
||||
Transcribing files: 100%|███████████████████████████████████| 1/1 [00:05<00:00, 5.40s/it]
|
||||
$ cat audio/2830-3980-0043.tlog
|
||||
[{"start": 150, "end": 1950, "transcript": "experience proves this"}]
|
||||
|
||||
Note the use of the ``--vad_aggressiveness`` flag above to control the behavior of the VAD process used to find silent sections of the audio file for splitting into chunks. You can run ``python -m coqui_stt_training.transcribe --help`` to see the full listing of options; the last ones are specific to the transcribe module.
|
||||
|
||||
By default the transcription results are put in a ``.tlog`` file next to the audio file that was transcribed, but you can specify a different location with the ``--dst path/to/some/file.tlog`` flag. This only works when transcribing a single file.
|
||||
|
||||
Scanning a directory for audio files
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Alternatively you can also specify a directory in the ``--src`` flag, in which case the directory will be scanned for any WAV files to be transcribed. If you specify ``--recursive true``, it'll scan the directory recursively, going into any subdirectories as well. Transcription results will be placed in a ``.tlog`` file alongside every audio file that was found by the process.
|
||||
|
||||
Multiple processes will be used to distribute the transcription work among available CPUs.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m coqui_stt_training.transcribe --checkpoint_dir coqui-stt-1.0.0-checkpoint --n_hidden 2048 --scorer_path huge-vocabulary.scorer --vad_aggressiveness 0 --src audio/ --recursive true
|
||||
Transcribing all files in --src directory audio
|
||||
Transcribing files: 0%| | 0/3 [00:00<?, ?it/s]
|
||||
[3]: "audio/8455-210777-0068.wav" -> "audio/8455-210777-0068.tlog"
|
||||
[1]: "audio/2830-3980-0043.wav" -> "audio/2830-3980-0043.tlog"
|
||||
[2]: "audio/4507-16021-0012.wav" -> "audio/4507-16021-0012.tlog"
|
||||
Transcribing files: 100%|███████████████████████████████████| 3/3 [00:07<00:00, 2.50s/it]
|
@ -1,14 +1,14 @@
|
||||
.. _decoder-docs:
|
||||
|
||||
CTC beam search decoder
|
||||
=======================
|
||||
Beam search decoder
|
||||
===================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
🐸STT uses the `Connectionist Temporal Classification <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_ loss function. For an excellent explanation of CTC and its usage, see this Distill article: `Sequence Modeling with CTC <https://distill.pub/2017/ctc/>`_. This document assumes the reader is familiar with the concepts described in that article, and describes 🐸STT specific behaviors that developers building systems with 🐸STT should know to avoid problems.
|
||||
|
||||
Note: Documentation for the tooling for creating custom scorer packages is available in :ref:`language-model`.
|
||||
Note: Documentation for the tooling for creating custom scorer packages is available in :ref:`language-model`. Documentation for the coqui_stt_ctcdecoder Python package used by the training code for decoding is available in :ref:`decoder-api`.
|
||||
|
||||
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in `BCP 14 <https://tools.ietf.org/html/bcp14>`_ when, and only when, they appear in all capitals, as shown here.
|
||||
|
||||
|
@ -10,31 +10,37 @@ Introduction
|
||||
|
||||
Deployment is the process of feeding audio (speech) into a trained 🐸STT model and receiving text (transcription) as output. In practice you probably want to use two models for deployment: an audio model and a text model. The audio model (a.k.a. the acoustic model) is a deep neural network which converts audio into text. The text model (a.k.a. the language model / scorer) returns the likelihood of a string of text. If the acoustic model makes spelling or grammatical mistakes, the language model can help correct them.
|
||||
|
||||
You can deploy 🐸STT models either via a command-line client or a language binding. 🐸 provides three language bindings and one command line client. There also exist several community-maintained clients and language bindings, which are listed `further down in this README <#third-party-bindings>`_.
|
||||
|
||||
*Note that 🐸STT currently only provides packages for CPU deployment with Python 3.5 or higher on Linux. We're working to get the rest of our usually supported packages back up and running as soon as possible.*
|
||||
You can deploy 🐸STT models either via a command-line client or a language binding.
|
||||
|
||||
* :ref:`The Python package + language binding <py-usage>`
|
||||
* :ref:`The command-line client <cli-usage>`
|
||||
* :ref:`The native C API <c-usage>`
|
||||
* :ref:`The Node.JS package + language binding <nodejs-usage>`
|
||||
* :ref:`The .NET client + language binding <build-native-client-dotnet>`
|
||||
* :ref:`The Android libstt AAR package <android-usage>`
|
||||
* :ref:`The command-line client <cli-usage>`
|
||||
* :ref:`The C API <c-usage>`
|
||||
|
||||
In some use cases, you might want to use the inference facilities built into the training code, for example for faster prototyping of new features. They are not production-ready, but because it's all Python code you won't need to recompile in order to test code changes, which can be much faster. See :ref:`checkpoint-inference` for more details.
|
||||
|
||||
.. _download-models:
|
||||
|
||||
Download trained Coqui STT models
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can find pre-trained models ready for deployment on the 🐸STT `releases page <https://github.com/coqui-ai/STT/releases>`_. You can also download the latest acoustic model (``.pbmm``) and language model (``.scorer``) from the command line as such:
|
||||
You can find pre-trained models ready for deployment on the `Coqui Model Zoo <https://coqui.ai/models>`_. You can also use the 🐸STT Model Manager to download and try out the latest models:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
wget https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.pbmm
|
||||
wget https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.scorer
|
||||
# Create a virtual environment
|
||||
$ python3 -m venv venv-stt
|
||||
$ source venv-stt/bin/activate
|
||||
|
||||
In every 🐸STT official release, there are several kinds of model files provided. For the acoustic model there are two file extensions: ``.pbmm`` and ``.tflite``. Files ending in ``.pbmm`` are compatible with clients and language bindings built against the standard TensorFlow runtime. ``.pbmm`` files are also compatible with CUDA enabled clients and language bindings. Files ending in ``.tflite``, on the other hand, are only compatible with clients and language bindings built against the `TensorFlow Lite runtime <https://www.tensorflow.org/lite/>`_. TFLite models are optimized for size and performance on low-power devices. You can find a full list of supported platforms and TensorFlow runtimes at :ref:`supported-platforms-deployment`.
|
||||
# Install 🐸STT model manager
|
||||
$ python -m pip install -U pip
|
||||
$ python -m pip install coqui-stt-model-manager
|
||||
|
||||
For language models, there is only one file extension: ``.scorer``. Language models can run on any supported device, regardless of Tensorflow runtime. You can read more about language models with regard to :ref:`the decoding process <decoder-docs>` and :ref:`how scorers are generated <language-model>`.
|
||||
# Run the model manager. A browser tab will open and you can then download and test models from the Model Zoo.
|
||||
$ stt-model-manager
|
||||
|
||||
In every 🐸STT official release, there are different model files provided. The acoustic model uses the ``.tflite`` extension. Language models use the extension ``.scorer``. You can read more about language models with regard to :ref:`the decoding process <decoder-docs>` and :ref:`how scorers are generated <language-model>`.
|
||||
|
||||
.. _model-data-match:
|
||||
|
||||
@ -51,7 +57,7 @@ How well a 🐸STT model transcribes your audio will depend on a lot of things.
|
||||
|
||||
If you take a 🐸STT model trained on English, and pass Spanish into it, you should expect the model to perform horribly. Imagine you have a friend who only speaks English, and you ask her to make Spanish subtitles for a Spanish film, you wouldn't expect to get good subtitles. This is an extreme example, but it helps to form an intuition for what to expect from 🐸STT models. Imagine that the 🐸STT models are like people who speak a certain language with a certain accent, and then think about what would happen if you asked that person to transcribe your audio.
|
||||
|
||||
An acoustic model (i.e. ``.pbmm`` or ``.tflite``) has "learned" how to transcribe a certain language, and the model probably understands some accents better than others. In addition to languages and accents, acoustic models are sensitive to the style of speech, the topic of speech, and the demographics of the person speaking. The language model (``.scorer``) has been trained on text alone. As such, the language model is sensitive to how well the topic and style of speech matches that of the text used in training. The 🐸STT `release notes <https://github.com/coqui-ai/STT/releases/tag/v0.9.3>`_ include detailed information on the data used to train the models. If the data used for training the off-the-shelf models does not align with your intended use case, it may be necessary to adapt or train new models in order to improve transcription on your data.
|
||||
An acoustic model (i.e. ``.tflite`` file) has "learned" how to transcribe a certain language, and the model probably understands some accents better than others. In addition to languages and accents, acoustic models are sensitive to the style of speech, the topic of speech, and the demographics of the person speaking. The language model (``.scorer``) has been trained on text alone. As such, the language model is sensitive to how well the topic and style of speech matches that of the text used in training. The 🐸STT `release notes <https://github.com/coqui-ai/STT/releases/latest>`_ include detailed information on the data used to train the models. If the data used for training the off-the-shelf models does not align with your intended use case, it may be necessary to adapt or train new models in order to improve transcription on your data.
|
||||
|
||||
Training your own language model is often a good way to improve transcription on your audio. The process and tools used to generate a language model are described in :ref:`language-model` and general information can be found in :ref:`decoder-docs`. Generating a scorer from a constrained topic dataset is a quick process and can bring significant accuracy improvements if your audio is from a specific topic.
|
||||
|
||||
@ -67,7 +73,7 @@ Model compatibility
|
||||
Using the Python package
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Pre-built binaries for deploying a trained model can be installed with ``pip``. It is highly recommended that you use Python 3.5 or higher in a virtual environment. Both `pip <https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/#installing-pip>`_ and `venv <https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment>`_ are included in normal Python 3 installations.
|
||||
Pre-built binaries for deploying a trained model can be installed with ``pip``. It is highly recommended that you use Python 3.6 or higher in a virtual environment. Both `pip <https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/#installing-pip>`_ and `venv <https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment>`_ are included in normal Python 3 installations.
|
||||
|
||||
When you create a new Python virtual environment, you create a directory containing a ``python`` binary and everything needed to run 🐸STT. For the purpose of this documentation, we will use ``$HOME/coqui-stt-venv``, but you can use whatever directory you like.
|
||||
|
||||
@ -87,7 +93,7 @@ After your environment has been activated, you can use ``pip`` to install ``stt`
|
||||
|
||||
.. code-block::
|
||||
|
||||
(coqui-stt-venv)$ python3 -m pip install -U pip && python3 -m pip install stt
|
||||
(coqui-stt-venv)$ python -m pip install -U pip && python -m pip install stt
|
||||
|
||||
After installation has finished, you can call ``stt`` from the command-line.
|
||||
|
||||
@ -95,53 +101,10 @@ The following command assumes you :ref:`downloaded the pre-trained models <downl
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
(coqui-stt-venv)$ stt --model stt-0.9.3-models.pbmm --scorer stt-0.9.3-models.scorer --audio my_audio_file.wav
|
||||
(coqui-stt-venv)$ stt --model model.tflite --scorer huge-vocabulary.scorer --audio my_audio_file.wav
|
||||
|
||||
See :ref:`the Python client <py-api-example>` for an example of how to use the package programmatically.
|
||||
|
||||
*GPUs will soon be supported:* If you have a supported NVIDIA GPU on Linux, you can install the GPU specific package as follows:
|
||||
|
||||
.. code-block::
|
||||
|
||||
(coqui-stt-venv)$ python3 -m pip install -U pip && python3 -m pip install stt-gpu
|
||||
|
||||
See the `release notes <https://github.com/coqui-ai/STT/releases>`_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_.
|
||||
|
||||
.. _cli-usage:
|
||||
|
||||
Using the command-line client
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To download the pre-built binaries for the ``stt`` command-line (compiled C++) client, use ``util/taskcluster.py``\ :
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 util/taskcluster.py --target .
|
||||
|
||||
or if you're on macOS:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 util/taskcluster.py --arch osx --target .
|
||||
|
||||
also, if you need some binaries different than current main branch, like ``v0.2.0-alpha.6``\ , you can use ``--branch``\ :
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 util/taskcluster.py --branch "v0.2.0-alpha.6" --target "."
|
||||
|
||||
The script ``taskcluster.py`` will download ``native_client.tar.xz`` (which includes the ``stt`` binary and associated libraries) and extract it into the current folder. ``taskcluster.py`` will download binaries for Linux/x86_64 by default, but you can override that behavior with the ``--arch`` parameter. See the help info with ``python3 util/taskcluster.py -h`` for more details. Specific branches of 🐸STT or TensorFlow can be specified as well.
|
||||
|
||||
Alternatively you may manually download the ``native_client.tar.xz`` from the `releases page <https://github.com/coqui-ai/STT/releases>`_.
|
||||
|
||||
Assuming you have :ref:`downloaded the pre-trained models <download-models>`, you can use the client as such:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./stt --model coqui-stt-0.9.3-models.pbmm --scorer coqui-stt-0.9.3-models.scorer --audio audio_input.wav
|
||||
|
||||
See the help output with ``./stt -h`` for more details.
|
||||
|
||||
.. _nodejs-usage:
|
||||
|
||||
Using the Node.JS / Electron.JS package
|
||||
@ -163,16 +126,56 @@ Please note that as of now, we support:
|
||||
|
||||
TypeScript support is also provided.
|
||||
|
||||
If you're using Linux and have a supported NVIDIA GPU, you can install the GPU specific package as follows:
|
||||
See the :ref:`TypeScript client <js-api-example>` for an example of how to use the bindings programmatically.
|
||||
|
||||
.. _android-usage:
|
||||
|
||||
Using the Android AAR libstt package
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
A pre-built ``libstt`` Android AAR package can be downloaded from GitHub Releases, for Android versions 7.0+. In order to use it in your Android application, first modify your app's ``build.gradle`` file to add a local dir as a repository. In the ``repository`` section, add the following definition:
|
||||
|
||||
.. code-block:: groovy
|
||||
|
||||
repositories {
|
||||
flatDir {
|
||||
dirs 'libs'
|
||||
}
|
||||
}
|
||||
|
||||
Then, create a libs directory inside your app's folder, and place the libstt AAR file there. Finally, add the following dependency declaration in your app's ``build.gradle`` file:
|
||||
|
||||
.. code-block:: groovy
|
||||
|
||||
dependencies {
|
||||
implementation fileTree(dir: 'libs', include: ['*.aar'])
|
||||
}
|
||||
|
||||
This will link all .aar files in the ``libs`` directory you just created, including libstt.
|
||||
|
||||
.. _cli-usage:
|
||||
|
||||
Using the command-line client
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The pre-built binaries for the ``stt`` command-line (compiled C++) client are available in the ``native_client.*.tar.xz`` archive for your desired platform (where the * is the appropriate identifier for the platform you want to run on). You can download the archive from our `releases page <https://github.com/coqui-ai/STT/releases>`_.
|
||||
|
||||
Assuming you have :ref:`downloaded the pre-trained models <download-models>`, you can use the client as such:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
npm install stt-gpu
|
||||
./stt --model model.tflite --scorer huge-vocabulary.scorer --audio audio_input.wav
|
||||
|
||||
See the `release notes <https://github.com/coqui-ai/STT/releases>`_ to find which GPUs are supported. Please ensure you have the required `CUDA dependency <#cuda-dependency>`_.
|
||||
See the help output with ``./stt -h`` for more details.
|
||||
|
||||
See the :ref:`TypeScript client <js-api-example>` for an example of how to use the bindings programmatically.
|
||||
.. _c-usage:
|
||||
|
||||
Using the C API
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
Alongside the pre-built binaries for the ``stt`` command-line client described :ref:`above <cli-usage>`, in the same ``native_client.*.tar.xz`` platform-specific archive, you'll find the ``coqui-stt.h`` header file as well as the pre-built shared libraries needed to use the 🐸STT C API. You can download the archive from our `releases page <https://github.com/coqui-ai/STT/releases>`_.
|
||||
|
||||
Then, simply include the header file and link against the shared libraries in your project, and you should be able to use the C API. Reference documentation is available in :ref:`c-api`.
|
||||
|
||||
Installing bindings from source
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -184,13 +187,24 @@ Dockerfile for building from source
|
||||
|
||||
We provide ``Dockerfile.build`` to automatically build ``libstt.so``, the C++ native client, Python bindings, and KenLM.
|
||||
|
||||
If you want to specify a different repository or branch, you can specify the ``STT_REPO`` or ``STT_SHA`` arguments:
|
||||
Before building, make sure that git submodules have been initialised:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
docker build . -f Dockerfile.build --build-arg STT_REPO=git://your/fork --build-arg STT_SHA=origin/your-branch
|
||||
git submodule sync
|
||||
git submodule update --init
|
||||
|
||||
.. _runtime-deps:
|
||||
Then build with:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
docker build . -f Dockerfile.build -t stt-image
|
||||
|
||||
You can then use stt inside the Docker container:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
docker run -it stt-image bash
|
||||
|
||||
|
||||
Runtime Dependencies
|
||||
@ -204,16 +218,8 @@ Running ``stt`` may require runtime dependencies. Please refer to your system's
|
||||
* ``libpthread`` - Reported dependency on Linux. On Ubuntu, ``libpthread`` is part of the ``libpthread-stubs0-dev`` package
|
||||
* ``Redistributable Visual C++ 2015 Update 3 (64-bits)`` - Reported dependency on Windows. Please `download from Microsoft <https://www.microsoft.com/download/details.aspx?id=53587>`_
|
||||
|
||||
CUDA Dependency
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
The GPU capable builds (Python, NodeJS, C++, etc) depend on CUDA 10.1 and CuDNN v7.6.
|
||||
|
||||
.. _cuda-inference-deps:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Supported Platforms
|
||||
|
||||
SUPPORTED_PLATFORMS
|
||||
|
||||
|
7
doc/Decoder-API.rst
Normal file
7
doc/Decoder-API.rst
Normal file
@ -0,0 +1,7 @@
|
||||
.. _decoder-api:
|
||||
|
||||
Decoder API reference
|
||||
=====================
|
||||
|
||||
.. automodule:: native_client.ctcdecode
|
||||
:members:
|
@ -13,8 +13,8 @@ Creating a model instance and loading model
|
||||
:start-after: sphinx-doc: csharp_ref_model_start
|
||||
:end-before: sphinx-doc: csharp_ref_model_stop
|
||||
|
||||
Deploying trained model
|
||||
-----------------------
|
||||
Transcribing audio with the loaded model
|
||||
----------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/dotnet/STTConsole/Program.cs
|
||||
:language: csharp
|
||||
|
@ -3,54 +3,17 @@
|
||||
Exporting a model for deployment
|
||||
================================
|
||||
|
||||
After you train a STT model, your model will be stored on disk as a :ref:`checkpoint file <checkpointing>`. Model checkpoints are useful for resuming training at a later date, but they are not the correct format for deploying a model into production. The best model format for deployment is a protobuf file.
|
||||
After you train a STT model, your model will be stored on disk as a :ref:`checkpoint file <checkpointing>`. Model checkpoints are useful for resuming training at a later date, but they are not the correct format for deploying a model into production. The model format for deployment is a TFLite file.
|
||||
|
||||
This document explains how to export model checkpoints as a protobuf file.
|
||||
This document explains how to export model checkpoints as a TFLite file.
|
||||
|
||||
How to export a model
|
||||
---------------------
|
||||
|
||||
The simplest way to export STT model checkpoints for deployment is via ``train.py`` and the ``--export_dir`` flag.
|
||||
You can export STT model checkpoints for deployment by using the export script and the ``--export_dir`` flag.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 train.py \
|
||||
--checkpoint_dir path/to/existing/model/checkpoints \
|
||||
--export_dir where/to/export/new/protobuf
|
||||
|
||||
However, you may want to export a model for small devices or for more efficient memory usage. In this case, follow the steps below.
|
||||
|
||||
Exporting as memory-mapped
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
By default, the protobuf exported by ``train.py`` will be loaded in memory every time the model is deployed. This results in extra loading time and memory consumption. Creating a memory-mapped protobuf file will avoid these issues.
|
||||
|
||||
First, export your checkpoints to a protobuf with ``train.py``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 train.py \
|
||||
--checkpoint_dir path/to/existing/model/checkpoints \
|
||||
--export_dir where/to/export/new/protobuf
|
||||
|
||||
Second, convert the protobuf to a memory-mapped protobuf with ``convert_graphdef_memmapped_format``:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ convert_graphdef_memmapped_format \
|
||||
--in_graph=output_graph.pb \
|
||||
--out_graph=output_graph.pbmm
|
||||
|
||||
``convert_graphdef_memmapped_format`` is a dedicated tool to convert regular protobuf files to memory-mapped protobufs. You can find this tool pre-compiled on the STT `release page <https://github.com/coqui-ai/STT/releases>`_. You should download and decompress ``convert_graphdef_memmapped_format`` before use. Upon a successful conversion ``convert_graphdef_memmapped_format`` will report conversion of a non-zero number of nodes.
|
||||
|
||||
Exporting for small devices
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If you want to deploy a STT model on a small device, you might consider exporting the model with `Tensorflow Lite <https://www.tensorflow.org/lite>`_ support. Export STT model checkpoints for Tensorflow Lite via ``train.py`` and the ``--export_tflite`` flag.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 train.py \
|
||||
--checkpoint_dir path/to/existing/model/checkpoints \
|
||||
--export_dir where/to/export/new/protobuf \
|
||||
--export_tflite
|
||||
$ python3 -m coqui_stt_training.export \
|
||||
--checkpoint_dir path/to/existing/model/checkpoints \
|
||||
--export_dir where/to/export/model
|
||||
|
@ -13,8 +13,8 @@ Creating a model instance and loading model
|
||||
:start-after: sphinx-doc: java_ref_model_start
|
||||
:end-before: sphinx-doc: java_ref_model_stop
|
||||
|
||||
Deploying trained model
|
||||
-----------------------
|
||||
Transcribing audio with the loaded model
|
||||
----------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java
|
||||
:language: java
|
||||
|
@ -49,7 +49,7 @@ For more custom use cases, you might familiarize yourself with the `KenLM toolki
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 generate_lm.py \
|
||||
python generate_lm.py \
|
||||
--input_txt librispeech-lm-norm.txt.gz \
|
||||
--output_dir . \
|
||||
--top_k 500000 \
|
||||
|
@ -5,14 +5,14 @@ Automatic Mixed Precision
|
||||
|
||||
Training with `automatic mixed precision <https://medium.com/tensorflow/automatic-mixed-precision-in-tensorflow-for-faster-ai-training-on-nvidia-gpus-6033234b2540>`_ is available when training STT on a GPU.
|
||||
|
||||
Mixed precision training makes use of both ``FP32`` and ``FP16`` precisions where appropriate. ``FP16`` operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput. Mixed precision training often allows larger batch sizes. Automatic mixed precision training can be enabled by including the flag `--automatic_mixed_precision` at training time:
|
||||
Mixed precision training makes use of both ``FP32`` and ``FP16`` precisions where appropriate. ``FP16`` operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput. Mixed precision training often allows larger batch sizes. Automatic mixed precision training can be enabled by including the flag ``--automatic_mixed_precision true`` at training time:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 train.py \
|
||||
$ python -m coqui_stt_training.train \
|
||||
--train_files train.csv \
|
||||
--dev_files dev.csv \
|
||||
--test_files test.csv \
|
||||
--automatic_mixed_precision
|
||||
--dev_files dev.csv \
|
||||
--test_files test.csv \
|
||||
--automatic_mixed_precision true
|
||||
|
||||
On a Volta generation V100 GPU, automatic mixed precision can speed up 🐸STT training and evaluation by approximately 30% to 40%.
|
||||
|
@ -15,8 +15,8 @@ Creating a model instance and loading model
|
||||
:start-after: sphinx-doc: js_ref_model_start
|
||||
:end-before: sphinx-doc: js_ref_model_stop
|
||||
|
||||
Deploying trained model
|
||||
-----------------------
|
||||
Transcribing audio with the loaded model
|
||||
----------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/javascript/client.ts
|
||||
:language: javascript
|
||||
|
@ -1,3 +1,5 @@
|
||||
.. _python-api:
|
||||
|
||||
Python
|
||||
======
|
||||
|
||||
|
@ -15,8 +15,8 @@ Creating a model instance and loading model
|
||||
:start-after: sphinx-doc: python_ref_model_start
|
||||
:end-before: sphinx-doc: python_ref_model_stop
|
||||
|
||||
Deploying trained model
|
||||
-----------------------
|
||||
Transcribing audio with the loaded model
|
||||
----------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/python/client.py
|
||||
:language: python
|
||||
|
@ -5,67 +5,46 @@ Supported platforms
|
||||
|
||||
Here we maintain the list of supported platforms for deployment.
|
||||
|
||||
*Note that 🐸STT currently only provides packages for CPU deployment with Python 3.5 or higher on Linux. We're working to get the rest of our usually supported packages back up and running as soon as possible.*
|
||||
|
||||
Linux / AMD64 without GPU
|
||||
Linux / AMD64
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down performance)
|
||||
* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Linux / AMD64 with GPU
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down performance)
|
||||
* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8)
|
||||
* CUDA 10.0 (and capable GPU)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* glibc >= 2.24, libstdc++6 >= 6.3
|
||||
* TensorFlow Lite runtime
|
||||
|
||||
Linux / ARMv7
|
||||
^^^^^^^^^^^^^
|
||||
* Cortex-A53 compatible ARMv7 SoC with Neon support
|
||||
* Raspbian Buster-compatible distribution
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* TensorFlow Lite runtime
|
||||
|
||||
Linux / Aarch64
|
||||
^^^^^^^^^^^^^^^
|
||||
* Cortex-A72 compatible Aarch64 SoC
|
||||
* ARMbian Buster-compatible distribution
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* TensorFlow Lite runtime
|
||||
|
||||
Android / ARMv7
|
||||
^^^^^^^^^^^^^^^
|
||||
* ARMv7 SoC with Neon support
|
||||
* Android 7.0-10.0
|
||||
* NDK API level >= 21
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* TensorFlow Lite runtime
|
||||
|
||||
Android / Aarch64
|
||||
^^^^^^^^^^^^^^^^^
|
||||
* Aarch64 SoC
|
||||
* Android 7.0-10.0
|
||||
* NDK API level >= 21
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* TensorFlow Lite runtime
|
||||
|
||||
macOS / AMD64
|
||||
^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down performance)
|
||||
* macOS >= 10.10
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* TensorFlow Lite runtime
|
||||
|
||||
Windows / AMD64 without GPU
|
||||
Windows / AMD64
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down performance)
|
||||
* Windows Server >= 2012 R2 ; Windows >= 8.1
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Windows / AMD64 with GPU
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down performance)
|
||||
* Windows Server >= 2012 R2 ; Windows >= 8.1
|
||||
* CUDA 10.0 (and capable GPU)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
* TensorFlow Lite runtime
|
||||
|
@ -5,15 +5,27 @@ Training: Advanced Topics
|
||||
|
||||
This document contains more advanced topics with regard to training models with STT. If you'd prefer a lighter introduction, please refer to :ref:`Training: Quickstart<intro-training-docs>`.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
1. :ref:`training-flags`
|
||||
2. :ref:`transfer-learning`
|
||||
3. :ref:`automatic-mixed-precision`
|
||||
4. :ref:`checkpointing`
|
||||
5. :ref:`common-voice-data`
|
||||
6. :ref:`training-data-augmentation`
|
||||
7. :ref:`exporting-checkpoints`
|
||||
8. :ref:`model-geometry`
|
||||
9. :ref:`parallel-training-optimization`
|
||||
10. :ref:`data-importers`
|
||||
11. :ref:`byte-output-mode`
|
||||
TRAINING_FLAGS
|
||||
|
||||
TRANSFER_LEARNING
|
||||
|
||||
MIXED_PRECISION
|
||||
|
||||
CHECKPOINTING
|
||||
|
||||
COMMON_VOICE_DATA
|
||||
|
||||
AUGMENTATION
|
||||
|
||||
EXPORTING_MODELS
|
||||
|
||||
Geometry
|
||||
|
||||
PARALLLEL_OPTIMIZATION
|
||||
|
||||
DATASET_IMPORTERS
|
||||
|
||||
Checkpoint-Inference
|
||||
|
@ -3,14 +3,12 @@
|
||||
Command-line flags for the training scripts
|
||||
===========================================
|
||||
|
||||
Below you can find the definition of all command-line flags supported by the training scripts. This includes ``train.py``, ``evaluate.py``, ``evaluate_tflite.py``, ``transcribe.py`` and ``lm_optimizer.py``.
|
||||
Below you can find the definition of all command-line flags supported by the training modules. This includes the modules ``coqui_stt_training.train``, ``coqui_stt_training.evaluate``, ``coqui_stt_training.export``, ``coqui_stt_training.training_graph_inference``, and the scripts ``evaluate_tflite.py``, ``transcribe.py`` and ``lm_optimizer.py``.
|
||||
|
||||
Flags
|
||||
-----
|
||||
|
||||
.. literalinclude:: ../training/coqui_stt_training/util/flags.py
|
||||
.. literalinclude:: ../training/coqui_stt_training/util/config.py
|
||||
:language: python
|
||||
:linenos:
|
||||
:lineno-match:
|
||||
:start-after: sphinx-doc: training_ref_flags_start
|
||||
:end-before: sphinx-doc: training_ref_flags_end
|
||||
|
@ -41,18 +41,18 @@ If you don't want to use our Dockerfile template, you will need to manually inst
|
||||
Prerequisites
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* `Python 3.6 <https://www.python.org/>`_
|
||||
* `Python 3.6, 3.7 or 3.8 <https://www.python.org/>`_
|
||||
* Mac or Linux environment (training on Windows is *not* currently supported)
|
||||
* CUDA 10.0 and CuDNN v7.6
|
||||
|
||||
Download
|
||||
^^^^^^^^
|
||||
|
||||
We recommend that you clone the STT repo from the latest stable release branch on GitHub (e.g. ``v0.9.3``). You can find all 🐸STT releases `here <https://github.com/coqui-ai/STT/releases>`_.
|
||||
Clone the STT repo from GitHub:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ git clone --branch v0.9.3 --depth 1 https://github.com/coqui-ai/STT
|
||||
$ git clone https://github.com/coqui-ai/STT
|
||||
|
||||
Installation
|
||||
^^^^^^^^^^^^
|
||||
@ -86,23 +86,17 @@ Now that we have cloned the STT repo from Github and setup a virtual environment
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd STT
|
||||
$ python3 -m pip install --upgrade pip wheel setuptools
|
||||
$ python3 -m pip install --upgrade -e .
|
||||
|
||||
The ``webrtcvad`` package may additionally require ``python3-dev``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ sudo apt-get install python3-dev
|
||||
$ python -m pip install --upgrade pip wheel setuptools
|
||||
$ python -m pip install --upgrade -e .
|
||||
|
||||
If you have an NVIDIA GPU, it is highly recommended to install TensorFlow with GPU support. Training will be significantly faster than using the CPU.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 -m pip uninstall tensorflow
|
||||
$ python3 -m pip install 'tensorflow-gpu==1.15.4'
|
||||
$ python -m pip uninstall tensorflow
|
||||
$ python -m pip install 'tensorflow-gpu==1.15.4'
|
||||
|
||||
Please ensure you have the required `CUDA dependency <https://www.tensorflow.org/install/source#gpu>`_ and :ref:`prerequisites <training-deps>`.
|
||||
Please ensure you have the required :ref:`prerequisites <training-deps>` and a working CUDA installation with the versions listed above.
|
||||
|
||||
Verify Install
|
||||
""""""""""""""
|
||||
@ -118,12 +112,12 @@ This script will train a model on a single audio file. If the script exits succe
|
||||
Training on your own Data
|
||||
-------------------------
|
||||
|
||||
Whether you used our Dockerfile template or you set up your own environment, the central STT training script is ``train.py``. For a list of command line options, use the ``--helpfull`` flag:
|
||||
Whether you used our Dockerfile template or you set up your own environment, the central STT training module is ``python -m coqui_stt_training.train``. For a list of command line options, use the ``--help`` flag:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd STT
|
||||
$ python3 train.py --helpfull
|
||||
$ python -m coqui_stt_training.train --help
|
||||
|
||||
Training Data
|
||||
^^^^^^^^^^^^^
|
||||
@ -143,12 +137,18 @@ Text transcripts should be formatted exactly as the transcripts you expect your
|
||||
CSV file format
|
||||
"""""""""""""""
|
||||
|
||||
The audio and transcripts used in training are passed to ``train.py`` via CSV files. You should supply CSV files for training (``train.csv``), development (``dev.csv``), and testing (``test.csv``). The CSV files should contain three columns:
|
||||
The audio and transcripts used in training are specified via CSV files. You should supply CSV files for training (``train.csv``), validation (``dev.csv``), and testing (``test.csv``). The CSV files should contain three columns:
|
||||
|
||||
1. ``wav_filename`` - the path to a WAV file on your machine
|
||||
2. ``wav_filesize`` - the number of bytes in the WAV file
|
||||
3. ``transcript`` - the text transcript of the WAV file
|
||||
|
||||
Alternatively, if you don't have pre-defined splits for training, validation and testing, you can use the ``--auto_input_dataset`` flag to automatically split a single CSV into subsets and generate an alphabet automatically:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m coqui_stt_training.train --auto_input_dataset samples.csv
|
||||
|
||||
Start Training
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
@ -157,11 +157,11 @@ After you've successfully installed STT and have access to data, you can start a
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd STT
|
||||
$ python3 train.py --train_files train.csv --dev_files dev.csv --test_files test.csv
|
||||
$ python -m coqui_stt_training.train --train_files train.csv --dev_files dev.csv --test_files test.csv
|
||||
|
||||
Next Steps
|
||||
----------
|
||||
|
||||
You will want to customize the settings of ``train.py`` to work better with your data and your hardware. You should review the :ref:`command-line training flags <training-flags>`, and experiment with different settings.
|
||||
You will want to customize the training settings to work better with your data and your hardware. You should review the :ref:`command-line training flags <training-flags>`, and experiment with different settings.
|
||||
|
||||
For more in-depth training documentation, you should refer to the :ref:`Advanced Training Topics <advanced-training-docs>` section.
|
||||
|
@ -14,17 +14,17 @@ If your own data uses the *exact* same alphabet as the English release model (i
|
||||
Fine-Tuning (same alphabet)
|
||||
---------------------------
|
||||
|
||||
You can fine-tune pre-trained model checkpoints by using the ``--checkpoint_dir`` flag in ``train.py``. Specify the path to the checkpoints, and training will resume from the pre-trained model.
|
||||
You can fine-tune pre-trained model checkpoints by using the ``--checkpoint_dir`` flag. Specify the path to the checkpoints, and training will resume from the pre-trained model.
|
||||
|
||||
For example, if you want to fine tune existing checkpoints to your own data in ``my-train.csv``, ``my-dev.csv``, and ``my-test.csv``, you can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python3 train.py \
|
||||
--checkpoint_dir path/to/checkpoint/folder \
|
||||
--train_files my-train.csv \
|
||||
--dev_files my-dev.csv \
|
||||
--test_files my-test.csv
|
||||
$ python -m coqui_stt_training.train \
|
||||
--checkpoint_dir path/to/checkpoint/folder \
|
||||
--train_files my-train.csv \
|
||||
--dev_files my-dev.csv \
|
||||
--test_files my-test.csv
|
||||
|
||||
Transfer-Learning (new alphabet)
|
||||
--------------------------------
|
||||
@ -39,12 +39,12 @@ You need to specify the location of the pre-trained model with ``--load_checkpoi
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 train.py \
|
||||
python -m coqui_stt_training.train \
|
||||
--drop_source_layers 1 \
|
||||
--alphabet_config_path my-alphabet.txt \
|
||||
--save_checkpoint_dir path/to/output-checkpoint/folder \
|
||||
--load_checkpoint_dir path/to/input-checkpoint/folder \
|
||||
--train_files my-new-language-train.csv \
|
||||
--train_files my-new-language-train.csv \
|
||||
--dev_files my-new-language-dev.csv \
|
||||
--test_files my-new-language-test.csv
|
||||
|
||||
|
16
doc/conf.py
16
doc/conf.py
@ -24,7 +24,8 @@ import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../"))
|
||||
|
||||
autodoc_mock_imports = ["stt"]
|
||||
autodoc_mock_imports = ["stt", "native_client.ctcdecode.swigwrapper"]
|
||||
autodoc_member_order = "bysource"
|
||||
|
||||
# This is in fact only relevant on ReadTheDocs, but we want to run the same way
|
||||
# on our CI as in RTD to avoid regressions on RTD that we would not catch on CI
|
||||
@ -128,7 +129,6 @@ todo_include_todos = False
|
||||
|
||||
add_module_names = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
@ -136,10 +136,20 @@ add_module_names = False
|
||||
#
|
||||
html_theme = "furo"
|
||||
|
||||
html_theme_options = {
|
||||
"light_logo": "coqui-STT-circle.png",
|
||||
"dark_logo": "coqui-STT-circle.png",
|
||||
"sidebar_hide_name": True,
|
||||
}
|
||||
|
||||
html_css_files = [
|
||||
"custom.css",
|
||||
]
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = [".static"]
|
||||
html_static_path = ["static"]
|
||||
|
||||
|
||||
# -- Options for HTMLHelp output ------------------------------------------
|
||||
|
@ -3,8 +3,8 @@
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Coqui STT
|
||||
=========
|
||||
.. image:: https://raw.githubusercontent.com/coqui-ai/STT/main/images/coqui-STT-logo-green.png
|
||||
:alt: Coqui STT logo and wordmark
|
||||
|
||||
**Coqui STT** (🐸STT) is an open-source deep-learning toolkit for training and deploying speech-to-text models.
|
||||
|
||||
@ -19,12 +19,14 @@ Coqui STT
|
||||
|
||||
TRAINING_INTRO
|
||||
|
||||
TRAINING_ADVANCED
|
||||
|
||||
BUILDING
|
||||
|
||||
Quickstart: Deployment
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
Quickstart
|
||||
^^^^^^^^^^
|
||||
|
||||
The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python 3.5 or higher (*Note - only Linux supported at this time. We are working to get our normally supported packages back up and running.*):
|
||||
The fastest way to use a pre-trained 🐸STT model is with the 🐸STT model manager, a tool that lets you quickly test and demo models locally. You'll need Python 3.6, 3.7, 3.8 or 3.9:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@ -32,20 +34,12 @@ The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python
|
||||
$ python3 -m venv venv-stt
|
||||
$ source venv-stt/bin/activate
|
||||
|
||||
# Install 🐸STT
|
||||
$ python3 -m pip install -U pip
|
||||
$ python3 -m pip install stt
|
||||
# Install 🐸STT model manager
|
||||
$ python -m pip install -U pip
|
||||
$ python -m pip install coqui-stt-model-manager
|
||||
|
||||
# Download 🐸's pre-trained English models
|
||||
$ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.pbmm
|
||||
$ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.scorer
|
||||
|
||||
# Download some example audio files
|
||||
$ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/audio-0.9.3.tar.gz
|
||||
$ tar -xvf audio-0.9.3.tar.gz
|
||||
|
||||
# Transcribe an audio file
|
||||
$ stt --model coqui-stt-0.9.3-models.pbmm --scorer coqui-stt-0.9.3-models.scorer --audio audio/2830-3980-0043.wav
|
||||
# Run the model manager. A browser tab will open and you can then download and test models from the Model Zoo.
|
||||
$ stt-model-manager
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
@ -95,6 +89,17 @@ The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python
|
||||
|
||||
playbook/README
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Advanced topics
|
||||
|
||||
DECODER
|
||||
|
||||
Decoder-API
|
||||
|
||||
PARALLEL_OPTIMIZATION
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
|
@ -39,7 +39,7 @@ Numbers should be written in full (ie as a [cardinal](https://en.wikipedia.org/w
|
||||
|
||||
### Data from Common Voice
|
||||
|
||||
If you are using data from Common Voice for training a model, you will need to prepare it as [outlined in the 🐸STT documentation](https://stt.readthedocs.io/en/latest/TRAINING.html#common-voice-training-data).
|
||||
If you are using data from Common Voice for training a model, you will need to prepare it as [outlined in the 🐸STT documentation](https://stt.readthedocs.io/en/latest/COMMON_VOICE_DATA.html#common-voice-data).
|
||||
|
||||
In this example we will prepare the Indonesian dataset for training, but you can use any language from Common Voice that you prefer. We've chosen Indonesian as it has the same [orthographic alphabet](ALPHABET.md) as English, which means we don't have to use a different `alphabet.txt` file for training; we can use the default.
|
||||
|
||||
|
@ -219,12 +219,12 @@ Next, we need to install the `native_client` package, which contains the `genera
|
||||
|
||||
The `generate_scorer_package`, once installed via the `native client` package, is usable on _all platforms_ supported by 🐸STT. This is so that developers can generate scorers _on-device_, such as on an Android device, or Raspberry Pi 3.
|
||||
|
||||
To install `generate_scorer_package`, first download the relevant `native client` package from the [🐸STT GitHub releases page](https://github.com/coqui-ai/STT/releases/tag/v0.9.3) into the `data/lm` directory. The Docker image uses Ubuntu Linux, so you should use either the `native_client.amd64.cuda.linux.tar.xz` package if you are using `cuda` or the `native_client.amd64.cpu.linux.tar.xz` package if not.
|
||||
To install `generate_scorer_package`, first download the relevant `native client` package from the [🐸STT GitHub releases page](https://github.com/coqui-ai/STT/releases/latest) into the `data/lm` directory. The Docker image uses Ubuntu Linux, so you should use either the `native_client.amd64.cuda.linux.tar.xz` package if you are using `cuda` or the `native_client.amd64.cpu.linux.tar.xz` package if not.
|
||||
|
||||
The easiest way to download the package and extract it is using `curl [URL] | tar -Jxvf [FILENAME]`:
|
||||
The easiest way to download the package and extract it is to pipe `curl` into `tar`, using `-` so that `tar` reads the archive from standard input: `curl -L [URL] | tar -Jxvf -`:
|
||||
|
||||
```
|
||||
root@dcb62aada58b:/STT/data/lm# curl https://github.com/coqui-ai/STT/releases/download/v0.9.3/native_client.amd64.cuda.linux.tar.xz | tar -Jxvf native_client.amd64.cuda.linux.tar.xz
|
||||
root@dcb62aada58b:/STT/data/lm# curl -L https://github.com/coqui-ai/STT/releases/download/v1.0.0/native_client.tflite.Linux.tar.xz | tar -Jxvf -
|
||||
libstt.so
|
||||
generate_scorer_package
|
||||
LICENSE
|
||||
@ -233,7 +233,7 @@ coqui-stt.h
|
||||
README.coqui
|
||||
```
|
||||
|
||||
You can now generate a `ken.lm` scorer file.
|
||||
You can now generate a KenLM scorer file.
|
||||
|
||||
```
|
||||
root@dcb62aada58b:/STT/data/lm# ./generate_scorer_package \
|
||||
|
@ -4,6 +4,7 @@ sphinx==3.5.2
|
||||
sphinx-js==3.1
|
||||
furo==2021.2.28b28
|
||||
pygments==2.7.4
|
||||
docutils>=0.12,<=0.17.1
|
||||
#FIXME: switch to stable after C# changes have been merged: https://github.com/djungelorm/sphinx-csharp/pull/8
|
||||
git+https://github.com/reuben/sphinx-csharp.git@9dc6202f558e3d3fa14ec7f5f1e36a8e66e6d622
|
||||
recommonmark==0.7.1
|
||||
|
BIN
doc/static/coqui-STT-circle.png
vendored
Normal file
BIN
doc/static/coqui-STT-circle.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 38 KiB |
3
doc/static/custom.css
vendored
Normal file
3
doc/static/custom.css
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
#flags pre, #inference-tools-in-the-training-package pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
@ -10,10 +10,9 @@ import wave
|
||||
from functools import partial
|
||||
from multiprocessing import JoinableQueue, Manager, Process, cpu_count
|
||||
|
||||
import absl.app
|
||||
import numpy as np
|
||||
from coqui_stt_training.util.evaluate_tools import calculate_and_print_report
|
||||
from coqui_stt_training.util.flags import create_flags
|
||||
from coqui_stt_training.util.config import Config, initialize_globals_from_args
|
||||
from six.moves import range, zip
|
||||
from stt import Model
|
||||
|
||||
@ -61,6 +60,7 @@ def tflite_worker(model, scorer, queue_in, queue_out, gpu_mask):
|
||||
|
||||
|
||||
def main(args):
|
||||
initialize_globals_from_args()
|
||||
manager = Manager()
|
||||
work_todo = JoinableQueue() # this is where we are going to store input data
|
||||
work_done = manager.Queue() # this where we are gonna push them out
|
||||
|
@ -4,36 +4,36 @@ from __future__ import absolute_import, print_function
|
||||
|
||||
import sys
|
||||
|
||||
import absl.app
|
||||
import optuna
|
||||
import tensorflow.compat.v1 as tfv1
|
||||
from coqui_stt_ctcdecoder import Scorer
|
||||
from coqui_stt_training.evaluate import evaluate
|
||||
from coqui_stt_training.train import create_model
|
||||
from coqui_stt_training.util.config import Config, initialize_globals_from_cli
|
||||
from coqui_stt_training.train import create_model, early_training_checks
|
||||
from coqui_stt_training.util.config import (
|
||||
Config,
|
||||
initialize_globals_from_cli,
|
||||
log_error,
|
||||
)
|
||||
from coqui_stt_training.util.evaluate_tools import wer_cer_batch
|
||||
from coqui_stt_training.util.flags import FLAGS, create_flags
|
||||
from coqui_stt_training.util.logging import log_error
|
||||
|
||||
|
||||
def character_based():
|
||||
is_character_based = False
|
||||
if FLAGS.scorer_path:
|
||||
scorer = Scorer(
|
||||
FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path, Config.alphabet
|
||||
)
|
||||
is_character_based = scorer.is_utf8_mode()
|
||||
scorer = Scorer(
|
||||
Config.lm_alpha, Config.lm_beta, Config.scorer_path, Config.alphabet
|
||||
)
|
||||
is_character_based = scorer.is_utf8_mode()
|
||||
return is_character_based
|
||||
|
||||
|
||||
def objective(trial):
|
||||
FLAGS.lm_alpha = trial.suggest_uniform("lm_alpha", 0, FLAGS.lm_alpha_max)
|
||||
FLAGS.lm_beta = trial.suggest_uniform("lm_beta", 0, FLAGS.lm_beta_max)
|
||||
Config.lm_alpha = trial.suggest_uniform("lm_alpha", 0, Config.lm_alpha_max)
|
||||
Config.lm_beta = trial.suggest_uniform("lm_beta", 0, Config.lm_beta_max)
|
||||
|
||||
is_character_based = trial.study.user_attrs["is_character_based"]
|
||||
|
||||
samples = []
|
||||
for step, test_file in enumerate(FLAGS.test_files.split(",")):
|
||||
for step, test_file in enumerate(Config.test_files):
|
||||
tfv1.reset_default_graph()
|
||||
|
||||
current_samples = evaluate([test_file], create_model)
|
||||
@ -51,10 +51,18 @@ def objective(trial):
|
||||
return cer if is_character_based else wer
|
||||
|
||||
|
||||
def main(_):
|
||||
def main():
|
||||
initialize_globals_from_cli()
|
||||
early_training_checks()
|
||||
|
||||
if not FLAGS.test_files:
|
||||
if not Config.scorer_path:
|
||||
log_error(
|
||||
"Missing --scorer_path: can't optimize scorer alpha and beta "
|
||||
"parameters without a scorer!"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if not Config.test_files:
|
||||
log_error(
|
||||
"You need to specify what files to use for evaluation via "
|
||||
"the --test_files flag."
|
||||
@ -65,7 +73,7 @@ def main(_):
|
||||
|
||||
study = optuna.create_study()
|
||||
study.set_user_attr("is_character_based", is_character_based)
|
||||
study.optimize(objective, n_jobs=1, n_trials=FLAGS.n_trials)
|
||||
study.optimize(objective, n_jobs=1, n_trials=Config.n_trials)
|
||||
print(
|
||||
"Best params: lm_alpha={} and lm_beta={} with WER={}".format(
|
||||
study.best_params["lm_alpha"],
|
||||
@ -76,5 +84,4 @@ def main(_):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
create_flags()
|
||||
absl.app.run(main)
|
||||
main()
|
||||
|
@ -5,10 +5,25 @@ LOCAL_MODULE := stt-prebuilt
|
||||
LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libstt.so
|
||||
include $(PREBUILT_SHARED_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := kenlm-prebuilt
|
||||
LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libkenlm.so
|
||||
include $(PREBUILT_SHARED_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := tensorflowlite-prebuilt
|
||||
LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/tensorflow/lite/libtensorflowlite.so
|
||||
include $(PREBUILT_SHARED_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := tflitedelegates-prebuilt
|
||||
LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libtflitedelegates.so
|
||||
include $(PREBUILT_SHARED_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_CPP_EXTENSION := .cc .cxx .cpp
|
||||
LOCAL_MODULE := stt
|
||||
LOCAL_SRC_FILES := client.cc
|
||||
LOCAL_SHARED_LIBRARIES := stt-prebuilt
|
||||
LOCAL_SHARED_LIBRARIES := stt-prebuilt kenlm-prebuilt tensorflowlite-prebuilt tflitedelegates-prebuilt
|
||||
LOCAL_LDFLAGS := -Wl,--no-as-needed
|
||||
include $(BUILD_EXECUTABLE)
|
||||
|
@ -1,22 +1,9 @@
|
||||
# Description: Coqui STT native client library.
|
||||
|
||||
load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object", "tf_copts", "lrt_if_needed")
|
||||
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
|
||||
load("@org_tensorflow//tensorflow:tensorflow.bzl", "lrt_if_needed")
|
||||
load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps")
|
||||
load("@build_bazel_rules_apple//apple:ios.bzl", "ios_static_framework")
|
||||
|
||||
load(
|
||||
"@org_tensorflow//tensorflow/lite:build_def.bzl",
|
||||
"tflite_copts",
|
||||
"tflite_linkopts",
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "tflite",
|
||||
define_values = {
|
||||
"runtime": "tflite",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "rpi3",
|
||||
@ -32,6 +19,13 @@ config_setting(
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "rpi4ub-armv8",
|
||||
define_values = {
|
||||
"target_system": "rpi4ub-armv8"
|
||||
},
|
||||
)
|
||||
|
||||
genrule(
|
||||
name = "workspace_status",
|
||||
outs = ["workspace_status.cc"],
|
||||
@ -52,6 +46,31 @@ OPENFST_INCLUDES_PLATFORM = select({
|
||||
"//conditions:default": ["ctcdecode/third_party/openfst-1.6.7/src/include"],
|
||||
})
|
||||
|
||||
DECODER_SOURCES = [
|
||||
"alphabet.cc",
|
||||
"alphabet.h",
|
||||
"ctcdecode/ctc_beam_search_decoder.cpp",
|
||||
"ctcdecode/ctc_beam_search_decoder.h",
|
||||
"ctcdecode/decoder_utils.cpp",
|
||||
"ctcdecode/decoder_utils.h",
|
||||
"ctcdecode/path_trie.cpp",
|
||||
"ctcdecode/path_trie.h",
|
||||
"ctcdecode/scorer.cpp",
|
||||
"ctcdecode/scorer.h",
|
||||
] + OPENFST_SOURCES_PLATFORM
|
||||
|
||||
DECODER_INCLUDES = [
|
||||
".",
|
||||
"ctcdecode/third_party/ThreadPool",
|
||||
"ctcdecode/third_party/object_pool",
|
||||
] + OPENFST_INCLUDES_PLATFORM
|
||||
|
||||
DECODER_LINKOPTS = [
|
||||
"-lm",
|
||||
"-ldl",
|
||||
"-pthread",
|
||||
]
|
||||
|
||||
LINUX_LINKOPTS = [
|
||||
"-ldl",
|
||||
"-pthread",
|
||||
@ -60,10 +79,12 @@ LINUX_LINKOPTS = [
|
||||
"-Wl,-export-dynamic",
|
||||
]
|
||||
|
||||
cc_library(
|
||||
name = "kenlm",
|
||||
cc_binary(
|
||||
name = "libkenlm.so",
|
||||
srcs = glob([
|
||||
"kenlm/lm/*.hh",
|
||||
"kenlm/lm/*.cc",
|
||||
"kenlm/util/*.hh",
|
||||
"kenlm/util/*.cc",
|
||||
"kenlm/util/double-conversion/*.cc",
|
||||
"kenlm/util/double-conversion/*.h",
|
||||
@ -72,43 +93,134 @@ cc_library(
|
||||
"kenlm/*/*test.cc",
|
||||
"kenlm/*/*main.cc",
|
||||
],),
|
||||
copts = select({
|
||||
"//tensorflow:windows": ["/std:c++14"],
|
||||
"//conditions:default": ["-std=c++14", "-fwrapv", "-fvisibility=hidden"],
|
||||
}),
|
||||
defines = ["KENLM_MAX_ORDER=6"],
|
||||
includes = ["kenlm"],
|
||||
linkshared = 1,
|
||||
linkopts = select({
|
||||
"//tensorflow:ios": [
|
||||
"-Wl,-install_name,@rpath/libkenlm.so",
|
||||
],
|
||||
"//tensorflow:macos": [
|
||||
"-Wl,-install_name,@rpath/libkenlm.so",
|
||||
],
|
||||
"//tensorflow:windows": [],
|
||||
"//conditions:default": [
|
||||
"-Wl,-soname,libkenlm.so",
|
||||
],
|
||||
}),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name="kenlm",
|
||||
hdrs = glob([
|
||||
"kenlm/lm/*.hh",
|
||||
"kenlm/util/*.hh",
|
||||
]),
|
||||
copts = ["-std=c++11"],
|
||||
srcs = [":libkenlm.so"],
|
||||
copts = ["-std=c++14"],
|
||||
defines = ["KENLM_MAX_ORDER=6"],
|
||||
includes = ["kenlm"],
|
||||
includes = [".", "kenlm"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "flashlight",
|
||||
hdrs = [
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/common/String.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/common/System.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/Decoder.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/LexiconDecoder.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/LexiconFreeDecoder.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/ConvLM.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/KenLM.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/LM.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/ZeroLM.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/Trie.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/Utils.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/dictionary/Defines.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/dictionary/Dictionary.h",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/dictionary/Utils.h",
|
||||
],
|
||||
srcs = [
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/common/String.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/common/System.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/LexiconDecoder.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/LexiconFreeDecoder.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/ConvLM.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/KenLM.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/lm/ZeroLM.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/Trie.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/decoder/Utils.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/dictionary/Dictionary.cpp",
|
||||
"ctcdecode/third_party/flashlight/flashlight/lib/text/dictionary/Utils.cpp",
|
||||
],
|
||||
includes = ["ctcdecode/third_party/flashlight"],
|
||||
deps = [":kenlm"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "decoder",
|
||||
srcs = [
|
||||
"ctcdecode/ctc_beam_search_decoder.cpp",
|
||||
"ctcdecode/decoder_utils.cpp",
|
||||
"ctcdecode/decoder_utils.h",
|
||||
"ctcdecode/scorer.cpp",
|
||||
"ctcdecode/path_trie.cpp",
|
||||
"ctcdecode/path_trie.h",
|
||||
"alphabet.cc",
|
||||
] + OPENFST_SOURCES_PLATFORM,
|
||||
srcs = DECODER_SOURCES,
|
||||
includes = DECODER_INCLUDES,
|
||||
deps = [":kenlm", ":flashlight"],
|
||||
linkopts = DECODER_LINKOPTS,
|
||||
copts = select({
|
||||
"//tensorflow:windows": ["/std:c++14"],
|
||||
"//conditions:default": ["-std=c++14", "-fexceptions", "-fwrapv"],
|
||||
}),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tflite",
|
||||
hdrs = [
|
||||
"ctcdecode/ctc_beam_search_decoder.h",
|
||||
"ctcdecode/scorer.h",
|
||||
"ctcdecode/decoder_utils.h",
|
||||
"alphabet.h",
|
||||
"//tensorflow/lite:model.h",
|
||||
"//tensorflow/lite/kernels:register.h",
|
||||
] + select({
|
||||
"//tensorflow:android": [
|
||||
"//tensorflow/lite/delegates/gpu:delegate.h",
|
||||
"//tensorflow/lite/delegates/hexagon:hexagon_delegate.h",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
srcs = [
|
||||
"//tensorflow/lite:libtensorflowlite.so",
|
||||
],
|
||||
includes = [
|
||||
".",
|
||||
"ctcdecode/third_party/ThreadPool",
|
||||
"ctcdecode/third_party/object_pool",
|
||||
] + OPENFST_INCLUDES_PLATFORM,
|
||||
deps = [":kenlm"],
|
||||
linkopts = [
|
||||
"-lm",
|
||||
"-ldl",
|
||||
"-pthread",
|
||||
includes = ["tensorflow"],
|
||||
deps = ["//tensorflow/lite:libtensorflowlite.so"],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "libtflitedelegates.so",
|
||||
deps = [
|
||||
"//tensorflow/lite/tools/evaluation:utils",
|
||||
],
|
||||
linkshared = 1,
|
||||
linkopts = select({
|
||||
"//tensorflow:ios": [
|
||||
"-Wl,-install_name,@rpath/libtflitedelegates.so",
|
||||
],
|
||||
"//tensorflow:macos": [
|
||||
"-Wl,-install_name,@rpath/libtflitedelegates.so",
|
||||
],
|
||||
"//tensorflow:windows": [],
|
||||
"//conditions:default": [
|
||||
"-Wl,-soname,libtflitedelegates.so",
|
||||
],
|
||||
}),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tflitedelegates",
|
||||
hdrs = [
|
||||
"//tensorflow/lite/tools/evaluation:utils.h",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/lite/tools/evaluation:utils",
|
||||
],
|
||||
srcs = [":libtflitedelegates.so"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
@ -121,92 +233,57 @@ cc_library(
|
||||
"modelstate.h",
|
||||
"workspace_status.cc",
|
||||
"workspace_status.h",
|
||||
] + select({
|
||||
"//native_client:tflite": [
|
||||
"tflitemodelstate.h",
|
||||
"tflitemodelstate.cc",
|
||||
],
|
||||
"//conditions:default": [
|
||||
"tfmodelstate.h",
|
||||
"tfmodelstate.cc",
|
||||
],
|
||||
}),
|
||||
copts = tf_copts() + select({
|
||||
"tflitemodelstate.h",
|
||||
"tflitemodelstate.cc",
|
||||
] + DECODER_SOURCES,
|
||||
copts = select({
|
||||
# -fvisibility=hidden is not required on Windows, MSVC hides all declarations by default
|
||||
"//tensorflow:windows": ["/w"],
|
||||
"//tensorflow:windows": ["/std:c++14", "/w"],
|
||||
# -Wno-sign-compare to silence a lot of warnings from tensorflow itself,
|
||||
# which makes it harder to see our own warnings
|
||||
"//conditions:default": [
|
||||
"-std=c++14",
|
||||
"-fwrapv",
|
||||
"-Wno-sign-compare",
|
||||
"-fvisibility=hidden",
|
||||
],
|
||||
}) + select({
|
||||
"//native_client:tflite": ["-DUSE_TFLITE"],
|
||||
"//conditions:default": ["-UUSE_TFLITE"],
|
||||
}) + tflite_copts(),
|
||||
}),
|
||||
linkopts = lrt_if_needed() + select({
|
||||
"//tensorflow:macos": [],
|
||||
"//tensorflow:ios": ["-fembed-bitcode"],
|
||||
"//tensorflow:linux_x86_64": LINUX_LINKOPTS,
|
||||
"//native_client:rpi3": LINUX_LINKOPTS,
|
||||
"//native_client:rpi3-armv8": LINUX_LINKOPTS,
|
||||
"//tensorflow:windows": [],
|
||||
# Bazel has too strong opinions about static linking, so it's
|
||||
# near impossible to get it to link a DLL against another DLL on Windows.
|
||||
# We simply force the linker option manually here as a hacky fix.
|
||||
"//tensorflow:windows": [
|
||||
"bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib",
|
||||
"bazel-out/x64_windows-opt/bin/native_client/libtflitedelegates.so.if.lib",
|
||||
"bazel-out/x64_windows-opt/bin/tensorflow/lite/libtensorflowlite.so.if.lib",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}) + tflite_linkopts(),
|
||||
deps = select({
|
||||
"//native_client:tflite": [
|
||||
"//tensorflow/lite/kernels:builtin_ops",
|
||||
"//tensorflow/lite/tools/evaluation:utils",
|
||||
],
|
||||
"//conditions:default": [
|
||||
"//tensorflow/core:core_cpu",
|
||||
"//tensorflow/core:direct_session",
|
||||
"//third_party/eigen3",
|
||||
#"//tensorflow/core:all_kernels",
|
||||
### => Trying to be more fine-grained
|
||||
### Use bin/ops_in_graph.py to list all the ops used by a frozen graph.
|
||||
### CPU only build, libstt.so file size reduced by ~50%
|
||||
"//tensorflow/core/kernels:spectrogram_op", # AudioSpectrogram
|
||||
"//tensorflow/core/kernels:bias_op", # BiasAdd
|
||||
"//tensorflow/core/kernels:cast_op", # Cast
|
||||
"//tensorflow/core/kernels:concat_op", # ConcatV2
|
||||
"//tensorflow/core/kernels:constant_op", # Const, Placeholder
|
||||
"//tensorflow/core/kernels:shape_ops", # ExpandDims, Shape
|
||||
"//tensorflow/core/kernels:gather_nd_op", # GatherNd
|
||||
"//tensorflow/core/kernels:identity_op", # Identity
|
||||
"//tensorflow/core/kernels:immutable_constant_op", # ImmutableConst (used in memmapped models)
|
||||
"//tensorflow/core/kernels:deepspeech_cwise_ops", # Less, Minimum, Mul
|
||||
"//tensorflow/core/kernels:matmul_op", # MatMul
|
||||
"//tensorflow/core/kernels:reduction_ops", # Max
|
||||
"//tensorflow/core/kernels:mfcc_op", # Mfcc
|
||||
"//tensorflow/core/kernels:no_op", # NoOp
|
||||
"//tensorflow/core/kernels:pack_op", # Pack
|
||||
"//tensorflow/core/kernels:sequence_ops", # Range
|
||||
"//tensorflow/core/kernels:relu_op", # Relu
|
||||
"//tensorflow/core/kernels:reshape_op", # Reshape
|
||||
"//tensorflow/core/kernels:softmax_op", # Softmax
|
||||
"//tensorflow/core/kernels:tile_ops", # Tile
|
||||
"//tensorflow/core/kernels:transpose_op", # Transpose
|
||||
"//tensorflow/core/kernels:rnn_ops", # BlockLSTM
|
||||
# And we also need the op libs for these ops used in the model:
|
||||
"//tensorflow/core:audio_ops_op_lib", # AudioSpectrogram, Mfcc
|
||||
"//tensorflow/core:rnn_ops_op_lib", # BlockLSTM
|
||||
"//tensorflow/core:math_ops_op_lib", # Cast, Less, Max, MatMul, Minimum, Range
|
||||
"//tensorflow/core:array_ops_op_lib", # ConcatV2, Const, ExpandDims, Fill, GatherNd, Identity, Pack, Placeholder, Reshape, Tile, Transpose
|
||||
"//tensorflow/core:no_op_op_lib", # NoOp
|
||||
"//tensorflow/core:nn_ops_op_lib", # Relu, Softmax, BiasAdd
|
||||
# And op libs for these ops brought in by dependencies of dependencies to silence unknown OpKernel warnings:
|
||||
"//tensorflow/core:dataset_ops_op_lib", # UnwrapDatasetVariant, WrapDatasetVariant
|
||||
"//tensorflow/core:sendrecv_ops_op_lib", # _HostRecv, _HostSend, _Recv, _Send
|
||||
],
|
||||
}) + if_cuda([
|
||||
"//tensorflow/core:core",
|
||||
]) + [":decoder"],
|
||||
}) + DECODER_LINKOPTS,
|
||||
includes = DECODER_INCLUDES,
|
||||
deps = [":kenlm", ":tflite", ":tflitedelegates", ":flashlight"],
|
||||
)
|
||||
|
||||
tf_cc_shared_object(
|
||||
cc_binary(
|
||||
name = "libstt.so",
|
||||
deps = [":coqui_stt_bundle"],
|
||||
linkshared = 1,
|
||||
linkopts = select({
|
||||
"//tensorflow:ios": [
|
||||
"-Wl,-install_name,@rpath/libstt.so",
|
||||
],
|
||||
"//tensorflow:macos": [
|
||||
"-Wl,-install_name,@rpath/libstt.so",
|
||||
],
|
||||
"//tensorflow:windows": [],
|
||||
"//conditions:default": [
|
||||
"-Wl,-soname,libstt.so",
|
||||
],
|
||||
}),
|
||||
)
|
||||
|
||||
ios_static_framework(
|
||||
@ -231,9 +308,13 @@ cc_binary(
|
||||
"generate_scorer_package.cpp",
|
||||
"stt_errors.cc",
|
||||
],
|
||||
copts = ["-std=c++11"],
|
||||
copts = select({
|
||||
"//tensorflow:windows": ["/std:c++14"],
|
||||
"//conditions:default": ["-std=c++14"],
|
||||
}),
|
||||
deps = [
|
||||
":decoder",
|
||||
":kenlm",
|
||||
"@com_google_absl//absl/flags:flag",
|
||||
"@com_google_absl//absl/flags:parse",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
@ -247,6 +328,10 @@ cc_binary(
|
||||
] + select({
|
||||
# ARMv7: error: Android 5.0 and later only support position-independent executables (-fPIE).
|
||||
"//tensorflow:android": ["-fPIE -pie"],
|
||||
# Bazel has too strong opinions about static linking, so it's
|
||||
# near impossible to get it to link a DLL against another DLL on Windows.
|
||||
# We simply force the linker option manually here as a hacky fix.
|
||||
"//tensorflow:windows": ["bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
)
|
||||
@ -257,15 +342,20 @@ cc_binary(
|
||||
"enumerate_kenlm_vocabulary.cpp",
|
||||
],
|
||||
deps = [":kenlm"],
|
||||
copts = ["-std=c++11"],
|
||||
copts = select({
|
||||
"//tensorflow:windows": ["/std:c++14"],
|
||||
"//conditions:default": ["-std=c++14"],
|
||||
}),
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "trie_load",
|
||||
srcs = [
|
||||
"alphabet.h",
|
||||
"trie_load.cc",
|
||||
],
|
||||
copts = ["-std=c++11"],
|
||||
deps = [":decoder"],
|
||||
] + DECODER_SOURCES,
|
||||
copts = select({
|
||||
"//tensorflow:windows": ["/std:c++14"],
|
||||
"//conditions:default": ["-std=c++14"],
|
||||
}),
|
||||
linkopts = DECODER_LINKOPTS,
|
||||
)
|
||||
|
@ -45,8 +45,8 @@ Alphabet::init(const char *config_file)
|
||||
if (!in) {
|
||||
return 1;
|
||||
}
|
||||
unsigned int label = 0;
|
||||
space_label_ = -2;
|
||||
int index = 0;
|
||||
space_index_ = -2;
|
||||
for (std::string line; getline_crossplatform(in, line);) {
|
||||
if (line.size() == 2 && line[0] == '\\' && line[1] == '#') {
|
||||
line = '#';
|
||||
@ -55,35 +55,68 @@ Alphabet::init(const char *config_file)
|
||||
}
|
||||
//TODO: we should probably do something more i18n-aware here
|
||||
if (line == " ") {
|
||||
space_label_ = label;
|
||||
space_index_ = index;
|
||||
}
|
||||
if (line.length() == 0) {
|
||||
continue;
|
||||
}
|
||||
label_to_str_[label] = line;
|
||||
str_to_label_[line] = label;
|
||||
++label;
|
||||
addEntry(line, index);
|
||||
++index;
|
||||
}
|
||||
size_ = label;
|
||||
in.close();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
Alphabet::InitFromLabels(const std::vector<std::string>& labels)
|
||||
{
|
||||
space_index_ = -2;
|
||||
for (int idx = 0; idx < labels.size(); ++idx) {
|
||||
const std::string& label = labels[idx];
|
||||
if (label == " ") {
|
||||
space_index_ = idx;
|
||||
}
|
||||
addEntry(label, idx);
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
Alphabet::SerializeText()
|
||||
{
|
||||
std::stringstream out;
|
||||
|
||||
out << "# Each line in this file represents the Unicode codepoint (UTF-8 encoded)\n"
|
||||
<< "# associated with a numeric index.\n"
|
||||
<< "# A line that starts with # is a comment. You can escape it with \\# if you wish\n"
|
||||
<< "# to use '#' in the Alphabet.\n";
|
||||
|
||||
for (int idx = 0; idx < entrySize(); ++idx) {
|
||||
out << getEntry(idx) << "\n";
|
||||
}
|
||||
|
||||
out << "# The last (non-comment) line needs to end with a newline.\n";
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string
|
||||
Alphabet::Serialize()
|
||||
{
|
||||
// Should always be true in our usage, but this method will crash if for some
|
||||
// mystical reason it doesn't hold, so defensively assert it here.
|
||||
assert(isContiguous());
|
||||
|
||||
// Serialization format is a sequence of (key, value) pairs, where key is
|
||||
// a uint16_t and value is a uint16_t length followed by `length` UTF-8
|
||||
// encoded bytes with the label.
|
||||
std::stringstream out;
|
||||
|
||||
// We start by writing the number of pairs in the buffer as uint16_t.
|
||||
uint16_t size = size_;
|
||||
uint16_t size = entrySize();
|
||||
out.write(reinterpret_cast<char*>(&size), sizeof(size));
|
||||
|
||||
for (auto it = label_to_str_.begin(); it != label_to_str_.end(); ++it) {
|
||||
uint16_t key = it->first;
|
||||
string str = it->second;
|
||||
for (int i = 0; i < GetSize(); ++i) {
|
||||
uint16_t key = i;
|
||||
string str = DecodeSingle(i);
|
||||
uint16_t len = str.length();
|
||||
// Then we write the key as uint16_t, followed by the length of the value
|
||||
// as uint16_t, followed by `length` bytes (the value itself).
|
||||
@ -105,7 +138,6 @@ Alphabet::Deserialize(const char* buffer, const int buffer_size)
|
||||
}
|
||||
uint16_t size = *(uint16_t*)(buffer + offset);
|
||||
offset += sizeof(uint16_t);
|
||||
size_ = size;
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (buffer_size - offset < sizeof(uint16_t)) {
|
||||
@ -126,22 +158,26 @@ Alphabet::Deserialize(const char* buffer, const int buffer_size)
|
||||
std::string val(buffer+offset, val_len);
|
||||
offset += val_len;
|
||||
|
||||
label_to_str_[label] = val;
|
||||
str_to_label_[val] = label;
|
||||
addEntry(val, label);
|
||||
|
||||
if (val == " ") {
|
||||
space_label_ = label;
|
||||
space_index_ = label;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t
|
||||
Alphabet::GetSize() const
|
||||
{
|
||||
return entrySize();
|
||||
}
|
||||
|
||||
bool
|
||||
Alphabet::CanEncodeSingle(const std::string& input) const
|
||||
{
|
||||
auto it = str_to_label_.find(input);
|
||||
return it != str_to_label_.end();
|
||||
return contains(input);
|
||||
}
|
||||
|
||||
bool
|
||||
@ -158,25 +194,14 @@ Alphabet::CanEncode(const std::string& input) const
|
||||
std::string
|
||||
Alphabet::DecodeSingle(unsigned int label) const
|
||||
{
|
||||
auto it = label_to_str_.find(label);
|
||||
if (it != label_to_str_.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
std::cerr << "Invalid label " << label << std::endl;
|
||||
abort();
|
||||
}
|
||||
assert(label <= INT_MAX);
|
||||
return getEntry(label);
|
||||
}
|
||||
|
||||
unsigned int
|
||||
Alphabet::EncodeSingle(const std::string& string) const
|
||||
{
|
||||
auto it = str_to_label_.find(string);
|
||||
if (it != str_to_label_.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
std::cerr << "Invalid string " << string << std::endl;
|
||||
abort();
|
||||
}
|
||||
return getIndex(string);
|
||||
}
|
||||
|
||||
std::string
|
||||
|
@ -5,12 +5,15 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "flashlight/lib/text/dictionary/Dictionary.h"
|
||||
|
||||
/*
|
||||
* Loads a text file describing a mapping of labels to strings, one string per
|
||||
* line. This is used by the decoder, client and Python scripts to convert the
|
||||
* output of the decoder to a human-readable string and vice-versa.
|
||||
*/
|
||||
class Alphabet {
|
||||
class Alphabet : public fl::lib::text::Dictionary
|
||||
{
|
||||
public:
|
||||
Alphabet() = default;
|
||||
Alphabet(const Alphabet&) = default;
|
||||
@ -19,22 +22,26 @@ public:
|
||||
|
||||
virtual int init(const char *config_file);
|
||||
|
||||
// Initialize directly from sequence of labels.
|
||||
void InitFromLabels(const std::vector<std::string>& labels);
|
||||
|
||||
// Serialize alphabet into a binary buffer.
|
||||
std::string Serialize();
|
||||
|
||||
// Serialize alphabet into a text representation (i.e. the config file read by `init`)
|
||||
std::string SerializeText();
|
||||
|
||||
// Deserialize alphabet from a binary buffer.
|
||||
int Deserialize(const char* buffer, const int buffer_size);
|
||||
|
||||
size_t GetSize() const {
|
||||
return size_;
|
||||
}
|
||||
size_t GetSize() const;
|
||||
|
||||
bool IsSpace(unsigned int label) const {
|
||||
return label == space_label_;
|
||||
return label == space_index_;
|
||||
}
|
||||
|
||||
unsigned int GetSpaceLabel() const {
|
||||
return space_label_;
|
||||
return space_index_;
|
||||
}
|
||||
|
||||
// Returns true if the single character/output class has a corresponding label
|
||||
@ -66,23 +73,20 @@ public:
|
||||
virtual std::vector<unsigned int> Encode(const std::string& input) const;
|
||||
|
||||
protected:
|
||||
size_t size_;
|
||||
unsigned int space_label_;
|
||||
std::unordered_map<unsigned int, std::string> label_to_str_;
|
||||
std::unordered_map<std::string, unsigned int> str_to_label_;
|
||||
unsigned int space_index_;
|
||||
};
|
||||
|
||||
class UTF8Alphabet : public Alphabet
|
||||
{
|
||||
public:
|
||||
UTF8Alphabet() {
|
||||
size_ = 255;
|
||||
space_label_ = ' ' - 1;
|
||||
for (size_t i = 0; i < size_; ++i) {
|
||||
std::string val(1, i+1);
|
||||
label_to_str_[i] = val;
|
||||
str_to_label_[val] = i;
|
||||
// 255 byte values, index n -> byte value n+1
|
||||
// because NUL is never used, we don't use up an index in the maps for it
|
||||
for (int idx = 0; idx < 255; ++idx) {
|
||||
std::string val(1, idx+1);
|
||||
addEntry(val, idx);
|
||||
}
|
||||
space_index_ = ' ' - 1;
|
||||
}
|
||||
|
||||
int init(const char*) override {
|
||||
|
@ -15,6 +15,10 @@ extern "C" {
|
||||
#define STT_EXPORT
|
||||
#endif
|
||||
|
||||
// For the decoder package we include this header but should only expose
|
||||
// the error info, so guard all the other definitions out.
|
||||
#ifndef SWIG_ERRORS_ONLY
|
||||
|
||||
typedef struct ModelState ModelState;
|
||||
|
||||
typedef struct StreamingState StreamingState;
|
||||
@ -59,6 +63,8 @@ typedef struct Metadata {
|
||||
const unsigned int num_transcripts;
|
||||
} Metadata;
|
||||
|
||||
#endif /* SWIG_ERRORS_ONLY */
|
||||
|
||||
// sphinx-doc: error_code_listing_start
|
||||
|
||||
#define STT_FOR_EACH_ERROR(APPLY) \
|
||||
@ -95,6 +101,8 @@ STT_FOR_EACH_ERROR(DEFINE)
|
||||
#undef DEFINE
|
||||
};
|
||||
|
||||
#ifndef SWIG_ERRORS_ONLY
|
||||
|
||||
/**
|
||||
* @brief An object providing an interface to a trained Coqui STT model.
|
||||
*
|
||||
@ -105,7 +113,7 @@ STT_FOR_EACH_ERROR(DEFINE)
|
||||
*/
|
||||
STT_EXPORT
|
||||
int STT_CreateModel(const char* aModelPath,
|
||||
ModelState** retval);
|
||||
ModelState** retval);
|
||||
|
||||
/**
|
||||
* @brief Get beam width value used by the model. If {@link STT_SetModelBeamWidth}
|
||||
@ -130,7 +138,7 @@ unsigned int STT_GetModelBeamWidth(const ModelState* aCtx);
|
||||
*/
|
||||
STT_EXPORT
|
||||
int STT_SetModelBeamWidth(ModelState* aCtx,
|
||||
unsigned int aBeamWidth);
|
||||
unsigned int aBeamWidth);
|
||||
|
||||
/**
|
||||
* @brief Return the sample rate expected by a model.
|
||||
@ -158,7 +166,7 @@ void STT_FreeModel(ModelState* ctx);
|
||||
*/
|
||||
STT_EXPORT
|
||||
int STT_EnableExternalScorer(ModelState* aCtx,
|
||||
const char* aScorerPath);
|
||||
const char* aScorerPath);
|
||||
|
||||
/**
|
||||
* @brief Add a hot-word and its boost.
|
||||
@ -173,8 +181,8 @@ int STT_EnableExternalScorer(ModelState* aCtx,
|
||||
*/
|
||||
STT_EXPORT
|
||||
int STT_AddHotWord(ModelState* aCtx,
|
||||
const char* word,
|
||||
float boost);
|
||||
const char* word,
|
||||
float boost);
|
||||
|
||||
/**
|
||||
* @brief Remove entry for a hot-word from the hot-words map.
|
||||
@ -186,7 +194,7 @@ int STT_AddHotWord(ModelState* aCtx,
|
||||
*/
|
||||
STT_EXPORT
|
||||
int STT_EraseHotWord(ModelState* aCtx,
|
||||
const char* word);
|
||||
const char* word);
|
||||
|
||||
/**
|
||||
* @brief Removes all elements from the hot-words map.
|
||||
@ -219,8 +227,8 @@ int STT_DisableExternalScorer(ModelState* aCtx);
|
||||
*/
|
||||
STT_EXPORT
|
||||
int STT_SetScorerAlphaBeta(ModelState* aCtx,
|
||||
float aAlpha,
|
||||
float aBeta);
|
||||
float aAlpha,
|
||||
float aBeta);
|
||||
|
||||
/**
|
||||
* @brief Use the Coqui STT model to convert speech to text.
|
||||
@ -235,8 +243,8 @@ int STT_SetScorerAlphaBeta(ModelState* aCtx,
|
||||
*/
|
||||
STT_EXPORT
|
||||
char* STT_SpeechToText(ModelState* aCtx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize);
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize);
|
||||
|
||||
/**
|
||||
* @brief Use the Coqui STT model to convert speech to text and output results
|
||||
@ -255,9 +263,9 @@ char* STT_SpeechToText(ModelState* aCtx,
|
||||
*/
|
||||
STT_EXPORT
|
||||
Metadata* STT_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize,
|
||||
unsigned int aNumResults);
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize,
|
||||
unsigned int aNumResults);
|
||||
|
||||
/**
|
||||
* @brief Create a new streaming inference state. The streaming state returned
|
||||
@ -284,8 +292,8 @@ int STT_CreateStream(ModelState* aCtx,
|
||||
*/
|
||||
STT_EXPORT
|
||||
void STT_FeedAudioContent(StreamingState* aSctx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize);
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize);
|
||||
|
||||
/**
|
||||
* @brief Compute the intermediate decoding of an ongoing streaming inference.
|
||||
@ -312,7 +320,7 @@ char* STT_IntermediateDecode(const StreamingState* aSctx);
|
||||
*/
|
||||
STT_EXPORT
|
||||
Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
||||
unsigned int aNumResults);
|
||||
unsigned int aNumResults);
|
||||
|
||||
/**
|
||||
* @brief Compute the final decoding of an ongoing streaming inference and return
|
||||
@ -345,7 +353,7 @@ char* STT_FinishStream(StreamingState* aSctx);
|
||||
*/
|
||||
STT_EXPORT
|
||||
Metadata* STT_FinishStreamWithMetadata(StreamingState* aSctx,
|
||||
unsigned int aNumResults);
|
||||
unsigned int aNumResults);
|
||||
|
||||
/**
|
||||
* @brief Destroy a streaming state without decoding the computed logits. This
|
||||
@ -389,6 +397,7 @@ char* STT_Version();
|
||||
STT_EXPORT
|
||||
char* STT_ErrorCodeToErrorMessage(int aErrorCode);
|
||||
|
||||
#endif /* SWIG_ERRORS_ONLY */
|
||||
#undef STT_EXPORT
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -45,16 +45,16 @@ workspace_status.cc:
|
||||
# Enforce PATH here because swig calls from build_ext lose track of some
|
||||
# variables over several runs
|
||||
bindings: clean-keep-third-party workspace_status.cc $(DS_SWIG_DEP)
|
||||
python -m pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0
|
||||
DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN_DIR):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
|
||||
python -m pip install --quiet $(PYTHON_PACKAGES) wheel setuptools
|
||||
DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN_DIR):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(SETUP_FLAGS)
|
||||
find temp_build -type f -name "*.o" -delete
|
||||
DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
|
||||
DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(SETUP_FLAGS)
|
||||
rm -rf temp_build
|
||||
|
||||
bindings-debug: clean-keep-third-party workspace_status.cc $(DS_SWIG_DEP)
|
||||
python -m pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0
|
||||
DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN_DIR):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
|
||||
python -m pip install --quiet $(PYTHON_PACKAGES) wheel setuptools
|
||||
DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN_DIR):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(SETUP_FLAGS)
|
||||
$(GENERATE_DEBUG_SYMS)
|
||||
find temp_build -type f -name "*.o" -delete
|
||||
DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
|
||||
DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(SETUP_FLAGS)
|
||||
rm -rf temp_build
|
||||
|
@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import, division, print_function
|
||||
import enum
|
||||
|
||||
from . import swigwrapper # pylint: disable=import-self
|
||||
|
||||
@ -13,45 +13,28 @@ for symbol in dir(swigwrapper):
|
||||
globals()[symbol] = getattr(swigwrapper, symbol)
|
||||
|
||||
|
||||
class Scorer(swigwrapper.Scorer):
|
||||
"""Wrapper for Scorer.
|
||||
|
||||
:param alpha: Language model weight.
|
||||
:type alpha: float
|
||||
:param beta: Word insertion bonus.
|
||||
:type beta: float
|
||||
:scorer_path: Path to load scorer from.
|
||||
:alphabet: Alphabet
|
||||
:type scorer_path: basestring
|
||||
class Alphabet(swigwrapper.Alphabet):
|
||||
"""An Alphabet is a bidirectional map from tokens (eg. characters) to
|
||||
internal integer representations used by the underlying acoustic models
|
||||
and external scorers. It can be created from alphabet configuration file
|
||||
via the constructor, or from a list of tokens via :py:meth:`Alphabet.InitFromLabels`.
|
||||
"""
|
||||
|
||||
def __init__(self, alpha=None, beta=None, scorer_path=None, alphabet=None):
|
||||
super(Scorer, self).__init__()
|
||||
# Allow bare initialization
|
||||
if alphabet:
|
||||
assert alpha is not None, "alpha parameter is required"
|
||||
assert beta is not None, "beta parameter is required"
|
||||
assert scorer_path, "scorer_path parameter is required"
|
||||
|
||||
err = self.init(scorer_path.encode("utf-8"), alphabet)
|
||||
def __init__(self, config_path=None):
|
||||
super(Alphabet, self).__init__()
|
||||
if config_path:
|
||||
err = self.init(config_path.encode("utf-8"))
|
||||
if err != 0:
|
||||
raise ValueError(
|
||||
"Scorer initialization failed with error code 0x{:X}".format(err)
|
||||
"Alphabet initialization failed with error code 0x{:X}".format(err)
|
||||
)
|
||||
|
||||
self.reset_params(alpha, beta)
|
||||
|
||||
|
||||
class Alphabet(swigwrapper.Alphabet):
|
||||
"""Convenience wrapper for Alphabet which calls init in the constructor"""
|
||||
|
||||
def __init__(self, config_path):
|
||||
super(Alphabet, self).__init__()
|
||||
err = self.init(config_path.encode("utf-8"))
|
||||
if err != 0:
|
||||
raise ValueError(
|
||||
"Alphabet initialization failed with error code 0x{:X}".format(err)
|
||||
)
|
||||
def InitFromLabels(self, data):
|
||||
"""
|
||||
Initialize Alphabet from a list of labels ``data``. Each label gets
|
||||
associated with an integer value corresponding to its position in the list.
|
||||
"""
|
||||
return super(Alphabet, self).InitFromLabels([c.encode("utf-8") for c in data])
|
||||
|
||||
def CanEncodeSingle(self, input):
|
||||
"""
|
||||
@ -79,7 +62,7 @@ class Alphabet(swigwrapper.Alphabet):
|
||||
Encode a sequence of character/output classes into a sequence of labels.
|
||||
Characters are assumed to always take a single Unicode codepoint.
|
||||
Characters must be in the alphabet, this method will assert that. Use
|
||||
`CanEncode` and `CanEncodeSingle` to test.
|
||||
``CanEncode`` and ``CanEncodeSingle`` to test.
|
||||
"""
|
||||
# Convert SWIG's UnsignedIntVec to a Python list
|
||||
res = super(Alphabet, self).Encode(input.encode("utf-8"))
|
||||
@ -95,57 +78,39 @@ class Alphabet(swigwrapper.Alphabet):
|
||||
return res.decode("utf-8")
|
||||
|
||||
|
||||
class UTF8Alphabet(swigwrapper.UTF8Alphabet):
|
||||
"""Convenience wrapper for Alphabet which calls init in the constructor"""
|
||||
class Scorer(swigwrapper.Scorer):
|
||||
"""An external scorer is a data structure composed of a language model built
|
||||
from text data, as well as the vocabulary used in the construction of this
|
||||
language model and additional parameters related to how the decoding
|
||||
process uses the external scorer, such as the language model weight
|
||||
``alpha`` and the word insertion score ``beta``.
|
||||
|
||||
def __init__(self):
|
||||
super(UTF8Alphabet, self).__init__()
|
||||
err = self.init(b"")
|
||||
if err != 0:
|
||||
raise ValueError(
|
||||
"UTF8Alphabet initialization failed with error code 0x{:X}".format(err)
|
||||
)
|
||||
:param alpha: Language model weight.
|
||||
:type alpha: float
|
||||
:param beta: Word insertion score.
|
||||
:type beta: float
|
||||
:param scorer_path: Path to load scorer from.
|
||||
:type scorer_path: str
|
||||
:param alphabet: Alphabet object matching the tokens used when creating the
|
||||
external scorer.
|
||||
:type alphabet: Alphabet
|
||||
"""
|
||||
|
||||
def CanEncodeSingle(self, input):
|
||||
"""
|
||||
Returns true if the single character/output class has a corresponding label
|
||||
in the alphabet.
|
||||
"""
|
||||
return super(UTF8Alphabet, self).CanEncodeSingle(input.encode("utf-8"))
|
||||
def __init__(self, alpha=None, beta=None, scorer_path=None, alphabet=None):
|
||||
super(Scorer, self).__init__()
|
||||
# Allow bare initialization
|
||||
if alphabet:
|
||||
assert alpha is not None, "alpha parameter is required"
|
||||
assert beta is not None, "beta parameter is required"
|
||||
assert scorer_path, "scorer_path parameter is required"
|
||||
|
||||
def CanEncode(self, input):
|
||||
"""
|
||||
Returns true if the entire string can be encoded into labels in this
|
||||
alphabet.
|
||||
"""
|
||||
return super(UTF8Alphabet, self).CanEncode(input.encode("utf-8"))
|
||||
err = self.init(scorer_path.encode("utf-8"), alphabet)
|
||||
if err != 0:
|
||||
raise ValueError(
|
||||
"Scorer initialization failed with error code 0x{:X}".format(err)
|
||||
)
|
||||
|
||||
def EncodeSingle(self, input):
|
||||
"""
|
||||
Encode a single character/output class into a label. Character must be in
|
||||
the alphabet, this method will assert that. Use `CanEncodeSingle` to test.
|
||||
"""
|
||||
return super(UTF8Alphabet, self).EncodeSingle(input.encode("utf-8"))
|
||||
|
||||
def Encode(self, input):
|
||||
"""
|
||||
Encode a sequence of character/output classes into a sequence of labels.
|
||||
Characters are assumed to always take a single Unicode codepoint.
|
||||
Characters must be in the alphabet, this method will assert that. Use
|
||||
`CanEncode` and `CanEncodeSingle` to test.
|
||||
"""
|
||||
# Convert SWIG's UnsignedIntVec to a Python list
|
||||
res = super(UTF8Alphabet, self).Encode(input.encode("utf-8"))
|
||||
return [el for el in res]
|
||||
|
||||
def DecodeSingle(self, input):
|
||||
res = super(UTF8Alphabet, self).DecodeSingle(input)
|
||||
return res.decode("utf-8")
|
||||
|
||||
def Decode(self, input):
|
||||
"""Decode a sequence of labels into a string."""
|
||||
res = super(UTF8Alphabet, self).Decode(input)
|
||||
return res.decode("utf-8")
|
||||
self.reset_params(alpha, beta)
|
||||
|
||||
|
||||
def ctc_beam_search_decoder(
|
||||
@ -178,7 +143,7 @@ def ctc_beam_search_decoder(
|
||||
count or language model.
|
||||
:type scorer: Scorer
|
||||
:param hot_words: Map of words (keys) to their assigned boosts (values)
|
||||
:type hot_words: map{string:float}
|
||||
:type hot_words: dict[string, float]
|
||||
:param num_results: Number of beams to return.
|
||||
:type num_results: int
|
||||
:return: List of tuples of confidence and sentence as decoding
|
||||
@ -237,7 +202,7 @@ def ctc_beam_search_decoder_batch(
|
||||
count or language model.
|
||||
:type scorer: Scorer
|
||||
:param hot_words: Map of words (keys) to their assigned boosts (values)
|
||||
:type hot_words: map{string:float}
|
||||
:type hot_words: dict[string, float]
|
||||
:param num_results: Number of beams to return.
|
||||
:type num_results: int
|
||||
:return: List of tuples of confidence and sentence as decoding
|
||||
@ -261,3 +226,247 @@ def ctc_beam_search_decoder_batch(
|
||||
for beam_results in batch_beam_results
|
||||
]
|
||||
return batch_beam_results
|
||||
|
||||
|
||||
class FlashlightDecoderState(swigwrapper.FlashlightDecoderState):
    """
    Namespace for the option constants accepted by
    :py:func:`flashlight_beam_search_decoder` and
    :py:func:`flashlight_beam_search_decoder_batch`.
    """

    class CriterionType(enum.IntEnum):
        """Loss criterion the acoustic model was trained with.

        This class is a Python :py:class:`enum.IntEnum`.
        """

        #: The acoustic model was trained with CTC loss.
        CTC = swigwrapper.FlashlightDecoderState.CTC

        #: The acoustic model was trained with ASG loss.
        ASG = swigwrapper.FlashlightDecoderState.ASG

        #: The acoustic model was trained with Seq2seq loss.
        #: Note: this criterion type is currently not supported.
        S2S = swigwrapper.FlashlightDecoderState.S2S

    class DecoderType(enum.IntEnum):
        """Selects between lexicon mode, where only words from a fixed
        vocabulary can be predicted, and lexicon-free mode, where no such
        restriction applies.

        This class is a Python :py:class:`enum.IntEnum`.
        """

        #: Restrict predictions to words in the specified vocabulary.
        LexiconBased = swigwrapper.FlashlightDecoderState.LexiconBased

        #: Allow any word to be predicted.
        LexiconFree = swigwrapper.FlashlightDecoderState.LexiconFree

    class TokenType(enum.IntEnum):
        """Granularity of the text units the external scorer was trained on,
        relative to the text units the acoustic model was trained on.

        For example, an acoustic model may predict characters while the
        external scorer was trained on words, or both may have been trained on
        sub-word units. When both use the same text unit granularity, use
        ``TokenType.Single``. When the external scorer was trained on a
        sequence of acoustic model text units, use ``TokenType.Aggregate``.

        This class is a Python :py:class:`enum.IntEnum`.
        """

        #: The external scorer uses the same textual units as the acoustic
        #: model.
        Single = swigwrapper.FlashlightDecoderState.Single

        #: The external scorer uses a sequence of acoustic model textual
        #: units.
        Aggregate = swigwrapper.FlashlightDecoderState.Aggregate
|
||||
|
||||
|
||||
def flashlight_beam_search_decoder(
    logits_seq,
    alphabet,
    beam_size,
    decoder_type,
    token_type,
    lm_tokens,
    scorer=None,
    beam_threshold=25.0,
    cutoff_top_n=40,
    silence_score=0.0,
    merge_with_log_add=False,
    criterion_type=FlashlightDecoderState.CriterionType.CTC,
    transitions=None,
    num_results=1,
):
    """Decode acoustic model emissions for a single sample. Note that unlike
    :py:func:`ctc_beam_search_decoder`, this function expects raw outputs
    from CTC and ASG acoustic models, without softmaxing them over
    timesteps.

    :param logits_seq: 2-D list of acoustic model emissions, dimensions are
                       time steps x number of output units.
    :type logits_seq: 2-D list of floats or numpy array
    :param alphabet: Alphabet object matching the tokens used when creating the
                     acoustic model and external scorer if specified.
    :type alphabet: Alphabet
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param decoder_type: Decoding mode, lexicon-constrained or lexicon-free.
    :type decoder_type: FlashlightDecoderState.DecoderType
    :param token_type: Type of token in the external scorer.
    :type token_type: FlashlightDecoderState.TokenType
    :param lm_tokens: List of tokens to constrain decoding to when in lexicon-constrained
                      mode. Must match the token type used in the scorer, ie.
                      must be a list of characters if scorer is character-based,
                      or a list of words if scorer is word-based.
    :type lm_tokens: list[str]
    :param scorer: External scorer.
    :type scorer: Scorer
    :param beam_threshold: Maximum threshold in beam score from leading beam. Any
                           newly created candidate beams which lag behind the best
                           beam so far by more than this value will get pruned.
                           This is a performance optimization parameter and an
                           appropriate value should be found empirically using a
                           validation set.
    :type beam_threshold: float
    :param cutoff_top_n: Maximum number of tokens to expand per time step during
                         decoding. Only the highest probability cutoff_top_n
                         candidates (characters, sub-word units, words) in a given
                         timestep will be expanded. This is a performance
                         optimization parameter and an appropriate value should
                         be found empirically using a validation set.
    :type cutoff_top_n: int
    :param silence_score: Score to add to beam when encountering a predicted
                          silence token (eg. the space symbol).
    :type silence_score: float
    :param merge_with_log_add: Whether to use log-add when merging scores of
                               new candidate beams equivalent to existing ones
                               (leading to the same transcription). When disabled,
                               the maximum score is used.
    :type merge_with_log_add: bool
    :param criterion_type: Criterion used for training the acoustic model.
    :type criterion_type: FlashlightDecoderState.CriterionType
    :param transitions: Transition score matrix for ASG acoustic models.
                        Defaults to an empty list when omitted.
    :type transitions: list[float]
    :param num_results: Number of beams to return.
    :type num_results: int
    :return: List of FlashlightOutput structures.
    :rtype: list[FlashlightOutput]
    """
    # Use a None sentinel rather than a shared mutable default list, so every
    # call that omits ``transitions`` gets its own fresh empty list.
    if transitions is None:
        transitions = []

    # The SWIG entry point takes its arguments positionally, in an order that
    # differs from this wrapper's signature.
    return swigwrapper.flashlight_beam_search_decoder(
        logits_seq,
        alphabet,
        beam_size,
        beam_threshold,
        cutoff_top_n,
        scorer,
        token_type,
        lm_tokens,
        decoder_type,
        silence_score,
        merge_with_log_add,
        criterion_type,
        transitions,
        num_results,
    )
|
||||
|
||||
|
||||
def flashlight_beam_search_decoder_batch(
    probs_seq,
    seq_lengths,
    alphabet,
    beam_size,
    decoder_type,
    token_type,
    lm_tokens,
    num_processes,
    scorer=None,
    beam_threshold=25.0,
    cutoff_top_n=40,
    silence_score=0.0,
    merge_with_log_add=False,
    criterion_type=FlashlightDecoderState.CriterionType.CTC,
    transitions=None,
    num_results=1,
):
    """Decode batch acoustic model emissions in parallel. ``num_processes``
    controls how many samples from the batch will be decoded simultaneously.
    All the other parameters are forwarded to :py:func:`flashlight_beam_search_decoder`.

    ``transitions`` defaults to an empty list when omitted.

    Returns a list of lists of FlashlightOutput structures.
    """
    # Use a None sentinel rather than a shared mutable default list, so every
    # call that omits ``transitions`` gets its own fresh empty list.
    if transitions is None:
        transitions = []

    # The SWIG entry point takes its arguments positionally, in an order that
    # differs from this wrapper's signature (``num_processes`` goes last).
    return swigwrapper.flashlight_beam_search_decoder_batch(
        probs_seq,
        seq_lengths,
        alphabet,
        beam_size,
        beam_threshold,
        cutoff_top_n,
        scorer,
        token_type,
        lm_tokens,
        decoder_type,
        silence_score,
        merge_with_log_add,
        criterion_type,
        transitions,
        num_results,
        num_processes,
    )
|
||||
|
||||
|
||||
class UTF8Alphabet(swigwrapper.UTF8Alphabet):
    """Alphabet covering the 255 possible byte values used in Bytes Output
    Mode. For internal use only.
    """

    def __init__(self):
        super(UTF8Alphabet, self).__init__()
        # The underlying init() reports failure through a non-zero status.
        status = self.init(b"")
        if status != 0:
            raise ValueError(
                "UTF8Alphabet initialization failed with error code 0x{:X}".format(status)
            )

    def CanEncodeSingle(self, input):
        """
        Tell whether a single character/output class has a corresponding
        label in the alphabet.
        """
        encoded = input.encode("utf-8")
        return super(UTF8Alphabet, self).CanEncodeSingle(encoded)

    def CanEncode(self, input):
        """
        Tell whether the whole string can be encoded into labels in this
        alphabet.
        """
        encoded = input.encode("utf-8")
        return super(UTF8Alphabet, self).CanEncode(encoded)

    def EncodeSingle(self, input):
        """
        Turn a single character/output class into its label. The character
        must be in the alphabet, this method will assert that. Use
        ``CanEncodeSingle`` to test.
        """
        encoded = input.encode("utf-8")
        return super(UTF8Alphabet, self).EncodeSingle(encoded)

    def Encode(self, input):
        """
        Turn a sequence of character/output classes into a sequence of labels.
        Characters are assumed to always take a single Unicode codepoint.
        Characters must be in the alphabet, this method will assert that. Use
        ``CanEncode`` and ``CanEncodeSingle`` to test.
        """
        # SWIG hands back an UnsignedIntVec; expose it as a plain Python list.
        return list(super(UTF8Alphabet, self).Encode(input.encode("utf-8")))

    def DecodeSingle(self, input):
        """Decode a single label into a string."""
        return super(UTF8Alphabet, self).DecodeSingle(input).decode("utf-8")

    def Decode(self, input):
        """Decode a sequence of labels into a string."""
        return super(UTF8Alphabet, self).Decode(input).decode("utf-8")
|
||||
|
@ -17,7 +17,7 @@ else:
|
||||
ARGS = [
|
||||
"-fPIC",
|
||||
"-DKENLM_MAX_ORDER=6",
|
||||
"-std=c++11",
|
||||
"-std=c++14",
|
||||
"-Wno-unused-local-typedefs",
|
||||
"-Wno-sign-compare",
|
||||
]
|
||||
@ -32,6 +32,7 @@ INCLUDES = [
|
||||
OPENFST_DIR + "/src/include",
|
||||
"third_party/ThreadPool",
|
||||
"third_party/object_pool",
|
||||
"third_party/flashlight",
|
||||
]
|
||||
|
||||
KENLM_FILES = (
|
||||
@ -40,7 +41,7 @@ KENLM_FILES = (
|
||||
+ glob.glob("../kenlm/util/double-conversion/*.cc")
|
||||
)
|
||||
|
||||
KENLM_FILES += glob.glob(OPENFST_DIR + "/src/lib/*.cc")
|
||||
OPENFST_FILES = glob.glob(OPENFST_DIR + "/src/lib/*.cc")
|
||||
|
||||
KENLM_FILES = [
|
||||
fn
|
||||
@ -50,6 +51,22 @@ KENLM_FILES = [
|
||||
)
|
||||
]
|
||||
|
||||
FLASHLIGHT_FILES = [
|
||||
"third_party/flashlight/flashlight/lib/common/String.cpp",
|
||||
"third_party/flashlight/flashlight/lib/common/System.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/LexiconDecoder.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/LexiconFreeDecoder.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/lm/ConvLM.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/lm/KenLM.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/lm/ZeroLM.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/Trie.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/decoder/Utils.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/dictionary/Dictionary.cpp",
|
||||
"third_party/flashlight/flashlight/lib/text/dictionary/Utils.cpp",
|
||||
]
|
||||
|
||||
THIRD_PARTY_FILES = KENLM_FILES + OPENFST_FILES + FLASHLIGHT_FILES
|
||||
|
||||
CTC_DECODER_FILES = [
|
||||
"ctc_beam_search_decoder.cpp",
|
||||
"scorer.cpp",
|
||||
|
@ -12,6 +12,12 @@
|
||||
#include "fst/fstlib.h"
|
||||
#include "path_trie.h"
|
||||
|
||||
#include "flashlight/lib/text/dictionary/Dictionary.h"
|
||||
#include "flashlight/lib/text/decoder/Trie.h"
|
||||
#include "flashlight/lib/text/decoder/LexiconDecoder.h"
|
||||
#include "flashlight/lib/text/decoder/LexiconFreeDecoder.h"
|
||||
|
||||
namespace flt = fl::lib::text;
|
||||
|
||||
int
|
||||
DecoderState::init(const Alphabet& alphabet,
|
||||
@ -264,6 +270,180 @@ DecoderState::decode(size_t num_results) const
|
||||
return outputs;
|
||||
}
|
||||
|
||||
int
|
||||
FlashlightDecoderState::init(
|
||||
const Alphabet& alphabet,
|
||||
size_t beam_size,
|
||||
double beam_threshold,
|
||||
size_t cutoff_top_n,
|
||||
std::shared_ptr<Scorer> ext_scorer,
|
||||
FlashlightDecoderState::LMTokenType token_type,
|
||||
flt::Dictionary lm_tokens,
|
||||
FlashlightDecoderState::DecoderType decoder_type,
|
||||
double silence_score,
|
||||
bool merge_with_log_add,
|
||||
FlashlightDecoderState::CriterionType criterion_type,
|
||||
std::vector<float> transitions)
|
||||
{
|
||||
// Lexicon-free decoder must use single-token based LM
|
||||
if (decoder_type == LexiconFree) {
|
||||
assert(token_type == Single);
|
||||
}
|
||||
|
||||
// Build lexicon index to LM index map
|
||||
if (!lm_tokens.contains("<unk>")) {
|
||||
lm_tokens.addEntry("<unk>");
|
||||
}
|
||||
ext_scorer->load_words(lm_tokens);
|
||||
lm_tokens_ = lm_tokens;
|
||||
|
||||
// Convert our criterion type to Flashlight type
|
||||
flt::CriterionType flt_criterion;
|
||||
switch (criterion_type) {
|
||||
case ASG: flt_criterion = flt::CriterionType::ASG; break;
|
||||
case CTC: flt_criterion = flt::CriterionType::CTC; break;
|
||||
case S2S: flt_criterion = flt::CriterionType::S2S; break;
|
||||
default: assert(false);
|
||||
}
|
||||
|
||||
// Build Trie
|
||||
std::shared_ptr<flt::Trie> trie = nullptr;
|
||||
auto startState = ext_scorer->start(false);
|
||||
if (token_type == Aggregate || decoder_type == LexiconBased) {
|
||||
trie = std::make_shared<flt::Trie>(lm_tokens.indexSize(), alphabet.GetSpaceLabel());
|
||||
for (int i = 0; i < lm_tokens.entrySize(); ++i) {
|
||||
const std::string entry = lm_tokens.getEntry(i);
|
||||
if (entry[0] == '<') { // don't insert <s>, </s> and <unk>
|
||||
continue;
|
||||
}
|
||||
float score = -1;
|
||||
if (token_type == Aggregate) {
|
||||
flt::LMStatePtr dummyState;
|
||||
std::tie(dummyState, score) = ext_scorer->score(startState, i);
|
||||
}
|
||||
std::vector<unsigned int> encoded = alphabet.Encode(entry);
|
||||
std::vector<int> encoded_s(encoded.begin(), encoded.end());
|
||||
trie->insert(encoded_s, i, score);
|
||||
}
|
||||
|
||||
// Smear trie
|
||||
trie->smear(flt::SmearingMode::MAX);
|
||||
}
|
||||
|
||||
// Query unknown token score
|
||||
int unknown_word_index = lm_tokens.getIndex("<unk>");
|
||||
float unknown_score = -std::numeric_limits<float>::infinity();
|
||||
if (token_type == Aggregate) {
|
||||
std::tie(std::ignore, unknown_score) =
|
||||
ext_scorer->score(startState, unknown_word_index);
|
||||
}
|
||||
|
||||
// Make sure conversions from uint to int below don't trip us
|
||||
assert(beam_size < INT_MAX);
|
||||
assert(cutoff_top_n < INT_MAX);
|
||||
|
||||
if (decoder_type == LexiconBased) {
|
||||
flt::LexiconDecoderOptions opts;
|
||||
opts.beamSize = static_cast<int>(beam_size);
|
||||
opts.beamSizeToken = static_cast<int>(cutoff_top_n);
|
||||
opts.beamThreshold = beam_threshold;
|
||||
opts.lmWeight = ext_scorer->alpha;
|
||||
opts.wordScore = ext_scorer->beta;
|
||||
opts.unkScore = unknown_score;
|
||||
opts.silScore = silence_score;
|
||||
opts.logAdd = merge_with_log_add;
|
||||
opts.criterionType = flt_criterion;
|
||||
decoder_impl_.reset(new flt::LexiconDecoder(
|
||||
opts,
|
||||
trie,
|
||||
ext_scorer,
|
||||
alphabet.GetSpaceLabel(), // silence index
|
||||
alphabet.GetSize(), // blank index
|
||||
unknown_word_index,
|
||||
transitions,
|
||||
token_type == Single)
|
||||
);
|
||||
} else {
|
||||
flt::LexiconFreeDecoderOptions opts;
|
||||
opts.beamSize = static_cast<int>(beam_size);
|
||||
opts.beamSizeToken = static_cast<int>(cutoff_top_n);
|
||||
opts.beamThreshold = beam_threshold;
|
||||
opts.lmWeight = ext_scorer->alpha;
|
||||
opts.silScore = silence_score;
|
||||
opts.logAdd = merge_with_log_add;
|
||||
opts.criterionType = flt_criterion;
|
||||
decoder_impl_.reset(new flt::LexiconFreeDecoder(
|
||||
opts,
|
||||
ext_scorer,
|
||||
alphabet.GetSpaceLabel(), // silence index
|
||||
alphabet.GetSize(), // blank index
|
||||
transitions)
|
||||
);
|
||||
}
|
||||
|
||||
// Init decoder for stream
|
||||
decoder_impl_->decodeBegin();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
FlashlightDecoderState::next(
|
||||
const double *probs,
|
||||
int time_dim,
|
||||
int class_dim)
|
||||
{
|
||||
std::vector<float> probs_f(probs, probs + (time_dim * class_dim) + 1);
|
||||
decoder_impl_->decodeStep(probs_f.data(), time_dim, class_dim);
|
||||
}
|
||||
|
||||
/* Return the current best hypothesis without ending the stream.
 *
 * When `prune` is true the decoder's prune() is called after the
 * hypothesis has been read.
 */
FlashlightOutput
FlashlightDecoderState::intermediate(bool prune)
{
  flt::DecodeResult best = decoder_impl_->getBestHypothesis();

  // Entries equal to -1 are skipped before the dictionary lookup.
  std::vector<int> word_ids;
  for (int id : best.words) {
    if (id == -1) {
      continue;
    }
    word_ids.push_back(id);
  }

  FlashlightOutput snapshot;
  snapshot.tokens = best.tokens;
  snapshot.words = lm_tokens_.mapIndicesToEntries(word_ids); // how does this interact with token-based decoding
  snapshot.language_model_score = best.lmScore;
  snapshot.acoustic_model_score = best.amScore;
  snapshot.aggregate_score = best.score;

  if (prune) {
    decoder_impl_->prune();
  }
  return snapshot;
}
|
||||
|
||||
std::vector<FlashlightOutput>
|
||||
FlashlightDecoderState::decode(size_t num_results)
|
||||
{
|
||||
decoder_impl_->decodeEnd();
|
||||
std::vector<flt::DecodeResult> flt_results = decoder_impl_->getAllFinalHypothesis();
|
||||
std::vector<FlashlightOutput> ret;
|
||||
for (auto result : flt_results) {
|
||||
std::vector<int> valid_words;
|
||||
for (int w : result.words) {
|
||||
if (w != -1) {
|
||||
valid_words.push_back(w);
|
||||
}
|
||||
}
|
||||
FlashlightOutput out;
|
||||
out.aggregate_score = result.score;
|
||||
out.acoustic_model_score = result.amScore;
|
||||
out.language_model_score = result.lmScore;
|
||||
out.words = lm_tokens_.mapIndicesToEntries(valid_words); // how does this interact with token-based decoding
|
||||
out.tokens = result.tokens;
|
||||
ret.push_back(out);
|
||||
}
|
||||
decoder_impl_.reset(nullptr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<Output> ctc_beam_search_decoder(
|
||||
const double *probs,
|
||||
int time_dim,
|
||||
@ -328,3 +508,104 @@ ctc_beam_search_decoder_batch(
|
||||
}
|
||||
return batch_results;
|
||||
}
|
||||
|
||||
std::vector<FlashlightOutput>
|
||||
flashlight_beam_search_decoder(
|
||||
const double* probs,
|
||||
int time_dim,
|
||||
int class_dim,
|
||||
const Alphabet& alphabet,
|
||||
size_t beam_size,
|
||||
double beam_threshold,
|
||||
size_t cutoff_top_n,
|
||||
std::shared_ptr<Scorer> ext_scorer,
|
||||
FlashlightDecoderState::LMTokenType token_type,
|
||||
const std::vector<std::string>& lm_tokens,
|
||||
FlashlightDecoderState::DecoderType decoder_type,
|
||||
double silence_score,
|
||||
bool merge_with_log_add,
|
||||
FlashlightDecoderState::CriterionType criterion_type,
|
||||
std::vector<float> transitions,
|
||||
size_t num_results)
|
||||
{
|
||||
VALID_CHECK_EQ(alphabet.GetSize()+1, class_dim, "Number of output classes in acoustic model does not match number of labels in the alphabet file. Alphabet file must be the same one that was used to train the acoustic model.");
|
||||
flt::Dictionary tokens_dict;
|
||||
for (auto str : lm_tokens) {
|
||||
tokens_dict.addEntry(str);
|
||||
}
|
||||
FlashlightDecoderState state;
|
||||
state.init(
|
||||
alphabet,
|
||||
beam_size,
|
||||
beam_threshold,
|
||||
cutoff_top_n,
|
||||
ext_scorer,
|
||||
token_type,
|
||||
tokens_dict,
|
||||
decoder_type,
|
||||
silence_score,
|
||||
merge_with_log_add,
|
||||
criterion_type,
|
||||
transitions);
|
||||
state.next(probs, time_dim, class_dim);
|
||||
return state.decode(num_results);
|
||||
}
|
||||
|
||||
std::vector<std::vector<FlashlightOutput>>
|
||||
flashlight_beam_search_decoder_batch(
|
||||
const double *probs,
|
||||
int batch_size,
|
||||
int time_dim,
|
||||
int class_dim,
|
||||
const int* seq_lengths,
|
||||
int seq_lengths_size,
|
||||
const Alphabet& alphabet,
|
||||
size_t beam_size,
|
||||
double beam_threshold,
|
||||
size_t cutoff_top_n,
|
||||
std::shared_ptr<Scorer> ext_scorer,
|
||||
FlashlightDecoderState::LMTokenType token_type,
|
||||
const std::vector<std::string>& lm_tokens,
|
||||
FlashlightDecoderState::DecoderType decoder_type,
|
||||
double silence_score,
|
||||
bool merge_with_log_add,
|
||||
FlashlightDecoderState::CriterionType criterion_type,
|
||||
std::vector<float> transitions,
|
||||
size_t num_processes,
|
||||
size_t num_results)
|
||||
{
|
||||
VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
|
||||
VALID_CHECK_EQ(batch_size, seq_lengths_size, "must have one sequence length per batch element");
|
||||
|
||||
ThreadPool pool(num_processes);
|
||||
|
||||
// enqueue the tasks of decoding
|
||||
std::vector<std::future<std::vector<FlashlightOutput>>> res;
|
||||
for (size_t i = 0; i < batch_size; ++i) {
|
||||
res.emplace_back(pool.enqueue(flashlight_beam_search_decoder,
|
||||
&probs[i*time_dim*class_dim],
|
||||
seq_lengths[i],
|
||||
class_dim,
|
||||
alphabet,
|
||||
beam_size,
|
||||
beam_threshold,
|
||||
cutoff_top_n,
|
||||
ext_scorer,
|
||||
token_type,
|
||||
lm_tokens,
|
||||
decoder_type,
|
||||
silence_score,
|
||||
merge_with_log_add,
|
||||
criterion_type,
|
||||
transitions,
|
||||
num_results));
|
||||
}
|
||||
|
||||
// get decoding results
|
||||
std::vector<std::vector<FlashlightOutput>> batch_results;
|
||||
for (size_t i = 0; i < batch_size; ++i) {
|
||||
batch_results.emplace_back(res[i].get());
|
||||
}
|
||||
|
||||
return batch_results;
|
||||
}
|
||||
|
@ -9,7 +9,10 @@
|
||||
#include "output.h"
|
||||
#include "alphabet.h"
|
||||
|
||||
class DecoderState {
|
||||
#include "flashlight/lib/text/decoder/Decoder.h"
|
||||
|
||||
class DecoderState
|
||||
{
|
||||
int abs_time_step_;
|
||||
int space_id_;
|
||||
int blank_id_;
|
||||
@ -76,6 +79,89 @@ public:
|
||||
std::vector<Output> decode(size_t num_results=1) const;
|
||||
};
|
||||
|
||||
class FlashlightDecoderState
|
||||
{
|
||||
public:
|
||||
FlashlightDecoderState() = default;
|
||||
~FlashlightDecoderState() = default;
|
||||
|
||||
// Disallow copying
|
||||
FlashlightDecoderState(const FlashlightDecoderState&) = delete;
|
||||
FlashlightDecoderState& operator=(FlashlightDecoderState&) = delete;
|
||||
|
||||
enum LMTokenType {
|
||||
Single // LM units == AM units (character/byte LM)
|
||||
,Aggregate // LM units != AM units (word LM)
|
||||
};
|
||||
|
||||
enum DecoderType {
|
||||
LexiconBased
|
||||
,LexiconFree
|
||||
};
|
||||
|
||||
enum CriterionType {
|
||||
ASG = 0
|
||||
,CTC = 1
|
||||
,S2S = 2
|
||||
};
|
||||
|
||||
/* Initialize beam search decoder
|
||||
*
|
||||
* Parameters:
|
||||
* alphabet: The alphabet.
|
||||
* beam_size: The width of beam search.
|
||||
* cutoff_prob: Cutoff probability for pruning.
|
||||
* cutoff_top_n: Cutoff number for pruning.
|
||||
* ext_scorer: External scorer to evaluate a prefix, which consists of
|
||||
* n-gram language model scoring and word insertion term.
|
||||
* Default null, decoding the input sample without scorer.
|
||||
* Return:
|
||||
* Zero on success, non-zero on failure.
|
||||
*/
|
||||
int init(const Alphabet& alphabet,
|
||||
size_t beam_size,
|
||||
double beam_threshold,
|
||||
size_t cutoff_top_n,
|
||||
std::shared_ptr<Scorer> ext_scorer,
|
||||
FlashlightDecoderState::LMTokenType token_type,
|
||||
fl::lib::text::Dictionary lm_tokens,
|
||||
FlashlightDecoderState::DecoderType decoder_type,
|
||||
double silence_score,
|
||||
bool merge_with_log_add,
|
||||
FlashlightDecoderState::CriterionType criterion_type,
|
||||
std::vector<float> transitions);
|
||||
|
||||
/* Send data to the decoder
|
||||
*
|
||||
* Parameters:
|
||||
* probs: 2-D vector where each element is a vector of probabilities
|
||||
* over alphabet of one time step.
|
||||
* time_dim: Number of timesteps.
|
||||
* class_dim: Number of classes (alphabet length + 1 for space character).
|
||||
*/
|
||||
void next(const double *probs,
|
||||
int time_dim,
|
||||
int class_dim);
|
||||
|
||||
/* Return current best hypothesis, optinoally pruning hypothesis space */
|
||||
FlashlightOutput intermediate(bool prune = true);
|
||||
|
||||
/* Get up to num_results transcriptions from current decoder state.
|
||||
*
|
||||
* Parameters:
|
||||
* num_results: Number of hypotheses to return.
|
||||
*
|
||||
* Return:
|
||||
* A vector where each element is a pair of score and decoding result,
|
||||
* in descending order.
|
||||
*/
|
||||
std::vector<FlashlightOutput> decode(size_t num_results = 1);
|
||||
|
||||
private:
|
||||
fl::lib::text::Dictionary lm_tokens_;
|
||||
std::unique_ptr<fl::lib::text::Decoder> decoder_impl_;
|
||||
};
|
||||
|
||||
|
||||
/* CTC Beam Search Decoder
|
||||
* Parameters:
|
||||
@ -146,4 +232,86 @@ ctc_beam_search_decoder_batch(
|
||||
std::unordered_map<std::string, float> hot_words,
|
||||
size_t num_results=1);
|
||||
|
||||
/* Flashlight Beam Search Decoder
 * Parameters:
 *     probs: 2-D vector where each element is a vector of probabilities
 *            over alphabet of one time step.
 *     time_dim: Number of timesteps.
 *     class_dim: Number of output classes (alphabet length plus 1 for the
 *                blank label).
 *     alphabet: The alphabet.
 *     beam_size: The width of beam search.
 *     beam_threshold: Forwarded to the Flashlight decoder options as
 *                     beamThreshold.
 *     cutoff_top_n: Cutoff number for pruning, forwarded to the Flashlight
 *                   decoder options as beamSizeToken.
 *     ext_scorer: External scorer used as the language model. Must not be
 *                 null: its alpha and beta weights are read when the
 *                 decoder is initialized.
 *     token_type: Whether LM units match AM units (Single) or not
 *                 (Aggregate).
 *     lm_tokens: Language model tokens, added in order to the decoder's
 *                token dictionary.
 *     decoder_type: Selects the lexicon-based or lexicon-free decoder.
 *     silence_score: Forwarded to the decoder options as silScore.
 *     merge_with_log_add: Forwarded to the decoder options as logAdd.
 *     criterion_type: Acoustic model criterion (ASG, CTC or S2S).
 *     transitions: Passed through to the Flashlight decoder constructor.
 *     num_results: Number of beams to return.
 * Return:
 *     A vector of FlashlightOutput, one per final hypothesis.
*/
|
||||
|
||||
std::vector<FlashlightOutput>
|
||||
flashlight_beam_search_decoder(
|
||||
const double* probs,
|
||||
int time_dim,
|
||||
int class_dim,
|
||||
const Alphabet& alphabet,
|
||||
size_t beam_size,
|
||||
double beam_threshold,
|
||||
size_t cutoff_top_n,
|
||||
std::shared_ptr<Scorer> ext_scorer,
|
||||
FlashlightDecoderState::LMTokenType token_type,
|
||||
const std::vector<std::string>& lm_tokens,
|
||||
FlashlightDecoderState::DecoderType decoder_type,
|
||||
double silence_score,
|
||||
bool merge_with_log_add,
|
||||
FlashlightDecoderState::CriterionType criterion_type,
|
||||
std::vector<float> transitions,
|
||||
size_t num_results);
|
||||
|
||||
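/* Flashlight Beam Search Decoder for batch data
 * Parameters:
 *     probs: 3-D vector where each element is a 2-D vector that can be used
 *            by flashlight_beam_search_decoder().
 *     batch_size: Number of elements in the batch.
 *     time_dim: Number of timesteps allotted to each batch element in probs.
 *     class_dim: Number of output classes (alphabet length plus 1 for the
 *                blank label).
 *     seq_lengths: Number of timesteps to decode for each batch element.
 *     seq_lengths_size: Length of seq_lengths; must equal batch_size.
 *     alphabet: The alphabet.
 *     beam_size: The width of beam search.
 *     beam_threshold: Forwarded to the Flashlight decoder options as
 *                     beamThreshold.
 *     cutoff_top_n: Cutoff number for pruning, forwarded to the Flashlight
 *                   decoder options as beamSizeToken.
 *     ext_scorer: External scorer used as the language model. Must not be
 *                 null: its alpha and beta weights are read when each
 *                 decoder is initialized.
 *     token_type: Whether LM units match AM units (Single) or not
 *                 (Aggregate).
 *     lm_tokens: Language model tokens, added in order to each decoder's
 *                token dictionary.
 *     decoder_type: Selects the lexicon-based or lexicon-free decoder.
 *     silence_score: Forwarded to the decoder options as silScore.
 *     merge_with_log_add: Forwarded to the decoder options as logAdd.
 *     criterion_type: Acoustic model criterion (ASG, CTC or S2S).
 *     transitions: Passed through to the Flashlight decoder constructor.
 *     num_results: Number of beams to return.
 *     num_processes: Number of threads for beam search. Must be greater
 *                    than zero.
 * Return:
 *     A 2-D vector where each element is a vector of beam search decoding
 *     result for one audio sample.
*/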
|
||||
std::vector<std::vector<FlashlightOutput>>
|
||||
flashlight_beam_search_decoder_batch(
|
||||
const double* probs,
|
||||
int batch_size,
|
||||
int time_dim,
|
||||
int class_dim,
|
||||
const int* seq_lengths,
|
||||
int seq_lengths_size,
|
||||
const Alphabet& alphabet,
|
||||
size_t beam_size,
|
||||
double beam_threshold,
|
||||
size_t cutoff_top_n,
|
||||
std::shared_ptr<Scorer> ext_scorer,
|
||||
FlashlightDecoderState::LMTokenType token_type,
|
||||
const std::vector<std::string>& lm_tokens,
|
||||
FlashlightDecoderState::DecoderType decoder_type,
|
||||
double silence_score,
|
||||
bool merge_with_log_add,
|
||||
FlashlightDecoderState::CriterionType criterion_type,
|
||||
std::vector<float> transitions,
|
||||
size_t num_results,
|
||||
size_t num_processes);
|
||||
|
||||
#endif // CTC_BEAM_SEARCH_DECODER_H_
|
||||
|
@ -12,4 +12,12 @@ struct Output {
|
||||
std::vector<unsigned int> timesteps;
|
||||
};
|
||||
|
||||
/* One hypothesis produced by the Flashlight decoder. */
struct FlashlightOutput {
  // Scores are zero-initialized so a default-constructed output never
  // exposes indeterminate values.
  double aggregate_score = 0.0;       // combined score of the hypothesis
  double acoustic_model_score = 0.0;  // acoustic model contribution
  double language_model_score = 0.0;  // language model contribution
  std::vector<std::string> words;     // decoded word entries
  std::vector<int> tokens;            // decoded token indices
};
|
||||
|
||||
#endif // OUTPUT_H_
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifdef _MSC_VER
|
||||
#include <stdlib.h>
|
||||
#include <io.h>
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
|
||||
#define R_OK 4 /* Read permission. */
|
||||
@ -17,16 +18,27 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include "lm/config.hh"
|
||||
#include "lm/model.hh"
|
||||
#include "lm/state.hh"
|
||||
#include "util/string_piece.hh"
|
||||
#include "kenlm/lm/config.hh"
|
||||
#include "kenlm/lm/model.hh"
|
||||
#include "kenlm/lm/state.hh"
|
||||
#include "kenlm/lm/word_index.hh"
|
||||
#include "kenlm/util/string_piece.hh"
|
||||
|
||||
#include "decoder_utils.h"
|
||||
|
||||
using namespace fl::lib::text;
|
||||
|
||||
static const int32_t MAGIC = 'TRIE';
|
||||
static const int32_t FILE_VERSION = 6;
|
||||
|
||||
// Nothing to set up at construction time.
Scorer::Scorer() = default;
|
||||
|
||||
// No explicit teardown; members clean up after themselves.
Scorer::~Scorer() = default;
|
||||
|
||||
int
|
||||
Scorer::init(const std::string& lm_path,
|
||||
const Alphabet& alphabet)
|
||||
@ -347,3 +359,54 @@ void Scorer::fill_dictionary(const std::unordered_set<std::string>& vocabulary)
|
||||
std::unique_ptr<FstType> converted(new FstType(*new_dict));
|
||||
this->dictionary = std::move(converted);
|
||||
}
|
||||
|
||||
/* Create the initial language model state.
 *
 * With startWithNothing the state is written from the null context,
 * otherwise from the begin-of-sentence context.
 */
LMStatePtr
Scorer::start(bool startWithNothing)
{
  auto initial = std::make_shared<KenLMState>();

  if (!startWithNothing) {
    language_model_->BeginSentenceWrite(initial->ken());
  } else {
    language_model_->NullContextWrite(initial->ken());
  }

  return initial;
}
|
||||
|
||||
std::pair<LMStatePtr, float>
|
||||
Scorer::score(const LMStatePtr& state,
|
||||
const int usrTokenIdx)
|
||||
{
|
||||
if (usrTokenIdx < 0 || usrTokenIdx >= usrToLmIdxMap_.size()) {
|
||||
throw std::runtime_error(
|
||||
"[Scorer] Invalid user token index: " + std::to_string(usrTokenIdx));
|
||||
}
|
||||
auto inState = std::static_pointer_cast<KenLMState>(state);
|
||||
auto outState = inState->child<KenLMState>(usrTokenIdx);
|
||||
float score = language_model_->BaseScore(
|
||||
inState->ken(), usrToLmIdxMap_[usrTokenIdx], outState->ken());
|
||||
return std::make_pair(std::move(outState), score);
|
||||
}
|
||||
|
||||
std::pair<LMStatePtr, float>
|
||||
Scorer::finish(const LMStatePtr& state)
|
||||
{
|
||||
auto inState = std::static_pointer_cast<KenLMState>(state);
|
||||
auto outState = inState->child<KenLMState>(-1);
|
||||
float score = language_model_->BaseScore(
|
||||
inState->ken(),
|
||||
language_model_->BaseVocabulary().EndSentence(),
|
||||
outState->ken()
|
||||
);
|
||||
return std::make_pair(std::move(outState), score);
|
||||
}
|
||||
|
||||
void
|
||||
Scorer::load_words(const Dictionary& word_dict)
|
||||
{
|
||||
const auto& vocab = language_model_->BaseVocabulary();
|
||||
usrToLmIdxMap_.resize(word_dict.indexSize());
|
||||
for (int i = 0; i < word_dict.indexSize(); ++i) {
|
||||
usrToLmIdxMap_[i] = vocab.Index(word_dict.getEntry(i));
|
||||
}
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user