Fix #3355: Add valgrind runs

This commit is contained in:
Alexandre Lissy 2020-09-29 15:52:59 +02:00
parent 86bba80b0e
commit fdd663829a
22 changed files with 11651 additions and 1 deletions

7
ds_generic.supp Normal file
View File

@ -0,0 +1,7 @@
{
libgomp_malloc
Memcheck:Leak
match-leak-kinds: reachable
fun:malloc
obj:/usr/lib/*/libgomp.so.1.0.0
}

10
ds_lib.supp Normal file
View File

@ -0,0 +1,10 @@
{
deepspeech_tflite_error_reporter
Memcheck:Leak
match-leak-kinds: reachable
fun:_Znwm
fun:_ZN6tflite20DefaultErrorReporterEv
fun:_ZN16TFLiteModelState4initEPKc
fun:DS_CreateModel
fun:main
}

1356
ds_openfst.supp Normal file

File diff suppressed because it is too large Load Diff

42
ds_sox.supp Normal file
View File

@ -0,0 +1,42 @@
{
sox_effect_gain
Memcheck:Leak
match-leak-kinds: reachable
fun:malloc
fun:realloc
fun:lsx_realloc
fun:lsx_usage_lines
fun:lsx_gain_effect_fn
fun:sox_find_effect
fun:_Z14GetAudioBufferPKci
fun:_Z11ProcessFileP10ModelStatePKcb
fun:main
}
{
sox_effect_rate
Memcheck:Leak
match-leak-kinds: reachable
fun:malloc
fun:realloc
fun:lsx_realloc
fun:lsx_usage_lines
fun:lsx_rate_effect_fn
fun:sox_find_effect
fun:_Z14GetAudioBufferPKci
fun:_Z11ProcessFileP10ModelStatePKcb
fun:main
}
{
sox_effect_flanger
Memcheck:Leak
match-leak-kinds: reachable
fun:malloc
fun:realloc
fun:lsx_realloc
fun:lsx_usage_lines
fun:lsx_flanger_effect_fn
fun:sox_find_effect
fun:_Z14GetAudioBufferPKci
fun:_Z11ProcessFileP10ModelStatePKcb
fun:main
}

View File

@ -38,6 +38,8 @@ int json_candidate_transcripts = 3;
int stream_size = 0;
int extended_stream_size = 0;
char* hot_words = NULL;
void PrintHelp(const char* bin)
@ -58,6 +60,7 @@ void PrintHelp(const char* bin)
"\t--json\t\t\t\tExtended output, shows word timings as JSON\n"
"\t--candidate_transcripts NUMBER\tNumber of candidate transcripts to include in JSON output\n"
"\t--stream size\t\t\tRun in stream mode, output intermediate results\n"
"\t--extended_stream size\t\t\tRun in stream mode using metadata output, output intermediate results\n"
"\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n"
"\t--help\t\t\t\tShow help\n"
"\t--version\t\t\tPrint version and exits\n";
@ -82,6 +85,7 @@ bool ProcessArgs(int argc, char** argv)
{"json", no_argument, nullptr, 'j'},
{"candidate_transcripts", required_argument, nullptr, 150},
{"stream", required_argument, nullptr, 's'},
{"extended_stream", required_argument, nullptr, 'S'},
{"hot_words", required_argument, nullptr, 'w'},
{"version", no_argument, nullptr, 'v'},
{"help", no_argument, nullptr, 'h'},
@ -144,6 +148,10 @@ bool ProcessArgs(int argc, char** argv)
stream_size = atoi(optarg);
break;
case 'S':
extended_stream_size = atoi(optarg);
break;
case 'v':
has_versions = true;
break;
@ -172,7 +180,7 @@ bool ProcessArgs(int argc, char** argv)
return false;
}
if (stream_size < 0 || stream_size % 160 != 0) {
if ((stream_size < 0 || stream_size % 160 != 0) || (extended_stream_size < 0 || extended_stream_size % 160 != 0)) {
std::cout <<
"Stream buffer size must be multiples of 160\n";
return false;

View File

@ -205,6 +205,38 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
DS_FreeString((char *) last);
}
res.string = DS_FinishStream(ctx);
} else if (extended_stream_size > 0) {
StreamingState* ctx;
int status = DS_CreateStream(aCtx, &ctx);
if (status != DS_ERR_OK) {
res.string = strdup("");
return res;
}
size_t off = 0;
const char *last = nullptr;
const char *prev = nullptr;
while (off < aBufferSize) {
size_t cur = aBufferSize - off > extended_stream_size ? extended_stream_size : aBufferSize - off;
DS_FeedAudioContent(ctx, aBuffer + off, cur);
off += cur;
prev = last;
const Metadata* result = DS_IntermediateDecodeWithMetadata(ctx, 1);
const char* partial = CandidateTranscriptToString(&result->transcripts[0]);
if (last == nullptr || strcmp(last, partial)) {
printf("%s\n", partial);
last = partial;
} else {
free((char *) partial);
}
if (prev != nullptr && prev != last) {
free((char *) prev);
}
DS_FreeMetadata((Metadata *)result);
}
const Metadata* result = DS_FinishStreamWithMetadata(ctx, 1);
res.string = CandidateTranscriptToString(&result->transcripts[0]);
DS_FreeMetadata((Metadata *)result);
free((char *) last);
} else {
res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize);
}

57
parse_valgrind_suppressions.sh Executable file
View File

@ -0,0 +1,57 @@
#! /usr/bin/awk -f
# A script to extract the actual suppression info from the output of (for example) valgrind --leak-check=full --show-reachable=yes --error-limit=no --gen-suppressions=all ./minimal
# The desired bits are between ^{ and ^} (including the braces themselves).
# The combined output should either be appended to /usr/lib/valgrind/default.supp, or placed in a .supp of its own
# If the latter, either tell valgrind about it each time with --suppressions=<filename>, or add that line to ~/.valgrindrc
# NB This script uses the |& coprocess operator, which is gawk-specific. In case of failure, check that you're using gawk rather than some other awk
# The script looks for suppressions. When it finds one it stores it temporarily in an array,
# and also feeds it line by line to the external app 'md5sum' which generates a unique checksum for it.
# The checksum is used as an index in a different array. If an item with that index already exists the suppression must be a duplicate and is discarded.
# State: suppression is 1 while we are inside a { ... } block; md5sum holds the coprocess command name.
BEGIN { suppression=0; md5sum = "md5sum" }
# If the line begins with '{', it's the start of a suppression; so set the flag and reset the line counter
/^{/ {
suppression=1; i=0; next
}
# If the line begins with '}', it's the end of a suppression
/^}/ {
if (suppression)
{ suppression=0;
close(md5sum, "to") # We've finished sending data to md5sum, so close the write side of the coprocess pipe
ProcessInput() # Read the checksum back and output the suppression if it is new
delete supparray # We don't want subsequent suppressions to append to it!
}
}
# Otherwise, it's a normal line. If we're inside a suppression, store it, and pipe it to md5sum. Otherwise it's cruft, so ignore it
{ if (suppression)
{
supparray[++i] = $0
print |& md5sum
}
}
# Read the md5 checksum for the suppression just sent, and emit the suppression if unseen.
function ProcessInput()
{
# Pipe the result from md5sum, then close it
md5sum |& getline result
close(md5sum)
# gawk can't cope with enormous ints like the checksum would be, so stringify it first by prefixing a definite string
resultstring = "prefix"result
if (! (resultstring in chksum_array) )
{ chksum_array[resultstring] = 0; # This checksum hasn't been seen before, so add it to the array
OutputSuppression() # and output the contents of the suppression
}
}
# Print the buffered suppression (supparray[1..i]) wrapped in its braces.
function OutputSuppression()
{
# A suppression is surrounded by '{' and '}'. Its data was stored line by line in the array
print "{"
for (n=1; n <= i; ++n)
{ print supparray[n] }
print "}"
}

View File

@ -28,6 +28,9 @@ tensorflow:
packages_win:
pacman: 'pacman --noconfirm -S patch unzip tar'
msys64: 'ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64'
valgrind:
packages_bionic:
apt: 'apt-get -qq update && apt-get -qq -y install python3 python3-simplejson python-is-python3 valgrind'
java:
packages_xenial:
apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f'

View File

@ -22,6 +22,9 @@ source ${tc_tests_utils}/tc-node-utils.sh
# Scoping of .Net-related tooling
source ${tc_tests_utils}/tc-dotnet-utils.sh
# For checking with valgrind
source ${tc_tests_utils}/tc-valgrind-utils.sh
# Functions that controls directly the build process
source ${tc_tests_utils}/tc-build-utils.sh

25
taskcluster/tc-valgrind-cpp.sh Executable file
View File

@ -0,0 +1,25 @@
#!/bin/bash
# Run the C++ `deepspeech` client under valgrind (full TF runtime build).
# Usage: tc-valgrind-cpp.sh --basic | --metadata
# The run_valgrind_* helpers are provided by tc-valgrind-utils.sh, which is
# pulled in via tc-tests-utils.sh.
set -xe

kind=$1

source $(dirname "$0")/tc-tests-utils.sh

set_ldc_sample_filename "16k"

download_material "${TASKCLUSTER_TMP_DIR}/ds"

mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH

# Dispatch on the requested flavor; an unknown flavor runs nothing,
# matching the original if-chain behavior.
case "${kind}" in
  --basic)
    run_valgrind_basic
    run_valgrind_stream
    ;;
  --metadata)
    run_valgrind_extended
    run_valgrind_extended_stream
    ;;
esac

View File

@ -0,0 +1,29 @@
#!/bin/bash
# Run the C++ `deepspeech` client under valgrind (TFLite runtime build).
# Usage: tc-valgrind-cpp_tflite.sh --basic | --metadata
# The run_valgrind_* helpers are provided by tc-valgrind-utils.sh, which is
# pulled in via tc-tests-utils.sh.
set -xe

kind=$1

source $(dirname "$0")/tc-tests-utils.sh

set_ldc_sample_filename "16k"

# Point at the TFLite export of the test model.
# NOTE(review): model_name and model_name_mmap are intentionally identical here
# (TFLite has no separate mmap-able format) — presumably both names are read by
# the sourced helpers; confirm against tc-tests-utils.sh.
model_source=${DEEPSPEECH_TEST_MODEL//.pb/.tflite}
model_name=$(basename "${model_source}")
model_name_mmap=$(basename "${model_source}")

download_material "${TASKCLUSTER_TMP_DIR}/ds"

mkdir -p ${TASKCLUSTER_ARTIFACTS} || true

export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH

# Dispatch on the requested flavor; an unknown flavor runs nothing,
# matching the original if-chain behavior.
case "${kind}" in
  --basic)
    run_valgrind_basic
    run_valgrind_stream
    ;;
  --metadata)
    run_valgrind_extended
    run_valgrind_extended_stream
    ;;
esac

View File

@ -0,0 +1,68 @@
#!/bin/bash
# Helpers to run the `deepspeech` C++ client under valgrind with the
# project's suppression files. Sourced by tc-tests-utils.sh; callers use the
# four public run_valgrind_* functions below.
set -xe

# How to generate / update valgrind suppression lists:
# https://wiki.wxwidgets.org/Valgrind_Suppression_File_Howto#How_to_make_a_suppression_file
#
# $ valgrind --leak-check=full --show-reachable=yes --error-limit=no --gen-suppressions=all --log-file=minimalraw.log ./minimal
# $ cat ./minimalraw.log | ./parse_valgrind_suppressions.sh > minimal.supp

# Overridable via the environment; exits with 4242 on any reported error so
# CI can distinguish valgrind failures from test-harness failures.
VALGRIND_CMD=${VALGRIND_CMD:-"valgrind \
	--error-exitcode=4242 \
	--errors-for-leak-kinds=all \
	--leak-check=full \
	--leak-resolution=high \
	--show-reachable=yes \
	--track-origins=yes \
	--gen-suppressions=all \
	--suppressions=${DS_DSDIR}/ds_generic.supp \
	--suppressions=${DS_DSDIR}/ds_lib.supp \
	--suppressions=${DS_DSDIR}/ds_sox.supp \
	--suppressions=${DS_DSDIR}/ds_openfst.supp \
	--suppressions=${DS_DSDIR}/tensorflow_full_runtime.supp \
	--suppressions=${DS_DSDIR}/tensorflow_tflite_runtime.supp \
"}

# Internal helper shared by the four public wrappers below, so the
# model/scorer/audio plumbing lives in one place.
#   $1   : basename (no extension) of the valgrind log written to ${TASKCLUSTER_ARTIFACTS}
#   $2.. : extra deepspeech flags, inserted before the trailing -t
# NOTE(review): relies on model_name_mmap and ldc93s1_sample_filename being set
# by the calling script / tc-tests-utils.sh — confirm they are always defined.
run_valgrind_deepspeech()
{
  local log_name=$1
  shift

  ${VALGRIND_CMD} --log-file=${TASKCLUSTER_ARTIFACTS}/${log_name}.log \
    deepspeech \
      --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
      --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer \
      --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} \
      "$@" \
      -t
}

# Plain one-shot recognition.
run_valgrind_basic()
{
  run_valgrind_deepspeech valgrind_basic
}

# Streaming recognition, feeding 320-sample chunks.
run_valgrind_stream()
{
  run_valgrind_deepspeech valgrind_stream --stream 320
}

# One-shot recognition with extended (metadata) output.
run_valgrind_extended()
{
  run_valgrind_deepspeech valgrind_extended --extended
}

# Streaming recognition with metadata output, 320-sample chunks.
run_valgrind_extended_stream()
{
  run_valgrind_deepspeech valgrind_stream_extended --extended_stream 320
}

0
taskcluster/test-cpp_16k_tflite-linux-amd64-opt.yml Executable file → Normal file
View File

0
taskcluster/test-cpp_8k_tflite-linux-amd64-opt.yml Executable file → Normal file
View File

View File

@ -0,0 +1,16 @@
# TaskCluster task: run valgrind over the C++ TFLite client, basic flavor
# (plain + stream modes), on Linux/AMD64 against the debug TFLite build.
# NOTE(review): docker_image is ubuntu:20.04 (focal) but the package list key
# is named packages_bionic (18.04) — confirm the key name is intentional.
build:
template_file: test-linux-opt-base.tyml
dependencies:
- "linux-amd64-tflite-dbg"
- "test-training_16k-linux-amd64-py36m-opt"
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
docker_image: "ubuntu:20.04"
system_setup:
>
${valgrind.packages_bionic.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp_tflite.sh --basic"
workerType: "${docker.dsHighMemTests}"
metadata:
name: "DeepSpeech Linux AMD64 valgrind C++ TFLite basic tests"
description: "Testing basic DeepSpeech valgrind C++ TFLite for Linux/AMD64"

View File

@ -0,0 +1,16 @@
# TaskCluster task: run valgrind over the C++ client (full TF runtime), basic
# flavor (plain + stream modes), on Linux/AMD64 against the debug CPU build.
# NOTE(review): docker_image is ubuntu:20.04 (focal) but the package list key
# is named packages_bionic (18.04) — confirm the key name is intentional.
build:
template_file: test-linux-opt-base.tyml
dependencies:
- "linux-amd64-cpu-dbg"
- "test-training_16k-linux-amd64-py36m-opt"
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
docker_image: "ubuntu:20.04"
system_setup:
>
${valgrind.packages_bionic.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp.sh --basic"
workerType: "${docker.dsHighMemTests}"
metadata:
name: "DeepSpeech Linux AMD64 valgrind C++ basic tests"
description: "Testing basic DeepSpeech valgrind C++ for Linux/AMD64"

View File

@ -0,0 +1,16 @@
# TaskCluster task: run valgrind over the C++ TFLite client, metadata flavor
# (--extended + --extended_stream modes), on Linux/AMD64 against the debug
# TFLite build.
# NOTE(review): docker_image is ubuntu:20.04 (focal) but the package list key
# is named packages_bionic (18.04) — confirm the key name is intentional.
build:
template_file: test-linux-opt-base.tyml
dependencies:
- "linux-amd64-tflite-dbg"
- "test-training_16k-linux-amd64-py36m-opt"
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
docker_image: "ubuntu:20.04"
system_setup:
>
${valgrind.packages_bionic.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp_tflite.sh --metadata"
workerType: "${docker.dsHighMemTests}"
metadata:
name: "DeepSpeech Linux AMD64 valgrind C++ TFLite metadata tests"
description: "Testing metadata DeepSpeech valgrind C++ TFLite for Linux/AMD64"

View File

@ -0,0 +1,16 @@
# TaskCluster task: run valgrind over the C++ client (full TF runtime),
# metadata flavor (--extended + --extended_stream modes), on Linux/AMD64
# against the debug CPU build.
# NOTE(review): docker_image is ubuntu:20.04 (focal) but the package list key
# is named packages_bionic (18.04) — confirm the key name is intentional.
build:
template_file: test-linux-opt-base.tyml
dependencies:
- "linux-amd64-cpu-dbg"
- "test-training_16k-linux-amd64-py36m-opt"
test_model_task: "test-training_16k-linux-amd64-py36m-opt"
docker_image: "ubuntu:20.04"
system_setup:
>
${valgrind.packages_bionic.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-valgrind-cpp.sh --metadata"
workerType: "${docker.dsHighMemTests}"
metadata:
name: "DeepSpeech Linux AMD64 valgrind C++ metadata tests"
description: "Testing metadata DeepSpeech valgrind C++ for Linux/AMD64"

0
taskcluster/test-cpp_tflite-linux-amd64-prod-opt.yml Executable file → Normal file
View File

View File

@ -34,6 +34,7 @@ then:
DECODER_ARTIFACTS_ROOT: https://community-tc.services.mozilla.com/api/queue/v1/task/${linux_amd64_ctc}/artifacts/public
PIP_DEFAULT_TIMEOUT: "60"
EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
DEBIAN_FRONTEND: "noninteractive"
command:
- "/bin/bash"

9945
tensorflow_full_runtime.supp Normal file

File diff suppressed because it is too large Load Diff

View File