* Redo remote I/O changes once more; this time without messing with taskcluster * Add bin changes * Fix merge-induced issue? * For the interleaved case with multiple collections, unpack audio on the fly To reproduce the previous failure rm data/smoke_test/ldc93s1.csv rm data/smoke_test/ldc93s1.sdb rm -rf /tmp/ldc93s1_cache_sdb_csv rm -rf /tmp/ckpt_sdb_csv rm -rf /tmp/train_sdb_csv ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 16000 python -u DeepSpeech.py --noshow_progressbar --noearly_stop --train_files ./data/smoke_test/ldc93s1.sdb,./data/smoke_test/ldc93s1.csv --train_batch_size 1 --feature_cache /tmp/ldc93s1_cache_sdb_csv --dev_files ./data/smoke_test/ldc93s1.sdb,./data/smoke_test/ldc93s1.csv --dev_batch_size 1 --test_files ./data/smoke_test/ldc93s1.sdb,./data/smoke_test/ldc93s1.csv --test_batch_size 1 --n_hidden 100 --epochs 109 --max_to_keep 1 --checkpoint_dir /tmp/ckpt_sdb_csv --learning_rate 0.001 --dropout_rate 0.05 --export_dir /tmp/train_sdb_csv --scorer_path data/smoke_test/pruned_lm.scorer --audio_sample_rate 16000 * Attempt to preserve length information with a wrapper around `map()`… this gets pretty python-y * Call the right `__next__()` * Properly implement the rest of the map wrappers here…… * Fix trailing whitespace situation and other linter complaints * Remove data accidentally checked in * Fix overlay augmentations * Wavs must be open in rb mode if we're passing in an external file pointer -- this confused me * Lint whitespace * Revert "Fix trailing whitespace situation and other linter complaints" This reverts commit c3c45397a2f98e9b00d00c18c4ced4fc52475032. * Fix linter issue but without such an aggressive diff * Move unpack_maybe into sample_collections * Use unpack_maybe in place of duplicate lambda * Fix confusing comment * Add clarifying comment for on-the-fly unpacking
67 lines
2.5 KiB
Python
Executable File
67 lines
2.5 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""
|
|
Tool for comparing two wav samples
|
|
"""
|
|
import sys
|
|
import argparse
|
|
|
|
from deepspeech_training.util.audio import AUDIO_TYPE_NP, mean_dbfs
|
|
from deepspeech_training.util.sample_collections import load_sample
|
|
|
|
|
|
def fail(message):
    """Report an error and abort the program.

    Writes ``message`` to stderr (flushed immediately) and then terminates
    the interpreter with exit status 1.

    Args:
        message: Text to print to stderr before exiting.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print(message, file=sys.stderr, flush=True)
    sys.exit(1)
|
|
|
|
|
|
def compare_samples():
    """Compare the two samples named on the command line.

    Reads ``sample1``, ``sample2``, ``threshold``, ``if_differ`` and
    ``no_success_output`` from the module-level ``CLI_ARGS`` namespace, so
    that global must be bound before this function is called.

    The check proceeds in three stages, each of which aborts through
    ``fail()`` (exit status 1) on the first mismatch:

    1. the samples' ``audio_format`` attributes must be equal,
    2. their ``duration`` attributes must be equal,
    3. ``mean_dbfs`` of the element-wise difference of the audio data is
       compared against ``CLI_ARGS.threshold``.

    With ``--if-differ`` the third check is inverted: a difference at or
    below the threshold is the failure case. Format and duration mismatches
    still fail in that mode because they are checked first.

    On success a summary line is printed to stderr unless
    ``CLI_ARGS.no_success_output`` is set.
    """
    sample1 = load_sample(CLI_ARGS.sample1).unpack()
    sample2 = load_sample(CLI_ARGS.sample2).unpack()

    if sample1.audio_format != sample2.audio_format:
        fail('Samples differ on: audio-format ({} and {})'.format(sample1.audio_format, sample2.audio_format))
    if sample1.duration != sample2.duration:
        fail('Samples differ on: duration ({} and {})'.format(sample1.duration, sample2.duration))

    # Convert both samples to AUDIO_TYPE_NP before subtracting their audio.
    # NOTE(review): presumably this yields numpy arrays of equal shape once
    # format and duration match -- confirm against util.audio.
    sample1.change_audio_type(AUDIO_TYPE_NP)
    sample2.change_audio_type(AUDIO_TYPE_NP)
    audio_diff = sample1.audio - sample2.audio
    diff_dbfs = mean_dbfs(audio_diff)

    differ_msg = 'Samples differ on: sample data ({:0.2f} dB difference) '.format(diff_dbfs)
    equal_msg = 'Samples are considered equal ({:0.2f} dB difference)'.format(diff_dbfs)

    if CLI_ARGS.if_differ:
        # Inverse check: equal samples are the error case.
        if diff_dbfs <= CLI_ARGS.threshold:
            fail(equal_msg)
        if not CLI_ARGS.no_success_output:
            print(differ_msg, file=sys.stderr, flush=True)
    else:
        if diff_dbfs > CLI_ARGS.threshold:
            fail(differ_msg)
        if not CLI_ARGS.no_success_output:
            print(equal_msg, file=sys.stderr, flush=True)
|
|
|
|
|
|
def handle_args(argv=None):
    """Parse the command line of the sample comparison tool.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            the behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace with the attributes ``sample1``, ``sample2``
        (filenames), ``threshold`` (float, default -60.0), ``if_differ``
        (bool) and ``no_success_output`` (bool).

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Tool for checking similarity of two samples"
    )
    parser.add_argument("sample1", help="Filename of sample 1 to compare")
    parser.add_argument("sample2", help="Filename of sample 2 to compare")
    parser.add_argument(
        "--threshold",
        type=float,
        default=-60.0,
        help="dB of sample deltas above which they are considered different",
    )
    parser.add_argument(
        "--if-differ",
        action="store_true",
        # The two literals are concatenated implicitly; the trailing space on
        # the first one keeps the sentences from running together.
        help="If to succeed and return status code 0 on different signals and fail on equal ones (inverse check). "
        "This will still fail on different formats or durations.",
    )
    parser.add_argument(
        "--no-success-output",
        action="store_true",
        help="Stay silent on success (if samples are equal or - with --if-differ - samples are not equal)",
    )
    return parser.parse_args(argv)
|
|
|
|
|
|
if __name__ == "__main__":
    # compare_samples() reads its options from the module-level CLI_ARGS,
    # so the parsed namespace must be bound before the comparison runs.
    CLI_ARGS = handle_args()
    compare_samples()
|