diff --git a/bin/benchmark_nc.py b/bin/benchmark_nc.py index eeac928a..6a98dcaa 100755 --- a/bin/benchmark_nc.py +++ b/bin/benchmark_nc.py @@ -9,7 +9,7 @@ import sys # To use util.tc sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(sys.argv[0])))) import util.taskcluster as tcu -from util.benchmark import keep_only_digits +from util.helpers import keep_only_digits import paramiko import argparse @@ -171,8 +171,8 @@ def all_files(models=[]): assert len(fa) == len(fb) assert len(fa) == 1 - fa = keep_only_digits(fa[0]) - fb = keep_only_digits(fb[0]) + fa = int(keep_only_digits(fa[0])) + fb = int(keep_only_digits(fb[0])) if fa < fb: return -1 diff --git a/bin/benchmark_plotter.py b/bin/benchmark_plotter.py index d2221dd8..3952f11c 100755 --- a/bin/benchmark_plotter.py +++ b/bin/benchmark_plotter.py @@ -8,8 +8,7 @@ import sys # To use util.tc sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(sys.argv[0])))) -import util.taskcluster as tcu -from util.benchmark import keep_only_digits +from util.helpers import keep_only_digits import argparse import numpy @@ -35,7 +34,7 @@ def reduce_filename(f): ''' f = os.path.basename(f).split('.') - return keep_only_digits(f[-3]) + return int(keep_only_digits(f[-3])) def ingest_csv(datasets=None, range=None): existing_files = filter(lambda x: os.path.isfile(x[1]), datasets) diff --git a/bin/import_cv2.py b/bin/import_cv2.py index c96cd445..acea122b 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -27,7 +27,7 @@ from multiprocessing.dummy import Pool from multiprocessing import cpu_count from util.downloader import SIMPLE_BAR from util.text import Alphabet, validate_label -from util.feeding import secs_to_hours +from util.helpers import secs_to_hours FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py index 8706fde6..ae893350 100755 --- a/bin/import_lingua_libre.py +++ b/bin/import_lingua_libre.py @@ -28,7 +28,7 @@ from 
glob import glob from util.downloader import maybe_download from util.text import Alphabet, validate_label -from util.feeding import secs_to_hours +from util.helpers import secs_to_hours FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] SAMPLE_RATE = 16000 diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py index ac2dfda0..060e8f2a 100755 --- a/bin/import_m-ailabs.py +++ b/bin/import_m-ailabs.py @@ -26,7 +26,7 @@ from glob import glob from util.downloader import maybe_download from util.text import Alphabet, validate_label -from util.feeding import secs_to_hours +from util.helpers import secs_to_hours FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] SAMPLE_RATE = 16000 diff --git a/bin/import_slr57.py b/bin/import_slr57.py index 67ea54d0..5dde767a 100755 --- a/bin/import_slr57.py +++ b/bin/import_slr57.py @@ -29,7 +29,7 @@ from glob import glob from util.downloader import maybe_download from util.text import Alphabet, validate_label -from util.feeding import secs_to_hours +from util.helpers import secs_to_hours FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] SAMPLE_RATE = 16000 diff --git a/bin/import_ts.py b/bin/import_ts.py index 6ff10426..a9d9b949 100755 --- a/bin/import_ts.py +++ b/bin/import_ts.py @@ -27,7 +27,7 @@ from os import path from util.downloader import maybe_download from util.text import validate_label -from util.feeding import secs_to_hours +from util.helpers import secs_to_hours FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] SAMPLE_RATE = 16000 diff --git a/stats.py b/stats.py index 466b78e5..37ae8581 100644 --- a/stats.py +++ b/stats.py @@ -3,7 +3,8 @@ import argparse import os -from util.feeding import read_csvs, secs_to_hours +from util.helpers import secs_to_hours +from util.feeding import read_csvs def main(): parser = argparse.ArgumentParser() diff --git a/util/benchmark.py b/util/benchmark.py deleted file mode 100644 index 7dca193d..00000000 --- a/util/benchmark.py +++ /dev/null @@ 
# Contents of util/helpers.py as introduced by this patch. The same patch
# deletes util/benchmark.py, whose keep_only_digits() returned int(...);
# the version below returns the digits as a string, and the call sites
# touched by the patch wrap it in int() themselves.


def keep_only_digits(txt):
    '''
    Return the digit characters of ``txt``, in their original order.

    Every character for which ``isdigit()`` is false is dropped. The result
    is a string (empty when ``txt`` contains no digit), not a number;
    callers that need a number convert it, e.g. ``int(keep_only_digits(s))``.
    '''
    return ''.join(c for c in txt if c.isdigit())


def secs_to_hours(secs):
    '''
    Format a duration given in seconds as ``H:MM:SS``.

    Hours are not capped or zero-padded; minutes and seconds are padded to
    two digits. Fractional seconds are truncated by the ``%d`` conversion.
    A negative duration is rendered as the formatted magnitude with a
    leading ``-`` (``-1`` gives ``-0:00:01``).
    '''
    # divmod() floors, so applying it to a negative value directly would
    # borrow from the hours field (-1 -> "-1:59:59"). Split the magnitude
    # and put the sign back afterwards.
    sign = '-' if secs < 0 else ''
    hours, remainder = divmod(abs(secs), 3600)
    minutes, seconds = divmod(remainder, 60)
    return '%s%d:%02d:%02d' % (sign, hours, minutes, seconds)