Introducing utils.helpers for miscellaneous helper functions

2020-01-14 16:02:40 +01:00 · 2020-01-14 16:02:40 +01:00 · ad9f0c581b
commit ad9f0c581b
parent 7b3bc31171
10 changed files with 21 additions and 27 deletions
--- a/bin/benchmark_nc.py
+++ b/bin/benchmark_nc.py
@ -9,7 +9,7 @@ import sys
 # To use util.tc
 sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(sys.argv[0]))))
 import util.taskcluster as tcu
-from util.benchmark import keep_only_digits
+from util.helpers import keep_only_digits
 import paramiko
 import argparse
@ -171,8 +171,8 @@ def all_files(models=[]):
        assert len(fa) == len(fb)
        assert len(fa) == 1
-        fa = keep_only_digits(fa[0])
+        fa = int(keep_only_digits(fa[0]))
-        fb = keep_only_digits(fb[0])
+        fb = int(keep_only_digits(fb[0]))
        if fa < fb:
            return -1
--- a/bin/benchmark_plotter.py
+++ b/bin/benchmark_plotter.py
@ -8,8 +8,7 @@ import sys
 # To use util.tc
 sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(sys.argv[0]))))
-import util.taskcluster as tcu
+from util.helpers import keep_only_digits
-from util.benchmark import keep_only_digits
 import argparse
 import numpy
@ -35,7 +34,7 @@ def reduce_filename(f):
    '''
    f = os.path.basename(f).split('.')
-    return keep_only_digits(f[-3])
+    return int(keep_only_digits(f[-3]))
 def ingest_csv(datasets=None, range=None):
    existing_files = filter(lambda x: os.path.isfile(x[1]), datasets)
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@ -27,7 +27,7 @@ from multiprocessing.dummy import Pool
 from multiprocessing import cpu_count
 from util.downloader import SIMPLE_BAR
 from util.text import Alphabet, validate_label
-from util.feeding import secs_to_hours
+from util.helpers import secs_to_hours
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@ -28,7 +28,7 @@ from glob import glob
 from util.downloader import maybe_download
 from util.text import Alphabet, validate_label
-from util.feeding import secs_to_hours
+from util.helpers import secs_to_hours
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_m-ailabs.py
+++ b/bin/import_m-ailabs.py
@ -26,7 +26,7 @@ from glob import glob
 from util.downloader import maybe_download
 from util.text import Alphabet, validate_label
-from util.feeding import secs_to_hours
+from util.helpers import secs_to_hours
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_slr57.py
+++ b/bin/import_slr57.py
@ -29,7 +29,7 @@ from glob import glob
 from util.downloader import maybe_download
 from util.text import Alphabet, validate_label
-from util.feeding import secs_to_hours
+from util.helpers import secs_to_hours
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_ts.py
+++ b/bin/import_ts.py
@ -27,7 +27,7 @@ from os import path
 from util.downloader import maybe_download
 from util.text import validate_label
-from util.feeding import secs_to_hours
+from util.helpers import secs_to_hours
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/stats.py
+++ b/stats.py
@ -3,7 +3,8 @@
 import argparse
 import os
-from util.feeding import read_csvs, secs_to_hours
+from util.helpers import secs_to_hours
+from util.feeding import read_csvs
 def main():
    parser = argparse.ArgumentParser()
--- a/util/benchmark.py
+++ b/util/benchmark.py
@ -1,15 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, division, print_function
-def keep_only_digits(s):
-    r'''
-    local helper to just keep digits
-    '''
-    fs = ''
-    for c in s:
-        if c.isdigit():
-            fs += c
-    return int(fs)
--- a/util/helpers.py
+++ b/util/helpers.py
@ -0,0 +1,9 @@
 def keep_only_digits(txt):
    '''
    Return a string made of the characters of ``txt`` for which
    ``isdigit()`` is true, kept in their original order.

    The result is a string (possibly empty); no conversion to ``int``
    is performed here.
    '''
    return ''.join(ch for ch in txt if ch.isdigit())
 def secs_to_hours(secs):
    '''
    Format a number of seconds as an ``H:MM:SS`` string.

    Hours are not wrapped at 24 and are printed without padding;
    minutes and seconds are zero-padded to two digits.
    '''
    whole_hours, leftover = divmod(secs, 3600)
    # divmod yields (minutes, seconds); prepend the hours to fill all three fields.
    return '%d:%02d:%02d' % ((whole_hours,) + divmod(leftover, 60))