From 44e502e236d676dfcdb3068f6a6d9d1a9d644dd1 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Mon, 10 Sep 2018 22:41:14 -0300
Subject: [PATCH 1/5] Feature caching

---
 DeepSpeech.py      | 34 ++++++++++++++--
 evaluate.py        | 83 +--------------------------------------
 requirements.txt   |  1 +
 util/audio.py      | 62 ++++++-----------------------
 util/feeding.py    | 70 +++++++++++++++++----------------
 util/preprocess.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 179 insertions(+), 169 deletions(-)
 create mode 100644 util/preprocess.py

diff --git a/DeepSpeech.py b/DeepSpeech.py
index 44a30bf5..e128ac42 100755
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -25,6 +25,7 @@ from tensorflow.python.tools import freeze_graph
 from threading import Thread, Lock
 from util.audio import audiofile_to_input_vector
 from util.feeding import DataSet, ModelFeeder
+from util.preprocess import preprocess
 from util.gpu import get_available_gpus
 from util.shared_lib import check_cupti
 from util.text import sparse_tensor_value_to_texts, wer, levenshtein, Alphabet, ndarray_to_text
@@ -40,6 +41,10 @@ def create_flags():
     tf.app.flags.DEFINE_string ('test_files', '', 'comma separated list of files specifying the dataset used for testing. multiple files will get merged')
     tf.app.flags.DEFINE_boolean ('fulltrace', False, 'if full trace debug info should be generated during training')
 
+    tf.app.flags.DEFINE_string ('train_cached_features_path', '', 'path to an HDF5 file used to cache features computed from --train_files. if the file exists, features are loaded from it instead of being recomputed')
+    tf.app.flags.DEFINE_string ('dev_cached_features_path', '', 'path to an HDF5 file used to cache features computed from --dev_files. if the file exists, features are loaded from it instead of being recomputed')
+    tf.app.flags.DEFINE_string ('test_cached_features_path', '', 'path to an HDF5 file used to cache features computed from --test_files. if the file exists, features are loaded from it instead of being recomputed')
+
     # Cluster configuration
     # =====================
 
@@ -402,7 +407,7 @@ def BiRNN(batch_x, seq_length, dropout, reuse=False, batch_size=None, n_steps=-1
     # This is done to prepare the batch for input into the first layer which expects a tensor of rank `2`.
# Permute n_steps and batch_size - batch_x = tf.transpose(batch_x, [1, 0, 2]) + batch_x = tf.transpose(batch_x, [1, 0, 2, 3]) # Reshape to prepare input for first layer batch_x = tf.reshape(batch_x, [-1, n_input + 2*n_input*n_context]) # (n_steps*batch_size, n_input + 2*n_input*n_context) layers['input_reshaped'] = batch_x @@ -1459,19 +1464,40 @@ def train(server=None): global_step = tf.Variable(0, trainable=False, name='global_step') # Reading training set - train_set = DataSet(FLAGS.train_files.split(','), + train_data = preprocess(FLAGS.train_files.split(','), + FLAGS.train_batch_size, + n_input, + n_context, + alphabet, + hdf5_cache_path=FLAGS.train_cached_features_path) + + train_set = DataSet(train_data, FLAGS.train_batch_size, limit=FLAGS.limit_train, next_index=lambda i: COORD.get_next_index('train')) # Reading validation set - dev_set = DataSet(FLAGS.dev_files.split(','), + dev_data = preprocess(FLAGS.dev_files.split(','), + FLAGS.dev_batch_size, + n_input, + n_context, + alphabet, + hdf5_cache_path=FLAGS.dev_cached_features_path) + + dev_set = DataSet(dev_data, FLAGS.dev_batch_size, limit=FLAGS.limit_dev, next_index=lambda i: COORD.get_next_index('dev')) # Reading test set - test_set = DataSet(FLAGS.test_files.split(','), + test_data = preprocess(FLAGS.test_files.split(','), + FLAGS.test_batch_size, + n_input, + n_context, + alphabet, + hdf5_cache_path=FLAGS.test_cached_features_path) + + test_set = DataSet(test_data, FLAGS.test_batch_size, limit=FLAGS.limit_test, next_index=lambda i: COORD.get_next_index('test')) diff --git a/evaluate.py b/evaluate.py index 5725b23c..d4842d62 100755 --- a/evaluate.py +++ b/evaluate.py @@ -19,6 +19,7 @@ from multiprocessing import Pool from six.moves import zip, range from util.audio import audiofile_to_input_vector from util.text import sparse_tensor_value_to_texts, text_to_char_array, Alphabet, ctc_label_dense_to_sparse, wer, levenshtein +from util.preprocess import pmap, preprocess FLAGS = tf.app.flags.FLAGS @@ -28,88 +29,6 @@ N_FEATURES = 26 N_CONTEXT = 9 -def pmap(fun, iterable, threads=8): - pool = Pool(threads) - results = pool.map(fun, iterable) - pool.close() - return results - - -def process_single_file(row): - # row = index, Series - _, file = row - features = audiofile_to_input_vector(file.wav_filename, N_FEATURES, N_CONTEXT) - transcript = text_to_char_array(file.transcript, alphabet) - - return features, len(features), transcript, len(transcript) - - -# load samples from CSV, compute features, optionally cache results on disk -def preprocess(dataset_files, batch_size, hdf5_dest_path=None): - COLUMNS = ('features', 'features_len', 'transcript', 'transcript_len') - - if hdf5_dest_path and os.path.exists(hdf5_dest_path): - with tables.open_file(hdf5_dest_path, 'r') as file: - features = file.root.features[:] - features_len = file.root.features_len[:] - transcript = file.root.transcript[:] - transcript_len = file.root.transcript_len[:] - - # features are stored flattened, so reshape into - # [n_steps, (n_input + 2*n_context*n_input)] - for i in range(len(features)): - features[i] = np.reshape(features[i], [features_len[i], -1]) - - in_data = list(zip(features, features_len, - transcript, transcript_len)) - return pandas.DataFrame(data=in_data, columns=COLUMNS) - - csv_files = dataset_files.split(',') - source_data = None - for csv in csv_files: - file = pandas.read_csv(csv, encoding='utf-8', na_filter=False) - if source_data is None: - source_data = file - else: - source_data = source_data.append(file) - - # discard last samples if 
dataset does not divide batch size evenly - if len(source_data) % batch_size != 0: - source_data = source_data[:-(len(source_data) % batch_size)] - - out_data = pmap(process_single_file, source_data.iterrows()) - - if hdf5_dest_path: - # list of tuples -> tuple of lists - features, features_len, transcript, transcript_len = zip(*out_data) - - with tables.open_file(hdf5_dest_path, 'w') as file: - features_dset = file.create_vlarray(file.root, - 'features', - tables.Float32Atom(), - filters=tables.Filters(complevel=1)) - # VLArray atoms need to be 1D, so flatten feature array - for f in features: - features_dset.append(np.reshape(f, -1)) - - features_len_dset = file.create_array(file.root, - 'features_len', - features_len) - - transcript_dset = file.create_vlarray(file.root, - 'transcript', - tables.Int32Atom(), - filters=tables.Filters(complevel=1)) - for t in transcript: - transcript_dset.append(t) - - transcript_len_dset = file.create_array(file.root, - 'transcript_len', - transcript_len) - - return pandas.DataFrame(data=out_data, columns=COLUMNS) - - def split_data(dataset, batch_size): remainder = len(dataset) % batch_size if remainder != 0: diff --git a/requirements.txt b/requirements.txt index c33550a9..5c423db4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ pyxdg bs4 six requests +tables diff --git a/util/audio.py b/util/audio.py index f6f7cc20..2b68e108 100644 --- a/util/audio.py +++ b/util/audio.py @@ -1,53 +1,7 @@ -from __future__ import absolute_import, print_function - +import numpy as np import scipy.io.wavfile as wav -import sys -import warnings -class DeepSpeechDeprecationWarning(DeprecationWarning): - pass - -warnings.simplefilter('once', category=DeepSpeechDeprecationWarning) - -try: - from deepspeech import audioToInputVector -except ImportError: - warnings.warn('DeepSpeech Python bindings could not be imported, resorting to slower code to compute audio features. 
' - 'Refer to README.md for instructions on how to install (or build) the DeepSpeech Python bindings.', - category=DeepSpeechDeprecationWarning) - - import numpy as np - from python_speech_features import mfcc - from six.moves import range - - def audioToInputVector(audio, fs, numcep, numcontext): - # Get mfcc coefficients - features = mfcc(audio, samplerate=fs, numcep=numcep) - - # We only keep every second feature (BiRNN stride = 2) - features = features[::2] - - # One stride per time step in the input - num_strides = len(features) - - # Add empty initial and final contexts - empty_context = np.zeros((numcontext, numcep), dtype=features.dtype) - features = np.concatenate((empty_context, features, empty_context)) - - # Create a view into the array with overlapping strides of size - # numcontext (past) + 1 (present) + numcontext (future) - window_size = 2*numcontext+1 - train_inputs = np.lib.stride_tricks.as_strided( - features, - (num_strides, window_size, numcep), - (features.strides[0], features.strides[0], features.strides[1]), - writeable=False) - - # Flatten the second and third dimensions - train_inputs = np.reshape(train_inputs, [num_strides, -1]) - - # Return results - return train_inputs +from python_speech_features import mfcc def audiofile_to_input_vector(audio_filename, numcep, numcontext): @@ -60,4 +14,14 @@ def audiofile_to_input_vector(audio_filename, numcep, numcontext): # Load wav files fs, audio = wav.read(audio_filename) - return audioToInputVector(audio, fs, numcep, numcontext) + # Get mfcc coefficients + features = mfcc(audio, samplerate=fs, numcep=numcep) + + # We only keep every second feature (BiRNN stride = 2) + features = features[::2] + + # Add empty initial and final contexts + empty_context = np.zeros((numcontext, numcep), dtype=features.dtype) + features = np.concatenate((empty_context, features, empty_context)) + + return features diff --git a/util/feeding.py b/util/feeding.py index 69069e4f..20e4a92b 100644 --- a/util/feeding.py +++ b/util/feeding.py @@ -1,12 +1,12 @@ -import pandas +import numpy as np import tensorflow as tf -from threading import Thread from math import ceil from six.moves import range -from util.audio import audiofile_to_input_vector +from threading import Thread from util.gpu import get_available_gpus -from util.text import ctc_label_dense_to_sparse, text_to_char_array +from util.text import ctc_label_dense_to_sparse + class ModelFeeder(object): ''' @@ -24,7 +24,7 @@ class ModelFeeder(object): numcontext, alphabet, tower_feeder_count=-1, - threads_per_queue=2): + threads_per_queue=4): self.train = train_set self.dev = dev_set @@ -35,7 +35,7 @@ class ModelFeeder(object): self.tower_feeder_count = max(len(get_available_gpus()), 1) if tower_feeder_count < 0 else tower_feeder_count self.threads_per_queue = threads_per_queue - self.ph_x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)]) + self.ph_x = tf.placeholder(tf.float32, [None, 2*numcontext+1, numcep]) self.ph_x_length = tf.placeholder(tf.int32, []) self.ph_y = tf.placeholder(tf.int32, [None,]) self.ph_y_length = tf.placeholder(tf.int32, []) @@ -77,27 +77,19 @@ class ModelFeeder(object): ''' return self._tower_feeders[tower_feeder_index].next_batch() + class DataSet(object): ''' Represents a collection of audio samples and their respective transcriptions. Takes a set of CSV files produced by importers in /bin. 
''' - def __init__(self, csvs, batch_size, skip=0, limit=0, ascending=True, next_index=lambda i: i + 1): + def __init__(self, data, batch_size, skip=0, limit=0, ascending=True, next_index=lambda i: i + 1): + self.data = data + self.data.sort_values(by="features_len", ascending=ascending, inplace=True) self.batch_size = batch_size self.next_index = next_index - self.files = None - for csv in csvs: - file = pandas.read_csv(csv, encoding='utf-8', na_filter=False) - if self.files is None: - self.files = file - else: - self.files = self.files.append(file) - self.files = self.files.sort_values(by="wav_filesize", ascending=ascending) \ - .ix[:, ["wav_filename", "transcript"]] \ - .values[skip:] - if limit > 0: - self.files = self.files[:limit] - self.total_batches = int(ceil(len(self.files) / batch_size)) + self.total_batches = int(ceil(len(self.data) / batch_size)) + class _DataSetLoader(object): ''' @@ -109,9 +101,9 @@ class _DataSetLoader(object): def __init__(self, model_feeder, data_set, alphabet): self._model_feeder = model_feeder self._data_set = data_set - self.queue = tf.PaddingFIFOQueue(shapes=[[None, model_feeder.numcep + (2 * model_feeder.numcep * model_feeder.numcontext)], [], [None,], []], + self.queue = tf.PaddingFIFOQueue(shapes=[[None, 2 * model_feeder.numcontext + 1, model_feeder.numcep], [], [None,], []], dtypes=[tf.float32, tf.int32, tf.int32, tf.int32], - capacity=data_set.batch_size * 2) + capacity=data_set.batch_size * 8) self._enqueue_op = self.queue.enqueue([model_feeder.ph_x, model_feeder.ph_x_length, model_feeder.ph_y, model_feeder.ph_y_length]) self._close_op = self.queue.close(cancel_pending_enqueues=True) self._alphabet = alphabet @@ -138,25 +130,35 @@ class _DataSetLoader(object): ''' Queue thread routine. ''' - file_count = len(self._data_set.files) + file_count = len(self._data_set.data) index = -1 while not coord.should_stop(): index = self._data_set.next_index(index) % file_count - wav_file, transcript = self._data_set.files[index] - source = audiofile_to_input_vector(wav_file, self._model_feeder.numcep, self._model_feeder.numcontext) - source_len = len(source) - target = text_to_char_array(transcript, self._alphabet) - target_len = len(target) - if source_len < target_len: - raise ValueError('Error: Audio file {} is too short for transcription.'.format(wav_file)) + features, _, transcript, transcript_len = self._data_set.data.iloc[index] + + # One stride per time step in the input + num_strides = len(features) - (self._model_feeder.numcontext * 2) + + # Create a view into the array with overlapping strides of size + # numcontext (past) + 1 (present) + numcontext (future) + window_size = 2*self._model_feeder.numcontext+1 + features = np.lib.stride_tricks.as_strided( + features, + (num_strides, window_size, self._model_feeder.numcep), + (features.strides[0], features.strides[0], features.strides[1]), + writeable=False) + try: - session.run(self._enqueue_op, feed_dict={ self._model_feeder.ph_x: source, - self._model_feeder.ph_x_length: source_len, - self._model_feeder.ph_y: target, - self._model_feeder.ph_y_length: target_len }) + session.run(self._enqueue_op, feed_dict={ + self._model_feeder.ph_x: features, + self._model_feeder.ph_x_length: num_strides, + self._model_feeder.ph_y: transcript, + self._model_feeder.ph_y_length: transcript_len + }) except tf.errors.CancelledError: return + class _TowerFeeder(object): ''' Internal class that represents a switchable input queue for one tower. 
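For reference, the context windowing that the _DataSetLoader queue thread now performs at feed time can be reproduced outside the input pipeline with plain NumPy. The following sketch is illustrative only: the toy sizes (numcep=3, numcontext=2, five MFCC frames) are made up for readability, while the model itself uses numcep=26 and numcontext=9. It simply combines the context padding now done in util/audio.py with the as_strided view added above.

    import numpy as np

    # Toy sizes chosen for readability; the model uses numcep=26, numcontext=9.
    numcep, numcontext, n_frames = 3, 2, 5

    # util/audio.py now returns MFCCs padded with numcontext empty frames on each
    # side, i.e. shape [n_frames + 2*numcontext, numcep].
    mfccs = np.random.rand(n_frames, numcep).astype(np.float32)
    empty_context = np.zeros((numcontext, numcep), dtype=mfccs.dtype)
    features = np.concatenate((empty_context, mfccs, empty_context))

    # util/feeding.py builds the overlapping context windows lazily, as a strided
    # view: one window of 2*numcontext+1 frames per original time step.
    num_strides = len(features) - 2 * numcontext
    window_size = 2 * numcontext + 1
    windows = np.lib.stride_tricks.as_strided(
        features,
        (num_strides, window_size, numcep),
        (features.strides[0], features.strides[0], features.strides[1]),
        writeable=False)

    print(windows.shape)  # (5, 5, 3), i.e. [n_steps, 2*numcontext+1, numcep], matching ph_x

Because the windows are only a view created per sample in the queue thread, the HDF5 cache written by util/preprocess.py stores just the flat padded features rather than the much larger windowed copies.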
diff --git a/util/preprocess.py b/util/preprocess.py new file mode 100644 index 00000000..12fa647e --- /dev/null +++ b/util/preprocess.py @@ -0,0 +1,98 @@ +import numpy as np +import os +import pandas +import tables + +from functools import partial +from multiprocessing.dummy import Pool +from util.audio import audiofile_to_input_vector +from util.text import text_to_char_array + +def pmap(fun, iterable, threads=8): + pool = Pool(threads) + results = pool.map(fun, iterable) + pool.close() + return results + + +def process_single_file(row, numcep, numcontext, alphabet): + # row = index, Series + _, file = row + features = audiofile_to_input_vector(file.wav_filename, numcep, numcontext) + transcript = text_to_char_array(file.transcript, alphabet) + + if (2*numcontext + len(features)) < len(transcript): + raise ValueError('Error: Audio file {} is too short for transcription.'.format(file.wav_filename)) + + return features, len(features), transcript, len(transcript) + + +# load samples from CSV, compute features, optionally cache results on disk +def preprocess(csv_files, batch_size, numcep, numcontext, alphabet, hdf5_cache_path=None): + COLUMNS = ('features', 'features_len', 'transcript', 'transcript_len') + + print('Preprocessing', csv_files) + + if hdf5_cache_path and os.path.exists(hdf5_cache_path): + with tables.open_file(hdf5_cache_path, 'r') as file: + features = file.root.features[:] + features_len = file.root.features_len[:] + transcript = file.root.transcript[:] + transcript_len = file.root.transcript_len[:] + + # features are stored flattened, so reshape into + # [n_steps, (n_input + 2*n_context*n_input)] + for i in range(len(features)): + features[i] = np.reshape(features[i], [features_len[i], -1]) + + in_data = list(zip(features, features_len, + transcript, transcript_len)) + print('Loaded from cache at', hdf5_cache_path) + return pandas.DataFrame(data=in_data, columns=COLUMNS) + + source_data = None + for csv in csv_files: + file = pandas.read_csv(csv, encoding='utf-8', na_filter=False) + if source_data is None: + source_data = file + else: + source_data = source_data.append(file) + + step_fn = partial(process_single_file, + numcep=numcep, + numcontext=numcontext, + alphabet=alphabet) + out_data = pmap(step_fn, source_data.iterrows()) + + if hdf5_cache_path: + print('Saving to', hdf5_cache_path) + + # list of tuples -> tuple of lists + features, features_len, transcript, transcript_len = zip(*out_data) + + with tables.open_file(hdf5_cache_path, 'w') as file: + features_dset = file.create_vlarray(file.root, + 'features', + tables.Float32Atom(), + filters=tables.Filters(complevel=1)) + # VLArray atoms need to be 1D, so flatten feature array + for f in features: + features_dset.append(np.reshape(f, -1)) + + features_len_dset = file.create_array(file.root, + 'features_len', + features_len) + + transcript_dset = file.create_vlarray(file.root, + 'transcript', + tables.Int32Atom(), + filters=tables.Filters(complevel=1)) + for t in transcript: + transcript_dset.append(t) + + transcript_len_dset = file.create_array(file.root, + 'transcript_len', + transcript_len) + + print('Preprocessing done') + return pandas.DataFrame(data=out_data, columns=COLUMNS) From 25a9e76afcf8aafe17bb77ab90ea25ffdfb7ec84 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Fri, 14 Sep 2018 11:14:49 -0300 Subject: [PATCH 2/5] Support relative paths in CSVs --- util/preprocess.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util/preprocess.py b/util/preprocess.py index 12fa647e..3b86480b 100644 --- 
a/util/preprocess.py
+++ b/util/preprocess.py
@@ -53,6 +53,9 @@ def preprocess(csv_files, batch_size, numcep, numcontext, alphabet, hdf5_cache_p
     source_data = None
     for csv in csv_files:
         file = pandas.read_csv(csv, encoding='utf-8', na_filter=False)
+        #FIXME: not cross-platform
+        csv_dir = os.path.dirname(os.path.abspath(csv))
+        file['wav_filename'] = file['wav_filename'].str.replace(r'(^[^/])', lambda m: os.path.join(csv_dir, m.group(1)))
         if source_data is None:
             source_data = file
         else:
             source_data = source_data.append(file)

From 778f5deb9d218bfc7cbb204c3030a506036a4bf3 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Fri, 14 Sep 2018 11:42:42 -0300
Subject: [PATCH 3/5] Fix export graph

---
 DeepSpeech.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DeepSpeech.py b/DeepSpeech.py
index e128ac42..4a09383b 100755
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -1785,8 +1785,8 @@ def train(server=None):
         sys.exit(1)

 def create_inference_graph(batch_size=1, n_steps=16, use_new_decoder=False):
-    # Input tensor will be of shape [batch_size, n_steps, n_input + 2*n_input*n_context]
-    input_tensor = tf.placeholder(tf.float32, [batch_size, n_steps if n_steps > 0 else None, n_input + 2*n_input*n_context], name='input_node')
+    # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input]
+    input_tensor = tf.placeholder(tf.float32, [batch_size, n_steps if n_steps > 0 else None, 2*n_context+1, n_input], name='input_node')
     seq_length = tf.placeholder(tf.int32, [batch_size], name='input_lengths')

     previous_state_c = variable_on_worker_level('previous_state_c', [batch_size, n_cell_dim], initializer=None)

From 799baf1f990af2e6b600575a07780f192b6dbae4 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Fri, 14 Sep 2018 12:03:02 -0300
Subject: [PATCH 4/5] Fix parameter inference from model shape

---
 native_client/deepspeech.cc | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc
index 425588d6..6a54b986 100644
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@@ -323,14 +323,15 @@ ModelState::infer(const float* aMfcc, unsigned int n_frames, vector& logi
     return;
 #endif // DS_NATIVE_MODEL
   } else {
-    Tensor input(DT_FLOAT, TensorShape({BATCH_SIZE, n_steps, mfcc_feats_per_timestep}));
+    Tensor input(DT_FLOAT, TensorShape({BATCH_SIZE, n_steps, 2*n_context+1, MFCC_FEATURES}));
 
-    auto input_mapped = input.tensor<float, 3>();
-    int idx = 0;
-    for (int i = 0; i < n_frames; i++) {
-      for (int j = 0; j < mfcc_feats_per_timestep; j++, idx++) {
-        input_mapped(0, i, j) = aMfcc[idx];
-      }
+    auto input_mapped = input.flat<float>();
+    int i;
+    for (i = 0; i < n_frames*mfcc_feats_per_timestep; ++i) {
+      input_mapped(i) = aMfcc[i];
+    }
+    for (; i < n_steps*mfcc_feats_per_timestep; ++i) {
+      input_mapped(i) = 0;
     }
 
     Tensor input_lengths(DT_INT32, TensorShape({1}));
@@ -482,9 +483,8 @@ DS_CreateModel(const char* aModelPath,
     if (node.name() == "input_node") {
       const auto& shape = node.attr().at("shape").shape();
       model->n_steps = shape.dim(1).size();
-      model->mfcc_feats_per_timestep = shape.dim(2).size();
-      // mfcc_features_per_timestep = MFCC_FEATURES * ((2*n_context) + 1)
-      model->n_context = (model->mfcc_feats_per_timestep - MFCC_FEATURES) / (2 * MFCC_FEATURES);
+      model->n_context = (shape.dim(2).size()-1)/2;
+      model->mfcc_feats_per_timestep = shape.dim(2).size() * shape.dim(3).size();
     } else if (node.name() == "logits_shape") {
       Tensor logits_shape = Tensor(DT_INT32, TensorShape({3}));
       if (!logits_shape.FromProto(node.attr().at("value").tensor())) {

From 3f9d960fd284902b2819569e38bca10c41fd5064 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Fri, 14 Sep 2018 14:38:18 -0300
Subject: [PATCH 5/5] Strip output before testing correctness in inference tests

---
 tc-tests-utils.sh | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tc-tests-utils.sh b/tc-tests-utils.sh
index b0ef70f4..1eb95de4 100755
--- a/tc-tests-utils.sh
+++ b/tc-tests-utils.sh
@@ -47,11 +47,15 @@ model_source_mmap="$(dirname "${model_source}")/${model_name_mmap}"
 SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-2.7.14:ucs2 2.7.14:ucs4 3.4.8:ucs4 3.5.5:ucs4 3.6.4:ucs4 3.7.0:ucs4}
 SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.9.1 5.12.0 6.14.1 7.10.1 8.11.1 9.11.1 10.3.0}
 
+strip() {
+  echo "$1" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
+}
+
 # This verify exact inference result
 assert_correct_inference()
 {
-  phrase=$1
-  expected=$2
+  phrase=$(strip "$1")
+  expected=$(strip "$2")
 
   if [ -z "${phrase}" -o -z "${expected}" ]; then
     echo "One or more empty strings:"
@@ -158,8 +162,8 @@ assert_correct_ldc93s1_prodmodel()
 
 assert_correct_ldc93s1_somodel()
 {
-  somodel_nolm=$1
-  somodel_withlm=$2
+  somodel_nolm=$(strip "$1")
+  somodel_withlm=$(strip "$2")
 
   # We want to be able to return non zero value from the function, while not
   # failing the whole execution
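Taken together, patches 1, 3 and 4 keep the per-timestep data layout unchanged and only move where the flattening happens: the exported input_node is now [batch_size, n_steps, 2*n_context+1, n_input], BiRNN flattens each window back into one row per time step, and the native client fills the placeholder buffer linearly. The NumPy sketch below uses hypothetical toy dimensions (not values taken from the patch, apart from the formulas) to show that the two views describe the same buffer.

    import numpy as np

    # Toy sizes; the real model uses n_input=26 (MFCC_FEATURES) and n_context=9.
    batch_size, n_steps, n_context, n_input = 1, 4, 2, 3
    window = 2 * n_context + 1
    mfcc_feats_per_timestep = window * n_input  # what deepspeech.cc now derives from dims 2 and 3

    # A batch shaped like the new input_node placeholder:
    # [batch_size, n_steps, 2*n_context+1, n_input].
    x = np.arange(batch_size * n_steps * window * n_input, dtype=np.float32).reshape(
        batch_size, n_steps, window, n_input)

    # BiRNN's transpose([1, 0, 2, 3]) followed by reshape([-1, n_input + 2*n_input*n_context])
    # still yields one flat feature vector per (time step, batch element), as before.
    rows = x.transpose(1, 0, 2, 3).reshape(-1, n_input + 2 * n_input * n_context)
    assert rows.shape == (n_steps * batch_size, mfcc_feats_per_timestep)

    # With batch_size == 1, filling the placeholder buffer element by element from the
    # flat MFCC array, as the new infer() loop does, reproduces exactly the same values.
    assert np.array_equal(x.reshape(n_steps, mfcc_feats_per_timestep), rows)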