# STT/native_client/ctcdecode/__init__.py

from __future__ import absolute_import, division, print_function

from . import swigwrapper # pylint: disable=import-self

# The SWIG module is built with SWIG_PYTHON_STRICT_BYTE_CHAR, so string
# encoding has to be handled explicitly, both here and in the rest of this
# file.
__version__ = swigwrapper.__version__.decode('utf-8')

# Hack: SWIG does not yet support binding enums to Python in a scoped manner,
# so the error codes are re-exported from this module by matching on their
# name prefix.
for symbol in dir(swigwrapper):
    if not symbol.startswith('DS_ERR_'):
        continue
    globals()[symbol] = getattr(swigwrapper, symbol)

class Scorer(swigwrapper.Scorer):
    """Wrapper for Scorer.

    When no ``alphabet`` is given the constructor performs a bare
    initialization only. When ``alphabet`` is given, the scorer is loaded
    from ``scorer_path`` and its parameters are set from ``alpha`` and
    ``beta``.

    :param alpha: Language model weight.
    :type alpha: float
    :param beta: Word insertion bonus.
    :type beta: float
    :param scorer_path: Path to load scorer from.
    :type scorer_path: str
    :param alphabet: Alphabet
    :type alphabet: Alphabet
    :raises ValueError: If ``alphabet`` is given but ``alpha``, ``beta`` or
                        ``scorer_path`` is missing, or if the underlying
                        ``init`` call returns a non-zero error code.
    """
    def __init__(self, alpha=None, beta=None, scorer_path=None, alphabet=None):
        super(Scorer, self).__init__()
        # Allow bare initialization
        if not alphabet:
            return

        # Validate with explicit raises rather than ``assert`` so the checks
        # are not stripped when Python runs with -O.
        if alpha is None:
            raise ValueError('alpha parameter is required')
        if beta is None:
            raise ValueError('beta parameter is required')
        if not scorer_path:
            raise ValueError('scorer_path parameter is required')

        # The binding takes bytes, so the path is encoded explicitly.
        err = self.init(scorer_path.encode('utf-8'), alphabet)
        if err != 0:
            raise ValueError('Scorer initialization failed with error code 0x{:X}'.format(err))

        self.reset_params(alpha, beta)


class Alphabet(swigwrapper.Alphabet):
    """Convenience wrapper for Alphabet which calls init in the constructor.

    Text passed to the binding is encoded as UTF-8 and bytes returned by the
    binding are decoded from UTF-8.
    """

    def __init__(self, config_path):
        """Initialize the alphabet from the file at ``config_path``.

        :raises ValueError: If ``init`` returns a non-zero error code.
        """
        super(Alphabet, self).__init__()
        status = self.init(config_path.encode('utf-8'))
        if status != 0:
            raise ValueError(
                'Alphabet initialization failed with error code 0x{:X}'.format(status))

    def CanEncodeSingle(self, input):
        """
        Returns true if the single character/output class has a corresponding
        label in the alphabet.
        """
        parent = super(Alphabet, self)
        return parent.CanEncodeSingle(input.encode('utf-8'))

    def CanEncode(self, input):
        """
        Returns true if the entire string can be encoded into labels in this
        alphabet.
        """
        parent = super(Alphabet, self)
        return parent.CanEncode(input.encode('utf-8'))

    def EncodeSingle(self, input):
        """
        Encode a single character/output class into a label. Character must
        be in the alphabet, this method will assert that. Use
        `CanEncodeSingle` to test.
        """
        parent = super(Alphabet, self)
        return parent.EncodeSingle(input.encode('utf-8'))

    def Encode(self, input):
        """
        Encode a sequence of character/output classes into a sequence of
        labels, returned as a Python list.
        Characters are assumed to always take a single Unicode codepoint.
        Characters must be in the alphabet, this method will assert that. Use
        `CanEncode` and `CanEncodeSingle` to test.
        """
        labels = super(Alphabet, self).Encode(input.encode('utf-8'))
        # The binding returns SWIG's UnsignedIntVec; hand back a plain list.
        return list(labels)

    def DecodeSingle(self, input):
        """Decode a single label into a string."""
        return super(Alphabet, self).DecodeSingle(input).decode('utf-8')

    def Decode(self, input):
        """Decode a sequence of labels into a string."""
        return super(Alphabet, self).Decode(input).decode('utf-8')


class UTF8Alphabet(swigwrapper.UTF8Alphabet):
    """Convenience wrapper for UTF8Alphabet which calls init in the constructor.

    Text passed to the binding is encoded as UTF-8 and bytes returned by the
    binding are decoded from UTF-8.
    """

    def __init__(self):
        """Initialize the alphabet; ``init`` is called with an empty bytes path.

        :raises ValueError: If ``init`` returns a non-zero error code.
        """
        super(UTF8Alphabet, self).__init__()
        status = self.init(b'')
        if status != 0:
            raise ValueError(
                'UTF8Alphabet initialization failed with error code 0x{:X}'.format(status))

    def CanEncodeSingle(self, input):
        """
        Returns true if the single character/output class has a corresponding
        label in the alphabet.
        """
        parent = super(UTF8Alphabet, self)
        return parent.CanEncodeSingle(input.encode('utf-8'))

    def CanEncode(self, input):
        """
        Returns true if the entire string can be encoded into labels in this
        alphabet.
        """
        parent = super(UTF8Alphabet, self)
        return parent.CanEncode(input.encode('utf-8'))

    def EncodeSingle(self, input):
        """
        Encode a single character/output class into a label. Character must
        be in the alphabet, this method will assert that. Use
        `CanEncodeSingle` to test.
        """
        parent = super(UTF8Alphabet, self)
        return parent.EncodeSingle(input.encode('utf-8'))

    def Encode(self, input):
        """
        Encode a sequence of character/output classes into a sequence of
        labels, returned as a Python list.
        Characters must be in the alphabet, this method will assert that. Use
        `CanEncode` and `CanEncodeSingle` to test.
        """
        labels = super(UTF8Alphabet, self).Encode(input.encode('utf-8'))
        # The binding returns SWIG's UnsignedIntVec; hand back a plain list.
        return list(labels)

    def DecodeSingle(self, input):
        """Decode a single label into a string."""
        return super(UTF8Alphabet, self).DecodeSingle(input).decode('utf-8')

    def Decode(self, input):
        """Decode a sequence of labels into a string."""
        return super(UTF8Alphabet, self).Decode(input).decode('utf-8')


def ctc_beam_search_decoder(probs_seq,
                            alphabet,
                            beam_size,
                            cutoff_prob=1.0,
                            cutoff_top_n=40,
                            scorer=None,
                            hot_words=None,
                            num_results=1):
    """Wrapper for the CTC Beam Search Decoder.

    :param probs_seq: 2-D list of probability distributions over each time
                      step, with each element being a list of normalized
                      probabilities over alphabet and blank.
    :type probs_seq: 2-D list
    :param alphabet: Alphabet used to decode the resulting tokens.
    :type alphabet: Alphabet
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param cutoff_prob: Cutoff probability in pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n
                         characters with highest probs in alphabet will be
                         used in beam search, default 40.
    :type cutoff_top_n: int
    :param scorer: External scorer for partially decoded sentence, e.g. word
                   count or language model.
    :type scorer: Scorer
    :param hot_words: Map of words (keys) to their assigned boosts (values).
                      Defaults to an empty map when omitted or None.
    :type hot_words: map{string:float}
    :param num_results: Number of beams to return.
    :type num_results: int
    :return: List of tuples of confidence and sentence as decoding
             results, in descending order of the confidence.
    :rtype: list
    """
    # Build a fresh dict per call instead of sharing one mutable default
    # object across every invocation.
    if hot_words is None:
        hot_words = {}

    # NOTE(review): hot_words keys are handed to the binding without the
    # explicit UTF-8 encoding applied to other strings in this module --
    # confirm the native side accepts them as given.
    beam_results = swigwrapper.ctc_beam_search_decoder(
        probs_seq, alphabet, beam_size, cutoff_prob, cutoff_top_n,
        scorer, hot_words, num_results)

    # Turn each native result into (confidence, decoded text).
    return [(res.confidence, alphabet.Decode(res.tokens)) for res in beam_results]


def ctc_beam_search_decoder_batch(probs_seq,
                                  seq_lengths,
                                  alphabet,
                                  beam_size,
                                  num_processes,
                                  cutoff_prob=1.0,
                                  cutoff_top_n=40,
                                  scorer=None,
                                  hot_words=None,
                                  num_results=1):
    """Wrapper for the batched CTC beam search decoder.

    :param probs_seq: 3-D list with each element as an instance of 2-D list
                      of probabilities used by ctc_beam_search_decoder().
    :type probs_seq: 3-D list
    :param seq_lengths: Passed through to the native batch decoder unchanged;
                        presumably the number of valid time steps of each
                        batch entry -- TODO confirm against the native API.
    :param alphabet: Alphabet used to decode the resulting tokens.
    :type alphabet: Alphabet
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param num_processes: Number of parallel processes.
    :type num_processes: int
    :param cutoff_prob: Cutoff probability in alphabet pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n
                         characters with highest probs in alphabet will be
                         used in beam search, default 40.
    :type cutoff_top_n: int
    :param scorer: External scorer for partially decoded sentence, e.g. word
                   count or language model.
    :type scorer: Scorer
    :param hot_words: Map of words (keys) to their assigned boosts (values).
                      Defaults to an empty map when omitted or None.
    :type hot_words: map{string:float}
    :param num_results: Number of beams to return.
    :type num_results: int
    :return: One list per batch entry, each a list of tuples of confidence
             and sentence as decoding results, in descending order of the
             confidence.
    :rtype: list
    """
    # Build a fresh dict per call instead of sharing one mutable default
    # object across every invocation.
    if hot_words is None:
        hot_words = {}

    batch_beam_results = swigwrapper.ctc_beam_search_decoder_batch(
        probs_seq, seq_lengths, alphabet, beam_size, num_processes,
        cutoff_prob, cutoff_top_n, scorer, hot_words, num_results)

    # Turn each native result into (confidence, decoded text), keeping the
    # per-entry grouping of the batch.
    return [
        [(res.confidence, alphabet.Decode(res.tokens)) for res in beam_results]
        for beam_results in batch_beam_results
    ]