# Source code for cam.sgnmt.predictors.core

# -*- coding: utf-8 -*-
# coding=utf-8
# Copyright 2019 The SGNMT Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module contains the two basic predictor interfaces
for bounded and unbounded vocabulary predictors.
"""

from abc import abstractmethod

from cam.sgnmt import utils
from cam.sgnmt.utils import Observer, NEG_INF, MESSAGE_TYPE_DEFAULT


class Predictor(Observer):
    """A predictor produces the predictive probability distribution of
    the next word given the state of the predictor. The state may
    change during ``predict_next()`` and ``consume()``. The functions
    ``get_state()`` and ``set_state()`` can be used for non-greedy
    decoding. Note: The state describes the predictor with the current
    history. It does not encapsulate the current source sentence, i.e.
    you cannot recover a predictor state if ``initialize()`` was called
    in between. ``predict_next()`` and ``consume()`` must be called
    alternately. This holds even when using ``get_state()`` and
    ``set_state()``: Loading/saving states is transparent to the
    predictor instance.
    """

    # NOTE(review): ``@abstractmethod`` is only enforced at instantiation
    # time if the class hierarchy uses an ``ABCMeta`` metaclass -- confirm
    # that ``Observer`` provides one. The abstract methods below raise
    # ``NotImplementedError`` as a fallback either way.

    def __init__(self):
        """Initializes ``current_sen_id`` with 0. """
        super(Predictor, self).__init__()
        self.current_sen_id = 0

    def set_current_sen_id(self, cur_sen_id):
        """This function is called between ``initialize()`` calls to
        increment the sentence id counter. It can also be used to skip
        sentences for the --range argument.

        Args:
            cur_sen_id (int):  Sentence id for the next call of
                               ``initialize()``
        """
        self.current_sen_id = cur_sen_id

    @abstractmethod
    def predict_next(self):
        """Returns the predictive distribution over the target
        vocabulary for the next word given the predictor state. Note
        that the prediction itself can change the state of the
        predictor. For example, the neural predictor updates the
        decoder network state and its attention to predict the next
        word. Two calls of ``predict_next()`` must be separated by a
        ``consume()`` call.

        Returns:
            dictionary,array,list. Word log probabilities for the next
            target token. All ids which are not set are assumed to have
            probability ``get_unk_probability()``

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    @abstractmethod
    def consume(self, word):
        """Expand the current history by ``word`` and update the
        internal predictor state accordingly. Two calls of ``consume()``
        must be separated by a ``predict_next()`` call.

        Args:
            word (int):  Word to add to the current history

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    @abstractmethod
    def get_state(self):
        """Get the current predictor state. The state can be any object
        or tuple of objects which makes it possible to return to the
        predictor state with the current history.

        Returns:
          object. Predictor state

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    @abstractmethod
    def set_state(self, state):
        """Loads a predictor state from an object created with
        ``get_state()``. Note that this does not copy the argument but
        just references the given state. If ``state`` is going to be
        used in the future to return to that point again, you should
        copy the state with ``copy.deepcopy()`` before.

        Args:
           state (object): Predictor state as returned by
                           ``get_state()``

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def estimate_future_cost(self, hypo):
        """Predictors can implement their own look-ahead cost functions.
        They are used in A* if the --heuristics parameter is set to
        predictor. This function should return the future log *cost*
        (i.e. the lower the better) given the current predictor state,
        assuming that the last word in the partial hypothesis 'hypo' is
        consumed next. This function must not change the internal
        predictor state.

        Args:
            hypo (PartialHypothesis): Hypothesis for which to estimate
                                      the future cost given the current
                                      predictor state

        Returns:
            float. Future cost. This default implementation always
            returns 0.0
        """
        return 0.0

    def get_unk_probability(self, posterior):
        """This function defines the probability of all words which are
        not in ``posterior``. This is usually used to combine open and
        closed vocabulary predictors. The argument ``posterior`` should
        have been produced with ``predict_next()``

        Args:
            posterior (list,array,dict): Return value of the last call
                                         of ``predict_next``

        Returns:
            float: Score to use for words outside ``posterior``. This
            default implementation always returns ``NEG_INF``
        """
        return NEG_INF

    def initialize(self, src_sentence):
        """Initialize the predictor with the given source sentence.
        This resets the internal predictor state and loads everything
        which is constant throughout the processing of a single source
        sentence. For example, the NMT decoder runs the encoder network
        and stores the source annotations. The default implementation
        does nothing.

        Args:
            src_sentence (list): List of word IDs which form the source
                                 sentence without <S> or </S>
        """
        pass

    def initialize_heuristic(self, src_sentence):
        """This is called after ``initialize()`` if the predictor is
        registered as heuristic predictor (i.e.
        ``estimate_future_cost()`` will be called in the future).
        Predictors can implement this function for initialization of
        their own heuristic mechanisms. The default implementation does
        nothing.

        Args:
            src_sentence (list): List of word IDs which form the source
                                 sentence without <S> or </S>
        """
        pass

    def finalize_posterior(self, scores, use_weights, normalize_scores):
        """This method can be used to enforce the parameters
        ``use_weights`` and ``normalize_scores`` in predictors with dict
        posteriors.

        Args:
            scores (dict): unnormalized log valued scores
            use_weights (bool): Set to false to replace all values in
                                ``scores`` with 0 (= log 1)
            normalize_scores (bool): Set to true to make the exp of
                                     elements in ``scores`` sum up to 1

        Returns:
            dict. ``scores`` itself if it is empty or if neither option
            requires a change, otherwise a new dict with the same keys.
            The argument is never modified in place.
        """
        if not scores:  # empty scores -> pass through
            return scores
        if not use_weights:
            # Discard the predictor weights: every listed word gets log 1.
            scores = dict.fromkeys(scores, 0.0)
        if normalize_scores:
            # Subtracting the log partition normalizes in the log domain.
            log_sum = utils.log_sum(scores.values())
            return {k: v - log_sum for k, v in scores.items()}
        return scores

    def is_equal(self, state1, state2):
        """Returns true if two predictor states are equal, i.e. both
        states will always result in the same scores. This is used for
        hypothesis recombination. The default implementation never
        reports two states as equal.

        Args:
            state1 (object): First predictor state
            state2 (object): Second predictor state

        Returns:
            bool. True if both states are equal, false if not
        """
        return False

    def notify(self, message, message_type=MESSAGE_TYPE_DEFAULT):
        """We implement the ``notify`` method from the ``Observer``
        super class with an empty method here s.t. predictors do not
        need to implement it.

        Args:
            message (object): The posterior sent by the decoder
            message_type: Type tag of the message, defaults to
                          ``MESSAGE_TYPE_DEFAULT``. Ignored by this
                          empty implementation
        """
        pass


class UnboundedVocabularyPredictor(Predictor):
    """Predictors under this class implement models with very large
    target vocabularies, for which it is too inefficient to list the
    entire posterior. Instead, they are evaluated only for a given list
    of target words. This list is usually created by taking all non-zero
    probability words from the bounded vocabulary predictors. An
    example of an unbounded vocabulary predictor is the ngram predictor:
    Instead of listing the entire ngram vocabulary, we run srilm only
    on the words which are possible according to the other predictors
    (e.g. fst or nmt). This is realized by introducing the
    ``trgt_words`` argument to ``predict_next``. """

    def __init__(self):
        """Delegates to the ``Predictor`` constructor, which initializes
        ``current_sen_id`` with 0. """
        super(UnboundedVocabularyPredictor, self).__init__()

    @abstractmethod
    def predict_next(self, trgt_words):
        """Like in ``Predictor``, returns the predictive distribution
        over target words given the predictor state. Note
        that the prediction itself can change the state of the
        predictor. For example, the neural predictor updates the
        decoder network state and its attention to predict the next
        word. Two calls of ``predict_next()`` must be separated by a
        ``consume()`` call.

        Args:
            trgt_words (list): List of target word ids.

        Returns:
            dictionary,array,list. Word log probabilities for the next
            target token. All ids which are not set are assumed to have
            probability ``get_unk_probability()``. The returned set should
            not contain any ids which are not in ``trgt_words``, but it
            does not have to score all of them

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError