# -*- coding: utf-8 -*-
# Copyright 2019 The SGNMT Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module contains strategies to convert a score breakdown to
the total score. This is commonly specified via the
--combination_scheme parameter.

TODO: The breakdown2score interface is not very elegant, and has some
      overlap with the interpolation_strategy implementations.
"""

from cam.sgnmt import utils
from cam.sgnmt.decoding.core import Decoder
import numpy as np
import logging
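
# Note on data layout: throughout this module, ``score_breakdown`` is a
# list with one entry per time step of the hypothesis, and each entry is
# a list of (predictor_score, predictor_weight) tuples, one tuple per
# predictor. A minimal sketch of this layout (numbers invented for
# illustration):
#
#   score_breakdown = [
#       [(-0.9, 0.5), (-1.2, 0.5)],  # step 1: two predictors, equal priors
#       [(-0.4, 0.5), (-2.3, 0.5)],  # step 2
#   ]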


def breakdown2score_sum(working_score, score_breakdown, full=False):
    """Implements the combination scheme 'sum' by always returning
    ``working_score``.

    Args:
        working_score (float): Working combined score, which is the
                               weighted sum of the scores in
                               ``score_breakdown``
        score_breakdown (list): Breakdown of the combined score into
                                predictor scores (not used)
        full (bool): If True, reevaluate all time steps. If False,
                     assume that this function has been called in the
                     previous time step (not used)

    Returns:
        float. Returns ``working_score``
    """
    return working_score


def breakdown2score_length_norm(working_score, score_breakdown, full=False):
    """Implements the combination scheme 'length_norm' by normalizing
    the sum of the predictor scores by the length of the current
    sequence (i.e. the length of ``score_breakdown``).

    TODO: Could make more efficient use of ``working_score``.

    Args:
        working_score (float): Working combined score, which is the
                               weighted sum of the scores in
                               ``score_breakdown``. Not used.
        score_breakdown (list): Breakdown of the combined score into
                                predictor scores
        full (bool): If True, reevaluate all time steps. If False,
                     assume that this function has been called in the
                     previous time step (not used)

    Returns:
        float. Returns a length normalized ``working_score``
    """
    score = sum([Decoder.combi_arithmetic_unnormalized(s)
                 for s in score_breakdown])
    return score / len(score_breakdown)
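

# Illustrative usage of ``breakdown2score_length_norm`` (a sketch with
# invented numbers; we assume ``Decoder.combi_arithmetic_unnormalized``
# returns the weighted sum of one time step's (score, weight) tuples):
#
#   score_breakdown = [[(-1.0, 0.7), (-2.0, 0.3)],   # step 1: -1.3
#                      [(-0.5, 0.7), (-1.5, 0.3)]]   # step 2: -0.8
#   breakdown2score_length_norm(0.0, score_breakdown)
#   # -> (-1.3 + -0.8) / 2 = -1.05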


def breakdown2score_bayesian(working_score, score_breakdown, full=False,
                             prev_score=None):
    """This realizes score combination following the Bayesian LM
    interpolation scheme from (Allauzen and Riley, 2011)

      Bayesian Language Model Interpolation for Mobile Speech Input

    By setting K=T we define the predictor weights according to the
    scores the predictors give to the current partial hypothesis. The
    initial predictor weights are used as priors.

    TODO: Could make more efficient use of ``working_score``.

    Args:
        working_score (float): Working combined score, which is the
                               weighted sum of the scores in
                               ``score_breakdown``. Not used.
        score_breakdown (list): Breakdown of the combined score into
                                predictor scores
        full (bool): If True, reevaluate all time steps. If False,
                     assume that this function has been called in the
                     previous time step.
        prev_score (float): Score of the hypothesis without the final
                            time step. Only used if ``full`` is False.

    Returns:
        float. Bayesian interpolated predictor scores
    """
    if not score_breakdown or working_score == utils.NEG_INF:
        return working_score
    alphas = [np.log(w) for (_, w) in score_breakdown[0]]
    if full:
        acc = []
        for pos in score_breakdown:  # for each position in the hypothesis
            for k, (p, _) in enumerate(pos):
                alphas[k] += p
            alpha_part = utils.log_sum(alphas)
            scores = [alphas[k] - alpha_part + p
                      for k, (p, _) in enumerate(pos)]
            acc.append(utils.log_sum(scores))
        return sum(acc)
    else:
        if len(score_breakdown) == 1:
            scores = [np.log(w) + p for p, w in score_breakdown[0]]
            return utils.log_sum(scores)
        working_score = prev_score
        # Update the alphas with the scores of the second-to-last step
        for k, (p, w) in enumerate(score_breakdown[-2]):
            alphas[k] = np.log(w) + p
        # Normalized alphas in log space
        alpha_norm = [a - utils.log_sum(alphas) for a in alphas]
        scores = [alpha_norm[k] + p
                  for k, (p, _) in enumerate(score_breakdown[-1])]
        # Store the normalized alphas as the weights of the last time
        # step so the next incremental call can pick them up
        updated_breakdown = [(p, np.exp(alpha_norm[k]))
                             for k, (p, _) in enumerate(score_breakdown[-1])]
        score_breakdown[-1] = updated_breakdown
        working_score += utils.log_sum(scores)
        return working_score
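

# The scheme above corresponds to the following computation (a sketch of
# the underlying math, not part of the original module): with predictor
# priors p(k) and per-step predictor scores p_k(w|h) in log space, the
# mixture weight of predictor k is
#
#   alpha_k(h) proportional to p(k) * p_k(h)   (prior times prefix likelihood)
#
# and each position contributes
#
#   log sum_k alpha_k(h) * p_k(w|h)
#
# which the code evaluates with ``utils.log_sum`` to stay in log space.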


def breakdown2score_bayesian_state_dependent(working_score,
                                             score_breakdown,
                                             full=False,
                                             prev_score=None,
                                             lambdas=None):
    """This realizes score combination following the Bayesian LM
    interpolation scheme from (Allauzen and Riley, 2011)

      Bayesian Language Model Interpolation for Mobile Speech Input

    By setting K=T we define the predictor weights according to the
    scores the predictors give to the current partial hypothesis. The
    initial predictor weights are used as priors.

    Unlike ``breakdown2score_bayesian``, this uses state-independent
    weights (``lambdas``) which control how much the state-dependent
    mixture weights (alphas) are affected by the scores of the other
    models. Makes more efficient use of ``working_score`` and the
    calculated priors when used incrementally.

    Args:
        working_score (float): Working combined score, which is the
                               weighted sum of the scores in
                               ``score_breakdown``. Not used.
        score_breakdown (list): Breakdown of the combined score into
                                predictor scores
        full (bool): If True, reevaluate all time steps. If False,
                     assume that this function has been called in the
                     previous time step.
        prev_score (float): Score of the hypothesis without the final
                            time step. Only used if ``full`` is False.
        lambdas (np.ndarray): Square matrix of domain-task weights

    Returns:
        float. Bayesian interpolated predictor scores
    """
    if not score_breakdown or working_score == utils.NEG_INF:
        return working_score
    if full:
        acc = []
        alphas = np.array([np.log(w) for (_, w) in score_breakdown[0]])
        for pos in score_breakdown:  # for each position in the hypothesis
            for k, (p_k, _) in enumerate(pos):
                alphas[k] += p_k
            alpha_prob = np.exp(alphas - utils.log_sum(alphas))
            # Mix the state-dependent alphas with the fixed lambdas
            # (equivalent to lambdas.dot(alpha_prob))
            alpha_prob_lambdas = np.zeros_like(alpha_prob)
            for k in range(len(alpha_prob)):
                for t in range(len(alpha_prob)):
                    alpha_prob_lambdas[k] += alpha_prob[t] * lambdas[k, t]
            scores = [np.log(alpha_prob_lambdas[k]) + p
                      for k, (p, _) in enumerate(pos)]
            acc.append(utils.log_sum(scores))
        return sum(acc)
    else:
        if len(score_breakdown) == 1:
            scores = [np.log(w) + p for p, w in score_breakdown[0]]
            return utils.log_sum(scores)
        working_score = prev_score
        alphas = np.array([np.log(w) for (_, w) in score_breakdown[-2]])
        for k, (p_k, _) in enumerate(score_breakdown[-2]):
            alphas[k] += p_k
        alpha_prob = np.exp(alphas - utils.log_sum(alphas))
        alpha_prob_lambdas = np.zeros_like(alpha_prob)
        for k in range(len(alpha_prob)):
            for t in range(len(alpha_prob)):
                alpha_prob_lambdas[k] += alpha_prob[t] * lambdas[k, t]
        scores = [np.log(alpha_prob_lambdas[k]) + p
                  for k, (p, _) in enumerate(score_breakdown[-1])]
        # Store the normalized alphas as the weights of the last time
        # step so the next incremental call can pick them up
        updated_breakdown = [(p, alpha_prob[k])
                             for k, (p, _) in enumerate(score_breakdown[-1])]
        score_breakdown[-1] = updated_breakdown
        working_score += utils.log_sum(scores)
        return working_score
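

# The inner double loop above is a matrix-vector product: with
# ``lambdas`` of shape (K, K) and ``alpha_prob`` of shape (K,), it
# computes ``lambdas.dot(alpha_prob)``. A minimal sketch with two
# predictors (values invented):
#
#   lambdas = np.array([[0.9, 0.1],
#                       [0.1, 0.9]])   # mostly trust each model's own alpha
#   alpha_prob = np.array([0.8, 0.2])
#   lambdas.dot(alpha_prob)            # -> array([0.74, 0.26])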


def breakdown2score_bayesian_loglin(working_score, score_breakdown,
                                    full=False, prev_score=None):
    """Like the 'bayesian' combination scheme, but uses log-linear
    model combination rather than linear interpolation weights.

    TODO: Implement an incremental version and write the weights into
          the breakdowns.
    """
    if not score_breakdown:
        return working_score
    acc = []
    prev_alphas = []  # list of all alpha_i,k
    # Write priors to alphas
    for (p, w) in score_breakdown[0]:
        prev_alphas.append(np.log(w))
    for pos in score_breakdown:  # for each position in the hypothesis
        alphas = []
        sub_acc = []
        # for each predictor (p: p_k(w_i|h_i), w: prior p(k))
        for k, (p, w) in enumerate(pos):
            alpha = prev_alphas[k] + p
            alphas.append(alpha)
            sub_acc.append(p + alpha)
        acc.append(utils.log_sum(sub_acc) - utils.log_sum(alphas))
        prev_alphas = alphas
    return sum(acc)
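

# Per position i, the loop above computes (in log space)
#
#   log( sum_k alpha_{i,k} * p_k(w_i|h_i) ) - log( sum_k alpha_{i,k} )
#
# i.e. a mixture under the running alphas, normalized lazily by
# subtracting ``utils.log_sum(alphas)`` instead of renormalizing the
# alphas at every step. The alphas themselves accumulate the predictor
# scores multiplicatively (additively in log space), which is the
# log-linear part of this scheme.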