Cambridge SMT System
LexicalProbability.java
Go to the documentation of this file.
1 /*******************************************************************************
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use these files except in compliance with the License.
4  * You may obtain a copy of the License at
5  *
6  * http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  *
14  * Copyright 2014 - Juan Pino, Aurelien Waite, William Byrne
15  *******************************************************************************/
20 package uk.ac.cam.eng.extraction.hadoop.features.lexical;
21 
22 import java.io.IOException;
23 import java.util.Arrays;
24 import java.util.List;
25 import java.util.Map;
26 
27 import uk.ac.cam.eng.extraction.Rule;
28 import uk.ac.cam.eng.extraction.Symbol;
29 import uk.ac.cam.eng.extraction.Terminal;
30 
39 class LexicalProbability {
40 
41  private final double minSum = 4.24e-18; // exp(-40)
42 
43  private boolean source2target;
44 
45  public LexicalProbability(boolean source2target) {
46  this.source2target = source2target;
47  }
48 
49  public void buildQuery(Rule ruleWritable, int noOfProvs,
50  Map<List<Integer>, Double> batchWordAlignments) {
51  Rule rule = new Rule(ruleWritable);
52  List<Symbol> sourceWords;
53  List<Symbol> targetWords;
54  if (source2target) {
55  sourceWords = rule.getSource();
56  targetWords = rule.getTarget();
57  } else {
58  sourceWords = rule.getTarget();
59  targetWords = rule.getSource();
60  }
61  if (sourceWords.size() > 1) {
62  targetWords.add(Terminal.create(0));
63  }
64  for (Symbol sourceWord : sourceWords) {
65  for (Symbol targetWord : targetWords) {
66  for (int i = 0; i < noOfProvs; ++i) {
67  Integer[] key;
68  key = new Integer[] { i, sourceWord.serialised(), targetWord.serialised() };
69  batchWordAlignments.put(Arrays.asList(key),
70  Double.MAX_VALUE);
71  }
72  }
73 
74  }
75  }
76 
77  public double value(Rule ruleWritable, byte prov,
78  Map<List<Integer>, Double> batchWordAlignments) throws IOException {
79  double lexprob = 1;
80  Rule rule = new Rule(ruleWritable);
81  List<Symbol> sourceWords;
82  List<Symbol> targetWords;
83  if (source2target) {
84  sourceWords = rule.source().getTerminals();
85  targetWords = rule.target().getTerminals();
86  } else {
87  sourceWords = rule.target().getTerminals();
88  targetWords = rule.source().getTerminals();
89  }
90  if (sourceWords.size() > 1) {
91  targetWords.add(Terminal.create(0));
92  }
93  for (Symbol sourceWord : sourceWords) {
94  double sum = 0;
95  for (Symbol targetWord : targetWords) {
96  Integer[] key;
97  key = new Integer[] { (int) prov, sourceWord.serialised(), targetWord.serialised() };
98  List<Integer> serverKey = Arrays.asList(key);
99  if (batchWordAlignments.containsKey(serverKey)) {
100  double val = batchWordAlignments.get(serverKey);
101  sum += val;
102  }
103  }
104  if (sum > 0) {
105  lexprob *= sum;
106  } else {
107  lexprob *= minSum;
108  }
109 
110  }
111  lexprob /= Math.pow(targetWords.size(), sourceWords.size());
112  return Math.log(lexprob);
113  }
114 
115 }
fst::TropicalWeightTpl< F > Map(double)