20 package uk.ac.cam.eng.extraction.hadoop.features.lexical;
22 import java.io.IOException;
23 import java.util.Arrays;
24 import java.util.List;
/**
 * Builds (provenance, word, word) lookup keys for a rule and converts the
 * values found under those keys into a log-domain score; see
 * {@code buildQuery} and {@code value}.
 *
 * NOTE(review): this listing omits a number of original lines (the embedded
 * line numbers have gaps), so some statements of this class are not visible.
 */
39 class LexicalProbability {
// Small positive constant, numerically close to exp(-40) (about 4.25e-18).
// It is not referenced in any visible line; presumably a floor applied when
// no alignment mass is found for a word -- TODO confirm against the full file.
41 private final double minSum = 4.24e-18;
// Direction flag, assigned once in the constructor. Presumably selects which
// side of the rule plays the "source" role -- the lines that read it are not
// visible here.
43 private boolean source2target;
/**
 * Creates an instance and records the requested direction.
 *
 * @param source2target value copied into the {@code source2target} field
 */
45 public LexicalProbability(
boolean source2target) {
46 this.source2target = source2target;
/**
 * Registers, in {@code batchWordAlignments}, one key per combination of
 * provenance index, source word and target word of the given rule.
 *
 * Each key is the list {@code [i, sourceWord.serialised(), targetWord.serialised()]}
 * for {@code i} in {@code [0, noOfProvs)}.
 *
 * @param ruleWritable rule to query for; it is copied via {@code new Rule(...)}
 *                     before use
 * @param noOfProvs number of provenance indices to generate keys for
 * @param batchWordAlignments map that receives the generated keys
 */
49 public void buildQuery(Rule ruleWritable,
int noOfProvs,
50 Map<List<Integer>, Double> batchWordAlignments) {
51 Rule rule =
new Rule(ruleWritable);
52 List<Symbol> sourceWords;
53 List<Symbol> targetWords;
// NOTE(review): two alternative assignments follow; the branch that chooses
// between them is not in this listing (presumably an if/else on
// source2target -- confirm against the full file).
55 sourceWords = rule.getSource();
56 targetWords = rule.getTarget();
// Swapped roles: the rule's target side is used as "source" and vice versa.
58 sourceWords = rule.getTarget();
59 targetWords = rule.getSource();
// When there is more than one source word, an extra symbol with id 0 is
// appended to the target list (presumably a null-word placeholder -- confirm).
61 if (sourceWords.size() > 1) {
62 targetWords.add(Terminal.create(0));
// Enumerate every (source word, target word, provenance) combination.
64 for (Symbol sourceWord : sourceWords) {
65 for (Symbol targetWord : targetWords) {
66 for (
int i = 0; i < noOfProvs; ++i) {
// NOTE(review): the declaration of `key` is not visible in this listing.
68 key =
new Integer[] { i, sourceWord.serialised(), targetWord.serialised() };
// The key is stored as a List so that it hashes and compares by content.
// NOTE(review): the value argument of this put() call is cut off here.
69 batchWordAlignments.put(Arrays.asList(key),
/**
 * Computes a log-domain score for the given rule and provenance from the
 * values stored in {@code batchWordAlignments}.
 *
 * Keys have the form {@code [prov, sourceWord.serialised(), targetWord.serialised()]}.
 * The visible tail of the method divides {@code lexprob} by
 * {@code targetWords.size()} raised to the power {@code sourceWords.size()}
 * and returns its natural logarithm.
 *
 * NOTE(review): the declaration and accumulation of {@code lexprob} are not
 * in this listing (original lines 101-110 are missing), so how the looked-up
 * values are combined cannot be confirmed from here.
 *
 * @param ruleWritable rule to score; it is copied via {@code new Rule(...)}
 *                     before use
 * @param prov provenance index, widened to {@code int} as the first key element
 * @param batchWordAlignments map from lookup key to stored value
 * @return {@code Math.log(lexprob)} after the normalisation described above
 * @throws IOException declared by the signature; the statement that throws
 *                     it is not visible in this listing
 */
77 public double value(Rule ruleWritable, byte prov,
78 Map<List<Integer>, Double> batchWordAlignments)
throws IOException {
80 Rule rule =
new Rule(ruleWritable);
81 List<Symbol> sourceWords;
82 List<Symbol> targetWords;
// NOTE(review): two alternative assignments follow; the branch that chooses
// between them is not in this listing (presumably an if/else on
// source2target -- confirm against the full file).
// Unlike buildQuery, this method takes the terminals of each side via
// source()/target().getTerminals().
84 sourceWords = rule.source().getTerminals();
85 targetWords = rule.target().getTerminals();
// Swapped roles: the rule's target terminals are used as "source" and vice versa.
87 sourceWords = rule.target().getTerminals();
88 targetWords = rule.source().getTerminals();
// When there is more than one source word, an extra symbol with id 0 is
// appended to the target list (presumably a null-word placeholder -- confirm).
// This also increases targetWords.size() used in the normalisation below.
90 if (sourceWords.size() > 1) {
91 targetWords.add(Terminal.create(0));
93 for (Symbol sourceWord : sourceWords) {
95 for (Symbol targetWord : targetWords) {
// NOTE(review): the declaration of `key` is not visible in this listing.
97 key =
new Integer[] { (int) prov, sourceWord.serialised(), targetWord.serialised() };
// Wrap the array in a List so the map lookup compares keys by content.
98 List<Integer> serverKey = Arrays.asList(key);
// Pairs with no entry in the map are skipped by this guard.
99 if (batchWordAlignments.containsKey(serverKey)) {
100 double val = batchWordAlignments.get(serverKey);
// Normalise by (number of target words) ^ (number of source words).
111 lexprob /= Math.pow(targetWords.size(), sourceWords.size());
// Return the score in the natural-log domain.
112 return Math.log(lexprob);
fst::TropicalWeightTpl< F > Map(double)