16 package uk.ac.cam.eng.rule.features;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.Collections;
21 import java.util.HashMap;
22 import java.util.List;
25 import org.apache.hadoop.io.IntWritable;
// Fallback values: the two phrase log-probabilities are handed to
// createDefaultData(), the lexical value to createPassThroughDefaultData().
36 private final static double DEFAULT_S2T_PHRASE_LOG_PROB = -4.7;
38 private final static double DEFAULT_T2S_PHRASE_LOG_PROB = -7;
40 private final static double DEFAULT_LEX_VALUE = -40;
// Features known to this registry; assigned once as an unmodifiable list.
42 private final List<Feature> allFeatures;
// For each feature, the indices it occupies: either a single index or
// noOfProvs consecutive ones.
44 private final Map<Feature, int[]> indexMappings =
new HashMap<>();
// Number of comma-separated entries in the provenance string given at construction.
46 private final int noOfProvs;
// Zero vector with exactly one entry.
48 private final double[] zeroNonProv =
new double[] { 0 };
// Zero vector with noOfProvs entries; allocated during construction.
50 private final double[] zeroProv;
// Default value maps (feature index -> value). Each one is built once, during
// construction, by the create...DefaultData() method of the matching name.
52 private final Map<Integer, Double> defaultFeatures;
54 private final Map<Integer, Double> defaultOOVFeatures;
56 private final Map<Integer, Double> defaultPassThroughFeatures;
58 private final Map<Integer, Double> defaultDeletionFeatures;
60 private final Map<Integer, Double> defaultGlueFeatures;
62 private final Map<Integer, Double> defaultDeleteGlueFeatures;
64 private final Map<Integer, Double> defaultGlueStartOrEndFeatures;
// Split the comma-separated feature list; the provenance count is the number
// of comma-separated entries in provenanceString.
69 String[] featureSplit = featureString.split(
",");
70 noOfProvs = provenanceString.split(
",").length;
71 List<Feature> features =
new ArrayList<>();
73 boolean lexFeatures =
false;
// NOTE(review): the statements that resolve fString to a Feature and choose
// between the two index layouts below are not visible here.
74 for (String fString : featureSplit) {
// This path reserves noOfProvs consecutive indices for the feature.
80 mappings =
new int[noOfProvs];
81 for (
int i = 0; i < noOfProvs; ++i) {
82 mappings[i] = indexCounter++;
// This path reserves a single index for the feature.
85 mappings =
new int[] { indexCounter++ };
// Record which indices belong to feature f.
87 indexMappings.put(f, mappings);
// Wrap the collected features in an unmodifiable view.
89 allFeatures = Collections.unmodifiableList(features);
90 zeroProv =
new double[noOfProvs];
91 hasLexicalFeatures = lexFeatures;
// A new double[] already holds 0.0 in every slot; the fill just makes that explicit.
92 Arrays.fill(zeroProv, 0.0);
// The default maps are built last, once allFeatures and indexMappings are populated.
93 defaultFeatures = createDefaultData();
94 defaultOOVFeatures = createOOVDefaultData();
95 defaultPassThroughFeatures = createPassThroughDefaultData();
96 defaultDeletionFeatures = createDeletionDefaultData();
97 defaultGlueFeatures = createGlueDefaultData();
98 defaultDeleteGlueFeatures = createDeleteGlueDefaultData();
99 defaultGlueStartOrEndFeatures = createGlueStartOrEndDefaultData();
// Gathers the index array of every requested feature, then flattens the
// arrays, in request order, into a single result array.
103 List<int[]> mappings =
new ArrayList<int[]>(features.length);
105 for (
Feature feature : features) {
// A feature with no entry in indexMappings was never registered: reject it.
106 if (!indexMappings.containsKey(feature)) {
107 throw new IllegalArgumentException(
"Feature " 108 + feature.getConfName() +
" is not in the registry");
110 int[] mapping = indexMappings.get(feature);
111 mappings.add(mapping);
// Running sum of the array lengths; it sizes the flattened result below.
112 totalSize += mapping.length;
114 int[] result =
new int[totalSize];
// Copy the indices over one by one, keeping the order in which they were collected.
116 for (
int[] mapping : mappings) {
117 for (
int index : mapping) {
118 result[counter++] = index;
125 return allFeatures.contains(f);
/**
 * Stores {@code val} in {@code vals} under every index in {@code mappings}.
 * The writes are guarded by a membership check of {@code f} against
 * {@code allFeatures}.
 *
 * NOTE(review): the statement that obtains {@code mappings} is not visible
 * here; presumably it is the {@code indexMappings} entry for {@code f} - confirm.
 *
 * @param f    feature whose indices receive the value
 * @param vals map from feature index to value; modified in place
 * @param val  value stored under each index
 */
158 private void addDefault(
Feature f, Map<Integer, Double> vals,
double val) {
159 if (allFeatures.contains(f)) {
161 for (
int mapping : mappings) {
162 vals.put(mapping, val);
/**
 * Builds the map that the constructor stores in the {@code defaultFeatures}
 * field. The two visible argument lists pair the new map with
 * {@code DEFAULT_S2T_PHRASE_LOG_PROB} and {@code DEFAULT_T2S_PHRASE_LOG_PROB}.
 *
 * NOTE(review): the heads of those two calls are not visible here; the
 * argument shape matches {@code addDefault(Feature, Map, double)} - confirm.
 *
 * @return the newly created map
 */
172 private Map<Integer, Double> createDefaultData() {
// Local variable that shadows the field of the same name; this is a new map.
174 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
176 defaultFeatures, DEFAULT_S2T_PHRASE_LOG_PROB);
178 defaultFeatures, DEFAULT_T2S_PHRASE_LOG_PROB);
180 return defaultFeatures;
/**
 * Builds the map that the constructor stores in
 * {@code defaultPassThroughFeatures}. Both visible argument lists pair the
 * new map with {@code DEFAULT_LEX_VALUE}.
 *
 * NOTE(review): the heads of those calls, and any statements between the
 * allocation and them, are not visible here.
 *
 * @return the newly created map
 */
188 private Map<Integer, Double> createPassThroughDefaultData() {
// Local variable that shadows the defaultFeatures field; this is a new map.
190 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
196 defaultFeatures, DEFAULT_LEX_VALUE);
198 defaultFeatures, DEFAULT_LEX_VALUE);
199 return defaultFeatures;
/**
 * Builds the map that the constructor stores in {@code defaultOOVFeatures}.
 *
 * NOTE(review): only the allocation and the return are visible here; any
 * statement that populates the map is not shown.
 *
 * @return the newly created map
 */
202 private Map<Integer, Double> createOOVDefaultData() {
// Local variable that shadows the defaultFeatures field; this is a new map.
203 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
205 return defaultFeatures;
/**
 * Builds the map that the constructor stores in {@code defaultDeletionFeatures}.
 *
 * NOTE(review): only the allocation and the return are visible here; any
 * statement that populates the map is not shown.
 *
 * @return the newly created map
 */
208 private Map<Integer, Double> createDeletionDefaultData() {
// Local variable that shadows the defaultFeatures field; this is a new map.
209 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
211 return defaultFeatures;
/**
 * Builds the map that the constructor stores in {@code defaultGlueFeatures}.
 *
 * NOTE(review): only the allocation and the return are visible here; any
 * statement that populates the map is not shown.
 *
 * @return the newly created map
 */
214 private Map<Integer, Double> createGlueDefaultData() {
// Local variable that shadows the defaultFeatures field; this is a new map.
215 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
217 return defaultFeatures;
/**
 * Builds the map that the constructor stores in {@code defaultDeleteGlueFeatures}.
 *
 * NOTE(review): only the allocation and the return are visible here; any
 * statement that populates the map is not shown.
 *
 * @return the newly created map
 */
220 private Map<Integer, Double> createDeleteGlueDefaultData() {
// Local variable that shadows the defaultFeatures field; this is a new map.
221 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
223 return defaultFeatures;
/**
 * Builds the map that the constructor stores in
 * {@code defaultGlueStartOrEndFeatures}.
 *
 * NOTE(review): only the allocation and the return are visible here; any
 * statement that populates the map is not shown.
 *
 * @return the newly created map
 */
226 private Map<Integer, Double> createGlueStartOrEndDefaultData() {
// Local variable that shadows the defaultFeatures field; this is a new map.
227 Map<Integer, Double> defaultFeatures =
new HashMap<Integer, Double>();
231 return defaultFeatures;
// Each of these return statements hands back a new HashMap copied from one of
// the cached default maps, so changes made to the returned map do not reach
// the field it was copied from.
// NOTE(review): the enclosing method headers are not visible here; presumably
// these are the bodies of the getDefault...Features() accessors - confirm.
235 return new HashMap<Integer, Double>(defaultFeatures);
239 return new HashMap<Integer, Double>(defaultOOVFeatures);
243 return new HashMap<Integer, Double>(defaultDeletionFeatures);
247 return new HashMap<Integer, Double>(defaultGlueFeatures);
251 return new HashMap<Integer, Double>(defaultDeleteGlueFeatures);
255 return new HashMap<Integer, Double>(defaultGlueStartOrEndFeatures);
259 return new HashMap<Integer, Double>(defaultPassThroughFeatures);
266 throw new RuntimeException(
"No data for feature " + f.
getConfName());
287 int[] mappings = indexMappings.get(f);
290 for (
int index : mappings) {
295 ffVal = probs.
get(indexIntW).get();
298 defaults.put(index, ffVal);
/**
 * Updates the entry stored under index {@code mapping} in {@code features}.
 * One visible path removes the entry, the other stores {@code val}.
 *
 * NOTE(review): the condition that selects between removing and storing is
 * not visible here.
 *
 * @param mapping  feature index used as the map key
 * @param val      value to store
 * @param features map from feature index to value; modified in place
 */
305 private static void setVal(
int mapping,
double val,
306 Map<Integer, Double> features) {
309 features.remove(mapping);
311 features.put(mapping, val);
// Walks every registered feature and writes its values into processedFeatures
// through setVal, keyed by the feature's indices from indexMappings.
317 for (
Feature f : allFeatures) {
318 int[] mappings = indexMappings.get(f);
// NOTE(review): how results is produced, and what the null case does, is not
// visible here.
322 if (results == null) {
// results is positional: results[i] is stored under index mappings[i].
325 for (
int i = 0; i < results.length; ++i) {
326 setVal(mappings[i], results[i], processedFeatures);
// Second visible path: one value per mapping index is looked up in probs.
330 for (
int i = 0; i < mappings.length; ++i) {
337 double ffVal = probs.
get(
340 setVal(mappings[i], ffVal, processedFeatures);
345 return processedFeatures;
SOURCE2TARGET_LEXICAL_PROBABILITY
Map< Integer, Double > createFoundPassThroughRuleFeatures(FeatureMap features)
FeatureRegistry(String featureString, String provenanceString)
TARGET2SOURCE_LEXICAL_PROBABILITY
boolean hasLexicalFeatures()
Map< Integer, Double > getDefaultPassThroughRuleFeatures()
PROVENANCE_SOURCE2TARGET_LEXICAL_PROBABILITY
Map< Integer, Double > getDefaultFeatures()
int[] getFeatureIndices(Feature...features)
PROVENANCE_SOURCE2TARGET_PROBABILITY
List< Feature > getFeatures()
Map< Integer, Double > getDefaultGlueStartOrEndFeatures()
Map< Integer, Double > getDefaultOOVFeatures()
RULE_COUNT_GREATER_THAN_2
Map< Integer, Double > getDefaultGlueFeatures()
boolean containsFeature(Feature f)
double[] getZeros(Feature f)
static Feature findFromConf(String name)
Map< Integer, Double > processFeatures(Rule rule, RuleData data)
Map< Integer, Double > getDefaultDeleteGlueFeatures()
PROVENANCE_TARGET2SOURCE_PROBABILITY
PROVENANCE_TARGET2SOURCE_LEXICAL_PROBABILITY
Map< Integer, Double > getDefaultDeletionFeatures()