16 package uk.ac.cam.eng.extraction.hadoop.features.phrase;
18 import java.io.IOException;
19 import java.util.List;
21 import org.apache.hadoop.conf.Configuration;
22 import org.apache.hadoop.mapreduce.Job;
23 import org.apache.hadoop.mapreduce.Mapper;
24 import org.apache.hadoop.mapreduce.Partitioner;
25 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
26 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
27 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
28 import org.apache.hadoop.util.ToolRunner;
/**
 * Sort comparator for the target-to-source job; it is registered on the job
 * via {@code job.setSortComparatorClass(...)} in {@code getJob}.
 * Extends {@code MarginalReducer.MRComparator} and overrides its direction
 * hook {@code isSource2Target()}.
 */
44 public static class Target2SourceComparator
extends 45 MarginalReducer.MRComparator {
// Direction hook overridden from the parent comparator.
// NOTE(review): the method body is not visible here; presumably it returns
// false for the target-to-source direction -- confirm.
48 protected boolean isSource2Target() {
/**
 * Partitioner for {@code (Rule, ProvenanceCountMap)} map output that delegates
 * to a {@link HashPartitioner}, passing the rule's target side
 * ({@code key.getTarget()}) rather than the whole rule as the key.
 */
54 private static class Target2SourcePartitioner
extends 55 Partitioner<Rule, ProvenanceCountMap> {
// Delegate that does the actual partitioning; keyed on a symbol list, which
// is what key.getTarget() is passed as below.
57 private Partitioner<List<Symbol>,
ProvenanceCountMap> defaultPartitioner =
new HashPartitioner<>();
// Only the target side of the rule is handed to the delegate.
// NOTE(review): presumably so that rules sharing a target reach the same
// reducer -- confirm; the rest of this call is not visible here.
62 return defaultPartitioner.getPartition(key.getTarget(), value,
/**
 * Mapper from {@code (Rule, ExtractedData)} input records to
 * {@code (Rule, ProvenanceCountMap)} output records. The visible part of
 * {@code map} derives a new key from a copy of the incoming rule with its
 * non-terminals inverted.
 */
68 private static class SwappingMapper
extends 69 Mapper<Rule, ExtractedData, Rule, ProvenanceCountMap> {
/**
 * Processes one input record.
 *
 * @param key     the incoming rule; it is copied before being transformed
 * @param value   the data associated with the rule
 * @param context the Hadoop mapper context
 * @throws IOException          declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
72 protected void map(Rule key,
ExtractedData value, Context context)
73 throws IOException, InterruptedException {
// Work on a copy constructed from the input key rather than on key itself.
75 Rule r =
new Rule(key);
// NOTE(review): the declaration of newKey and the write to context are not
// visible here; presumably newKey is the emitted output key -- confirm.
76 newKey = r.invertNonTerminals();
/**
 * Configures the "Target2Source" MapReduce job: sets JVM heap options and the
 * direction flag on the supplied configuration, then wires up the comparator,
 * partitioner, mapper, reducer, key classes and sequence-file I/O formats.
 *
 * Note that the {@code conf} argument itself is modified by the
 * {@code conf.set*} calls below.
 *
 * @param conf the Hadoop configuration to modify and build the job from
 * @throws IOException declared because job construction may fail
 */
82 public Job
getJob(Configuration conf)
throws IOException {
// Heap size intended for map tasks.
// NOTE(review): Hadoop documents "mapreduce.map.java.opts" (and the
// deprecated "mapred.map.child.java.opts"); this "mapreduce.map.child..."
// key does not appear among the documented names and may be silently
// ignored -- verify against the cluster's Hadoop version.
83 conf.set(
"mapreduce.map.child.java.opts",
"-Xmx200m");
// Heap size intended for reduce tasks.
// NOTE(review): same concern about the key name as above; also 5128 may be
// a typo for 5120 (5 GiB) -- confirm.
84 conf.set(
"mapreduce.reduce.child.java.opts",
"-Xmx5128m");
// Mark this run as the target-to-source direction.
85 conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET,
false);
// NOTE(review): the Job(Configuration) constructor is deprecated in
// Hadoop 2.x in favour of Job.getInstance(conf) -- consider migrating.
86 Job job =
new Job(conf);
88 job.setJobName(
"Target2Source");
// Job-specific sort order, partitioning and map step declared in this file;
// the reduce step is the shared MarginalReducer.
89 job.setSortComparatorClass(Target2SourceComparator.class);
90 job.setPartitionerClass(Target2SourcePartitioner.class);
91 job.setMapperClass(SwappingMapper.class);
92 job.setReducerClass(MarginalReducer.class);
// Both the intermediate and the final keys are rules.
// NOTE(review): the corresponding value-class settings are not visible in
// this view.
93 job.setMapOutputKeyClass(Rule.class);
95 job.setOutputKeyClass(Rule.class);
// Input is read from, and output written to, Hadoop sequence files.
97 job.setInputFormatClass(SequenceFileInputFormat.class);
98 job.setOutputFormatClass(SequenceFileOutputFormat.class);
/**
 * Command-line entry point.
 *
 * NOTE(review): the body is not visible here; given the ToolRunner import it
 * presumably launches this job through ToolRunner -- confirm.
 *
 * @param args command-line arguments
 * @throws Exception any failure is propagated to the caller
 */
102 public static void main(String[] args)
throws Exception {