16 package uk.ac.cam.eng.extraction.hadoop.features.phrase;
    18 import java.io.IOException;
    19 import java.util.List;
    21 import org.apache.hadoop.conf.Configuration;
    22 import org.apache.hadoop.mapreduce.Job;
    23 import org.apache.hadoop.mapreduce.Mapper;
    24 import org.apache.hadoop.mapreduce.Partitioner;
    25 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
    26 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
    27 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
    28 import org.apache.hadoop.util.ToolRunner;
    44         public static class Target2SourceComparator 
extends    45                         MarginalReducer.MRComparator {
    48                 protected boolean isSource2Target() {
    54         private static class Target2SourcePartitioner 
extends    55                         Partitioner<Rule, ProvenanceCountMap> {
    57                 private Partitioner<List<Symbol>, 
ProvenanceCountMap> defaultPartitioner = 
new HashPartitioner<>();
    62                         return defaultPartitioner.getPartition(key.getTarget(), value,
    68         private static class SwappingMapper 
extends    69                         Mapper<Rule, ExtractedData, Rule, ProvenanceCountMap> {
    72                 protected void map(Rule key, 
ExtractedData value, Context context)
    73                                 throws IOException, InterruptedException {
    75                         Rule r = 
new Rule(key);
    76                         newKey = r.invertNonTerminals();
    82         public Job 
getJob(Configuration conf) 
throws IOException {
    83                 conf.set(
"mapreduce.map.child.java.opts", 
"-Xmx200m");
    84                 conf.set(
"mapreduce.reduce.child.java.opts", 
"-Xmx5128m");
    85                 conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, 
false);
    86                 Job job = 
new Job(conf);
    88                 job.setJobName(
"Target2Source");
    89                 job.setSortComparatorClass(Target2SourceComparator.class);
    90                 job.setPartitionerClass(Target2SourcePartitioner.class);
    91                 job.setMapperClass(SwappingMapper.class);
    92                 job.setReducerClass(MarginalReducer.class);
    93                 job.setMapOutputKeyClass(Rule.class);
    95                 job.setOutputKeyClass(Rule.class);
    97                 job.setInputFormatClass(SequenceFileInputFormat.class);
    98                 job.setOutputFormatClass(SequenceFileOutputFormat.class);
   102         public static void main(String[] args) 
throws Exception {