Cambridge SMT System
Target2SourceJob.java
Go to the documentation of this file.
1 /*******************************************************************************
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use these files except in compliance with the License.
4  * You may obtain a copy of the License at
5  *
6  * http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  *
14  * Copyright 2014 - Juan Pino, Aurelien Waite, William Byrne
15  *******************************************************************************/
16 package uk.ac.cam.eng.extraction.hadoop.features.phrase;
17 
18 import java.io.IOException;
19 import java.util.List;
20 
21 import org.apache.hadoop.conf.Configuration;
22 import org.apache.hadoop.mapreduce.Job;
23 import org.apache.hadoop.mapreduce.Mapper;
24 import org.apache.hadoop.mapreduce.Partitioner;
25 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
26 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
27 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
28 import org.apache.hadoop.util.ToolRunner;
29 
30 import uk.ac.cam.eng.extraction.Rule;
31 import uk.ac.cam.eng.extraction.Symbol;
35 
42 public class Target2SourceJob extends PhraseJob {
43 
44  public static class Target2SourceComparator extends
45  MarginalReducer.MRComparator {
46 
47  @Override
48  protected boolean isSource2Target() {
49  return false;
50  }
51 
52  }
53 
54  private static class Target2SourcePartitioner extends
55  Partitioner<Rule, ProvenanceCountMap> {
56 
57  private Partitioner<List<Symbol>, ProvenanceCountMap> defaultPartitioner = new HashPartitioner<>();
58 
59  @Override
60  public int getPartition(Rule key, ProvenanceCountMap value,
61  int numPartitions) {
62  return defaultPartitioner.getPartition(key.getTarget(), value,
63  numPartitions);
64  }
65 
66  }
67 
68  private static class SwappingMapper extends
69  Mapper<Rule, ExtractedData, Rule, ProvenanceCountMap> {
70 
71  @Override
72  protected void map(Rule key, ExtractedData value, Context context)
73  throws IOException, InterruptedException {
74  Rule newKey = key;
75  Rule r = new Rule(key);
76  newKey = r.invertNonTerminals();
77  context.write(newKey, value.getProvenanceCountMap());
78  }
79  }
80 
81  @Override
82  public Job getJob(Configuration conf) throws IOException {
83  conf.set("mapreduce.map.child.java.opts", "-Xmx200m");
84  conf.set("mapreduce.reduce.child.java.opts", "-Xmx5128m");
85  conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, false);
86  Job job = new Job(conf);
87  job.setJarByClass(Target2SourceJob.class);
88  job.setJobName("Target2Source");
89  job.setSortComparatorClass(Target2SourceComparator.class);
90  job.setPartitionerClass(Target2SourcePartitioner.class);
91  job.setMapperClass(SwappingMapper.class);
92  job.setReducerClass(MarginalReducer.class);
93  job.setMapOutputKeyClass(Rule.class);
94  job.setMapOutputValueClass(ProvenanceCountMap.class);
95  job.setOutputKeyClass(Rule.class);
96  job.setOutputValueClass(FeatureMap.class);
97  job.setInputFormatClass(SequenceFileInputFormat.class);
98  job.setOutputFormatClass(SequenceFileOutputFormat.class);
99  return job;
100  }
101 
102  public static void main(String[] args) throws Exception {
103  int res = ToolRunner.run(new Target2SourceJob(), args);
104  System.exit(res);
105  }
106 }