Cambridge SMT System
Source2TargetJob.java
Go to the documentation of this file.
1 /*******************************************************************************
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use these files except in compliance with the License.
4  * You may obtain a copy of the License at
5  *
6  * http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  *
14  * Copyright 2014 - Juan Pino, Aurelien Waite, William Byrne
15  *******************************************************************************/
16 package uk.ac.cam.eng.extraction.hadoop.features.phrase;
17 
18 import java.io.IOException;
19 import java.util.List;
20 
21 import org.apache.hadoop.conf.Configuration;
22 import org.apache.hadoop.mapreduce.Job;
23 import org.apache.hadoop.mapreduce.Mapper;
24 import org.apache.hadoop.mapreduce.Partitioner;
25 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
26 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
27 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
28 import org.apache.hadoop.util.ToolRunner;
29 
30 import uk.ac.cam.eng.extraction.Rule;
31 import uk.ac.cam.eng.extraction.Symbol;
35 
42 public class Source2TargetJob extends PhraseJob{
43 
44  public static class Source2TargetComparator extends
45  MarginalReducer.MRComparator {
46 
47  @Override
48  protected boolean isSource2Target() {
49  return true;
50  }
51 
52  }
53 
54  private static class Source2TargetPartitioner extends
55  Partitioner<Rule, ProvenanceCountMap> {
56 
57  private Partitioner<List<Symbol>, ProvenanceCountMap> defaultPartitioner = new HashPartitioner<>();
58 
59  @Override
60  public int getPartition(Rule key, ProvenanceCountMap value,
61  int numPartitions) {
62  return defaultPartitioner.getPartition(key.getSource(), value,
63  numPartitions);
64  }
65 
66  }
67 
68  private static class KeepProvenanceCountsOnlyMapper
69  extends
70  Mapper<Rule, ExtractedData, Rule, ProvenanceCountMap> {
71 
72  @Override
73  protected void map(Rule key, ExtractedData value,
74  Context context) throws IOException, InterruptedException {
75  context.write(key, value.getProvenanceCountMap());
76  }
77 
78  }
79 
80  @Override
81  public Job getJob(Configuration conf) throws IOException {
82  conf.setIfUnset("mapreduce.map.child.java.opts", "-Xmx200m");
83  conf.setIfUnset("mapreduce.reduce.child.java.opts", "-Xmx5128m");
84  conf.setIfUnset("mapreduce.map.memory.mb", "1000");
85  conf.setIfUnset("mapreduce.reduce.memory.mb", "6000");
86  conf.setBoolean(MarginalReducer.SOURCE_TO_TARGET, true);
87  Job job = new Job(conf);
88  job.setJarByClass(Source2TargetJob.class);
89  job.setJobName("Source2Taget");
90  job.setSortComparatorClass(Source2TargetComparator.class);
91  job.setPartitionerClass(Source2TargetPartitioner.class);
92  job.setMapperClass(KeepProvenanceCountsOnlyMapper.class);
93  job.setReducerClass(MarginalReducer.class);
94  job.setMapOutputKeyClass(Rule.class);
95  job.setMapOutputValueClass(ProvenanceCountMap.class);
96  job.setOutputKeyClass(Rule.class);
97  job.setOutputValueClass(FeatureMap.class);
98  job.setInputFormatClass(SequenceFileInputFormat.class);
99  job.setOutputFormatClass(SequenceFileOutputFormat.class);
100  return job;
101  }
102 
103 
104  public static void main(String[] args) throws Exception {
105  int res = ToolRunner.run(new Source2TargetJob(), args);
106  System.exit(res);
107  }
108 }