Cambridge SMT System
RuleExtractorTest.java
Go to the documentation of this file.
1 /*******************************************************************************
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use these files except in compliance with the License.
4  * You may obtain a copy of the License at
5  *
6  * http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  *
14  * Copyright 2014 - Juan Pino, Aurelien Waite, William Byrne
15  *******************************************************************************/
16 
17 package uk.ac.cam.eng.extraction;
18 
19 import java.io.File;
20 import java.io.FileOutputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.OutputStream;
24 import java.util.ArrayList;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Set;
28 import java.util.function.Function;
29 
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.io.MapWritable;
34 import org.apache.hadoop.io.SequenceFile;
35 import org.junit.AfterClass;
36 import org.junit.Assert;
37 import org.junit.BeforeClass;
38 import org.junit.ClassRule;
39 import org.junit.Test;
40 import org.junit.rules.TemporaryFolder;
41 
45 import uk.ac.cam.eng.util.Pair;
46 
56 public class RuleExtractorTest {
57 
    // Default file-system URI handed to FileSystem.setDefaultUri in setupFileSystem().
58  private static final String LOCAL_URI = "file:///";
    // Resource name passed to copyDataToTestDir(), which resolves it through
    // RuleExtractorTest.class.getResourceAsStream.
59  private static final String TRAINING_DATA = "/unit_testing_training_data";
60 
    // Class-level temporary folder; copyDataToTestDir() creates its files here and
    // setupFileSystem() uses its root as the file system's working directory.
61  @ClassRule
62  public static TemporaryFolder folder = new TemporaryFolder();
    // Hadoop configuration created in setupFileSystem() and reused by the tests.
63  public static Configuration conf;
    // Copy of the TRAINING_DATA resource inside the temporary folder; set in setupFileSystem().
64  public static File trainingDataFile;
65 
66 
67  public static File copyDataToTestDir(String resource) throws IOException{
68  File output = folder.newFile();
69  try (OutputStream writer = new FileOutputStream(output)) {
70  try (InputStream rulesFile = RuleExtractorTest.class.getResourceAsStream(
71  resource)) {
72  for (int in = rulesFile.read(); in != -1; in = rulesFile.read()) {
73  writer.write(in);
74  }
75  }
76  }
77  return output;
78  }
79 
80  @BeforeClass
81  public static void setupFileSystem() throws IOException {
82  // Ensure hadoop to use local file system
83  conf = new Configuration();
84  FileSystem.setDefaultUri(conf, LOCAL_URI);
85  FileSystem fs = FileSystem.get(conf);
86  fs.setWorkingDirectory(new Path(folder.getRoot().getAbsolutePath()));
87  trainingDataFile = copyDataToTestDir(TRAINING_DATA);
88  }
89 
    // Class-level teardown: explicitly deletes the shared temporary folder.
    // NOTE(review): folder is also registered as a @ClassRule, which presumably
    // removes it on its own - confirm whether this explicit delete is still needed.
90  @AfterClass
91  public static void cleanUp() throws IOException{
92  folder.delete();
93  }
94 
95 
96  private boolean isContiguous(List<Rule> rules, Function<Rule, List<Symbol>> getStr){
97  Set<List<Symbol>> prevs = new HashSet<>();
98  List<Symbol> prev = getStr.apply(rules.get(0));
99  prevs.add(prev);
100  for(Rule rule : rules){
101  List<Symbol> str = getStr.apply(rule);
102  if(!(str.equals(prev) || prevs.add(str))){
103  return false;
104  }
105  prev = str;
106  }
107  return true;
108  }
109 
    /**
     * Extracts rules from the first records of the training data and checks the
     * two job comparators: after sorting with Source2TargetComparator the rules
     * must be contiguous by source, and after sorting with Target2SourceComparator
     * they must be contiguous by target.
     *
     * @throws IOException if the training data sequence file cannot be read
     */
110  @SuppressWarnings("unchecked")
111  @Test
112  public void testRuleComparator() throws IOException {
113  try (SequenceFile.Reader reader = new SequenceFile.Reader(
114  FileSystem.get(conf), new Path(trainingDataFile.getPath()),
115  conf)) {
116  MapWritable key = new MapWritable();
    // NOTE(review): the declaration of 'val' (listing line 117) is missing from
    // this view; its type cannot be determined from here.
118  List<Rule> rules = new ArrayList<>();
    // NOTE(review): the meaning of the individual ExtractOptions arguments is not
    // visible here - check the ExtractOptions constructor.
119  ExtractOptions opts = new ExtractOptions(9, 5, 5, 10, true, true);
120  int count = 0;
    // Read at most 1000 records from the sequence file.
121  while (reader.next(key, val) && count < 1000) {
    // Elements 0, 1 and 2 of the value are passed to the extractor as
    // src, trg and a respectively.
122  String src = val.get()[0].toString();
123  String trg = val.get()[1].toString();
124  String a = val.get()[2].toString();
125  List<Pair<Rule, Alignment>> extracted = Extract.extractJava(opts, src, trg, a);
    // Only the rule of each (rule, alignment) pair is kept.
126  for(Pair<Rule, Alignment> pair : extracted){
127  rules.add(pair.getFirst());
128  }
129  ++count;
130  }
    // Expected number of extracted rules for this data set and these options.
131  Assert.assertEquals(472100, rules.size());
    // Before sorting, equal sources are expected to be scattered through the list.
132  Assert.assertFalse(isContiguous(rules, r -> r.getSource()));
133  rules.sort(new Source2TargetJob.Source2TargetComparator());
134  Assert.assertTrue(isContiguous(rules, r -> r.getSource()));
135  rules.sort(new Target2SourceJob.Target2SourceComparator());
136  Assert.assertTrue(isContiguous(rules, r -> r.getTarget()));
137  }
138  }
139 
140 
141 
142 
143 }
MertOpt opts
Definition: MertCommon.cpp:14
static File copyDataToTestDir(String resource)