17 package uk.ac.cam.eng.extraction;
20 import java.io.FileOutputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.OutputStream;
24 import java.util.ArrayList;
25 import java.util.HashSet;
26 import java.util.List;
28 import java.util.function.Function;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.io.MapWritable;
34 import org.apache.hadoop.io.SequenceFile;
35 import org.junit.AfterClass;
36 import org.junit.Assert;
37 import org.junit.BeforeClass;
38 import org.junit.ClassRule;
39 import org.junit.Test;
40 import org.junit.rules.TemporaryFolder;
// URI string that is passed to FileSystem.setDefaultUri(conf, ...) further down in this file.
// NOTE(review): the "file" scheme presumably selects Hadoop's local filesystem - confirm.
58 private static final String LOCAL_URI =
"file:///";
// Path-like name of the training-data fixture. How it is resolved (classpath resource,
// filesystem path, ...) is not visible in this excerpt.
59 private static final String TRAINING_DATA =
"/unit_testing_training_data";
// JUnit TemporaryFolder used to create scratch files and as the filesystem working directory.
// NOTE(review): ClassRule is imported, so this field is presumably annotated @ClassRule on a
// line not shown here - confirm.
62 public static TemporaryFolder
folder =
new TemporaryFolder();
// Hadoop Configuration shared by the class; it is declared here without an initializer and
// assigned later (see the "conf = new Configuration()" statement).
63 public static Configuration
conf;
// Create a new file inside the temporary folder.
68 File output = folder.newFile();
// Open an output stream onto that file in a try-with-resources header so the stream is
// closed when the block exits.
69 try (OutputStream writer =
new FileOutputStream(output)) {
// Call rulesFile.read() repeatedly, one value per iteration, until it returns -1.
// rulesFile is declared outside this excerpt (presumably an InputStream - confirm).
// NOTE(review): the loop body is not visible here; presumably each value read is written
// to writer - confirm against the full file.
72 for (
int in = rulesFile.read(); in != -1; in = rulesFile.read()) {
// Create a fresh Hadoop Configuration and register LOCAL_URI as its default filesystem URI.
83 conf =
new Configuration();
84 FileSystem.setDefaultUri(conf, LOCAL_URI);
// Obtain the FileSystem for this configuration and make the temporary folder's root
// directory its working directory.
85 FileSystem fs = FileSystem.get(conf);
86 fs.setWorkingDirectory(
new Path(folder.getRoot().getAbsolutePath()));
// Static clean-up method; declared to throw IOException. Its body is not visible in this excerpt.
// NOTE(review): AfterClass is imported, so this is presumably the @AfterClass hook - confirm.
91 public static void cleanUp() throws IOException{
/**
 * Inspects the sequence of keys obtained by applying {@code getStr} to each rule, in list
 * order, looking for a key that reappears after a different key has been seen.
 *
 * Only part of the body is visible in this excerpt: the branch taken when the check below
 * fires, any update of {@code prev}, and the return statements are not shown.
 * NOTE(review): presumably returns false when the check fires and true otherwise - confirm.
 * NOTE(review): {@code rules.get(0)} throws IndexOutOfBoundsException for an empty list.
 *
 * @param rules  the rules to inspect, in the order to be checked
 * @param getStr maps a rule to the symbol list used as its key (callers in this file pass
 *               {@code r -> r.getSource()} and {@code r -> r.getTarget()})
 */
96 private boolean isContiguous(List<Rule> rules, Function<Rule, List<Symbol>> getStr){
// Keys that have been offered to the set so far.
97 Set<List<Symbol>> prevs =
new HashSet<>();
// Start with the first rule's key as the "previous" key.
98 List<Symbol> prev = getStr.apply(rules.get(0));
100 for(Rule rule : rules){
101 List<Symbol> str = getStr.apply(rule);
// Fires only when str differs from prev AND prevs.add(str) returns false, i.e. str was
// already in the set. Because of short-circuiting, add() is only called when str != prev.
102 if(!(str.equals(prev) || prevs.add(str))){
// Fragment of a test: the method header, the declarations of val and count, the inner loop
// header and the statements between the final assertions are not visible in this excerpt.
110 @SuppressWarnings(
"unchecked")
// Open a SequenceFile reader over trainingDataFile using the shared configuration's
// FileSystem; try-with-resources closes the reader when the block exits.
113 try (SequenceFile.Reader reader =
new SequenceFile.Reader(
114 FileSystem.get(conf),
new Path(trainingDataFile.getPath()),
// Reusable key object that reader.next(key, val) fills on each call.
116 MapWritable key =
new MapWritable();
// Accumulates the rule from every extracted (rule, alignment) pair.
118 List<Rule> rules =
new ArrayList<>();
// Extraction options; the meaning of each positional argument is defined by ExtractOptions,
// which is not visible here.
119 ExtractOptions
opts =
new ExtractOptions(9, 5, 5, 10,
true,
true);
// Keep reading records while the reader has more and count is below 1000.
// NOTE(review): count is declared and updated on lines not shown - confirm it is incremented.
121 while (reader.next(key, val) && count < 1000) {
// Elements 0, 1 and 2 of the value array are passed to the extractor as src, trg and a.
// NOTE(review): presumably source sentence, target sentence and alignment - confirm.
122 String src = val.get()[0].toString();
123 String trg = val.get()[1].toString();
124 String a = val.get()[2].toString();
125 List<Pair<Rule, Alignment>> extracted = Extract.extractJava(opts, src, trg, a);
// Keep only the first component (the Rule) of a pair; the loop that defines "pair" is not shown.
127 rules.add(pair.getFirst());
// Expected total number of collected rules for this fixture.
131 Assert.assertEquals(472100, rules.size());
// At this point the rules are expected NOT to be contiguous by source side.
132 Assert.assertFalse(isContiguous(rules, r -> r.getSource()));
// NOTE(review): a statement between these assertions is not shown; presumably the rules are
// re-ordered (e.g. sorted) so that they become contiguous by source - confirm.
134 Assert.assertTrue(isContiguous(rules, r -> r.getSource()));
// NOTE(review): likewise a hidden statement presumably re-orders the rules before the
// target-side contiguity check - confirm.
136 Assert.assertTrue(isContiguous(rules, r -> r.getTarget()));