Cambridge SMT System
CopyRecordsForTesting.java
Go to the documentation of this file.
1 /*******************************************************************************
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use these files except in compliance with the License.
4  * You may obtain a copy of the License at
5  *
6  * http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  *
14  *******************************************************************************/
15 
16 package uk.ac.cam.eng.extraction.hadoop.util;
17 
18 import java.io.IOException;
19 
20 import org.apache.hadoop.conf.Configuration;
21 import org.apache.hadoop.fs.FileSystem;
22 import org.apache.hadoop.fs.Path;
23 import org.apache.hadoop.io.SequenceFile;
24 import org.apache.hadoop.io.SequenceFile.CompressionType;
25 import org.apache.hadoop.io.Writable;
26 import org.apache.hadoop.util.ReflectionUtils;
27 
35 public class CopyRecordsForTesting {
36 
37  public static void main(String[] args) throws IOException {
38  if (args.length != 3) {
39  System.err
40  .println("Args: <sequence file in> <sequence file out> <modulo #>");
41  System.exit(1);
42  }
43  int modulo = Integer.parseInt(args[2]);
44  Configuration conf = new Configuration();
45  FileSystem fs = FileSystem.get(conf);
46  Path pathIn = new Path(args[0]);
47  SequenceFile.Reader reader = new SequenceFile.Reader(fs, pathIn, conf);
48  Path pathOut = new Path(args[1]);
49  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf,
50  pathOut, reader.getKeyClass(), reader.getValueClass(),
51  CompressionType.BLOCK);
52  Writable key = (Writable) ReflectionUtils.newInstance(
53  reader.getKeyClass(), conf);
54  Writable value = (Writable) ReflectionUtils.newInstance(
55  reader.getValueClass(), conf);
56  int count=0;
57  while (reader.next(key, value)) {
58  if(count % modulo ==0){
59  writer.append(key, value);
60  }
61  ++count;
62  }
63  reader.close();
64  writer.close();
65  }
66 
67 }