16 package uk.ac.cam.eng.rule.retrieval;
18 import java.io.IOException;
19 import java.util.Collections;
20 import java.util.Iterator;
22 import org.apache.hadoop.conf.Configuration;
23 import org.apache.hadoop.fs.FileSystem;
24 import org.apache.hadoop.fs.Path;
25 import org.apache.hadoop.hbase.KeyValue;
26 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
27 import org.apache.hadoop.hbase.io.hfile.HFile;
28 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
29 import org.apache.hadoop.io.DataInputBuffer;
30 import org.apache.hadoop.io.DataOutputBuffer;
// Scanner over the underlying HFile; assigned from hfReader.getScanner(...).
46 private HFileScanner scanner;
// Reusable input buffer; readValue()/readSource() reset it onto the scanner's
// current value/key bytes.
48 private final DataInputBuffer in =
new DataInputBuffer();
// Reusable output buffer; seek() uses its data and length as the row of the lookup KeyValue.
49 private final DataOutputBuffer out =
new DataOutputBuffer();
// Single Rule instance owned by this reader (its use is not visible in this listing).
50 private final Rule rule =
new Rule();
// RuleString key field (how it is populated is not visible in this listing).
52 private RuleString key =
new RuleString();
// Constructor fragment (header not visible): obtains the scanner from the supplied
// HFile reader, passing false for both boolean options.
// NOTE(review): in the HBase HFile.Reader API these flags appear to be cacheBlocks and
// pread — confirm against the HBase version in use.
55 scanner = hfReader.getScanner(
false,
false);
/**
 * Points the shared input buffer at the scanner's current value bytes.
 * The statements between the reset and the catch clause are not visible in this
 * listing; an IOException raised there is rethrown as a RuntimeException.
 */
59 private void readValue() {
// Backing array, array offset and limit of the current value buffer.
60 in.reset(scanner.getValue().array(), scanner.getValue().arrayOffset(),
61 scanner.getValue().limit());
64 }
catch (IOException e) {
// Rethrow unchecked, keeping the original exception as the cause.
66 throw new RuntimeException(e);
/**
 * Positions the scanner for the given source.
 * Visible steps: build a lookup KeyValue from the current contents of {@code out},
 * then call {@code scanner.seekTo} with that KeyValue's buffer and key offset.
 * NOTE(review): the code that writes {@code source} into {@code out}, the rest of the
 * seekTo call and the interpretation of {@code pos} are not visible in this listing.
 *
 * @param source the rule source to seek to
 * @throws IOException declared by the method signature
 */
70 public boolean seek(RuleString source)
throws IOException {
73 byte[] empty = Array.emptyByteArray();
// Going by the HBase KeyValue constructor's argument order: row = data of `out`,
// empty family, empty qualifier, timestamp 0, type Put, empty value.
74 KeyValue kv =
new KeyValue(out.getData(), 0, out.getLength(), empty, 0,
75 0, empty, 0, 0, 0l, KeyValue.Type.Put, empty, 0, 0);
76 int pos = scanner.seekTo(kv.getBuffer(), kv.getKeyOffset(),
// Fragment of a method returning Iterable<Pair<Rule, RuleData>> (the member index lists
// getRulesForSource() with that return type; the header is not visible here).
// NOTE(review): `instance` is created once, outside the Iterable, so every iterator()
// call delegates to the same underlying iterator — the returned Iterable can presumably
// be traversed only once; confirm callers do not iterate it twice.
89 final Iterator<Pair<RuleString, RuleData>> instance = value.iterator();
91 return new Iterable<Pair<Rule, RuleData>>() {
94 public Iterator<Pair<Rule, RuleData>>
iterator() {
95 return new Iterator<Pair<Rule, RuleData>>() {
98 public boolean hasNext() {
// Delegates directly to the captured iterator.
99 return instance.hasNext();
110 public void remove() {
// Removal is not supported.
111 throw new UnsupportedOperationException();
/**
 * Points the shared input buffer at the scanner's current key, starting
 * KeyValue.ROW_LENGTH_SIZE bytes past the key buffer's array offset.
 * The code that then reads and returns the RuleString is not visible in this listing.
 */
119 private RuleString readSource() {
// NOTE(review): the start is advanced by ROW_LENGTH_SIZE but the length argument is
// still the full limit() — confirm the subsequent read never consumes past the key.
121 in.reset(scanner.getKey().array(), scanner.getKey().arrayOffset()
122 + KeyValue.ROW_LENGTH_SIZE, scanner.getKey().limit());
// Fragment of a method returning Iterator<Pair<Rule, RuleData>> (the member index lists
// iterator(); the header is not visible here). It calls the no-argument scanner.seekTo()
// and returns an iterator that advances the scanner with scanner.next() and hands out
// elements from a per-source targetIter.
130 boolean temp =
false;
// Result of the no-argument seekTo(); kept below as isNotEmpty.
132 temp = scanner.seekTo();
133 }
catch (IOException e) {
134 throw new RuntimeException(e);
// Copied into a final local so the anonymous class can capture it.
136 final boolean isNotEmpty = temp;
141 return new Iterator<Pair<Rule, RuleData>>() {
// Iterator over the current source's rules; null until first assigned.
143 Iterator<Pair<Rule, RuleData>> targetIter;
// Scanner state: seeded from isNotEmpty, then updated from scanner.next().
145 boolean hasNext = isNotEmpty;
148 public boolean hasNext() {
// NOTE(review): if hasNext is false while targetIter is still null (isNotEmpty was
// false and next() has never run), this dereference throws NullPointerException —
// confirm and add a null guard.
149 return hasNext || targetIter.hasNext();
// First call: no per-source iterator has been created yet.
154 if (targetIter == null) {
// Advance the scanner and record whether another entry exists.
157 hasNext = scanner.next();
158 }
catch (IOException e) {
// The current source still has rules: return the next one.
163 if (targetIter.hasNext()) {
164 return targetIter.next();
165 }
else if (hasNext) {
// Current source exhausted but the scanner had another entry: advance again.
169 hasNext = scanner.next();
170 }
catch (IOException e) {
174 return targetIter.next();
180 public void remove() {
// Removal is not supported.
181 throw new UnsupportedOperationException();
/**
 * Command-line entry point. For each file name in {@code args} it prints
 * "Reading file " followed by the name, opens the file as an HFile, and prints
 * {@code fileCount}, a tab and the file name; finally it prints {@code count}
 * followed by "\ttotal".
 * NOTE(review): the loop that updates the counters is only partly visible here
 * (a for statement with an unused loop variable) — presumably it counts entries; confirm.
 *
 * @param args paths of the HFiles to read
 * @throws IOException declared by the method signature
 */
187 public static void main(String[] args)
throws IOException {
188 Configuration conf =
new Configuration();
189 CacheConfig cacheConf =
new CacheConfig(conf);
191 for (String fileName : args) {
193 System.out.println(
"Reading file " + fileName);
// NOTE(review): a reader is created per file and no close() is visible in this
// listing — confirm the reader is released.
194 HFile.Reader hfReader = HFile.createReader(FileSystem.get(conf),
195 new Path(fileName), cacheConf);
197 for (@SuppressWarnings(
"unused")
202 System.out.println(fileCount +
"\t" + fileName);
204 System.out.println(count +
"\ttotal");
Iterator< Pair< Rule, RuleData > > iterator()
static< F, S > Pair< F, S > createPair(F first, S second)
boolean seek(RuleString source)
HFileRuleReader(HFile.Reader hfReader)
static void main(String[] args)
Iterable< Pair< Rule, RuleData > > getRulesForSource()