Cambridge SMT System
hifst_enumerate_vocab.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef HIFSTENUMERATE_HPP
16 #define HIFSTENUMERATE_HPP
17 
24 #include <lm/config.hh>
25 #include <lm/enumerate_vocab.hh>
26 #include <wordmapper.hpp>
27 
28 namespace lm {
29 
37 template<class WordMapperT>
38 class HifstEnumerateVocab: public EnumerateVocab {
39  private:
41  WordMapperT *wm_;
42  public:
43  // we need this available for a quick nplm hack
44  IdBridge& idb_;
45  HifstEnumerateVocab (IdBridge& idb, WordMapperT *wm) : idb_ (idb), wm_ (wm) {}
46  virtual ~HifstEnumerateVocab() {}
47  virtual void Add (WordIndex index, const StringPiece& str) {
48  std::string s = str.as_string();
49  Add(index, s);
50  }
51 
52  void Add (WordIndex index, const std::string& s) {
53  unsigned t;
54  LDEBUG ("Converting ... s=" << s << ",lm_idx=" << index);
55  if (s == "<s>") t = 1;
56  else if (s == "</s>") t = 2;
57  else if (s == "<unk>") t = 0;
58 #ifdef WITH_NPLM
59  else if (s == "<null>") t = 3;
60 #endif
61  else if (wm_ == NULL) t = ucam::util::toNumber<unsigned> (s);
62  else {
63  t = (*wm_) (s);
64  LDEBUG ("Found gidx = " << t);
65  }
66  if (t < std::numeric_limits<unsigned>::max() ) {
67  LDEBUG ("Adding " << t << " => " << index);
68  idb_.add (t, index);
69  }
70  }
71 
72  void AddOutput (WordIndex index, const std::string& s) {
73  unsigned t;
74  LDEBUG ("Converting ... s=" << s << ",lm_idx=" << index);
75  if (s == "<s>") t = 1;
76  else if (s == "</s>") t = 2;
77  else if (s == "<unk>") t = 0;
78 #ifdef WITH_NPLM
79  else if (s == "<null>") t = 3;
80 #endif
81  else if (wm_ == NULL) t = ucam::util::toNumber<unsigned> (s);
82  else {
83  t = (*wm_) (s);
84  LDEBUG ("Found gidx = " << t);
85  }
86  if (t < std::numeric_limits<unsigned>::max() ) {
87  LDEBUG ("Adding " << t << " => " << index);
88  idb_.addOutput (t, index);
89  }
90  }
91 };
92 }
93 
94 #endif
virtual void Add(WordIndex index, const StringPiece &str)
This class extends EnumerateVocab in kenlm code. This class creates a grammar-integer to lm-integer h...
#define LDEBUG(msg)
void addOutput(unsigned grammar_idx, unsigned lm_idx)
Definition: idbridge.hpp:82
void Add(WordIndex index, const std::string &s)
HifstEnumerateVocab(IdBridge &idb, WordMapperT *wm)
class WordMapper
void add(unsigned grammar_idx, unsigned lm_idx)
Definition: idbridge.hpp:73
void AddOutput(WordIndex index, const std::string &s)