Cambridge SMT System
task.applylm.kenlmtype.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef APPLYLMTASK_KENLMTYPE_HPP
16 #define APPLYLMTASK_KENLMTYPE_HPP
17 
25 namespace ucam {
26 namespace fsttools {
27 
// Selects and allocates the on-the-fly language-model application object that
// matches the KenLM binary format of one language-model file.
//
// Steps visible in the body:
//   1. the model file name is read with rg.getVectorString(lmkey, offset);
//   2. ucam::util::detectkenlm(file) yields an int format code;
//   3. the switch dynamic_casts *klm.model to the concrete model type for that
//      code and passes it, together with epsilons, useNaturalLog, klm.lmscale,
//      klm.lmwp, klm.idb and mw, to a new fst::ApplyLanguageModelOnTheFly.
//
// The result is allocated with `new`; nothing in this function releases it, so
// the caller presumably takes ownership -- TODO confirm against callers.
//
// NOTE(review): this listing is missing lines inside the definition (listing
// lines 29-30, i.e. the return type, function name and first parameter `rg`,
// and the `return new ...` lines 45, 51, 54, 57 and 64). The index entry at the
// end of this page gives the declaration as
// fst::ApplyLanguageModelOnTheFlyInterface<Arc> *assignKenLmHandler(util::RegistryPO const &rg, ...).
// Confirm against the original header before relying on the text shown here.
28 template<class Arc, template<class> class MakeWeightT>
31  , std::string const &lmkey
32  , std::unordered_set<typename Arc::Label> &epsilons
33  , KenLMData const &klm
34  , MakeWeightT<Arc> &mw
35  , bool useNaturalLog
36  , unsigned offset = 0) {
37  using namespace lm::ngram;
38  typedef lm::np::Model NplmModel;
39  // Detect here kenlm binary type
40  std::string file = rg.getVectorString (lmkey, offset) ;
41  int kenmt = ucam::util::detectkenlm(file);
42 
   // One case per format code. Each dynamic_cast targets a reference type, so a
   // model object of a different dynamic type raises std::bad_cast rather than
   // producing a null pointer.
43  switch (kenmt) {
44  case PROBING:
   // NOTE(review): the `return new ...` line for this case (listing line 45) is
   // not present in this listing; presumably it mirrors the REST_PROBING case.
46  (dynamic_cast<ProbingModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
47  case REST_PROBING:
48  return new fst::ApplyLanguageModelOnTheFly<Arc, MakeWeightT<Arc>, RestProbingModel >
49  (dynamic_cast<RestProbingModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
50  case TRIE:
   // NOTE(review): listing line 51 (presumably the `return new ...` line) is absent.
52  (dynamic_cast<TrieModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
53  case QUANT_TRIE:
   // NOTE(review): listing line 54 (presumably the `return new ...` line) is absent.
55  (dynamic_cast<QuantTrieModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
56  case ARRAY_TRIE:
   // NOTE(review): listing line 57 (presumably the `return new ...` line) is absent.
58  (dynamic_cast<ArrayTrieModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
59  case QUANT_ARRAY_TRIE:
60  return new fst::ApplyLanguageModelOnTheFly<Arc, MakeWeightT<Arc>, QuantArrayTrieModel >
61  (dynamic_cast<QuantArrayTrieModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
62  case util::KENLM_NPLM:
   // The NPLM handler is only compiled when WITH_NPLM is defined. Without it,
   // control goes straight to the LERROR/exit pair below.
   // NOTE(review): listing line 64 (presumably the `return new ...` line for
   // NplmModel) is absent from this listing.
63  #ifdef WITH_NPLM
65  (dynamic_cast<NplmModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
66 #endif
67  LERROR("Unsuported format: KENLM_NPLM. Did you compile NPLM library?");
68  exit(EXIT_FAILURE);
69  default:
70  // bad news if it reaches this point, as format should default to probing
71  LERROR("Programmer mistake -- (task.applylm.kenlmtype.hpp)");
72  exit(EXIT_FAILURE);
73  }
   // Every switch path visible above either returns or calls exit(), so this
   // statement appears to be unreachable -- presumably kept to satisfy
   // compilers that expect a return on all paths.
74  return NULL;
75 };
76 
77 
78 // Bilingual variant of the handler above: only the NPLM format is supported.
// Bilingual-model counterpart of the KenLM handler selection: only the
// util::KENLM_NPLM format code leads to a handler; every other recognised
// format is rejected with an error and exit(EXIT_FAILURE).
//
// Steps visible in the body:
//   1. the model file name is read with rg.getVectorString(lmkey, offset);
//   2. ucam::util::detectkenlm(file) yields an int format code;
//   3. for util::KENLM_NPLM (and only when WITH_NPLM is defined) *klm.model is
//      dynamic_cast to NplmModel and passed on with epsilons, useNaturalLog,
//      klm.lmscale, klm.lmwp, klm.idb and mw.
//
// NOTE(review): this listing is missing lines inside the definition (listing
// lines 80-81, i.e. the return type, function name and first parameter `rg`,
// and line 97, presumably the `return new ...` line). The index entry at the
// end of this page gives the declaration as
// fst::ApplyLanguageModelOnTheFlyInterface<Arc> *assignKenLmHandlerBilingual(util::RegistryPO const &rg, ...).
// Confirm against the original header before relying on the text shown here.
79 template<class Arc, template<class> class MakeWeightT>
82  , std::string const &lmkey
83  , std::unordered_set<typename Arc::Label> &epsilons
84  , KenLMData const &klm
85  , MakeWeightT<Arc> &mw
86  , bool useNaturalLog
87  , unsigned offset = 0) {
88  using namespace lm::ngram;
89  typedef lm::np::Model NplmModel;
90  // Detect here kenlm binary type
91  std::string file = rg.getVectorString (lmkey, offset) ;
92  int kenmt = ucam::util::detectkenlm(file);
93 
94  switch (kenmt) {
95  case util::KENLM_NPLM:
   // Without WITH_NPLM nothing is compiled between #ifdef and #endif, so this
   // case reports the error below and terminates the process.
   // NOTE(review): listing line 97 (presumably the `return new ...` line for
   // NplmModel) is absent from this listing.
96  #ifdef WITH_NPLM
98  (dynamic_cast<NplmModel &>(*klm.model), epsilons,useNaturalLog, klm.lmscale, klm.lmwp, klm.idb, mw);
99 #endif
100  LERROR("Unsuported format: KENLM_NPLM. Did you compile NPLM library?");
101  exit(EXIT_FAILURE);
   // All of the following format codes deliberately share one rejection path.
102  case PROBING:
103  case REST_PROBING:
104  case TRIE:
105  case QUANT_TRIE:
106  case ARRAY_TRIE:
107  case QUANT_ARRAY_TRIE:
108  LERROR("Unsuported format, only NPLM supported for bilingual models. ");
109  exit(EXIT_FAILURE);
110  default:
111  // bad news if it reaches this point, as format should default to probing
112  LERROR("Programmer mistake -- (task.applylm.kenlmtype.hpp)");
113  exit(EXIT_FAILURE);
114  }
   // Every switch path visible above either returns or calls exit(), so this
   // statement appears to be unreachable.
115  return NULL;
116 };
117 
118 
119 }} // end namespaces
120 
121 #endif
std::vector< std::string > getVectorString(const std::string &key) const
Convenience method that returns a vector of strings taking "," as the separator character.
Definition: registrypo.hpp:245
float lmscale
Scales applied to each model.
Definition: data.lm.hpp:50
Class that applies language model on the fly using kenlm.
Language Model data structure.
Definition: data.lm.hpp:35
fst::ApplyLanguageModelOnTheFlyInterface< Arc > * assignKenLmHandler(util::RegistryPO const &rg, std::string const &lmkey, std::unordered_set< typename Arc::Label > &epsilons, KenLMData const &klm, MakeWeightT< Arc > &mw, bool useNaturalLog, unsigned offset=0)
fst::ApplyLanguageModelOnTheFlyInterface< Arc > * assignKenLmHandlerBilingual(util::RegistryPO const &rg, std::string const &lmkey, std::unordered_set< typename Arc::Label > &epsilons, KenLMData const &klm, MakeWeightT< Arc > &mw, bool useNaturalLog, unsigned offset=0)
#define LERROR(msg)
lm::base::Model * model
KenLM.
Definition: data.lm.hpp:41
Definition: bleu.hpp:14