Cambridge SMT System
task.applylm.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef APPLYLMTASK_HPP
16 #define APPLYLMTASK_HPP
17 
26 
27 
28 namespace ucam {
29 namespace fsttools {
30 
35 template <class Data , class Arc >
37 
38  typedef typename Arc::Label Label;
39  typedef typename Arc::Weight Weight;
40 
41  private:
42  ucam::util::RegistryPO const& rg_;
43  bool deletelmscores_;
44  bool natlog_;
45 
46  const std::string lmkey_;
47  const std::string latticeloadkey_;
48  const std::string latticestorekey_;
49 
50  fst::VectorFst<Arc> mylmfst_;
51 
53  typedef boost::shared_ptr<ApplyLanguageModelOnTheFlyInterfaceType> ApplyLanguageModelOnTheFlyInterfacePtrType;
54  std::vector<ApplyLanguageModelOnTheFlyInterfacePtrType> almotf_;
55 
56  public:
59  const std::string& lmkey = HifstConstants::kLmLoad ,
60  const std::string& latticeloadkey = "lm.lattice.load",
61  const std::string& latticestorekey = "lm.lattice.store",
62  bool deletelmscores = false
63  )
64  : rg_(rg)
65  , lmkey_ ( lmkey )
66  , latticeloadkey_ ( latticeloadkey )
67  , latticestorekey_ ( latticestorekey )
68  , natlog_ ( !rg.exists ( HifstConstants::kLmLogTen ) )
69  , deletelmscores_ (deletelmscores)
70  {};
71 
76 
77  if (almotf_.size()) return; // already done
78  almotf_.resize(d.klm[lmkey_].size());
80  unordered_set<Label> epsilons;
82  epsilons.insert ( DR );
83  epsilons.insert ( OOV );
84  epsilons.insert ( EPSILON );
85  epsilons.insert ( SEP );
86  for ( unsigned k = 0; k < d.klm[lmkey_].size(); ++k ) {
87  USER_CHECK ( d.klm[lmkey_][k]->model != NULL,
88  "Language model " << k << " not available!" );
89  almotf_[k].reset(assignKenLmHandler<Arc>(rg_,lmkey_, epsilons
90  , *(d.klm[lmkey_][k])
91  , mw, natlog_,k));
92  mw.update();
93  }
94  }
95 
// Applies every handler held in almotf_ to the input lattice.
// Copies the FST registered in d.fsts under latticeloadkey_ into mylmfst_,
// passes it through each almotf_ entry in index order, and finally registers
// the address of mylmfst_ in d.fsts under latticestorekey_.
// Returns true from the precondition checks below, false after completing.
// NOTE(review): the USER_CHECK summary says the program ends when the
// expression is false, so the early-return paths may never be taken -- confirm
// against the macro definition.
101  bool run ( Data& d ) {
// Drop the states left in the member FST by any previous call.
102  mylmfst_.DeleteStates();
103  if ( !USER_CHECK ( d.klm.size() ,
104  "No language models available" ) ) return true;
105  if ( !USER_CHECK ( d.klm.find ( lmkey_ ) != d.klm.end() ,
106  "No language models available (key not initialized) " ) ) return true;
107  if ( !USER_CHECK ( d.fsts.find ( latticeloadkey_ ) != d.fsts.end() ,
108  " Input fst not available!" ) ) return true;
109 
// NOTE(review): listing line 110 is missing from this view. Nothing shown in
// this method populates almotf_, so the composition loop below does no work
// unless the handlers were set up beforehand -- confirm.
// Deep copy of the input lattice; the cast assumes the d.fsts entry really
// points to a fst::VectorFst<Arc> -- TODO confirm against the Data type.
111  mylmfst_ = * (static_cast<fst::VectorFst<Arc> * > ( d.fsts[latticeloadkey_] ) );
112  if (deletelmscores_) {
113  LINFO ( "Delete old LM scores first" );
114  //Deletes LM scores if using lexstdarc. Note -- will copy through on stdarc and ignore on tuplearc!
115  fst::MakeWeight2<Arc> mwcopy;
// NOTE(review): listing line 117 (the remaining argument and the closing of
// this fst::Map call) is absent from this rendering, so the statement is
// incomplete as shown here.
116  fst::Map<Arc> ( &mylmfst_,
118  }
119  LINFO ( "Input lattice loaded with key=" << latticeloadkey_ << ", NS=" <<
120  mylmfst_.NumStates() );
// p takes ownership of the raw pointer each handler returns, so the
// intermediate FST is released once it has been copied into mylmfst_.
121  boost::shared_ptr<fst::VectorFst<Arc> > p;
122  for ( unsigned k = 0; k < almotf_.size(); ++k ) {
// Timing entries are labelled with the handler index k.
123  d.stats->setTimeStart ( "on-the-fly-composition " + ucam::util::toString ( k ) );
// The output of handler k becomes the input of handler k+1.
// NOTE(review): *p is dereferenced without a null check -- assumes the
// handler's run() never returns a null pointer; confirm.
124  p.reset(almotf_[k]->run(mylmfst_));
125  mylmfst_ = *p;
126  p.reset();
127  d.stats->setTimeEnd ("on-the-fly-composition " + ucam::util::toString ( k ) );
128  LDEBUG ( mylmfst_.NumStates() );
129  }
// Stores a pointer to a member of this object: the entry is only valid while
// this task is alive, and its contents change on the next call to run().
130  d.fsts[latticestorekey_] = &mylmfst_;
131  LINFO ( "Done!" );
132  return false;
133  };
134 
136  LINFO ("Shutdown!");
137  };
138 
139  private:
140  ZDISALLOW_COPY_AND_ASSIGN ( ApplyLanguageModelTask );
141 };
142 
143 template <class Data , class Arc >
145  private:
146  typedef typename Arc::Label Label;
147  typedef typename Arc::Weight Weight;
148 
149  ucam::util::RegistryPO const& rg_;
150  bool deletelmscores_;
151  bool natlog_;
152 
153  const std::string lmkey_;
154  const std::string latticeloadkey_;
155  const std::string latticestorekey_;
156 
157  fst::VectorFst<Arc> mylmfst_;
158 
160  typedef boost::shared_ptr<ApplyLanguageModelOnTheFlyInterfaceType> ApplyLanguageModelOnTheFlyInterfacePtrType;
161  std::vector<ApplyLanguageModelOnTheFlyInterfacePtrType> almotf_;
162 
163 
164  unsigned srcWindowsSize_;
165 
166  public:
169  , const std::string& lmkey
170  , const std::string& latticeloadkey
171  , const std::string& latticestorekey
172  , bool deletelmscores
173  )
174  : rg_(rg)
175  , lmkey_ ( lmkey )
176  , latticeloadkey_ ( latticeloadkey )
177  , latticestorekey_ ( latticestorekey )
178  , natlog_ ( !rg.exists ( HifstConstants::kLmLogTen ) )
179  , deletelmscores_ (deletelmscores) // hum...
180  , srcWindowsSize_(rg_.get<unsigned>(HifstConstants::kUseBilingualModelSourceSize))
181  {};
182 
187  if (almotf_.size()) return; // already done
188  almotf_.resize(d.klm[lmkey_].size());
190  unordered_set<Label> epsilons;
192  epsilons.insert ( DR );
193  epsilons.insert ( OOV );
194  epsilons.insert ( EPSILON );
195  epsilons.insert ( SEP );
196  USER_CHECK(d.klm[lmkey_].size() == 1
197  , "Only ONE bilingual model supported. Sorry for my limitations!");
198  unsigned k = 0;
199  almotf_[k].reset
200  (assignKenLmHandlerBilingual<Arc>(rg_,lmkey_, epsilons
201  , *(d.klm[lmkey_][k])
202  , mw, natlog_,k
203  )
204  );
205  };
206 
// Applies every handler held in almotf_ to the input lattice, bilingual variant.
// Copies the FST registered in d.fsts under latticeloadkey_ into mylmfst_,
// passes it through each almotf_ entry together with srcWindowsSize_ and
// d.sourceWindows, and finally registers the address of mylmfst_ in d.fsts
// under latticestorekey_.
// Returns true from the precondition checks below, false after completing.
// NOTE(review): the USER_CHECK summary says the program ends when the
// expression is false, so the early-return paths may never be taken -- confirm
// against the macro definition.
212  bool run ( Data& d ) {
// Drop the states left in the member FST by any previous call.
213  mylmfst_.DeleteStates();
214  if ( !USER_CHECK ( d.klm.size() ,
215  "No language models available" ) ) return true;
216  if ( !USER_CHECK ( d.klm.find ( lmkey_ ) != d.klm.end() ,
217  "No language models available (key not initialized) " ) ) return true;
218  if ( !USER_CHECK ( d.fsts.find ( latticeloadkey_ ) != d.fsts.end() ,
219  " Input fst not available!" ) ) return true;
// NOTE(review): listing line 220 is missing from this view. Nothing shown in
// this method populates almotf_, so the composition loop below does no work
// unless the handlers were set up beforehand -- confirm.
// Deep copy of the input lattice; the cast assumes the d.fsts entry really
// points to a fst::VectorFst<Arc> -- TODO confirm against the Data type.
221  mylmfst_ = * (static_cast<fst::VectorFst<Arc> * > ( d.fsts[latticeloadkey_] ) );
// The LM-score deletion step is commented out, so deletelmscores_ has no
// effect inside this method.
222  // if (deletelmscores_) {
223  // LINFO ( "Delete old LM scores first" );
224  // //Deletes LM scores if using lexstdarc. Note -- will copy through on stdarc and ignore on tuplearc!
225  // fst::MakeWeight2<Arc> mwcopy;
226  // fst::Map<Arc> ( &mylmfst_,
227  // fst::GenericWeightAutoMapper<Arc, fst::MakeWeight2<Arc> > ( mwcopy ) );
228  // }
229  LINFO ( "Input lattice loaded with key=" << latticeloadkey_ << ", NS=" <<
230  mylmfst_.NumStates() );
// p takes ownership of the raw pointer each handler returns, so the
// intermediate FST is released once it has been copied into mylmfst_.
231  boost::shared_ptr<fst::VectorFst<Arc> > p;
232  for ( unsigned k = 0; k < almotf_.size(); ++k ) {
// Timing entries are labelled with the handler index k.
233  d.stats->setTimeStart ( "on-the-fly-bilm-composition " + ucam::util::toString ( k ) );
// NOTE(review): *p is dereferenced without a null check -- assumes the
// handler's run() never returns a null pointer; confirm.
234  p.reset(almotf_[k]->run(mylmfst_, srcWindowsSize_, d.sourceWindows) );
235  mylmfst_ = *p;
236  p.reset();
237  d.stats->setTimeEnd ("on-the-fly-bilm-composition " + ucam::util::toString ( k ) );
238  LDEBUG ( mylmfst_.NumStates() );
239  }
// Stores a pointer to a member of this object: the entry is only valid while
// this task is alive, and its contents change on the next call to run().
240  d.fsts[latticestorekey_] = &mylmfst_;
241  LINFO ( "Done!" );
242  return false;
243  };
244 
246  LINFO ("Shutdown!");
247  };
248 
249  private:
251 
252 };
253 
254 }} // end namespaces
255 
256 #endif
#define ZDISALLOW_COPY_AND_ASSIGN(TypeName)
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string. Converts to string integers, floats, doubles. Quits execution if ...
#define LINFO(msg)
#define SEP
std::string const kLmLogTen
#define LDEBUG(msg)
templated Mapper that modifies weights over an FST, passing through the other values of the arc...
#define DR
ApplyBiLMTask(const ucam::util::RegistryPO &rg, const std::string &lmkey, const std::string &latticeloadkey, const std::string &latticestorekey, bool deletelmscores)
Constructor with ucam::util::RegistryPO object.
bool run(Data &d)
Method inherited from ucam::util::TaskInterface. Loads the language model and stores in lm data struc...
void initializeLanguageModelHandlers(Data &d)
Initializes appropriate templated handlers for kenlm language models.
Templated (hybrid) Interface for Task classes.
bool exists(const std::string &source, const std::string &needle)
Convenience function to find out whether a needle exists in a text.
Templated functor that creates a weight given a float.
Wrapper to ApplyLanguageModelOnTheFly to apply different kenlm models.
#define EPSILON
void initializeLanguageModelHandlers(Data &d)
Initializes appropriate templated handlers for kenlm language models.
Templated functor that creates a weight given a float.
bool run(Data &d)
Method inherited from ucam::util::TaskInterface. Loads the language model and stores in lm data struc...
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
#define OOV
Language model loader task, loads a language model wrapping it in a class to provide.
ApplyLanguageModelTask(const ucam::util::RegistryPO &rg, const std::string &lmkey=HifstConstants::kLmLoad, const std::string &latticeloadkey="lm.lattice.load", const std::string &latticestorekey="lm.lattice.store", bool deletelmscores=false)
Constructor with ucam::util::RegistryPO object.
std::string const kLmLoad
std::string const kUseBilingualModelSourceSize
Definition: bleu.hpp:14