Cambridge SMT System
main-run.alilats2splats.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef RUNNER_ALILATS2SPARSELATS_HPP
16 #define RUNNER_ALILATS2SPARSELATS_HPP
17 
25 namespace ucam {
26 namespace hifst {
27 
38  unsigned *offset,
39  const std::string& lmscales = HifstConstants::kLmFeatureweights,
40  const std::string& grammarscales =
42  const std::string& featureweights = HifstConstants::kFeatureweights,
43  const std::string& lmload = HifstConstants::kLmLoad
44  ) {
45  if (rg.get<std::string> (featureweights) != "" ) {
46  // overrides separate grammar feature weights + lm feature weights
48  = ucam::util::ParseParamString<float> (rg.getString (featureweights) );
49  *offset = rg.getVectorString (lmload).size();
50  LWARN ( HifstConstants::kFeatureweights << " overrides program options " <<
53  return;
54  }
55  std::vector<float> fscales1, fscales2;
56  if ( rg.exists ( grammarscales ) )
57  if ( rg.get<std::string> ( grammarscales ) != "" )
58  fscales2 = ucam::util::ParseParamString<float> ( rg.getString (
59  grammarscales ) );
60  if ( rg.exists ( lmscales ) )
61  if ( rg.get<std::string> ( lmscales ) != "" )
62  fscales1 = ucam::util::ParseParamString<float> ( rg.getString ( lmscales ) );
63  std::vector<float>& fscales = fst::TropicalSparseTupleWeight<float>::Params();
64  *offset = fscales1.size();
65  if ( fscales1.size() + fscales2.size() ) {
66  LWARN ( "env parameter is overriden by " << lmscales << "," << grammarscales );
67  fscales.clear();
68  fscales = fscales1;
69  copy ( fscales2.begin(), fscales2.end(), std::back_inserter ( fscales ) );
70  }
71  USER_CHECK ( fscales.size(),
72  "Number of scaling factors must be greater than 0" );
74  std::string x = ucam::util::toString<float> ( fscales[0] );
75  for ( unsigned k = 1; k < fscales.size(); ++k )
76  x += "," + ucam::util::toString<float> ( fscales[k] );
77  LINFO ( "Number of language models =" << *offset << ". Scales=" << x );
78 };
79 
84 template < template <class> class DataT
85  , class ArcT = void
86  >
88 ucam::util::TaskInterface<DataT<TupleArc32 > > {
89  private:
90  typedef DataT<TupleArc32 > Data;
103 
104  const ucam::util::RegistryPO& rg_;
105  public:
111  rg_ ( rg ) {
112  };
113 
117  bool run ( Data& d ) {
118  using namespace HifstConstants;
119  unsigned numlms;
120  setScales ( rg_ , &numlms);
121  boost::scoped_ptr < LoadSparseWeightFlowerLattice>
122  mytask (new LoadSparseWeightFlowerLattice ( rg_ , numlms,
124  mytask->appendTask
126  ( LoadWordMap::init ( rg_, kLmWordmap, true ) )
127  ( new LoadLanguageModel ( rg_, kLmLoad, "" ) ) //Here, language model weights always to 1
128  ( new ReadFst ( rg_, kSparseweightvectorlatticeLoadalilats ) )
129  ( new SparseWeightVectorLattices ( rg_
133  ( new ApplyLanguageModel ( rg_
134  , kLmLoad
140  , numlms
142  ;
144  kRangeOne ) );
145  !ir->done ();
146  ir->next () ) {
147  d.sidx = ir->get ();
148  mytask->chainrun ( d ); // Run!
149  }
150  return false;
151  };
152 
153  inline bool operator() () {
154  Data d;
155  return run ( d );
156  };
157 
158  private:
159 
160  DISALLOW_COPY_AND_ASSIGN ( SingleThreadedAliLatsToSparseVecLatsTask );
161 
162 };
163 
167 template < template <class> class DataT
168  , class ArcT = void
169  >
171 ucam::util::TaskInterface<DataT<TupleArc32 > > {
172  private:
173  typedef DataT<TupleArc32 > Data;
186 
188  const ucam::util::RegistryPO& rg_;
190  unsigned threadcount_;
191  public:
193  threadcount_ ( rg.get<unsigned> ( HifstConstants::kNThreads ) ),
194  rg_ (rg) {
195  }
196 
197  bool run ( Data& original_data ) {
198  using namespace HifstConstants;
199  unsigned numlms;
200  setScales ( rg_ , &numlms);
201  boost::scoped_ptr < LoadSparseWeightFlowerLattice>
202  loadtask ( new LoadSparseWeightFlowerLattice ( rg_
203  , numlms
205  loadtask->appendTask
207  ( new LoadLanguageModel ( rg_ , kLmLoad,
208  "" ) ) //Forcing language model scales always to 1
209  ( LoadWordMap::init ( rg_ , kLmWordmap , true ) )
210  ( LoadWordMap::init ( rg_ ,
212  ;
213  loadtask->chainrun ( original_data ); // Load grammar and language model;
214  {
215  ucam::util::TrivialThreadPool tp ( threadcount_ );
216  bool finished = false;
218  kRangeOne ) );
219  !ir->done ();
220  ir->next () ) {
221  Data *d = new Data; //( original_data ); // reset.
222  d->sidx = ir->get();
223  d->klm = original_data.klm;
224  d->fsts = original_data.fsts;
225  d->wm = original_data.wm;
226  FORCELINFO ( "=====Extract features for sentence " << d->sidx << ":" );
227  ReadFst *runtask = new ReadFst ( rg_ ,
229  runtask->appendTask
230  ( new SparseWeightVectorLattices ( rg_
234  ( new ApplyLanguageModel ( rg_
235  , kLmLoad
239  ( DumpNbestFeatures::init ( rg_ , numlms,
241  ;
242  tp ( ucam::util::TaskFunctor<Data> ( runtask,
243  d ) ); //tp takes ownership of runtask and d
244  }
245  }
246  return false;
247  };
248 
249  inline bool operator() () {
250  Data d;
251  return run ( d );
252  };
253 
254  private:
255  DISALLOW_COPY_AND_ASSIGN ( MultiThreadedAliLatsToSparseVecLatsTask );
256 };
257 
258 }
259 } // end namespaces
260 
261 #endif
Full single-threaded Alignment lattices to Sparse lattices.
static WriteFstTask * init(const ucam::util::RegistryPO &rg, const std::string &fstkey, const std::string &readfstkey="")
MultiThreadedAliLatsToSparseVecLatsTask(const ucam::util::RegistryPO &rg)
std::vector< std::string > getVectorString(const std::string &key) const
Convenience method that returns a vector of strings taking "," as the separator character.
Definition: registrypo.hpp:245
#define LINFO(msg)
static LoadWordMapTask * init(const ucam::util::RegistryPO &rg, const std::string &key, bool reverse=false)
Static constructor, will return NULL if there is no need for word-mapping.
const std::string kSparseweightvectorlatticeLoadalilats
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
#define FORCELINFO(msg)
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
Definition: range.hpp:214
#define IntRangeFactory
Definition: range.hpp:213
static DumpNbestFeaturesTask * init(const ucam::util::RegistryPO &rg, const unsigned offset=1, const std::string &sparseweightlatticekey=HifstConstants::kSparseweightvectorlatticeStore)
SingleThreadedAliLatsToSparseVecLatsTask(const ucam::util::RegistryPO &rg)
Constructor.
Trivial implementation of a threadpool based on boost::asio methods. When initiated, creates a threadpool of n threads (n <= number of cpus). Jobs should be submitted with the templated operator(). When the object is deleted it will wait for all threads to finish.
Language model loader task, loads a language model wrapping it in a class to provide.
Loads wordmap in constructor and delivers pointer to data object during run time. ...
Convenience class that inherits Taskinterface behaviour and writes an fst to [file] using a key defin...
Templated (hybrid) Interface for Task classes.
Simple functor that accepts an interface and pointer to the data object in which it will have to run ...
TaskInterface & appendTask(TaskInterface *t)
Appends a task class. If there is no task, append here, otherwise delegate to the next task...
std::string const kLmFeatureweights
const std::string kRuleflowerlatticeFeatureweights
const std::string kNThreads
const std::string kSparseweightvectorlatticeWordmap
std::string getString(const std::string &key) const
Performs get<string> and checks whether the real value is to be loaded from file (--param=file://.....)
Definition: registrypo.hpp:205
#define LWARN(msg)
const std::string kRuleflowerlatticeStore
const std::string kSparseweightvectorlatticeStore
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
Definition: registrypo.hpp:235
void setScales(const ucam::util::RegistryPO &rg, unsigned *offset, const std::string &lmscales=HifstConstants::kLmFeatureweights, const std::string &grammarscales=HifstConstants::kRuleflowerlatticeFeatureweights, const std::string &featureweights=HifstConstants::kFeatureweights, const std::string &lmload=HifstConstants::kLmLoad)
Sets scales using environment parameter (see sparse tuple weight semiring file), or grammar scales an...
Task that dumps nbest and feature file. Templated on specific Data object and Fst Arc...
Convenience class that loads an fst using a key defined in the constructor and delivers it to the dat...
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
bool run(Data &d)
Reads an input sentence, tokenizes and integer-maps.
Language model loader task, loads a language model wrapping it in a class to provide.
std::string const kLmWordmap
std::string const kLmLoad
const std::string kRangeOne
Definition: range.hpp:26
Multithreaded implementation of alilats2splats pipeline.
const std::string kFeatureweights
Creates lattices using tropical tuple weight semiring – each arc containing separate feature weight ...
const std::string kSparseweightvectorlatticeStorenolm
Definition: bleu.hpp:14
Implements a class that loads the grammar sparseweight flower lattice and stores a pointer on the dat...