Cambridge SMT System
main-run.lmbr.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef MAIN_RUN_LMBR_HPP
16 #define MAIN_RUN_LMBR_HPP
17 
25 namespace ucam {
26 namespace lmbr {
27 
32 template <class Data = LmbrTaskData >
34 
35  private:
39  const ucam::util::RegistryPO& rg_;
40  public:
46  rg_ ( rg ) {
47  };
48 
52  bool run ( Data& d ) {
53  using ucam::fsttools::ReadFstInit;
56  std::string const& smr = rg_.exists (HifstConstants::kLmbrLexstdarc) ?
58  boost::scoped_ptr < ITask > mytask ( ReadFstInit<Data> ( rg_
60  , smr ) ); // Read evidence space
61  mytask->appendTask
62  ( ReadFstInit<Data> ( rg_
64  , smr ) )
65  ( new Lmbr ( rg_ ) )
67  ;
68  unordered_map<std::string, boost::shared_ptr<oszfstream > > onebestfiles;
69  ucam::util::PatternAddress<float> onebestfilename (rg_.get<std::string>
70  (HifstConstants::kLmbrWriteonebest), "%%alpha%%");
73  !ir->done ();
74  ir->next () ) {
75  lmbrtunedata lmbronebest;
76  d.sidx = ir->get ();
77  d.lmbronebest = &lmbronebest;
78  mytask->chainrun ( d ); // Run!
80  for (unsigned j = 0; j < d.lmbronebest->alpha.size(); ++j) {
81  std::string filename = onebestfilename (d.lmbronebest->alpha[j]);
82  ucam::util::find_and_replace (filename, "%%wps%%",
83  toString<float> (d.lmbronebest->wps[j] ) );
84  ucam::util::find_and_replace (filename, "?",
85  toString<unsigned> (d.lmbronebest->idx ) );
86  if (onebestfiles.find (filename) == onebestfiles.end() )
87  onebestfiles[filename] = boost::shared_ptr<oszfstream> (new oszfstream (
88  filename) );
89  *onebestfiles[filename] << d.lmbronebest->alpha[j]
90  << " " << d.lmbronebest->wps[j]
91  << " " << d.lmbronebest->idx
92  << ":" << d.lmbronebest->hyp[j] << endl;
93  }
94  }
95  }
96  return false;
97  };
98 
99  inline bool operator() () {
100  Data d;
101  return run ( d );
102  };
103 
104  private:
105 
106  DISALLOW_COPY_AND_ASSIGN ( SingleThreadedLmbrTask );
107 
108 };
109 
113 template <class Data = LmbrTaskData >
115  private:
119 
121  const ucam::util::RegistryPO& rg_;
122 
124  unsigned threadcount_;
125  public:
127  rg_ (rg),
128  threadcount_ ( rg.get<unsigned> ( HifstConstants::kNThreads.c_str() ) ) {
129  };
130 
132  bool run ( Data& original_data ) {
133  using ucam::fsttools::ReadFstInit;
137  using ucam::util::toString;
138  std::vector < boost::shared_ptr< lmbrtunedata > > lmbronebest;
139  std::string const& smr = rg_.exists (HifstConstants::kLmbrLexstdarc)
142  {
143  ucam::util::TrivialThreadPool tp ( threadcount_ );
146  ; !ir->done()
147  ; ir->next() ) {
148  TaskInterface<Data> *mytask (ReadFstInit <Data > ( rg_ ,
150  smr)
151  ); //Read evidence space.
152  mytask->appendTask
153  ( ReadFstInit<Data> ( rg_
155  , smr ) )
156  ( new Lmbr ( rg_ ) )
157  ( WriteFst::init ( rg_ ,
159  ;
160  Data *d = new Data;
161  d->sidx = ir->get();
162  LINFO ("Processing sentence " << d->sidx);
163  lmbronebest.push_back ( boost::shared_ptr< lmbrtunedata >
164  ( new lmbrtunedata ) );
165  d->lmbronebest = lmbronebest[lmbronebest.size() - 1].get();
166  tp ( TaskFunctor<Data> ( mytask, d ) ); //Handles pointer ownership
167  }
168  }
169  if (rg_.exists (HifstConstants::kLmbrWriteonebest.c_str() ) ) {
170  //Write all 1-bests
171  unordered_map<std::string, boost::shared_ptr<oszfstream > > onebestfiles;
172  ucam::util::PatternAddress<float> onebestfilename (rg_.get<std::string>
173  (HifstConstants::kLmbrWriteonebest), "%%alpha%%");
174  FORCELINFO ("Writing to file(s) one best hypotheses");
175  for (unsigned k = 0; k < lmbronebest.size(); ++k) {
176  for (unsigned j = 0; j < lmbronebest[k]->alpha.size(); ++j) {
177  std::string filename = onebestfilename (lmbronebest[k]->alpha[j]);
178  ucam::util::find_and_replace (filename, "%%wps%%",
179  toString<float> (lmbronebest[k]->wps[j] ) );
180  ucam::util::find_and_replace (filename, "?",
181  toString<unsigned> (lmbronebest[k]->idx ) );
182  if (onebestfiles.find (filename) == onebestfiles.end() )
183  onebestfiles[filename] = boost::shared_ptr<oszfstream> (new oszfstream (
184  filename) );
185  *onebestfiles[filename] << lmbronebest[k]->alpha[j]
186  << " " << lmbronebest[k]->wps[j]
187  << " " << lmbronebest[k]->idx
188  << ":" << lmbronebest[k]->hyp[j] << endl;
189  }
190  }
191  }
192  return false;
193  };
194 
196  inline bool operator() () {
197  Data d;
198  return run ( d );
199  }
200 
201  private:
202  DISALLOW_COPY_AND_ASSIGN ( MultiThreadedLmbrTask );
203 };
204 
205 }
206 } // end namespaces
207 
208 #endif
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
const std::string kLmbrLoadHypothesesspace
bool run(Data &original_data)
original_data is being ignored in this case.
static WriteFstTask * init(const ucam::util::RegistryPO &rg, const std::string &fstkey, const std::string &readfstkey="")
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string Converts to string integers, floats, doubles Quits execution if ...
#define LINFO(msg)
const std::string kLmbrLexstdarc
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
#define FORCELINFO(msg)
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
Definition: range.hpp:214
#define IntRangeFactory
Definition: range.hpp:213
Trivial implementation of a threadpool based on boost::asio methods. When initiated, creates a threadpool of n threads (n <= number of cpus). Jobs should be submitted with the templated operator(). When the object is deleted it will wait for all threads to finish.
const std::string kLmbrWritedecoder
Convenience class that inherits Taskinterface behaviour and writes an fst to [file] using a key defin...
Lattice MBR task.
Definition: task.lmbr.hpp:33
SingleThreadedLmbrTask(const ucam::util::RegistryPO &rg)
Constructor.
Templated (hybrid) Interface for Task classes.
Full single-threaded Alignment lattices to Sparse lattices.
Simple functor that accepts an interface and pointer to the data object in which it will have to run ...
std::string const kHifstSemiringLexStdArc
const std::string kLmbrLoadEvidencespace
std::string const kHifstSemiringStdArc
const std::string kNThreads
MultiThreadedLmbrTask(const ucam::util::RegistryPO &rg)
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
Definition: registrypo.hpp:235
void find_and_replace(std::string &haystack, const std::string &needle, const std::string &replace)
const std::string kRangeOne
Definition: range.hpp:26
bool run(Data &d)
Reads evidence space and hypotheses space (FSTs) and applies lmbr.
Definition: bleu.hpp:14
const std::string kLmbrWriteonebest