Cambridge SMT System
task.dumpnbestfeatures.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef TASK_DUMPNBESTFEATURES_HPP
16 #define TASK_DUMPNBESTFEATURES_HPP
17 
25 namespace ucam {
26 namespace hifst {
31 template<class Data, class Arc = fst::StdArc>
33  private:
34 
39 
42 
44  std::vector<float>& scales_;
45 
47  const std::string sparseweightlatticekey_;
48 
51  const unsigned trigger_;
52 
56  const unsigned offset_;
57 
58  const std::string wordmapkey_;
59 
60  public:
61 
70  const unsigned offset = 1, //Minimum for 1 language model
71  const std::string& sparseweightlatticekey =
73  const std::string& wordmapkey =
75  ) :
76  offset_ ( offset ),
77  sparseweightlatticekey_ ( sparseweightlatticekey ),
78  fdir_ ( rg.get<std::string>
80  nbdir_ ( rg.get<std::string>
82  wm_ (NULL),
83  scales_ ( fst::TropicalSparseTupleWeight<float>::Params() ),
84  trigger_ ( rg.get<unsigned>
86  wordmapkey_ ( wordmapkey ) {
87  };
88 
92  const unsigned offset = 1 ,
93  const std::string& sparseweightlatticekey =
95  ) {
98  return new DumpNbestFeaturesTask ( rg, offset, sparseweightlatticekey );
99  return NULL;
100  }
101 
// Dumps the n-best list and/or the feature file for the sparse tuple weight
// lattice stored in the data object d under sparseweightlatticekey_.
//
// d: data object; this method reads d.fsts, d.wm and d.sidx.
// Returns true (after a warning) when no lattice is stored under
// sparseweightlatticekey_; returns false after the dump step otherwise.
// NOTE(review): what true/false means to the caller is defined by the task
// interface, which is not visible here -- confirm before changing either value.
103  bool run ( Data& d ) {
104  //Search for tuplearc32 lattice
105  if ( d.fsts.find ( sparseweightlatticekey_ ) == d.fsts.end() ) {
106  LWARN ( "No sparse tuple lattice to dump!" );
107  return true;
108  }
     // The stored entry is cast to a TupleArc32 vector fst; nothing here checks
     // that the entry really has that type.
109  fst::VectorFst<TupleArc32> *vectorlattice =
110  static_cast<fst::VectorFst<TupleArc32> *> ( d.fsts[sparseweightlatticekey_] );
     // N-best output is produced only when nbdir_() is a non-empty string.
111  if ( nbdir_() != "" ) {
     // wm_ is refreshed on every call: the word map stored under wordmapkey_
     // if there is one, otherwise NULL (writeNbestFile then writes the
     // hypothesis strings unmapped).
112  if ( d.wm.find ( wordmapkey_ ) != d.wm.end() )
113  wm_ = d.wm[wordmapkey_];
114  else wm_ = NULL;
     // nbdir_ ( d.sidx ) yields the output file name for this d.sidx.
115  writeNbestFile ( *vectorlattice, nbdir_ ( d.sidx ) );
116  }
     // Feature output is produced only when fdir_() is a non-empty string.
117  if ( fdir_() != "" )
118  writeFeatureFile ( *vectorlattice, fdir_ ( d.sidx ) );
119  return false;
120  }
121 
// Writes an n-best list for the sparse vector weight lattice to filename,
// one line per hypothesis in the form "<hypothesis>\t<cost>".
//
// vectorlattice: sparse tuple weight lattice; must have at least one state.
// filename:      output file, opened through ucam::util::oszfstream.
//
// NOTE(review): listing line 133 (the mapper argument of fst::Map) is not
// visible in this view; the comments below describe only what is shown.
123  void writeNbestFile ( const fst::VectorFst<TupleArc32>& vectorlattice,
124  const std::string& filename ) {
     // USER_CHECK prints the message and ends the program when the lattice
     // has no states.
125  USER_CHECK ( vectorlattice.NumStates()
126  , "Attempting to write an nbest file from an empty lattice!" );
127  unordered_map<std::string, float> hyps;
     // NOTE(review): fhyps is not referenced anywhere in the visible body of
     // this method.
128  unordered_map<std::string, std::string> fhyps;
129  fst::VectorFst<Arc> lattice;
     // m is built from scales_; the vector lattice is mapped into an
     // Arc-typed lattice using it.
130  fst::DotProductMap<float> m ( scales_ );
131  fst::Map ( vectorlattice
132  , &lattice
134  ( m ) );
     // hyps is filled here; below, each entry's key is used as the hypothesis
     // string and its value as the cost.
135  fst::printstrings<Arc> ( lattice, hyps );
     // Output order is whatever fst::CompareHyp defines (not visible here).
136  std::priority_queue<struct fst::hypcost, std::vector< struct fst::hypcost >, fst::CompareHyp>
137  pq;
138  for ( unordered_map<std::string, float>::iterator itx = hyps.begin()
139  ; itx != hyps.end()
140  ; ++itx ) {
141  struct fst::hypcost hc;
     // With a word map available the hypothesis is passed through it
     // (presumably integer-to-word -- TODO confirm the meaning of the
     // trailing 'false' argument); otherwise the string is used as is.
142  if (wm_ != NULL) (*wm_) (itx->first, &hc.hyp, false);
143  else hc.hyp = itx->first;
144  boost::algorithm::trim ( hc.hyp );
145  hc.cost = itx->second;
146  pq.push ( hc );
147  }
148  FORCELINFO ( "Write Nbest File to " << filename );
149  ucam::util::oszfstream file ( filename );
     // Drain the queue, writing one "<hypothesis>\t<cost>" line per entry.
150  while ( !pq.empty() ) {
151  struct fst::hypcost hc = pq.top();
152  file << hc.hyp << "\t" << hc.cost << endl;
153  pq.pop();
154  }
155  file.close();
156  }
157 
// Writes a feature file for the sparse vector weight lattice to filename:
// one line per hypothesis, holding that hypothesis' tab-separated feature
// values. The per-hypothesis cost is used only to order the lines.
//
// vectorlattice: sparse tuple weight lattice; must have at least one state.
// filename:      output file, opened through ucam::util::oszfstream.
//
// NOTE(review): listing lines 168 and 181 (the tails of two fst::Map calls)
// are not visible in this view; the comments below describe only what is shown.
159  void writeFeatureFile ( const fst::VectorFst<TupleArc32>& vectorlattice ,
160  const std::string& filename ) {
     // USER_CHECK prints the message and ends the program when the lattice
     // has no states.
161  USER_CHECK ( vectorlattice.NumStates(),
162  "Attempting to write feature file for an empty lattice!!!!" );
     // hyps:  hypothesis string -> cost, filled by printstrings below.
     // fhyps: hypothesis string -> accumulated feature text for its line.
163  unordered_map<std::string, float> hyps;
164  unordered_map<std::string, std::string> fhyps;
165  fst::VectorFst<Arc> lattice;
166  fst::DotProductMap<float> m ( scales_ );
167  fst::Map ( vectorlattice, &lattice,
169  fst::printstrings<Arc> ( lattice, hyps );
     // A mutable copy is made because listSparseFeatureIndices takes a
     // non-const reference; it collects the feature indices used in the lattice.
170  fst::VectorFst<TupleArc32> vl ( vectorlattice );
171  unordered_set<unsigned> latticefeatures;
172  listSparseFeatureIndices ( vl, latticefeatures );
173  LINFO ( "Number of Active features:" << latticefeatures.size() );
     // One pass per entry of scales_. Positions with k + 1 <= trigger_ are
     // always processed; positions above trigger_ are processed only when
     // k + 1 is among the indices found in the lattice.
174  for ( unsigned k = 0; k < scales_.size(); ++k ) {
175  if ( k + 1 > trigger_
176  && latticefeatures.find ( k + 1 ) == latticefeatures.end() ) continue;
177  fst::VectorFst<Arc> dimfst;
178  {
     // This m shadows the DotProductMap above and is built for position k only.
179  fst::VectorToStd<float> m ( k );
180  fst::Map ( vl, &dimfst,
182  }
183  unordered_map<std::string, float> dhyps;
184  fst::printstrings<Arc> ( dimfst, dhyps );
185  using ucam::util::toString;
186  for ( unordered_map<std::string, float>::iterator itx = dhyps.begin();
187  itx != dhyps.end(); ++itx ) {
     // Above trigger_, a value of exactly 0 is left out of the line.
188  if ( k + 1 > trigger_
189  && itx->second == 0.0000f ) continue; //skip features with path cost 0
     // Append to the hypothesis' line with a tab separator, or start the
     // line; the second toString argument (4) is presumably the precision.
190  if ( fhyps.find ( itx->first ) != fhyps.end() ) fhyps[itx->first] += "\t" +
191  toString<float> ( itx->second, 4 );
192  else fhyps[itx->first] = toString<float> ( itx->second, 4 );
     // NOTE(review): the suffix is "@k", while the membership test above
     // uses k + 1 -- confirm which index the file format expects.
193  if ( k + 1 > trigger_ ) fhyps[itx->first] += "@" + toString<unsigned>
194  ( k ); //Add @position.
195  }
196  }
     // Output order is whatever fst::CompareHyp defines (not visible here).
197  std::priority_queue<struct fst::hypcost, std::vector< struct fst::hypcost >, fst::CompareHyp>
198  pq;
199  for ( unordered_map<std::string, float>::iterator itx = hyps.begin()
200  ; itx != hyps.end()
201  ; ++itx ) {
202  struct fst::hypcost hc;
     // NOTE(review): operator[] inserts an empty string when no feature text
     // was collected for this hypothesis, so an empty line is written for it.
203  hc.hyp = fhyps[itx->first];
204  hc.cost = itx->second;
205  pq.push ( hc );
206  }
207  FORCELINFO ( "Write Feature File to " << filename );
208  ucam::util::oszfstream file ( filename );
     // Only the feature text is written; the cost is not part of the output.
209  while ( !pq.empty() ) {
210  struct fst::hypcost hc = pq.top();
211  file << hc.hyp << endl;
212  pq.pop();
213  }
214  file.close();
215  }
216 };
217 
218 }
219 } // end namespaces
220 
221 #endif // TASK_DUMPNBESTFEATURES_HPP
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string. Converts to string integers, floats, doubles. Quits execution if ...
#define LINFO(msg)
Definition: fstio.hpp:27
bool run(Data &d)
Dumps nbest or features from sparse tuple weight lattice in the data object d, if it exists...
#define FORCELINFO(msg)
fst::TropicalWeightTpl< F > Map(double)
static DumpNbestFeaturesTask * init(const ucam::util::RegistryPO &rg, const unsigned offset=1, const std::string &sparseweightlatticekey=HifstConstants::kSparseweightvectorlatticeStore)
const std::string kSparseweightvectorlatticeStorenbestfile
void writeNbestFile(const fst::VectorFst< TupleArc32 > &vectorlattice, const std::string &filename)
Specific method to write nbest list from the sparse vector weight lattice.
Templated (hybrid) Interface for Task classes.
std::string hyp
Definition: fstutils.hpp:292
templated Mapper that modifies weights when copying from one FST to another, passing through the othe...
Functor to convert sparse tuple weight to tropical (single weight)
const std::string kSparseweightvectorlatticeWordmap
#define LWARN(msg)
const std::string kSparseweightvectorlatticeStorefeaturefile
void listSparseFeatureIndices(VectorFst< TupleArc32 > &myfst, unordered_set< uint > &idx)
Traverses a machine and returns the indices actually used for the sparse vector weight tropical semir...
DumpNbestFeaturesTask(const ucam::util::RegistryPO &rg, const unsigned offset=1, const std::string &sparseweightlatticekey=HifstConstants::kSparseweightvectorlatticeStore, const std::string &wordmapkey=HifstConstants::kSparseweightvectorlatticeWordmap)
Constructor.
const std::string kSparseweightvectorlatticeStore
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
Definition: registrypo.hpp:235
Struct for priority queue comparison.
Definition: fstutils.hpp:291
const std::string kSparseweightvectorlatticeFirstsparsefeatureatindex
fst::ArcTpl< TupleW32 > TupleArc32
Class used by priority queue to compare two hypotheses and decide which one wins. ...
Definition: fstutils.hpp:298
Task that dumps nbest and feature file. Templated on specific Data object and Fst Arc...
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
Efficiently loads a wordmap file and provides methods to map word-to-integer or integer-to-word. To avoid memory footprint issues, hashing the wordmap entries is avoided.
Definition: wordmapper.hpp:63
Map functor used with generic weight mapper.
Definition: bleu.hpp:14
void writeFeatureFile(const fst::VectorFst< TupleArc32 > &vectorlattice, const std::string &filename)
Specific method to dump feature file from sparse vector weight lattice.
void close()
Closes the file.
Definition: szfstream.hpp:323