Cambridge SMT System
fstutils.topofeatures.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, AdriĆ  de Gispert, William Byrne
14 
21 #pragma once
22 
23 #include <set>
24 #include <boost/bimap.hpp>
25 #include <boost/unordered_map.hpp>
26 #include <fstutils.hpp>
27 
28 namespace ucam {
29 namespace fsttools {
30 
31 // Tuple weights can have repeated components per arc
32 // Use this struct in fst::Map to merge them.
33 struct MergeFeatures {
34  typedef TupleArc32 Arc;
35  Arc operator()(Arc const &arc) const {
36  if ( arc.weight == Arc::Weight::Zero() ) //irrelevant labels
37  return Arc ( arc.ilabel, arc.olabel, Arc::Weight::Zero(), arc.nextstate );
38  int index = 0;
39  std::map<int, float> ws;
40  for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
41  ; !it.Done()
42  ; it.Next() ) {
43  ws[it.Value().first] += it.Value().second.Value();
44  }
45  Arc::Weight w;
46  for (std::map<int,float>::const_iterator itx = ws.begin()
47  ; itx != ws.end()
48  ; ++itx) {
49  w.Push(itx->first, itx->second);
50  }
51  return Arc(arc.ilabel,arc.olabel, w, arc.nextstate);
52  }
53 };
54 
55 // Used together with fst::map to map output labels into topological features
56 // Negative indices are reserved for topological feature and start at -2
57 // due to a quirk with the sparse tuple vector semiring.
59  typedef fst::StdArc FromArc;
60  typedef TupleArc32 ToArc;
61  typedef ToArc::Weight ToWeight;
62  mutable unsigned counter_;
63  std::vector<unsigned> &features_;
65 
66  explicit OLabelToFeature(std::vector<unsigned> &features, unsigned size
67  , bool replaceOlabelByTopologicalLabel=false)
68  : counter_(2) // needs this offset to work properly
69  , features_(features)
70  , replaceOlabelByTopologicalLabel_(replaceOlabelByTopologicalLabel)
71  {
72  features_.resize(size + 2,-1);
73  features_[0] = 0;
74  }
75  void reset() { counter_ = 2;}
76  ToArc operator()(FromArc const &arc) const {
77  if ( arc.weight == FromArc::Weight::Zero() ) //irrelevant labels
78  return ToArc ( arc.ilabel, arc.olabel, ToArc::Weight::Zero(), arc.nextstate );
79  ToWeight w;
80  w.Push(1, arc.weight.Value());
81  LDEBUG("counter_=" << counter_
82  << ",arc.olabel=" << arc.olabel
83  << ",arc.weight=" << arc.weight.Value()
84  << ",arc.nextstate=" << arc.nextstate
85  );
86  if (arc.nextstate > -1) {
87  features_[counter_] = arc.olabel;
88  w.Push(-counter_,1);
89  LDEBUG("***olabel as feature:" << counter_ << "=>" << features_[counter_]);
90  ToArc::Label ilabel =(arc.ilabel != EPSILON)? arc.ilabel: PROTECTEDEPSILON;
91  ToArc::Label olabel = (replaceOlabelByTopologicalLabel_)? counter_ : ilabel;
92  ++counter_;
93  return ToArc ( ilabel, olabel ,w, arc.nextstate );
94  }
95  // final state -- irrelevant labels
96  return ToArc ( arc.ilabel, arc.olabel,w, arc.nextstate );
97  }
98 
99  // Go back to StdArc restoring the label
100  // Assumes there is only one topological feature.
101  FromArc operator()(ToArc const &arc) const {
102  if ( arc.weight == ToArc::Weight::Zero() ) //irrelevant labels
103  return FromArc ( arc.ilabel, arc.olabel, FromArc::Weight::Zero(), arc.nextstate );
104 
105  // assuming i only have two.
106  int index = 0;
107  FromArc::Weight w; w = 0;
108  for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
109  ; !it.Done()
110  ; it.Next() ) {
111  LDEBUG(it.Value().first << ":" << it.Value().second);
112  it.Value().first < 0 ? index = -it.Value().first: w = it.Value().second;
113  }
114  if (arc.nextstate != -1) {
115  LDEBUG("index=" << index << ", arc.ilabel=" << arc.ilabel
116  << ", new output label = " << features_[index] );
117  FromArc::Label ilabel =(arc.ilabel != PROTECTEDEPSILON)? arc.ilabel: EPSILON;
118  FromArc::Label olabel = features_[index];
119  return FromArc(ilabel,olabel, w, arc.nextstate);
120  }
121  // final state -- irrelevant labels
122  return FromArc ( arc.ilabel, arc.olabel ,w, arc.nextstate );
123  }
124 };
125 
126 // for debugging:
127 inline std::string printState(std::map<int,int> &currentState) {
128  std::stringstream ss;
129  for (std::map<int,int>::iterator itx = currentState.begin()
130  ; itx != currentState.end()
131  ; ++itx)
132  ss << itx->first << "=" << itx->second << ",";
133  return ss.str();
134 };
135 
136 // Trivial function to check whether a state is final or not.
137 template<class ArcT>
138 inline bool isFinal(fst::VectorFst<ArcT> const &fst, unsigned s) {
139  if (fst.Final (s) != ArcT::Weight::Zero() ) return true;
140  return false;
141 }
142 
143 // Adds and tracks states as pairs of
144 // <new_state_id, (original_state_id,residual_features) >
145 // Note that (original_state_id,residual_features) is a variable length vector
146 struct StateHandler {
147  // negative integers are features, positive (only one)
148  // is the state of the original fst
149  typedef unsigned StateId;
150  // StateId, new state, final
151  typedef std::pair<bool, bool> StateDetailType;
152  typedef std::pair<StateId, StateDetailType > AddResultType;
153 
154  typedef std::map<int,int> VectorState;
155  typedef boost::bimap<StateId,VectorState > BimapStateIdVectorStateType;
156  BimapStateIdVectorStateType state;
157 
158  fst::VectorFst<TupleArc32> &f_;
159  fst::VectorFst<TupleArc32> &o_;
160 
161  explicit StateHandler(fst::VectorFst<TupleArc32> &f
162  , fst::VectorFst<TupleArc32> &o)
163  : f_(f), o_(o)
164  {}
165  // add new state (if not already in)
166  // and return state id with additional information
167  AddResultType add(VectorState &out) {
168  StateId in;
169  if (get(out, in)) { // if exists, return id
170  LDEBUG("******************* This state already exists! s="
171  << in << ", vs=" << printState(out));
172  return AddResultType(in,StateDetailType(false, isFinal(f_, in) ) );
173  }
174  in = f_.AddState();
175  f_.SetFinal(in, o_.Final(out.rbegin()->first ));
176 
177  LDEBUG("******************* Adding new state " << in << ":" << printState(out));
178  state.insert(BimapStateIdVectorStateType::value_type(in, out ));
179  return AddResultType(in,StateDetailType(true, isFinal(f_,in)));
180  }
181 
182  // return vector state given state id
183  bool get(StateId const& in, VectorState &out) {
184  typedef BimapStateIdVectorStateType::left_map::const_iterator Itx;
185  Itx x =state.left.find(in);
186  if (x != state.left.end()) {
187  out = x->second;
188  return true;
189  }
190  return false;
191  }
192 
193  // return state id given vector state
194  bool get(VectorState const& in, StateId &out) {
195  typedef BimapStateIdVectorStateType::right_map::const_iterator Itx;
196  Itx x =state.right.find(in);
197  if (x != state.right.end()) {
198  out = x->second;
199  return true;
200  }
201  return false;
202  }
203 };
204 
205 // Cursor that abstracts how we traverse
206 // topological features std::map with forward iterators
207 struct MapCursorRev {
209  : map_(map)
210  , itx_(map_.begin())
211  {};
212  // void begin() { itx_ = map_.begin();};
213  bool done() { return (itx_ == map_.end());};
214  bool next() { ++itx_; return done();};
215  StateHandler::VectorState::iterator &operator()() { return itx_; };
217  StateHandler::VectorState::iterator itx_;
218 };
219 
220 // This cursor traverses the topological features
221 // in reverse order. This is useful for the second pass
222 // over the lattice.
223 struct MapCursor {
225  : map_(map)
226  , itx_(map_.end())
227  , done_(false)
228  {
229  if (itx_ == map_.begin() ) done_ = true;
230  else --itx_;
231  }
232 
233  bool done() {
234  if (done_) return true;
235  if (itx_ == map_.begin()) done_ = true;
236  return false;
237  };
238  bool next() { --itx_; return done();};
239  StateHandler::VectorState::iterator &operator()() {
240  return itx_;
241  };
242 
244  StateHandler::VectorState::iterator itx_;
245  bool done_;
246 };
247 
248 // Tracks features for first pass expansion
250  std::map<unsigned, unsigned> trackLastFeature_;
251  int const null_;
252  int local_;
253  bool stop_;
254  public:
256  : null_(std::numeric_limits<int>::max())
257  , local_(null_)
258  , stop_(false)
259  {};
260  bool remove(unsigned s) {
261  trackLastFeature_.erase(s);
262  }
263  int operator() (unsigned s) {
264  std::map<unsigned, unsigned>::iterator itx
265  = trackLastFeature_.find(s);
266  if ( itx != trackLastFeature_.end()) return itx->second;
267  return null_;
268  }
269 
270  void operator() ( unsigned s, unsigned f) {
271  std::map<unsigned, unsigned>::iterator itx
272  = trackLastFeature_.find(s);
273  if (itx != trackLastFeature_.end()) {
274  if (itx->second > f) itx->second = f;
275  } else trackLastFeature_[s] = f;
276  }
277  // local updates
278  void update(unsigned s) {
279  LDEBUG("Updating state " << s << " with local=" << local_);
280  this->operator()(s, local_);
281  local_ = null_;
282  }
283  bool stop() {
284  return stop_;
285  }
286  bool validate(unsigned s, unsigned k, float v) {
287  stop_=false;
288  int f = this->operator()(s); // lowest feature seen up to state s.
289  LDEBUG("u=" << s << ": highest feature = "
290  << f << ", current local feature=" << local_ << ",v=" << v);
291  LDEBUG("u=" << s << ": update local:"
292  << (int) local_ << ", and compare to candidate topological feature: "
293  << (int) k);
294  if ((int) k >= f ) { // we have a past valid feature ...
295  if ( v > 0 ) { // and positive. Drop!
296  LDEBUG("u=" << s << ": past feature, drop TRUE "
297  << (int) k << ">=" << (int) f);
298  return true;
299  }
300  return false;
301  } else if ( v > 0 ) { // valid future feature ( k < f)
302  if (local_ > (int) k ) {
303  local_ = k;
304  LDEBUG("u=" << s << ": update local (valid future feature):" << local_);
305  }
306  LDEBUG("u=" << s << ": TRUE");
307  stop_=true;
308  return true;
309  }
310  // should not be dropped:
311  LDEBUG("u=" << s << " FALSE");
312  return false;
313  }
314 };
315 
316 // Tracks features for second pass expansion
317 // TODO: refactor and clean up;
318 // classes FeatureTracker and FeatureTrackerRev are almost identical
320  std::map<unsigned, unsigned> trackLastFeature_;
321  int const null_;
322  int local_;
323  bool stop_;
324  public:
326  : null_(std::numeric_limits<int>::min())
327  , local_(null_)
328  , stop_(false)
329  {};
330  bool remove(unsigned s) {
331  trackLastFeature_.erase(s);
332  }
333  int operator() (unsigned s) {
334  std::map<unsigned, unsigned>::iterator itx
335  = trackLastFeature_.find(s);
336  if ( itx != trackLastFeature_.end()) return itx->second;
337  return null_;
338  }
339  void operator() ( unsigned s, unsigned f) {
340  std::map<unsigned, unsigned>::iterator itx
341  = trackLastFeature_.find(s);
342  if (itx != trackLastFeature_.end()) {
343  // going from lower to higher,
344  // we need to see the biggest feature index.
345  if (itx->second < f) itx->second = f;
346  } else trackLastFeature_[s] = f;
347  }
348  // local updates
349  void update(unsigned s) {
350  LDEBUG("Updating state " << s << " with local=" << local_);
351  this->operator()(s, local_);
352  local_ = null_;
353  }
354  bool stop() {
355  return stop_;
356  }
357  bool validate(unsigned s, unsigned k, float v) {
358  stop_=false;
359  int f = this->operator()(s); // lowest feature seen up to state s.
360  LDEBUG("u=" << s << ": highest feature = "
361  << f << ", current local feature=" << local_ << ",v=" << v);
362  LDEBUG("u=" << s << ": update local:"
363  << (int) local_ << ", and compare to " << (int) k);
364  if ((int) k <= f ) { // we have a past valid feature ...
365  if ( v > 0 ) { // and positive. Drop!
366  LDEBUG("u=" << s << ": past feature, drop TRUE "
367  << (int) f << "<=" << (int) k);
368  return true;
369  }
370  if (k == f) return true; // we know for sure this has happened.
371  return false;
372  } else if (v > 0 ) { // valid future feature ( k < f)
373  if (local_ < (int) k ) {
374  local_ = k;
375  LDEBUG("u=" << s << ": update local (valid future feature):" << local_);
376  }
377  LDEBUG("u=" << s << ": TRUE");
378  stop_=true;
379  return true;
380  }
381  // should not be dropped:
382  LDEBUG("u=" << s << " FALSE");
383  return false;
384  }
385 };
386 
387 template<class MapCursorT, class FeatureTrackerT >
389  private:
390  bool fail_;
391  std::string suffix_;
392  unsigned u_;
393  int s_;
394  fst::VectorFst<TupleArc32> out;
395  // track lowest (highest) feature seen per state.
396  FeatureTrackerT tracker_;
397  public:
399  : fail_(false)
400  , s_(0), u_(0)
401  {}
402  bool getFail() { return fail_;}
403  // TODO: mapped to super final.
404  void operator()(fst::VectorFst<TupleArc32> &myfst, bool reverse) {
405  using namespace fst;
406  typedef TupleArc32::StateId StateId;
407  std::queue < unsigned > tf; // simple fifo queue
408  VectorFst<TupleArc32> aux;
409  std::string suf;
410  if (reverse) {
411  // TODO: need method to reverse the lattice without creating extra epsilons.
412  Reverse(myfst, &aux);
413  suffix_ = "-reversed";
414  } else aux = myfst;
415  LDBG_EXECUTE(aux.Write("xx-determinized" + suffix_ + ".fst"));
416  out.DeleteStates();
417 
418  LDEBUG("Start state=" << aux.Start() );
419  // initialize old states? Probably not needed. Just initialize state 0
420  StateHandler::VectorState empty; // 0 and bigger are normal states.
421  empty[aux.Start()]=1;
422  StateHandler sh(out, aux); // keeps track of state mapping
423  unsigned start =sh.add(empty).first;
424  LDEBUG("New start state=" << start );
425  tf.push(start); // always push, added state 0.
426  out.SetStart(start);
427 
428  while (tf.size()) {
429  u_ = tf.front();
430  StateHandler::VectorState currentState;
431  if (!sh.get(u_, currentState)) {
432  LDEBUG("STATE " << u_ << ": does not exist!");
433  exit(EXIT_FAILURE);
434  }
435  // last element is the actual state of the original fst.
436  s_ = currentState.rbegin()->first;
437  LDEBUG("STATE " << u_ << ":" << printState(currentState));
438  tf.pop();
439  for ( MutableArcIterator< VectorFst<TupleArc32> > ai ( &aux, s_ )
440  ; !ai.Done()
441  ; ai.Next() ) {
442  TupleArc32 arc = ai.Value();
443  StateHandler::VectorState ns = currentState;
444  LDEBUG("STATE " << u_ << ": original => s=" << s_ << ",arc="
445  << arc.ilabel << "," << arc.olabel << ","
446  << arc.nextstate << "w=" << ai.Value().weight);
447  unsigned g=(StateId) addWeight(tf, sh, arc, ns, u_, s_);
448  arc.nextstate = g; // just modify the next state
449  LDEBUG("STATE " << u_ << ": after => s=" << s_ << ",arc="
450  << arc.ilabel << "," << arc.olabel << ","
451  << arc.nextstate << "w=" << ai.Value().weight);
452  out.AddArc(u_, arc); // add arc, weight has been modified.
453  }
454  tracker_.remove(u_);
455  LDEBUG("STATE " << u_ << ": Exit the arc loop");
456  }
457  LDEBUG("Finished! Reverse back and topsort");
458  LDBG_EXECUTE(out.Write("xx-determinized-topo-expanded" + suffix_ + ".fst"));
459  if (reverse) Reverse(out, &myfst);
460  else myfst = out;
461  TopSort(&myfst); // show it topologically sorted
462  };
463 
464  private:
465  // Deletes current state from the vector state, removes 0-valued features,
466  // determines any arc weight contribution present on the vector state
467  void purge(StateHandler::VectorState &nf, TupleArc32 &arc, TupleArc32::Weight &w) {
468  StateHandler::VectorState::iterator itx = nf.end(); // doesn't work with rbegin
469  --itx;
470  nf.erase(itx); // delete current state -- will add new one at the end.
471  LDEBUG("STATE " << u_ << ": s="
472  << s_ << "BEFORE updating with features (no next state!): "
473  << printState(nf));
474  // for now, nf should only contain topological features.
475  for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
476  ; !it.Done()
477  ; it.Next() ) {
478  int const &k = it.Value().first;
479  if (k >=0) {
480  w.Push(k, it.Value().second);
481  continue;
482  }
483  // only negative feature indices <=> topological features.
484  nf[k] += it.Value().second.Value(); // +1 or -1
485  if (nf[k] == 0) {
486  nf.erase(k); // delete 0 values to avoid unnecessary clutter
487  }
488  }
489  };
490 
491  // size should always be at least 1 (the current state itself).
492  void assertNotEmpty(StateHandler::VectorState &nf) {
493  if (!nf.size()) {
494  LERROR("General error: Empty state.");
495  exit(EXIT_FAILURE);
496  }
497  };
498 
499  void pop(StateHandler::VectorState &nf, TupleArc32 &arc, TupleArc32::Weight &w) {
500  MapCursorT mc(nf);
502  bool yei=false;
503  unsigned countNegatives = 0;
504  unsigned countPositives = 0;
505  while (!mc.done() ) {
506  StateHandler::VectorState::iterator &itx = mc();
507  LDEBUG("STATE " << u_ << ", state " << s_
508  << ": let us validate feature "
509  << itx->first << "=>" << itx->second);
510 
511  if (itx->first < 0 && tracker_.validate(u_, -itx->first, itx->second) ) {
512 
513  if (itx->second < 0 ) ++countNegatives;
514  else if (itx->second > 0 ) ++countPositives;
515  if (itx->second < 0 || countNegatives + 1 >= countPositives )
516  drop[itx->first]=itx->second;
517  if (tracker_.stop()) {
518  LDEBUG("STATE " << u_ << ", state " << s_ << ": "
519  << itx->first << " feature found. We are done here");
520  yei=true;
521  break;
522  }
523  }
524  mc.next();
525  }
526  if (!yei) {
527  LDEBUG("STATE " << u_ << ", state " << s_ << ": did not find a feature??");
528  }
529  // add topological feature (s) to w
530  for (StateHandler::VectorState::iterator itx2 = drop.begin()
531  ; itx2 != drop.end()
532  ; ++itx2
533  ) {
534  w.Push(itx2->first, itx2->second);
535  nf.erase(itx2->first); // probably should generate residue on the fly
536  }
537  // we have at least one feature we wanna drop.
538  // If drop.size() > 1 then we can also flag problem
539  // (and will need double pass to fix).
540  if (!drop.size()) {
541  if (u_) {
542  fail_=true;
543  }
544 
545  } else if (drop.size() > 1) {
546  LDEBUG("Marking as non-solved");
547  fail_=true;
548  }
549  };
550 
551  // Adds weight and topological feature for the current arc;
552  // will add new state if it didn't exist before, using
553  // StateHandler class.
554  // Returns the next state id.
555  // Note: ignores epsilon arcs. This is useful as
556  // our lattices are assumed to start (end) with epsilons
557  // (i.e. paths in reversed lattice start with an epsilon
558  unsigned addWeight(std::queue < unsigned > &tf
559  , StateHandler &sh
560  , TupleArc32 &arc
562  , unsigned u, int s) {
563  assertNotEmpty(nf);
564  TupleArc32::Weight w;
565  purge(nf, arc, w);
566  LDEBUG("STATE " << u_ << ": s=" << s_ <<", AFTER purge: " << printState(nf));
567  // remove lowest feature index (or more) and store in the arc weight.
568  // don't want to do this on start state for a reversed lattice :).
569  if (arc.ilabel != 0) {
570  pop(nf, arc, w);
571  } // if the arc is an epsilon, just leave arc as is.
572  arc.weight = w;
573  // encode original nextstate, guaranteed to be at the end of the set.
574  nf[arc.nextstate]=1;
575  LDEBUG("STATE " << u_ << ": s=" << s_
576  <<", AFTER pop (includes next state): " << printState(nf));
577  StateHandler::AddResultType art=sh.add(nf);
578  if (art.second.first) {
579  LDEBUG("STATE " << u_ << ": s=" << s_ << ", Added new state "
580  << art.first << ":" << printState(nf)
581  << " -- goes to queue");
582  tf.push( art.first); // push new state
583  } else {
584  LDEBUG("STATE " << u_ << ": s=" << s_ << ", Added new state "
585  << art.first << ":" << printState(nf)
586  << " -- DOES NOT go to queue");
587  }
588 
589  if (arc.ilabel != 0)
590  tracker_.update(art.first);
591  // Next state is final (reversed lattice, so we only have ONE final state).
592  // Furthermore, a twice reversed FSA only has one final state with epsilons.
593  if (art.second.second) {
594  // Check at this point, nf only contains the next state.
595  if (nf.size() > 1) {
596  LERROR("STATE " << u_ << "s=" << s_
597  << ",At the end of the path we still have... features!"
598  << printState(nf));
599  // hack: delete the nextstate feature.
600  nf.erase(arc.nextstate);
601  for (StateHandler::VectorState::iterator itx2 = nf.begin()
602  ; itx2 != nf.end()
603  ; ++itx2
604  ) {
605  arc.weight.Push(itx2->first, itx2->second);
606  }
607  fail_ = true;
608  }
609  }
610  return art.first;
611  }
612 };
613 
614 // Map output labels into topological features
615 // Runs a forward pass over the lattice. Labels are
616 // defined based on (label-name,partial-hyp-min-length).
617 // Instead of keeping one label per arc, we ensure they
618 // are unique at the hypothesis level.
619 // assumes topologically sorted ifst on tropical semiring
620 inline void TopologicalLabelMap(fst::VectorFst<fst::StdArc> const &ifst
621  , fst::VectorFst<TupleArc32> *ofst
622  , std::vector<unsigned> *features) {
623  using namespace fst;
624  typedef boost::unordered_map<unsigned, unsigned > PathCountType;
625  typedef std::pair<unsigned,unsigned> VectorState;
626  typedef boost::bimap<unsigned,VectorState > BimapStateIdVectorStateType;
627 
628  BimapStateIdVectorStateType mit;
629  PathCountType tf;
630  std::vector<unsigned> &ft = *features;
631  ofst->ReserveStates(ifst.NumStates());
632  for (unsigned k =0; k < ifst.NumStates(); ++k) ofst->AddState();
633  ofst->SetStart(ifst.Start());
634  tf[ifst.Start()] = 0; // we start with length 0.
635 
636  for ( StateIterator< VectorFst<StdArc> > si (ifst ); !si.Done(); si.Next() ) {
637  unsigned state_id = si.Value();
638  LDEBUG("State Id =" << state_id);
639  unsigned &pcount = tf[state_id];
640 
641  if (ifst.Final(state_id) != StdArc::Weight::Zero() ) {
642  TupleArc32::Weight z;
643  z.Push(1,ifst.Final(state_id));
644  ofst->SetFinal(state_id, z);
645  }
646  for ( ArcIterator< VectorFst<StdArc> > ai ( ifst, si.Value() )
647  ; !ai.Done()
648  ; ai.Next() ) {
649 
650  StdArc arc = ai.Value();
651  tf[arc.nextstate] = std::max(tf[state_id] +1, tf[arc.nextstate]);
652  typedef BimapStateIdVectorStateType::right_map::const_iterator Itx;
653  std::pair<unsigned,unsigned> a(arc.olabel, pcount);
654  Itx x = mit.right.find(a);
655  unsigned index;
656  if (x != mit.right.end()) {
657  index = x->second;
658  } else {
659  index = mit.size() + 2;
660  mit.insert(BimapStateIdVectorStateType::value_type(index, a));
661  }
662 
663  ft[index ] = arc.olabel; // mapping
664  LDEBUG("State Id =" << state_id << ": ft at "
665  << index << "=" << ft[index ]);
666  TupleArc32::Weight y;
667  y.Push(1, arc.weight);
668  y.Push(-index, 1);
669  TupleArc32 ab(arc.ilabel, arc.ilabel, y,arc.nextstate);
670  ofst->AddArc(state_id,ab);
671  }
672  tf.erase(state_id); // we are done with this state;
673  }
674 };
675 
676 // Actions (typically standard sequence of fst operations).
677 // See TopoFeaturesHelper class below
679  void operator()(fst::VectorFst<TupleArc32> const &in
680  , fst::VectorFst<TupleArc32> *out) {
681  using namespace fst;
682  LDBG_EXECUTE(in.Write("noaction.fst"));
683  Determinize(ProjectFst<TupleArc32>(in,PROJECT_INPUT), out);
684  LDBG_EXECUTE(out->Write("determinized.fst"));
685  Minimize(out);
686  LDBG_EXECUTE(out->Write("determinized-minimized.fst"));
687  Push<TupleArc32,REWEIGHT_TO_FINAL>(*out,out,kPushWeights);
688  LDBG_EXECUTE(out->Write("determinized-minimized-pushed.fst"));
689  }
690 };
691 
693  void operator()(fst::VectorFst<TupleArc32> const &in
694  , fst::VectorFst<TupleArc32> *out) {
695  using namespace fst;
696  Determinize(ProjectFst<TupleArc32>(in,PROJECT_INPUT), out);
697  LDBG_EXECUTE(out->Write("determinized.fst"));
698  }
699 };
700 
702  void operator()(fst::VectorFst<TupleArc32> const &in
703  , fst::VectorFst<TupleArc32> *out) {
704  using namespace fst;
705  Determinize(ProjectFst<TupleArc32>(in,PROJECT_INPUT), out);
706  LDBG_EXECUTE(out->Write("determinized.fst"));
707  Push<TupleArc32,REWEIGHT_TO_FINAL>(*out,out,kPushWeights);
708  LDBG_EXECUTE(out->Write("determinized-minimized-pushed.fst"));
709  }
710 };
711 
712 struct NullAction {
713  void operator()(fst::VectorFst<TupleArc32> const &in
714  , fst::VectorFst<TupleArc32> *out) {
715  using namespace fst;
716  *out = in;
717  LDBG_EXECUTE(out->Write("null-action.fst"));
718  }
719 };
720 
731 template<class ActionT>
733  ActionT action_;
736  explicit TopoFeaturesHelper(bool exitOnFailure)
737  : exitOnFailure_(exitOnFailure)
738  , reverseFirst_(true)
739  {}
740  // Map output labels as <olabel, max # times it has appeared in previous paths>
741  // plus use double pass if needed.
742  inline void operator() (fst::VectorFst<fst::StdArc> *fst) {
743  using namespace fst;
744  // get total number of arcs.
745  unsigned narcs = 0;
746  for (unsigned k = 0; k < fst->NumStates(); ++k){
747  narcs += fst->NumArcs(k);
748  }
749  std::vector<unsigned> features(narcs, -1);
750  OLabelToFeature otf(features,features.size(), true); // could be false
753  VectorFst<TupleArc32> mfstTopoFeatures, dfst;
754  VectorFst<StdArc> result;
755  // RelabelUtility 0s to some special symbol.
756  // this implementation skips epsilons, so in order to be
757  // tagged properly they must be protected.
759  LDBG_EXECUTE(fst->Write("01-topsorted-input.th.fst"));
760  //Map(*fst, &mfstTopoFeatures,gam);
761  TopologicalLabelMap(*fst, &mfstTopoFeatures, &features);
762  LDBG_EXECUTE(mfstTopoFeatures.Write("02-topologically-mapped.th.fst"));
763 
764  // ActionT typically performs a sequence of transformations
765  // to FSAs, defined by the user.
766  // It is also responsibility of the user to ensure
767  // that topological features "sufficiently" pushed towards
768  // end state, i.e. they appear wherever possible
769  // in the reversed lattice before or at the expected arc.
770  // ensure output is acyclic and epsilon-free.
771  action_(mfstTopoFeatures, &dfst);
772  LDBG_EXECUTE(dfst.Write("03-det-topo.th.fst"));
773  // simplify features -- silly hack motivated by underlying
774  // sparse tuple semiring representation.
775  MergeFeatures mf;
777  VectorFst<TupleArc32> dfstw;
778  Map(dfst,&dfstw, gam2);
779  // simplify features -- end of silly hack
780 
781  LDBG_EXECUTE(dfstw.Write("04-det-topo-merged.th.fst"));
783  epw(dfstw, reverseFirst_); // determinized lattice, ergo we start reversed.
784  LDBG_EXECUTE(dfstw.Write("05-det-topo-expanded-rev.th.fst"));
785  if (epw.getFail()) {
786  if (exitOnFailure_) {
787  LERROR("Failed to position correctly all topological features in ONE pass.");
788  exit(EXIT_FAILURE);
789  }
790  FORCELINFO("First pass did not position all features correctly."
791  << " Running Second pass:");
792  // double reverse, now have an epsilon arc from start state,
793  // and epsilon to single final state.
795  epw2(dfstw, !reverseFirst_);
796  LDBG_EXECUTE(dfstw.Write("06-det-topo-expanded.th.fst"));
797  if (epw2.getFail()) {
798  LERROR("Failed to position correctly all topological features"
799  << " after the second pass");
800  exit(EXIT_FAILURE);
801  }
802  }
803  Map(dfstw,&result,gamr);
804  RelabelUtil<StdArc>().addIPL(PROTECTEDEPSILON, EPSILON)(&*fst);
805  LDBG_EXECUTE(result.Write("07-det-topo-final.th.fst"));
806  fst->DeleteStates();
807  *fst = result;
808  };
809 };
810 
811 }} // end namespace
812 
bool get(StateId const &in, VectorState &out)
bool validate(unsigned s, unsigned k, float v)
std::string printState(std::map< int, int > &currentState)
MapCursorRev(StateHandler::VectorState &map)
Definition: fstio.hpp:27
#define LDBG_EXECUTE(order)
fst::VectorFst< TupleArc32 > & f_
StateHandler(fst::VectorFst< TupleArc32 > &f, fst::VectorFst< TupleArc32 > &o)
#define FORCELINFO(msg)
void operator()(fst::VectorFst< TupleArc32 > const &in, fst::VectorFst< TupleArc32 > *out)
fst::TropicalWeightTpl< F > Map(double)
bool isFinal(fst::VectorFst< ArcT > const &fst, unsigned s)
#define LDEBUG(msg)
StateHandler::VectorState & map_
FromArc operator()(ToArc const &arc) const
A wrapper that runs maps labels to topological features, runs an "action" (sequence of standard fst o...
OLabelToFeature(std::vector< unsigned > &features, unsigned size, bool replaceOlabelByTopologicalLabel=false)
ToArc operator()(FromArc const &arc) const
Arc operator()(Arc const &arc) const
fst::VectorFst< TupleArc32 > & o_
StateHandler::VectorState::iterator & operator()()
AddResultType add(VectorState &out)
MapCursor(StateHandler::VectorState &map)
Utilites to extract vocabulary, pseudo-determinize lattices and build substring transducers.
StateHandler::VectorState::iterator itx_
boost::bimap< StateId, VectorState > BimapStateIdVectorStateType
void operator()(fst::VectorFst< TupleArc32 > const &in, fst::VectorFst< TupleArc32 > *out)
bool validate(unsigned s, unsigned k, float v)
void operator()(fst::VectorFst< TupleArc32 > &myfst, bool reverse)
Utility functor for relabeling one or more lattices. Note that you can chain commands. See Unit test in fstutils.gtest.cpp for an example.
Definition: fstutils.hpp:503
#define EPSILON
std::pair< StateId, StateDetailType > AddResultType
StateHandler::VectorState::iterator itx_
BimapStateIdVectorStateType state
StateHandler::VectorState::iterator & operator()()
fst::ArcTpl< TupleW32 > TupleArc32
void operator()(fst::VectorFst< TupleArc32 > const &in, fst::VectorFst< TupleArc32 > *out)
void operator()(fst::VectorFst< TupleArc32 > const &in, fst::VectorFst< TupleArc32 > *out)
void TopologicalLabelMap(fst::VectorFst< fst::StdArc > const &ifst, fst::VectorFst< TupleArc32 > *ofst, std::vector< unsigned > *features)
#define LERROR(msg)
#define PROTECTEDEPSILON
std::vector< unsigned > & features_
std::pair< bool, bool > StateDetailType
Definition: bleu.hpp:14