24 #include <boost/bimap.hpp> 25 #include <boost/unordered_map.hpp> 36 if ( arc.weight == Arc::Weight::Zero() )
37 return Arc ( arc.ilabel, arc.olabel, Arc::Weight::Zero(), arc.nextstate );
39 std::map<int, float> ws;
40 for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
43 ws[it.Value().first] += it.Value().second.Value();
46 for (std::map<int,float>::const_iterator itx = ws.begin()
49 w.Push(itx->first, itx->second);
51 return Arc(arc.ilabel,arc.olabel, w, arc.nextstate);
67 ,
bool replaceOlabelByTopologicalLabel=
false)
70 , replaceOlabelByTopologicalLabel_(replaceOlabelByTopologicalLabel)
72 features_.resize(size + 2,-1);
77 if ( arc.weight == FromArc::Weight::Zero() )
78 return ToArc ( arc.ilabel, arc.olabel, ToArc::Weight::Zero(), arc.nextstate );
80 w.Push(1, arc.weight.Value());
81 LDEBUG(
"counter_=" << counter_
82 <<
",arc.olabel=" << arc.olabel
83 <<
",arc.weight=" << arc.weight.Value()
84 <<
",arc.nextstate=" << arc.nextstate
86 if (arc.nextstate > -1) {
87 features_[counter_] = arc.olabel;
89 LDEBUG(
"***olabel as feature:" << counter_ <<
"=>" << features_[counter_]);
91 ToArc::Label olabel = (replaceOlabelByTopologicalLabel_)? counter_ : ilabel;
93 return ToArc ( ilabel, olabel ,w, arc.nextstate );
96 return ToArc ( arc.ilabel, arc.olabel,w, arc.nextstate );
102 if ( arc.weight == ToArc::Weight::Zero() )
103 return FromArc ( arc.ilabel, arc.olabel, FromArc::Weight::Zero(), arc.nextstate );
107 FromArc::Weight w; w = 0;
108 for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
111 LDEBUG(it.Value().first <<
":" << it.Value().second);
112 it.Value().first < 0 ? index = -it.Value().first: w = it.Value().second;
114 if (arc.nextstate != -1) {
115 LDEBUG(
"index=" << index <<
", arc.ilabel=" << arc.ilabel
116 <<
", new output label = " << features_[index] );
118 FromArc::Label olabel = features_[index];
119 return FromArc(ilabel,olabel, w, arc.nextstate);
122 return FromArc ( arc.ilabel, arc.olabel ,w, arc.nextstate );
127 inline std::string
printState(std::map<int,int> ¤tState) {
128 std::stringstream ss;
129 for (std::map<int,int>::iterator itx = currentState.begin()
130 ; itx != currentState.end()
132 ss << itx->first <<
"=" << itx->second <<
",";
138 inline bool isFinal(fst::VectorFst<ArcT>
const &
fst,
unsigned s) {
139 if (fst.Final (s) != ArcT::Weight::Zero() )
return true;
158 fst::VectorFst<TupleArc32> &
f_;
159 fst::VectorFst<TupleArc32> &
o_;
162 , fst::VectorFst<TupleArc32> &o)
167 AddResultType
add(VectorState &out) {
170 LDEBUG(
"******************* This state already exists! s=" 172 return AddResultType(in,StateDetailType(
false,
isFinal(f_, in) ) );
175 f_.SetFinal(in, o_.Final(out.rbegin()->first ));
177 LDEBUG(
"******************* Adding new state " << in <<
":" <<
printState(out));
178 state.insert(BimapStateIdVectorStateType::value_type(in, out ));
179 return AddResultType(in,StateDetailType(
true,
isFinal(f_,in)));
183 bool get(StateId
const& in, VectorState &out) {
184 typedef BimapStateIdVectorStateType::left_map::const_iterator Itx;
185 Itx x =state.left.find(in);
186 if (x != state.left.end()) {
194 bool get(VectorState
const& in, StateId &out) {
195 typedef BimapStateIdVectorStateType::right_map::const_iterator Itx;
196 Itx x =state.right.find(in);
197 if (x != state.right.end()) {
213 bool done() {
return (itx_ == map_.end());};
214 bool next() { ++itx_;
return done();};
215 StateHandler::VectorState::iterator &
operator()() {
return itx_; };
217 StateHandler::VectorState::iterator
itx_;
229 if (itx_ == map_.begin() ) done_ =
true;
234 if (done_)
return true;
235 if (itx_ == map_.begin()) done_ =
true;
238 bool next() { --itx_;
return done();};
244 StateHandler::VectorState::iterator
itx_;
250 std::map<unsigned, unsigned> trackLastFeature_;
256 : null_(std::numeric_limits<int>::max())
260 bool remove(
unsigned s) {
261 trackLastFeature_.erase(s);
264 std::map<unsigned, unsigned>::iterator itx
265 = trackLastFeature_.find(s);
266 if ( itx != trackLastFeature_.end())
return itx->second;
271 std::map<unsigned, unsigned>::iterator itx
272 = trackLastFeature_.find(s);
273 if (itx != trackLastFeature_.end()) {
274 if (itx->second > f) itx->second = f;
275 }
else trackLastFeature_[s] = f;
279 LDEBUG(
"Updating state " << s <<
" with local=" << local_);
289 LDEBUG(
"u=" << s <<
": highest feature = " 290 << f <<
", current local feature=" << local_ <<
",v=" << v);
291 LDEBUG(
"u=" << s <<
": update local:" 292 << (
int) local_ <<
", and compare to candidate topological feature: " 296 LDEBUG(
"u=" << s <<
": past feature, drop TRUE " 297 << (
int) k <<
">=" << (
int) f);
301 }
else if ( v > 0 ) {
302 if (local_ > (
int) k ) {
304 LDEBUG(
"u=" << s <<
": update local (valid future feature):" << local_);
306 LDEBUG(
"u=" << s <<
": TRUE");
311 LDEBUG(
"u=" << s <<
" FALSE");
320 std::map<unsigned, unsigned> trackLastFeature_;
326 : null_(std::numeric_limits<int>::min())
330 bool remove(
unsigned s) {
331 trackLastFeature_.erase(s);
334 std::map<unsigned, unsigned>::iterator itx
335 = trackLastFeature_.find(s);
336 if ( itx != trackLastFeature_.end())
return itx->second;
340 std::map<unsigned, unsigned>::iterator itx
341 = trackLastFeature_.find(s);
342 if (itx != trackLastFeature_.end()) {
345 if (itx->second < f) itx->second = f;
346 }
else trackLastFeature_[s] = f;
350 LDEBUG(
"Updating state " << s <<
" with local=" << local_);
360 LDEBUG(
"u=" << s <<
": highest feature = " 361 << f <<
", current local feature=" << local_ <<
",v=" << v);
362 LDEBUG(
"u=" << s <<
": update local:" 363 << (
int) local_ <<
", and compare to " << (
int) k);
366 LDEBUG(
"u=" << s <<
": past feature, drop TRUE " 367 << (
int) f <<
"<=" << (
int) k);
370 if (k == f)
return true;
373 if (local_ < (
int) k ) {
375 LDEBUG(
"u=" << s <<
": update local (valid future feature):" << local_);
377 LDEBUG(
"u=" << s <<
": TRUE");
382 LDEBUG(
"u=" << s <<
" FALSE");
387 template<
class MapCursorT,
class FeatureTrackerT >
394 fst::VectorFst<TupleArc32> out;
396 FeatureTrackerT tracker_;
404 void operator()(fst::VectorFst<TupleArc32> &myfst,
bool reverse) {
406 typedef TupleArc32::StateId StateId;
407 std::queue < unsigned > tf;
408 VectorFst<TupleArc32> aux;
412 Reverse(myfst, &aux);
413 suffix_ =
"-reversed";
415 LDBG_EXECUTE(aux.Write(
"xx-determinized" + suffix_ +
".fst"));
418 LDEBUG(
"Start state=" << aux.Start() );
421 empty[aux.Start()]=1;
423 unsigned start =sh.
add(empty).first;
424 LDEBUG(
"New start state=" << start );
431 if (!sh.
get(u_, currentState)) {
432 LDEBUG(
"STATE " << u_ <<
": does not exist!");
436 s_ = currentState.rbegin()->first;
439 for ( MutableArcIterator< VectorFst<TupleArc32> > ai ( &aux, s_ )
444 LDEBUG(
"STATE " << u_ <<
": original => s=" << s_ <<
",arc=" 445 << arc.ilabel <<
"," << arc.olabel <<
"," 446 << arc.nextstate <<
"w=" << ai.Value().weight);
447 unsigned g=(StateId) addWeight(tf, sh, arc, ns, u_, s_);
449 LDEBUG(
"STATE " << u_ <<
": after => s=" << s_ <<
",arc=" 450 << arc.ilabel <<
"," << arc.olabel <<
"," 451 << arc.nextstate <<
"w=" << ai.Value().weight);
455 LDEBUG(
"STATE " << u_ <<
": Exit the arc loop");
457 LDEBUG(
"Finished! Reverse back and topsort");
458 LDBG_EXECUTE(out.Write(
"xx-determinized-topo-expanded" + suffix_ +
".fst"));
459 if (reverse) Reverse(out, &myfst);
468 StateHandler::VectorState::iterator itx = nf.end();
471 LDEBUG(
"STATE " << u_ <<
": s=" 472 << s_ <<
"BEFORE updating with features (no next state!): " 475 for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
478 int const &k = it.Value().first;
480 w.Push(k, it.Value().second);
484 nf[k] += it.Value().second.Value();
494 LERROR(
"General error: Empty state.");
503 unsigned countNegatives = 0;
504 unsigned countPositives = 0;
505 while (!mc.done() ) {
506 StateHandler::VectorState::iterator &itx = mc();
507 LDEBUG(
"STATE " << u_ <<
", state " << s_
508 <<
": let us validate feature " 509 << itx->first <<
"=>" << itx->second);
511 if (itx->first < 0 && tracker_.validate(u_, -itx->first, itx->second) ) {
513 if (itx->second < 0 ) ++countNegatives;
514 else if (itx->second > 0 ) ++countPositives;
515 if (itx->second < 0 || countNegatives + 1 >= countPositives )
516 drop[itx->first]=itx->second;
517 if (tracker_.stop()) {
518 LDEBUG(
"STATE " << u_ <<
", state " << s_ <<
": " 519 << itx->first <<
" feature found. We are done here");
527 LDEBUG(
"STATE " << u_ <<
", state " << s_ <<
": did not find a feature??");
530 for (StateHandler::VectorState::iterator itx2 = drop.begin()
534 w.Push(itx2->first, itx2->second);
535 nf.erase(itx2->first);
545 }
else if (drop.size() > 1) {
546 LDEBUG(
"Marking as non-solved");
558 unsigned addWeight(std::queue < unsigned > &tf
562 ,
unsigned u,
int s) {
564 TupleArc32::Weight w;
566 LDEBUG(
"STATE " << u_ <<
": s=" << s_ <<
", AFTER purge: " <<
printState(nf));
569 if (arc.ilabel != 0) {
575 LDEBUG(
"STATE " << u_ <<
": s=" << s_
576 <<
", AFTER pop (includes next state): " <<
printState(nf));
578 if (art.second.first) {
579 LDEBUG(
"STATE " << u_ <<
": s=" << s_ <<
", Added new state " 581 <<
" -- goes to queue");
584 LDEBUG(
"STATE " << u_ <<
": s=" << s_ <<
", Added new state " 586 <<
" -- DOES NOT go to queue");
590 tracker_.update(art.first);
593 if (art.second.second) {
596 LERROR(
"STATE " << u_ <<
"s=" << s_
597 <<
",At the end of the path we still have... features!" 600 nf.erase(arc.nextstate);
601 for (StateHandler::VectorState::iterator itx2 = nf.begin()
605 arc.weight.Push(itx2->first, itx2->second);
621 , fst::VectorFst<TupleArc32> *ofst
622 , std::vector<unsigned> *features) {
624 typedef boost::unordered_map<unsigned, unsigned > PathCountType;
625 typedef std::pair<unsigned,unsigned> VectorState;
626 typedef boost::bimap<unsigned,VectorState > BimapStateIdVectorStateType;
628 BimapStateIdVectorStateType mit;
630 std::vector<unsigned> &ft = *features;
631 ofst->ReserveStates(ifst.NumStates());
632 for (
unsigned k =0; k < ifst.NumStates(); ++k) ofst->AddState();
633 ofst->SetStart(ifst.Start());
634 tf[ifst.Start()] = 0;
636 for ( StateIterator< VectorFst<StdArc> > si (ifst ); !si.Done(); si.Next() ) {
637 unsigned state_id = si.Value();
638 LDEBUG(
"State Id =" << state_id);
639 unsigned &pcount = tf[state_id];
641 if (ifst.Final(state_id) != StdArc::Weight::Zero() ) {
642 TupleArc32::Weight z;
643 z.Push(1,ifst.Final(state_id));
644 ofst->SetFinal(state_id, z);
646 for ( ArcIterator< VectorFst<StdArc> > ai ( ifst, si.Value() )
650 StdArc arc = ai.Value();
651 tf[arc.nextstate] = std::max(tf[state_id] +1, tf[arc.nextstate]);
652 typedef BimapStateIdVectorStateType::right_map::const_iterator Itx;
653 std::pair<unsigned,unsigned> a(arc.olabel, pcount);
654 Itx x = mit.right.find(a);
656 if (x != mit.right.end()) {
659 index = mit.size() + 2;
660 mit.insert(BimapStateIdVectorStateType::value_type(index, a));
663 ft[index ] = arc.olabel;
664 LDEBUG(
"State Id =" << state_id <<
": ft at " 665 << index <<
"=" << ft[index ]);
666 TupleArc32::Weight y;
667 y.Push(1, arc.weight);
669 TupleArc32 ab(arc.ilabel, arc.ilabel, y,arc.nextstate);
670 ofst->AddArc(state_id,ab);
680 , fst::VectorFst<TupleArc32> *out) {
683 Determinize(ProjectFst<TupleArc32>(in,PROJECT_INPUT), out);
687 Push<TupleArc32,REWEIGHT_TO_FINAL>(*out,out,kPushWeights);
688 LDBG_EXECUTE(out->Write(
"determinized-minimized-pushed.fst"));
694 , fst::VectorFst<TupleArc32> *out) {
696 Determinize(ProjectFst<TupleArc32>(in,PROJECT_INPUT), out);
703 , fst::VectorFst<TupleArc32> *out) {
705 Determinize(ProjectFst<TupleArc32>(in,PROJECT_INPUT), out);
707 Push<TupleArc32,REWEIGHT_TO_FINAL>(*out,out,kPushWeights);
708 LDBG_EXECUTE(out->Write(
"determinized-minimized-pushed.fst"));
714 , fst::VectorFst<TupleArc32> *out) {
731 template<
class ActionT>
737 : exitOnFailure_(exitOnFailure)
738 , reverseFirst_(true)
746 for (
unsigned k = 0; k < fst->NumStates(); ++k){
747 narcs += fst->NumArcs(k);
749 std::vector<unsigned> features(narcs, -1);
753 VectorFst<TupleArc32> mfstTopoFeatures, dfst;
754 VectorFst<StdArc> result;
762 LDBG_EXECUTE(mfstTopoFeatures.Write(
"02-topologically-mapped.th.fst"));
771 action_(mfstTopoFeatures, &dfst);
777 VectorFst<TupleArc32> dfstw;
778 Map(dfst,&dfstw, gam2);
783 epw(dfstw, reverseFirst_);
784 LDBG_EXECUTE(dfstw.Write(
"05-det-topo-expanded-rev.th.fst"));
786 if (exitOnFailure_) {
787 LERROR(
"Failed to position correctly all topological features in ONE pass.");
790 FORCELINFO(
"First pass did not position all features correctly." 791 <<
" Running Second pass:");
795 epw2(dfstw, !reverseFirst_);
796 LDBG_EXECUTE(dfstw.Write(
"06-det-topo-expanded.th.fst"));
798 LERROR(
"Failed to position correctly all topological features" 799 <<
" after the second pass");
803 Map(dfstw,&result,gamr);
#define LDBG_EXECUTE(order)
fst::TropicalWeightTpl< F > Map(double)
Utilites to extract vocabulary, pseudo-determinize lattices and build substring transducers.
Utility functor for relabeling one or more lattices. Note that you can chain commands. See Unit test in fstutils.gtest.cpp for an example.