16 #ifndef TASK_HIFST_HPP 17 #define TASK_HIFST_HPP 40 template <
class Data ,
42 class OptimizeT = OptimizeMachine<Arc> ,
43 class CYKdataT = CYKdata ,
46 class ExpandedNumStatesRTNT = ExpandedNumStatesRTN<Arc> ,
47 class ReplaceFstByArcT = ManualReplaceFstByArc<Arc> ,
51 typedef typename Arc::Label Label;
52 typedef typename Arc::Weight Weight;
60 std::set<Label> hieroindexexistence_;
76 ReplaceFstByArcT *rfba_;
77 unordered_set<std::string> replacefstbyarc_;
78 unordered_set<std::string> replacefstbyarcexceptions_;
79 unsigned replacefstbynumstates_;
96 std::vector<std::pair<Label, Label> > pdtparens_;
102 ExpandedNumStatesRTNT *rtnnumstates_;
105 std::vector< std::pair< Label, const fst::Fst<Arc> * > > pairlabelfsts_;
108 fst::VectorFst<Arc> cykfstresult_;
113 unsigned numlocallm_;
116 std::vector<std::string> lpctuples_;
131 const std::string lmkey_;
132 const std::string locallmkey_;
133 const std::string outputkey_;
134 const std::string fullreferencelatticekey_;
141 unsigned numstatesthreshold_;
147 enum AlignmentType {RULES, AFFILIATION};
155 const std::string& fullreferencelatticekey =
160 numlocallm_ (rg.getVectorString (locallmkey).size() ),
163 fullreferencelatticekey_ ( fullreferencelatticekey ),
165 locallmkey_ ( locallmkey ),
166 outputkey_ ( outputkey ),
173 replacefstbyarc_ ( rg.getSetString (
175 replacefstbyarcexceptions_ ( rg.getSetString (
177 replacefstbynumstates_ ( rg.get<unsigned>
181 numstatesthreshold_ ( rg.get<unsigned>
183 lpctuples_ ( rg.getVectorString (
191 LINFO (
"Number of local language models=" << numlocallm_);
192 LINFO (
"aligner mode=" << aligner_);
193 LINFO (
"localprune mode=" << localprune_);
196 "local pruning conditions are defined by tuples of 4 elements: category,x,y,Number-of-states. Category is a string and x,y are int. Number of states is unsigned" );
200 "If you want to do cell pruning in translation, you should normally use a language model for local pruning. Check --hifst.localprune.lm.load and --hifst.localprune.enable.\n");
201 optimize.setAlignMode (aligner_);
206 LINFO (
"Hipdt mode enabled!");
209 LINFO (
"RTN openfst optimizations will not be applied");
216 LDEBUG (
"Hifst constructor done!" );
225 cykfstresult_.DeleteStates();
227 hieroindexexistence_.clear();
228 LINFO (
"Running HiFST" );
232 cykdata_ = d.cykdata;
233 if ( !
USER_CHECK ( cykdata_,
"cyk parse has not been executed previously?" ) ) {
234 resetExternalData (d);
239 fst::VectorFst<Arc> aux;
240 d.fsts[outputkey_] = &cykfstresult_;
241 d.vcat = cykdata_->vcat;
242 resetExternalData (d);
246 initLocalConditions();
249 rtnnumstates_ =
new ExpandedNumStatesRTNT;
250 rfba_ =
new ReplaceFstByArcT ( cykdata_->vcat, replacefstbyarc_,
251 replacefstbyarcexceptions_, aligner_, replacefstbynumstates_ );
253 LINFO (
"Second Pass: FST-building!" );
254 d.stats->setTimeStart (
"lattice-construction" );
256 fst::Fst<Arc> *sfst = buildRTN ( cykdata_->categories[
"S"], 0,
257 cykdata_->sentence.size() - 1 ).ptr_;
258 d.stats->setTimeEnd (
"lattice-construction" );
259 cykfstresult_ = (*sfst);
260 LINFO (
"Final - RTN head optimizations !" );
261 optimize ( &cykfstresult_ ,
262 std::numeric_limits<unsigned>::max() ,
263 !hipdtmode_ && optimize_
265 FORCELINFO (
"Stats for Sentence " << d.sidx <<
266 ": local pruning, number of times=" << piscount_);
267 d.stats->lpcount = piscount_;
268 LINFO (
"RTN expansion starts now!");
273 ( cykdata_->sentence.size() - 1 ) *
APYTAG;
274 if ( hieroindexexistence_.find ( hieroindex ) == hieroindexexistence_.end() )
275 pairlabelfsts_.push_back ( pair< Label,
const fst::Fst<Arc> * > ( hieroindex,
279 #if OPENFSTVERSION>=1005000 280 fst::ReplaceUtilOptions ruopt(hieroindex, !aligner_);
281 fst::ReplaceUtil<Arc> replace_util (pairlabelfsts_, ruopt);
282 #elif OPENFSTVERSION>=1004000 283 fst::ReplaceUtilOptions<Arc> ruopt(hieroindex, !aligner_);
284 fst::ReplaceUtil<Arc> replace_util (pairlabelfsts_, ruopt);
286 fst::ReplaceUtil<Arc> replace_util (pairlabelfsts_, hieroindex
290 LINFO (
"rtn optimizations...");
291 d_->stats->setTimeStart (
"replace-opts");
292 replace_util.ReplaceTrivial();
293 replace_util.ReplaceUnique();
294 replace_util.Connect();
295 pairlabelfsts_.clear();
296 replace_util.GetFstPairs (&pairlabelfsts_);
297 d_->stats->setTimeEnd (
"replace-opts");
301 boost::scoped_ptr< fst::VectorFst<Arc> > efst (
new fst::VectorFst<Arc>);
303 LINFO (
"Final Replace (RTN->FSA), main index=" << hieroindex);
304 d_->stats->setTimeStart (
"replace-rtn-final");
305 Replace (pairlabelfsts_, &*efst, hieroindex, !aligner_);
306 d_->stats->setTimeEnd (
"replace-rtn-final");
308 LINFO (
"Final Replace (RTN->PDA)");
309 d_->stats->setTimeStart (
"replace-pdt-final");
310 Replace (pairlabelfsts_, &*efst, &pdtparens_, hieroindex);
311 d_->stats->setTimeEnd (
"replace-pdt-final");
312 LINFO (
"Number of pdtparens=" << pdtparens_.size() );
323 if ( d.fsts.find ( fullreferencelatticekey_ ) != d.fsts.end() ) {
324 if (
static_cast< fst::VectorFst<Arc> *
> 325 (d.fsts[fullreferencelatticekey_])->NumStates() > 0 ) {
326 LINFO (
"Composing with full reference lattice, NS=" <<
327 static_cast< fst::VectorFst<Arc> *
> 328 (d.fsts[fullreferencelatticekey_])->NumStates() );
329 fst::Compose<Arc> ( *efst,
330 * (
static_cast<fst::VectorFst<Arc> *
> (d.fsts[fullreferencelatticekey_]) ),
332 LINFO (
"After composition: NS=" << efst->NumStates() );
334 LINFO (
"No composition with full ref lattice" );
337 LINFO (
"No composition with full ref lattice" );
341 fst::VectorFst<Arc> *res = NULL;
342 if (efst->NumStates() )
343 res = applyLanguageModel ( *efst );
345 LWARN (
"Empty lattice -- skipping LM application");
348 boost::shared_ptr<fst::VectorFst<Arc> >latlm ( res );
349 if ( latlm.get() == efst.get() ) {
350 LWARN (
"Yikes! Unexpected situation! Will it crash? (muhahaha) " );
354 if ( pruneweight_ < std::numeric_limits<float>::max() ) {
355 if (!hipdtmode_ || pdtparens_.empty() ) {
356 LINFO (
"Pruning, weight=" << pruneweight_);
357 fst::Prune<Arc> (*latlm, &cykfstresult_, mw_ ( pruneweight_ ) );
359 LINFO (
"Expanding, weight=" << pruneweight_);
360 fst::ExpandOptions<Arc> eopts (
true,
false, mw_ ( pruneweight_ ) );
361 Expand ( *latlm, pdtparens_, &cykfstresult_, eopts);
365 LINFO (
"Copying through full lattice with lm scores");
366 cykfstresult_ = *latlm;
369 LINFO (
"Copying through full lattice (no lm)");
370 cykfstresult_ = *efst;
372 if ( hieroindexexistence_.find ( hieroindex ) == hieroindexexistence_.end() )
373 pairlabelfsts_.pop_back();
375 pairlabelfsts_.clear();
376 LDBG_EXECUTE ( cykfstresult_.Write (
"fsts/FINAL-efcp.fst" ) );
378 fst::RmEpsilon ( &cykfstresult_ );
379 LDBG_EXECUTE ( cykfstresult_.Write (
"fsts/FINAL-efcpr.fst" ) );
380 LINFO (
"NS=" << cykfstresult_.NumStates() );
382 LINFO (
"deleting data stuff..." );
385 delete rtnnumstates_;
387 d.vcat = cykdata_->vcat;
388 resetExternalData (d);
389 d.fsts[outputkey_] = &cykfstresult_;
390 if (hipdtmode_ && pdtparens_.size() )
391 d.fsts[outputkey_ +
".parens" ] = &pdtparens_;
393 FORCELINFO (
"End Sentence ******************************************************" );
394 d.stats->setTimeEnd (
"sent-dec" );
// Releases the CYK parse data by calling cykdata_->freeMemory().
// NOTE(review): only the first statement of the body appears in this listing;
// how (or whether) d is used by the remaining statements cannot be confirmed here.
402 inline void resetExternalData (Data& d) {
403 cykdata_->freeMemory();
411 if (rtnfiles_() !=
"") {
412 std::string filenamepattern = rtnfiles_ (d_->sidx);
413 FORCELINFO (
"Writing rtn files..." << filenamepattern);
414 for (
unsigned k = 0; k < pairlabelfsts_.size(); ++k) {
415 std::string filename = filenamepattern;
417 , ucam::util::toString<Label> (pairlabelfsts_[k].first) );
419 (pairlabelfsts_[k].second) ), filename);
429 explicit FSAPlusInfo(fst::Fst<Arc>* p
440 explicit FSAPlusInfo()
// Reorders the lower-level FSTs of rule rule_idx according to the index
// mappings obtained from d_->ssgd->getMappings().
//   rule_idx  rule whose mappings are requested.
//   fsts      lower-level FSTs (with their cell info) to be reordered.
// NOTE(review): the statement that hands newfsts back to the caller is not
// visible in this listing.
457 inline void mapfsts (
unsigned int rule_idx,
458 std::vector < FSAPlusInfo >& fsts ) {
// mappings[k] is the destination slot for source element k.
459 unordered_map<unsigned int, unsigned int > mappings;
460 d_->ssgd->getMappings ( rule_idx, &mappings );
462 "Mismatch between mappings and lower-level fsts" );
463 LDEBUG (
"mappings size=" << mappings.size() );
// Build the permuted copy in a separate vector of the same size.
465 std::vector<FSAPlusInfo> newfsts(fsts.size());
466 for (
unsigned int k = 0; k < fsts.size(); k++ ) {
// Move the whole entry k to its mapped slot ...
// NOTE(review): if unordered_map has standard/TR1 semantics, mappings[k]
// default-inserts 0 for a missing key -- presumably the check above rules
// that out; confirm.
467 newfsts[mappings[k]] = fsts[k];
// ... then overwrite x_, y_ and cc_ of that slot with the values the
// original vector held at the same position, so these three fields keep
// their original positional order while the rest of the entry is permuted.
469 newfsts[mappings[k]].x_ = fsts[mappings[k]].x_;
470 newfsts[mappings[k]].y_ = fsts[mappings[k]].y_;
471 newfsts[mappings[k]].cc_ = fsts[mappings[k]].cc_;
// Recursively builds the FST for grid cell (cc, x, y) and registers it in rtn_.
//   cc  category index of the cell.
//   x   first cell coordinate (also passed to addRule as offset x + 1).
//   y   second cell coordinate.
// Returns an FSAPlusInfo wrapping whatever rtn_ holds for (cc, x, y).
// NOTE(review): several statements (declarations of mur, mybp, vcat, hieroindex,
// and the branch conditions) are missing from this listing; comments below only
// describe the statements that are visible.
488 FSAPlusInfo buildRTN (
unsigned int cc,
unsigned int x,
unsigned int y ) {
// If rtn_ already holds an FST for this cell, return it without rebuilding.
489 FSAPlusInfo fpi( ( *rtn_ ) ( cc, x, y ), cc, x, y);
491 if ( fpi.ptr_ != NULL )
return fpi;
// "cc.x.y" tag, used below to name the debug FST dumps under fsts/.
493 std::ostringstream o;
494 o << cc <<
"." << x <<
"." << y;
496 unsigned& nnt = cykdata_->nnt;
498 std::stringstream ostr;
499 ostr << vcat[cc] <<
"." << x <<
"." << y;
500 std::string filename;
505 LDEBUG (
"bp> " << cc <<
"," << x <<
"," << y <<
":" <<
506 (
unsigned ) cykdata_->bp ( cc, x, y ).size() );
// One iteration per backpointer entry of this cell; each contributes one rule
// FST (built by addRule) to the union accumulator mur.
507 for (
unsigned i = 0; i < cykdata_->bp ( cc, x, y ).size(); i++ ) {
508 unsigned idx = cykdata_->cykgrid ( cc, x, y, i );
510 std::vector<FSAPlusInfo> requiredfsts;
// Phrase-based case (per the log message below): no lower-level FSTs needed.
512 mur.Add ( addRule ( idx, requiredfsts, x + 1) ) ;
513 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
514 ":adding phrase-based rule index " << idx );
// Hierarchical case: mybp[i] is walked in strides of three; entries whose
// first element is greater than nnt trigger a recursive buildRTN call and
// the result is collected in requiredfsts.
518 for (
unsigned j = 0; j < mybp[i].size(); j += 3 ) {
519 if ( mybp[i][j] > nnt ) {
522 requiredfsts.push_back ( buildRTN ( mybp[i][j], mybp[i][j + 1],
524 LDEBUG (
"back to bp> " << cc <<
"," << x <<
"," << y <<
":" <<
525 (
unsigned ) cykdata_->bp ( cc, x, y ).size() );
// Reorder the collected lower-level FSTs before building the rule FST.
527 mapfsts ( idx, requiredfsts );
528 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
": adding hiero rule index " <<
530 mur.Add ( addRule ( idx, requiredfsts , x + 1) );
// Take ownership of the union produced by mur for this cell.
532 boost::shared_ptr< fst::VectorFst<Arc> > mdfst ( mur() );
533 LDBG_EXECUTE ( mdfst->Write (
"fsts/" + o.str() +
".fst" ) );
536 std::numeric_limits<unsigned>::max(),
538 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
": FST built!" );
539 LDBG_EXECUTE ( mdfst->Write (
"fsts/" + o.str() +
"redm.fst" ) );
// Per-cell statistics are keyed by cc * 1000000 + y * 1000 + x.
540 d_->stats->numstates[ cc * 1000000 + y * 1000 + x ] =
544 rtnnumstates_->update ( cc, x, y, &*mdfst );
// localPruning() returns NULL when the cell was not pruned; scoped_ptr owns
// the pruned lattice otherwise.
545 boost::scoped_ptr< fst::VectorFst<Arc> > pruned ( localPruning ( *mdfst, cc, x, y ) );
547 if ( pruned.get() != NULL ) {
548 LDBG_EXECUTE ( pruned->Write (
"fsts/" + o.str() +
"redmp.fst" ) );
// Optimize the pruned lattice; the third argument is false in hipdt mode or
// when optimize_ is off.
549 optimize (&*pruned , numstatesthreshold_ , !hipdtmode_ && optimize_ );
550 LDBG_EXECUTE ( pruned->Write (
"fsts/" + o.str() +
"redmpo.fst" ) );
553 d_->stats->numstates[ cc * 1000000 + y * 1000 + x ] = ( *rtnnumstates_ ) ( cc,
// NOTE(review): mdfst is presumably replaced by the pruned lattice in a line
// missing from this listing before the state count is refreshed -- confirm.
556 rtnnumstates_->update ( cc, x, y, &*mdfst );
557 d_->stats->numprunedstates[ cc * 1000000 + y * 1000 + x ]
558 = ( *rtnnumstates_ ) (cc, x, y );
560 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
":No pruning" );
// Ask rfba_ for a replacement of mdfst; a non-NULL result means the cell is
// stored through the replacement (note the swapped argument order of the two
// rtn_->Add calls) and mdfst is registered in pairlabelfsts_ under hieroindex.
562 boost::shared_ptr< fst::VectorFst<Arc> > outfst ( ( *rfba_ ) ( *mdfst,
564 if ( outfst.get() != NULL ) {
565 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
": replacefstbyarcfor cat= " <<
566 vcat[cc] <<
",NS=" << mdfst->NumStates() );
567 rtn_->Add ( cc, x, y, outfst , mdfst );
568 hieroindexexistence_.insert ( hieroindex );
569 pairlabelfsts_.push_back ( pair< Label,
const fst::Fst<Arc> * > ( hieroindex,
572 rtn_->Add ( cc, x, y, mdfst , outfst );
573 LDEBUG (
"AT: " << cc <<
"," << x <<
"," << y <<
":" <<
574 "Delaying not applied. Stored, NS=" << (
unsigned ) mdfst->NumStates() );
// Re-read the cell from rtn_ so the result reflects what was just stored.
577 FSAPlusInfo fpi2( ( *rtn_ ) ( cc, x, y ), cc, x, y);
// Builds the FST for a single rule as a linear chain of arcs, one per element
// of the rule's translation, followed by one weighted arc into the final state.
//   rule_idx   index of the rule (used with gd.getRule / gd.getIdx).
//   lowerfsts  lower-level FSTs consumed, in order, by the non-terminals.
//   offset     running offset, advanced per non-terminal and used for the
//              input label in AFFILIATION mode.
// Returns a heap-allocated fst::VectorFst<Arc>.
// NOTE(review): the declarations of translation, gd, iw, ow and w, and the
// return statement(s), are missing from this listing.
590 fst::VectorFst<Arc> *addRule (
unsigned rule_idx,
591 std::vector<FSAPlusInfo>& lowerfsts
592 ,
unsigned offset ) {
// NOTE(review): translation elements are compared with and assigned string
// values below; if the element type is std::string, push_back ( 0 ) builds a
// string from a null pointer constant (undefined behaviour) -- confirm the
// element type and whether "0" was intended.
595 if ( !translation.size() ) {
597 translation.push_back ( 0 );
// Rewrite special tokens in place: "<s>" becomes "1", "</s>" becomes "2";
// "<dr>", "<oov>" and "<sep>" are replaced by the contents of a stringstream
// (the value written into each stream is on lines not shown here).
599 for (
unsigned k = 0; k < translation.size(); ++k) {
600 if ( translation[k] ==
"<s>" ) {
601 translation[k] =
"1";
602 }
else if ( translation[k] ==
"</s>" ) translation[k] =
"2";
603 else if ( translation[k] ==
"<dr>" ) {
604 std::stringstream dr;
606 translation[k] = dr.str();
607 LDEBUG (
"Deletion rule: " << gd.
getRule ( rule_idx ) <<
"," <<
609 }
else if ( translation[k] ==
"<oov>" ) {
610 std::stringstream oov;
612 translation[k] = oov.str();
613 LDEBUG (
"oov rule: " << gd.
getRule ( rule_idx ) <<
"," << translation[k] );
614 }
else if ( translation[k] ==
"<sep>" ) {
615 std::stringstream sep;
617 translation[k] = sep.str();
618 LDEBUG (
"separator rule: " << gd.
getRule ( rule_idx ) <<
"," <<
622 LDEBUG (
"Starting to build!" );
623 fst::VectorFst<Arc> *rulefst =
new fst::VectorFst<Arc>;
625 rulefst->SetStart ( 0 );
// iw2 is the rule's grammar index + 1 in RULES alignment mode, 0 otherwise;
// it is passed to mw_ together with w when the final arc weight is created.
627 Label iw2 = (at_ == RULES)?gd.
getIdx ( rule_idx ) + 1: 0;
629 if ( !aligner_ ) iw = 0;
631 LDEBUG (
"Building FST for rule " << rule_idx <<
":" << gd.
getRule ( rule_idx ) <<
", original id=" << gd.
getIdx(rule_idx)
632 <<
", translation size=" << translation.size() );
633 unsigned kmax = translation.size();
// Counts non-terminals seen so far; also indexes into lowerfsts.
634 unsigned nonterminal = 0;
635 std::vector< pair< Label, const fst::Fst<Arc> * > > pairlabelfsts;
// One link slot per translation element, initialised to NORULE.
637 std::vector<unsigned> links(translation.size(),
NORULE);
638 if (at_ == AFFILIATION) {
639 LDEBUG(
"Getting affiliation...");
// One arc k -> k + 1 per translation element.
642 for (
unsigned k = 0; k < kmax; ++k ) {
645 bool isnonterminal = !
isTerminal ( translation[k] );
646 if ( isnonterminal) {
649 "Missing fsts to build the rule..." );
// Advance offset by the y_ of the consumed lower-level FST plus one, then
// register that FST under output label ow for the later Replace().
650 offset +=lowerfsts[nonterminal].y_ + 1;
651 pairlabelfsts.push_back ( pair< Label,
const fst::Fst<Arc> * >
652 ( ow, lowerfsts[nonterminal++].ptr_ ) );
655 std::istringstream buffer ( translation[k] );
// Without alignment the arc is an acceptor arc (input label == output label).
660 if ( !aligner_ ) iw = ow;
662 if (isnonterminal) iw =
NORULE;
665 if (at_ == AFFILIATION) {
666 iw += offset - nonterminal;
670 LDEBUG(
"Adding arc iw=" << iw <<
",ow=" << ow);
671 rulefst->AddArc ( k, Arc ( iw, ow, Weight::One(), k + 1 ) );
// Closing arc kmax -> kmax + 1 carries the rule weight and output label 0;
// kmax + 1 is the only final state set here.
674 Weight weight = mw_ ( w , iw2 );
675 rulefst->AddArc ( kmax, Arc ( iw, 0, weight, kmax + 1 ) );
676 rulefst->SetFinal ( kmax + 1, Weight::One() );
677 fst::VectorFst<Arc>* auxi;
// With at least one non-terminal, the rule FST and the lower-level FSTs are
// combined through Replace() with root label APRULETAG + nonterminal.
678 if ( nonterminal > 0 ) {
679 pairlabelfsts.push_back ( pair< Label,
const fst::Fst<Arc> * >
681 fst::VectorFst<Arc> *aux =
new fst::VectorFst<Arc>;
682 Replace (pairlabelfsts, aux,
APRULETAG + nonterminal, !aligner_);
686 fst::RmEpsilon<Arc> ( rulefst );
// Translates the local-pruning tuples in lpctuples_ into `conditions` objects.
// Does nothing when local pruning is disabled or no tuples were configured.
// NOTE(review): the statement that stores each condition (presumably into lpc_,
// whose size is logged at the end) is not visible in this listing.
691 void initLocalConditions() {
692 if ( !localprune_ )
return;
693 if ( !lpctuples_.size() )
return;
695 LINFO (
"Set up conditions for local cell pruning" );
// Tuples are consumed four strings at a time: [k] category name, [k + 1]
// span, [k + 2] and [k + 3] unsigned values.
// NOTE(review): indices k + 1 .. k + 3 are read without a visible check that
// lpctuples_.size() is a multiple of 4 -- presumably validated elsewhere; confirm.
696 for (
unsigned k = 0; k < lpctuples_.size(); k += 4 ) {
697 int y = ucam::util::toNumber<int> ( lpctuples_[k + 1] );
// A negative span is taken relative to the sentence length
// (-1 maps to getNumberWordsSentence()).
698 if ( y < 0 ) y = cykdata_->getNumberWordsSentence() + y + 1;
699 LINFO (
"cell pruning conditions (cat,span,numstates,weight): " 700 << cykdata_->categories[lpctuples_[k]]
702 << ucam::util::toNumber<unsigned> ( lpctuples_[k + 2] ) <<
"," 703 << ucam::util::toNumber<unsigned> ( lpctuples_[k + 3] ) );
// The category name is resolved to its id through cykdata_->categories.
704 conditions c ( cykdata_->categories[lpctuples_[k]]
706 , ucam::util::toNumber<unsigned> ( lpctuples_[k + 2] )
707 , ucam::util::toNumber<unsigned> ( lpctuples_[k + 3] ) );
710 LINFO (
"We have: " << lpc_.
size() <<
" conditions" );
// Composes *fst, in place, with every filter in d_->filters, in order.
//   fst  lattice to filter; overwritten with the composition result after
//        each filter.
// Stops early as soon as the lattice has no states left.
719 inline void applyFilters ( fst::VectorFst<Arc> *
fst ) {
// Sort arcs by output label before composing with the filters.
720 fst::ArcSort<Arc> ( fst, fst::OLabelCompare<Arc>() );
723 LINFO (
"Apply " << d_->filters.size() <<
" filters to the search space!" );
724 for (
unsigned k = 0; k < d_->filters.size(); ++k ) {
// Plain FST composition unless hipdt mode is on and parentheses exist.
728 if (!hipdtmode_ || pdtparens_.empty() ) {
729 LINFO (
"FST composition with filter");
730 *fst = (fst::ComposeFst<Arc> (*fst, *d_->filters[k]) );
// PDT case: the compose options are built from pdtparens_; the options type
// name depends on OPENFSTVERSION.
732 LINFO (
"PDT composition");
733 #if OPENFSTVERSION>=1003003 734 fst::PdtComposeFstOptions<Arc>
opts (*fst, pdtparens_, *d_->filters[k]);
736 fst::PdtComposeOptions<Arc>
opts (*fst, pdtparens_, *d_->filters[k]);
739 *fst = (fst::ComposeFst<Arc> (*fst, *d_->filters[k],
opts) );
741 LINFO (
"After filter " << k <<
", NS=" << fst->NumStates() );
// Nothing survives the filter: applying further filters is pointless.
744 if ( !fst->NumStates() )
break;
749 typedef boost::shared_ptr<ApplyLanguageModelOnTheFlyInterfaceType> ApplyLanguageModelOnTheFlyInterfacePtrType;
750 std::vector<ApplyLanguageModelOnTheFlyInterfacePtrType> almotfLocal_;
751 std::vector<ApplyLanguageModelOnTheFlyInterfacePtrType> almotf_;
// Creates one language-model handler per model stored in d_->klm[lmkey] and
// stores them in almotf.
//   lmkey   key selecting the set of language models in d_->klm.
//   mw      weight-maker; NOTE(review): its use is on lines missing from this
//           listing.
//   almotf  output vector, resized to the number of models for lmkey.
// NOTE(review): lines between the signature and the resize are not visible and
// may contain early-exit conditions.
757 template<
template<
class>
class MakeWeightT>
758 void initializeLanguageModelHandlers(
const std::string& lmkey
759 , MakeWeightT<Arc> &mw
760 , std::vector<ApplyLanguageModelOnTheFlyInterfacePtrType> &almotf) {
765 almotf.resize(d_->klm[lmkey].size());
// Passed to the handler factory still empty in the visible lines.
766 unordered_set<Label> epsilons;
767 for (
unsigned k = 0; k < d_->klm[lmkey].size(); ++k ) {
// Every model must be loaded before a handler can be attached to it.
768 USER_CHECK ( d_->klm[lmkey][k]->model != NULL,
769 "Language model " << k <<
" not available!" );
770 almotf[k].reset(fsttools::assignKenLmHandler<Arc, MakeWeightT >(rg_, lmkey, epsilons
771 , *(d_->klm[lmkey][k])
775 LINFO(
"Initialized " << d_->klm[lmkey].size() <<
" language model handlers");
// Applies every language model registered under lmkey to a copy of localfst,
// one after another, through the handlers in almo.
//   localfst  input lattice; copied, never modified.
//   lmkey     key selecting the language models in d_->klm.
//   mw        weight-maker (not used by the statements visible here).
//   almo      handlers, indexed like d_->klm[lmkey].
// Returns a heap-allocated lattice.
// NOTE(review): what is returned when no model exists for lmkey, and the
// handling of a failed composition, are on lines missing from this listing.
779 template<
template<
class>
class MakeWeightT>
780 inline fst::VectorFst<Arc> *applyLanguageModel (
const fst::Fst<Arc>& localfst
781 ,
const std::string& lmkey
782 , MakeWeightT<Arc> &mw
783 , std::vector<ApplyLanguageModelOnTheFlyInterfacePtrType> &almo
785 if ( d_->klm.find ( lmkey ) == d_->klm.end() ) {
787 FORCELINFO (
"No Language models for key=" << lmkey
788 <<
" available! Skipping language model application. " );
// Working copy of the input; replaced by each handler's result below.
794 fst::VectorFst<Arc> *output
795 =
new fst::VectorFst<Arc> (* (
const_cast<fst::Fst<Arc> *
> ( &localfst ) ) );
// Labels passed to the handlers as `epsilons`: DR, OOV, SEP and every PDT
// parenthesis label.
798 unordered_set<Label> epsilons;
799 epsilons.insert (
DR );
800 epsilons.insert (
OOV );
802 epsilons.insert (
SEP );
805 for (
unsigned j = 0; j < pdtparens_.size(); ++j) {
806 epsilons.insert (pdtparens_[j].first);
807 epsilons.insert (pdtparens_[j].second);
// Chain the models: the output of model k is the input of model k + 1.
810 for (
unsigned k = 0; k < d_->klm[lmkey].size(); ++k ) {
811 LINFO (
"Composing with " << k <<
"-th language model" );
812 d_->stats->setTimeStart (
"on-the-fly-composition " 814 fst::VectorFst<Arc> *aux = almo[k]->run(*output, epsilons);
816 LERROR (
"Something very wrong happened in composition with the lm...");
// The previous lattice is no longer needed once the handler has produced aux.
819 delete output; output = aux;
820 d_->stats->setTimeEnd (
"on-the-fly-composition " 822 LDEBUG (
"After applying language model, NS=" << output->NumStates() );
824 LINFO (
"Connect!" );
826 LINFO (
"Done! NS=" << output->NumStates() );
// Convenience overload: applies either the local-pruning language models
// (locallmkey_ / almotfLocal_) or the full language models (lmkey_ / almotf_)
// to localfst.
//   localfst  lattice to score.
//   local     defaults to false. NOTE(review): presumably selects the first
//             (local) branch when true; the branching statement itself is not
//             visible in this listing.
// Returns NULL when no handler is available for the selected key, otherwise
// the result of the templated applyLanguageModel overload.
834 inline fst::VectorFst<Arc> *applyLanguageModel (
const fst::Fst<Arc>& localfst
835 ,
bool local =
false ) {
// Local language model path.
838 initializeLanguageModelHandlers(locallmkey_, mw, almotfLocal_);
839 if (!almotfLocal_.size())
return NULL;
840 LINFO (
"Composing with local lm for inadmissible pruning (unless on top cell)" );
841 return applyLanguageModel (localfst, locallmkey_, mw, almotfLocal_);
// Full language model path.
844 initializeLanguageModelHandlers(lmkey_, mw, almotf_);
845 if (!almotf_.size())
return NULL;
846 LINFO (
"Composing with full lm for admissible pruning" );
847 return applyLanguageModel (localfst, lmkey_, mw, almotf_);
// Expands the RTN rooted at the given cell lattice into a new heap-allocated
// FST via Replace(): either RTN->FSA or RTN->PDA (the latter also fills
// pdtparens_).
//   localfst   lattice of the cell; must have at least one state.
//   cc, x, y   cell coordinates. NOTE(review): presumably used to compute
//              hieroindex on a line missing from this listing.
// NOTE(review): the condition choosing between the two Replace() calls and the
// return statement are not visible here.
851 inline fst::VectorFst<Arc> *expand (
const fst::VectorFst<Arc>& localfst,
852 unsigned cc,
unsigned x,
unsigned y ) {
854 USER_CHECK ( localfst.NumStates() > 0,
"Empty lattice?" );
// Temporarily register the root in pairlabelfsts_ when hieroindex is not
// already in hieroindexexistence_; the matching pop_back() is at the end.
856 if ( hieroindexexistence_.find ( hieroindex ) == hieroindexexistence_.end() )
857 pairlabelfsts_.push_back ( pair< Label,
const fst::Fst<Arc> * > ( hieroindex,
859 fst::VectorFst<Arc> *aux =
new fst::VectorFst<Arc>;
// RTN -> FSA expansion, timed under "replace-rtn".
861 LINFO (
"Replace (RTN->FSA)");
862 d_->stats->setTimeStart (
"replace-rtn");
863 Replace (pairlabelfsts_, aux, hieroindex, !aligner_);
864 d_->stats->setTimeEnd (
"replace-rtn");
// RTN -> PDA expansion, timed under "replace-pdt"; parentheses go to pdtparens_.
866 LINFO (
"Replace (RTN->PDA)");
867 d_->stats->setTimeStart (
"replace-pdt");
868 Replace (pairlabelfsts_, aux, &pdtparens_, hieroindex);
869 d_->stats->setTimeEnd (
"replace-pdt");
870 LINFO (
"Number of pdtparens=" << pdtparens_.size() );
// Undo the temporary registration made above (same condition).
873 if ( hieroindexexistence_.find ( hieroindex ) == hieroindexexistence_.end() )
874 pairlabelfsts_.pop_back();
// Local (cell-level) pruning of the lattice built for cell (cc, x, y).
//   fst        lattice of the cell.
//   cc, x, y   cell coordinates.
// Returns NULL when local pruning is disabled (localprune_ is false).
// When the cell qualifies according to lpc_, the lattice is expanded,
// epsilon-removed, filtered, scored with the local language model and pruned.
// NOTE(review): the declaration of weight, the statements following the PDT
// Expand() and fst::Map calls, and all other return statements are missing
// from this listing.
890 fst::VectorFst<Arc> *localPruning (
const fst::VectorFst<Arc>& fst,
unsigned cc,
891 unsigned x,
unsigned y ) {
// "cc.x.y" tag of the cell.
893 std::ostringstream o;
894 o << cc <<
"." << x <<
"." << y;
896 if ( !localprune_ )
return NULL;
898 unsigned referenceminstates;
// The conditions are tested with the category, y + 1 and the state count
// reported by rtnnumstates_ for this cell.
// NOTE(review): weight is presumably filled in by lpc_ -- confirm.
899 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
900 ": Testing conditions; expected lattice size=" << ( *rtnnumstates_ ) ( cc, x,
902 if ( lpc_ ( cc, y + 1, ( *rtnnumstates_ ) ( cc, x, y ), weight ) ) {
903 LINFO (
"AT " << cc <<
"," << x <<
"," << y <<
904 ": Qualifies for local pruning. Making it so!" );
905 LDEBUG (
"AT " << cc <<
"," << x <<
"," << y <<
": expanding RTN/RmEpsilon" );
// Expand the cell's RTN, then remove epsilons from the expansion.
906 fst::VectorFst<Arc> *efst = expand ( fst, cc, x, y );
907 fst::RmEpsilon<Arc> ( efst );
908 LINFO (
"AT " << cc <<
"," << x <<
"," << y <<
": NS=" << efst->NumStates() );
910 LINFO(
"Apply filtering");
911 applyFilters ( efst );
912 LINFO (
"Apply LM" );
// local = true; a NULL result is handled by the check that follows.
913 fst::VectorFst<Arc> * latlm = applyLanguageModel ( *efst ,
true );
915 if ( latlm != NULL ) {
// FST case: prune in place with the weight threshold.
918 if (!hipdtmode_ || pdtparens_.empty() ) {
919 LINFO (
"Prune with weight=" << weight );
920 fst::Prune<Arc> ( latlm, mw_ ( weight ) );
// PDT case: pruned expansion into a temporary lattice.
922 LINFO (
"PDT expanding with weight=" << weight );
923 fst::ExpandOptions<Arc> eopts (
true,
false, mw_ ( weight ) );
924 fst::VectorFst<Arc> latlmaux;
925 Expand ( *latlm, pdtparens_, &latlmaux, eopts);
929 LINFO (
"Delete LM scores" );
933 fst::Map<Arc> ( latlm,
935 LINFO (
"AT " << cc <<
"," << x <<
"," << y <<
": pruned with weight=" << weight
936 <<
",NS=" << latlm->NumStates() );
// No local language model result: only the filters were applied.
939 LINFO (
"AT " << cc <<
"," << x <<
"," << y <<
940 "Local LM not applied, filtered with " << d_->filters.size() <<
941 " filter(s) ,NS=" << efst->NumStates() );
944 LINFO (
"AT " << cc <<
"," << x <<
"," << y <<
945 ": Does not qualify for local pruning. " );
#define ZDISALLOW_COPY_AND_ASSIGN(TypeName)
struct containing the elements that trigger local pruning.
const float getWeight(std::size_t idx)
Returns the weight of a rule. This weight is the dot product of all the features with its scales...
const std::string kHifstLatticeStore
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
bool isTerminal(const std::string &word)
Determine if the element is a terminal (i.e. a word, represented by a number) or a non-terminal (i...
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string. Converts to string integers, floats, doubles. Quits execution if ...
Contains utility class to predict number of states of an RTN after expanding to equivalent FSA...
const std::string kHifstWritertn
const std::string kHifstPrune
unordered_map< uint, std::string > grammar_inversecategories_t
#define LDBG_EXECUTE(order)
void getLinks(std::size_t idx, std::vector< unsigned > &links) const
std::string getTimestamp(void)
Generates time stamp.
T get(const std::string &key) const
Returns parsed value associated to key.
const std::string kHifstUsepdt
const std::string getRule(std::size_t idx)
Returns rule corresponding to index idx.
std::size_t size()
returns size of map
Core of Hifst. Implements the lattice-building procedure for a cyk-parsed sentence.
This class creates the Union of an arbitrarily large number of fsts. This implementation uses one RTN...
templated Mapper that modifies weights over an FST, passing through the other values of the arc...
const std::string kHifstReplacefstbyarcNonterminals
const std::string kHifstAlilatsmode
HiFSTTask(const ucam::util::RegistryPO &rg, const std::string &outputkey=HifstConstants::kHifstLatticeStore, const std::string &locallmkey=HifstConstants::kHifstLocalpruneLmLoad, const std::string &fullreferencelatticekey=HifstConstants::kReferencefilterNosubstringStore, const std::string &lmkey=HifstConstants::kLmLoad)
Constructor with registry object and several keys to access data object and registry.
const bool isPhrase(std::size_t idx)
const std::string kHifstLocalpruneLmLoad
Templated (hybrid) Interface for Task classes.
const std::string kHifstLocalpruneNumstates
const std::string kHifstOptimizecells
const std::string kHifstAlilatsmodeLinks
void FstWrite(const Fst< Arc > &fst, const std::string &filename, const std::string &txtname="txt")
Templated method that writes an fst either in binary or text format.
Templated functor that creates a weight given a float.
Wrapper to ApplyLanguageModelOnTheFly to apply different kenlm models.
std::vector< cykparser_rulebpcoordinates_t > cykparser_ruledependencies_t
const std::string kHifstLocalpruneConditions
void add(const conditions &c)
Add condition.
Contains Function objects that determine whether an FST is replaceable or not by an arc pointer...
#define CYK_RETURN_FAILURE
Contains Function objects that optimize a machine.
convenience class that takes care of local pruning conditions. Conditions are indexed by 1000*cc+y...
bool run(Data &d)
Runs the lattice building procedure.
const std::size_t getIdx(std::size_t idx)
Returns the true idx of a rule (i.e. line in the grammar file). If it is sentence specific...
LexicographicArc< StdArc::Weight, StdArc::Weight > LexStdArc
void clear()
Clears all conditions.
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
const std::string kHifstRtnopt
Implements RTN class. Stores pointers to cell FSAs of the RTN using a hiero-index representing cell c...
Structure for sentence-specific grammar. Rules will be queried by cyk per position and number of eleme...
std::string const kLmLoad
const std::string kHifstReplacefstbyarcNumstates
Contains functor and struct to handle local pruning conditions.
void find_and_replace(std::string &haystack, const std::string &needle, const std::string &replace)
const std::string kReferencefilterNosubstringStore
const std::string kReferencefilterLoad
const std::string kHifstReplacefstbyarcExceptions
const std::string kHifstLocalpruneEnable
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx)
Returns translation as a vector of elements.