15 #ifndef TASK_DISAMBIG_HPP 16 #define TASK_DISAMBIG_HPP 35 template<
class Data,
class Arc>
// Shorthand for the weight and label types of the arc template parameter.
38 typedef typename Arc::Weight Weight;
39 typedef typename Arc::Label Label;
// Unigram mapping transducer; assigned in run(fst::VectorFst<Arc>*) from
// d_->fsts[unimapkey_] (non-owning as far as this listing shows).
42 fst::VectorFst<Arc> *unimap_;
// Key for the language model: read via rg.get<std::string>(lmkey_) and used
// to index d_->klm.
53 const std::string lmkey_;
// Key of the input lattice looked up in d.fsts.
55 const std::string inputkey_;
// Key under which the address of olattice_ is stored in d.fsts.
57 const std::string outputkey_;
// Key of the unigram mapping transducer looked up in d_->fsts.
59 const std::string unimapkey_;
// Output lattice owned by this object; its address is published in
// d.fsts[outputkey_].
68 fst::VectorFst<Arc> olattice_;
77 bool forceloading =
false ) :
80 inputkey_ ( inputkey ),
81 outputkey_ ( outputkey ),
82 unimapkey_ ( unimapkey ),
89 "prune parameter must be byshortestpath/byweight,number" );
90 if ( pstrat[0] ==
"byshortestpath" ) {
91 LINFO (
"Shortest Path n=" << pstrat[1] );
92 shp_ = toNumber<unsigned> ( pstrat[1] );
93 prune_ = std::numeric_limits<float>::max();
94 }
else if ( pstrat[0] ==
"byweight" ) {
95 LINFO (
"Prune by weight b=" << pstrat[1] );
96 shp_ = std::numeric_limits<unsigned>::max();
97 prune_ = toNumber<float> ( pstrat[1] );
100 "prune parameter incorrectly set: first parameter is byshortestpath or byweight" );
102 if ( rg.
get<std::string> ( lmkey_) ==
"" 103 && rg.
get<std::string> ( unimapkey_ ) ==
"" )
return;
105 && rg.
get<std::string> ( unimapkey_ ) !=
"" ,
106 "recaser.lm and recaser.unimap must either be both defined or both left to empty string " ) )
113 USER_CHECK ( d.fsts.find ( inputkey_ ) != d.fsts.end(),
114 "No input fst to recase?" );
115 fst::ShortestPath<Arc> ( * (
static_cast< fst::VectorFst<Arc> *
> 116 (d.fsts[inputkey_] ) ), &olattice_, 1 );
117 fst::Map<Arc> ( &olattice_, fst::RmWeightMapper<Arc>() );
119 LINFO (
"(Recased) lattice available at key=" << outputkey_ );
120 d.fsts[outputkey_] = &olattice_;
// Shared-pointer handle to the language-model application interface.
128 typedef boost::shared_ptr<ApplyLanguageModelOnTheFlyInterfaceType> ApplyLanguageModelOnTheFlyInterfacePtrType;
// Language-model handler: created on first use by
// initializeLanguageModelHandler() and invoked through almotf_->run(...).
129 ApplyLanguageModelOnTheFlyInterfacePtrType almotf_;
// Creates the language-model handler almotf_ on first call; later calls are
// no-ops. Requires exactly one language model to be registered under lmkey_
// in d_->klm; otherwise USER_CHECK prints the message and ends the program.
134 void initializeLanguageModelHandler() {
// Handler already built: nothing to do.
135 if (almotf_.get() )
return;
136 USER_CHECK ( d_->klm.find ( lmkey_ ) != d_->klm.end()
137 && d_->klm[lmkey_].size() == 1
138 ,
"You need to load ONE recasing Language Model!" );
// Labels handed to assignKenLmHandler as `epsilons`.
// NOTE(review): presumably these are skipped/treated as epsilon when the
// language model is applied -- confirm against assignKenLmHandler.
140 unordered_set<Label> epsilons;
142 epsilons.insert (
DR );
143 epsilons.insert (
OOV );
145 epsilons.insert (
SEP );
// Build the handler from the single model stored under lmkey_.
// NOTE(review): `mw` and `a` are declared on lines missing from this listing.
147 almotf_.reset(assignKenLmHandler<Arc>
148 ( rg_, lmkey_, epsilons, *(d_->klm[lmkey_][0]), mw, a));
// Recases the lattice pointed to by `fst`, in place.
// Visible steps: compose *fst with the unigram map, tag OOVs, apply the
// language model through almotf_, recover OOVs, reduce the result (n shortest
// paths or weight-based pruning), copy it back into *fst and project onto
// output labels.
// NOTE(review): several lines are missing from this listing (e.g. whatever
// follows each "No recasing step" log message) -- confirm against the full file.
152 void run ( fst::VectorFst<Arc> *
fst ) {
// No entry for the unigram map key: log that recasing is not applied.
153 if ( d_->fsts.find ( unimapkey_ ) == d_->fsts.end() ) {
154 LINFO (
"No recasing step (key=" << unimapkey_ <<
" not found)" );
// Entry exists but holds a null pointer: same outcome.
156 }
else if ( d_->fsts[unimapkey_] == NULL ) {
157 LINFO (
"No recasing step (NULL) " );
// Make sure the language-model handler exists before it is used below.
160 initializeLanguageModelHandler();
161 unimap_ =
static_cast<fst::VectorFst<Arc> *
> ( d_->fsts[unimapkey_] );
162 LINFO (
"Apply Unigram Model to 1-best" );
// Compose the input with the unigram map into a local transducer.
163 fst::VectorFst<Arc> mappedinput ( fst::RRhoCompose<Arc> ( *fst, *unimap_ ) );
164 LINFO (
"Tag OOVs" );
165 tagOOVs<Arc> ( &mappedinput, *d_->recasingvcblm );
// Writes the intermediate transducer to "mappedinput.fst" via LDBG_EXECUTE
// (presumably only in debug builds -- confirm the macro definition).
166 LDBG_EXECUTE ( mappedinput.Write (
"mappedinput.fst" ) );
// The returned pointer is deleted later in this function, so it is treated
// as owned here.
167 fst::VectorFst<Arc> *output = almotf_->run(mappedinput);
168 LINFO (
"Recover OOVs" );
169 recoverOOVs<Arc> ( output );
// shp_ below its maximum value selects the n-shortest-paths strategy.
170 if ( shp_ < std::numeric_limits<unsigned>::max() ) {
171 fst::VectorFst<Arc> *aux =
new fst::VectorFst<Arc>;
172 LINFO (
"Shortest Path n=" << shp_ );
173 fst::ShortestPath<Arc> ( *output, aux, shp_ );
// Replace the LM output with the shortest-path result, freeing the former.
174 delete output; output = aux;
175 fst::TopSort<Arc> ( output );
// Otherwise prune_ below its maximum value selects weight-based pruning,
// with the threshold converted to a weight by mw_.
176 }
else if ( prune_ < std::numeric_limits<float>::max() ) {
177 LINFO (
"Prune by weight=" << prune_ );
178 fst::Prune<Arc> ( output, mw_ ( prune_ ) );
// Message for the case where neither strategy is configured (the statement
// that emits it is not visible in this listing).
181 "prune parameter incorrectly set: first parameter is byshortestpath or byweight" );
// Copy the result into the caller's transducer, free the temporary, and keep
// only the output labels.
184 *fst = *output;
delete output;
185 fst::Project ( fst, fst::PROJECT_OUTPUT );
194 #endif // TASK_DISAMBIG_HPP
std::vector< std::string > getVectorString(const std::string &key) const
Convenience method that returns a vector of strings taking "," as the separator character.
std::string const kRecaserPrune
std::string const kRecaserUnimapLoad
std::string const kRecaserLmLoad
#define LDBG_EXECUTE(order)
T get(const std::string &key) const
Returns parsed value associated to key.
std::string const kRecaserOutput
Templated (hybrid) Interface for Task classes.
Templated functor that creates a weight given a float.
Wrapper to ApplyLanguageModelOnTheFly to apply different kenlm models.
Templated functor that creates a weight given a float.
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
Utilities for DisambigTask and related tasks.
T toNumber(const std::string &x)
Converts a string to an arbitrary number. Converts strings to a number. Quits execution if conversion ...
std::string const kRecaserInput