15 #ifndef TASK_DISAMBIG_HPP    16 #define TASK_DISAMBIG_HPP    35 template<
class Data, 
class Arc>
    38   typedef typename Arc::Weight Weight;
    39   typedef typename Arc::Label Label;
    42   fst::VectorFst<Arc> *unimap_;
    53   const std::string lmkey_;
    55   const std::string inputkey_;
    57   const std::string outputkey_;
    59   const std::string unimapkey_;
    68   fst::VectorFst<Arc> olattice_;
    77                  bool forceloading = 
false ) :
    80     inputkey_ ( inputkey ),
    81     outputkey_ ( outputkey ),
    82     unimapkey_ ( unimapkey ),
    89                  "prune parameter must be byshortestpath/byweight,number" );
    90     if ( pstrat[0] == 
"byshortestpath" ) {
    91       LINFO ( 
"Shortest Path n=" << pstrat[1] );
    92       shp_ = toNumber<unsigned> ( pstrat[1] );
    93       prune_ = std::numeric_limits<float>::max();
    94     } 
else if ( pstrat[0] == 
"byweight" ) {
    95       LINFO ( 
"Prune by weight b=" << pstrat[1] );
    96       shp_ = std::numeric_limits<unsigned>::max();
    97       prune_ = toNumber<float> ( pstrat[1] );
   100                    "prune parameter incorrectly set: first parameter is byshortestpath or byweight" );
   102     if ( rg.
get<std::string> ( lmkey_) == 
""   103          && rg.
get<std::string> ( unimapkey_ ) == 
"" ) 
return;
   105                         && rg.
get<std::string> ( unimapkey_ ) != 
"" ,
   106                         "recaser.lm and recaser.unimap must either be both defined or both left to empty string " ) )
   113     USER_CHECK ( d.fsts.find ( inputkey_ ) != d.fsts.end(),
   114                  "No input fst to recase?" );
   115     fst::ShortestPath<Arc> ( * ( 
static_cast< fst::VectorFst<Arc> * 
>   116                                  (d.fsts[inputkey_] ) ), &olattice_, 1 );
   117     fst::Map<Arc> ( &olattice_, fst::RmWeightMapper<Arc>() );
   119     LINFO ( 
"(Recased) lattice available at key=" << outputkey_ );
   120     d.fsts[outputkey_] = &olattice_;
   128   typedef boost::shared_ptr<ApplyLanguageModelOnTheFlyInterfaceType> ApplyLanguageModelOnTheFlyInterfacePtrType;
   129   ApplyLanguageModelOnTheFlyInterfacePtrType almotf_;
   /**
    * \brief Sets up the language model handler almotf_ the first time it is needed.
    *
    * Does nothing if almotf_ already holds a handler. Otherwise checks that
    * exactly one language model is registered under lmkey_ in d_->klm, collects
    * the labels DR, OOV and SEP into a set, and resets almotf_ with the object
    * returned by assignKenLmHandler<Arc>.
    *
    * NOTE(review): the embedded listing numbers skip 139, 141, 144, 146 and
    * 149-151, so statements may be missing from this view -- in particular the
    * declarations of `mw` and `a` used in the final call are not visible here.
    */
   134   void initializeLanguageModelHandler() {
   // Already initialized: almotf_ holds a non-null pointer, nothing to do.
   135     if (almotf_.get() )  
return; 
   // Require an entry for lmkey_ in d_->klm containing exactly one element.
   136     USER_CHECK ( d_->klm.find ( lmkey_ ) != d_->klm.end() 
   137                  && d_->klm[lmkey_].size() == 1
   138                  , 
"You need to load ONE recasing Language Model!" );
   // Label set handed to assignKenLmHandler below (named "epsilons" by the code).
   140     unordered_set<Label> epsilons;
   142     epsilons.insert ( 
DR );
   143     epsilons.insert ( 
OOV );
   145     epsilons.insert ( 
SEP );
   // Build the handler from the single model stored under lmkey_ and keep it in almotf_.
   147     almotf_.reset(assignKenLmHandler<Arc>
   148                   ( rg_, lmkey_, epsilons, *(d_->klm[lmkey_][0]), mw, a));
   /**
    * \brief Recases the lattice pointed to by fst, in place.
    *
    * Steps visible in this listing: look up the unigram map under unimapkey_,
    * make sure the language model handler exists, build `mappedinput` from
    * fst::RRhoCompose of *fst with the unigram map, tag OOVs, run almotf_ on
    * the result, recover OOVs, reduce the result either by shortest path
    * (shp_) or by weight pruning (prune_), copy it into *fst and project it
    * with fst::PROJECT_OUTPUT.
    *
    * \param fst Lattice to process; overwritten with the result.
    *
    * NOTE(review): the embedded listing numbers skip 155, 158-159, 179-180 and
    * 182-183, so the statements that follow both "No recasing step" logs and
    * the opening of the final check are not visible here.
    */
   152   void run ( fst::VectorFst<Arc> *
fst ) {
   // No entry for unimapkey_ in d_->fsts: only the log call is visible.
   // Presumably followed by an early return on the omitted line -- TODO confirm.
   153     if ( d_->fsts.find ( unimapkey_ ) == d_->fsts.end() ) {
   154       LINFO ( 
"No recasing step (key=" << unimapkey_ << 
" not found)" );
   156     } 
else if ( d_->fsts[unimapkey_] == NULL ) {
   // Entry exists but is NULL: same remark as above about the omitted lines.
   157       LINFO ( 
"No recasing step (NULL) " );
   // Create almotf_ if it has not been created yet.
   160     initializeLanguageModelHandler();
   // The stored pointer is cast to fst::VectorFst<Arc>*; the cast is unchecked.
   161     unimap_ = 
static_cast<fst::VectorFst<Arc> *
> ( d_->fsts[unimapkey_] );
   162     LINFO ( 
"Apply Unigram Model to 1-best" );
   163     fst::VectorFst<Arc> mappedinput ( fst::RRhoCompose<Arc> ( *fst, *unimap_ ) );
   164     LINFO ( 
"Tag OOVs" );
   165     tagOOVs<Arc> ( &mappedinput, *d_->recasingvcblm );
   // Writes mappedinput.fst through LDBG_EXECUTE (presumably debug builds only -- confirm).
   166     LDBG_EXECUTE ( mappedinput.Write ( 
"mappedinput.fst" ) );
   // The returned pointer is deleted further down, so this code treats it as owned.
   167     fst::VectorFst<Arc> *output = almotf_->run(mappedinput);
   168     LINFO ( 
"Recover OOVs" );
   169     recoverOOVs<Arc> ( output );
   // Shortest-path mode: shp_ below the unsigned maximum selects this branch.
   170     if ( shp_ < std::numeric_limits<unsigned>::max() ) {
   171       fst::VectorFst<Arc> *aux = 
new fst::VectorFst<Arc>;
   172       LINFO ( 
"Shortest Path n=" << shp_ );
   173       fst::ShortestPath<Arc> ( *output, aux, shp_ );
   // Replace the full result with its shp_ shortest paths, then topologically sort it.
   174       delete output; output = aux;
   175       fst::TopSort<Arc> ( output );
   176     } 
else if ( prune_ < std::numeric_limits<float>::max() ) {
   // Pruning mode: prune_ is converted to a weight with mw_ and used as the threshold.
   177       LINFO ( 
"Prune by weight=" << prune_ );
   178       fst::Prune<Arc> ( output, mw_ ( prune_ ) );
   // Message of a check whose opening is on an omitted line; presumably the
   // fallback when neither mode is configured -- TODO confirm.
   181                    "prune parameter incorrectly set: first parameter is byshortestpath or byweight" );
   // Copy the result into the caller's lattice and free the temporary.
   184     *fst = *output; 
delete output;
   185     fst::Project ( fst, fst::PROJECT_OUTPUT ); 
   194 #endif // TASK_DISAMBIG_HPP 
std::vector< std::string > getVectorString(const std::string &key) const 
Convenience method that returns a vector of strings taking "," as the separator character. 
std::string const kRecaserPrune
std::string const kRecaserUnimapLoad
std::string const kRecaserLmLoad
#define LDBG_EXECUTE(order)
T get(const std::string &key) const 
Returns parsed value associated to key. 
std::string const kRecaserOutput
Templated (hybrid) Interface for Task classes. 
Templated functor that creates a weight given a float. 
Wrapper to ApplyLanguageModelOnTheFly to apply different kenlm models. 
Templated functor that creates a weight given a float. 
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends. 
Utilities for DisambigTask and related tasks. 
T toNumber(const std::string &x)
Converts a string to an arbitrary numeric type. Quits execution if conversion ...
std::string const kRecaserInput