46 fst::VectorFst<fst::StdArc> lmbroutput_;
50 std::vector<NGramList> ngrams;
55 const std::string evidencespacekey_;
57 const std::string hypothesesspacekey_;
59 const std::string lmbroutputkey_;
71 const std::string& hypothesesspacekey =
85 evidencespacekey_ (evidencespacekey),
86 hypothesesspacekey_ (hypothesesspacekey),
87 lmbroutputkey_ (lmbroutputkey),
88 minorder_ (rg.get<unsigned> (minorder) ),
89 maxorder_ (rg.get<unsigned> (maxorder) ),
90 alpha_ (rg, alphakey),
92 ppweight_ (rg.get<float> (preprunekey) ),
93 onebest_ (rg.
exists (writeonebestkey) ),
94 loadlexstdarc_ (rg.
exists (lexstdarckey) ),
95 theta_ (rg.get<float> (unigramprecisionkey),
96 rg.get<float> (precisionratiokey),
97 rg.get<float> (numberunigramtokenskey),
98 rg.get<unsigned> (minorder),
99 rg.get<unsigned> (maxorder)
101 if (minorder_ < 1 || maxorder_ < 1) {
102 cerr <<
"error: 'minorder' and/or 'maxorder' < 1 \n";
105 if (minorder_ > 10 || maxorder_ > 10) {
106 cerr <<
"error: 'minorder' and/or 'maxorder' > 10\n";
109 if (minorder_ > maxorder_) {
110 cerr <<
"error: 'minorder' > 'maxorder'\n";
113 LINFO (
"min order=" << minorder_ );
114 LINFO (
"max order=" << maxorder_ );
115 ngrams.resize (maxorder_ + 1);
120 FORCELINFO (
"applying Lattice MBR , sentence " << d.sidx );
121 lmbroutput_.DeleteStates();
123 fst::VectorFst<fst::StdArc>* fstevd = NULL;
124 fst::VectorFst<fst::StdArc> aux;
126 if (d.fsts.find (evidencespacekey_) != d.fsts.end() ) {
127 if (loadlexstdarc_) {
128 fst::Map<fst::LexStdArc, fst::StdArc> (*
129 (
static_cast<fst::VectorFst<fst::LexStdArc> *
> (d.fsts[evidencespacekey_]) ),
132 d.fsts[evidencespacekey_] =
135 fstevd =
static_cast<fst::VectorFst<fst::StdArc> *
> (d.fsts[evidencespacekey_]);
144 fst::Determinize (fst::RmEpsilonFst<fst::StdArc> (*fstevd), fstevd);
145 fst::Minimize (fstevd);
146 if (ppweight_ != std::numeric_limits<float>::max() ) {
147 LINFO (
"Pruning evidence space, weight=" << ppweight_);
149 fst::Determinize (fst::RmEpsilonFst<fst::StdArc> (*fstevd),
151 fst::Minimize (fstevd);
154 if (fstevd == NULL) {
155 LINFO (
"No evidence space provided. Skipping LMBR!");
158 fst::VectorFst<fst::StdArc>* fsthyp = NULL;
159 fst::VectorFst<fst::StdArc> aux2;
160 if (d.fsts.find (hypothesesspacekey_) != d.fsts.end() ) {
161 if (loadlexstdarc_) {
162 fst::Map<fst::LexStdArc, fst::StdArc> (*
163 (
static_cast<fst::VectorFst<fst::LexStdArc> *
> (d.fsts[hypothesesspacekey_]) ),
165 d.fsts[hypothesesspacekey_] =
168 fsthyp =
static_cast<fst::VectorFst<fst::StdArc> *
> 169 (d.fsts[hypothesesspacekey_]);
171 }
else fsthyp = fstevd;
174 fst::RmEpsilon (fsthyp);
176 unsigned count =
extractNGrams (*fstevd, ngrams, minorder_, maxorder_);
177 LINFO ( count <<
" ngrams extracted (evidence space)");
178 if (fsthyp != NULL && fsthyp != fstevd) {
180 unsigned count_hs =
extractNGrams (*fsthyp, ngrams, minorder_, maxorder_);
181 LINFO ( count_hs <<
" ngrams extracted (hypotheses space)");
185 LINFO (
"Fast posterior computing");
188 LINFO (
"scaling weights by " << std::fixed << std::setprecision (
190 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > scaledfst (
FstScaleWeights (
191 fstevd, alpha_() ) );
192 cp (scaledfst.get() );
195 fst::Map (*fsthyp, &lmbroutput_, fst::RmWeightMapper<fst::StdArc>() );
196 boost::scoped_ptr<fst::VectorFst<fst::StdArc> > lmbrlat (ap (lmbroutput_) );
197 fst::VectorFst<fst::StdArc> original (*lmbrlat);
199 fst::Map (original, &lmbroutput_, fst::TimesMapper<fst::StdArc> (wps_() ) );
202 LINFO (
"wps=" << wps_() <<
":" << hyp);
205 d.lmbronebest->alpha.push_back (alpha_() );
206 d.lmbronebest->wps.push_back (wps_() );
207 d.lmbronebest->hyp.push_back (hyp);
214 d.lmbronebest->idx = d.sidx;
215 d.fsts[lmbroutputkey_] = &lmbroutput_;
217 LINFO (
"LMBR finished");
void Prune(fst::MutableFst< FunctionArc > *, PruneStats &)
const std::string kLmbrAlpha
const std::string kLmbrLoadHypothesesspace
void FstGetBestStringHypothesis(const fst::VectorFst< Arc > &latfst, std::string &hyp)
unordered_set< fst::WordId > Wlist
Based on Graeme Blackwood's PhD work and original code – implementation of posterior computation fro...
const std::string kLmbrLexstdarc
NGramToPosteriorsMapper & getPosteriors()
Retrieve reference to posteriors.
RelabelUtil & addIPL(typename Arc::Label labelfind, typename Arc::Label labelreplace)
fst::TropicalWeightTpl< F > Map(double)
unordered_map< fst::NGram, std::vector< std::vector< Posterior > >, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToPosteriorsMapper
bool run(Data &d)
Run method inherited from TaskInterface.
Functor handling LMBR theta parameters.
void start(void)
Empty implementation.
const std::string kLmbrWritedecoder
const std::string kLmbrPreprune
void next(void)
Increment index.
Templated (hybrid) Interface for Task classes.
LexStdArc to StdArc Mapper.
const std::string kLmbrLoadEvidencespace
const std::string kLmbrMaxorder
bool exists(const std::string &source, const std::string &needle)
Convenience function to find out whether a needle exists in a text.
Utility functor for relabeling one or more lattices. Note that you can chain commands. See the unit test in fstutils.gtest.cpp for an example.
const std::string kLmbrMinorder
void extractSourceVocabulary(const fst::VectorFst< Arc > &myfst, unordered_set< std::string > *vcb)
Extract source (left-side) vocabulary from an fst.
fst::VectorFst< fst::StdArc > * FstScaleWeights(fst::VectorFst< fst::StdArc > *fst, const double scale)
bool done(void)
Checks whether the last element has been reached.
std::unordered_map< NGram, StdArc::Weight, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramList
unordered_map< fst::NGram, fst::StdArc::StateId, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToStateMapper
Functor that applies posteriors to any hypothesis space. Initializes with previously calculated poste...
Based on Graeme Blackwood's PhD work and original code – implementation of posterior application to ...
LmbrTask(const ucam::util::RegistryPO &rg, const std::string &evidencespacekey=HifstConstants::kLmbrLoadEvidencespace, const std::string &hypothesesspacekey=HifstConstants::kLmbrLoadHypothesesspace, const std::string &lmbroutputkey=HifstConstants::kLmbrWritedecoder, const std::string &writeonebestkey=HifstConstants::kLmbrWriteonebest, const std::string &alphakey=HifstConstants::kLmbrAlpha, const std::string &wpskey=HifstConstants::kLmbrWps, const std::string &minorder=HifstConstants::kLmbrMinorder, const std::string &maxorder=HifstConstants::kLmbrMaxorder, const std::string &unigramprecisionkey=HifstConstants::kLmbrP, const std::string &precisionratiokey=HifstConstants::kLmbrR, const std::string &numberunigramtokenskey=HifstConstants::kLmbrT, const std::string &preprunekey=HifstConstants::kLmbrPreprune, const std::string &lexstdarckey=HifstConstants::kLmbrLexstdarc)
Constructor using multiple keys, which can be arranged so as to use different parameter names...
uint extractNGrams(fst::VectorFst< Arc > myfst, std::vector< fst::NGramList > &ngramlist, uint minorder=1, uint maxorder=4)
Interfaces with extractNGrams and generates information in the right format for lmbr classes...
const std::string kLmbrWriteonebest
const std::string kLmbrWps