15 #ifndef TASK_LMBR_APPLYPOSTERIORS 16 #define TASK_LMBR_APPLYPOSTERIORS 31 std::vector<fst::StdArc::Weight> theta_;
35 Theta (
unsigned minorder = 1,
unsigned maxorder = 4) {
36 theta_.resize (maxorder + 1);
38 for (
int n = minorder; n <= maxorder; n++) {
41 LINFO (
"ngram factors disabled");
42 LINFO (
"theta[0]=" << std::fixed << std::setprecision (10) << theta_[0] );
43 for (
int n = minorder; n <= maxorder; n++) {
44 LINFO (
"theta[" << n <<
"]=" << std::fixed << std::setprecision (
50 Theta (
float p,
float r,
float T,
unsigned minorder = 1,
51 unsigned maxorder = 4) {
52 theta_.resize (maxorder + 1);
53 if (T != 0 && p != 0 && r != 0) {
55 LINFO (
"p=" << std::fixed << std::setprecision (4) << p );
56 LINFO (
"r=" << std::fixed << std::setprecision (4) << r );
58 for (
int n = minorder; n <= maxorder; n++) {
59 theta_[n] = 1 / (4 * T * p * pow (r, n - 1) );
63 for (
int n = minorder; n <= maxorder; n++) {
67 LINFO (
"theta[0]=" << std::fixed << std::setprecision (10) << theta_[0] );
68 for (
int n = minorder; n <= maxorder; n++) {
69 LINFO (
"theta[" << n <<
"]=" << std::fixed << std::setprecision (
73 inline fst::StdArc::Weight
const&
operator() (
unsigned k) {
88 std::vector<NGramList>& hs_ngrams_;
94 fst::VectorFst<fst::StdArc> lmbroutput_;
106 unsigned minorder = 1,
107 unsigned maxorder = 4) :
108 minorder_ (minorder),
109 maxorder_ (maxorder),
116 fst::VectorFst<fst::StdArc> *
operator() (fst::VectorFst<fst::StdArc>
const 118 LINFO (
"decoding...");
119 fst::VectorFst<fst::StdArc> aux;
120 fst::Map (fsthyp, &aux, fst::TimesMapper<fst::StdArc> (theta_ (0) ) );
122 fst::VectorFst<fst::StdArc>* fstmbr = NULL;
123 LINFO (
"NS=" << aux.NumStates() );
124 fstmbr = fastApplyPosteriors (aux);
133 fst::StdArc::StateId GetState (
const fst::NGram& w) {
134 NGramToStateMapper::iterator it = statemapper.find (w);
135 if (it != statemapper.end() ) {
142 void initializeStateMap() {
149 void applyPosteriorsEx (fst::MutableFst<fst::StdArc>* fstlat,
151 fst::MutableFst<fst::StdArc>* fsttmp = fstlat->Copy();
152 fst::VectorFst<fst::StdArc>* fstpst = makePosteriorsFST (n);
153 fst::ArcSort (fstpst, fst::ILabelCompare<fst::StdArc>() );
154 fst::Compose (*fsttmp, *fstpst, fstlat);
160 fst::VectorFst<fst::StdArc>* fastApplyPosteriors (
const 161 fst::VectorFst<fst::StdArc>& fsthyp) {
162 LINFO (
"fast decoding enabled");
163 initializeStateMap();
164 fst::VectorFst<fst::StdArc>* fsttmp = fsthyp.Copy();
165 for (
unsigned n = maxorder_; n >= minorder_; --n) {
166 if (hs_ngrams_[n].size() > 0) {
167 applyPosteriorsEx (fsttmp, n);
174 fst::VectorFst<fst::StdArc>* makeUnigramPosteriorsFST() {
175 fst::VectorFst<fst::StdArc>*
fst =
new fst::VectorFst<fst::StdArc>;
176 fst::StdArc::StateId startState = fst->AddState();
177 fst->SetStart (startState);
178 for (NGramList::const_iterator it = hs_ngrams_[1].begin();
179 it != hs_ngrams_[1].end(); ++it) {
181 fst::StdArc::Weight p = 0;
182 if (posteriors_.find (w) != posteriors_.end() ) p = posteriors_[w][0][0] *
185 fst->AddArc (startState, fst::StdArc (w[0], w[0], p, startState) );
187 fst->SetFinal (startState, fst::StdArc::Weight::One() );
188 LINFO (std::setw (6) << hs_ngrams_[1].size() <<
" 1-ngram gain(s) applied");
193 fst::VectorFst<fst::StdArc>* makePosteriorsFST (
const unsigned n) {
195 return makeUnigramPosteriorsFST();
197 fst::VectorFst<fst::StdArc>*
fst =
new fst::VectorFst<fst::StdArc>;
198 fst::StdArc::StateId startState = fst->AddState();
199 fst->SetStart (startState);
200 for (NGramList::const_iterator it = hs_ngrams_[n].begin();
201 it != hs_ngrams_[n].end(); ++it) {
204 h.resize (h.size() - 1);
205 if (GetState (h) == -1) {
206 MakeHistory (fst, h);
209 for (NGramList::const_iterator it = hs_ngrams_[n].begin();
210 it != hs_ngrams_[n].end(); ++it) {
213 h.assign ( w.begin(), w.end() - 1);
214 t.assign ( w.begin() + 1, w.end() );
215 fst::StdArc::StateId src = GetState (h);
216 fst::StdArc::StateId trg = GetState (t);
218 trg = fst->AddState();
220 fst::StdArc::Weight::One() );
221 statemapper[t] = trg;
223 WordId wid = t[t.size() - 1];
224 fst::StdArc::Weight p = 0;
225 if (posteriors_.find (w) != posteriors_.end() ) p = posteriors_[w][0][0] *
228 fst->AddArc (src, fst::StdArc (wid, wid, p, trg) );
229 fst->SetFinal (trg, fst::StdArc::Weight::One() );
231 LINFO (std::setw (6) << hs_ngrams_[n].size() <<
" " << n <<
232 "-ngram gain(s) applied");
237 void MakeHistory (fst::VectorFst<fst::StdArc>*
fst,
const fst::NGram& h) {
238 fst::StdArc::StateId src = fst->Start();
239 fst::StdArc::StateId trg;
240 for (fst::NGram::const_iterator it = h.begin(); it != h.end(); ++it) {
242 trg = fst->AddState();
243 fst->AddArc (src, fst::StdArc (wid, wid, fst::StdArc::Weight::One(), trg) );
246 statemapper[h] = trg;
253 #endif //TASK_LMBR_APPLYPOSTERIORS Theta(unsigned minorder=1, unsigned maxorder=4)
unordered_set< fst::WordId > Wlist
void SetFinalStateCost(fst::MutableFst< fst::StdArc > *fst, const fst::StdArc::Weight cost)
fst::TropicalWeightTpl< F > Map(double)
unordered_map< fst::NGram, std::vector< std::vector< Posterior > >, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToPosteriorsMapper
Functor handling LMBR theta parameters.
std::basic_string< WordId > NGram
fst::VectorFst< fst::StdArc > * FstScaleWeights(fst::VectorFst< fst::StdArc > *fst, const double scale)
ApplyPosteriors(std::vector< NGramList > &ng, NGramToPosteriorsMapper &pst, Theta &theta, unsigned minorder=1, unsigned maxorder=4)
Constructor: initializes functor with theta, min/max order, the ngrams and the posteriors.
std::unordered_map< NGram, StdArc::Weight, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramList
unordered_map< fst::NGram, fst::StdArc::StateId, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToStateMapper
fst::StdArc::Weight const & operator()(unsigned k)
Functor that applies posteriors to any hypothesis space. Initializes with previously calculated posteriors...
Theta(float p, float r, float T, unsigned minorder=1, unsigned maxorder=4)
Constructor – Theta parameters defined from p, r, T, minorder, and maxorder.