15 #ifndef TASK_LMBR_COMPUTEPOSTERIORS_HPP 16 #define TASK_LMBR_COMPUTEPOSTERIORS_HPP 37 const std::vector<NGramList>& ngrams;
39 uint minorder_, maxorder_;
49 inline void setOrders (uint minorder, uint maxorder) {
55 inline void operator() (
const fst::VectorFst<fst::StdArc>* fstlat) {
56 fastComputePosteriors (fstlat);
67 LINFO (
"writing posterior probabilities: " << filename);
75 for (uint n = minorder_; n <= maxorder_; ++n) {
76 for (NGramList::const_iterator it = ngrams[n].begin(); it != ngrams[n].end();
79 ofs << std::fixed << std::setprecision (12) << posteriors[w][0][0];
80 using ucam::util::operator<<;
81 ofs <<
"\t" << w <<
'\n';
90 fst::StdArc::StateId GetState (
const fst::NGram& w) {
91 NGramToStateMapper::iterator it = statemapper.find (w);
92 if (it != statemapper.end() ) {
99 fst::VectorFst<fst::StdArc>* ConvertToPosteriors (
const 100 fst::VectorFst<fst::StdArc>*
fst) {
101 fst::VectorFst<fst::StdArc> tmp1 (*fst);
102 fst::RmEpsilon (&tmp1);
103 fst::VectorFst<fst::StdArc> tmp2;
104 fst::Determinize (tmp1, &tmp2);
105 fst::Minimize (&tmp2);
106 fst::VectorFst<fst::LogArc>* tmp3 =
StdToLog (&tmp2);
107 fst::VectorFst<fst::LogArc> tmp4;
108 fst::Push<fst::LogArc, fst::REWEIGHT_TO_FINAL> (*tmp3, &tmp4,
111 fst::VectorFst<fst::StdArc>* tmp5 =
LogToStd (&tmp4);
113 fst::VectorFst<fst::StdArc>* fstret =
new fst::VectorFst<fst::StdArc> (*tmp5);
119 void fastComputePosteriors (
const fst::VectorFst<fst::StdArc>* fstlat) {
120 LINFO (
"computing posteriors: fast mode (vector-based)" );
121 fst::VectorFst<fst::StdArc>* fsttmp = ConvertToPosteriors (fstlat);
122 initializeStateMap();
123 initializePosteriorsTable();
124 for (uint n = minorder_; n <= maxorder_; ++n) {
125 FastComputePosteriorsVectorForward (fsttmp, n);
131 inline void initializeStateMap() {
137 void initializePosteriorsTable() {
138 for (uint n = minorder_; n <= maxorder_; ++n) {
139 for (NGramList::const_iterator it = ngrams[n].begin(); it != ngrams[n].end();
141 posteriors[it->first].resize (1);
142 posteriors[it->first][0].resize (1);
148 void FastComputePosteriorsVectorForward (
const fst::VectorFst<fst::StdArc>*
149 fstlat,
const uint n) {
150 LINFO (
"fast compute posteriors: order=" << n );
153 for (NGramList::const_iterator it = ngrams[n].begin(); it != ngrams[n].end();
155 ngs.push_back (it->first);
157 if (ngs.size() == 1) {
160 fst::LogArc::Label kSpecialLabel = ngs.size();
161 fst::VectorFst<fst::LogArc>* fsttmp1 =
StdToLog (fstlat);
162 LINFO (
"map lattice to order " << n );
163 fst::VectorFst<fst::LogArc>* fsttmp2 = MapToHigherOrderLattice (fsttmp1, ngs,
166 LINFO (
"Number of states=" << fsttmp2->NumStates() );
167 ForwardComputePosteriors (fsttmp2, ngs);
173 void ForwardComputePosteriors (fst::VectorFst<fst::LogArc>* fst,
174 const NGramVector& ngs) {
175 typedef std::map<fst::LogArc::Label, fst::LogArc::Weight> LabelToWeightMapper;
176 std::vector<fst::LogArc::Weight> fwdAlpha;
177 std::vector<LabelToWeightMapper> ngmAlpha;
178 LabelToWeightMapper ngmAlphaFinal;
180 ngmAlpha.resize (fst->NumStates() );
181 fwdAlpha.resize (fst->NumStates(), fst::LogArc::Weight::Zero() );
182 fwdAlpha[fst->Start()] = fst::LogArc::Weight::One();
183 for (fst::StateIterator< fst::VectorFst<fst::LogArc> > si (*fst); !si.Done();
185 fst::LogArc::StateId q = si.Value();
186 for (fst::ArcIterator< fst::VectorFst<fst::LogArc> > ai (*fst, q); !ai.Done();
188 fst::LogArc a = ai.Value();
189 fwdAlpha[a.nextstate] =
Plus (fwdAlpha[a.nextstate],
Times (fwdAlpha[q],
191 if (ngmAlpha[a.nextstate].find (a.ilabel) == ngmAlpha[a.nextstate].end() ) {
192 ngmAlpha[a.nextstate][a.ilabel] = fst::LogArc::Weight::Zero();
194 ngmAlpha[a.nextstate][a.ilabel] =
Plus (ngmAlpha[a.nextstate][a.ilabel],
195 Times (fwdAlpha[q], a.weight) );
196 for (LabelToWeightMapper::const_iterator it = ngmAlpha[q].begin();
197 it != ngmAlpha[q].end(); ++it) {
198 if (it->first != a.ilabel) {
199 if (ngmAlpha[a.nextstate].find (it->first) == ngmAlpha[a.nextstate].end() ) {
200 ngmAlpha[a.nextstate][it->first] = fst::LogArc::Weight::Zero();
202 ngmAlpha[a.nextstate][it->first] =
Plus (ngmAlpha[a.nextstate][it->first],
203 Times (it->second, a.weight) );
207 if (fst->Final (q) != fst::LogArc::Weight::Zero() ) {
208 for (LabelToWeightMapper::const_iterator it = ngmAlpha[q].begin();
209 it != ngmAlpha[q].end(); ++it) {
210 if (ngmAlphaFinal.find (it->first) == ngmAlphaFinal.end() ) {
211 ngmAlphaFinal[it->first] = fst::LogArc::Weight::Zero();
213 ngmAlphaFinal[it->first] =
Plus (ngmAlphaFinal[it->first],
214 Times (ngmAlpha[q][it->first], fst->Final (q) ) );
219 for (LabelToWeightMapper::const_iterator it = ngmAlphaFinal.begin();
220 it != ngmAlphaFinal.end(); ++it) {
221 posteriors[ngs[it->first]][0][0] = exp (-1.0f * it->second.Value() );
227 fst::VectorFst<Arc>* MapToHigherOrderLattice (
const fst::VectorFst<Arc>* fstlat,
228 const NGramVector& ngs,
const uint order) {
229 fst::VectorFst<Arc>* fsttmp1 =
new fst::VectorFst<Arc> (*fstlat);
230 fst::VectorFst<Arc>* fsttmp2 = MakeOrderMappingTransducer<Arc> (ngs, order);
231 fst::VectorFst<Arc>* fsttmp3 =
new fst::VectorFst<Arc>;
232 Compose (*fsttmp1, *fsttmp2, fsttmp3);
235 fst::Project (fsttmp3, fst::PROJECT_OUTPUT);
236 fst::RmEpsilon (fsttmp3);
237 fst::VectorFst<Arc>* fsttmp4 =
new fst::VectorFst<Arc>;
238 fst::Determinize (*fsttmp3, fsttmp4);
240 fst::Minimize (fsttmp4);
246 void MakeOrderMappingTransducerHistory (fst::VectorFst<Arc>* fst,
248 typedef typename Arc::StateId StateId;
249 typedef typename Arc::Weight Weight;
250 StateId src = fst->Start();
251 StateId trg = fst->Start();
252 for (uint i = 0; i < h.size(); ++i) {
253 trg = fst->AddState();
254 fst->AddArc (src, Arc (h[i],
kEpsLabel, Weight::One(), trg) );
256 if (i + 1 == h.size() ) {
257 fst->SetFinal (trg, Weight::One() );
260 statemapper[h] = trg;
265 fst::VectorFst<Arc>* MakeOrderMappingTransducer (
const NGramVector& ngs,
267 typedef typename Arc::StateId StateId;
268 typedef typename Arc::Weight Weight;
269 fst::VectorFst<Arc>* fst =
new fst::VectorFst<Arc>;
270 StateId start = fst->AddState();
271 fst->SetStart (start);
273 statemapper[w] = start;
274 for (uint i = 1; i < ngs.size(); ++i) {
277 h.resize (h.size() - 1);
278 if (GetState (h) == -1) {
279 MakeOrderMappingTransducerHistory (fst, h);
282 for (uint i = 1; i < ngs.size(); ++i) {
286 StateId src = GetState (h);
287 StateId trg = GetState (t);
289 trg = fst->AddState();
290 statemapper[t] = trg;
293 WordId wid = w[w.size() - 1];
294 fst->AddArc (src, Arc (wid, i, Arc::Weight::One(), trg) );
295 fst->SetFinal (trg, Arc::Weight::One() );
297 fst::ArcSort (fst, fst::ILabelCompare<Arc>() );
Wrapper stream class that writes to pipes, text files or gzipped files.
fst::VectorFst< fst::LogArc > * StdToLog(const fst::VectorFst< fst::StdArc > *fst)
void WritePosteriors(const std::string &filename)
Write posteriors to file.
fst::VectorFst< fst::StdArc > * LogToStd(const fst::VectorFst< fst::LogArc > *fst)
std::vector< NGram > NGramVector
NGramToPosteriorsMapper & getPosteriors()
Retrieve reference to posteriors.
void SetFinalStateCost(fst::MutableFst< fst::StdArc > *fst, const fst::StdArc::Weight cost)
unordered_map< fst::NGram, std::vector< std::vector< Posterior > >, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToPosteriorsMapper
TropicalSparseTupleWeight< T > Plus(const TropicalSparseTupleWeight< T > &vw1, const TropicalSparseTupleWeight< T > &vw2)
void operator()(const fst::VectorFst< fst::StdArc > *fstlat)
Compute posteriors.
std::basic_string< WordId > NGram
void setOrders(uint minorder, uint maxorder)
Set order.
void WritePosteriors(ucam::util::oszfstream &ofs)
Write posteriors to a szfstream.
TropicalSparseTupleWeight< T > Times(const TropicalSparseTupleWeight< T > &w1, const TropicalSparseTupleWeight< T > &w2)
std::unordered_map< NGram, StdArc::Weight, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramList
unordered_map< fst::NGram, fst::StdArc::StateId, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToStateMapper
ComputePosteriors(const std::vector< NGramList > &ng)
Initialize from list of ngrams.