Cambridge SMT System
task.lmbr.computeposteriors.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Graeme Blackwood, AdriĆ  de Gispert, William Byrne
14 
15 #ifndef TASK_LMBR_COMPUTEPOSTERIORS_HPP
16 #define TASK_LMBR_COMPUTEPOSTERIORS_HPP
17 
25 namespace ucam {
26 namespace lmbr {
27 
28 //Functor that computes posteriors over an evidence space
30  //Private variables
31  private:
32  typedef fst::NGramList NGramList;
33  typedef fst::NGramVector NGramVector;
34  typedef fst::WordId WordId;
35 
36  NGramToStateMapper statemapper;
37  const std::vector<NGramList>& ngrams;
38  NGramToPosteriorsMapper posteriors;
39  uint minorder_, maxorder_;
40 
41  //Public methods...
42  public:
44  ComputePosteriors (const std::vector<NGramList>& ng) : minorder_ (1),
45  maxorder_ (4),
46  ngrams (ng) {};
47 
49  inline void setOrders (uint minorder, uint maxorder) {
50  minorder_ = minorder;
51  maxorder_ = maxorder;
52  };
53 
55  inline void operator() (const fst::VectorFst<fst::StdArc>* fstlat) {
56  fastComputePosteriors (fstlat);
57  statemapper.clear();
58  };
59 
62  return posteriors;
63  };
64 
66  inline void WritePosteriors (const std::string& filename) {
67  LINFO ("writing posterior probabilities: " << filename);
68  ucam::util::oszfstream ofs (filename.data() );
69  WritePosteriors (ofs);
70  ofs.close();
71  };
72 
75  for (uint n = minorder_; n <= maxorder_; ++n) {
76  for (NGramList::const_iterator it = ngrams[n].begin(); it != ngrams[n].end();
77  ++it) {
78  fst::NGram w = it->first;
79  ofs << std::fixed << std::setprecision (12) << posteriors[w][0][0];
80  using ucam::util::operator<<;
81  ofs << "\t" << w << '\n';
82  }
83  }
84  };
85 
86  //Private methods
87  private:
88 
90  fst::StdArc::StateId GetState (const fst::NGram& w) {
91  NGramToStateMapper::iterator it = statemapper.find (w);
92  if (it != statemapper.end() ) {
93  return it->second;
94  }
95  return -1;
96  };
97 
99  fst::VectorFst<fst::StdArc>* ConvertToPosteriors (const
100  fst::VectorFst<fst::StdArc>* fst) {
101  fst::VectorFst<fst::StdArc> tmp1 (*fst);
102  fst::RmEpsilon (&tmp1);
103  fst::VectorFst<fst::StdArc> tmp2;
104  fst::Determinize (tmp1, &tmp2);
105  fst::Minimize (&tmp2);
106  fst::VectorFst<fst::LogArc>* tmp3 = StdToLog (&tmp2);
107  fst::VectorFst<fst::LogArc> tmp4;
108  fst::Push<fst::LogArc, fst::REWEIGHT_TO_FINAL> (*tmp3, &tmp4,
109  fst::kPushWeights);
110  delete tmp3;
111  fst::VectorFst<fst::StdArc>* tmp5 = LogToStd (&tmp4);
112  SetFinalStateCost (tmp5, fst::StdArc::Weight::One() );
113  fst::VectorFst<fst::StdArc>* fstret = new fst::VectorFst<fst::StdArc> (*tmp5);
114  delete tmp5;
115  return fstret;
116  };
117 
119  void fastComputePosteriors (const fst::VectorFst<fst::StdArc>* fstlat) {
120  LINFO ("computing posteriors: fast mode (vector-based)" );
121  fst::VectorFst<fst::StdArc>* fsttmp = ConvertToPosteriors (fstlat);
122  initializeStateMap();
123  initializePosteriorsTable();
124  for (uint n = minorder_; n <= maxorder_; ++n) {
125  FastComputePosteriorsVectorForward (fsttmp, n);
126  }
127  delete fsttmp;
128  }
129 
131  inline void initializeStateMap() {
132  statemapper.clear();
133  fst::NGram n;
134  statemapper[n] = 0;
135  }
136 
137  void initializePosteriorsTable() {
138  for (uint n = minorder_; n <= maxorder_; ++n) {
139  for (NGramList::const_iterator it = ngrams[n].begin(); it != ngrams[n].end();
140  ++it) {
141  posteriors[it->first].resize (1);
142  posteriors[it->first][0].resize (1);
143  }
144  }
145  }
146 
148  void FastComputePosteriorsVectorForward (const fst::VectorFst<fst::StdArc>*
149  fstlat, const uint n) {
150  LINFO ("fast compute posteriors: order=" << n );
151  // NGramVector ngs(1, 0);
152  NGramVector ngs (1);
153  for (NGramList::const_iterator it = ngrams[n].begin(); it != ngrams[n].end();
154  ++it) {
155  ngs.push_back (it->first);
156  }
157  if (ngs.size() == 1) {
158  return;
159  }
160  fst::LogArc::Label kSpecialLabel = ngs.size();
161  fst::VectorFst<fst::LogArc>* fsttmp1 = StdToLog (fstlat);
162  LINFO ("map lattice to order " << n );
163  fst::VectorFst<fst::LogArc>* fsttmp2 = MapToHigherOrderLattice (fsttmp1, ngs,
164  n);
165  delete fsttmp1;
166  LINFO ("Number of states=" << fsttmp2->NumStates() );
167  ForwardComputePosteriors (fsttmp2, ngs);
168  delete fsttmp2;
169  }
170 
173  void ForwardComputePosteriors (fst::VectorFst<fst::LogArc>* fst,
174  const NGramVector& ngs) {
175  typedef std::map<fst::LogArc::Label, fst::LogArc::Weight> LabelToWeightMapper;
176  std::vector<fst::LogArc::Weight> fwdAlpha;
177  std::vector<LabelToWeightMapper> ngmAlpha;
178  LabelToWeightMapper ngmAlphaFinal;
179  TopSort (fst);
180  ngmAlpha.resize (fst->NumStates() );
181  fwdAlpha.resize (fst->NumStates(), fst::LogArc::Weight::Zero() );
182  fwdAlpha[fst->Start()] = fst::LogArc::Weight::One();
183  for (fst::StateIterator< fst::VectorFst<fst::LogArc> > si (*fst); !si.Done();
184  si.Next() ) {
185  fst::LogArc::StateId q = si.Value();
186  for (fst::ArcIterator< fst::VectorFst<fst::LogArc> > ai (*fst, q); !ai.Done();
187  ai.Next() ) {
188  fst::LogArc a = ai.Value();
189  fwdAlpha[a.nextstate] = Plus (fwdAlpha[a.nextstate], Times (fwdAlpha[q],
190  a.weight) );
191  if (ngmAlpha[a.nextstate].find (a.ilabel) == ngmAlpha[a.nextstate].end() ) {
192  ngmAlpha[a.nextstate][a.ilabel] = fst::LogArc::Weight::Zero();
193  }
194  ngmAlpha[a.nextstate][a.ilabel] = Plus (ngmAlpha[a.nextstate][a.ilabel],
195  Times (fwdAlpha[q], a.weight) );
196  for (LabelToWeightMapper::const_iterator it = ngmAlpha[q].begin();
197  it != ngmAlpha[q].end(); ++it) {
198  if (it->first != a.ilabel) {
199  if (ngmAlpha[a.nextstate].find (it->first) == ngmAlpha[a.nextstate].end() ) {
200  ngmAlpha[a.nextstate][it->first] = fst::LogArc::Weight::Zero();
201  }
202  ngmAlpha[a.nextstate][it->first] = Plus (ngmAlpha[a.nextstate][it->first],
203  Times (it->second, a.weight) );
204  }
205  }
206  }
207  if (fst->Final (q) != fst::LogArc::Weight::Zero() ) {
208  for (LabelToWeightMapper::const_iterator it = ngmAlpha[q].begin();
209  it != ngmAlpha[q].end(); ++it) {
210  if (ngmAlphaFinal.find (it->first) == ngmAlphaFinal.end() ) {
211  ngmAlphaFinal[it->first] = fst::LogArc::Weight::Zero();
212  }
213  ngmAlphaFinal[it->first] = Plus (ngmAlphaFinal[it->first],
214  Times (ngmAlpha[q][it->first], fst->Final (q) ) );
215  }
216  }
217  ngmAlpha[q].clear();
218  }
219  for (LabelToWeightMapper::const_iterator it = ngmAlphaFinal.begin();
220  it != ngmAlphaFinal.end(); ++it) {
221  posteriors[ngs[it->first]][0][0] = exp (-1.0f * it->second.Value() );
222  }
223  }
224 
226  template <class Arc>
227  fst::VectorFst<Arc>* MapToHigherOrderLattice (const fst::VectorFst<Arc>* fstlat,
228  const NGramVector& ngs, const uint order) {
229  fst::VectorFst<Arc>* fsttmp1 = new fst::VectorFst<Arc> (*fstlat);
230  fst::VectorFst<Arc>* fsttmp2 = MakeOrderMappingTransducer<Arc> (ngs, order);
231  fst::VectorFst<Arc>* fsttmp3 = new fst::VectorFst<Arc>;
232  Compose (*fsttmp1, *fsttmp2, fsttmp3);
233  delete fsttmp1;
234  delete fsttmp2;
235  fst::Project (fsttmp3, fst::PROJECT_OUTPUT);
236  fst::RmEpsilon (fsttmp3);
237  fst::VectorFst<Arc>* fsttmp4 = new fst::VectorFst<Arc>;
238  fst::Determinize (*fsttmp3, fsttmp4);
239  delete fsttmp3;
240  fst::Minimize (fsttmp4);
241  return fsttmp4;
242  }
243 
245  template <class Arc>
246  void MakeOrderMappingTransducerHistory (fst::VectorFst<Arc>* fst,
247  const fst::NGram& h) {
248  typedef typename Arc::StateId StateId;
249  typedef typename Arc::Weight Weight;
250  StateId src = fst->Start();
251  StateId trg = fst->Start();
252  for (uint i = 0; i < h.size(); ++i) {
253  trg = fst->AddState();
254  fst->AddArc (src, Arc (h[i], kEpsLabel, Weight::One(), trg) );
255  src = trg;
256  if (i + 1 == h.size() ) {
257  fst->SetFinal (trg, Weight::One() );
258  }
259  }
260  statemapper[h] = trg;
261  }
262 
264  template <class Arc>
265  fst::VectorFst<Arc>* MakeOrderMappingTransducer (const NGramVector& ngs,
266  const uint order) {
267  typedef typename Arc::StateId StateId;
268  typedef typename Arc::Weight Weight;
269  fst::VectorFst<Arc>* fst = new fst::VectorFst<Arc>;
270  StateId start = fst->AddState();
271  fst->SetStart (start);
272  fst::NGram w;
273  statemapper[w] = start;
274  for (uint i = 1; i < ngs.size(); ++i) {
275  fst::NGram h = ngs[i];
276  // h.pop_back();
277  h.resize (h.size() - 1);
278  if (GetState (h) == -1) {
279  MakeOrderMappingTransducerHistory (fst, h);
280  }
281  }
282  for (uint i = 1; i < ngs.size(); ++i) {
283  fst::NGram w = ngs[i];
284  fst::NGram h (w.begin(), w.end() - 1); // NGram history at source
285  fst::NGram t (w.begin() + 1, w.end() ); // NGram history at target
286  StateId src = GetState (h);
287  StateId trg = GetState (t);
288  if (trg == -1) {
289  trg = fst->AddState();
290  statemapper[t] = trg;
291  }
292  // WordId wid = w.back();
293  WordId wid = w[w.size() - 1];
294  fst->AddArc (src, Arc (wid, i, Arc::Weight::One(), trg) );
295  fst->SetFinal (trg, Arc::Weight::One() );
296  }
297  fst::ArcSort (fst, fst::ILabelCompare<Arc>() );
298  return fst;
299  }
300 
301 };
302 
303 }
304 } // end namespaces
305 
306 #endif
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
fst::VectorFst< fst::LogArc > * StdToLog(const fst::VectorFst< fst::StdArc > *fst)
Definition: fstutils.hpp:371
void WritePosteriors(const std::string &filename)
Write posteriors to file.
fst::VectorFst< fst::StdArc > * LogToStd(const fst::VectorFst< fst::LogArc > *fst)
Definition: fstutils.hpp:378
#define LINFO(msg)
Definition: fstio.hpp:27
std::vector< NGram > NGramVector
NGramToPosteriorsMapper & getPosteriors()
Retrieve reference to posteriors.
const unsigned kEpsLabel
Definition: data.lmbr.hpp:23
void SetFinalStateCost(fst::MutableFst< fst::StdArc > *fst, const fst::StdArc::Weight cost)
Definition: fstutils.hpp:445
unordered_map< fst::NGram, std::vector< std::vector< Posterior > >, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToPosteriorsMapper
Definition: data.lmbr.hpp:35
unsigned WordId
TropicalSparseTupleWeight< T > Plus(const TropicalSparseTupleWeight< T > &vw1, const TropicalSparseTupleWeight< T > &vw2)
void operator()(const fst::VectorFst< fst::StdArc > *fstlat)
Compute posteriors.
std::basic_string< WordId > NGram
void setOrders(uint minorder, uint maxorder)
Set order.
void WritePosteriors(ucam::util::oszfstream &ofs)
Write posteriors to a szfstream.
TropicalSparseTupleWeight< T > Times(const TropicalSparseTupleWeight< T > &w1, const TropicalSparseTupleWeight< T > &w2)
std::unordered_map< NGram, StdArc::Weight, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramList
unordered_map< fst::NGram, fst::StdArc::StateId, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToStateMapper
Definition: data.lmbr.hpp:31
ComputePosteriors(const std::vector< NGramList > &ng)
Initialize from list of ngrams.
Definition: bleu.hpp:14