Cambridge SMT System
task.lmbr.applyposteriors.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Graeme Blackwood, Adrià de Gispert, William Byrne
14 
15 #ifndef TASK_LMBR_APPLYPOSTERIORS
16 #define TASK_LMBR_APPLYPOSTERIORS
17 
25 namespace ucam {
26 namespace lmbr {
27 
29 class Theta {
30  private:
31  std::vector<fst::StdArc::Weight> theta_;
32  public:
33 
34  //Constructor with no p,r,T defined -- ngram factors disabled
35  Theta (unsigned minorder = 1, unsigned maxorder = 4) {
36  theta_.resize (maxorder + 1);
37  theta_[0] = 0.0f;
38  for (int n = minorder; n <= maxorder; n++) {
39  theta_[n] = 1.0f;
40  }
41  LINFO ( "ngram factors disabled");
42  LINFO ( "theta[0]=" << std::fixed << std::setprecision (10) << theta_[0] );
43  for (int n = minorder; n <= maxorder; n++) {
44  LINFO ( "theta[" << n << "]=" << std::fixed << std::setprecision (
45  10) << theta_[n] );
46  }
47  }
48 
50  Theta (float p, float r, float T, unsigned minorder = 1,
51  unsigned maxorder = 4) {
52  theta_.resize (maxorder + 1);
53  if (T != 0 && p != 0 && r != 0) {
54  LINFO ( "T=" << T );
55  LINFO ( "p=" << std::fixed << std::setprecision (4) << p );
56  LINFO ( "r=" << std::fixed << std::setprecision (4) << r );
57  theta_[0] = -1 / T;
58  for (int n = minorder; n <= maxorder; n++) {
59  theta_[n] = 1 / (4 * T * p * pow (r, n - 1) );
60  }
61  } else {
62  theta_[0] = 0.0f;
63  for (int n = minorder; n <= maxorder; n++) {
64  theta_[n] = 1.0f;
65  }
66  }
67  LINFO ( "theta[0]=" << std::fixed << std::setprecision (10) << theta_[0] );
68  for (int n = minorder; n <= maxorder; n++) {
69  LINFO ( "theta[" << n << "]=" << std::fixed << std::setprecision (
70  10) << theta_[n] );
71  }
72  };
73  inline fst::StdArc::Weight const& operator() (unsigned k) {
74  return theta_[k];
75  }
76 
77 };
78 
81 
82  private:
83  typedef fst::NGramList NGramList;
84  typedef fst::WordId WordId;
85 
86  NGramToStateMapper statemapper;
87  Wlist vocab;
88  std::vector<NGramList>& hs_ngrams_;
89  Theta& theta_;
90  NGramToPosteriorsMapper& posteriors_;
91 
92  float wps_;
93  //Lmbr output...
94  fst::VectorFst<fst::StdArc> lmbroutput_;
95  unsigned minorder_;
96  unsigned maxorder_;
97 
98  //Public methods
99  public:
103  ApplyPosteriors ( std::vector<NGramList>& ng,
105  Theta& theta,
106  unsigned minorder = 1,
107  unsigned maxorder = 4) :
108  minorder_ (minorder),
109  maxorder_ (maxorder),
110  posteriors_ (pst),
111  hs_ngrams_ (ng),
112  theta_ (theta) {
113  };
114 
116  fst::VectorFst<fst::StdArc> *operator() (fst::VectorFst<fst::StdArc> const
117  &fsthyp) {
118  LINFO ("decoding...");
119  fst::VectorFst<fst::StdArc> aux;
120  fst::Map (fsthyp, &aux, fst::TimesMapper<fst::StdArc> (theta_ (0) ) );
121  SetFinalStateCost (&aux, fst::StdArc::Weight::One() );
122  fst::VectorFst<fst::StdArc>* fstmbr = NULL;
123  LINFO ("NS=" << aux.NumStates() );
124  fstmbr = fastApplyPosteriors (aux);
125  fst::VectorFst<fst::StdArc> *fstmax = FstScaleWeights (fstmbr, -1);
126  delete fstmbr;
127  return fstmax;
128  };
129 
130  //Private methods
131  private:
133  fst::StdArc::StateId GetState (const fst::NGram& w) {
134  NGramToStateMapper::iterator it = statemapper.find (w);
135  if (it != statemapper.end() ) {
136  return it->second;
137  }
138  return -1;
139  };
140 
142  void initializeStateMap() {
143  fst::NGram w;
144  statemapper.clear();
145  statemapper[w] = 0;
146  }
147 
149  void applyPosteriorsEx (fst::MutableFst<fst::StdArc>* fstlat,
150  const unsigned n) {
151  fst::MutableFst<fst::StdArc>* fsttmp = fstlat->Copy();
152  fst::VectorFst<fst::StdArc>* fstpst = makePosteriorsFST (n);
153  fst::ArcSort (fstpst, fst::ILabelCompare<fst::StdArc>() );
154  fst::Compose (*fsttmp, *fstpst, fstlat);
155  delete fsttmp;
156  delete fstpst;
157  }
158 
160  fst::VectorFst<fst::StdArc>* fastApplyPosteriors (const
161  fst::VectorFst<fst::StdArc>& fsthyp) {
162  LINFO ("fast decoding enabled");
163  initializeStateMap();
164  fst::VectorFst<fst::StdArc>* fsttmp = fsthyp.Copy();
165  for (unsigned n = maxorder_; n >= minorder_; --n) {
166  if (hs_ngrams_[n].size() > 0) {
167  applyPosteriorsEx (fsttmp, n);
168  }
169  }
170  return fsttmp;
171  }
172 
174  fst::VectorFst<fst::StdArc>* makeUnigramPosteriorsFST() {
175  fst::VectorFst<fst::StdArc>* fst = new fst::VectorFst<fst::StdArc>;
176  fst::StdArc::StateId startState = fst->AddState();
177  fst->SetStart (startState);
178  for (NGramList::const_iterator it = hs_ngrams_[1].begin();
179  it != hs_ngrams_[1].end(); ++it) {
180  fst::NGram w = it->first;
181  fst::StdArc::Weight p = 0;
182  if (posteriors_.find (w) != posteriors_.end() ) p = posteriors_[w][0][0] *
183  theta_
184  (1).Value();
185  fst->AddArc (startState, fst::StdArc (w[0], w[0], p, startState) );
186  }
187  fst->SetFinal (startState, fst::StdArc::Weight::One() );
188  LINFO (std::setw (6) << hs_ngrams_[1].size() << " 1-ngram gain(s) applied");
189  return fst;
190  }
191 
193  fst::VectorFst<fst::StdArc>* makePosteriorsFST (const unsigned n) {
194  if (n == 1) {
195  return makeUnigramPosteriorsFST();
196  }
197  fst::VectorFst<fst::StdArc>* fst = new fst::VectorFst<fst::StdArc>;
198  fst::StdArc::StateId startState = fst->AddState();
199  fst->SetStart (startState);
200  for (NGramList::const_iterator it = hs_ngrams_[n].begin();
201  it != hs_ngrams_[n].end(); ++it) {
202  fst::NGram h = it->first;
203  // h.pop_back();
204  h.resize (h.size() - 1);
205  if (GetState (h) == -1) {
206  MakeHistory (fst, h);
207  }
208  }
209  for (NGramList::const_iterator it = hs_ngrams_[n].begin();
210  it != hs_ngrams_[n].end(); ++it) {
211  fst::NGram w = it->first;
212  fst::NGram h, t;
213  h.assign ( w.begin(), w.end() - 1);
214  t.assign ( w.begin() + 1, w.end() );
215  fst::StdArc::StateId src = GetState (h);
216  fst::StdArc::StateId trg = GetState (t);
217  if (trg == -1) {
218  trg = fst->AddState();
219  fst->SetFinal (trg,
220  fst::StdArc::Weight::One() ); //Gonzalo: Set all states to final.
221  statemapper[t] = trg;
222  }
223  WordId wid = t[t.size() - 1];
224  fst::StdArc::Weight p = 0;
225  if (posteriors_.find (w) != posteriors_.end() ) p = posteriors_[w][0][0] *
226  theta_
227  (n).Value();
228  fst->AddArc (src, fst::StdArc (wid, wid, p, trg) );
229  fst->SetFinal (trg, fst::StdArc::Weight::One() );
230  }
231  LINFO (std::setw (6) << hs_ngrams_[n].size() << " " << n <<
232  "-ngram gain(s) applied");
233  return fst;
234  }
235 
237  void MakeHistory (fst::VectorFst<fst::StdArc>* fst, const fst::NGram& h) {
238  fst::StdArc::StateId src = fst->Start();
239  fst::StdArc::StateId trg;
240  for (fst::NGram::const_iterator it = h.begin(); it != h.end(); ++it) {
241  WordId wid = *it;
242  trg = fst->AddState();
243  fst->AddArc (src, fst::StdArc (wid, wid, fst::StdArc::Weight::One(), trg) );
244  src = trg;
245  }
246  statemapper[h] = trg;
247  }
248 };
249 
250 }
251 } // end namespaces
252 
253 #endif //TASK_LMBR_APPLYPOSTERIORS
Theta(unsigned minorder=1, unsigned maxorder=4)
unordered_set< fst::WordId > Wlist
Definition: data.lmbr.hpp:25
#define LINFO(msg)
Definition: fstio.hpp:27
void SetFinalStateCost(fst::MutableFst< fst::StdArc > *fst, const fst::StdArc::Weight cost)
Definition: fstutils.hpp:445
fst::TropicalWeightTpl< F > Map(double)
unordered_map< fst::NGram, std::vector< std::vector< Posterior > >, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToPosteriorsMapper
Definition: data.lmbr.hpp:35
unsigned WordId
Functor handling LMBR theta parameters.
std::basic_string< WordId > NGram
fst::VectorFst< fst::StdArc > * FstScaleWeights(fst::VectorFst< fst::StdArc > *fst, const double scale)
Definition: fstutils.hpp:387
ApplyPosteriors(std::vector< NGramList > &ng, NGramToPosteriorsMapper &pst, Theta &theta, unsigned minorder=1, unsigned maxorder=4)
Constructor: initializes functor with theta, min/max order, the ngrams and the posteriors.
std::unordered_map< NGram, StdArc::Weight, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramList
unordered_map< fst::NGram, fst::StdArc::StateId, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToStateMapper
Definition: data.lmbr.hpp:31
fst::StdArc::Weight const & operator()(unsigned k)
Functor that applies posteriors to any hypothesis space. Initializes with previously calculated poste...
Theta(float p, float r, float T, unsigned minorder=1, unsigned maxorder=4)
Constructor – Theta parameters defined from p,R,T,minorder, maxorder.
Definition: bleu.hpp:14