Cambridge SMT System
lmert.hpp
Go to the documentation of this file.
1 #ifndef LMERT_LMERT_HPP
2 #define LMERT_LMERT_HPP
3 
4 #include <cmath>
5 #include <vector>
6 #include <functional>
7 
8 #include <iostream>
9 #include <sstream>
10 #include <vector>
11 #include <string>
12 #include <limits>
13 #include <cstdlib>
14 #include <unordered_map>
15 #include <bleu.hpp>
16 
17 namespace ucam {
18 namespace lmert {
19 
24 
25 template <class Arc>
26 class MertLine {
27  public:
28  MertLine() : x ( -std::numeric_limits<double>::infinity() ), y ( 0.0 ),
29  m ( 0.0 ) {}
30 
31  MertLine ( double y, double m, Wid word ) :
32  x ( -std::numeric_limits<double>::infinity() ), y ( y ), m ( m ) {}
33 
34  double x; // x-intercept of left-adjacent line
35  double y; // y-intercept of line
36  double m; // slope of line
37  SentenceIdx t; // partial translation hypothesis associated with line
38  double score; //translation score (quality)
39  typename Arc::Weight weight;
40 };
41 
42 template <class Arc>
43 class MertEnvelope {
44  public:
45  // lines that define the envelope / convex hull
46  std::vector<MertLine<Arc> > lines;
47 
48  static bool GradientSortPredicate ( const MertLine<Arc>& line1,
49  const MertLine<Arc>& line2 ) {
50  return line1.m < line2.m;
51  }
52 
54 
55  // sort envelope lines by slope
56  void SortLines() {
57  sort ( lines.begin(), lines.end(), GradientSortPredicate );
58  }
59 
60  // compute upper envelope of lines in array
61  void SweepLine() {
62  SortLines();
63  int j = 0;
64 
65  for ( typename std::vector<MertLine<Arc> >::size_type i = 0; i < lines.size(); i++ ) {
66  MertLine<Arc> l = lines[i];
67  l.x = -std::numeric_limits<double>::infinity();
68  if ( 0 < j ) {
69  if ( lines[j - 1].m == l.m ) {
70  if ( l.y <= lines[j - 1].y )
71  continue;
72  --j;
73  }
74  while ( 0 < j ) {
75  l.x = ( l.y - lines[j - 1].y ) / ( lines[j - 1].m - l.m );
76 
77  if ( lines[j - 1].x < l.x )
78  break;
79 
80  --j;
81  }
82  if ( 0 == j )
83  l.x = -std::numeric_limits<double>::infinity();
84 
85  lines[j++] = l;
86  } else {
87  lines[j++] = l;
88  }
89  }
90  lines.resize ( j );
91  }
92 
93  // returns lines that constitute the envelope as a string
94  std::string ToString ( bool show_hypothesis = false ) {
95  std::ostringstream oss;
96 
97  for ( typename std::vector<MertLine<Arc> >::size_type i = 0; i < lines.size();
98  ++i ) {
99  oss << "line i=[" << std::right << std::setw ( 4 ) << i << "]" << std::fixed
100  << std::setprecision ( 6 ) << " x=[" << std::right << std::setw (
101  12 ) << lines[i].x
102  << "]" << " y=[" << std::right << std::setw ( 12 ) << lines[i].y << "]"
103  << " m=[" << std::right << std::setw ( 12 ) << lines[i].m << "]";
104 
105  if ( show_hypothesis ) {
106  oss << " t=[" << lines[i].t << "]";
107  }
108 
109  oss << " w=[" << lines[i].weight << "]";
110  oss << std::endl;
111  }
112 
113  return oss.str();
114  }
115 
116 };
117 
118 template <class Arc>
119 class MertLattice {
120  public:
122  std::vector<ucam::fsttools::BleuStats> prev;
123 
124  MertLattice (fst::VectorFst<Arc>* fst, const PARAMS32& lambda, const PARAMS32& direction ) :
125  fst_ ( fst ), lambda_ ( lambda ), direction_ ( direction ) {
126  finalEnvelope.lines.clear();
127  // InializeEnvelopes()
128  envelopes_.clear();
129  envelopes_.resize ( fst_->NumStates() + 1 );
130  // InitializeStartState()
131  envelopes_[fst->Start()].lines.push_back ( MertLine<Arc>() );
132  // ComputeStateEnvelopes()
133  for ( fst::StateIterator < fst::VectorFst<Arc> > si ( *fst ); !si.Done();
134  si.Next() ) {
135  const typename Arc::StateId& s = si.Value();
136  envelopes_[s].SweepLine();
137  for ( fst::ArcIterator < fst::VectorFst<Arc> > ai ( *fst, si.Value() );
138  !ai.Done(); ai.Next() ) {
139  const Arc& a = ai.Value();
140  PropagateEnvelope ( s, a.nextstate, a.weight, a.ilabel );
141  }
142  if ( fst->Final ( s ) != Arc::Weight::Zero() ) {
143  PropagateEnvelope ( s, fst->NumStates(), fst->Final ( s ) );
144  }
145  envelopes_[s].lines.clear();
146  }
147  // ComputeFinalEnvelopes()
148  envelopes_[fst->NumStates()].SweepLine();
149  //
150  finalEnvelope = envelopes_[fst->NumStates()];
151  envelopes_[fst->NumStates()].lines.clear();
152  }
153 
154  void PropagateEnvelope ( const typename Arc::StateId& src,
155  const typename Arc::StateId& trg, const typename Arc::Weight& weight,
156  const Wid& w = 0 ) {
157  for ( unsigned int i = 0; i < envelopes_[src].lines.size(); ++i ) {
158  MertLine<Arc> line ( envelopes_[src].lines[i] );
159  line.y += fst::DotProduct<float> ( weight, lambda_ ) * -1;
160  line.m += fst::DotProduct<float> ( weight, direction_ ) * -1;
161  line.weight = fst::Times<float> ( weight, line.weight );
162  if ( w != 0 ) {
163  line.t.push_back ( w );
164  }
165  envelopes_[trg].lines.push_back ( line );
166  }
167  }
168 
169  private:
170  fst::VectorFst<Arc>* fst_;
171  std::vector<MertEnvelope<Arc> > envelopes_;
172  PARAMS32 lambda_;
173  PARAMS32 direction_;
174 };
175 
176 
177 template <class Arc>
179  public:
180  MertLatticeWrap ( Sid sidx, fst::VectorFst<Arc>* fst, const PARAMS32& lambda,
181  const PARAMS32& direction, vector< MertEnvelope<Arc> >& env ) :
182  sid_ ( sidx ), fst_ ( fst ), lambda_ ( lambda ), direction_ ( direction ),
183  env_ ( env ) {}
184 
185  void operator() () {
186  MertLattice<Arc> ml ( fst_, lambda_, direction_ );
187  env_[sid_] = ml.finalEnvelope;
188  };
189 
190  Sid sid_;
191  fst::VectorFst<Arc>* fst_;
192  PARAMS32 lambda_;
193  PARAMS32 direction_;
194  vector< MertEnvelope<Arc> >& env_;
195 };
196 
197 }} // end namespaces
198 #endif
MertLattice(fst::VectorFst< Arc > *fst, const PARAMS32 &lambda, const PARAMS32 &direction)
Definition: lmert.hpp:124
std::vector< Wid > SentenceIdx
Definition: bleu.hpp:22
Definition: fstio.hpp:27
MertEnvelope< Arc > finalEnvelope
Definition: lmert.hpp:121
std::string ToString(bool show_hypothesis=false)
Definition: lmert.hpp:94
MertLatticeWrap(Sid sidx, fst::VectorFst< Arc > *fst, const PARAMS32 &lambda, const PARAMS32 &direction, vector< MertEnvelope< Arc > > &env)
Definition: lmert.hpp:180
bool GradientSortPredicate(const MertLine &line1, const MertLine &line2)
Definition: LMert.cpp:21
ucam::fsttools::Wid Wid
Definition: lmert.hpp:20
MertLine(double y, double m, Wid word)
Definition: lmert.hpp:31
void PropagateEnvelope(const typename Arc::StateId &src, const typename Arc::StateId &trg, const typename Arc::Weight &weight, const Wid &w=0)
Definition: lmert.hpp:154
vector< MertEnvelope< Arc > > & env_
Definition: lmert.hpp:194
fst::VectorFst< Arc > * fst_
Definition: lmert.hpp:191
long long Wid
Definition: bleu.hpp:21
static bool GradientSortPredicate(const MertLine< Arc > &line1, const MertLine< Arc > &line2)
Definition: lmert.hpp:48
std::vector< float > PARAMS32
Definition: bleu.hpp:18
unsigned Sid
Definition: bleu.hpp:19
ucam::fsttools::Sid Sid
Definition: lmert.hpp:21
ucam::fsttools::SentenceIdx SentenceIdx
Definition: lmert.hpp:22
SentenceIdx t
Definition: lmert.hpp:37
ucam::fsttools::PARAMS32 PARAMS32
Definition: lmert.hpp:23
std::vector< ucam::fsttools::BleuStats > prev
Definition: lmert.hpp:122
std::vector< MertLine< Arc > > lines
Definition: lmert.hpp:46
Arc::Weight weight
Definition: lmert.hpp:39
Definition: bleu.hpp:14