Cambridge SMT System
lineoptimize.hpp
Go to the documentation of this file.
1 #ifndef LMERT_LINEOPTIMIZE_HPP
2 #define LMERT_LINEOPTIMIZE_HPP
3 
5 #include <bleu.hpp>
6 #include <tuneset.hpp>
7 
8 namespace ucam {
9 namespace lmert {
10 
12 public:
14 
15  IntervalBoundary ( Sid sentence, const double gamma,
16  ucam::fsttools::BleuStats const &bleuStats ) :
17  sentence_ ( sentence ),
18  gamma_ ( gamma ),
19  bleuStats_ ( bleuStats ) {}
20 
23  double gamma_;
24 };
25 
26 std::ostream& operator<< ( std::ostream&os, const IntervalBoundary& b ) {
27  os << b.sentence_ << "; gamma: " << b.gamma_ << "; bleuStats: " <<
28  b.bleuStats_;
29 }
30 
/**
 * Strict-weak-ordering predicate that orders boundaries by increasing gamma.
 *
 * \tparam Boundary  any type exposing a public, comparable gamma_ member
 * \param b1         left-hand boundary
 * \param b2         right-hand boundary
 * \return           true when b1.gamma_ is strictly smaller than b2.gamma_
 */
template<typename Boundary>
inline bool IntervalBoundarySortPredicate ( const Boundary& b1
    , const Boundary& b2 ) {
  return b1.gamma_ < b2.gamma_;
}
36 
37 
38 template <class Arc>
39 class LineOptimize {
40 public:
44  PARAMS32 const &lambda,
45  PARAMS32 const &direction ) :
46  lambda_ ( lambda ),
47  direction_ ( direction ),
48  nthreads_ ( rg.get<int> ( HifstConstants::kNThreads.c_str()) ) {
49  envelopes_.clear();
50  envelopes_.resize ( ts.sidMax );
51  {
52 #ifdef NO_MULTI_THREADING
53 
54  for ( Sid sidx = 0; sidx < ts.sidMax; sidx++ ) {
55  MertLattice<Arc> env ( sidx, ts.cachedLats[sidx], lambda_, direction_ );
56  envelopes_[sidx] = env.finalEnvelope;
57  }
58 
59 #else
60  ucam::util::TrivialThreadPool tp ( nthreads_ );
61 
62  for ( Sid sidx = 0; sidx < ts.sidMax; sidx++ ) {
63  MertLatticeWrap<Arc> envw ( sidx, &*(ts.cachedLats[sidx]), lambda_, direction_,
64  envelopes_ );
65  tp ( envw );
66  }
67 
68 #endif
69  }
70  prev.resize ( envelopes_.size() );
71  initials.resize ( envelopes_.size() );
72  for ( Sid sidx = 0; sidx < envelopes_.size(); sidx++ ) {
73  MertEnvelope<Arc> env = envelopes_[sidx];
74  // iterate over lines
75  typename std::vector<MertLine<Arc> >::size_type i = 0;
76  // Trim sentence start and end markers from hypothesis
77  int offset = ( env.lines[i].t.size() < 2 ? 0 : 1 );
78  SentenceIdx h ( env.lines[i].t.begin() + offset,
79  env.lines[i].t.end() - offset );
80  // CreateInitial()
81  prev[sidx] = bs.SentenceBleuStats ( sidx, h );
82  IntervalBoundary bd1 ( sidx, env.lines[i].x, prev[sidx] );
83  initials[sidx] = bd1;
84  // CreateInterval()
85  for ( i = 1; i < env.lines.size(); ++i ) {
86  offset = ( env.lines[i].t.size() < 2 ? 0 : 1 );
87  h.assign ( env.lines[i].t.begin() + offset, env.lines[i].t.end() - offset );
88  ucam::fsttools::BleuStats next = bs.SentenceBleuStats ( sidx, h );
89  IntervalBoundary bd ( sidx, env.lines[i].x, next - prev[sidx] );
90  // std::cerr << bd << std::endl;
91  boundaries.push_back ( bd );
92  prev[sidx] = next;
93  }
94  }
95  Surface ( bs );
96  }
97 
98  // Compute Surface()
100  std::vector<IntervalBoundary> currentIBs ( initials );
101  ucam::fsttools::BleuStats aggregateBleuStats;
102 
103  // MergeInitialScores() and MergeInitials()
104  for ( typename std::vector<IntervalBoundary>::const_iterator it =
105  initials.begin(); it != initials.end(); ++it ) {
106  aggregateBleuStats = aggregateBleuStats + it->bleuStats_;
107  }
108  optimalGamma = -std::numeric_limits<double>::infinity();
109  if ( boundaries.size() == 0 ) {
110  LINFO("no boundaries - returning");
111  return;
112  }
113  sort ( boundaries.begin(), boundaries.end(),
114  IntervalBoundarySortPredicate<IntervalBoundary> );
115  unbounded = true; // initial interval is unbounded
116  optimalGamma = boundaries.front().gamma_ - 1;
117  optimalBleu = bs.ComputeBleu ( aggregateBleuStats );
118  // std::cerr << "OO " << optimalGamma << " " << optimalBleu << " :: " << boundaries.size() << std::endl;
119  typename std::vector<IntervalBoundary>::iterator itNext = ++( boundaries.begin() );
120  for ( typename std::vector<IntervalBoundary>::iterator it = boundaries.begin();
121  it != boundaries.end(); ++it ) {
122  aggregateBleuStats = aggregateBleuStats + it->bleuStats_;
123  ucam::fsttools::Bleu current = bs.ComputeBleu ( aggregateBleuStats );
124  // if ( current > optimalBleu ) {
125  if ( current > optimalBleu && current.m_brev >= optimalBleu.m_brev ) { // added test to favour longer hyps
126  optimalBleu = current;
127  unbounded = ( itNext == boundaries.end() );
128  optimalGamma = ( itNext == boundaries.end() ) ? it->gamma_ + 1.0 :
129  0.5 * ( it->gamma_ + itNext->gamma_ );
130  }
131  ++itNext;
132  }
133  }
134 
135  double OptimalGamma() {
136  return optimalGamma;
137  }
138 
140  return optimalBleu;
141  }
142 
143 private:
144  int nthreads_;
145  PARAMS32 lambda_;
146  PARAMS32 direction_;
147  std::vector< MertEnvelope<Arc> > envelopes_;
148  std::vector<ucam::fsttools::BleuStats> prev;
149  std::vector<IntervalBoundary> initials;
150  std::vector<IntervalBoundary> boundaries;
151  double optimalGamma;
152  ucam::fsttools::Bleu optimalBleu;
153  bool unbounded;
154 };
155 
156 }} // end namespaces
157 #endif
bool IntervalBoundarySortPredicate(const IntervalBoundary &b1, const IntervalBoundary &b2)
#define LINFO(msg)
MertEnvelope< Arc > finalEnvelope
Definition: lmert.hpp:121
ucam::fsttools::BleuStats bleuStats_
VectorFstPtrVector cachedLats
Definition: tuneset.hpp:19
Trivial implementation of a threadpool based on boost::asio methods When initiated, creates a threadpool of n threads (n <= number of cpus). Jobs should be submitted with the templated operator(). When the object is deleted it will wait for all threads to finish.
BleuStats SentenceBleuStats(const Sid sid, const SentenceIdx &hypIdx)
Definition: bleu.hpp:296
void Surface(ucam::fsttools::BleuScorer &bs)
const std::string kNThreads
ucam::fsttools::Sid Sid
Definition: lmert.hpp:21
ucam::fsttools::SentenceIdx SentenceIdx
Definition: lmert.hpp:22
ucam::fsttools::Bleu OptimalBleu()
LineOptimize(ucam::util::RegistryPO const &rg, ucam::fsttools::TuneSet< Arc > const &ts, ucam::fsttools::BleuScorer &bs, PARAMS32 const &lambda, PARAMS32 const &direction)
IntervalBoundary(Sid sentence, const double gamma, ucam::fsttools::BleuStats const &bleuStats)
ucam::fsttools::PARAMS32 PARAMS32
Definition: lmert.hpp:23
Bleu ComputeBleu(const BleuStats &bs)
Definition: bleu.hpp:326
std::vector< MertLine< Arc > > lines
Definition: lmert.hpp:46
std::ostream & operator<<(std::ostream &os, const IntervalBoundary &b)
Definition: bleu.hpp:14