Cambridge SMT System
Optimize.h
Go to the documentation of this file.
1 //Copyright (c) 2012, University of Cambridge
2 //All rights reserved.
3 //
4 //Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met://
5 //
6 // * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 // * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 // * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9 //
10 //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11 
12 #ifndef OPTIMIZE_H_
13 #define OPTIMIZE_H_
14 
15 #include "TuneSet.h"
16 #include "ParamsConfig.h"
17 #include "Score.h"
18 #include <utility>
19 #include <BleuStats.h>
20 #include <tr1/unordered_map>
21 #include <tr1/unordered_set>
22 #include <boost/math/constants/constants.hpp>
23 #include <boost/random/mersenne_twister.hpp>
24 
25 #include <global_incls.hpp>
26 #include <main.custom_assert.hpp>
27 #include <multithreading.hpp>
28 
30 
31 struct MERT {
32  ParamsConfig limits; // Maximum and minimum values for delta gamma
33 };
34 
35 extern MERT mert;
36 
37 class Optimizer {
38  public:
39  virtual const pair<PARAMS, double> operator() (PARAMS&) = 0;
40 
41  virtual void Init (int) = 0;
42 
43  virtual std::string ComputeError (const PARAMS& lambda) = 0;
44 
45  virtual void LoadRefData (std::vector<std::string>) = 0;
46 
47  virtual void InitTuneSet (bool) = 0;
48 
49  virtual ~Optimizer() {
50  }
51  ;
52 
53 };
54 
56 
57  unsigned int dim;
58  boost::random::mt19937 rand_gen;
59 
60  public:
61 
62  RandDirGenerator (unsigned int dim = 0);
63 
64  PARAMS GenerateDirection();
65 
66 };
67 
68 template<typename Algo, typename ErrorSurface>
69 class OptimizeTask {
70 
71  Sid sid;
72  PARAMS const *lambda;
73  PARAMS const *direction;
74  TuneSet const *lats;
75  ErrorSurface *surface;
76 
77  public:
78 
79  OptimizeTask (const Sid sid, PARAMS const* lambda, PARAMS const* direction,
80  TuneSet const * lats, ErrorSurface *surface) :
81  sid (sid), lambda (lambda), direction (direction), lats (lats), surface (
82  surface) {
83  }
84 
85  void operator() () {
86  if (opts.verbose) {
87  tracer << "lattice line optimization: sentence s=" << sid << '\n';
88  }
90  if (!fst) {
91  std::cerr << "ERROR: invalid vector lattice for sentence s=" << sid
92  << '\n';
93  exit (1);
94  }
95  //PruneStats pruneStats;
96  const typename Algo::Lines lines = Algo::ComputeLatticeEnvelope (fst,
97  *lambda, *direction);
98  /*
99  if (opts.verbose) {
100  tracer << "Arcs in lattices before prune: " << pruneStats.before
101  << endl;
102  tracer << "Arcs in lattice after prune: " << pruneStats.after
103  << endl;
104  }*/
105  double prevScore = 0.0;
106  double prevExpScore = 0.0;
107  for (typename Algo::Lines::const_iterator line = lines.begin();
108  line != lines.end(); ++line) {
109  //Sometimes we get empty hypotheses
110  int offset;
111  line->t.size() < 2 ? offset = 0 : offset = 1;
112  Sentence h (line->t.begin() + offset,
113  line->t.end() - offset); // Trim sentence start and end markers from hypothesis
114  double expScore;
115  if (line == lines.begin() ) {
116  expScore = ( (++lines.begin() )->x - 1) * line->m + line->y;
117  surface->CreateInitial (sid, line->x, h, line->score, expScore);
118  } else {
119  expScore = line->x * line->m + line->y;
120  surface->CreateInterval (sid, line->x, h, line->score - prevScore,
121  expScore - prevExpScore);
122  }
123  prevScore = line->score;
124  prevExpScore = expScore;
125  }
126  if (!opts.useCache) {
127  delete fst;
128  }
129  }
130 
131 };
132 
133 int GetNoOfThreads();
134 
135 template<class Algo, class ErrorSurface>
137  tasks) {
139  for (typename std::vector<OptimizeTask<Algo, ErrorSurface> >::const_iterator it
140  =
141  tasks.begin(); it != tasks.end(); ++it) {
142  tp (*it);
143  }
144 }
145 
146 PARAMS ComputeFinalPoint (const PARAMS&, const PARAMS&, const double);
147 
148 template<class Algo, class ErrorSurface>
149 class OptimizerImpl: public Optimizer {
150 
151  protected:
153  typedef typename ErrorStats::Error Error;
154  typedef pair<PARAMS, Error> OptimizationResult;
155 
156  OptimizationResult MakeOptimizationResult (ErrorSurface& surface,
157  const PARAMS& direction, const OptimizationResult& prev) {
158  if (surface.boundaries.empty() ) {
159  return prev;
160  }
161  surface.ComputeSurface();
162  PARAMS tunedPoint = ComputeFinalPoint (prev.first, direction,
163  surface.GetOptimalGamma() );
164  return make_pair (tunedPoint, surface.GetOptimalError() );
165  }
166 
167  void LineOptimize (const PARAMS& lambda, const PARAMS& direction,
168  ErrorSurface& surface, const std::vector<Sid>& lattices) {
169  std::vector<OptimizeTask<Algo, ErrorSurface> > tasks;
170  for (std::vector<Sid>::const_iterator sit = lattices.begin();
171  sit != lattices.end(); ++sit) {
172  OptimizeTask<Algo, ErrorSurface> task (*sit, &lambda, &direction,
173  & (this->lats), &surface);
174  tasks.push_back (task);
175  }
176  execute_with_threadpool (tasks);
177  }
178 
180  const std::vector<OptimizationResult>& results) {
181  double max = 0.0;
182  unsigned int d = 0;
183  for (unsigned int k = 0; k < results.size(); ++k) {
184  double bleu = results[k].get_tunedScore().GetError()
185  - results[k].get_startScore().GetError();
186  if (bleu > max) {
187  max = bleu;
188  d = k;
189  }
190  }
191  return d;
192  }
193 
194  void LogLineOptimization (const OptimizationResult& start,
195  const OptimizationResult& tuned, const PARAMS& direction,
196  const double gamma) {
197  Error startError = start.second;
198  Error tunedError = tuned.second;
199  if (opts.fullLog) {
200  const PARAMS& startPoint = start.first;
201  const PARAMS& tunedPoint = tuned.first;
202  tracer << "start: " << std::fixed << std::setprecision (opts.printPrecision)
203  << startPoint << " " << std::setprecision (6) << startError
204  << '\n';
205  tracer << " direction: " << std::fixed
206  << std::setprecision (opts.printPrecision) << direction << '\n';
207  tracer << " final: " << std::fixed << std::setprecision (opts.printPrecision)
208  << tunedPoint << std::setprecision (6) << " " << tunedError
209  << '\n';
210  tracer << " gamma: " << std::fixed << std::setprecision (8) << gamma << '\n';
211  } else {
212  // Print every hundreth gamma
213  static unsigned int counter = 0;
214  static const unsigned int dim = direction.size();
215  static const unsigned int percent = dim < 1000 ? 1 : dim / 1000;
216  counter = (counter + 1) % dim;
217  if (counter % percent == 0) {
218  tracer << "gamma: " << gamma << " error: " << tunedError
219  << endl;
220  }
221  }
222  }
223 
225  TuneSet lats; // Vector lattices for each sentence
226 
227  public:
228 
229  virtual void LoadRefData (std::vector<std::string> refs) {
230  refData.LoadRefData (refs);
231  }
232 
233  virtual void InitTuneSet (bool useCache) {
234  lats.Initialize (useCache);
235  }
236 
237  virtual ~OptimizerImpl() {
238  }
239 
240  virtual std::string ComputeError (const PARAMS& lambda) {
241  return Score (this->refData, this->lats, lambda);
242  }
243 };
244 
245 /*
246  * Scale a PARAMS by the a single parameter value. Designed to combat numerical instability.
247  */
248 
249 PARAMS VectorScale (const PARAMS&, const unsigned int k = 0);
250 
251 PARAMS GenerateRandDir (const unsigned int noOfAxes);
252 
253 class Directions {
254 
255  public:
256 
257  Directions();
258 
259  const PARAMS& Get (const unsigned int);
260 
261  void Set (const unsigned int, PARAMS);
262 
263  void Set (std::vector<PARAMS>, TuneSet&);
264 
265  unsigned int Size();
266 
267  void Resize (unsigned int, TuneSet& lats);
268 
269  const std::vector<Sid> FilteredLattices (unsigned int, const std::vector<Sid>&);
270 
271  bool ContainsAxis (unsigned int);
272 
273  private:
274 
275  bool ContainsAxis (Sid, unsigned int);
276 
277  unordered_map<unsigned int, PARAMS> directions;
278 
279  unsigned int dim;
280 
281  unsigned int prev;
282 
283  PARAMS current;
284 
285  // Stores the features that are populated for each axis
286  unordered_map<Sid, unordered_set<unsigned int> > feature_filter;
287 
288  bool * hasDirection;
289 
290 };
291 
292 template<typename Algo, typename ErrorSurface>
293 class RandomOptimizer: public OptimizerImpl<Algo, ErrorSurface> {
294 
298  RandDirGenerator dirGen;
299  unsigned int dim;
300  unsigned int directions;
301  bool useAxes;
302 
303  public:
304 
305  virtual void Init (int);
306 
307  virtual ~RandomOptimizer() {}
308 
309  virtual const pair<PARAMS, double> operator() (PARAMS& startPoint) {
310  Error startError = ComputeError (this->refData, this->lats, startPoint);
311  OptimizationResult start = make_pair (startPoint, startError);
312  OptimizationResult prev = start;
313  while (true) {
314  OptimizationResult best = prev;
315  std::vector<PARAMS> generatedDirs;
316  if (useAxes) {
317  for (int i = 0; i < dim; ++i) {
318  PARAMS dir = PARAMS (dim);
319  for (int j = 0; j < dim; ++j) {
320  dir[j] = 0;
321  }
322  dir[i] = 1;
323  generatedDirs.push_back (dir);
324  }
325  }
326  for (int i = 0; i < directions; ++i) {
327  generatedDirs.push_back (dirGen.GenerateDirection() );
328  }
329  for (std::vector<PARAMS>::const_iterator direction =
330  generatedDirs.begin(); direction != generatedDirs.end();
331  ++direction) {
332  ErrorSurface surface (this->lats.ids.size(), & (this->refData) );
333  this->LineOptimize (prev.first, *direction, surface, this->lats.ids);
334  OptimizationResult current = this->MakeOptimizationResult (surface,
335  *direction, prev);
336  this->LogLineOptimization (prev, current, *direction,
337  surface.GetOptimalGamma() );
338  if (surface.boundaries.size() == 0) {
339  if (opts.fullLog) {
340  tracer <<
341  " parameters not updated: no interval boundaries\n";
342  }
343  continue;
344  }
345  //Need to do limits check properly
346  //if (!mert.limits.check_in_range(i, surface.GetOptimalGamma())) {
347  // continue;
348  //}
349  if (abs (surface.GetOptimalGamma() ) < opts.gammaThreshold) {
350  if (opts.fullLog) {
351  tracer <<
352  " parameters not updated: change in gamma < "
353  << opts.gammaThreshold << '\n';
354  }
355  continue;
356  }
357  if (current.second > best.second) {
358  best = current;
359  }
360  }
361  if (!opts.fullLog) {
362  tracer << "full random iteration completed with error: "
363  << best.second << '\n';
364  std::cerr << std::fixed << std::setprecision (opts.printPrecision)
365  << VectorScale (best.first) << endl;
366  }
367  if (best.second.GetError() - prev.second.GetError()
368  < opts.bleuThreshold) {
369  tracer <<
370  "no direction gives sufficient improvement: delta bleu < "
371  << opts.bleuThreshold << '\n';
372  break;
373  }
374  prev = best;
375  }
376  return make_pair (prev.first, prev.second.GetError() );
377  }
378 };
379 
380 template<typename Algo, typename ErrorSurface>
381 class PowellOptimizer: public OptimizerImpl<Algo, ErrorSurface> {
382  public:
386  private:
387  Directions directions;
388 
389  void UpdateDirection (const OptimizationResult& start,
390  const OptimizationResult& final, double bestChange,
391  unsigned int bestDirection, Directions& directions) {
392  PARAMS extrapolated_d = final.first -
393  start.first; // Compute new extrapolated direction
394  PARAMS extrapolated_p = final.first +
395  extrapolated_d; // Compute new extrapolated point
396  double dbm = bestChange; // Delta bleu max
397  double dbt = final.second.GetError() -
398  start.second.GetError(); // Delta bleu total
399  double dbe =
400  ComputeError (this->refData, this->lats, extrapolated_p).GetError()
401  - start.second.GetError(); // Delta bleu extrapolated
402  if (dbe > 0
403  && 2 * (2 * dbt - dbe) * powf (dbt - dbm, 2.0)
404  < powf (dbe, 2.0) * dbm) { // Numerical Recipes in C++ (Second Edition) (p422)
405  directions.Set (bestDirection, VectorScale (extrapolated_d) );
406  tracer << "replacing direction[" << bestDirection << "]" << std::fixed
407  << std::setprecision (opts.printPrecision) << '\n';
408  }
409  }
410 
411  public:
412 
413  virtual void Init (int);
414 
415  virtual const pair<PARAMS, double> operator() (PARAMS& startPoint) {
416  ErrorSurface surface (this->lats.ids.size(), & (this->refData) );
417  Error startError = ComputeError (this->refData, this->lats, startPoint);
418  OptimizationResult start = make_pair (startPoint, startError);
419  OptimizationResult prev;
420  while (true) {
421  prev = start;
422  double bestDeltaBleu = 0.0;
423  unsigned int bestDirection = 0;
424  for (unsigned int d = 0; d < directions.Size(); ++d) {
425  if (!directions.ContainsAxis (d) ) {
426  continue;
427  }
428  surface.Reset();
429  ErrorSurface testSurface = surface;
430  const std::vector<Sid> filtered = directions.FilteredLattices (d,
431  this->lats.ids);
432  this->LineOptimize (prev.first, directions.Get (d), testSurface, filtered);
433  OptimizationResult current = this->MakeOptimizationResult (testSurface,
434  directions.Get (d),
435  prev);
436  if (opts.pointTest) {
437  Error debugError = ComputeError (this->refData, this->lats, current.first);
438  if (debugError.GetError() != current.second.GetError() ) {
439  tracer << "Direction: " << d
440  << " Incorrect Error Surface: "
441  << testSurface.GetOptimalGamma() << endl;
442  }
443  }
444  if (!opts.writeSurface.empty() ) {
445  testSurface.WriteErrorSurface (opts.writeSurface.data() );
446  }
447  this->LogLineOptimization (prev, current, directions.Get (d),
448  testSurface.GetOptimalGamma() );
449  double deltaBleu = current.second.GetError()
450  - prev.second.GetError();
451  if (deltaBleu > bestDeltaBleu) {
452  bestDeltaBleu = deltaBleu;
453  bestDirection = d;
454  }
455  if (testSurface.GetUnbounded() ) {
456  continue;
457  }
458  if (testSurface.boundaries.size() == 0) {
459  if (opts.fullLog) {
460  tracer <<
461  " parameters not updated: no interval boundaries\n";
462  }
463  continue;
464  }
465  if (deltaBleu < opts.bleuThreshold) {
466  if (opts.fullLog) {
467  tracer << " parameters not updated: delta bleu < "
468  << opts.bleuThreshold << '\n';
469  }
470  continue;
471  }
472  if (abs (testSurface.GetOptimalGamma() ) < opts.gammaThreshold) {
473  if (opts.fullLog) {
474  tracer <<
475  " parameters not updated: change in gamma < "
476  << opts.gammaThreshold << '\n';
477  }
478  continue;
479  }
480  if (!mert.limits.check_in_range (d,
481  testSurface.GetOptimalGamma() ) ) {
482  continue;
483  }
484  surface = testSurface;
485  prev = current;
486  }
487  if (prev.second.GetError() - start.second.GetError()
488  < opts.bleuThreshold) {
489  tracer <<
490  "no direction gives sufficient improvement: delta bleu < "
491  << opts.bleuThreshold << '\n';
492  break;
493  }
494  UpdateDirection (start, prev, bestDeltaBleu, bestDirection,
495  directions);
496  start = prev;
497  if (!opts.fullLog) {
498  tracer << "full powell iteration compleated with error: "
499  << prev.second << '\n';
500  std::cerr << std::fixed << std::setprecision (opts.printPrecision)
501  << VectorScale (prev.first) << endl;
502  }
503  }
504  return make_pair (prev.first, prev.second.GetError() );
505  }
506 
507  virtual ~PowellOptimizer() {
508  }
509 };
510 
511 #endif /* OPTIMIZE_H_ */
MertOpt opts
Definition: MertCommon.cpp:14
Error GetOptimalError()
Definition: ErrorSurface.h:110
const PARAMS & Get(const unsigned int)
Definition: Optimize.cpp:135
RefData::ErrorStats::Error ComputeError(RefData &refData, const TuneSet &lats, const PARAMS &lambda)
Definition: Score.h:22
PARAMS GenerateDirection()
Definition: Optimize.cpp:47
bool useCache
Definition: MertCommon.h:37
Definition: fstio.hpp:27
void CreateInterval(Sid sid, const double gamma, const Sentence h, const double modelScore, const double expScore)
Definition: ErrorSurface.h:204
#define tracer
Definition: data.lmbr.hpp:18
bool GetUnbounded()
Definition: ErrorSurface.h:114
TupleArcFst * GetVectorLattice(const Sid s, const bool use_cache) const
Definition: TuneSet.cpp:87
OptimizationResult MakeOptimizationResult(ErrorSurface &surface, const PARAMS &direction, const OptimizationResult &prev)
Definition: Optimize.h:156
void CreateInitial(Sid sid, const double gamma, const Sentence h, const double modelScore, const double expScore)
Definition: ErrorSurface.h:197
unsigned int Sid
Definition: MertCommon.h:45
bool verbose
Definition: MertCommon.h:33
void Initialize(const bool use_cache)
Definition: TuneSet.cpp:57
void Set(const unsigned int, PARAMS)
Definition: Optimize.cpp:145
bool pointTest
Definition: MertCommon.h:40
ErrorSurface::RefData refData
Definition: Optimize.h:224
PARAMS GenerateRandDir(const unsigned int noOfAxes)
virtual void InitTuneSet(bool useCache)
Definition: Optimize.h:233
ParamsConfig limits
Definition: Optimize.h:32
Trivial implementation of a threadpool based on boost::asio methods When initiated, creates a threadpool of n threads (n <= number of cpus). Jobs should be submitted with the templated operator(). When the object is deleted it will wait for all threads to finish.
void LineOptimize(const PARAMS &lambda, const PARAMS &direction, ErrorSurface &surface, const std::vector< Sid > &lattices)
Definition: Optimize.h:167
bool check_in_range(const unsigned int k, const double gamma) const
MERT mert
Definition: Optimize.cpp:23
OptimizerImpl< Algo, ErrorSurface >::OptimizationResult OptimizationResult
Definition: Optimize.h:384
std::vector< Wid > Sentence
Definition: MertCommon.h:48
double gammaThreshold
Definition: MertCommon.h:31
virtual ~RandomOptimizer()
Definition: Optimize.h:307
virtual void LoadRefData(std::vector< std::string > refs)
Definition: Optimize.h:229
PARAMS ComputeFinalPoint(const PARAMS &, const PARAMS &, const double)
Definition: Optimize.cpp:54
TuneSet lats
Definition: Optimize.h:225
std::vector< IntervalBoundary > boundaries
Definition: ErrorSurface.h:215
RefData::ErrorStats ErrorStats
Definition: ErrorSurface.h:38
OptimizerImpl< Algo, ErrorSurface >::Error Error
Definition: Optimize.h:385
PARAMS VectorScale(const PARAMS &, const unsigned int k=0)
Definition: Optimize.cpp:66
int GetNoOfThreads()
Definition: Optimize.cpp:195
bool ContainsAxis(unsigned int)
Definition: Optimize.cpp:188
const std::vector< Sid > FilteredLattices(unsigned int, const std::vector< Sid > &)
Definition: Optimize.cpp:172
ErrorStats::Error Error
Definition: Optimize.h:153
void execute_with_threadpool(std::vector< OptimizeTask< Algo, ErrorSurface > > tasks)
Definition: Optimize.h:136
Implements trivial threadpool using boost::asio library.
unsigned int GetBestOptimizationDirection(const std::vector< OptimizationResult > &results)
Definition: Optimize.h:179
virtual ~Optimizer()
Definition: Optimize.h:49
std::string Score(RefData &refData, const TuneSet &lats, const PARAMS &lambda)
Definition: Score.h:50
double bleuThreshold
Definition: MertCommon.h:32
bool fullLog
Definition: MertCommon.h:39
OptimizeTask(const Sid sid, PARAMS const *lambda, PARAMS const *direction, TuneSet const *lats, ErrorSurface *surface)
Definition: Optimize.h:79
virtual ~OptimizerImpl()
Definition: Optimize.h:237
fst::VectorFst< TupleArc > TupleArcFst
unsigned int Size()
Definition: Optimize.cpp:158
pair< PARAMS, Error > OptimizationResult
Definition: Optimize.h:154
virtual std::string ComputeError(const PARAMS &lambda)
Definition: Optimize.h:240
void WriteErrorSurface(const std::string &filename) const
Definition: ErrorSurface.h:182
int32 printPrecision
Definition: MertCommon.h:29
std::string writeSurface
Definition: MertCommon.h:34
virtual ~PowellOptimizer()
Definition: Optimize.h:507
hifst-specific classes and methods included in this namespace.
Definition: Optimize.h:31
void LogLineOptimization(const OptimizationResult &start, const OptimizationResult &tuned, const PARAMS &direction, const double gamma)
Definition: Optimize.h:194
ErrorSurface::ErrorStats ErrorStats
Definition: Optimize.h:152
All included standard headers, boost headers,...
Static variable for custom_assert. Include only once from main file.
void ComputeSurface()
Definition: ErrorSurface.h:118
double GetOptimalGamma()
Definition: ErrorSurface.h:106