Cambridge SMT System
tuneset.hpp
Go to the documentation of this file.
1 #ifndef LMERT_TUNESET_HPP
2 #define LMERT_TUNESET_HPP
3 
4 #include <vector>
5 
6 #include <constants-fsttools.hpp>
7 #include <fstutils.hpp>
8 #include <bleu.hpp>
9 
10 namespace ucam {
11 namespace fsttools {
12 
// TuneSet keeps a collection of lattices (fst::VectorFst<Arc>) in memory.
// The lattices are read in the constructor body below, stored through shared
// pointers in cachedLats, and later handed out by index or decoded for BLEU
// scoring by the member functions that follow.
13 template <class Arc>
14 class TuneSet {
15  public:
// Shared-ownership handle to one lattice, and the container type holding them.
16  typedef boost::shared_ptr<fst::VectorFst<Arc> > VectorFstPtr;
17  typedef std::vector<VectorFstPtr> VectorFstPtrVector;
// NOTE(review): listing line 18 is absent from this listing; it presumably
// declares sidMax, which is assigned and read below -- confirm in the header.
// Lattices in load order; the position in this vector is used as the index
// by the rest of the class.
19  VectorFstPtrVector cachedLats;
20 
21  // \todo: for general purpose, this should not depend on kInput or kRangeOne
// NOTE(review): listing line 22 (the constructor signature) is absent from
// this listing. The statements below use a registry object named rg; the
// cross-reference index at the end of this listing gives the signature as
// TuneSet(ucam::util::RegistryPO const &rg) -- confirm in the header.
23  using namespace ucam::util;
24  using namespace HifstConstants;
25  using namespace fst;
26 
// Iterate over the range built from rg and kRangeOne; one lattice is read
// per range value.
27  for ( IntRangePtr ir ( IntRangeFactory ( rg, kRangeOne ) );
28  !ir->done(); ir->next() ) {
// The string option stored under kInput is wrapped in a PatternAddress, which
// is then applied to the current range value to obtain the name passed to
// VectorFstRead (presumably a file path with a placeholder -- confirm).
// The PatternAddress is rebuilt on every iteration although its argument
// does not depend on the loop variable.
29  PatternAddress<unsigned> input ( rg.get<string> ( kInput.c_str() ) );
30  VectorFstPtr ifst ( VectorFstRead<Arc> ( input ( ir->get() ) ) );
// Each lattice is topologically sorted in place before being cached.
// NOTE(review): ifst is dereferenced without a null check -- confirm that
// VectorFstRead never returns a null pointer on a read failure.
31  TopSort ( &*ifst );
32  cachedLats.push_back ( ifst );
33  }
// Record how many lattices were loaded and log the count.
34  sidMax = cachedLats.size();
35  FORCELINFO ( "Loaded tuneset lattices: " << sidMax );
36  }
37 
38  fst::VectorFst<Arc>* GetLattice ( Sid sid ) {
39  if ( sid > cachedLats.size() ) {
40  LERROR ( "Requested lattice not loaded" << sid );
41  exit ( EXIT_FAILURE );
42  }
43  return &* ( cachedLats[sid] );
44  }
45 
46  // compute bleu under vw vector weight
// NOTE(review): listing line 47 (the function signature) is absent from this
// listing. The body uses a scorer named bs; the cross-reference index at the
// end of this listing gives the signature as Bleu ComputeBleu(BleuScorer &bs)
// -- confirm in the header. No vw parameter is visible in this overload, so
// the comment above may belong to the two-argument overload instead.
//
// Decodes the best hypothesis of every cached lattice, accumulates the
// per-sentence BLEU statistics returned by bs, and returns the BLEU value
// that bs computes from the accumulated statistics.
48  using namespace fst;
49  BleuStats bstats;
// i doubles as the index into cachedLats and as the sentence id passed to bs.
50  for ( int i = 0; i < sidMax; ++i ) {
51  SentenceIdx h;
52  FstGetBestHypothesis<Arc, Wid> ( *cachedLats[i], h);
// Drop the first and last token only when more than two tokens are present.
// NOTE(review): a hypothesis holding exactly the two boundary markers is
// therefore passed to the scorer unchanged -- confirm this is intended.
53  if ( h.size() > 2 ) { // remove <s> </s>
54  h.erase ( h.begin() );
55  h.pop_back();
56  }
57  bstats = bstats + bs.SentenceBleuStats ( i, h );
58  }
59  return bs.ComputeBleu ( bstats );
60  };
61 
62 
63  // \todo I think this method will only work with Arc=TupleArc32.
64  // PARAMS32 temporary replacement should be done externally,
65  // perhaps implemented in a semiring-specific wrapper
// Decodes the best hypothesis of every cached lattice, accumulates the
// per-sentence BLEU statistics returned by bs, and returns the BLEU value
// that bs computes from the accumulated statistics.
//
// bs: scorer providing SentenceBleuStats and ComputeBleu.
// vw: parameter vector. NOTE(review): vw is not referenced in any line
//     visible here. The listing numbering skips lines 68-69 and 83, so the
//     statements that apply vw (the "temporary replacement" mentioned in the
//     todo above) are presumably missing from this listing -- confirm
//     against the original header before editing this function.
66  Bleu ComputeBleu ( BleuScorer& bs, PARAMS32 const& vw ) {
67  using namespace fst;
70  BleuStats bstats;
71 
// i doubles as the index into cachedLats and as the sentence id passed to bs.
72  for ( int i = 0; i < sidMax; ++i ) {
73  SentenceIdx h;
74  FstGetBestHypothesis<Arc, Wid> ( *cachedLats[i], h);
75 
// Drop the first and last token only when more than two tokens are present;
// shorter hypotheses reach the scorer unchanged.
76  if ( h.size() > 2 ) { // remove <s> </s>
77  h.erase ( h.begin() );
78  h.pop_back();
79  }
80  // \todo define += operator?
81  bstats = bstats + bs.SentenceBleuStats ( i, h );
82  }
84  return bs.ComputeBleu ( bstats );
85  }
86 
87 };
88 
89 }} // end namespaces
90 
91 #endif
Bleu ComputeBleu(BleuScorer &bs, PARAMS32 const &vw)
Definition: tuneset.hpp:66
fst::VectorFst< Arc > * GetLattice(Sid sid)
Definition: tuneset.hpp:38
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
std::vector< Wid > SentenceIdx
Definition: bleu.hpp:22
Definition: fstio.hpp:27
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
std::string const kInput
#define FORCELINFO(msg)
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
Definition: range.hpp:214
VectorFstPtrVector cachedLats
Definition: tuneset.hpp:19
#define IntRangeFactory
Definition: range.hpp:213
Bleu ComputeBleu(BleuScorer &bs)
Definition: tuneset.hpp:47
BleuStats SentenceBleuStats(const Sid sid, const SentenceIdx &hypIdx)
Definition: bleu.hpp:296
Implements Tropical Sparse tuple weight semiring, extending from openfst SparsePowerWeight class...
boost::shared_ptr< fst::VectorFst< Arc > > VectorFstPtr
Definition: tuneset.hpp:16
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
std::vector< float > PARAMS32
Definition: bleu.hpp:18
unsigned Sid
Definition: bleu.hpp:19
#define LERROR(msg)
Bleu ComputeBleu(const BleuStats &bs)
Definition: bleu.hpp:326
std::vector< VectorFstPtr > VectorFstPtrVector
Definition: tuneset.hpp:17
TuneSet(ucam::util::RegistryPO const &rg)
Definition: tuneset.hpp:22
const std::string kRangeOne
Definition: range.hpp:26
Definition: bleu.hpp:14