Cambridge SMT System
task.tunewpwrite.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef TASK_TUNEWPWRITEFST_HPP
16 #define TASK_TUNEWPWRITEFST_HPP
17 
25 namespace ucam {
26 namespace fsttools {
27 
35 template <class Data, class Arc = fst::StdArc >
37  typedef typename Arc::Label Label;
38  typedef typename Arc::Weight Weight;
39  typedef std::vector<pair<Label, Label> > VectorPair;
40  private:
42  std::string fstkey_;
43  std::string readfstkey_;
47  unordered_set<Label> epsilons_;
48  bool doTune_;
50  public:
52  , const std::string& fstkey
53  , const std::string& readfstkey = ""
54  )
55  : fstkey_ ( fstkey )
56  , readfstkey_ (readfstkey != "" ? readfstkey : fstkey)
57  , fstfile_ ( rg.get<std::string> ( fstkey ) )
58  , wp_( rg.get<std::string>(HC::kTuneWordPenaltyRange ) )
59  , doTune_(rg.getBool(HC::kTune) )
60  {
61  LDEBUG("Init TuneWpWriteFstTask: " << fstfile_() );
62  epsilons_.insert(OOV);
63  epsilons_.insert(EPSILON);
64  epsilons_.insert(DR);
65  };
66 
// Static factory: builds a TuneWpWriteFstTask only when it is actually wanted.
// A task is created when the program option `fstkey` exists in the registry
// AND the HC::kTune option evaluates to true via getBool(); otherwise NULL is
// returned and no task is allocated.
//
// rg          Registry of program options queried for `fstkey` and HC::kTune.
// fstkey      Program-option key; forwarded unchanged to the constructor.
// readfstkey  Forwarded unchanged to the constructor (defaults to "").
//
// Returns a pointer obtained with `new`, or NULL.
// NOTE(review): nothing here releases the allocation — presumably the caller
// owns and deletes the returned task; confirm against the call sites.
67  inline static TuneWpWriteFstTask *init ( const ucam::util::RegistryPO& rg
68  , const std::string& fstkey
69  , const std::string& readfstkey = ""
70  ) {
71  if ( rg.exists ( fstkey )
72  && rg.getBool(HC::kTune) )
73  return new TuneWpWriteFstTask ( rg, fstkey, readfstkey );
74  return NULL; // option missing or tuning disabled: no task
75  };
76 
// Runs the task on one data object: for every value produced by the wp_ range,
// takes a fresh copy of the fst stored under readfstkey_, applies a word
// penalty to it, reduces it with ShortestPath, and writes the result to a
// file whose name is derived from fstfile_(d.sidx).
//
// d  Data object; this method reads d.sidx and d.fsts[readfstkey_].
//
// Returns false on every path that returns at all.
// NOTE(review): the meaning of the boolean in the task interface is not
// visible in this file — confirm what `false` signals to the caller.
//
// Terminates the process with EXIT_FAILURE if d.fsts has no entry for
// readfstkey_.
84  inline bool run ( Data& d ) {
85  if ( fstfile_ ( d.sidx ) == "" ) return false; // no output file name for this sidx: nothing to write
86  if ( d.fsts.find ( readfstkey_ ) == d.fsts.end() ) {
87  LERROR ( "fst with key=" << readfstkey_ << " does not exist!" );
88  exit ( EXIT_FAILURE );
89  }
90  using namespace ucam::util;
91  using namespace fst;
92 
93  LDEBUG("Word penalty application --");
// One output file is produced per value of the wp_ range.
94  for ( wp_.start(); !wp_.done(); wp_.next() ) {
// Copy the stored fst so the mapping below does not modify d.fsts.
// NOTE(review): the static_cast assumes d.fsts holds pointers that really
// refer to Fst<Arc> objects — confirm against the Data definition.
95  VectorFst<Arc> mfst (*( static_cast< Fst<Arc> *> (d.fsts[readfstkey_]) ) );
96  LDEBUG("w=" << wp_.get());
// Map the copy in place with a WordPenaltyMapper built from the weight
// mw_(wp_.get()) and the labels collected in epsilons_.
97  Map<Arc, WordPenaltyMapper<Arc> >
98  (&mfst, WordPenaltyMapper<Arc> (mw_ (wp_.get() ), epsilons_) );
99  VectorFst<Arc> aux;
100  ShortestPath<Arc> (mfst, &aux); // called with its default arguments
101  mfst = aux;
// Build the output name by substituting the current penalty value for
// every occurrence of HC::kUserWpRange in the file name.
102  std::string auxs= fstfile_(d.sidx);
// NOTE(review): wp_() is used here but wp_.get() above — presumably both
// yield the current range value; confirm.
103  find_and_replace (auxs, HC::kUserWpRange, toString<float> (wp_() ) );
104  FstWrite<Arc> ( mfst, auxs);
105  FORCELINFO("Wrote " << auxs );
106  }
107  return false;
108  };
109 
110  private:
112 };
113 
114 }} // end namespaces
115 
116 #endif
#define ZDISALLOW_COPY_AND_ASSIGN(TypeName)
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
templated Mapper that inserts a word penalty over an FST, skipping user defined epsilon arcs...
Convenience class that inherits Taskinterface behaviour and writes an fst to [file] using a key defin...
Definition: fstio.hpp:27
NumberType get(void)
Returns range value at position k_.
Definition: range.hpp:119
#define FORCELINFO(msg)
#define LDEBUG(msg)
#define DR
void start(void)
Empty implementation.
Definition: range.hpp:107
void next(void)
Increment index.
Definition: range.hpp:111
Templated (hybrid) Interface for Task classes.
Templated functor that creates a weight given a float.
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
Definition: registrypo.hpp:235
TuneWpWriteFstTask(const ucam::util::RegistryPO &rg, const std::string &fstkey, const std::string &readfstkey="")
#define EPSILON
bool getBool(const std::string &key) const
To handle yes|no program option values.
Definition: registrypo.hpp:225
static TuneWpWriteFstTask * init(const ucam::util::RegistryPO &rg, const std::string &fstkey, const std::string &readfstkey="")
std::string const kTune
bool done(void)
Checks if reached the last element.
Definition: range.hpp:115
#define OOV
bool run(Data &d)
Method inherited from TaskInterface. Stores fst to [file]. The fst is accessed via data object using ...
#define LERROR(msg)
std::string const kUserWpRange
void find_and_replace(std::string &haystack, const std::string &needle, const std::string &replace)
std::string const kTuneWordPenaltyRange
Definition: bleu.hpp:14