Cambridge SMT System
main-run.rules2weights.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #pragma once
16 
24 namespace ucam {
25 namespace hifst {
26 
27 
30  typedef TupleArc32 ToArc;
31  typedef ToArc::Weight Weight;
33  unsigned lmOffset_;
36  , unsigned lmOffset )
37  : d_(&d)
38  , lmOffset_(lmOffset)
39  { }
40 
41  ToArc operator()(FromArc const &arc) const {
42  if ( arc.weight == FromArc::Weight::Zero() ) //irrelevant labels
43  return ToArc ( arc.ilabel, arc.olabel, ToArc::Weight::Zero(), arc.nextstate );
44 
45  // minimize weight list (this semiring allows for repeated indices!
46  std::map<int, float> wm;
47  for (fst::SparseTupleWeightIterator<FeatureWeight32, int> it ( arc.weight )
48  ; !it.Done()
49  ; it.Next() ) {
50  if (it.Value().first > (int) lmOffset_ ) {
51  continue;
52  }
53  if (it.Value().first < 0 ) {
54  WeightsTableIt itx = d_->weights->find(-it.Value().first );
55  if (itx == d_->weights->end()) {
56  std::cerr << "RULE NOT FOUND:" << -it.Value().first << "," << d_->weights->size() << std::endl;
57  exit(EXIT_FAILURE);
58  }
59  Weight aux = itx->second;
60  for (fst::SparseTupleWeightIterator<FeatureWeight32, int> auxit ( aux )
61  ; !auxit.Done()
62  ; auxit.Next() ) {
63  wm[auxit.Value().first] += auxit.Value().second.Value() * it.Value().second.Value();
64  }
65  continue;
66  }
67  wm[it.Value().first] += it.Value().second.Value();
68  }
69  // finally create the weights ...
70  Weight nw(arc.weight.DefaultValue()); // new weights;
71  for (std::map<int, float>::const_iterator itx = wm.begin()
72  ; itx != wm.end()
73  ; ++itx ) {
74  nw.Push(itx->first, itx->second);
75  }
76  return ToArc ( arc.ilabel, arc.olabel, nw, arc.nextstate );
77  }
78 };
79 
84 class SingleThreadededRulesToWeightsSparseLatsTask: public ucam::util::TaskInterface<RuleIdsToSparseWeightLatsData<> > {
85  private:
87  const ucam::util::RegistryPO& rg_;
88  public:
90  : rg_ ( rg )
91  {};
92 
// Runs the whole single-threaded conversion on data object d: determines the
// number of language models, selects the registry keys naming the input
// lattices and the grammar, runs LoadSparseWeightsTask on d, and then reads,
// maps and writes one lattice per value of the range.
// NOTE(review): this listing carries embedded line numbers and skips 100, 112
// and 122, so the opening `if` of each of the three option chains below is not
// visible here.
// NOTE(review): no return statement is visible although the function is
// declared to return bool -- confirm against the real header.
93  bool run ( Data& d ) {
94  using namespace HifstConstants;
95  using namespace ucam::hifst;
96  using namespace fst;
97  using namespace ucam::util;
98 
// Number of language models, reported below as "#LMs"; it is handed to
// LoadSparseWeightsTask and to myMappingProcedure.
99  unsigned offset = 1;
101  offset = rg_.get<unsigned>(kRulesToWeightsNumberOfLanguageModels);
102  } else if (rg_.exists(kLmFeatureweights)) {
// Fallback: count the comma-separated entries of the LM feature weights option.
103  offset = rg_.getVectorString(kLmFeatureweights).size();
104  } else {
105  LERROR("Cannot determine parameter to find the number of language models! (" << kRulesToWeightsNumberOfLanguageModels << "," << kLmFeatureweights << ")");
106  exit(EXIT_FAILURE);
107  }
108  LINFO("#LMs =" << offset);
109 
// alilats holds the NAME of the registry key for the input lattices (not a
// path); range is the key/constant handed to IntRangeFactory below.
110  std::string alilats;
111  std::string range = kRangeOne;
113  alilats = kRulesToWeightsLoadalilats;
114  } else if (rg_.exists(kHifstLatticeStore)) {
115  alilats = kHifstLatticeStore;
116  range = kRangeInfinite; // hifst doesn't have range.
117  } else {
// NOTE(review): this message prints kRulesToWeightsLatticeFilterbyAlilats,
// while the branch above assigns kRulesToWeightsLoadalilats -- confirm which
// key is intended.
118  LERROR("Could not determine parameter to find input lattices ! (" << kRulesToWeightsLatticeFilterbyAlilats << "," << kHifstLatticeStore << ")" );
119  exit(EXIT_FAILURE);
120  }
// loadgrammar likewise holds the name of the registry key for the grammar.
121  std::string loadgrammar;
123  loadgrammar=kRulesToWeightsLoadGrammar;
124  } else if (rg_.exists(kGrammarLoad)) {
125  loadgrammar=kGrammarLoad;
126  } else {
127  LERROR("Grammar parameter is unavailable ! (" << kRulesToWeightsLoadGrammar << "," << kGrammarLoad << ")" );
128  exit(EXIT_FAILURE);
129  }
130 
// The value returned by p.run(d) is not checked.
131  LoadSparseWeightsTask<Data> p(rg_, offset, alilats, loadgrammar);
132  p.run(d);
133 
134  typedef TupleArc32 Arc;
// pi / po are built from the input and output option values; calling them
// with the current range value yields the file name used for that iteration.
135  PatternAddress<unsigned> pi (rg_.get<std::string> (alilats ) );
136  PatternAddress<unsigned> po (rg_.get<std::string> (kRulesToWeightsLatticeStore ) );
137  for ( IntRangePtr ir (IntRangeFactory ( rg_, range ) );
138  !ir->done();
139  ir->next() ) {
140 
// With the infinite range, the first missing input file ends the loop.
141  if (!ucam::util::fileExists(pi (ir->get() ))
142  && range == kRangeInfinite) {
143  // silently finish
144  break;
145  }
// NOTE(review): mfst is a raw pointer and no delete is visible in this loop --
// possible leak per iteration; confirm who owns the result of VectorFstRead.
// The "Reading" message is logged after the read call has been made.
146  VectorFst<Arc> *mfst = VectorFstRead<Arc> (pi (ir->get() ) );
147  FORCELINFO("Reading: " << pi (ir->get() ) );
148  myMappingProcedure(mfst, d, offset);
149  std::string auxs = po (ir->get() );
150  FORCELINFO("Writing: " << auxs);
151  FstWrite<Arc> (*mfst, auxs);
152  }
153  };
154 
155  inline bool operator() () {
156  Data d;
157  return run ( d );
158  };
159 
160  private:
161 
// Builds a RulesToWeightsMapperObject from d and lmOffset and passes mfst,
// together with `gam`, to fst::Map.
// NOTE(review): `gam` is not declared in the visible lines (embedded line 167
// is missing from this listing); presumably it is a mapper wrapping `m` --
// confirm against the real header.
162  void myMappingProcedure(fst::VectorFst<TupleArc32> *mfst
163  , Data &d
164  , unsigned lmOffset) {
165 
166  RulesToWeightsMapperObject m(d, lmOffset);
168  fst::Map(mfst, gam);
169  }
170 
171  DISALLOW_COPY_AND_ASSIGN ( SingleThreadededRulesToWeightsSparseLatsTask );
172 };
173 
174 }} // end namespaces
175 
176 
void run(ucam::util::RegistryPO const &rg)
const std::string kHifstLatticeStore
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
const std::string kRulesToWeightsNumberOfLanguageModels
std::vector< std::string > getVectorString(const std::string &key) const
Convenience method that returns a vector of strings taking "," as the separator character.
Definition: registrypo.hpp:245
#define LINFO(msg)
bool run(DataT &d)
Inherited method from ucam::util::TaskInterface. Loads the flower lattice into the data object...
Definition: fstio.hpp:27
SingleThreadededRulesToWeightsSparseLatsTask(const ucam::util::RegistryPO &rg)
ucam::hifst::RuleIdsToSparseWeightLatsData * d_
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
#define FORCELINFO(msg)
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
Definition: range.hpp:214
fst::TropicalWeightTpl< F > Map(double)
#define IntRangeFactory
Definition: range.hpp:213
const std::string kRangeInfinite
Definition: range.hpp:27
ucam::hifst::RuleIdsToSparseWeightLatsData::WeightsTableIt WeightsTableIt
RulesToWeightsMapperObject(ucam::hifst::RuleIdsToSparseWeightLatsData<> &d, unsigned lmOffset)
ToArc operator()(FromArc const &arc) const
Templated (hybrid) Interface for Task classes.
const std::string kRulesToWeightsLatticeFilterbyAlilats
std::string const kLmFeatureweights
const std::string kRulesToWeightsLoadalilats
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
Definition: registrypo.hpp:235
Data class containing relevant variables. To be used as template for task classes using it...
fst::ArcTpl< TupleW32 > TupleArc32
const std::string kRulesToWeightsLoadGrammar
bool fileExists(const std::string &fileName)
Full single-threaded Alignment lattices to Sparse lattices.
#define LERROR(msg)
const std::string kRulesToWeightsLatticeStore
const std::string kRangeOne
Definition: range.hpp:26
Definition: bleu.hpp:14
const std::string kGrammarLoad