Cambridge SMT System
task.disambig.flowerfst.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef TASK_DISAMBIG_FLOWER_HPP
16 #define TASK_DISAMBIG_FLOWER_HPP
17 
25 namespace ucam {
26 namespace fsttools {
27 
28 // Applies the scale factor gsf to the costs on the arcs of the grammar FST.
// grmfst: grammar FST handed to fst::Map together with the scaling functor.
// gsf:    scale factor; when it is exactly 1.0f the function returns at once
//         and grmfst is left untouched.
29 template <class Arc>
30 void SetGsf ( fst::VectorFst<Arc> *grmfst, const float gsf ) {
31  if ( gsf == 1.0f ) return;
32  fst::ScaleWeight<Arc> sw ( gsf );
// NOTE(review): the fst::Map call below is never closed in this listing; its
// second argument (presumably a mapper built around sw) appears to be missing.
// Confirm against the original file before relying on this block.
33  fst::Map<Arc> ( grmfst,
35 };
36 
37 template <>
38 void SetGsf ( fst::VectorFst<TupleArc32> *grmfst, const float gsf ) {
39  LERROR("Sorry, TupleArc32 not supported!");
40  exit(EXIT_FAILURE);
41 };
42 
43 
45 template < class Arc >
47  fst::VectorFst<Arc>& flowerlattice ) {
48  std::string line;
49  typedef typename Arc::Weight Weight;
50  flowerlattice.AddState();
51  flowerlattice.SetStart ( 0 );
52  flowerlattice.SetFinal ( 0, Weight::One() );
54  while ( umf.getline ( line ) ) {
56  std::vector<std::string> aux;
57  boost::algorithm::split ( aux, line, boost::algorithm::is_any_of ( " " ) );
58  USER_CHECK ( aux.size() % 2, "Wrong unimap input file format" );
59  if ( aux[0] == "<unk>" ) continue; //skip this line
60  if ( aux[0] == "<s>" ) aux[0] = "1";
61  if ( aux[1] == "<s>" ) aux[1] = "1";
62  if ( aux[0] == "</s>" ) aux[0] = "2";
63  if ( aux[1] == "</s>" ) aux[1] = "2";
64  for ( unsigned k = 1; k < aux.size(); k += 2 ) {
65  float w = -std::log ( toNumber<float> ( aux[k + 1] ) ) ;
66  if (w != std::numeric_limits<float>::infinity() )
67  flowerlattice.AddArc ( 0, Arc ( toNumber<unsigned> ( aux[0] ),
68  toNumber<unsigned> ( aux[k] ), mw ( w ), 0 ) );
69  else
70  LWARN ("Skipping 0 probability at line:" << line);
71  }
72  }
73  flowerlattice.AddArc ( 0, Arc ( RHO, RHO, Weight::One(), 0 ) );
74  flowerlattice.AddArc ( 0, Arc ( OOV, OOV, Weight::One(), 0 ) ); //for OOVs...
75  ArcSort ( &flowerlattice, fst::ILabelCompare<Arc>() );
76 };
77 
80 template<class Arc>
81 inline void tagOOVs ( fst::VectorFst<Arc> *myfst,
82  unordered_set<std::string>& vcb ) {
84  typedef typename Arc::StateId StateId;
85  for ( fst::StateIterator< fst::VectorFst<Arc> > si ( *myfst ); !si.Done();
86  si.Next() ) {
87  StateId state_id = si.Value();
88  for ( fst::MutableArcIterator< fst::MutableFst<Arc> > ai ( myfst, si.Value() );
89  !ai.Done(); ai.Next() ) {
90  Arc arc = ai.Value();
91  if ( vcb.find ( ucam::util::toString<unsigned> ( arc.olabel ) ) == vcb.end() ) {
92  arc.ilabel = arc.olabel;
93  arc.olabel = OOV;
94  arc.weight = mw ( 0 );
95  ai.SetValue ( arc );
96  }
97  }
98  }
99 };
100 
102 
103 template<class Arc>
104 inline void recoverOOVs ( fst::VectorFst<Arc> *myfst ) {
105  typedef typename Arc::StateId StateId;
106  for ( fst::StateIterator< fst::VectorFst<Arc> > si ( *myfst ); !si.Done();
107  si.Next() ) {
108  StateId state_id = si.Value();
109  for ( fst::MutableArcIterator< fst::MutableFst<Arc> > ai ( myfst, si.Value() );
110  !ai.Done(); ai.Next() ) {
111  Arc arc = ai.Value();
112  if ( arc.olabel == OOV ) {
113  arc.olabel = arc.ilabel;
114  ai.SetValue ( arc );
115  }
116  }
117  }
118 };
119 
120 }
121 } // end namespaces
122 
123 #endif // TASK_DISAMBIG_FLOWER_HPP
124 
virtual iszfstream & getline(std::string &line)
Read a line.
Definition: szfstream.hpp:183
templated Mapper that modifies weights over an FST, passing through the other values of the arc...
Function object that applies to every single weight a scaling factor.
void loadflowerfst(ucam::util::iszfstream &umf, fst::VectorFst< Arc > &flowerlattice)
Loads flower fst from srilm disambig unigram input file.
Templated functor that creates a weight given a float.
#define LWARN(msg)
void SetGsf(fst::VectorFst< Arc > *grmfst, const float gsf)
void recoverOOVs(fst::VectorFst< Arc > *myfst)
Recover OOV original ids by projecting selectively.
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
T toNumber(const std::string &x)
Converts a string to an arbitrary number Converts strings to a number. Quits execution if conversion ...
#define OOV
#define LERROR(msg)
#define RHO
void tagOOVs(fst::VectorFst< Arc > *myfst, unordered_set< std::string > &vcb)
Wrapper stream class that reads pipes, text files or gzipped files.
Definition: szfstream.hpp:34
Definition: bleu.hpp:14