Cambridge SMT System
task.loadunimap.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef TASK_LOADUNIMAP_HPP
16 #define TASK_LOADUNIMAP_HPP
17 
26 
27 namespace ucam {
28 namespace fsttools {
29 
33 template<class Data, class Arc = fst::StdArc >
35 
36  typedef typename Arc::Weight Weight;
37  typedef typename Arc::Label Label;
38 
39  private:
40 
42  std::string unimapfile_;
44  fst::VectorFst<Arc> *unimap_;
46  float uscale_;
48  unordered_set<std::string> vcblm_;
49 
51  const ucam::util::RegistryPO& rg_;
52 
53  //Key for a place to store a pointer to the unimap fst in the data object
54  const std::string unimapkey_;
55 
58 
60  bool loaded_;
61 
62  public:
63 
70  const std::string& unimapkey = HifstConstants::kRecaserUnimapLoad,
71  const std::string& lmkey = HifstConstants::kRecaserLmLoad
72  ) :
73  rg_ ( rg ),
74  unimapkey_ ( unimapkey ),
75  unimapfile_ ( rg.get<std::string> ( unimapkey ) ),
76  uscale_ ( rg.get<float> ( "recaser.unimap.scale" ) ),
77  loaded_ ( false ),
78  unimap_ ( NULL ) {
79  if ( rg.get<std::string> ( lmkey ) == ""
80  && rg.get<std::string> ( unimapkey ) == "" ) return;
81  if ( ! USER_CHECK ( rg.get<std::string> ( lmkey ) != ""
82  && rg.get<std::string> ( unimapkey ) != ""
83  , "recaser.lm.load and recaser.unimap.load must either be both defined or both left to empty string " ) )
84  return;
85  };
86 
91  bool run ( Data& d ) {
92  load();
93  LINFO ( "Unimap model available at key=" << unimapkey_ );
94  d.fsts[unimapkey_] = unimap_;
95  d.recasingvcblm = &vcblm_;
96  return false;
97  };
98 
100  virtual ~LoadUnimapTask() {
101  delete unimap_;
102  }
103 
104  private:
105 
107  inline void load() {
108  if ( loaded_ ) return;
109  if ( unimapfile_ == "" ) return;
110  LINFO ( "Read Unigram Model" );
111  ucam::util::iszfstream umf ( unimapfile_ );
112  unimap_ = new fst::VectorFst<Arc>;
113  loadflowerfst<Arc> ( umf, *unimap_ );
114  umf.close();
115  LINFO ( "Applying uscale=" << uscale_ );
116  SetGsf<Arc> ( unimap_, uscale_ );
117  fst::extractTargetVocabulary<Arc> ( *unimap_, &vcblm_ );
118  LDBG_EXECUTE ( unimap_->Write ( "unimap.fst" ) );
119  loaded_ = true;
120  }
121 
122 };
123 
124 }
125 } // end namespaces
126 
127 #endif // TASK_LOADUNIMAP_HPP
128 
#define LINFO(msg)
std::string const kRecaserUnimapLoad
std::string const kRecaserLmLoad
#define LDBG_EXECUTE(order)
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
Loads a unigram transduction model (aka unimap file) from a file with the format accepted by the SRILM disambig tool.
Templated (hybrid) Interface for Task classes.
virtual ~LoadUnimapTask()
Destructor.
Templated functor that creates a weight given a float.
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
Utilities for DisambigTask and related tasks.
bool run(Data &d)
Loads unimap fst, and delivers pointer in data object.
LoadUnimapTask(const ucam::util::RegistryPO &rg, const std::string &unimapkey=HifstConstants::kRecaserUnimapLoad, const std::string &lmkey=HifstConstants::kRecaserLmLoad)
Constructor.
Wrapper stream class that reads pipes, text files or gzipped files.
Definition: szfstream.hpp:34
Definition: bleu.hpp:14