Cambridge SMT System
disambignffst.main.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #include <main.disambignffst.hpp>
16 #include <main.custom_assert.hpp>
17 #include <main.logger.hpp>
18 #include <common-helpers.hpp>
19 #include <main.hpp>
20 
21 // Simple wrapper around TopoFeaturesHelper class
22 // which handles lattice-specific options provided by the user
23 template<class ArcT>
25  std::string const in_, out_;
26  bool const detOut_;
27  bool const minimize_;
29  bool const useOpenFst_;
31 
32  DisambigFunctor(std::string const &in, std::string const &out
33  , bool detOut, bool minimize, bool exitOnFirstPassFailure
34  , bool useOpenFst
35  )
36  : in_(in)
37  , out_(out)
38  , detOut_(detOut)
39  , minimize_(minimize)
40  , exitOnFirstPassFailure_(exitOnFirstPassFailure)
41  , useOpenFst_(useOpenFst)
42  {}
43 
45  : in_(df.in_)
46  , out_(df.out_)
47  , detOut_(df.detOut_)
48  , minimize_(df.minimize_)
49  , exitOnFirstPassFailure_(df.exitOnFirstPassFailure_)
50  , ssd_(df.ssd_)
51  , useOpenFst_(df.useOpenFst_)
52  {}
53 
54  void operator()() {
55  using namespace fst;
57  using namespace ucam::fsttools;
58 
59  boost::scoped_ptr<fst::VectorFst<ArcT> >mfst
60  (fst::VectorFstRead<ArcT> ( in_ ) );
61  if (mfst->NumStates()) {
62  if (detOut_) // invert fst if selected by user.
63  Invert(&*mfst);
64  RmEpsilon(&*mfst);
65  TopSort(&*mfst);
66 
67  std::string ns = toString(mfst->NumStates());
68  ssd_.setTimeStart("disambig-" + out_ + ", NS=" + ns);
69  if (!useOpenFst_) {
70  if (!minimize_) {
71  LINFO("Only determinize");
72  TopoFeaturesHelper<ProjectDeterminizeAction> tfh(exitOnFirstPassFailure_);
73  tfh(&*mfst);
74  } else { // Experimental option to play with.
75  LINFO("Determinize, Minimize and Push!");
77  tfh(&*mfst);
78  }
79  RmEpsilon(&*mfst); // take out epsilons created by reversals etc.
80  ssd_.setTimeEnd("disambig-" + out_ + ", NS=" + ns);
81  std::string nsd = toString(mfst->NumStates());
82  FORCELINFO(out_ << ": NS=" << ns << ",NSD=" << nsd);
83  LDBG_EXECUTE(mfst->Write("08-det-topo-final-rm.fst"));
84  } else {
85  // for quick comparisons. Note: not relabeling input epsilons.
86  // affiliation lattices are epsilon-free FSTs.
87 #if OPENFSTVERSION >= 1004001
88  LINFO("Openfst Determinize...");
89  DeterminizeFstOptions<ArcT> dto;
90 #if OPENFSTVERSION >= 1005000
91  dto.type = DETERMINIZE_DISAMBIGUATE;
92 #else
93  dto.disambiguate_output = true;
94 #endif
95  *mfst = DeterminizeFst<ArcT>(*mfst, dto);
96 #else
97  LERROR("Openfst Determinize for non-functional FSTs is not supported ("
98  << OPENFSTVERSION << ")");
99  exit(EXIT_FAILURE);
100 #endif
101  ssd_.setTimeEnd("disambig-" + out_ + ", NS=" + ns);
102  }
103  } else {
104  ssd_.setTimeStart("disambig-" + out_ + ", NS=0");
105  ssd_.setTimeEnd("disambig-" + out_ + ", NS=0");
106  }
107  if (detOut_) // invert back
108  Invert(&*mfst);
109 
110  FstWrite<StdArc> (*mfst, out_ );
111  ssd_.write(std::cerr);
112  }
113 };
114 
115 
116 // Templated method that runs over a list of FST files
117 // loads, runs disambiguation and stores.
118 template<class ArcT, class ThreadPoolT >
119 inline void run(ucam::util::RegistryPO const &rg
120  , ThreadPoolT &tp) {
121  using namespace HifstConstants;
122  using namespace ucam::util;
123 
124  PatternAddress<unsigned> pi (rg.get<std::string>(kInput) );
125  PatternAddress<unsigned> po (rg.get<std::string>(kOutput) );
126  bool detOut = rg.getBool(kDeterminizeOutput);
127  bool minimize = rg.getBool(kMinimize);
128  bool exitOnFirstPassFailure = rg.getBool(kExitOnFirstPassFailure);
129  bool useOpenFst = rg.getBool(kUseOpenFst);
130  using namespace fst;
131  for ( IntRangePtr ir (IntRangeFactory ( rg, kRangeOne ) );
132  !ir->done();
133  ir->next() ) {
134  DisambigFunctor<ArcT> df(pi(ir->get()), po(ir->get())
135  , detOut, minimize, exitOnFirstPassFailure
136  , useOpenFst);
137  tp(df);
138  }
139 };
140 
141 // The main function calls this overloaded method (see main.hpp for details).
142 // Determines the semiring and threading setup, then kicks off disambiguation of
143 // (potentially non-functional) FSTs using topological features.
145  using namespace HifstConstants;
146  using namespace ucam::util;
147  std::string const arctype =rg_->get<std::string>(kHifstSemiring);
148  unsigned nthreads = (rg_->exists( kNThreads) )
149  ? rg_->get<unsigned>(kNThreads) : 0;
150 
151  if (arctype == kHifstSemiringStdArc) {
152  if (nthreads) { // even the single thread is in the trivial pool
153  FORCELINFO("Multithreading with " << nthreads << " threads");
154  TrivialThreadPool tp(nthreads);
155  ::run<fst::StdArc, TrivialThreadPool>(*rg_, tp);
156  } else { // not defined, will run single thread without the trivial pool
157  NoThreadPool ntp;
158  ::run<fst::StdArc, NoThreadPool>(*rg_, ntp);
159  }
160  } else {
161  LERROR("Unknown semiring!");
162  exit(EXIT_FAILURE);
163  }
164 }
std::string const kHifstSemiring
bool const exitOnFirstPassFailure_
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string Converts to string integers, floats, doubles Quits execution if ...
#define LINFO(msg)
Definition: fstio.hpp:27
void setTimeEnd(const std::string &key)
Store absolute timing value right after an execution.
Definition: data.stats.hpp:39
#define LDBG_EXECUTE(order)
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
std::string const kInput
#define FORCELINFO(msg)
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
Definition: range.hpp:214
#define IntRangeFactory
Definition: range.hpp:213
void run()
Include all necessary headers here.
A wrapper that runs maps labels to topological features, runs an "action" (sequence of standard fst o...
Trivial implementation of a threadpool based on boost::asio methods When initiated, creates a threadpool of n threads (n <= number of cpus). Jobs should be submitted with the templated operator(). When the object is deleted it will wait for all threads to finish.
std::string const kOutput
ucam::fsttools::SpeedStatsData ssd_
std::string const kExitOnFirstPassFailure
Trivial struct that can replace seamlessly the threadpool for single threaded executions.
std::string const kDeterminizeOutput
std::string const kHifstSemiringStdArc
Static variables for logger. Include only once from main file.
const std::string kNThreads
std::string const out_
std::string const kUseOpenFst
bool getBool(const std::string &key) const
To handle yes|no program option values.
Definition: registrypo.hpp:225
std::string const in_
void setTimeStart(const std::string &key)
Store absolute timing value last thing, just before executing.
Definition: data.stats.hpp:33
#define LERROR(msg)
void run(ucam::util::RegistryPO const &rg, ThreadPoolT &tp)
#define OPENFSTVERSION
DisambigFunctor(DisambigFunctor const &df)
DisambigFunctor(std::string const &in, std::string const &out, bool detOut, bool minimize, bool exitOnFirstPassFailure, bool useOpenFst)
const std::string kRangeOne
Definition: range.hpp:26
void write(std::ostream &o)
Dumps time measurements as a list of pairs key1:time1 key2:time2 ... Each key is expected to be seman...
Definition: data.stats.hpp:53
Included headers for all the binary should be defined here. This file should be included only once...
std::string const kMinimize
Static variable for custom_assert. Include only once from main file.