Cambridge SMT System
countstrings.main.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #include <main.countstrings.hpp>
16 #include <main.custom_assert.hpp>
17 #include <main.logger.hpp>
18 #include <boost/multiprecision/cpp_int.hpp>
19 
20 //assumes topsorted fst
21 template<class Arc ,
22  typename IntegerT >
23 IntegerT countstrings (fst::VectorFst<Arc>& myfst) {
24  std::vector<IntegerT> counts;
25  IntegerT cumcounts = 0;
26  counts.assign (myfst.NumStates(), 0);
27  counts[0] = 1;
28  typedef typename Arc::StateId StateId;
29  for (fst::StateIterator< fst::VectorFst<Arc> > si (myfst);
30  !si.Done();
31  si.Next() ) {
32  typename Arc::StateId state_id = si.Value();
33  if (myfst.Final (state_id) != Arc::Weight::Zero() ) {
34  cumcounts += counts[state_id];
35  }
36  for (fst::MutableArcIterator< fst::MutableFst<Arc> > ai (&myfst, si.Value() );
37  !ai.Done();
38  ai.Next() ) {
39  Arc arc = ai.Value();
40  counts[arc.nextstate] += counts[state_id];
41  }
42  }
43  return cumcounts;
44 };
45 
// Reads one FST per value produced by the range `ir`, counts its strings and
// writes the count to the matching output file.
//
// \tparam Arc  Arc type used to read the FSTs and to instantiate countstrings.
// \param rg    Parsed program options; the input and output patterns are
//              obtained from it through rg.get<std::string>.
//
// NOTE(review): listing lines 50 and 52-54 are missing from this extract, so
// the keys passed to rg.get and the declaration of `ir` are not visible here.
46 template<class Arc>
47 void run(ucam::util::RegistryPO const &rg) {
48 
// `pi` maps a range value to an input file name, `po` to an output file name.
49  ucam::util::PatternAddress<unsigned> pi (rg.get<std::string>
51  ucam::util::PatternAddress<unsigned> po (rg.get<std::string>
55  !ir->done();
56  ir->next() ) {
// The FST is heap-allocated by the reader and released with delete below.
57  fst::VectorFst<Arc> *mfst = fst::VectorFstRead<Arc> (pi (
58  ir->get() ) );
// A failed read terminates the whole program, not just this iteration.
59  if (!mfst) {
60  LERROR("Could not read file:" << ir->get());
61  exit(EXIT_FAILURE);
62  }
// countstrings assumes a topologically sorted FST, hence the sort first.
63  TopSort (mfst);
// Counts are accumulated in an unsigned 128-bit integer.
// NOTE(review): behaviour when the count exceeds 128 bits is not handled
// here - presumably it wraps; confirm against Boost.Multiprecision.
64  boost::multiprecision::uint128_t j =
65  countstrings<Arc, boost::multiprecision::uint128_t> (*mfst);
// The count is rendered to text once and reused for the file and the log.
66  std::stringstream ss;
67  ss << j;
68  ucam::util::oszfstream o (po (ir->get() ), true);
69  o << ss.str() << std::endl;
70  LINFO ( pi (ir->get() ) << ":" << ss.str() ) ;
71  o.close();
72  delete mfst;
73  }
74 }
75 
76 
77 int main (int argc, const char* argv[] ) {
78  ucam::util::initLogger ( argc, argv );
79  FORCELINFO ( argv[0] << " starts!" );
80  ucam::util::RegistryPO rg ( argc, argv );
81  FORCELINFO ( rg.dump ( "CONFIG parameters:\n=====================",
82  "=====================" ) ) ;
83 
84  std::string semiring = rg.get<std::string> (HifstConstants::kHifstSemiring);
85  if (semiring == HifstConstants::kHifstSemiringStdArc) {
86  run<fst::StdArc> (rg);
87  } else if (semiring == HifstConstants::kHifstSemiringLexStdArc) {
88  run<fst::LexStdArc> (rg);
89  } else if (semiring == HifstConstants::kHifstSemiringTupleArc) {
90  run<TupleArc32> (rg);
91  } else {
92  LERROR ("Sorry, semiring option not correctly defined");
93  }
94  FORCELINFO ( argv[0] << " ends!" );
95 }
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
std::string const kHifstSemiring
void run(ucam::util::RegistryPO const &rg)
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
#define LINFO(msg)
T get(const std::string &key) const
Returns parsed value associated to key.
Definition: registrypo.hpp:194
void initLogger(int argc, const char *argv[])
Inits logger, parses param options checking for --logger.verbose.
IntegerT countstrings(fst::VectorFst< Arc > &myfst)
std::string const kInput
#define FORCELINFO(msg)
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
Definition: range.hpp:214
#define IntRangeFactory
Definition: range.hpp:213
std::string const kOutput
std::string const kHifstSemiringLexStdArc
std::string const kHifstSemiringStdArc
Static variables for logger. Include only once from main file.
std::string const kHifstSemiringTupleArc
std::string dump(const std::string &decorator_start="", const std::string &decorator_end="")
Dumps all configuration parameters into a string with a reasonably pretty format. ...
Definition: registrypo.hpp:108
#define LERROR(msg)
Included headers for all the binary should be defined here. This file should be included only once...
int main(int argc, const char *argv[])
const std::string kRangeOne
Definition: range.hpp:26
Static variable for custom_assert. Include only once from main file.
void close()
Closes the file.
Definition: szfstream.hpp:323