12 typedef std::unordered_map<std::size_t, std::string>
labelmap_t;
32 if (filename ==
"")
return NULL;
34 return fst::VectorFstRead<ArcT>(filename);
37 VectorFst<ArcT> *i =
new VectorFst<ArcT>;
43 std::vector<std::string> wrds;
44 boost::split(wrds, sentence, boost::is_any_of(
" "));
46 for (
unsigned k = 0; k < wrds.size(); ++k) {
47 unsigned n =i->AddState();
48 unsigned label = ucam::util::toNumber<unsigned>(wrds[k]);
49 i->AddArc(p,ArcT(label, label, ArcT::Weight::One(), n));
52 i->SetFinal(p, ArcT::Weight::One());
55 *i = DeterminizeFst<ArcT>(*i);
66 HypW (std::basic_string<unsigned>
const& h
67 , std::basic_string<unsigned>
const& oh
68 ,
typename Arc::Weight
const& c)
69 :
Hyp<Arc> (h, oh, c) {
87 os << std::setprecision(precision) << weight;
103 std::map<int,float> costs;
104 std::string separator (
",");
106 for (fst::SparseTupleWeightIterator<fst::TropicalWeight, int> it (weight);
107 !it.Done(); it.Next() ) {
108 costs[it.Value().first] += it.Value().second.Value();
112 for (std::map<int,float>::const_iterator itx=costs.begin();
113 itx != costs.end(); ++itx) {
114 os <<
" " << itx->first <<
":" << itx->second;
120 os <<
"0" << separator << costs.size();
121 for (std::map<int,float>::const_iterator itx=costs.begin()
124 os << separator << itx->first << separator << std::setprecision(precision) << itx->second;
131 for (std::map<int,float>::const_iterator itx=costs.begin()
134 if (itx->first < 1)
continue;
136 float fw = fws[itx->first - 1];
137 w = w + fw * itx->second;
139 os << std::setprecision(precision) << w;
143 std::size_t counter = 1;
145 for (std::map<int,float>::const_iterator itx=costs.begin()
148 if (itx->first < 1 )
continue;
149 std::size_t featureIndex = itx->first;
150 for (std::size_t featureMissingIndex = counter;
151 featureMissingIndex < featureIndex; ++featureMissingIndex) {
152 os << separator <<
"0";
155 os << separator << itx->second;
156 counter = itx->first + 1;
159 for (; counter <= nonSparseSize; ++counter) {
160 os << separator <<
"0";
168 for (
int k=0; k<h.size(); k++) {
169 if (h[k] ==
OOV)
continue;
170 if (h[k] ==
DR)
continue;
172 if (h[k] ==
SEP)
continue;
173 if (h[k] == 1)
continue;
174 if (h[k] == 2)
continue;
184 std::ostream& operator<< (std::ostream& os, const Hyp<Arc>& obj) {
185 for (
unsigned k = 0; k < obj.hyp.size(); ++k) {
186 if (obj.hyp[k] ==
OOV)
continue;
187 if (obj.hyp[k] ==
DR)
continue;
188 if (obj.hyp[k] ==
EPSILON)
continue;
189 if (obj.hyp[k] ==
SEP)
continue;
190 os << obj.hyp[k] <<
" ";
204 std::ostream& operator<< (std::ostream& os, const HypW<Arc>& obj) {
205 for (
unsigned k = 0; k < obj.hyp.size(); ++k) {
206 if (obj.hyp[k] ==
OOV)
continue;
207 if (obj.hyp[k] ==
DR)
continue;
208 if (obj.hyp[k] ==
EPSILON)
continue;
209 if (obj.hyp[k] ==
SEP)
continue;
211 if (itx !=
vmap.end() )
212 os <<
vmap[ obj.hyp[k] ] <<
" ";
214 os <<
"[" << obj.hyp[k] <<
"] ";
215 std::cerr <<
"\nWARNING: word map does not contain word " << obj.hyp[k] <<
226 template <
class Arc,
class HypT>
236 unsigned n = rg.
get<
unsigned> (
kNbest.c_str() );
237 boost::scoped_ptr<oszfstream> out;
242 if (printInputOutputLabels)
243 FORCELINFO(
"Printing input and output labels...");
245 std::string refFiles;
246 bool intRefs =
false;
248 bool sentbleu =
false;
261 LERROR(
"Must provide references to compute sentence level bleu");
275 LERROR(
"Sparse format and dot product are not available at the same time.");
289 LERROR(
"Must provide references to compute features for liblinear rankings");
308 boost::scoped_ptr<fst::VectorFst<Arc> > ifst (fst::VectorFstRead<Arc> (input (
311 if (old != output (ir->get() ) ) {
312 out.reset (
new oszfstream (output (ir->get() ) ) );
313 old = output (ir->get() );
315 if (!ifst->NumStates() ) {
316 *out <<
"[EMPTY]" << std::endl;
320 if (printOutputLabels)
321 fst::Project(&*ifst, PROJECT_OUTPUT);
322 else if (!printInputOutputLabels)
323 fst::Project(&*ifst, PROJECT_INPUT);
325 fst::VectorFst<Arc> nfst;
328 ShortestPath (*ifst, &nfst, 1, unique);
329 std::vector<HypT> hyps1;
330 fst::printStrings<Arc> (nfst, &hyps1);
337 boost::scoped_ptr< VectorFst<Arc> > intersection
338 (createIntersectionSpace<Arc>( intersectionLattice( ir->get() ) ));
341 if (intersection.get()) {
343 *ifst = ComposeFst<Arc>(*intersection, *ifst);
346 if (!ifst->NumStates() ) {
347 *out <<
"[EMPTY]" << std::endl;
355 fst::RmEpsilon<Arc>(&*ifst);
358 ShortestPath (*ifst, &nfst, n, unique );
360 std::vector<HypT> hyps;
361 fst::printStrings<Arc> (nfst, &hyps);
362 for (
unsigned k = 0; k < hyps.size(); ++k) {
373 *out->getStream() << sbleu <<
" qid:" << ir->get();
374 printWeight<Arc>(hyps[k].cost, *out->getStream());
375 *out->getStream() << std::endl;
378 if (printInputOutputLabels) {
379 for (
unsigned j = 0; j < hyps[k].hyp.size(); ++j)
380 if (hyps[k].
hyp[j] != 0)
381 *out->getStream() << hyps[k].hyp[j] <<
" ";
382 *out->getStream() <<
"\t";
383 for (
unsigned j = 0; j < hyps[k].ohyp.size(); ++j)
384 if (hyps[k].
ohyp[j] != 0)
385 *out->getStream() << hyps[k].ohyp[j] <<
" ";
387 *out->getStream() <<
"\t" << std::setprecision(
myPrecision) << hyps[k].cost;
389 *out->getStream() << hyps[k];
392 *out->getStream() <<
"\t" << sbStats <<
"\t" << sbleu;
393 *out->getStream() << std::endl;
399 FORCELINFO(
"Processed " << nlines <<
" files");
408 int main (
int argc,
const char* argv[] ) {
414 FORCELINFO ( rg.
dump (
"CONFIG parameters:\n=====================",
415 "=====================" ) );
426 LERROR(
"Sparse format and dot product are not available at the same time.");
435 const std::string& tuplearcWeights = rg.
exists (
438 if (tuplearcWeights.empty() ) {
439 LERROR (
"The tuplearc.weights option needs to be specified " 440 "for the tropical sparse tuple weight semiring " 441 "(--semiring=tuplearc)");
446 std::string
const& semiring = rg.
get<std::string>
454 while (f >> word >>
id) {
459 run<fst::StdArc, HypW<fst::StdArc> > (rg);
461 run<fst::LexStdArc, HypW<fst::LexStdArc> > (rg);
463 run<TupleArc32, HypW<TupleArc32> > (rg);
465 LERROR (
"Sorry, semiring option not correctly defined");
471 run<fst::StdArc, Hyp<fst::StdArc> > (rg);
473 run<fst::LexStdArc, Hyp<fst::LexStdArc> > (rg);
475 run<TupleArc32, Hyp<TupleArc32> > (rg);
477 LERROR (
"Sorry, semiring option not correctly defined");
Wrapper stream class that writes to pipes, text files or gzipped files.
std::string const kHifstSemiring
HypW(std::basic_string< unsigned > const &h, std::basic_string< unsigned > const &oh, typename Arc::Weight const &c)
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
std::string const kPrintOutputLabels
bool DetectFstFile(std::string const &filename, std::string const &extname="fst")
Detect trivially by extension whether it is an fst or not.
VectorFst< ArcT > * createIntersectionSpace(std::string const &filename)
int run(ucam::util::RegistryPO const &rg)
std::basic_string< unsigned > ohyp
std::string const kSentBleu
T get(const std::string &key) const
Returns parsed value associated to key.
static std::vector< T > & Params()
void initLogger(int argc, const char *argv[])
Initializes the logger and parses the parameter options, checking for --logger.verbose.
std::string const kExternalTokenizer
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
std::string const kIntRefs
std::string const kWeight
std::string const kOutput
int main(int argc, const char *argv[])
std::string const kWordRefs
Struct template that represents a hypothesis in a lattice.
Implements Tropical Sparse tuple weight semiring, extending from openfst SparsePowerWeight class...
labelmap_t::iterator labelmap_iterator_t
std::string const kPrintInputOutputLabels
Same as Hyp but the printing will convert integer ids to words.
std::string const kLabelMap
iszfstream & getline(iszfstream &izs, std::string &line)
std::string const kHifstSemiringLexStdArc
std::string const kHifstSemiringStdArc
Static variables for logger. Include only once from main file.
std::string getString(const std::string &key) const
Performs get<string> and checks whether the real value is to be loaded from a file (--param=file://...).
std::string const kHifstSemiringTupleArc
ucam::fsttools::SentenceIdx RemoveUnprintable(const ucam::fsttools::SentenceIdx &h)
void printWeight< TupleArc32 >(const TupleW32 &weight, std::ostream &os, unsigned precision)
Template specialization of printWeight for a tropical sparse tuple weight. Uses the global var sparse...
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
std::basic_string< unsigned > hyp
std::unordered_map< std::size_t, std::string > labelmap_t
void printWeight(typename Arc::Weight const &weight, std::ostream &os, unsigned precision=myPrecision)
Templated method that prints an arc weight. By default, reuses the operator<< already defined for eac...
std::string const kLibLinRankFormat
std::string dump(const std::string &decorator_start="", const std::string &decorator_end="")
Dumps all configuration parameters into a string with a reasonably pretty format. ...
std::string const kWeightPrecision
std::string const kTupleArcWeights
HypW(HypW< Arc > const &h)
std::string const kSuppress
const std::string kRangeOne
std::string const kSparseDotProduct
std::string const kSparseFormat
Wrapper stream class that reads pipes, text files or gzipped files.
std::string const kIntersectionWithHypothesesLoad
std::string const kUnique
Static variable for custom_assert. Include only once from main file.