29 #include "lm/model.hh" 46 namespace bfs = boost::filesystem;
50 fst::VectorFst<fst::StdArc> a, b, c;
54 a.AddArc ( 0, fst::StdArc (
DR,
DR, 0.5, 1 ) );
55 a.AddArc ( 0, fst::StdArc (
OOV,
OOV, 1.5, 1 ) );
57 a.AddArc ( 1, fst::StdArc (
OOV,
OOV, 0.5, 2 ) );
59 a.AddArc ( 2, fst::StdArc ( 3, 3, 0.5, 3 ) );
60 a.SetFinal ( 3, fst::StdArc::Weight::One() );
61 fst::ArcSort ( &a, fst::OLabelCompare<fst::StdArc>() );
66 "999999998 999999998 3 || 999999998 999999998 3 || 2.5\n999999999 999999998 3 || 999999999 999999998 3 || 1.5\n" );
81 EXPECT_EQ ( ss.str(),
"" );
88 TEST ( fstutils, multiepsiloncomposition ) {
89 fst::VectorFst<fst::StdArc> a, b;
93 a.AddArc ( 0, fst::StdArc (
DR,
DR, 0.5, 1 ) );
94 a.AddArc ( 0, fst::StdArc (
OOV,
OOV, 1.5, 1 ) );
96 a.AddArc ( 1, fst::StdArc (
OOV,
OOV, 0.5, 2 ) );
98 a.AddArc ( 2, fst::StdArc ( 3, 3, 0.5, 3 ) );
99 a.SetFinal ( 3, fst::StdArc::Weight::One() );
103 b.AddArc ( 0, fst::StdArc ( 3, 3, 0.5, 1 ) );
104 b.SetFinal ( 1, fst::StdArc::Weight::One() );
105 fst::ArcSort ( &a, fst::OLabelCompare<fst::StdArc>() );
107 std::vector<fst::StdArc::Label> epsilons;
108 epsilons.push_back (
DR );
109 epsilons.push_back (
OOV );
113 std::vector<pair< fst::StdArc::Label, fst::StdArc::Label> > ipairs;
114 std::vector<pair< fst::StdArc::Label, fst::StdArc::Label> > opairs;
115 opairs.push_back ( pair< fst::StdArc::Label, fst::StdArc::Label> (
OOV,
117 opairs.push_back ( pair< fst::StdArc::Label, fst::StdArc::Label> (
DR,
121 EXPECT_TRUE ( Equivalent ( fst::VectorFst<fst::StdArc>
122 ( fst::DeterminizeFst<fst::StdArc> ( fst::MultiEpsilonCompose<fst::StdArc> ( a,
124 fst::VectorFst<fst::StdArc> ( fst::DeterminizeFst<fst::StdArc>
125 ( fst::ProjectFst<fst::StdArc> ( fst::ComposeFst<fst::StdArc>
126 ( fst::RelabelFst<fst::StdArc> ( a, ipairs, opairs ), b ),
127 fst::PROJECT_INPUT ) ) ) ) );
131 TEST ( fstutils, phicompose ) {
132 fst::VectorFst<fst::StdArc> a, b, c;
136 a.AddArc ( 0, fst::StdArc (
DR,
DR, 0.5, 1 ) );
138 a.SetFinal ( 1, fst::StdArc::Weight::One() );
142 b.AddArc ( 0, fst::StdArc (
PHI,
PHI, 0, 1 ) );
144 b.AddArc ( 1, fst::StdArc (
DR,
DR, 0, 2 ) );
145 b.SetFinal ( 2, fst::StdArc::Weight::One() );
146 fst::ArcSort ( &a, fst::OLabelCompare<fst::StdArc>() );
152 c.AddArc ( 0, fst::StdArc (
DR,
DR, 0, 1 ) );
153 c.AddArc ( 1, fst::StdArc (
PHI,
PHI, 0, 1 ) );
154 c.SetFinal ( 1, fst::StdArc::Weight::One() );
159 TEST ( fstutils, applylmonthefly ) {
163 o <<
"\\data\\" << endl;
164 o <<
"ngram 1=4" << endl;
165 o <<
"ngram 2=2" << endl;
166 o <<
"ngram 3=1" << endl;
168 o <<
"\\1-grams:" << endl;
169 o <<
"-1\t3\t0" << endl;
170 o <<
"-10\t4\t0" << endl;
171 o <<
"-100\t</s>\t0" << endl;
172 o <<
"0\t<s>\t0" << endl;
174 o <<
"\\2-grams:" << endl;
175 o <<
"-1000\t3 4\t0" << endl;
176 o <<
"-10000\t4 </s>\t0" << endl;
178 o <<
"\\3-grams:" << endl;
179 o <<
"-100000\t3 4 </s>" << endl;
181 o <<
"\\end\\" << endl;
185 fst::VectorFst<fst::StdArc> a;
189 a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
191 a.AddArc ( 1, fst::StdArc ( 3, 3, 1, 2 ) );
193 a.AddArc ( 2, fst::StdArc ( 4, 4, 1000, 3 ) );
195 a.AddArc ( 3, fst::StdArc ( 2, 2, 100000, 4 ) );
197 a.SetFinal ( 4, fst::StdArc::Weight::One() );
198 fst::VectorFst<fst::StdArc> c ( a );
200 fst::Map<fst::StdArc> ( &c, fst::RmWeightMapper<fst::StdArc>() );
201 std::unordered_set<fst::StdArc::Label> epsilons;
202 lm::ngram::Config kenlm_config;
205 kenlm_config.enumerate_vocab = &hev;
207 lm::ngram::Model *model =
new lm::ngram::Model (
"mylm" , kenlm_config);
211 fst::VectorFst<fst::StdArc> *output = f->
run(c);
212 EXPECT_TRUE ( Equivalent ( *output, a ) );
216 bfs::remove ( bfs::path (
"mylm" ) );
222 const fst::StdArc::Weight
operator() (
const fst::StdArc::Weight& w )
const {
223 return w.Value() != fst::StdArc::Weight::Zero() ? fst::StdArc::Weight::One() :
230 TEST ( fstutils, genericweightautomapper ) {
231 fst::VectorFst<fst::StdArc> a;
235 a.SetFinal ( 1, fst::StdArc::Weight::One() );
236 a.AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
237 fst::VectorFst<fst::StdArc> b ( a );
238 fst::Map<fst::StdArc> ( &b, fst::RmWeightMapper<fst::StdArc>() );
240 fst::Map<fst::StdArc> ( &a,
242 EXPECT_TRUE ( Equivalent ( a, b ) );
246 TEST ( fstutils, genericweightmapper ) {
247 fst::VectorFst<fst::StdArc> a;
251 a.SetFinal ( 1, fst::StdArc::Weight::One() );
252 a.AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
253 fst::VectorFst<fst::StdArc> b ( a );
254 fst::Map<fst::StdArc> ( &b, fst::RmWeightMapper<fst::StdArc>() );
255 fst::VectorFst<fst::StdArc> c;
260 EXPECT_TRUE ( Equivalent ( c, b ) );
263 TEST ( fstutils, multiunionrational ) {
264 fst::VectorFst<fst::StdArc> *a =
new fst::VectorFst<fst::StdArc>;
268 a->SetFinal ( 1, fst::StdArc::Weight::One() );
269 a->AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
270 fst::VectorFst<fst::StdArc> * b =
new fst::VectorFst<fst::StdArc>;
274 b->SetFinal ( 1, fst::StdArc::Weight::One() );
275 b->AddArc ( 0, fst::StdArc ( 100, 100, 0.1, 1 ) );
276 fst::VectorFst<fst::StdArc> * c =
new fst::VectorFst<fst::StdArc>;
280 c->SetFinal ( 1, fst::StdArc::Weight::One() );
281 c->AddArc ( 0, fst::StdArc ( 1000, 1000, 0.1, 1 ) );
282 fst::VectorFst<fst::StdArc> * d =
new fst::VectorFst<fst::StdArc>;
286 d->SetFinal ( 1, fst::StdArc::Weight::One() );
287 d->AddArc ( 0, fst::StdArc ( 10000, 10000, 0.1, 1 ) );
292 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j ( m() );
293 RmEpsilon ( j.get() );
296 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j2 ( m() );
297 RmEpsilon ( j2.get() );
302 EXPECT_TRUE ( Equivalent ( *j, *a ) );
305 EXPECT_TRUE ( Equivalent ( *j2, *a ) );
307 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j3 ( m() );
309 RmEpsilon ( j3.get() );
310 EXPECT_TRUE ( j3.get()->NumStates() != a->NumStates() );
313 TEST ( fstutils, multiunionreplace ) {
315 fst::VectorFst<fst::StdArc> *a =
new fst::VectorFst<fst::StdArc>;
319 a->SetFinal ( 1, fst::StdArc::Weight::One() );
320 a->AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
321 fst::VectorFst<fst::StdArc> * b =
new fst::VectorFst<fst::StdArc>;
325 b->SetFinal ( 1, fst::StdArc::Weight::One() );
326 b->AddArc ( 0, fst::StdArc ( 100, 100, 0.1, 1 ) );
327 fst::VectorFst<fst::StdArc> * c =
new fst::VectorFst<fst::StdArc>;
331 c->SetFinal ( 1, fst::StdArc::Weight::One() );
332 c->AddArc ( 0, fst::StdArc ( 1000, 1000, 0.1, 1 ) );
333 fst::VectorFst<fst::StdArc> * d =
new fst::VectorFst<fst::StdArc>;
337 d->SetFinal ( 1, fst::StdArc::Weight::One() );
338 d->AddArc ( 0, fst::StdArc ( 10000, 10000, 0.1, 1 ) );
343 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j ( m() );
344 RmEpsilon ( j.get() );
347 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j2 ( m() );
348 RmEpsilon ( j2.get() );
353 EXPECT_TRUE ( Equivalent ( *j, *a ) );
356 EXPECT_TRUE ( Equivalent ( *j2, *a ) );
358 boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j3 ( m() );
360 RmEpsilon ( j3.get() );
361 EXPECT_TRUE ( j3.get()->NumStates() != a->NumStates() );
364 TEST (fstutils, extractngrams ) {
365 fst::VectorFst<fst::StdArc> a, b;
369 a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
371 a.AddArc ( 1, fst::StdArc ( 2, 2, 0, 2 ) );
373 a.AddArc ( 2, fst::StdArc ( 3, 3, 0, 3 ) );
374 a.AddArc ( 1, fst::StdArc ( 5, 5, 0, 3 ) );
376 a.AddArc ( 3, fst::StdArc ( 4, 4, 0, 4 ) );
377 a.SetFinal ( 4, fst::StdArc::Weight::One() );
379 a.AddArc ( 2, fst::StdArc ( 6, 6, 0, 5 ) );
380 a.AddArc ( 5, fst::StdArc ( 7, 7, 0, 4 ) );
381 std::vector<fst::NGram> ng;
382 fst::extractNGrams<fst::StdArc> (a, ng, 5);
383 std::stringstream ss;
384 for (uint k = 0; k < ng.size(); ++k)
387 "1\n1 2\n1 2 3\n1 2 3 4\n1 2 6\n1 2 6 7\n1 5\n1 5 4\n2\n2 3\n2 3 4\n2 6\n2 6 7\n3\n3 4\n4\n5\n5 4\n6\n6 7\n7\n";
388 EXPECT_TRUE (ngrams == ss.str() );
389 fst::extractNGrams<fst::StdArc> (b, ng, 5);
393 fst::VectorFst<fst::StdArc> a, b;
397 a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
399 a.AddArc ( 1, fst::StdArc ( 3, 3, 1, 2 ) );
401 a.AddArc ( 2, fst::StdArc ( 4, 4, 1000, 3 ) );
403 a.AddArc ( 3, fst::StdArc ( 2, 2, 100000, 4 ) );
404 a.SetFinal ( 4, fst::StdArc::Weight::One() );
405 fst::string2fst<fst::StdArc> (
"1 3 4 2", &b,
"", fst::StdArc::Weight (101001) );
406 EXPECT_EQ (Equivalent (a, b),
true);
408 fst::string2fst<fst::StdArc> (
"1 3 4 2", &b,
"1 3 4 2",
409 fst::StdArc::Weight (101001) );
410 EXPECT_EQ (Equivalent (a, b),
true);
414 TEST ( fstutils, relabelutil) {
415 fst::VectorFst<fst::StdArc> a;
419 a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
421 a.AddArc ( 1, fst::StdArc ( 3, 3, 1, 2 ) );
423 a.AddArc ( 2, fst::StdArc ( 4, 4, 1000, 3 ) );
425 a.AddArc ( 3, fst::StdArc ( 2, 2, 100000, 4 ) );
426 a.SetFinal ( 4, fst::StdArc::Weight::One() );
428 fst::VectorFst<fst::StdArc> b (a);
430 EXPECT_EQ (Equivalent (rb (b), a),
433 std::vector<pair <fst::StdArc::Label, fst::StdArc::Label> > ipairs;
434 std::vector<pair <fst::StdArc::Label, fst::StdArc::Label> > opairs;
435 ipairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (3, 3000 ) );
436 ipairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (4, 4000 ) );
437 opairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (3, 3000) );
438 opairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (4, 4000) );
439 Relabel (&a, ipairs, opairs);
441 EXPECT_EQ (Equivalent (rb.
addIPL (3, 3000).addOPL (3, 3000).addIPL (4,
450 int main (
int argc,
char **argv ) {
451 ::testing::InitGoogleTest ( &argc, argv );
452 return RUN_ALL_TESTS();
Wrapper stream class that writes to pipes, text files or gzipped files.
Unit testing: google testing common header.
Convenience functors/functions for lexicographic<tropical,tropical> semiring.
Contains convenience functions to write and read fsts.
Implementation of different types of composition (i.e. failure transitions)
ComposeFst< Arc > RPhiCompose(const Fst< Arc > &fstlhs, const Fst< Arc > &fstrhs, const typename Arc::Label kSpecialLabel)
Performs composition with PHI, based on OpenFST matchers. PHI transitions are expected on fstrhs...
Extend EnumerateVocab to access kenlm ids.
RelabelUtil & addIPL(typename Arc::Label labelfind, typename Arc::Label labelreplace)
This class extends EnumerateVocab in kenlm code. This class creates a grammar-integer to lm-integer h...
fst::TropicalWeightTpl< F > Map(double)
templated Mapper that modifies weights over an FST, passing through the other values of the arc...
int main(int argc, char **argv)
void string2fst(const std::string &sidxwords, fst::VectorFst< Arc > *fst, const std::string &tidxwords="", typename Arc::Weight finalweight=Arc::Weight::One())
Convenience method that creates an fsa/fst from one/two string(s) of numbers.
TEST(fstutils, printstrings)
Implementations of multiple fst unions.
const fst::StdArc::Weight operator()(const fst::StdArc::Weight &w) const
This class creates the Union of an arbitrarily large number of fsts. This implementation was suggeste...
void printstrings(const fst::VectorFst< Arc > &pcostslat, std::ostream *hyps, unsigned s=0)
Trivial function that outputs all the hypothesis in the lattice with its cost.
Contains implementation of ApplyLanguageModelOnTheFly.
Maps between grammar target ids and lm ids
test-specific classes and functions
Static variables for logger. Include only once from main file.
templated Mapper that modifies weights when copying from one FST to another, passing through the othe...
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
Templated functor that creates a weight given a float.
Class that applies language model on the fly using kenlm.
Headers for standalone shared library.
Utility functor for relabeling one or more lattices. Note that you can chain commands. See Unit test in fstutils.gtest.cpp for an example.
void Add(boost::shared_ptr< Fst< Arc > const > fst)
Adds an fst to the list.
Generalized weight mapper functor.
VectorFst< Arc > * run(const VectorFst< Arc > &fst)
Multiepsilon composition.
Unit testing: google testing common header.
Static variable for custom_assert. Include only once from main file.
void close()
Closes the file.