Cambridge SMT System
fstutils.gtest.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
21 #include <openfst.h>
22 #include <googletesting.h>
23 
24 #ifndef GMAINTEST
25 #include "main.custom_assert.hpp"
26 #include "main.logger.hpp"
27 #endif
28 
29 #include "lm/model.hh"
30 
33 
34 #include "fstutils.hpp"
35 #include "fstutils.ftcompose.hpp"
39 #include "fstutils.mapper.hpp"
40 #include "fstutils.multiunion.hpp"
41 #include "fstio.hpp"
42 
43 #include <idbridge.hpp>
45 
46 namespace bfs = boost::filesystem;
47 
48 //Testing recursive implementation of printstrings
// Exercises printstrings() on three lattices and compares the text it writes
// into a std::stringstream:
//  - a: four states with two paths (DR or OOV, then OOV, then label 3), state 3 final;
//  - b: a single start state and nothing else;
//  - c: no states at all -- the test expects user_check_ok to become false.
49 TEST ( fstutils, printstrings ) {
50  fst::VectorFst<fst::StdArc> a, b, c;
51  a.AddState();
52  a.SetStart ( 0 );
53  a.AddState();
54  a.AddArc ( 0, fst::StdArc ( DR, DR, 0.5, 1 ) );
55  a.AddArc ( 0, fst::StdArc ( OOV, OOV, 1.5, 1 ) );
56  a.AddState();
57  a.AddArc ( 1, fst::StdArc ( OOV, OOV, 0.5, 2 ) );
58  a.AddState();
59  a.AddArc ( 2, fst::StdArc ( 3, 3, 0.5, 3 ) );
60  a.SetFinal ( 3, fst::StdArc::Weight::One() );
61  fst::ArcSort ( &a, fst::OLabelCompare<fst::StdArc>() );
62  {
63  std::stringstream ss;
64  printstrings ( a, &ss );
    // One line per path; the trailing number is the sum of the arc weights
    // on that path (1.5+0.5+0.5 = 2.5 and 0.5+0.5+0.5 = 1.5).
    // NOTE(review): the expected text implies OOV prints as 999999998 and
    // DR as 999999999 -- confirm against the macro definitions.
65  EXPECT_EQ ( ss.str(),
66  "999999998 999999998 3 || 999999998 999999998 3 || 2.5\n999999999 999999998 3 || 999999999 999999998 3 || 1.5\n" );
67  }
    // b only has a start state: no arcs and no final state are added.
68  b.AddState();
69  b.SetStart ( 0 );
70  {
71  std::stringstream ss;
72  printstrings ( b, &ss );
73  EXPECT_EQ ( ss.str(),
74  "|| || 0\n" );
75  }
76  {
77  std::stringstream ss;
    // Arm the flag so the test can observe printstrings() clearing it.
78  user_check_ok = true;
79  printstrings ( c, &ss ); //Test with empty lattice -- will complain.
80  EXPECT_EQ ( user_check_ok, false );
81  EXPECT_EQ ( ss.str(), "" );
    // Restore the global flag for whatever runs after this test.
82  user_check_ok = true;
83  }
84 }
85 
// Checks that MultiEpsilonCompose(a, b, epsilons), with DR and OOV listed as
// epsilon labels, yields a lattice Equivalent to the conventional route:
// relabel the OOV/DR output labels of a to EPSILON, compose with b, and
// project onto the input side. Both results are determinized before comparing.
88 TEST ( fstutils, multiepsiloncomposition ) {
89  fst::VectorFst<fst::StdArc> a, b;
90  a.AddState();
91  a.SetStart ( 0 );
92  a.AddState();
93  a.AddArc ( 0, fst::StdArc ( DR, DR, 0.5, 1 ) );
94  a.AddArc ( 0, fst::StdArc ( OOV, OOV, 1.5, 1 ) );
95  a.AddState();
96  a.AddArc ( 1, fst::StdArc ( OOV, OOV, 0.5, 2 ) );
97  a.AddState();
98  a.AddArc ( 2, fst::StdArc ( 3, 3, 0.5, 3 ) );
99  a.SetFinal ( 3, fst::StdArc::Weight::One() );
    // b accepts the single label 3 with weight 0.5.
100  b.AddState();
101  b.SetStart ( 0 );
102  b.AddState();
103  b.AddArc ( 0, fst::StdArc ( 3, 3, 0.5, 1 ) );
104  b.SetFinal ( 1, fst::StdArc::Weight::One() );
105  fst::ArcSort ( &a, fst::OLabelCompare<fst::StdArc>() );
    // NOTE(review): this listing jumps from source line 105 to 107 -- confirm
    // that no statement was dropped here.
107  std::vector<fst::StdArc::Label> epsilons;
108  epsilons.push_back ( DR );
109  epsilons.push_back ( OOV );
110  //Ready to use the multiepsilon compose
111  //Compare to traditional approach:
112  // Traverse a and relabel OOV and DR to epsilon
    // ipairs stays empty, so only output labels are relabeled below.
113  std::vector<pair< fst::StdArc::Label, fst::StdArc::Label> > ipairs;
114  std::vector<pair< fst::StdArc::Label, fst::StdArc::Label> > opairs;
115  opairs.push_back ( pair< fst::StdArc::Label, fst::StdArc::Label> ( OOV,
116  EPSILON ) );
117  opairs.push_back ( pair< fst::StdArc::Label, fst::StdArc::Label> ( DR,
118  EPSILON ) );
119  //Both approaches should be equivalent, right?
120  //test epsilon-free determinized lattices resulting from both methods...
121  EXPECT_TRUE ( Equivalent ( fst::VectorFst<fst::StdArc>
122  ( fst::DeterminizeFst<fst::StdArc> ( fst::MultiEpsilonCompose<fst::StdArc> ( a,
123  b, epsilons ) ) ),
124  fst::VectorFst<fst::StdArc> ( fst::DeterminizeFst<fst::StdArc>
125  ( fst::ProjectFst<fst::StdArc> ( fst::ComposeFst<fst::StdArc>
126  ( fst::RelabelFst<fst::StdArc> ( a, ipairs, opairs ), b ),
127  fst::PROJECT_INPUT ) ) ) ) );
128 };
129 
// Checks RPhiCompose() with PHI as the special label in two configurations.
// In both cases the composition of a with the right-hand FST is expected to
// be Equivalent to a itself:
//  - b: a PHI arc from state 0 to 1, followed by a DR arc to the final state 2;
//  - c: a DR arc from state 0 to 1, with a PHI self-loop on the final state 1.
131 TEST ( fstutils, phicompose ) {
132  fst::VectorFst<fst::StdArc> a, b, c;
    // a: one DR arc (weight 0.5) from the start state to final state 1.
    // A third state is also added but gets no arcs in this test.
133  a.AddState();
134  a.SetStart ( 0 );
135  a.AddState();
136  a.AddArc ( 0, fst::StdArc ( DR, DR, 0.5, 1 ) );
137  a.AddState();
138  a.SetFinal ( 1, fst::StdArc::Weight::One() );
139  b.AddState();
140  b.SetStart ( 0 );
141  b.AddState();
142  b.AddArc ( 0, fst::StdArc ( PHI, PHI, 0, 1 ) );
143  b.AddState();
144  b.AddArc ( 1, fst::StdArc ( DR, DR, 0, 2 ) );
145  b.SetFinal ( 2, fst::StdArc::Weight::One() );
146  fst::ArcSort ( &a, fst::OLabelCompare<fst::StdArc>() );
147  EXPECT_TRUE ( Equivalent ( RPhiCompose ( a, b, PHI ), a ) );
148  //Testing special phi-transition mode: composition with a phi as a cyclic label in a state
149  c.AddState();
150  c.SetStart ( 0 );
151  c.AddState();
152  c.AddArc ( 0, fst::StdArc ( DR, DR, 0, 1 ) );
153  c.AddArc ( 1, fst::StdArc ( PHI, PHI, 0, 1 ) );
154  c.SetFinal ( 1, fst::StdArc::Weight::One() );
155  EXPECT_TRUE ( Equivalent ( fst::RPhiCompose ( a, c, PHI ), a ) );
156 };
157 
// Writes a tiny n-gram language model to the file "mylm", builds the lattice
// expected after scoring (a), strips the weights from a copy (c), runs the
// on-the-fly language model application over c and expects the result to be
// Equivalent to a. The temporary "mylm" file is removed at the end.
// NOTE(review): hev, idb, mw and f are used below but their declarations are
// not in this listing (source lines 203-204, 206 and 208 are missing).
159 TEST ( fstutils, applylmonthefly ) {
160  {
    // Model text: 4 unigrams, 2 bigrams and 1 trigram, written section by section.
161  ucam::util::oszfstream o ( "mylm" );
162  o << endl;
163  o << "\\data\\" << endl;
164  o << "ngram 1=4" << endl;
165  o << "ngram 2=2" << endl;
166  o << "ngram 3=1" << endl;
167  o << endl;
168  o << "\\1-grams:" << endl;
169  o << "-1\t3\t0" << endl;
170  o << "-10\t4\t0" << endl;
171  o << "-100\t</s>\t0" << endl;
172  o << "0\t<s>\t0" << endl;
173  o << endl;
174  o << "\\2-grams:" << endl;
175  o << "-1000\t3 4\t0" << endl;
176  o << "-10000\t4 </s>\t0" << endl;
177  o << endl;
178  o << "\\3-grams:" << endl;
179  o << "-100000\t3 4 </s>" << endl;
180  o << endl;
181  o << "\\end\\" << endl;
182  o.close();
183  }
184  //Build here the resulting lattice with the expected value
    // Linear path with labels 1, 3, 4, 2 and weights 0, 1, 1000, 100000.
    // NOTE(review): presumably labels 1 and 2 stand for <s> and </s> -- confirm.
185  fst::VectorFst<fst::StdArc> a;
186  a.AddState();
187  a.SetStart ( 0 );
188  a.AddState();
189  a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
190  a.AddState();
191  a.AddArc ( 1, fst::StdArc ( 3, 3, 1, 2 ) );
192  a.AddState();
193  a.AddArc ( 2, fst::StdArc ( 4, 4, 1000, 3 ) );
194  a.AddState();
195  a.AddArc ( 3, fst::StdArc ( 2, 2, 100000, 4 ) );
196  a.AddState();
197  a.SetFinal ( 4, fst::StdArc::Weight::One() );
198  fst::VectorFst<fst::StdArc> c ( a );
199  //Delete scores, apply lm on-the-fly and see if it matches!
200  fst::Map<fst::StdArc> ( &c, fst::RmWeightMapper<fst::StdArc>() );
    // No labels are registered as epsilons for this run: the set stays empty.
201  std::unordered_set<fst::StdArc::Label> epsilons;
202  lm::ngram::Config kenlm_config;
205  kenlm_config.enumerate_vocab = &hev;
207  lm::ngram::Model *model = new lm::ngram::Model ( "mylm" , kenlm_config);
    // NOTE(review): the start of this statement (presumably the declaration
    // of f) is on the missing source line 208 -- confirm against the real file.
209  fst::ApplyLanguageModelOnTheFly<fst::StdArc> (*model, epsilons, false, 1 ,0 , idb, mw);
210 
211  fst::VectorFst<fst::StdArc> *output = f->run(c);
212  EXPECT_TRUE ( Equivalent ( *output, a ) );
    // model, f and output are raw pointers and are released by hand here.
213  delete model;
214  delete f;
215  delete output;
216  bfs::remove ( bfs::path ( "mylm" ) );
217 };
218 
// Test-only helpers.
219 namespace googletesting {
220 //Just for test purposes, a functor that would simply delete weights.
// RemoveWeight::operator() returns Weight::One() for any weight whose value
// differs from Weight::Zero(); a Zero weight is returned unchanged.
221 struct RemoveWeight {
222  const fst::StdArc::Weight operator() ( const fst::StdArc::Weight& w ) const {
223  return w.Value() != fst::StdArc::Weight::Zero() ? fst::StdArc::Weight::One() :
224  w;
225  };
226 };
227 
228 }
// Builds a two-state FST a with one arc (10:10, weight 0.5) and a copy b whose
// weights are removed with RmWeightMapper. a is then mapped in place and the
// result is expected to be Equivalent to b.
// NOTE(review): the mapper argument of the in-place Map call is not in this
// listing (source lines 239 and 241 are missing) -- confirm which mapper is used.
230 TEST ( fstutils, genericweightautomapper ) {
231  fst::VectorFst<fst::StdArc> a;
232  a.AddState();
233  a.AddState();
234  a.SetStart ( 0 );
235  a.SetFinal ( 1, fst::StdArc::Weight::One() );
236  a.AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
    // b is the reference result: same topology with weights removed.
237  fst::VectorFst<fst::StdArc> b ( a );
238  fst::Map<fst::StdArc> ( &b, fst::RmWeightMapper<fst::StdArc>() );
240  fst::Map<fst::StdArc> ( &a,
242  EXPECT_TRUE ( Equivalent ( a, b ) );
243 }
244 
// Same fixture as genericweightautomapper, but using the copying form of Map:
// a is mapped into a separate FST c, which is expected to be Equivalent to
// the weight-stripped reference b.
// NOTE(review): rw is used below but its declaration and the mapper type are
// not in this listing (source lines 256 and 258 are missing).
246 TEST ( fstutils, genericweightmapper ) {
247  fst::VectorFst<fst::StdArc> a;
248  a.AddState();
249  a.AddState();
250  a.SetStart ( 0 );
251  a.SetFinal ( 1, fst::StdArc::Weight::One() );
252  a.AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
    // b is the reference result: same topology with weights removed.
253  fst::VectorFst<fst::StdArc> b ( a );
254  fst::Map<fst::StdArc> ( &b, fst::RmWeightMapper<fst::StdArc>() );
255  fst::VectorFst<fst::StdArc> c;
257  fst::Map ( a, &c,
259  ( rw ) );
260  EXPECT_TRUE ( Equivalent ( c, b ) );
261 }
262 
// Compares the union built by the multi-union object m against unions built
// step by step with Union() + RmEpsilon(). Four single-arc FSTs (a, b, c, d)
// are allocated on the heap; a, b, c are added to m first, then d.
// NOTE(review): m is used below but its declaration is not in this listing
// (source lines 288, 294, 298, 306 and 308 are missing), so the multi-union
// type and whatever happens right before j3 is built cannot be seen here.
263 TEST ( fstutils, multiunionrational ) {
264  fst::VectorFst<fst::StdArc> *a = new fst::VectorFst<fst::StdArc>;
265  a->AddState();
266  a->AddState();
267  a->SetStart ( 0 );
268  a->SetFinal ( 1, fst::StdArc::Weight::One() );
269  a->AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
270  fst::VectorFst<fst::StdArc> * b = new fst::VectorFst<fst::StdArc>;
271  b->AddState();
272  b->AddState();
273  b->SetStart ( 0 );
274  b->SetFinal ( 1, fst::StdArc::Weight::One() );
275  b->AddArc ( 0, fst::StdArc ( 100, 100, 0.1, 1 ) );
276  fst::VectorFst<fst::StdArc> * c = new fst::VectorFst<fst::StdArc>;
277  c->AddState();
278  c->AddState();
279  c->SetStart ( 0 );
280  c->SetFinal ( 1, fst::StdArc::Weight::One() );
281  c->AddArc ( 0, fst::StdArc ( 1000, 1000, 0.1, 1 ) );
282  fst::VectorFst<fst::StdArc> * d = new fst::VectorFst<fst::StdArc>;
283  d->AddState();
284  d->AddState();
285  d->SetStart ( 0 );
286  d->SetFinal ( 1, fst::StdArc::Weight::One() );
287  d->AddArc ( 0, fst::StdArc ( 10000, 10000, 0.1, 1 ) );
    // j: result of m() after adding a, b, c; epsilons removed before comparing.
289  m.Add ( a );
290  m.Add ( b );
291  m.Add ( c );
292  boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j ( m() );
293  RmEpsilon ( j.get() );
    // j2: result of m() once d has been added as well.
295  m.Add ( d );
296  boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j2 ( m() );
297  RmEpsilon ( j2.get() );
    // Reference: accumulate the same unions directly into a.
299  Union ( a, *b );
300  Union ( a, *c );
301  RmEpsilon ( a );
302  EXPECT_TRUE ( Equivalent ( *j, *a ) );
303  Union ( a, *d );
304  RmEpsilon ( a );
305  EXPECT_TRUE ( Equivalent ( *j2, *a ) );
    // A third call to m() is expected to yield a different number of states than a.
307  boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j3 ( m() );
309  RmEpsilon ( j3.get() );
310  EXPECT_TRUE ( j3.get()->NumStates() != a->NumStates() );
311 }
312 
// Same scenario as the multiunionrational test: results of the multi-union
// object m are compared against unions accumulated with Union() + RmEpsilon().
// NOTE(review): m is used below but its declaration is not in this listing
// (source lines 314, 339, 345, 349, 357 and 359 are missing); presumably a
// different multi-union implementation is instantiated here -- confirm.
313 TEST ( fstutils, multiunionreplace ) {
315  fst::VectorFst<fst::StdArc> *a = new fst::VectorFst<fst::StdArc>;
316  a->AddState();
317  a->AddState();
318  a->SetStart ( 0 );
319  a->SetFinal ( 1, fst::StdArc::Weight::One() );
320  a->AddArc ( 0, fst::StdArc ( 10, 10, 0.5, 1 ) );
321  fst::VectorFst<fst::StdArc> * b = new fst::VectorFst<fst::StdArc>;
322  b->AddState();
323  b->AddState();
324  b->SetStart ( 0 );
325  b->SetFinal ( 1, fst::StdArc::Weight::One() );
326  b->AddArc ( 0, fst::StdArc ( 100, 100, 0.1, 1 ) );
327  fst::VectorFst<fst::StdArc> * c = new fst::VectorFst<fst::StdArc>;
328  c->AddState();
329  c->AddState();
330  c->SetStart ( 0 );
331  c->SetFinal ( 1, fst::StdArc::Weight::One() );
332  c->AddArc ( 0, fst::StdArc ( 1000, 1000, 0.1, 1 ) );
333  fst::VectorFst<fst::StdArc> * d = new fst::VectorFst<fst::StdArc>;
334  d->AddState();
335  d->AddState();
336  d->SetStart ( 0 );
337  d->SetFinal ( 1, fst::StdArc::Weight::One() );
338  d->AddArc ( 0, fst::StdArc ( 10000, 10000, 0.1, 1 ) );
    // j: result of m() after adding a, b, c; epsilons removed before comparing.
340  m.Add ( a );
341  m.Add ( b );
342  m.Add ( c );
343  boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j ( m() );
344  RmEpsilon ( j.get() );
    // j2: result of m() once d has been added as well.
346  m.Add ( d );
347  boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j2 ( m() );
348  RmEpsilon ( j2.get() );
    // Reference: accumulate the same unions directly into a.
350  Union ( a, *b );
351  Union ( a, *c );
352  RmEpsilon ( a );
353  EXPECT_TRUE ( Equivalent ( *j, *a ) );
354  Union ( a, *d );
355  RmEpsilon ( a );
356  EXPECT_TRUE ( Equivalent ( *j2, *a ) );
    // A third call to m() is expected to yield a different number of states than a.
358  boost::scoped_ptr< fst::VectorFst<fst::StdArc> > j3 ( m() );
360  RmEpsilon ( j3.get() );
361  EXPECT_TRUE ( j3.get()->NumStates() != a->NumStates() );
362 }
363 
// Builds a small branching lattice, collects its n-grams with extractNGrams()
// and compares them, printed one per line, with a fixed expected listing.
// extractNGrams() is then also called on the empty FST b; the outcome of that
// second call is not checked by any assertion.
364 TEST (fstutils, extractngrams ) {
365  fst::VectorFst<fst::StdArc> a, b;
    // Arcs: 0-1 (1), 1-2 (2), 2-3 (3), 1-3 (5), 3-4 (4), 2-5 (6), 5-4 (7); state 4 is final.
366  a.AddState();
367  a.SetStart ( 0 );
368  a.AddState();
369  a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
370  a.AddState();
371  a.AddArc ( 1, fst::StdArc ( 2, 2, 0, 2 ) );
372  a.AddState();
373  a.AddArc ( 2, fst::StdArc ( 3, 3, 0, 3 ) );
374  a.AddArc ( 1, fst::StdArc ( 5, 5, 0, 3 ) );
375  a.AddState();
376  a.AddArc ( 3, fst::StdArc ( 4, 4, 0, 4 ) );
377  a.SetFinal ( 4, fst::StdArc::Weight::One() );
378  a.AddState();
379  a.AddArc ( 2, fst::StdArc ( 6, 6, 0, 5 ) );
380  a.AddArc ( 5, fst::StdArc ( 7, 7, 0, 4 ) );
381  std::vector<fst::NGram> ng;
    // NOTE(review): the third argument (5) is presumably the maximum n-gram order -- confirm.
382  fst::extractNGrams<fst::StdArc> (a, ng, 5);
383  std::stringstream ss;
384  for (uint k = 0; k < ng.size(); ++k)
385  ss << ng[k] << endl;
386  std::string ngrams =
387  "1\n1 2\n1 2 3\n1 2 3 4\n1 2 6\n1 2 6 7\n1 5\n1 5 4\n2\n2 3\n2 3 4\n2 6\n2 6 7\n3\n3 4\n4\n5\n5 4\n6\n6 7\n7\n";
388  EXPECT_TRUE (ngrams == ss.str() );
389  fst::extractNGrams<fst::StdArc> (b, ng, 5);
390 }
391 
// Checks string2fst() against a hand-built linear FST a with labels 1 3 4 2.
// The arc weights of a sum to 0 + 1 + 1000 + 100000 = 101001, which is the
// weight passed to string2fst(). The function is called twice: once with an
// empty third argument and once with "1 3 4 2" as the third argument; both
// results are expected to be Equivalent to a.
392 TEST ( fstutils, string2fst) {
393  fst::VectorFst<fst::StdArc> a, b;
394  a.AddState();
395  a.SetStart ( 0 );
396  a.AddState();
397  a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
398  a.AddState();
399  a.AddArc ( 1, fst::StdArc ( 3, 3, 1, 2 ) );
400  a.AddState();
401  a.AddArc ( 2, fst::StdArc ( 4, 4, 1000, 3 ) );
402  a.AddState();
403  a.AddArc ( 3, fst::StdArc ( 2, 2, 100000, 4 ) );
404  a.SetFinal ( 4, fst::StdArc::Weight::One() );
405  fst::string2fst<fst::StdArc> ("1 3 4 2", &b, "", fst::StdArc::Weight (101001) );
406  EXPECT_EQ (Equivalent (a, b), true);
    // Empty b so the second call starts from a blank FST.
407  b.DeleteStates();
408  fst::string2fst<fst::StdArc> ("1 3 4 2", &b, "1 3 4 2",
409  fst::StdArc::Weight (101001) );
410  EXPECT_EQ (Equivalent (a, b), true);
411 }
412 
413 //Test relabelutil functor -- just a wrapper for relabeling fsts
// Checks the relabeling functor rb in two steps:
//  1) applied to b (a copy of a) with no label pairs registered, the result is
//     expected to be Equivalent to a;
//  2) with 3->3000 and 4->4000 registered on both input and output sides via
//     chained addIPL()/addOPL() calls, the result is expected to be Equivalent
//     to a after the same mapping is applied to it with Relabel().
// NOTE(review): rb is used below but its declaration is not in this listing
// (source lines 427 and 432 are missing).
414 TEST ( fstutils, relabelutil) {
415  fst::VectorFst<fst::StdArc> a;
416  a.AddState();
417  a.SetStart ( 0 );
418  a.AddState();
419  a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
420  a.AddState();
421  a.AddArc ( 1, fst::StdArc ( 3, 3, 1, 2 ) );
422  a.AddState();
423  a.AddArc ( 2, fst::StdArc ( 4, 4, 1000, 3 ) );
424  a.AddState();
425  a.AddArc ( 3, fst::StdArc ( 2, 2, 100000, 4 ) );
426  a.SetFinal ( 4, fst::StdArc::Weight::One() );
428  fst::VectorFst<fst::StdArc> b (a);
429  //Empty should be identical to a, of course...
430  EXPECT_EQ (Equivalent (rb (b), a),
431  true);
    // Reference relabeling done directly on a.
433  std::vector<pair <fst::StdArc::Label, fst::StdArc::Label> > ipairs;
434  std::vector<pair <fst::StdArc::Label, fst::StdArc::Label> > opairs;
435  ipairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (3, 3000 ) );
436  ipairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (4, 4000 ) );
437  opairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (3, 3000) );
438  opairs.push_back (pair <fst::StdArc::Label, fst::StdArc::Label> (4, 4000) );
439  Relabel (&a, ipairs, opairs);
440  //and test...
441  EXPECT_EQ (Equivalent (rb.addIPL (3, 3000).addOPL (3, 3000).addIPL (4,
442  4000).addOPL (4,
443  4000) (b),
444  a),
445  true);
446 }
447 
448 #ifndef GMAINTEST
449 
// Standalone test entry point; it sits inside the surrounding
// #ifndef GMAINTEST guard. Hands the command line to GoogleTest and returns
// the result of running all registered tests.
450 int main ( int argc, char **argv ) {
451  ::testing::InitGoogleTest ( &argc, argv );
452  return RUN_ALL_TESTS();
453 }
454 #endif
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
Unit testing: google testing common header.
Convenience functors/functions for lexicographic<tropical,tropical> semiring.
Contains convenience functions to write and read fsts.
Implementation of different type of compositions (i.e. failure transitions)
ComposeFst< Arc > RPhiCompose(const Fst< Arc > &fstlhs, const Fst< Arc > &fstrhs, const typename Arc::Label kSpecialLabel)
Performs composition with PHI, based on OpenFST matchers. PHI transitions are expected on fstrhs...
Extend EnumerateVocab to access kenlm ids.
RelabelUtil & addIPL(typename Arc::Label labelfind, typename Arc::Label labelreplace)
Definition: fstutils.hpp:511
This class extends EnumerateVocab in kenlm code. This class creates a grammar-integer to lm-integer h...
fst::TropicalWeightTpl< F > Map(double)
templated Mapper that modifies weights over an FST, passing through the other values of the arc...
#define DR
int main(int argc, char **argv)
bool user_check_ok
#define PHI
void string2fst(const std::string &sidxwords, fst::VectorFst< Arc > *fst, const std::string &tidxwords="", typename Arc::Weight finalweight=Arc::Weight::One())
Convenience method that creates an fsa/fst from one/two string(s) of numbers.
Definition: fstutils.hpp:463
TEST(FstIo, basic_test)
Definition: fstio.gtest.cpp:38
TEST(fstutils, printstrings)
Implementations of multiple fst unions.
const fst::StdArc::Weight operator()(const fst::StdArc::Weight &w) const
This class creates the Union of an arbitrarily large number of fsts. This implementation was suggeste...
void printstrings(const fst::VectorFst< Arc > &pcostslat, std::ostream *hyps, unsigned s=0)
Trivial function that outputs all the hypothesis in the lattice with its cost.
Definition: fstutils.hpp:253
Contains implementation of ApplyLanguageModelOnTheFly.
maps between grammar targets ids and lm ids
test-specific classes and functions
Definition: fstio.gtest.cpp:34
Static variables for logger. Include only once from main file.
templated Mapper that modifies weights when copying from one FST to another, passing through the othe...
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
Templated functor that creates a weight given a float.
Class that applies language model on the fly using kenlm.
Headers for standalone shared library.
Utility functor for relabeling one or more lattices. Note that you can chain commands. See Unit test in fstutils.gtest.cpp for an example.
Definition: fstutils.hpp:503
#define EPSILON
void Add(boost::shared_ptr< Fst< Arc > const > fst)
Adds an fst to the list.
Generalized weight mapper functor.
VectorFst< Arc > * run(const VectorFst< Arc > &fst)
#define OOV
Multiepsilon composition.
Unit testing: google testing common header.
Static variable for custom_assert. Include only once from main file.
void close()
Closes the file.
Definition: szfstream.hpp:323