Cambridge SMT System
lmbr.gtest.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
21 #include <openfst.h>
22 #include <googletesting.h>
23 
24 #ifndef GMAINTEST
25 #include "main.custom_assert.hpp"
26 #include "main.logger.hpp"
27 #endif
28 
31 
32 #include "range.hpp"
33 #include "fstio.hpp"
34 #include "fstutils.hpp"
35 #include "fstutils.mapper.hpp"
36 #include "fstutils.ftcompose.hpp"
38 
39 #include "taskinterface.hpp"
40 
41 #include "data.lmbr.hpp"
42 #include "task.lmbr.hpp"
43 
44 namespace bfs = boost::filesystem;
45 
46 namespace ul = ucam::lmbr;
47 
48 //Test the ngram extraction wrapper -- the test is pretty much the same as basic ngram extraction
49 TEST (lmbr, extractngrams) {
50  fst::VectorFst<fst::StdArc> a, b;
51  a.AddState();
52  a.SetStart ( 0 );
53  a.AddState();
54  a.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
55  a.AddState();
56  a.AddArc ( 1, fst::StdArc ( 2, 2, 0, 2 ) );
57  a.AddState();
58  a.AddArc ( 2, fst::StdArc ( 3, 3, 0, 3 ) );
59  a.AddArc ( 1, fst::StdArc ( 5, 5, 0, 3 ) );
60  a.AddState();
61  a.AddArc ( 3, fst::StdArc ( 4, 4, 0, 4 ) );
62  a.SetFinal ( 4, fst::StdArc::Weight::One() );
63  a.AddState();
64  a.AddArc ( 2, fst::StdArc ( 6, 6, 0, 5 ) );
65  a.AddArc ( 5, fst::StdArc ( 7, 7, 0, 4 ) );
66  std::vector<fst::NGramList> ng;
67  ul::extractNGrams<fst::StdArc> (a, ng);
68  std::stringstream ss;
69  for (unsigned k = 0; k < ng.size(); ++k)
70  for (fst::NGramList::iterator itx = ng[k].begin(); itx != ng[k].end(); ++itx) {
71  ss << itx->first << endl;
72  EXPECT_TRUE (itx->second == fst::StdArc::Weight::One() );
73  }
74  // 2 refs due to sorting
75  std::string ngrams = "1\n2\n3\n4\n5\n6\n7\n6 7\n2 3\n1 5\n2 6\n1 2\n3 4\n5 4\n1 2 6\n1 5 4\n2 6 7\n2 3 4\n1 2 3\n1 2 3 4\n1 2 6 7\n";
76  std::string ngrams2 = "7\n6\n5\n4\n3\n2\n1\n5 4\n3 4\n6 7\n2 3\n1 5\n2 6\n1 2\n2 6 7\n2 3 4\n1 5 4\n1 2 6\n1 2 3\n1 2 6 7\n1 2 3 4\n";
77 
78  LDEBUG("ss=" << ss.str());
79  EXPECT_TRUE (ngrams == ss.str() || ngrams2 == ss.str() );
80  unsigned size = ul::extractNGrams<fst::StdArc> (b, ng);
81  EXPECT_TRUE (size == 0);
82 }
83 
84 TEST (lmbr, theta) {
85  ul::Theta theta;
86  EXPECT_EQ (theta (0), 0.0f);
87  for (unsigned k = 1; k <= 4; ++k)
88  EXPECT_EQ (theta (k), 1);
89 }
90 
91 TEST (lmbr, computeapplyposteriors) {
92  //Apply posteriors to a trivial hypotheses space and test scores.
93  //Read evidence space = hypotheses space!
94  fst::VectorFst<fst::StdArc> myfst;
95  myfst.AddState();
96  myfst.SetStart (0);
97  myfst.AddState();
98  myfst.AddArc (0, fst::StdArc (1, 1, 37.6542969, 1) );
99  myfst.AddState();
100  myfst.AddArc (1, fst::StdArc (933, 933, 0, 2) );
101  myfst.AddState();
102  myfst.AddArc (2, fst::StdArc (18, 18, 4.73828125, 3) );
103  myfst.AddState();
104  myfst.AddArc (2, fst::StdArc (150, 150, 0, 4) );
105  myfst.AddState();
106  myfst.AddArc (2, fst::StdArc (226, 226, 5.52148438, 5) );
107  myfst.AddArc (2, fst::StdArc (508, 508, 8.22265625, 5) );
108  myfst.AddArc (3, fst::StdArc (24, 24, 0, 5) );
109  myfst.AddState();
110  myfst.AddArc (5, fst::StdArc (150, 150, 0, 6) );
111  myfst.AddState();
112  myfst.AddArc (4, fst::StdArc (2, 2, 0, 7) );
113  myfst.AddArc (4, fst::StdArc (23, 23, 5.90625, 7) );
114  myfst.AddArc (6, fst::StdArc (2, 2, 0, 7) );
115  myfst.SetFinal (7, fst::StdArc::Weight::One() );
116  std::vector<fst::NGramList> ng;
117  //Extract ngrams
118  ul::extractNGrams<fst::StdArc> (myfst, ng);
119  //Compute posteriors
120  ul::ComputePosteriors cp (ng);
121  cp (&myfst);
122  {
123  std::stringstream ss;
124  uu::oszfstream j (ss);
125  cp.WritePosteriors (j);
126  std::string expectedposteriors =
127  "0.002680170144\t23 \n0.999996857723\t1 \n0.000264338191\t508 \n0.008617965440\t24 \n0.997316613516\t2 \n0.003937893375\t226 \n0.999996857723\t150 \n0.008617965440\t18 \n0.999996857723\t933 \n0.003937893375\t226 150 \n0.997316613516\t150 2 \n0.008617965440\t18 24 \n0.003937893375\t933 226 \n0.000264338191\t933 508 \n0.987176612683\t933 150 \n0.008617965440\t933 18 \n0.999996857723\t1 933 \n0.000264338191\t508 150 \n0.008617965440\t24 150 \n0.002680170144\t150 23 \n0.000264338191\t933 508 150 \n0.008617965440\t18 24 150 \n0.984496437005\t933 150 2 \n0.003937893375\t226 150 2 \n0.987176612683\t1 933 150 \n0.003937893375\t1 933 226 \n0.000264338191\t1 933 508 \n0.003937893375\t933 226 150 \n0.008617965440\t24 150 2 \n0.000264338191\t508 150 2 \n0.008617965440\t933 18 24 \n0.002680170144\t933 150 23 \n0.008617965440\t1 933 18 \n0.008617965440\t1 933 18 24 \n0.008617965440\t933 18 24 150 \n0.003937893375\t1 933 226 150 \n0.008617965440\t18 24 150 2 \n0.002680170144\t1 933 150 23 \n0.003937893375\t933 226 150 2 \n0.000264338191\t1 933 508 150 \n0.000264338191\t933 508 150 2 \n0.984496437005\t1 933 150 2 \n";
128 
129  // sorting differences:
130  std::string expectedposteriors2 =
131  "0.999996857723\t933 \n0.003937893375\t226 \n0.999996857723\t150 \n0.008617965440\t18 \n0.000264338191\t508 \n0.008617965440\t24 \n0.997316613516\t2 \n0.002680170144\t23 \n0.999996857723\t1 \n0.000264338191\t933 508 \n0.999996857723\t1 933 \n0.997316613516\t150 2 \n0.003937893375\t226 150 \n0.008617965440\t24 150 \n0.000264338191\t508 150 \n0.002680170144\t150 23 \n0.008617965440\t933 18 \n0.008617965440\t18 24 \n0.987176612683\t933 150 \n0.003937893375\t933 226 \n0.000264338191\t933 508 150 \n0.003937893375\t933 226 150 \n0.002680170144\t933 150 23 \n0.008617965440\t1 933 18 \n0.000264338191\t1 933 508 \n0.987176612683\t1 933 150 \n0.003937893375\t1 933 226 \n0.984496437005\t933 150 2 \n0.008617965440\t18 24 150 \n0.008617965440\t24 150 2 \n0.008617965440\t933 18 24 \n0.000264338191\t508 150 2 \n0.003937893375\t226 150 2 \n0.000264338191\t933 508 150 2 \n0.003937893375\t933 226 150 2 \n0.000264338191\t1 933 508 150 \n0.008617965440\t933 18 24 150 \n0.003937893375\t1 933 226 150 \n0.008617965440\t18 24 150 2 \n0.002680170144\t1 933 150 23 \n0.984496437005\t1 933 150 2 \n0.008617965440\t1 933 18 24 \n";
132 
133  EXPECT_TRUE (static_cast<std::stringstream *> (j.getStream() )->str() == expectedposteriors
134  || static_cast<std::stringstream *> (j.getStream() )->str() == expectedposteriors2
135  );
136  }
137  ul::Theta theta;
138  ul::NGramToPosteriorsMapper& pst = cp.getPosteriors();
139  ul::ApplyPosteriors ap (ng, pst, theta);
140  fst::Map (&myfst, fst::RmWeightMapper<fst::StdArc>() );
141  fst::VectorFst<fst::StdArc> *output = ap (myfst);
142  myfst = *output;
143  //Just check 1-best
144  fst::ShortestPath (myfst, output);
145  fst::TopSort (output);
146  // output->Write("output-final.fst");
147  {
148  std::stringstream ss;
149  fst::printstrings (*output, &ss);
150  EXPECT_EQ (ss.str(), "1 933 150 2 || 1 933 150 2 || -9.93797\n");
151  }
152  delete output;
153 }
154 
155 #ifndef GMAINTEST
156 
157 int main ( int argc, char **argv ) {
158  ::testing::InitGoogleTest ( &argc, argv );
159  return RUN_ALL_TESTS();
160 }
161 #endif
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
Unit testing: google testing common header.
std::ostream * getStream()
Returns internal stream.
Definition: szfstream.hpp:241
Convenience functors/functions for lexicographic<tropical,tropical> semiring.
Contains convenience functions to write and read fsts.
Implementation of different types of composition (i.e. failure transitions)
fst::TropicalWeightTpl< F > Map(double)
#define LDEBUG(msg)
unordered_map< fst::NGram, std::vector< std::vector< Posterior > >, ucam::util::hashfvecuint, ucam::util::hasheqvecuint > NGramToPosteriorsMapper
Definition: data.lmbr.hpp:35
Interfaces with basic methods for iteration.
Functor handling LMBR theta parameters.
void printstrings(const fst::VectorFst< Arc > &pcostslat, std::ostream *hyps, unsigned s=0)
Trivial function that outputs all the hypothesis in the lattice with its cost.
Definition: fstutils.hpp:253
Static variables for logger. Include only once from main file.
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
Headers for standalone shared library.
Lattice MBR task – integrates lattice mbr as a task that can be used standalone (implemented) or inc...
Generalized weight mapper functor.
int main(int argc, char **argv)
Definition: lmbr.gtest.cpp:157
Functor that applies posteriors to any hypothesis space. Initializes with previously calculated poste...
Handles different types of integer ranges.
TEST(lmbr, extractngrams)
Definition: lmbr.gtest.cpp:49
Unit testing: google testing common header.
Static variable for custom_assert. Include only once from main file.