Cambridge SMT System
hifst.task.referencefilter.gtest.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
21 #include <openfst.h>
22 #include <googletesting.h>
23 
24 #ifndef GMAINTEST
25 #include "main.custom_assert.hpp"
26 #include "main.logger.hpp"
27 #endif
28 
29 #include "addresshandler.hpp"
30 #include "taskinterface.hpp"
31 
32 #include "tropical-sparse-tuple-weight.h"
33 #include "tropical-sparse-tuple-weight-decls.h"
35 
39 
40 #include "fstio.hpp"
41 #include "fstutils.hpp"
42 #include "fstutils.mapper.hpp"
43 #include "task.referencefilter.hpp"
44 
45 using boost::any_cast;
46 namespace bfs = boost::filesystem;
47 namespace uh = ucam::hifst;
48 
49 namespace googletesting {
50 
53  public:
54  unsigned sidx;
55  typedef fst::LexicographicArc< fst::StdArc::Weight, fst::StdArc::Weight> Arc;
56  typedef fst::LexicographicWeight<fst::StdArc::Weight, fst::StdArc::Weight>
58 
59  std::vector < fst::VectorFst<Arc> *> filters;
60  unordered_set<std::string> tvcb;
61  unordered_map<std::string, fst::VectorFst<Arc> *> fsts;
62 };
63 
65 TEST ( HifstReferenceFilter, basic_test ) { // Writes a small lattice to disk, runs rft on it and checks the vocabulary and filter left in d.
66  // Create test fst file expecto.fst: three states, arcs 0 -> 1 (label 10) and 1 -> 2 (label 100), state 2 final.
67  typedef fst::LexicographicArc< fst::StdArc::Weight, fst::StdArc::Weight> Arc;
68  typedef fst::LexicographicWeight<fst::StdArc::Weight, fst::StdArc::Weight>
69  Weight;
70  fst::VectorFst<Arc> aux;
72  aux.AddState();
73  aux.AddState();
74  aux.AddState();
75  aux.SetStart ( 0 );
76  aux.SetFinal ( 2, Arc::Weight::One() );
77  aux.AddArc ( 0, Arc ( 10, 10, mw ( 0 ), 1 ) );
78  aux.AddArc ( 1, Arc ( 100, 100, mw ( 0 ), 2 ) );
79  fst::FstWrite ( aux, "expecto.fst" );
80  // Fill the option map from which the RegistryPO object is built.
81  unordered_map<std::string, boost::any> v;
82  v[HifstConstants::kReferencefilterLoad] = std::string ( "expecto.fst" );
83  v[HifstConstants::kReferencefilterWrite] = std::string ( "" );
84  v[HifstConstants::kReferencefilterSubstring] = std::string ("yes");
86  std::numeric_limits<float>::max() );
88  std::numeric_limits<unsigned>::max() );
90  v[HifstConstants::kHifstSemiring] = std::string("lexstdarc");
92  d.sidx = 0; // NOTE(review): d (like mw and rft) is declared on a line that is not present in this listing.
93  const uu::RegistryPO rg ( v );
95  EXPECT_EQ ( rft.getBuilt(), false ); // Before run(): nothing built yet; the getters below are checked against the option values set above.
96  EXPECT_EQ ( rft.getDisableSubString(), false );
97  EXPECT_EQ ( rft.getWeight(), std::numeric_limits<float>::max() );
98  EXPECT_EQ ( rft.getShortestPath(), std::numeric_limits<unsigned>::max() );
99  EXPECT_EQ ( rft.getVocabulary().size(), 0 );
100  EXPECT_EQ ( rft.getTranslationLatticeFile(), "expecto.fst" );
101  rft.run ( d );
102  EXPECT_EQ ( rft.getBuilt(), true );
103  ASSERT_EQ ( d.tvcb.size(), 2 ); // After run(): the vocabulary in d holds the two arc labels, stored as strings.
104  EXPECT_TRUE ( d.tvcb.find ( "10" ) != d.tvcb.end() );
105  EXPECT_TRUE ( d.tvcb.find ( "100" ) != d.tvcb.end() );
106  ASSERT_EQ ( d.filters.size(), 1 ); // Exactly one filter fst is expected, with three states.
107  EXPECT_EQ ( d.filters[0]->NumStates(), 3 );
108  std::stringstream ss;
109  fst::PrintFst ( *d.filters[0], &ss ); // Print the filter in text form so it can be compared as a string.
110  LDEBUG("ss=" << ss.str());
111  EXPECT_TRUE ( ss.str() == "0\t1\t10\t10\n0\t2\t100\t100\n0\n1\t2\t100\t100\n1\n2\n"
112  || ss.str() == "0\t1\t10\t0,0\n0\t2\t100\t0,0\n0\t0,0\n1\t2\t100\t0,0\n1\t0,0\n2\t0,0\n" ); // Either text form is accepted; the second alternative is marked as openfst 1.5.0.
113  bfs::remove ( bfs::path ( "expecto.fst" ) ); // Remove the fst file written at the start of the test.
114 };
115 
117 TEST ( HifstReferenceFilter, empty ) { // With an empty load path, run() is expected to build nothing and to leave d.filters empty.
118  // Fill the option map from which the RegistryPO object is built; the load path is left empty.
119  unordered_map<std::string, boost::any> v;
120  v[HifstConstants::kReferencefilterLoad] = std::string ( "" );
121  v[HifstConstants::kReferencefilterWrite] = std::string ( "" );
122  v[HifstConstants::kReferencefilterSubstring] = std::string ("yes");
124  std::numeric_limits<float>::max() );
126  std::numeric_limits<unsigned>::max() );
127  v[HifstConstants::kReferencefilterLoadSemiring] = std::string("lexstdarc");
128  v[HifstConstants::kHifstSemiring] = std::string("lexstdarc");
130  d.sidx = 0; // NOTE(review): d (like rft) is declared on a line that is not present in this listing.
131  const uu::RegistryPO rg ( v );
133  rft.run ( d );
134  EXPECT_EQ ( rft.getBuilt(), false ); // Nothing was built...
135  ASSERT_EQ ( d.filters.size(), 0 ); // ...and no filter was added to the data object.
136 };
137 
138 };
139 
140 #ifndef GMAINTEST
141 
142 int main ( int argc, char **argv ) { // Standalone test entry point; only compiled when GMAINTEST is not defined.
143  ::testing::InitGoogleTest ( &argc, argv ); // Hands the command-line arguments to googletest before the tests run.
144  return RUN_ALL_TESTS(); // The result of RUN_ALL_TESTS() becomes the process exit status.
145 }
146 #endif
std::string const kHifstSemiring
Handles simple wildcard expansion for strings.
bool run(Data &d)
Runs the task: loads the substring lattice and adds a pointer to it in the data object.
Unit testing: google testing common header.
Convenience functors/functions for lexicographic<tropical,tropical> semiring.
Contains convenience functions to write and read fsts.
Public class with variables required by TextTask to compile and run.
const std::string kReferencefilterSubstring
fst::LexicographicArc< fst::StdArc::Weight, fst::StdArc::Weight > Arc
Convenience functions for tropical sparse vector weight.
#define LDEBUG(msg)
Generates a substring version of a reference translation lattice and associated vocabulary. This substring fst is typically used to guide translation towards a particular search space. The associated vocabulary can be used e.g. to restrict parsing algorithms.
Interfaces with basic methods for iteration.
unordered_map< std::string, fst::VectorFst< Arc > * > fsts
TEST(FstIo, basic_test)
Definition: fstio.gtest.cpp:38
fst::LexicographicWeight< fst::StdArc::Weight, fst::StdArc::Weight > Weight
const std::string kReferencefilterLoadSemiring
const std::string kReferencefilterPrunereferenceweight
test-specific classes and functions
Definition: fstio.gtest.cpp:34
Static variables for logger. Include only once from main file.
void FstWrite(const Fst< Arc > &fst, const std::string &filename, const std::string &txtname="txt")
Templated method that writes an fst either in binary or text format.
Definition: fstio.hpp:111
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
Templated functor that creates a weight given a float.
Headers for standalone shared library.
void PrintFst(const Fst< Arc > &fst, std::ostream *os)
Templated method that writes an fst in openfst text format to a stream.
Definition: fstio.hpp:35
int main(int argc, char **argv)
Generalized weight mapper functor.
Convenience functors that allow transparent handling for weights within hifst.
const std::string kReferencefilterWrite
const std::string kReferencefilterPrunereferenceshortestpath
std::vector< fst::VectorFst< Arc > * > filters
Describes class ReferenceFilterTask (builds unweighted substring fst for lattice alignment ) ...
const std::string kReferencefilterLoad
Unit testing: google testing common header.
const unordered_set< std::string > & getVocabulary()
Static variable for custom_assert. Include only once from main file.