Cambridge SMT System
hifst.task.cykparser.gtest.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
21 #include <googletesting.h>
22 
23 #ifndef GMAINTEST
24 #include "main.custom_assert.hpp"
25 #include "main.logger.hpp"
26 #endif
27 
28 #include "params.hpp"
29 #include "wordmapper.hpp"
30 
31 #include "addresshandler.hpp"
32 #include "taskinterface.hpp"
33 
34 #include "defs.grammar.hpp"
35 #include "defs.ssgrammar.hpp"
36 #include "defs.cykparser.hpp"
37 
38 #include "data.stats.hpp"
39 #include "data.grammar.hpp"
40 #include "data.ssgrammar.hpp"
41 #include "data.cykparser.hpp"
42 
43 #include "task.grammar.hpp"
44 #include "task.ssgrammar.hpp"
45 #include "task.cykparser.hpp"
46 
47 namespace uh = ucam::hifst;
48 namespace uf = ucam::fsttools;
49 namespace uu = ucam::util;
50 
52 struct DataForCyK {
54  sidx ( 0 ),
55  grammar ( NULL ),
56  ssgd ( NULL ),
57  cykdata ( NULL ),
58  stats ( new uf::StatsData ) {
59  }
60 
61  unsigned sidx;
62  std::string sentence;
64 
66  unordered_set<std::string> tvcb;
67 
68  unordered_map<std::string, std::vector< pair <unsigned, unsigned> > >
72  boost::scoped_ptr<uf::StatsData> stats;
73  unordered_map<std::string, uu::WordMapper *> wm;
74 
75 };
76 
82 TEST ( HifstCykParserTask, basic_test ) {
84  unordered_map<std::string, boost::any> v;
85  v[HifstConstants::kGrammarLoad] = std::string ( "" );
86  v[HifstConstants::kGrammarFeatureweights] = std::string ( "1" );
87  v[HifstConstants::kGrammarStorepatterns] = std::string ( "" );
88  v[HifstConstants::kGrammarStorentorder] = std::string ("");
89  v[HifstConstants::kSsgrammarStore] = std::string ( "" );
90  v[HifstConstants::kSsgrammarAddoovsEnable] = std::string ("no");
92  v[HifstConstants::kCykparserHrmaxheight] = unsigned ( 10 );
93  v[HifstConstants::kCykparserHmax] = std::string ( "X,10" );
94  v[HifstConstants::kCykparserHmin] = std::string ( "X,0" );
95  v[HifstConstants::kCykparserNtexceptionsmaxspan] = std::string ( "S" );
96  const uu::RegistryPO rg ( v );
97  //We need to generate some rules. It is easy to do so with GrammarTask, so we do it.
99  std::stringstream ss;
100  ss << "X 3 3 0" << endl << "S S_X S_X 0" << endl;
101  ss << "X 4 4 0" << endl << "X 5 5 0" << endl << "X 1 1 0" << endl << "X 2 2 0"
102  << endl;
103  ss << "X 3_4 3_4 0" << endl << "X 3_X1_5 3_X1_5 0" << endl << "S X1 X1 0" <<
104  endl;
105  gt.load ( ss );
106  DataForCyK d;
107  d.grammar = gt.getGrammarData();
108  d.sentence = "1 3 4 5 2";
109  d.hpinstances["1"].push_back ( pair<unsigned, unsigned> ( 0, 0 ) );
110  d.hpinstances["3"].push_back ( pair<unsigned, unsigned> ( 1, 0 ) );
111  d.hpinstances["4"].push_back ( pair<unsigned, unsigned> ( 2, 0 ) );
112  d.hpinstances["5"].push_back ( pair<unsigned, unsigned> ( 3, 0 ) );
113  d.hpinstances["2"].push_back ( pair<unsigned, unsigned> ( 4, 0 ) );
114  d.hpinstances["3_4"].push_back ( pair<unsigned, unsigned> ( 1, 1 ) );
115  d.hpinstances["3_X_5"].push_back ( pair<unsigned, unsigned> ( 1, 2 ) );
116  d.hpinstances["X_X"].push_back ( pair<unsigned, unsigned> ( 0, 1 ) );
117  d.hpinstances["X"].push_back ( pair<unsigned, unsigned> ( 0, 0 ) );
119  ssgt.run ( d );
120  //At this point we are ready to run a cyk parse.
122  EXPECT_TRUE ( d.cykdata == NULL );
123  cyk.run ( d );
124  ASSERT_FALSE ( d.cykdata == NULL );
125  EXPECT_EQ ( cyk.getFinalResult(), 1 );
126  //Testing the cykgrid.
127  ASSERT_EQ ( d.cykdata->cykgrid ( 1, 0, 0 ).size(), 1 );
128  EXPECT_EQ ( d.cykdata->cykgrid ( 1, 0, 0, 0 ), 1 );
129  ASSERT_EQ ( d.cykdata->cykgrid ( 1, 0, 2 ).size(), 2 );
130  EXPECT_EQ ( d.cykdata->cykgrid ( 1, 0, 2, 0 ), 0 );
131  EXPECT_EQ ( d.cykdata->cykgrid ( 1, 0, 2, 1 ), 0 );
132  ASSERT_EQ ( d.cykdata->cykgrid ( 2, 1, 2 ).size(), 1 );
133  EXPECT_EQ ( d.cykdata->cykgrid ( 2, 1, 2, 0 ), 4 );
134  ASSERT_EQ ( d.cykdata->cykgrid ( 2, 1, 1 ).size(), 1 );
135  EXPECT_EQ ( d.cykdata->cykgrid ( 2, 1, 1, 0 ), 5 );
136  //test the cyk backpointers?
137 }
138 
139 TEST ( HifstCykParserTask, cykgridfunctor ) {
140  uh::CYKgrid cyk;
141  EXPECT_EQ ( cyk.size(), 0 );
142  //e.g. Adding rule 1 at cell 1,0,2
143  cyk.Add ( 1, 0, 2, 1 );
144  cyk.Add ( 1, 0, 2, 2 );
145  cyk.Add ( 1, 0, 2, 3 );
146  cyk.Add ( 0, 0, 0, 10 );
147  ASSERT_EQ ( cyk ( 1, 0, 2 ).size(), 3 );
148  ASSERT_EQ ( cyk ( 0, 0, 0 ).size(), 1 );
149  EXPECT_EQ ( cyk ( 1, 0, 2 ) [0], 1 );
150  EXPECT_EQ ( cyk ( 1, 0, 2 ) [1], 2 );
151  EXPECT_EQ ( cyk ( 1, 0, 2 ) [2], 3 );
152  EXPECT_EQ ( cyk ( 0, 0, 0 ) [0], 10 );
153  EXPECT_EQ ( cyk ( 1, 0, 2, 0 ), 1 );
154  EXPECT_EQ ( cyk ( 1, 0, 2, 1 ), 2 );
155  EXPECT_EQ ( cyk ( 1, 0, 2, 2 ), 3 );
156  EXPECT_EQ ( cyk ( 0, 0, 0, 0 ), 10 );
157  //Any other position should be empty for now, e.g.
158  ASSERT_EQ ( cyk ( 2, 0, 3 ).size(), 0 );
159  cyk.reset();
160  EXPECT_EQ ( cyk.size(), 0 );
161 }
162 
163 TEST ( HifstCykParserTask, cykbpfunctor ) {
166  aux.push_back ( 1 );
167  aux.push_back ( 1 );
168  aux.push_back ( 1 );
169  bp.Add ( 1, 0, 2, aux );
170  aux.clear();
171  aux.push_back ( 2 );
172  aux.push_back ( 2 );
173  aux.push_back ( 2 );
174  bp.Add ( 1, 0, 2, aux );
175  aux.clear();
176  bp.Add ( 0, 0, 0, aux );
177  ASSERT_EQ ( bp ( 1, 0, 2 ).size(), 2 );
178  ASSERT_EQ ( bp ( 0, 0, 0 ).size(), 0 );
179  uh::cykparser_ruledependencies_t aux2 = bp ( 1, 0, 2 );
180  ASSERT_EQ ( aux2[0].size(), 3 );
181  ASSERT_EQ ( aux2[1].size(), 3 );
182  EXPECT_EQ ( aux2[0][0], 1 );
183  EXPECT_EQ ( aux2[0][1], 1 );
184  EXPECT_EQ ( aux2[0][2], 1 );
185  EXPECT_EQ ( aux2[1][0], 2 );
186  EXPECT_EQ ( aux2[1][1], 2 );
187  EXPECT_EQ ( aux2[1][2], 2 );
188  bp.reset();
189  EXPECT_EQ ( bp.size(), 0 );
190 }
191 
192 #ifndef GMAINTEST
193 
194 int main ( int argc, char **argv ) {
195  ::testing::InitGoogleTest ( &argc, argv );
196  return RUN_ALL_TESTS();
197 }
198 #endif
Handles simple wildcard expansion for strings.
void reset()
Clear cyk grid.
bool run(Data &d)
Runs the parsing algorithm.
Relative to Stats across the pipeline.
Data structure containing all cyk-related information.
const std::string kCykparserHrmaxheight
Implements cyk+ parser.
Contains structures and classes for GrammarData.
const std::string kCykparserNtexceptionsmaxspan
unordered_map< std::string, uu::WordMapper * > wm
void Add(const uint cc, const uint x, const uint y, const uint ruleidx)
Add a rule to the cyk grid at (cc,x,y)
void reset()
Delete cyk backpointers.
const std::string kGrammarFeatureweights
bool run(Data &d)
run method, given a grammar and instantiated patterns, creates and returns the hashes ...
Interfaces with basic methods for iteration.
Struct containing grammar rules.
uh::GrammarData * grammar
Describes class GrammarTask.
CYKgrid cykgrid
Cyk grid. Each cell of the grid is uniquely defined by three dimensions: [category,x,y].
Task class that loads a grammar into memory.
unordered_map< std::string, std::vector< pair< unsigned, unsigned > > > hpinstances
int getFinalResult()
Returns success (number of nodes in topmost cell) or failure (CYK_RETURNS_FAILURE=0) ...
Contains structures and classes for GrammarData.
Static variables for logger. Include only once from main file.
const std::string kCykparserHmin
std::vector< cykparser_rulebpcoordinates_t > cykparser_ruledependencies_t
const std::string kSsgrammarAddoovsSourcedeletions
unordered_set< std::string > tvcb
Target vocabulary.
const std::string kSsgrammarAddoovsEnable
Convenience functions to parse parameters from a string.
std::basic_string< uint > cykparser_rulebpcoordinates_t
void load(const std::string &file)
Loads rules from a grammar file.
This class uses instantiated patterns to analyze the grammar and deliver two hashes providing candida...
Contains implementation for sentence-specific grammar task.
std::size_t size()
Return actual size of the cyk grid.
Contains definitions for cykparser data and task.
int main(int argc, char **argv)
Contains definitions for cykparser data and task.
Contains cyk parser implementation.
uh::SentenceSpecificGrammarData * ssgd
Contains definitions for sentence-specific grammar data and task.
GrammarData * getGrammarData()
Returns GrammarData.
TEST(HifstCykParserTask, basic_test)
Basic test for CykParserTask.
const std::string kSsgrammarStore
const std::string kGrammarStorepatterns
functor that provides cyk backpointers
class WordMapper
Contains sentence-specific grammar data.
Structure for sentence-specific grammar Rules will be queried by cyk per position and number of eleme...
const std::string kGrammarStorentorder
std::size_t size()
Size of the backpointer structure (number of cc,x,y elements inserted).
Public Data class with variables required by CyKParser.
void Add(const unsigned cc, const unsigned x, const unsigned y, const cykparser_ruledependencies_t &coords)
Add all the set of backpointers to the grid.
const std::string kCykparserHmax
Unit testing: google testing common header.
boost::scoped_ptr< uf::StatsData > stats
const std::string kGrammarLoad
Static variable for custom_assert. Include only once from main file.
functor that provides cykgrid access methods