Cambridge SMT System
hifst.task.ssgrammar.gtest.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
21 #include <googletesting.h>
22 
23 #ifndef GMAINTEST
24 #include "main.custom_assert.hpp"
25 #include "main.logger.hpp"
26 #endif
27 
28 #include "params.hpp"
29 #include "addresshandler.hpp"
30 #include "taskinterface.hpp"
31 
32 #include "defs.grammar.hpp"
33 #include "defs.ssgrammar.hpp"
34 
35 #include "data.stats.hpp"
36 #include "data.grammar.hpp"
37 
38 #include "data.ssgrammar.hpp"
39 #include "task.grammar.hpp"
40 
41 #include "task.ssgrammar.hpp"
42 
43 namespace uh = ucam::hifst;
44 namespace uf = ucam::fsttools;
45 
49  sidx ( 1 ),
50  grammar ( NULL ),
51  ssgd ( NULL ),
52  stats ( new uf::StatsData ) {
53  }
54 
55  unsigned sidx;
56  std::string sentence;
58  unordered_map<std::string, std::vector< pair <unsigned, unsigned> > >
61  std::auto_ptr<uf::StatsData> stats;
62  unordered_set<std::string> tvcb;
63 };
64 
// Exercises SentenceSpecificGrammarTask end to end: a small grammar is loaded
// through GrammarTask from an in-memory stream, pattern instances are registered
// by hand for the sentence "1 3 4 5 2 <OOVID>", the task is run, and the resulting
// rulesWithRhsSpan1 / rulesWithRhsSpan2OrMore cells are checked.
// NOTE(review): this listing skips some original source lines (the embedded
// numbering jumps, e.g. 78->80, 90->93, 114->116), so the constructor arguments of
// gt and the declarations of d and ssgt are not visible here.
66 TEST ( HifstSentenceSpecificGrammarTask, basic_test ) {
    // Registry options: feature weight "1", empty strings for the grammar/ssgrammar
    // load and store options, and OOV handling switched on ("yes").
68  unordered_map<std::string, boost::any> v;
69  v[HifstConstants::kGrammarFeatureweights] = std::string ( "1" );
70  v[HifstConstants::kGrammarLoad] = std::string ( "" );
71  v[HifstConstants::kGrammarStorepatterns] = std::string ( "" );
72  v[HifstConstants::kGrammarStorentorder] = std::string ("");
73  v[HifstConstants::kSsgrammarStore] = std::string ( "" );
74  v[HifstConstants::kSsgrammarAddoovsEnable] = std::string ("yes");
76  const uu::RegistryPO rg ( v );
77  //We need to generate some rules. It is easy to do so with GrammarTask, so we do it.
78  boost::scoped_ptr< uh::GrammarTask<DataForSentenceSpecificGrammarTask> > gt (
80  std::stringstream ss;
81  ss << "X 3 3 0" << endl << "S S_X S_X 0" << endl;
82  ss << "X 4 4 0" << endl << "X 5 5 0" << endl;
83  ss << "X 2 2 0" << endl;
84  ss << "X 3_4 3_4 0" << endl << "X 3_X1_5 3_X1_5 0" << endl;
85  //No rule for 1.
86  //Instead, these two will apply on that word, as defined by grid instance pattern
87  ss << "S X1 X1 0" << endl;
88  ss << "S M1 M1 0" << endl;
89  gt->load ( ss );
90  boost::scoped_ptr< DataForSentenceSpecificGrammarTask> dor (
93  d.grammar = gt->getGrammarData();
94  ASSERT_TRUE ( d.grammar != NULL );
95  //Now insert patterns we want to allow, and where.
96  d.grammar->patterns.insert ( "w" );
97  d.grammar->patterns.insert ( "w_w" );
98  d.grammar->patterns.insert ( "w_X_w" );
99  d.grammar->patterns.insert ( "X_X" );
100  d.sentence = "1 3 4 5 2 " + uu::toString ( OOVID );
    // Each hpinstances entry maps an instantiated pattern to a list of
    // (unsigned, unsigned) pairs. Judging from the values used here, the pairs look
    // like (start position in the sentence, number of extra words covered)
    // -- TODO confirm against the pattern-instance producer.
101  d.hpinstances["1"].push_back ( pair<unsigned, unsigned> ( 0, 0 ) );
102  d.hpinstances["3"].push_back ( pair<unsigned, unsigned> ( 1, 0 ) );
103  d.hpinstances["4"].push_back ( pair<unsigned, unsigned> ( 2, 0 ) );
104  d.hpinstances["5"].push_back ( pair<unsigned, unsigned> ( 3, 0 ) );
105  d.hpinstances["2"].push_back ( pair<unsigned, unsigned> ( 4, 0 ) );
106  d.hpinstances[uu::toString ( OOVID )].push_back ( pair<unsigned, unsigned> ( 5,
107  0 ) );
108  d.hpinstances["3_4"].push_back ( pair<unsigned, unsigned> ( 1, 1 ) );
109  d.hpinstances["3_X_5"].push_back ( pair<unsigned, unsigned> ( 1, 2 ) );
110  d.hpinstances["X_X"].push_back ( pair<unsigned, unsigned> ( 1, 1 ) );
111  //This one repeated on purpose. hpinstances are repeated if accepted for different spans, but ssgrammar shouldn't have repeated rules
112  d.hpinstances["X_X"].push_back ( pair<unsigned, unsigned> ( 1, 1 ) );
113  d.hpinstances["X"].push_back ( pair<unsigned, unsigned> ( 0, 0 ) );
114  boost::scoped_ptr< uh::SentenceSpecificGrammarTask<DataForSentenceSpecificGrammarTask> >
116  ( rg ) );
117  ssgt->run ( d );
118  ASSERT_TRUE ( d.ssgd != NULL );
    // Six span-1 cells are expected; the sentence has six words, so presumably one
    // cell per sentence position.
119  EXPECT_EQ ( d.ssgd->rulesWithRhsSpan1.size(), 6 );
    // Expected span-1 rule for each cell key. Key 0 (word 1, which has no rule in
    // the grammar) expects an <oov> rule; the last entry is the OOVID word itself.
120  std::vector<std::string> testrhs1, testrhs1X, testrhs1M;
121  testrhs1.push_back ( "X 1 <oov> 0" );
122  testrhs1.push_back ( "X 3 3 0" );
123  testrhs1.push_back ( "X 4 4 0" );
124  testrhs1.push_back ( "X 5 5 0" );
125  testrhs1.push_back ( "X 2 2 0" );
126  testrhs1.push_back ( "X " + uu::toString ( OOVID ) + " " + uu::toString (
127  OOVID ) + " 0" );
    // NOTE(review): testrhs1M and testrhs1X hold a single element each, so indexing
    // them with itx->first below is only in range when the cell key is 0.
128  testrhs1M.push_back ( "S M1 M1 0" );
129  testrhs1X.push_back ( "S X1 X1 0" );
130  for ( uh::ssgrammar_rulesmap_t::iterator itx =
131  d.ssgd->rulesWithRhsSpan1.begin();
132  itx != d.ssgd->rulesWithRhsSpan1.end();
133  ++itx ) {
134  ASSERT_TRUE ( itx->second.size() >= 1 );
135  for ( uh::ssgrammar_firstelementmap_t::iterator itx2 = itx->second.begin();
136  itx2 != itx->second.end(); ++itx2 ) {
    // Every first-element bucket must contain exactly one rule.
137  ASSERT_EQ ( itx->second[itx2->first].size(), 1 );
    // getFilteredNonTerminal rewrites its argument in place; the bucket key is
    // expected to be unchanged by it, i.e. already in filtered form.
138  std::string element = itx2->first;
139  uh::getFilteredNonTerminal ( element );
140  EXPECT_EQ ( element, itx2->first );
141  //A bit hacky, but will suffice to check two rule candidates for the same position under the same pattern
142  if ( itx2->first == "M" ) {
143  EXPECT_EQ ( d.ssgd->getRule ( itx->second[itx2->first][0] ),
144  testrhs1M[itx->first] );
145  } else if ( itx2->first == "X" ) {
146  EXPECT_EQ ( d.ssgd->getRule ( itx->second[itx2->first][0] ),
147  testrhs1X[itx->first] );
148  } else {
149  // cerr << "itx2->first=> " << itx2->first << "," << itx->second[itx2->first].size() << "," << "testrhs1=" << testrhs1[itx->first] << "," << d.ssgd->getRule ( itx->second[itx2->first][0] ) << endl;
150  EXPECT_EQ ( d.ssgd->getRule ( itx->second[itx2->first][0] ),
151  testrhs1[itx->first] );
152  }
153  }
154  }
    // NOTE(review): testrhs2 is filled but never read in the visible part of this test.
155  std::vector<std::string> testrhs2;
156  testrhs2.push_back ( "X 3_4 3_4 0" );
    // Only the cell with key 1 may hold rules spanning two or more elements, and it
    // must hold them under exactly two distinct first elements.
157  for ( uh::ssgrammar_rulesmap_t::iterator itx =
158  d.ssgd->rulesWithRhsSpan2OrMore.begin();
159  itx != d.ssgd->rulesWithRhsSpan2OrMore.end();
160  ++itx ) {
161  if ( itx->first != 1 ) ASSERT_TRUE ( !itx->second.size() );
162  else {
163  ASSERT_TRUE ( itx->second.size() == 2 );
164  }
165  }
166  //Rules should only appear once per position. Example:
167  EXPECT_EQ ( d.ssgd->rulesWithRhsSpan2OrMore[1]["S"].size(), 1 );
168  //Testing existence of rules
    // The three rules are collected into a set so the check does not depend on the
    // order in which they are stored inside each bucket.
169  unordered_set<std::string> aux;
170  aux.insert ( d.ssgd->getRule ( d.ssgd->rulesWithRhsSpan2OrMore[1]["S"][0] ) );
171  aux.insert ( d.ssgd->getRule ( d.ssgd->rulesWithRhsSpan2OrMore[1]["3"][0] ) );
172  aux.insert ( d.ssgd->getRule ( d.ssgd->rulesWithRhsSpan2OrMore[1]["3"][1] ) );
173  EXPECT_TRUE ( aux.find ( "S S_X S_X 0" ) != aux.end() );
174  EXPECT_TRUE ( aux.find ( "X 3_X1_5 3_X1_5 0" ) != aux.end() );
175  EXPECT_TRUE ( aux.find ( "X 3_4 3_4 0" ) != aux.end() );
176 }
177 
// Checks the rule accessors of the data object gd (getRule, getLHS, getRHSSource,
// getRHSTranslation, getIdx, isPhrase, getWeight, getMappings, ...) for two rules
// stored in gd.extrarules under keys 0 and 1.
// NOTE(review): original lines 179-180 are missing from this listing; they
// presumably declare gt (a grammar task) and gd (sentence-specific grammar data)
// -- confirm against the real source file.
178 TEST ( HifstSentenceSpecificGrammarTask, data ) {
    // Base grammar loaded through gt: one rule with LHS XT and one with LHS ST.
181  std::stringstream ss;
182  ss << "XT 35_47_T T_43_55_58 0.450" << endl << "ST ST_XT ST_XT 0.370" << endl;
183  gt.load ( ss );
184  gd.grammar = gt.getGrammarData();
    // Extra rules keyed by 0 and 1. Every expectation below matches these entries,
    // not the rules loaded into the base grammar above.
185  gd.extrarules[0] = "S S_X S_X 0.37";
186  gd.extrarules[1] = "X 35_47 43_55_58 0.45";
    // Rule 1: source side "35_47", translation side "43_55_58".
187  EXPECT_EQ ( gd.getRule ( 1 ), "X 35_47 43_55_58 0.45" );
188  EXPECT_EQ ( gd.getLHS ( 1 ), "X" );
189  EXPECT_EQ ( gd.getRHSSource ( 1 ), "35_47" );
190  EXPECT_EQ ( gd.getRHSSource ( 1, 0 ), "35" );
191  EXPECT_EQ ( gd.getRHSSource ( 1, 1 ), "47" );
192  EXPECT_EQ ( gd.getRHSSourceSize ( 1 ), 2 );
193  EXPECT_EQ ( gd.getRHSTranslation ( 1 ), "43_55_58" );
194  EXPECT_EQ ( gd.getRHSTranslationSize ( 1 ), 3 );
    // ASSERT (not EXPECT) on the size so the indexed accesses below stay in range.
195  ASSERT_EQ ( gd.getRHSSplitTranslation ( 1 ).size(), 3 );
196  EXPECT_EQ ( gd.getRHSSplitTranslation ( 1 ) [0], "43" );
197  EXPECT_EQ ( gd.getRHSSplitTranslation ( 1 ) [1], "55" );
198  EXPECT_EQ ( gd.getRHSSplitTranslation ( 1 ) [2], "58" );
199  EXPECT_EQ ( gd.getIdx ( 1 ), 1 );
    // Rule 0: "S_X" on both the source and the translation side.
200  EXPECT_EQ ( gd.getRule ( 0 ), "S S_X S_X 0.37" );
201  EXPECT_EQ ( gd.getLHS ( 0 ), "S" );
202  EXPECT_EQ ( gd.getRHSSource ( 0 ), "S_X" );
203  EXPECT_EQ ( gd.getRHSSourceSize ( 0 ), 2 );
204  EXPECT_EQ ( gd.getRHSTranslation ( 0 ), "S_X" );
205  EXPECT_EQ ( gd.getRHSTranslationSize ( 0 ), 2 );
206  ASSERT_EQ ( gd.getRHSSplitTranslation ( 0 ).size(), 2 );
207  EXPECT_EQ ( gd.getRHSSplitTranslation ( 0 ) [0], "S" );
208  EXPECT_EQ ( gd.getRHSSplitTranslation ( 0 ) [1], "X" );
209  EXPECT_EQ ( gd.getIdx ( 0 ), 0 );
    // Rule 1 is expected to be reported as a phrase, rule 0 (RHS "S_X") is not.
210  EXPECT_EQ ( gd.isPhrase ( 1 ), true );
211  EXPECT_EQ ( gd.isPhrase ( 0 ), false );
    // NOTE(review): these compare floats for exact equality against the literals.
212  EXPECT_EQ ( gd.getWeight ( 0 ), 0.37f );
213  EXPECT_EQ ( gd.getWeight ( 1 ), 0.45f );
    // Rule 0 is expected to yield two mappings and rule 1 none; the map is cleared
    // between the two calls so the second count is not affected by the first.
214  unordered_map<unsigned, unsigned> mappings;
215  gd.getMappings ( 0, &mappings );
216  EXPECT_EQ ( mappings.size(), 2 );
217  mappings.clear();
218  gd.getMappings ( 1, &mappings );
219  EXPECT_EQ ( mappings.size(), 0 );
220 }
221 
222 #ifndef GMAINTEST
223 
// Test entry point, compiled only when GMAINTEST is not defined: hands the
// command-line arguments to Google Test and returns the result of RUN_ALL_TESTS().
224 int main ( int argc, char **argv ) {
225  ::testing::InitGoogleTest ( &argc, argv );
226  return RUN_ALL_TESTS();
227 }
228 #endif
Handles simple wildcard expansion for strings.
const std::string getRHSTranslation(std::size_t idx)
Returns RHS translation of a rule with index idx.
Relative to Stats across the pipeline.
const float getWeight(std::size_t idx)
Returns the weight of a rule. This weight is the dot product of all the features with its scales...
Public Data class with variables required by SentenceSpecificGrammarTask.
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string. Converts integers, floats and doubles to string. Quits execution if ...
Contains structures and classes for GrammarData.
void getMappings(std::size_t idx, unordered_map< uint, uint > *mappings)
Returns the non-terminal mappings for a rule. For more details see getRuleMappings function...
const std::string kGrammarFeatureweights
const std::string getRule(std::size_t idx)
Returns rule corresponding to index idx.
int main(int argc, char **argv)
Interfaces with basic methods for iteration.
unordered_set< std::string > patterns
Patterns in these rules.
Struct containing grammar rules.
uh::SentenceSpecificGrammarData * ssgd
Describes class GrammarTask.
const bool isPhrase(std::size_t idx)
Task class that loads a grammar into memory.
const std::string getRHSSource(std::size_t idx)
Returns Right-hand-side (source) of the rule with index=idx.
TEST(HifstSentenceSpecificGrammarTask, basic_test)
Basic test for SentenceSpecificGrammarTask class.
const std::string getLHS(std::size_t idx)
Returns Left-hand-side of a rule corresponding to index idx.
Static variables for logger. Include only once from main file.
const std::string kSsgrammarAddoovsSourcedeletions
const std::string kSsgrammarAddoovsEnable
#define OOVID
Convenience functions to parse parameters from a string.
void load(const std::string &file)
Loads rules from a grammar file.
This class uses instantiated patterns to analyze the grammar and deliver two hashes providing candida...
Contains implementation for sentence-specific grammar task.
ssgrammar_rulesmap_t rulesWithRhsSpan2OrMore
cells containing potentially applicable rules with two or more elements
const uint getRHSSourceSize(std::size_t idx)
Returns size of RHS source of a rule.
Contains definitions for cykparser data and task.
const std::size_t getIdx(std::size_t idx)
Returns the true idx of a rule (i.e. line in the grammar file). If it is sentence specific...
unordered_map< std::string, std::vector< pair< unsigned, unsigned > > > hpinstances
Contains definitions for sentence-specific grammar data and task.
unordered_map< std::size_t, std::string > extrarules
const GrammarData * grammar
Pointer to the original grammar data (no ownership)
GrammarData * getGrammarData()
Returns GrammarData.
const std::string kSsgrammarStore
const std::string kGrammarStorepatterns
Contains sentence-specific grammar data.
Structure for sentence-specific grammar. Rules will be queried by cyk per position and number of eleme...
const std::string kGrammarStorentorder
const uint getRHSTranslationSize(std::size_t idx)
Returns size of RHS (translation) of a rule.
Unit testing: google testing common header.
const std::string kGrammarLoad
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx)
Returns translation as a vector of elements.
Static variable for custom_assert. Include only once from main file.
void getFilteredNonTerminal(std::string &word)
Return the filtered non-terminal name. For example, for the rule Z 3_XT2_5 XT2, getFilteredNonTermina...