Cambridge SMT System
hifst.task.grammar.gtest.cpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
21 #include <googletesting.h>
22 
23 #ifndef GMAINTEST
24 #include "main.custom_assert.hpp"
25 #include "main.logger.hpp"
26 #endif
27 
28 #include "params.hpp"
29 #include "defs.grammar.hpp"
30 
31 #include "addresshandler.hpp"
32 #include "taskinterface.hpp"
33 #include "data.stats.hpp"
34 #include "data.grammar.hpp"
35 #include "task.grammar.hpp"
36 
37 namespace uf = ucam::fsttools;
38 namespace uh = ucam::hifst;
39 
// Minimal data object passed as the template argument of uh::GrammarTask in the
// tests of this file.
// NOTE(review): the listing jumps from its line 43 to 45, so one source line of this
// struct is not visible here; the tests dereference a `grammar` pointer whose
// declaration is not shown -- confirm against the original file.
41 struct TaskData {
 // Allocates a fresh uf::StatsData; sidx is left uninitialized by this constructor.
42  TaskData() : stats ( new uf::StatsData ) {}
 // Not read or written by any visible code in this file.
43  uint sidx;
 // Owns the StatsData allocated in the constructor.
 // NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17.
45  std::auto_ptr<uf::StatsData> stats;
46 };
47 
// Loads a two-rule grammar from an in-memory stream through GrammarTask::load and
// checks the rule count (sizeofvpos) and the `p` field of both vpos entries.
49 TEST ( HifstGrammar, task ) {
50  uh::GrammarTask<TaskData> gt ( "", "" );
51  std::stringstream ss;
 // One rule per line; each line has four space-separated fields.
52  ss << "X 35 43 0" << endl << "S S_X S_X 0" << endl;
53  gt.load ( ss );
 // NOTE(review): `grammar` is not declared in the visible lines (listing line 54 is
 // absent); presumably it is obtained from gt.getGrammarData() -- confirm.
 // ASSERT (not EXPECT) so the vpos accesses below are skipped if the count is wrong.
55  ASSERT_EQ ( grammar->sizeofvpos, 2 );
 // Expected values 2 and 12 look like character offsets into the loaded text
 // (vpos[0] refers to the later "S" line) -- TODO confirm the meaning of `p`.
56  EXPECT_EQ ( grammar->vpos[1].p, 2 );
57  EXPECT_EQ ( grammar->vpos[0].p, 12 );
58 }
59 
// Loads a two-rule grammar (one rule with only numeric symbols, one with only S/X
// symbols) and exercises the GrammarData accessors: category tables, per-rule field
// getters, phrase detection, weights and non-terminal mappings.
60 TEST ( HifstGrammar, data_grammar ) {
61  uh::GrammarTask<TaskData> gt ( "", "" );
62  std::stringstream ss;
63  ss << "X 35_47 43_55_58 0.45" << endl << "S S_X S_X 0.37" << endl;
64  gt.load ( ss );
 // NOTE(review): `grammar` is not declared in the visible lines (listing line 65 is
 // absent); presumably it is obtained from gt.getGrammarData() -- confirm.
 // Category tables: two categories are expected, keyed 1 ("S") and 2 ("X").
 // vcat is indexed with key 2 while its size is 2, so it is presumably a map-like
 // container rather than a vector -- confirm.
66  ASSERT_EQ ( grammar->vcat.size(), 2 );
67  EXPECT_EQ ( grammar->vcat[1], "S" );
68  EXPECT_EQ ( grammar->vcat[2], "X" );
69  ASSERT_EQ ( grammar->categories.size(), 2 );
70  EXPECT_EQ ( grammar->categories["S"], 1 );
71  EXPECT_EQ ( grammar->categories["X"], 2 );
 // Rule index 1 is expected to be the X rule, although it is the first input line;
 // getIdx ( 1 ) == 0 checks the mapping back to the input line.
72  EXPECT_EQ ( grammar->getRule ( 1 ), "X 35_47 43_55_58 0.45" );
73  EXPECT_EQ ( grammar->getLHS ( 1 ), "X" );
74  EXPECT_EQ ( grammar->getRHSSource ( 1 ), "35_47" );
 // Two-argument overload: expected to return a single '_'-separated element.
75  EXPECT_EQ ( grammar->getRHSSource ( 1, 0 ), "35" );
76  EXPECT_EQ ( grammar->getRHSSource ( 1, 1 ), "47" );
77  EXPECT_EQ ( grammar->getRHSSourceSize ( 1 ), 2 );
78  EXPECT_EQ ( grammar->getRHSTranslation ( 1 ), "43_55_58" );
79  EXPECT_EQ ( grammar->getRHSTranslationSize ( 1 ), 3 );
 // ASSERT on the size so the element accesses below cannot run on a shorter vector.
80  ASSERT_EQ ( grammar->getRHSSplitTranslation ( 1 ).size(), 3 );
81  EXPECT_EQ ( grammar->getRHSSplitTranslation ( 1 ) [0], "43" );
82  EXPECT_EQ ( grammar->getRHSSplitTranslation ( 1 ) [1], "55" );
83  EXPECT_EQ ( grammar->getRHSSplitTranslation ( 1 ) [2], "58" );
84  EXPECT_EQ ( grammar->getIdx ( 1 ), 0 );
 // Rule index 0 is expected to be the S rule (second input line, getIdx ( 0 ) == 1).
85  EXPECT_EQ ( grammar->getRule ( 0 ), "S S_X S_X 0.37" );
86  EXPECT_EQ ( grammar->getLHS ( 0 ), "S" );
87  EXPECT_EQ ( grammar->getRHSSource ( 0 ), "S_X" );
88  EXPECT_EQ ( grammar->getRHSSourceSize ( 0 ), 2 );
89  EXPECT_EQ ( grammar->getRHSTranslation ( 0 ), "S_X" );
90  EXPECT_EQ ( grammar->getRHSTranslationSize ( 0 ), 2 );
91  ASSERT_EQ ( grammar->getRHSSplitTranslation ( 0 ).size(), 2 );
92  EXPECT_EQ ( grammar->getRHSSplitTranslation ( 0 ) [0], "S" );
93  EXPECT_EQ ( grammar->getRHSSplitTranslation ( 0 ) [1], "X" );
94  EXPECT_EQ ( grammar->getIdx ( 0 ), 1 );
 // The all-numeric rule is expected to be a phrase; the S/X rule is not.
95  EXPECT_EQ ( grammar->isPhrase ( 1 ), true );
96  EXPECT_EQ ( grammar->isPhrase ( 0 ), false );
 // NOTE(review): exact floating-point equality; GoogleTest provides EXPECT_FLOAT_EQ
 // for tolerance-based comparison if this ever proves brittle.
97  EXPECT_EQ ( grammar->getWeight ( 0 ), 0.37f );
98  EXPECT_EQ ( grammar->getWeight ( 1 ), 0.45f );
 // Mappings: two entries expected for the S/X rule, none for the all-numeric rule.
 // The map is cleared between calls so the second size check counts only the
 // entries added by the second call.
99  unordered_map<uint, uint> mappings;
100  grammar->getMappings ( 0, &mappings );
101  EXPECT_EQ ( mappings.size(), 2 );
102  mappings.clear();
103  grammar->getMappings ( 1, &mappings );
104  EXPECT_EQ ( mappings.size(), 0 );
105 }
106 
// Checks uh::getSize on strings holding zero to five '_'-separated elements; the
// expected result is the element count, with the empty string giving 0.
108 TEST ( HifstGrammar, getSize ) {
109  EXPECT_EQ ( uh::getSize ( "" ), 0 );
110  EXPECT_EQ ( uh::getSize ( "35" ), 1 );
111  EXPECT_EQ ( uh::getSize ( "35_47" ), 2 );
 // Numeric and alphabetic elements are mixed; each one is expected to count once.
112  EXPECT_EQ ( uh::getSize ( "35_M_47" ), 3 );
113  EXPECT_EQ ( uh::getSize ( "35_M_47_X" ), 4 );
114  EXPECT_EQ ( uh::getSize ( "35_M_47_X_2" ), 5 );
115 }
116 
117 //Test isTerminal
// Checks uh::isTerminal on one numeric symbol (expected true) and one alphabetic
// symbol (expected false).
118 TEST ( HifstGrammar, isTerminal ) {
119  EXPECT_EQ ( uh::isTerminal ( "3" ), true );
120  EXPECT_EQ ( uh::isTerminal ( "X" ), false );
121 }
122 
123 //Test getFilteredNonTerminal
// Checks the expected results of filtering four symbols: "X" -> "X", "X2" -> "X",
// "AB2" -> "AB" and "3" -> "3".
// NOTE(review): the listing omits its lines 125, 127, 130, 133 and 136. Presumably
// each missing line before an EXPECT_EQ is a call that modifies x in place (e.g.
// uh::getFilteredNonTerminal ( x )) -- confirm against the original file. As shown
// here, the second and third assertions could not pass.
124 TEST ( HifstGrammar, getFilteredNonTerminal ) {
126  std::string x = "X";
128  EXPECT_EQ ( x, "X" );
 // A trailing digit is expected to be removed.
129  x = "X2";
131  EXPECT_EQ ( x, "X" );
132  x = "AB2";
134  EXPECT_EQ ( x, "AB" );
 // A purely numeric symbol is expected to be left unchanged.
135  x = "3";
137  EXPECT_EQ ( x, "3" );
138 }
139 
140 //Test NonTerminalHierarchy
// Exercises NonTerminalHierarchy: identity rules and left-hand sides are inserted,
// then the functor call fills `ntlist` with a comma-separated non-terminal list and
// returns whether it succeeded.
// NOTE(review): listing line 142 is absent and NonTerminalHierarchy is used without
// a namespace qualifier; presumably the missing line brings it into scope -- confirm.
141 TEST ( HifstGrammar, nonterminalhierarchy ) {
 // Case 1: chain S X, X V, V W1 plus a separate Z T rule.
143  NonTerminalHierarchy x;
144  x.insertIdentityRule ( "S X" );
145  x.insertIdentityRule ( "X V" );
146  x.insertIdentityRule ( "V W1" );
147  x.insertIdentityRule ( "Z T" );
148  std::string ntlist;
149  EXPECT_EQ ( x ( ntlist ), true );
 // "W1" was inserted, but the expected list contains "W" (no trailing digit).
150  EXPECT_EQ ( ntlist, "S,Z,T,X,V,W" );
 // Adding plain left-hand sides P and Y: they are expected right after S.
151  x.insertLHS ( "P" );
152  x.insertLHS ( "Y" );
153  EXPECT_EQ ( x ( ntlist ), true );
154  EXPECT_EQ ( ntlist, "S,P,Y,Z,T,X,V,W" );
 // Case 2: the rules loop back to S (S X, X V, V S); failure and an empty list
 // are expected.
155  NonTerminalHierarchy x2;
156  x2.insertIdentityRule ( "S X" );
157  x2.insertIdentityRule ( "X V" );
158  x2.insertIdentityRule ( "V S" );
159  EXPECT_EQ ( x2 ( ntlist ), false );
160  EXPECT_EQ ( ntlist, "" );
 // Case 2b: the loop does not include S (X V, V X); same expectation.
161  NonTerminalHierarchy x2b;
162  x2b.insertIdentityRule ( "S X" );
163  x2b.insertIdentityRule ( "X V" );
164  x2b.insertIdentityRule ( "V X" );
165  EXPECT_EQ ( x2b ( ntlist ), false );
166  EXPECT_EQ ( ntlist, "" );
 // Case 3: with nothing inserted, success and an empty list are expected.
167  NonTerminalHierarchy x3;
168  EXPECT_EQ ( x3 ( ntlist ), true );
169  EXPECT_EQ ( ntlist, "" );
 // P was inserted before A, but the expected list below is "A,P".
170  x3.insertLHS ( "P" );
171  x3.insertLHS ( "A" );
172  //Detects that S is not present
 // The call is expected to fail, yet ntlist is still expected to be filled.
173  EXPECT_EQ ( x3 ( ntlist ), false );
174  EXPECT_EQ ( ntlist, "A,P" );
 // Once S is inserted, the call is expected to succeed with S listed first.
175  x3.insertLHS ( "S" );
176  EXPECT_EQ ( x3 ( ntlist ), true );
177  EXPECT_EQ ( ntlist, "S,A,P" );
178 }
179 
180 #ifndef GMAINTEST
181 
// Test-runner entry point; the surrounding #ifndef GMAINTEST / #endif pair means it
// is compiled only when GMAINTEST is not defined.
// Hands the command-line arguments to GoogleTest and returns the value of
// RUN_ALL_TESTS() as the process exit code.
182 int main ( int argc, char **argv ) {
183  ::testing::InitGoogleTest ( &argc, argv );
184  return RUN_ALL_TESTS();
185 }
186 #endif
Handles simple wildcard expansion for strings.
Relative to Stats across the pipeline.
bool isTerminal(const std::string &word)
Determine if the element is a terminal (i.e. a word, represented by a number) or a non-terminal (i...
grammar_categories_t categories
Ordered list of non-terminals (listed in hierarchical order according to identity rules) ...
Contains structures and classes for GrammarData.
const std::string getLHS(std::size_t idx) const
Gets left-hand-side of the rule indexed by idx.
TEST(HifstGrammar, task)
Tests through stringstream that the loading procedure is done correctly. TaskData is not really requi...
const uint getRHSTranslationSize(std::size_t idx) const
Returns the number of elements in translation for a given rule.
std::auto_ptr< uf::StatsData > stats
const bool isPhrase(std::size_t idx) const
Checks whether the rule is a phrase; a rule that is not a phrase is hierarchical.
const uint getRHSSourceSize(std::size_t idx) const
Gets number of elements in the RHS source.
Interfaces with basic methods for iteration.
Struct containing grammar rules.
posindex * vpos
Sorted Indices.
Describes class GrammarTask.
const std::size_t getIdx(std::size_t idx) const
Gets the real position (line) in the (potentially unsorted) file.
Task class that loads a grammar into memory.
const std::string getRHSSource(std::size_t idx) const
Gets right-hand-side source for a rule using rule index idx.
uh::GrammarData * grammar
Static variables for logger. Include only once from main file.
const float getWeight(std::size_t idx) const
Returns weight of a rule accessed by index idx.
Convenience functions to parse parameters from a string.
const std::string getRule(std::size_t idx) const
Gets a rule indexed by idx. Rule format: LHS RHSSource RHSTarget weight.
void load(const std::string &file)
Loads rules from a grammar file.
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx) const
Returns the translation as a vector of elements.
Contains definitions for cykparser data and task.
Trivial Data class with necessary variables for correct compilation.
GrammarData * getGrammarData()
Returns GrammarData.
int main(int argc, char **argv)
const std::string getRHSTranslation(std::size_t idx) const
Returns RHS translation part of a rule accessed by index idx.
void getMappings(std::size_t idx, unordered_map< uint, uint > *mappings) const
Returns the non-terminal mappings. For more details see getRuleMappings function. ...
void insertIdentityRule(const std::string &identityrule)
Method to store identity rules, i.e. S -> X X , etc.
const uint getSize(const std::string &rhs)
A generic element counter that can be applied to any string. It is intended for use with either source or...
This is a functor with additional methods to include relevant rules (i.e. identify SCFG rules...
grammar_inversecategories_t vcat
std::size_t sizeofvpos
Number of rules.
Unit testing: google testing common header.
Static variable for custom_assert. Include only once from main file.
void getFilteredNonTerminal(std::string &word)
Return the filtered non-terminal name. For example, for the rule Z 3_XT2_5 XT2, getFilteredNonTermina...