Cambridge SMT System
data.cykparser.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef CYKDATA_HPP
16 #define CYKDATA_HPP
17 
27 
28 namespace ucam {
29 namespace hifst {
34 struct CYKdata {
35 
38 
41 
44 
47 
52 
54  uint success;
55 
58 
60  uint nnt;
61 
62  // Non-terminals are bounded by a maximum span threshold (typically --hifst.hrmaxspan=10).
63  // However, certain non-terminals in a hiero-style grammar
64  // must be able to reach higher spans. Most notably, this is the case for non-terminal S, which is used in the glue rule.
65  // The list of exceptions is stored in nt_exceptions_maxspan.
66  unordered_set<std::string> nt_exceptions_maxspan;
67 
71  int freeMemory() {
72  for ( unsigned int k = 0; k < rd.size(); k++ ) {
73  rd[k].clear();
74  }
75  rd.clear();
76  cykgrid.reset();
77  bp.reset();
78  categories.clear();
79  vcat.clear();
80  sentence.clear();
81  success = 0;
82  nnt = 0;
83  return 0;
84  };
85 
90  inline uint getNumberWordsSentence() {
91  return sentence.size();
92  };
93 
98  void storeRules ( unordered_map<uint, std::vector<uint> >& c ) {
99  for ( unsigned int cc = 1; cc <= nnt; cc++ ) {
100  for ( unsigned int x = 0; x < sentence.size(); x++ ) {
101  for ( unsigned int y = 0; y < sentence.size() - x; y++ ) {
102  for ( unsigned int k = 0; k < cykgrid ( cc, x, y ).size(); ++k )
103  c[cc * 1000000 + y * 1000 + x].push_back ( cykgrid ( cc, x, y, k ) );
104  }
105  }
106  }
107  };
108 
114  void storeRuleCounts ( unordered_map<uint, uint>& c ) {
115  for ( unsigned int cc = 1; cc <= nnt; cc++ ) {
116  for ( unsigned int x = 0; x < sentence.size(); x++ ) {
117  for ( unsigned int y = 0; y < sentence.size() - x; y++ ) {
118  if ( cykgrid ( cc, x, y ).size() )
119  c[cc * 1000000 + y * 1000 + x] = cykgrid ( cc, x, y ).size();
120  }
121  }
122  }
123  };
124 
125 };
126 
127 }
128 } // end namespaces
129 
130 #endif
void reset()
Clear cyk grid.
CYKbackpointers bp
Data structure containing all cyk-related information.
grammar_inversecategories_t vcat
Inverse map (1=S,2=X,...)
unordered_map< uint, std::string > grammar_inversecategories_t
void reset()
Delete cyk backpointers.
uint nnt
number of non-terminals
cykparser_ruledependencies_t rd
coordinate dependencies for each candidate.
Contains functor that provides access to cyk backpointers.
void storeRules(unordered_map< uint, std::vector< uint > > &c)
Stores rules in a simple hash.
grammar_categories_t categories
Map between categories (S=1,X=2,...)
CYKgrid cykgrid
Cyk grid. Each cell of the grid is uniquely defined by three dimensions: [category,x,y].
unordered_set< std::string > nt_exceptions_maxspan
std::vector< cykparser_rulebpcoordinates_t > cykparser_ruledependencies_t
void storeRuleCounts(unordered_map< uint, uint > &c)
Stores rule counts in a simple hash.
std::size_t size()
Return actual size of the cyk grid.
std::basic_string< uint > cykparser_sentence_t
Contains functor for the cyk grid.
cykparser_sentence_t sentence
The sentence we want to parse.
functor that provides cyk backpointers
uint success
Success and how many parse S nodes have been found in the topmost cell. If 0, cyk parser has failed...
uint getNumberWordsSentence()
Returns number of words in the sentence.
Definition: bleu.hpp:14
unordered_map< std::string, uint > grammar_categories_t
functor that provides cykgrid access methods