15 #ifndef SENTENCESPECIFICGRAMMARDATA_HPP 16 #define SENTENCESPECIFICGRAMMARDATA_HPP 59 rulesWithRhsSpan1.clear();
60 rulesWithRhsSpan2OrMore.clear();
66 inline const std::string
getRule ( std::size_t idx ) {
67 if ( extrarules.find ( idx ) == extrarules.end() )
68 return grammar->
getRule ( idx );
69 LINFO (
"ssgrammar idx=" << idx );
70 return extrarules[idx];
74 inline const std::string
getLHS ( std::size_t idx ) {
75 if ( extrarules.find ( idx ) == extrarules.end() )
76 return grammar->
getLHS ( idx );
77 std::size_t pos = extrarules[idx].find_first_of (
" " );
78 return extrarules[idx].substr ( 0, pos );
83 if ( extrarules.find ( idx ) == extrarules.end() )
85 std::size_t pos = extrarules[idx].find_first_of (
" " ) + 1;
86 std::size_t pos2 = extrarules[idx].find_first_of (
" ", pos );
87 return extrarules[idx].substr ( pos, pos2 - pos );
91 inline const std::string
getRHSSource ( std::size_t idx, uint rulepos ) {
92 if ( extrarules.find ( idx ) == extrarules.end() )
94 std::size_t pos = extrarules[idx].find_first_of (
" " );
95 std::size_t j = pos , jold;
96 for ( uint k = 0; k <= rulepos; ++k ) {
98 j = extrarules[idx].find_first_of (
"_ ", jold + 1 );
99 if ( j == std::string::npos )
100 if ( rulepos )
return "";
102 return extrarules[idx].substr ( jold + 1, j - jold - 1 );
107 if ( extrarules.find ( idx ) == extrarules.end() )
109 std::vector<std::string> splitsource;
110 boost::algorithm::split ( splitsource,
getRHSSource ( idx ),
111 boost::algorithm::is_any_of (
"_" ) );
117 if ( extrarules.find ( idx ) == extrarules.end() )
119 std::size_t pos = extrarules[idx].find_first_of (
" " ) + 1;
120 std::size_t pos1 = extrarules[idx].find_first_of (
" " , pos ) + 1;
126 if ( extrarules.find ( idx ) == extrarules.end() )
128 std::size_t pos = extrarules[idx].find_first_of (
" " ) + 1;
129 std::size_t pos1 = extrarules[idx].find_first_of (
" ", pos ) + 1;
130 std::size_t pos2 = extrarules[idx].find_first_of (
" ", pos1 );
131 return extrarules[idx].substr ( pos1, pos2 - pos1 );
137 if ( extrarules.find ( idx ) == extrarules.end() )
139 std::vector<std::string> splittranslation;
141 boost::algorithm::is_any_of (
"_" ) );
142 return splittranslation;
147 if ( extrarules.find ( idx ) == extrarules.end() )
149 std::size_t pos = extrarules[idx].find_first_of (
" " ) + 1;
150 std::size_t pos1 = extrarules[idx].find_first_of (
" ", pos ) + 1;
151 std::size_t pos2 = extrarules[idx].find_first_of (
" ", pos1 );
157 if ( extrarules.find ( idx ) == extrarules.end() )
159 std::size_t pos = extrarules[idx].find_first_of (
" " ) + 1;
160 std::size_t pos1 = extrarules[idx].find_first_of (
" ", pos );
161 std::size_t pos2 = extrarules[idx].find_first_of (
" ", pos1 + 1 );
162 std::size_t pos3 = extrarules[idx].find_first_of (
" \n\0", pos2 + 1 );
163 return ucam::util::toNumber<float> ( extrarules[idx].substr ( pos2,
168 , std::vector<unsigned> &links)
const {
169 if ( extrarules.find ( idx ) == extrarules.end() ) {
190 if ( extrarules.find ( idx ) == extrarules.end() )
192 std::size_t pos0 = extrarules[idx].find_first_of (
" " ) + 1;
193 std::size_t pos = extrarules[idx].find_first_of (
" ", pos0 );
194 for (
const char *c = extrarules[idx].c_str() + pos0;
195 c <= extrarules[idx].c_str() + pos; ++c )
196 if ( *c >=
'A' && *c <=
'Z' )
return false;
200 inline const std::size_t
getIdx ( std::size_t idx ) {
201 if ( extrarules.find ( idx ) == extrarules.end() )
202 return grammar->
getIdx ( idx );
209 const unordered_set<std::string>& vcb ) {
210 if ( extrarules.find ( idx ) == extrarules.end() )
221 unordered_map<uint, uint> *mappings ) {
222 if ( extrarules.find ( idx ) == extrarules.end() )
235 for ( uint k = 0; k < phrase.size(); ++k ) {
236 if ( phrase[k] >=
'A' && phrase[k] <=
'Z' )
return false;
237 else if ( phrase[k] ==
'_' )
return false;
const std::string getRHSTranslation(std::size_t idx)
Returns RHS translation of a rule with index idx.
const float getWeight(std::size_t idx)
Returns the weight of a rule. This weight is the dot product of all the features with its scales...
ssgrammar_rulesmap_t rulesWithRhsSpan1
const std::string getLHS(std::size_t idx) const
Gets left-hand-side of the rule indexed by idx.
void getMappings(std::size_t idx, unordered_map< uint, uint > *mappings)
Returns the non-terminal mappings for a rule. For more details see getRuleMappings function...
void getLinks(std::size_t idx, std::vector< unsigned > &links) const
const uint getRHSTranslationSize(std::size_t idx) const
Returns the number of elements in translation for a given rule.
const std::string getRule(std::size_t idx)
Returns rule corresponding to index idx.
const bool isPhrase(std::size_t idx) const
Checks whether the rule is a phrase (if it is not, it is a hierarchical rule).
const uint getRHSSourceSize(std::size_t idx) const
Gets number of elements in the RHS source.
Struct containing grammar rules.
const std::vector< std::string > getRHSSplitSource(std::size_t idx) const
Gets a split version of the RHS (source).
const bool isPhrase(std::size_t idx)
const std::size_t getIdx(std::size_t idx) const
Gets the real position (line) in the (potentially unsorted) file.
const std::string getRHSSource(std::size_t idx) const
Gets right-hand-side source for a rule using rule index idx.
const std::string getRHSSource(std::size_t idx)
Returns Right-hand-side (source) of the rule with index=idx.
const std::string getLHS(std::size_t idx)
Returns Left-hand-side of a rule corresponding to index idx.
const float getWeight(std::size_t idx) const
Returns weight of a rule accessed by index idx.
const bool isAcceptedByVocabulary(const std::size_t idx, const unordered_set< std::string > &vcb) const
Determines whether a particular rule is allowed within a vocabulary, i.e. all target words of the rul...
const std::string getRule(std::size_t idx) const
Gets a rule indexed by idx. Rule format: LHS RHSSource RHSTarget weight.
const std::vector< std::string > getRHSSplitSource(std::size_t idx)
Returns vector of elements of the RHS source.
bool phraseIsTerminalWord(const std::string &phrase)
const std::string getRHSSource(std::size_t idx, uint rulepos)
Returns element at position rulepos of right-hand-side (source)
ssgrammar_rulesmap_t rulesWithRhsSpan2OrMore
cells containing potentially applicable rules with two or more elements
uint count_needles(const std::string &haystack, const char needle, std::size_t start, std::size_t end)
Convenience function that counts the number of times a needle appears.
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx) const
Returns the translation as a vector of elements.
const uint getRHSSourceSize(std::size_t idx)
Returns size of RHS source of a rule.
const std::size_t getIdx(std::size_t idx)
Returns the true idx of a rule (i.e. line in the grammar file). If it is sentence specific...
unordered_map< uint, ssgrammar_firstelementmap_t > ssgrammar_rulesmap_t
unordered_map< std::size_t, std::string > extrarules
const GrammarData * grammar
Pointer to the original grammar data (no ownership)
void getLinks(std::size_t idx, std::vector< unsigned > &links) const
const std::string getRHSTranslation(std::size_t idx) const
Returns RHS translation part of a rule accessed by index idx.
void getMappings(std::size_t idx, unordered_map< uint, uint > *mappings) const
Returns the non-terminal mappings. For more details see getRuleMappings function. ...
const bool isAcceptedByVocabulary(const std::size_t idx, const unordered_set< std::string > &vcb)
Structure for sentence-specific grammar. Rules will be queried by CYK per position and number of elements...
SentenceSpecificGrammarData()
const uint getRHSTranslationSize(std::size_t idx)
Returns size of RHS (translation) of a rule.
void getRuleMappings(const std::vector< std::string > &source, const std::vector< std::string > &translation, unordered_map< uint, uint > *mappings)
Given a source and translation of the same rule, sharing the same non-terminals in RHS...
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx)
Returns translation as a vector of elements.