25 #define CYK_RETURN_FAILURE 0 37 unsigned finalresult_;
43 unordered_map <std::string, unsigned> hmax_;
45 unordered_map <std::string, unsigned> hmin_;
70 LDEBUG (
"Constructor done!" );
79 bool run ( Data& d ) {
80 d.cykdata = &cykdata_;
89 unsigned& nnt = cykdata_.
nnt;
90 nnt = categories.size();
97 "Mandatory to have an S as the first non-terminal guy" );
98 std::vector<std::string> aux;
99 boost::algorithm::split ( aux, d.sentence,
100 boost::algorithm::is_any_of (
" " ) );
101 for (
unsigned k = 0; k < aux.size(); ++k ) {
102 sentence.push_back ( ucam::util::toNumber<unsigned> ( aux[k] ) );
103 if ( categories.find ( aux[k] ) == categories.end() ) {
104 categories[aux[k]] = k + nnt + 1;
105 cykdata_.
cykgrid.
Add ( k + nnt + 1, k, 0, sentence[k] );
107 cykdata_.
cykgrid.
Add ( categories[aux[k]], k, 0, sentence[k] );
109 vcat[k + nnt + 1] = aux[k];
112 "S must be the top guy in the non-terminal hierarchy" );
114 "S must be the top guy in the non-terminal hierarchy (2)" );
116 "sentence not properly initialized?" );
117 d_->stats->setTimeStart (
"cyk");
118 LDEBUG (
"Now start stage 1: Initialize Bottom row of the chart" );
120 LDEBUG (
"Start stage 2: fill bottom-up the chart" );
122 d_->stats->setTimeEnd (
"cyk");
124 sentence.size() - 1 ).size();
125 if ( finalresult_ > 0 ) {
126 LINFO (
"CYK success:" << finalresult_ <<
" parse trees" );
128 LINFO (
"CYK failed!" );
130 d.cykdata->storeRuleCounts ( d.stats->rulecounts );
131 d.stats->numcats = d.cykdata->vcat.size();
132 LINFO (
"Number of rules applied:" << d.stats->rulecounts.size() );
133 cykdata_.
success = finalresult_;
169 void examineRule (
unsigned x,
unsigned ymax,
unsigned rule_idx,
174 std::string cat = ssgd.
getRHSSource ( rule_idx, rule_pos );
178 unsigned z = ( cykdata_.
categories[cat] == 0 ) ? 0 : 1;
179 for (
unsigned n = 0; n < ymax; n++ ) {
182 coord2.push_back ( cykdata_.
categories[cat] );
183 coord2.push_back ( x );
184 coord2.push_back ( n );
185 if ( rule_pos == regla_tam - 1 ) {
188 rd.push_back ( coord2 );
197 examineRule ( x2, ymax - n - 1, rule_idx, rule_pos + 1, coord2 );
213 void insertMonoRHSRules (
unsigned x,
unsigned y ) {
217 unsigned& nnt = cykdata_.
nnt;
220 for (
unsigned cc = nnt; cc > 1;
223 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
":" <<
224 "Searching for rules with only one element" );
229 LDEBUG (
"Found: " << rules.size() );
230 for (
unsigned jpts = 0; jpts < rules.size(); ++jpts ) {
231 unsigned& idx = rules[jpts];
232 unsigned head = categories[ssgd.
getLHS ( idx )];
239 LWARN ( ssgd.
getLHS ( idx ) <<
"," << x <<
"," << y <<
":" <<
240 "Potentially nasty looping rule (filtering): " << ssgd.
getRule ( idx ) );
243 LDEBUG ( ssgd.
getLHS ( idx ) <<
"," << x <<
"," << y <<
":" <<
"Accepted: " <<
244 ssgd.
getRule ( idx ) <<
", original rule_id=" << ssgd.
getIdx (
245 idx ) <<
",vs" << idx );
248 coord.push_back ( cc );
249 coord.push_back ( x );
250 coord.push_back ( y );
251 cykdata_.
bp.
Add ( head, x, y, coord );
261 void fillBottomChart() {
267 for (
unsigned x = 0; x < sentence.size(); ++x ) {
273 LDEBUG (
"No rules to process" );
276 for (
unsigned p = 0; p < rules.size(); ++p ) {
277 unsigned& idx = rules[p];
278 LDEBUG ( ssgd.
getLHS ( idx ) <<
"," << x <<
",0:" <<
"Accepted: " <<
279 ssgd.
getRule ( idx ) <<
", original rule_id=" << ssgd.
getIdx (
280 idx ) <<
",vs" << idx );
283 cdi.push_back ( categories[cat] );
286 cykdata_.
bp.
Add ( categories[ssgd.
getLHS ( idx )], x, 0, cdi );
288 insertMonoRHSRules ( x, 0 );
312 unsigned& nnt = cykdata_.
nnt;
313 unordered_map<std::string, cykparser_ruledependencies_t > minicache;
319 for (
unsigned y = 1; y < sentence.size(); ++y ) {
320 for (
unsigned x = 0; x < sentence.size() - y; ++x ) {
321 for (
unsigned n = 0; n < y; n++ ) {
325 unsigned x2 = y1 + x1 + 1;
326 for (
unsigned cc = 1; cc <= nnt + sentence.size();
328 std::string cat = vcat[cc];
330 if ( cykdata_.
cykgrid ( cc, x1, y1 ).
size() == 0 )
continue;
335 rules.size() ) <<
" rules." );
336 for (
unsigned i = 0; i < rules.size(); i++ ) {
337 unsigned idx = rules[i];
338 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
":" <<
"Testing rule idx=" << idx
339 <<
",value=" << ssgd.
getRule ( idx ) <<
"; Iteration=" << i );
341 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
":" <<
342 "Rejected (rule minimum span too big): " << ssgd.
getRule ( idx ) );
348 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
":" <<
349 "Rejected (Default Maximum span reached hrmaxheight): " << ssgd.
getRule (
351 LDEBUG (
"Note: hrmaxheight=" << hrmh_ <<
352 " has been reached, only nonterminals listed as exceptions (--cykparser.ntexceptionsmaxspan are allowed!" );
356 if ( hmax_.find ( ssgd.
getLHS ( idx ) ) != hmax_.end() )
357 if ( y > hmax_[ssgd.
getLHS ( idx )] ) {
358 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
":" <<
"Rejected (hmax): " <<
362 if ( hmin_.find ( ssgd.
getLHS ( idx ) ) != hmin_.end() )
363 if ( y < hmin_[ssgd.
getLHS ( idx )] ) {
364 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
":" <<
"Rejected (hmin): " <<
370 unordered_map<std::string, cykparser_ruledependencies_t > ::iterator itx =
371 minicache.find ( rhs );
372 if ( itx != minicache.end() ) {
376 coord.push_back ( cc );
377 coord.push_back ( x1 );
378 coord.push_back ( y1 );
379 examineRule ( x2 - 0, y - ( x2 - x1 - 1 ), idx, 1, coord );
382 if ( rd.size() == 0 ) {
383 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
"(/" << n <<
"):" <<
"Rejected " <<
384 ssgd.
getRule ( idx ) <<
", for: " << ssgd.
getLHS ( idx ) <<
"," << x <<
"," <<
388 LDEBUG ( vcat[cc] <<
"," << x <<
"," << y <<
"(/" << n <<
"):" <<
"Accepted: " 389 << ssgd.
getRule ( idx ) <<
", for: " << ssgd.
getLHS ( idx ) <<
"," << x <<
"," 390 << y <<
", original rule_id=" << ssgd.
getIdx ( idx ) <<
",vs" << idx );
391 for (
unsigned kpt = 0; kpt < rd.size(); kpt++ ) {
393 cykdata_.
bp.
Add ( categories[ssgd.
getLHS ( idx )], x, y, rd[kpt] );
398 insertMonoRHSRules ( x, y );
bool run(Data &d)
Runs the parsing algorithm.
Data structure containing all cyk-related information.
ssgrammar_rulesmap_t rulesWithRhsSpan1
const std::string kCykparserHrmaxheight
std::string toString(const T &x, uint pr=2)
Converts an arbitrary type to string Converts to string integers, floats, doubles Quits execution if ...
grammar_categories_t categories
Ordered list of non-terminals (listed in hierarchical order according to identity rules) ...
grammar_inversecategories_t vcat
Inverse map (1=S,2=X,...)
const std::string kCykparserNtexceptionsmaxspan
unordered_map< uint, std::string > grammar_inversecategories_t
void Add(const uint cc, const uint x, const uint y, const uint ruleidx)
Add a rule to the cyk grid at (cc,x,y)
uint nnt
number of non-terminals
~CYKParserTask(void)
Destructor.
const std::string getRule(std::size_t idx)
Returns rule corresponding to index idx.
cykparser_ruledependencies_t rd
coordinate dependencies for each candidate.
const CYKdata * getcykdata()
returns cykdata structure
grammar_categories_t categories
Map between categories (S=1,X=2,...)
Struct containing grammar rules.
CYKgrid cykgrid
Cyk grid. Each cell of the grid is uniquely defined by three dimensions: [category,x,y].
Templated (hybrid) Interface for Task classes.
CYKParserTask(const ucam::util::RegistryPO &rg)
Constructor.
const std::string getRHSSource(std::size_t idx)
Returns Right-hand-side (source) of the rule with index=idx.
int getFinalResult()
Returns success (number of nodes in topmost cell) or failure (CYK_RETURNS_FAILURE=0) ...
const std::string getLHS(std::size_t idx)
Returns Left-hand-side of a rule corresponding to index idx.
unordered_set< std::string > nt_exceptions_maxspan
const std::string kCykparserHmin
std::vector< cykparser_rulebpcoordinates_t > cykparser_ruledependencies_t
#define CYK_RETURN_FAILURE
std::basic_string< uint > cykparser_rulebpcoordinates_t
ssgrammar_rulesmap_t rulesWithRhsSpan2OrMore
cells containing potentially applicable rules with two or more elements
std::size_t size()
Return actual size of the cyk grid.
const uint getRHSSourceSize(std::size_t idx)
Returns size of RHS source of a rule.
std::basic_string< uint > cykparser_sentence_t
const std::size_t getIdx(std::size_t idx)
Returns the true idx of a rule (i.e. line in the grammar file). If it is sentence specific...
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
cykparser_sentence_t sentence
The sentence we want to parse.
uint success
Success and how many parse S nodes have been found in the topmost cell. If 0, cyk parser has failed...
Structure for sentence-specific grammar Rules will be queried by cyk per position and number of eleme...
grammar_inversecategories_t vcat
void Add(const unsigned cc, const unsigned x, const unsigned y, const cykparser_ruledependencies_t &coords)
Add all the set of backpointers to the grid.
std::basic_string< uint > ssgrammar_listofrules_t
const std::string kCykparserHmax
unordered_map< std::string, uint > grammar_categories_t
unordered_set< std::string > getSetString(const std::string &key) const
Convenience method that returns a set of strings taking "," as the separator character.
void getFilteredNonTerminal(std::string &word)
Return the filtered non-terminal name. For example, for the rule Z 3_XT2_5 XT2, getFilteredNonTermina...