tutorial/data_8ssgrammar_8hpp_source.html

 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use these files except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne

 #ifndef SENTENCESPECIFICGRAMMARDATA_HPP
 #define SENTENCESPECIFICGRAMMARDATA_HPP

 namespace ucam {
 namespace hifst {

 struct SentenceSpecificGrammarData {

   SentenceSpecificGrammarData()
       : grammar ( NULL )
   {}

   const GrammarData *grammar;

   ssgrammar_rulesmap_t rulesWithRhsSpan1;
   ssgrammar_rulesmap_t  rulesWithRhsSpan2OrMore;

   unordered_map<std::size_t, std::string> extrarules;

   inline void reset() {
     rulesWithRhsSpan1.clear();
     rulesWithRhsSpan2OrMore.clear();
     extrarules.clear();
     grammar = NULL;
   }

   inline const std::string getRule ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRule ( idx );
     LINFO ( "ssgrammar idx=" << idx );
     return extrarules[idx];
   };

   inline const std::string getLHS ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getLHS ( idx );
     std::size_t pos = extrarules[idx].find_first_of ( " " );
     return extrarules[idx].substr ( 0, pos );
   };

   inline const std::string getRHSSource ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSSource ( idx );
     std::size_t pos = extrarules[idx].find_first_of ( " " ) + 1;
     std::size_t pos2 = extrarules[idx].find_first_of ( " ", pos );
     return extrarules[idx].substr ( pos, pos2 - pos );
   };

   inline const std::string getRHSSource ( std::size_t idx, uint rulepos ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSSource ( idx, rulepos );
     std::size_t pos = extrarules[idx].find_first_of ( " " );
     std::size_t j = pos , jold;
     for ( uint k = 0; k <= rulepos; ++k ) {
       jold = j;
       j = extrarules[idx].find_first_of ( "_ ", jold + 1 );
       if ( j == std::string::npos )
         if ( rulepos ) return "";
     }
     return extrarules[idx].substr ( jold + 1, j - jold - 1 );
   };

   inline const std::vector<std::string> getRHSSplitSource ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSSplitSource ( idx );
     std::vector<std::string> splitsource;
     boost::algorithm::split ( splitsource, getRHSSource ( idx ),
                               boost::algorithm::is_any_of ( "_" ) );
     return splitsource;
   };

   inline const uint getRHSSourceSize ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSSourceSize ( idx );
     std::size_t pos  = extrarules[idx].find_first_of ( " " ) + 1;
     std::size_t pos1 = extrarules[idx].find_first_of ( " " , pos ) + 1;
     return ucam::util::count_needles ( extrarules[idx], '_', pos, pos1 ) + 1 ;
   };

   inline const std::string getRHSTranslation ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSTranslation ( idx );
     std::size_t pos  = extrarules[idx].find_first_of ( " " ) + 1;
     std::size_t pos1 = extrarules[idx].find_first_of ( " ", pos ) + 1;
     std::size_t pos2 = extrarules[idx].find_first_of ( " ", pos1 );
     return extrarules[idx].substr ( pos1, pos2 - pos1 );
   };

   inline const std::vector<std::string> getRHSSplitTranslation (
     std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSSplitTranslation ( idx );
     std::vector<std::string> splittranslation;
     boost::algorithm::split ( splittranslation, getRHSTranslation ( idx ),
                               boost::algorithm::is_any_of ( "_" ) );
     return splittranslation;
   };

   inline const uint getRHSTranslationSize ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getRHSTranslationSize ( idx );
     std::size_t pos  = extrarules[idx].find_first_of ( " " ) + 1;
     std::size_t pos1 = extrarules[idx].find_first_of ( " ", pos ) + 1;
     std::size_t pos2 = extrarules[idx].find_first_of ( " ", pos1 );
     return ucam::util::count_needles ( extrarules[idx], '_', pos1, pos2 ) + 1;
   };

   inline const float getWeight ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getWeight ( idx );
     std::size_t pos  = extrarules[idx].find_first_of ( " " ) + 1;
     std::size_t pos1 = extrarules[idx].find_first_of ( " ", pos );
     std::size_t pos2 = extrarules[idx].find_first_of ( " ", pos1 + 1 );
     std::size_t pos3 = extrarules[idx].find_first_of ( " \n\0", pos2 + 1 );
     return ucam::util::toNumber<float> ( extrarules[idx].substr ( pos2,
                                          pos3 - pos2 ) );
   };

   void getLinks(std::size_t idx
                 , std::vector<unsigned> &links) const {
     if ( extrarules.find ( idx ) == extrarules.end() ) {
       grammar->getLinks ( idx , links);
       return;
     }
     LERROR("Untested code");
     exit(EXIT_FAILURE);
     // std::size_t pos  = extrarules[idx].find_first_of ( " " ) + 1;
     // std::size_t pos1 = extrarules[idx].find_first_of ( " " , pos) + 1;
     // std::size_t pos2 = extrarules[idx].find_first_of ( " ", pos1) + 1;
     // std::size_t pos3 = extrarules[idx].find_first_of ( " \n\0", pos2);
     // if (extrarules[idx][pos3] == ' ') {
     //   std::size_t pos4 = extrarules[idx].find_first_of ( " \n\0", pos3 + 1 );
     //   return extrarules[idx].substr ( pos3, pos4 - pos3 );
     // }
     // return ""; // no affiliation or links
   }


   inline const bool isPhrase ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->isPhrase ( idx );
     std::size_t pos0  = extrarules[idx].find_first_of ( " " ) + 1;
     std::size_t pos = extrarules[idx].find_first_of ( " ", pos0 );
     for ( const char *c = extrarules[idx].c_str() + pos0;
           c <= extrarules[idx].c_str() + pos; ++c )
       if ( *c >= 'A' && *c <= 'Z' ) return false; //has non-terminals.
     return true; //pure phrase.
   };
   inline const std::size_t getIdx ( std::size_t idx ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->getIdx ( idx );
     return idx;
   };

   inline const bool isAcceptedByVocabulary ( const std::size_t idx,
       const unordered_set<std::string>& vcb ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       return grammar->isAcceptedByVocabulary ( idx, vcb );
     return true;
   };

   inline void getMappings ( std::size_t idx,
                             unordered_map<uint, uint> *mappings ) {
     if ( extrarules.find ( idx ) == extrarules.end() )
       grammar->getMappings ( idx, mappings );
     if ( isPhrase ( idx ) ) return;
     std::vector<std::string> source = getRHSSplitSource ( idx );
     std::vector<std::string> translation = getRHSSplitTranslation ( idx );
     getRuleMappings ( source, translation, mappings );
   };
 };

 /// Returns true when phrase contains neither an upper-case ASCII letter
 /// ('A'-'Z') nor an underscore; an empty phrase therefore yields true.
 /// Elsewhere in this file upper-case letters are treated as non-terminal
 /// markers and '_' as the element separator, so a true result presumably
 /// means "a single terminal word".
 inline bool phraseIsTerminalWord ( const std::string& phrase ) {
   typedef std::string::const_iterator CharIt;
   for ( CharIt ch = phrase.begin(); ch != phrase.end(); ++ch ) {
     const bool isUpperCase = ( *ch >= 'A' && *ch <= 'Z' );
     if ( isUpperCase || *ch == '_' ) return false;
   }
   return true;
 }

 }
 }  // end namespaces

 #endif
ucam::hifst::SentenceSpecificGrammarData::reset
void reset()
Definition: data.ssgrammar.hpp:58

ucam::hifst::SentenceSpecificGrammarData::getRHSTranslation
const std::string getRHSTranslation(std::size_t idx)
Returns RHS translation of a rule with index idx.
Definition: data.ssgrammar.hpp:125

ucam::hifst::SentenceSpecificGrammarData::getWeight
const float getWeight(std::size_t idx)
Returns the weight of a rule. This weight is the dot product of all the features with its scales...
Definition: data.ssgrammar.hpp:156

ucam::hifst::SentenceSpecificGrammarData::rulesWithRhsSpan1
ssgrammar_rulesmap_t rulesWithRhsSpan1
Definition: data.ssgrammar.hpp:49

LINFO
#define LINFO(msg)
Definition: logger.boost_log.hpp:78

ucam::hifst::GrammarData::getLHS
const std::string getLHS(std::size_t idx) const
Gets left-hand-side of the rule indexed by idx.
Definition: data.grammar.hpp:90

ucam::hifst::SentenceSpecificGrammarData::getMappings
void getMappings(std::size_t idx, unordered_map< uint, uint > *mappings)
Returns the non-terminal mappings for a rule. For more details see getRuleMappings function...
Definition: data.ssgrammar.hpp:220

ucam::hifst::SentenceSpecificGrammarData::getLinks
void getLinks(std::size_t idx, std::vector< unsigned > &links) const
Definition: data.ssgrammar.hpp:167

ucam::hifst::GrammarData::getRHSTranslationSize
const uint getRHSTranslationSize(std::size_t idx) const
Returns the number of elements in translation for a given rule.
Definition: data.grammar.hpp:145

ucam::hifst::SentenceSpecificGrammarData::getRule
const std::string getRule(std::size_t idx)
Returns rule corresponding to index idx.
Definition: data.ssgrammar.hpp:66

ucam::hifst::GrammarData::isPhrase
const bool isPhrase(std::size_t idx) const
Checks whether the rule is a phrase (i.e. contains no non-terminals) rather than a hierarchical rule.
Definition: data.grammar.hpp:189

ucam::hifst::GrammarData::getRHSSourceSize
const uint getRHSSourceSize(std::size_t idx) const
Gets number of elements in the RHS source.
Definition: data.grammar.hpp:123

ucam::hifst::GrammarData
Struct containing grammar rules.
Definition: data.grammar.hpp:42

ucam::hifst::GrammarData::getRHSSplitSource
const std::vector< std::string > getRHSSplitSource(std::size_t idx) const
Gets a split version of the RHS (source).
Definition: data.grammar.hpp:115

ucam::hifst::SentenceSpecificGrammarData::isPhrase
const bool isPhrase(std::size_t idx)
Definition: data.ssgrammar.hpp:189

ucam::hifst::GrammarData::getIdx
const std::size_t getIdx(std::size_t idx) const
Gets the real position (line) in the (potentially unsorted) file.
Definition: data.grammar.hpp:198

ucam::hifst::GrammarData::getRHSSource
const std::string getRHSSource(std::size_t idx) const
Gets right-hand-side source for a rule using rule index idx.
Definition: data.grammar.hpp:96

ucam::hifst::SentenceSpecificGrammarData::getRHSSource
const std::string getRHSSource(std::size_t idx)
Returns Right-hand-side (source) of the rule with index=idx.
Definition: data.ssgrammar.hpp:82

ucam::hifst::SentenceSpecificGrammarData::getLHS
const std::string getLHS(std::size_t idx)
Returns Left-hand-side of a rule corresponding to index idx.
Definition: data.ssgrammar.hpp:74

ucam::hifst::GrammarData::getWeight
const float getWeight(std::size_t idx) const
Returns weight of a rule accessed by index idx.
Definition: data.grammar.hpp:152

ucam::hifst::GrammarData::isAcceptedByVocabulary
const bool isAcceptedByVocabulary(const std::size_t idx, const unordered_set< std::string > &vcb) const
Determines whether a particular rule is allowed within a vocabulary, i.e. all target words of the rul...
Definition: data.grammar.hpp:225

ucam::hifst::GrammarData::getRule
const std::string getRule(std::size_t idx) const
Gets a rule indexed by idx. Rule format: LHS RHSSource RHSTarget weight.
Definition: data.grammar.hpp:83

ucam::hifst::SentenceSpecificGrammarData::getRHSSplitSource
const std::vector< std::string > getRHSSplitSource(std::size_t idx)
Returns vector of elements of the RHS source.
Definition: data.ssgrammar.hpp:106

ucam::hifst::phraseIsTerminalWord
bool phraseIsTerminalWord(const std::string &phrase)
Definition: data.ssgrammar.hpp:234

ucam::hifst::SentenceSpecificGrammarData::getRHSSource
const std::string getRHSSource(std::size_t idx, uint rulepos)
Returns element at position rulepos of right-hand-side (source)
Definition: data.ssgrammar.hpp:91

ucam::hifst::SentenceSpecificGrammarData::rulesWithRhsSpan2OrMore
ssgrammar_rulesmap_t rulesWithRhsSpan2OrMore
cells containing potentially applicable rules with two or more elements
Definition: data.ssgrammar.hpp:51

ucam::util::count_needles
uint count_needles(const std::string &haystack, const char needle, std::size_t start, std::size_t end)
Convenience function that counts the number of times a needle appears.
Definition: global_funcs.hpp:107

ucam::hifst::GrammarData::getRHSSplitTranslation
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx) const
Returns the translation as a vector of elements.
Definition: data.grammar.hpp:136

ucam::hifst::SentenceSpecificGrammarData::getRHSSourceSize
const uint getRHSSourceSize(std::size_t idx)
Returns size of RHS source of a rule.
Definition: data.ssgrammar.hpp:116

ucam::hifst::SentenceSpecificGrammarData::getIdx
const std::size_t getIdx(std::size_t idx)
Returns the true idx of a rule (i.e. line in the grammar file). If it is sentence specific...
Definition: data.ssgrammar.hpp:200

ucam::hifst::ssgrammar_rulesmap_t
unordered_map< uint, ssgrammar_firstelementmap_t > ssgrammar_rulesmap_t
Definition: defs.ssgrammar.hpp:32

ucam::hifst::SentenceSpecificGrammarData::extrarules
unordered_map< std::size_t, std::string > extrarules
Definition: data.ssgrammar.hpp:55

ucam::hifst::SentenceSpecificGrammarData::grammar
const GrammarData * grammar
Pointer to the original grammar data (no ownership)
Definition: data.ssgrammar.hpp:45

ucam::hifst::GrammarData::getLinks
void getLinks(std::size_t idx, std::vector< unsigned > &links) const
Definition: data.grammar.hpp:162

LERROR
#define LERROR(msg)
Definition: logger.boost_log.hpp:119

ucam::hifst::GrammarData::getRHSTranslation
const std::string getRHSTranslation(std::size_t idx) const
Returns RHS translation part of a rule accessed by index idx.
Definition: data.grammar.hpp:129

ucam::hifst::GrammarData::getMappings
void getMappings(std::size_t idx, unordered_map< uint, uint > *mappings) const
Returns the non-terminal mappings. For more details see getRuleMappings function. ...
Definition: data.grammar.hpp:207

ucam::hifst::SentenceSpecificGrammarData::isAcceptedByVocabulary
const bool isAcceptedByVocabulary(const std::size_t idx, const unordered_set< std::string > &vcb)
Definition: data.ssgrammar.hpp:208

ucam::hifst::SentenceSpecificGrammarData
Structure for sentence-specific grammar. Rules will be queried by CYK per position and number of eleme...
Definition: data.ssgrammar.hpp:38

ucam::hifst::SentenceSpecificGrammarData::SentenceSpecificGrammarData
SentenceSpecificGrammarData()
Definition: data.ssgrammar.hpp:40

ucam::hifst::SentenceSpecificGrammarData::getRHSTranslationSize
const uint getRHSTranslationSize(std::size_t idx)
Returns size of RHS (translation) of a rule.
Definition: data.ssgrammar.hpp:146

ucam
Definition: bleu.hpp:14

ucam::hifst::getRuleMappings
void getRuleMappings(const std::vector< std::string > &source, const std::vector< std::string > &translation, unordered_map< uint, uint > *mappings)
Given a source and translation of the same rule, sharing the same non-terminals in RHS...
Definition: data.grammar.utilities.hpp:73

ucam::hifst::SentenceSpecificGrammarData::getRHSSplitTranslation
const std::vector< std::string > getRHSSplitTranslation(std::size_t idx)
Returns translation as a vector of elements.
Definition: data.ssgrammar.hpp:135