15 #ifndef RULEFILETASK_HPP 16 #define RULEFILETASK_HPP 46 std::string previous_;
52 std::priority_queue<posindex, std::vector<posindex>,
PosIndexCompare> *vpq_;
57 std::vector<float> grammarscales_;
58 std::string ntorderfile_;
69 ,
unsigned featureoffset = 0) :
75 ( featureweightskey ) ) ) {
78 std::vector<float> aux (grammarscales_.size() - featureoffset);
79 std::copy (grammarscales_.begin() + featureoffset, grammarscales_.end(),
84 "0 feature weights. So the grammar is not a probabilistic model? Not my cup of tea." );
97 grammarfile_ ( grammarfilekey ),
98 patternfile_ ( patternfilekey ) ,
118 std::string thisgrammarfile = grammarfile_ ( d.sidx );
119 if ( thisgrammarfile != previous_ ) {
120 FORCELINFO (
"Loading hierarchical grammar: " << thisgrammarfile );
122 "This grammar does not exist" );
123 d.stats->setTimeStart (
"load-grammar-patterns" );
124 load ( thisgrammarfile );
125 d.stats->setTimeEnd (
"load-grammar-patterns" );
126 std::string patternfile = patternfile_ ( d.sidx );
127 if ( patternfile !=
"" ) {
129 for ( unordered_set<std::string>::iterator itx = gd_.
patterns.begin();
130 itx != gd_.
patterns.end(); ++itx ) o << *itx << endl;
133 previous_ = thisgrammarfile;
135 LINFO (
"Skipping grammar loading..." );
148 inline void load (
const std::string& file ) {
150 LINFO (
"=> Loading..." << file );
151 ucam::util::readtextfile<GrammarTask> ( file, *this );
153 LINFO (
"Done! ****" );
164 inline void load ( std::stringstream& s ) {
167 while (
getline ( s, myline ) ) {
183 void generate_ntorder() {
186 LINFO (
"ntorder=" << ntorder );
187 std::vector<std::string> aux;
188 boost::algorithm::split ( aux, ntorder, boost::algorithm::is_any_of (
" ," ) );
189 for ( uint k = 0; k < aux.size(); ++k ) {
190 gd_.
vcat[k + 1] = aux[k];
193 if (ntorderfile_ !=
"") {
195 for ( uint k = 0; k < gd_.
vcat.size(); ++k )
196 o << gd_.
vcat[k + 1] <<
"\t" << k + 1 << std::endl;
205 inline void load_init() {
219 inline void load_sort() {
220 LINFO (
"Sorting indices..." );
226 while ( !vpq_->empty() ) {
227 gd_.
vpos[newidx++] = vpq_->top();
240 __always_inline
void parse ( std::string& line ) {
244 boost::algorithm::trim ( line );
245 if ( line ==
"" )
return;
246 size_t pos1 = line.find_first_of (
" " );
247 size_t pos2 = line.find_first_of (
" ", pos1 + 1 );
248 size_t pos3 = line.find_first_of (
" ", pos2 + 1 );
250 if (pos3 == std::string::npos) {
251 LERROR(
"Grammar not valid. At least one weight is needed: \n=>\t" << line);
254 size_t pos4 = line.find_first_of (
"\t");
255 if (pos4 == std::string::npos) pos4 = line.size();
256 LDEBUG(
"pos1=" << pos1 <<
",pos2=" << pos2 <<
",pos3=" << pos3 <<
",pos4=" << pos4);
258 vector<float> weights;
259 ParseParamString<float> ( line, weights, pos3 + 1 , pos4 - pos3 - 1 );
260 string sweight = toString<float>
261 (
dotproduct (weights, grammarscales_ ), numeric_limits<unsigned>::max() );
263 line = ( pos4 <line.size() )
264 ? line.substr ( 0, pos3 + 1 ) + sweight + line.substr(pos4)
265 : line.substr ( 0, pos3 + 1 ) + sweight;
268 LDEBUG(
"Adding line=[" << line <<
"]");
271 bool waitingfornextfield =
false;
274 for (
unsigned k = 0; k < line.size(); ++k ) {
275 if ( previous ==
' ' && line[k] !=
' ' ) --cf;
286 for (
unsigned k = pi.
o; k < line.size(); ++k ) {
287 if ( line[k] ==
' ' )
break;
288 if ( line[k] >=
'0' && line[k] <=
'9' ) {
289 if ( !word && !nt ) {
294 }
else if ( line[k] >=
'A' && line[k] <=
'Z' ) {
308 pi.
order = vpq_->size();
310 pos_ += line.size() + 1;
311 LDEBUG2 (
"reading rule " << line <<
", at line " << pi.
order <<
", pattern=" <<
313 if ( pattern ==
"X" ) {
314 LINFO (
"Identity rule detected:" << line <<
"===" );
317 nth_.
insertLHS ( line.substr ( 0, pi.
o - 1 ) );
322 template <
typename FM>
Wrapper stream class that writes to pipes, text files or gzipped files.
std::size_t order
absolute index
#define ZDISALLOW_COPY_AND_ASSIGN(TypeName)
GrammarTask(const std::string &grammarfilekey=HifstConstants::kGrammarLoad, const std::string &patternfilekey=HifstConstants::kGrammarStorepatterns)
Constructor used for unit testing.
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
grammar_categories_t categories
Ordered list of non-terminals (listed in hierarchical order according to identity rules) ...
Struct containing rule positions and offsets.
const std::string kGrammarFeatureweights
CompareTool * ct
Pointer to a Comparison object, assumed no ownership.
std::vector< T > ParseParamString(const std::string &stringparams, size_t pos=0)
Function to parse string of parameters, e.g. separated by commas.
unordered_set< std::string > patterns
Patterns in these rules.
Struct containing grammar rules.
posindex * vpos
Sorted Indices.
Task class that loads a grammar into memory.
Templated (hybrid) Interface for Task classes.
iszfstream & getline(iszfstream &izs, std::string &line)
void trim_trailing_zeros(std::string &snumber)
std::string filecontents
The whole grammar.
const std::string getRule(std::size_t idx) const
Gets a rule indexed by idx. Rule format: LHS RHSSource RHSTarget weight.
void load(const std::string &file)
Loads rules from a grammar file.
float dotproduct(std::vector< float > &v1, std::vector< float > &v2)
Implements dot product.
void readtextfile(const std::string &filename, FM &fm)
Function that reads from a file. Templated on any external class with a parse method.
bool fileExists(const std::string &fileName)
This class automatically decides the hierarchy of non-terminals.
Functor Class that provides comparison across the posindex structure. This is typically used e...
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
GrammarData * getGrammarData()
Returns GrammarData.
const std::string kGrammarStorepatterns
bool run(Data &d)
ucam::util::TaskInterface mandatory method implementation. This method loads the hierarchical grammar...
void insertIdentityRule(const std::string &identityrule)
Method to store identity rules, i.e. S -> X X , etc.
This is a functor with additional methods to include relevant rules (i.e. identify SCFG rules...
const std::string kGrammarStorentorder
grammar_inversecategories_t vcat
GrammarTask(ucam::util::RegistryPO const &rg, std::string const &featureweightskey=HifstConstants::kGrammarFeatureweights, unsigned featureoffset=0)
Constructor.
void insertLHS(const std::string &nt)
std::size_t sizeofvpos
Number of rules.
void reset()
Reset object.
const std::string kGrammarLoad
void load(std::stringstream &s)
Loads rules from a stringstream.
void close()
Closes the file.