Cambridge SMT System
task.hifst-stats.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef TASK_HIFST_STATS_HPP
16 #define TASK_HIFST_STATS_HPP
17 
24 #include "task.stats.hpp"
25 
26 namespace ucam {
27 namespace hifst {
28 
35 template <class Data>
37  private:
39 
41 
42  Data *d_;
43  float width_;
44  bool writeCYKStats_;
45 
46  public:
47 
50  d_ ( NULL ),
51  writeCYKStats_ ( rg.getBool ( HifstConstants::kStatsHifstCykgridEnable ) ),
52  width_ ( rg.get<unsigned> ( HifstConstants::kStatsHifstCykgridCellwidth ) ),
53  statsoutput_ ( rg.get<std::string> ( HifstConstants::kStatsHifstWrite ) ) {
55  };
56 
61  bool run ( Data& d ) {
62  if ( statsoutput_() == "" ) return false;
63  d_ = &d;
64  FORCELINFO ( "Writing stats to " << statsoutput_ ( d.sidx ) );
65  ucam::util::oszfstream o ( statsoutput_ ( d.sidx ) );
66  if ( writeCYKStats_ )
67  writeCYKStats ( o );
68  o << "=================================================================" <<
69  endl;
70  o << "Local pruning during lattice construction" << endl;
71  writePruneStats ( o );
72  o.close();
73  return false;
74  };
75 
76  private:
77 
84  void writeCYKStats ( oszfstream& o ) {
85  std::vector<std::string> ws;
86  boost::algorithm::split ( ws, d_->sentence,
87  boost::algorithm::is_any_of ( " " ) );
88  std::stringstream line;
89  for ( unsigned z = 0; z < width_; ++z ) line << "-";
90  o << "Source sentence:" << d_->sentence << endl;
91  o << "Word count:" << ws.size() << endl;
92 // Not available right now.
93 // o << "Sentence-specific grammar size:" << d_->rules.size() << " rules " << endl;
94  o << "Number of rules (R), states (NS) and states after pruning (NSP) for each cell of the CYK grid:"
95  << endl;
96  o << "=================================================================" <<
97  endl;
98  o << std::setw ( width_ + 5 ) << std::setiosflags ( std::ios::left ) << "x\\y";
99  for ( unsigned x = 0; x < ws.size(); x++ ) {
100  o << std::setw ( width_ ) << std::setiosflags ( std::ios::left ) << x + 1;
101  }
102  o << endl;
103  o << std::setw ( 4 ) << std::setiosflags ( std::ios::left ) << "----";
104  for ( unsigned x = 0; x < ws.size() + 1; x++ ) {
105  o << std::setw ( width_ ) << line.str();
106  }
107  o << endl;
108  for ( unsigned x = 0; x < ws.size(); x++ ) {
109  o << std::setw ( 4 ) << std::setiosflags ( std::ios::left ) <<
110  std::resetiosflags ( std::ios::right ) << ( ucam::util::toString<unsigned>
111  ( x + 1 ) + "->" );
112  o << std::setw ( width_ - 4 ) << std::setiosflags ( std::ios::left ) <<
113  std::resetiosflags ( std::ios::right ) << ws[x];
114  o << std::setw ( 4 ) << std::setiosflags ( std::ios::right ) << "R:";
115  for ( unsigned int y = 0; y < ws.size() - x; y++ ) {
116  std::stringstream a1;
117  a1 << "|";
118  for ( unsigned int cc = 1; cc <= d_->stats->numcats; cc++ ) {
119  if ( d_->stats->rulecounts[cc * 1000000 + y * 1000 + x] ) {
120  // a1 << d_->cykdata->vcat[cc] << "=" << d_->stats->rulecounts[cc * 1000000 + y * 1000 + x] << " ";
121  a1 << d_->vcat[cc] << "=" << d_->stats->rulecounts[cc * 1000000 + y * 1000 + x]
122  << " ";
123  }
124  }
125  o << std::setw ( width_ ) << std::setiosflags ( std::ios::left ) <<
126  std::resetiosflags ( std::ios::right ) << a1.str() ;
127  }
128  o << std::setiosflags ( std::ios::left ) << "|" << endl;
129  o << std::setw ( width_ ) << " ";
130  o << std::setw ( 4 ) << std::setiosflags ( std::ios::right ) << "NS:";
131  for ( unsigned int y = 0; y < ws.size() - x; y++ ) {
132  std::stringstream a2;
133  a2 << "|";
134  for ( unsigned int cc = 1; cc <= d_->stats->numcats; cc++ ) {
135  if ( d_->stats->numstates[cc * 1000000 + y * 1000 + x] ) {
136  // a2 << d_->cykdata->vcat[cc] << "=" << d_->stats->numstates[cc * 1000000 + y * 1000 + x] << " ";
137  a2 << d_->vcat[cc] << "=" << d_->stats->numstates[cc * 1000000 + y * 1000 + x]
138  << " ";
139  }
140  }
141  o << std::setw ( width_ ) << std::setiosflags ( std::ios::left ) <<
142  std::resetiosflags ( std::ios::right ) << a2.str();
143  }
144  o << std::setiosflags ( std::ios::left ) << "|" << endl;
145  o << std::setw ( width_ ) << " ";
146  o << std::setw ( 4 ) << std::setiosflags ( std::ios::right ) << "NSP:";
147  for ( unsigned int y = 0; y < ws.size() - x; y++ ) {
148  std::stringstream a3;
149  a3 << "|";
150  for ( unsigned int cc = 1; cc <= d_->stats->numcats; cc++ ) {
151  if ( d_->stats->numprunedstates[cc * 1000000 + y * 1000 + x] ) {
152  // a3 << d_->cykdata->vcat[cc] << "=" << d_->stats->numprunedstates[cc * 1000000 + y * 1000 + x] << " ";
153  a3 << d_->vcat[cc] << "=" << d_->stats->numprunedstates[cc * 1000000 + y * 1000
154  + x] << " ";
155  }
156  }
157  o << std::setw ( width_ ) << std::setiosflags ( std::ios::left ) <<
158  std::resetiosflags ( std::ios::right ) << a3.str();
159  }
160  o << std::setiosflags ( std::ios::left ) << "|" << endl;
161  o << std::setw ( 4 ) << "----";
162  for ( unsigned int y = 0; y < ws.size() - x + 1; y++ ) {
163  o << std::setw ( width_ ) << line.str();
164  }
165  o << endl;
166  }
167  o << endl;
168  }
169 
174  void writePruneStats ( oszfstream& o ) {
175  o << "Number of times=" << d_->stats->lpcount << endl;
176  }
177 
178 };
179 
180 }
181 } // end namespaces
182 
183 #endif
Wrapper stream class that writes to pipes, text files or gzipped files.
Definition: szfstream.hpp:200
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
Task that dumps statistics stored by any previous task in the pipeline.
const std::string kStatsHifstCykgridCellwidth
HifstStatsTask(const ucam::util::RegistryPO &rg)
Constructor with RegistryPO object.
#define FORCELINFO(msg)
const std::string kStatsHifstCykgridEnable
Templated (hybrid) Interface for Task classes.
TaskInterface & appendTask(TaskInterface *t)
Appends a task class. If there is no task, append here, otherwise delegate in next task...
Task that reads stats from data object and writes them to a [file].
Definition: task.stats.hpp:29
const std::string kStatsHifstWrite
Reads StatsData and dumps all stats to (sentence-specific) file. Provides a special method for cyk da...
bool run(Data &d)
General run method from TaskInterface. Dumps all stats to a file.
Definition: bleu.hpp:14
void close()
Closes the file.
Definition: szfstream.hpp:323