Cambridge SMT System
MertCommon.cpp
Go to the documentation of this file.
1 //Copyright (c) 2012, University of Cambridge
2 //All rights reserved.
3 //
4 //Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met://
5 //
6 // * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 // * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 // * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9 //
10 //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11 
12 #include "MertCommon.h"
13 
15 
// Returns the current time as text in ctime() format, with the trailing
// newline that ctime() appends replaced by a single space.
//
// Returns an empty string if ctime() cannot convert the current time.
// NOTE: ctime() formats into a buffer shared between calls, so this function
// must not be called from several threads at once.
std::string Time() {
  time_t now;
  time (&now);
  const char *text = ctime (&now);
  if (text == NULL) {
    return std::string();
  }
  std::string stamp (text);
  // Only touch the last character when it really is the newline; this also
  // keeps an (unexpected) empty result from indexing before the buffer.
  if (!stamp.empty() && stamp[stamp.size() - 1] == '\n') {
    stamp[stamp.size() - 1] = ' ';
  }
  return stamp;
}
25 
// Copies `pattern` into `newname` and replaces every occurrence of the
// placeholder `pat` with `rep`.
//
//   newname  receives the result; its previous content is overwritten.
//   pattern  text to search (taken by value, so it may be the same object
//            the caller passes as newname).
//   pat      NUL-terminated placeholder to look for. An empty placeholder
//            leaves the text unchanged.
//   rep      text substituted for each occurrence of pat.
//
// Replacement is not recursive: text inserted by a substitution is never
// searched again, so `rep` may itself contain `pat`.
void ReplacePattern (std::string& newname, std::string pattern, const char *pat,
                     std::string rep) {
  newname = pattern;
  const std::string::size_type patlen = strlen (pat);
  // An empty placeholder matches at every position and would never terminate.
  if (patlen == 0) {
    return;
  }
  std::string::size_type pos = 0;
  while ( (pos = newname.find (pat, pos) ) != std::string::npos) {
    newname.replace (pos, patlen, rep);
    // Resume after the inserted text; restarting at `pos` would match the
    // placeholder again whenever the replacement contains it.
    pos += rep.size();
  }
}
35 
36 void ReplacePattern (std::string& newname, std::string pattern, const char *pat,
37  float f) {
38  std::ostringstream si;
39  si << f;
40  si.precision (4);
41  ReplacePattern (newname, pattern, pat, si.str() );
42 }
43 
44 void ReplacePattern (std::string& newname, std::string pattern, const char *pat,
45  int i) {
46  std::ostringstream si;
47  si << i;
48  ReplacePattern (newname, pattern, pat, si.str() );
49 }
50 
51 std::string ExpandPath (std::string pattern, const int idx) {
52  ReplacePattern (pattern, pattern, "%idx%", idx);
53  return pattern;
54 }
55 
56 void InitializeFromLimits (std::vector<Sid>& ids, const std::string range) {
57  unsigned int idxmin = 0;
58  unsigned int idxmax = 0;
59  sscanf (range.c_str(), "%d:%d", &idxmin, &idxmax);
60  for (unsigned int idx = idxmin; idx <= idxmax; idx++) {
61  ids.push_back (idx);
62  }
63 }
64 
65 void InitializeFromScript (std::vector<Sid>& ids, const std::string filename) {
66  std::ifstream ifs (filename.c_str() );
67  if (!ifs.good() ) {
68  std::cerr << "ERROR: unable to open file " << filename << '\n';
69  exit (1);
70  }
71  uint idx;
72  while (ifs >> idx) {
73  ids.push_back (idx);
74  }
75  ifs.close();
76 }
77 
78 std::ostream& operator<< (std::ostream& o, const Sentence& s) {
79  for (unsigned int i = 0; i < s.size(); ++i) {
80  if (i > 0) {
81  o << " ";
82  }
83  o << s[i];
84  }
85  return o;
86 }
87 
// Prefix of a command-line value that names a file to read vectors from.
// Both the characters and the pointer are const: these are fixed tokens.
static const char* const CMD_LINE_TOKEN_FILE = "file:";
// Token for the 'random' initialization mode; in this file it is referenced
// only from a disabled branch of InitializeVectors.
static const char* const CMD_LINE_TOKEN_RANDOM = "random";
90 
91 std::vector<PARAMS> InitializeVectorsFromFile (const string& filename) {
92  std::vector<PARAMS> vws;
93  std::ifstream ifs (filename.c_str() );
94  if (!ifs.good() ) {
95  cerr << "ERROR: unable to open file " << filename << '\n';
96  exit (1);
97  }
98  std::string line;
99  while (getline (ifs, line) ) {
100  vws.push_back (fst::ParseParamString<double, std::vector<double> > (line) );
101  }
102  ifs.close();
103  return vws;
104 }
105 
106 // This may need to be revisited. Storing a single direction in a vector is incredibly wasteful of memory
107 // Probably should be replaced with some sort of sparse data structure
108 std::vector<PARAMS> InitializeVectorsFromAxes (unsigned int dim) {
109  std::vector<PARAMS> vws;
110  for (unsigned int k = 0; k < dim; ++k) {
111  PARAMS axis (dim);
112  for (unsigned int i = 0; i < dim; ++i) {
113  axis[i] = 0;
114  }
115  axis[k] = 1;
116  vws.push_back (axis);
117  }
118  return vws;
119 }
120 
121 std::vector<PARAMS> InitializeVectorsFromRandom() {
122  cerr << "ERROR: 'random' vector initialization not supported";
123  exit (1);
124 }
125 
126 std::vector<PARAMS> InitializeVectors (const std::string& pattern) {
127  std::vector<PARAMS> vws;
128  if (pattern.find (CMD_LINE_TOKEN_FILE) == 0) {
129  std::string filename = pattern.substr (strlen (CMD_LINE_TOKEN_FILE) );
130  vws = InitializeVectorsFromFile (filename);
131  } //else if (pattern.find(CMD_LINE_TOKEN_RANDOM) == 0) {
132  // vws = InitializeVectorsFromRandom();
133 // }
134  else {
135  vws.push_back (fst::ParseParamString<double, std::vector<double> > (pattern) );
136  }
137  return vws;
138 }
139 
// Returns the first whitespace-delimited token of the file `filename`
// (an empty string if the file contains no token).
//
// If the file cannot be opened, an error is written to std::cerr and the
// program exits with status 1.
std::string ReadWeight (const std::string& filename) {
  std::ifstream input (filename.c_str() );
  if (input.good() ) {
    std::string token;
    input >> token;
    return token;
  }
  std::cerr << "ERROR: unable to open file " << filename << '\n';
  exit (1);
}
151 
152 std::ostream& operator<< (std::ostream& strm, const PARAMS& vw) {
153  char separator = ',';
154  for (uint k = 0; k < vw.size(); ++k) {
155  if (k > 0) {
156  strm << separator;
157  }
158  strm << vw[k];
159  }
160  return strm;
161 }
162 
163 PARAMS operator- (const PARAMS& vw1, const PARAMS& vw2) {
164  if (vw1.size() != vw2.size() ) {
165  cerr << "Cannot subtract two vectors of different sizes. V1: "
166  << vw1.size() << " V2:" << vw2.size() << endl;
167  exit (1);
168  }
169  PARAMS vv (vw1.size() );
170  for (unsigned int k = 0; k < vw1.size(); ++k) {
171  vv[k] = vw1[k] - vw2[k];
172  }
173  return vv;
174 }
175 
176 PARAMS operator+ (const PARAMS& vw1, const PARAMS& vw2) {
177  if (vw1.size() != vw2.size() ) {
178  cerr << "Cannot sum two vectors of different sizes. V1: " << vw1.size()
179  << " V2:" << vw2.size() << endl;
180  exit (1);
181  }
182  PARAMS vv (vw1.size() );
183  for (unsigned int k = 0; k < vw1.size(); ++k) {
184  vv[k] = vw1[k] + vw2[k];
185  }
186  return vv;
187 }
188 
189 template double fst::DotProduct<double> (
191 
192 std::vector<std::string> InitRefDataFilenames (int argc, char** argv) {
193  std::vector<std::string> refFilenames;
194  if (argc == 1) {
195  cerr << "ERROR: no reference files specified\n";
196  exit (1);
197  } else {
198  tracer << argc - 1 << " reference files specified:\n";
199  for (int i = 1; i < argc; ++i) {
200  tracer << "r[" << i << "]=" << argv[i] << '\n';
201  refFilenames.push_back (argv[i]);
202  }
203  }
204  return refFilenames;
205 }
#define tracer
Definition: data.lmbr.hpp:18
Collection ParseParamString(const std::string &stringparams)
Definition: params.h:45
PARAMS operator+(const PARAMS &vw1, const PARAMS &vw2)
Definition: MertCommon.cpp:176
void ReplacePattern(std::string &newname, std::string pattern, const char *pat, std::string rep)
Definition: MertCommon.cpp:26
std::string Time()
Definition: MertCommon.cpp:16
Implements Tropical Sparse tuple weight semiring, extending from openfst SparsePowerWeight class...
std::vector< Wid > Sentence
Definition: MertCommon.h:48
std::string ReadWeight(const std::string &filename)
Definition: MertCommon.cpp:140
iszfstream & getline(iszfstream &izs, std::string &line)
Definition: szfstream.hpp:178
std::vector< std::string > InitRefDataFilenames(int argc, char **argv)
Definition: MertCommon.cpp:192
std::ostream & operator<<(std::ostream &o, const Sentence &s)
Definition: MertCommon.cpp:78
std::string ExpandPath(std::string pattern, const int idx)
Definition: MertCommon.cpp:51
void InitializeFromLimits(std::vector< Sid > &ids, const std::string range)
Definition: MertCommon.cpp:56
std::vector< PARAMS > InitializeVectorsFromFile(const string &filename)
Definition: MertCommon.cpp:91
std::vector< PARAMS > InitializeVectorsFromAxes(unsigned int dim)
Definition: MertCommon.cpp:108
void InitializeFromScript(std::vector< Sid > &ids, const std::string filename)
Definition: MertCommon.cpp:65
PARAMS operator-(const PARAMS &vw1, const PARAMS &vw2)
Definition: MertCommon.cpp:163
std::vector< PARAMS > InitializeVectors(const std::string &pattern)
Definition: MertCommon.cpp:126
MertOpt opts
Definition: MertCommon.cpp:14
std::vector< PARAMS > InitializeVectorsFromRandom()
Definition: MertCommon.cpp:121