10 HypW (std::basic_string<unsigned>
const& h,
typename Arc::Weight
const& c)
19 std::ostream& operator<< (std::ostream& os, const Hyp<Arc>& obj) {
20 for (
unsigned k = 0; k < obj.hyp.size(); ++k) {
21 if (obj.hyp[k] ==
OOV)
continue;
22 if (obj.hyp[k] ==
DR)
continue;
23 if (obj.hyp[k] ==
EPSILON)
continue;
24 if (obj.hyp[k] ==
SEP)
continue;
25 os << obj.
hyp[k] <<
" ";
35 Sample(
const unsigned j1,
const unsigned j2,
const double scoreDiff) :
36 j1(j1), j2(j2), scoreDiff(scoreDiff) {}
50 unsigned offset = (hyp.size() < 2 ? 0 : 1);
51 for (
unsigned z=offset; z< hyp.size()-offset; z++)
52 h.push_back( hyp[z] );
56 template <
class Value,
class Weight>
62 template <
class Weight,
class HypT>
63 vector< LabeledFeature<float, Weight> >
65 std::vector<HypT>
const& hyps,
unsigned const& sid,
66 unsigned const& n,
unsigned const &ns,
double const& alpha,
bool negatives=
false,
bool negate=
true ) {
68 std::set< std::pair<unsigned, unsigned> > indexpairs;
69 vector< Sample > samples;
70 for (
unsigned s=0; s<n; s++) {
72 unsigned j1 = rand() % hyps.size();
73 unsigned j2 = rand() % hyps.size();
74 LINFO(
"1 [" << j1 <<
"] " <<hyps[j1]);
75 LINFO(
"2: ["<<j2<<
"] "<<hyps[j2]);
76 if (indexpairs.find( std::make_pair(j1, j2)) != indexpairs.end() ) {
77 LINFO(
"--skipping - already done");
80 indexpairs.insert( std::make_pair(j1, j2) );
90 LINFO(
"Positive samples found: " << samples.size());
92 std::vector< LabeledFeature< float, Weight> > ss;
94 for (
unsigned s=0; s<ns && s < samples.size(); s++) {
95 unsigned j1 = samples[s].j1;
96 unsigned j2 = samples[s].j2;
103 lf.
value = samples[s].scoreDiff;
106 LINFO(
"Sample " << s <<
" score diff " << samples[s].scoreDiff);
110 lf.
value = -samples[s].scoreDiff;
111 lf.
fea =
Divide(hyps[j2].cost, hyps[j1].cost);
114 LINFO(
"Positive samples found: " << np <<
" of " << n);
118 template <
class Arc,
class HypT>
135 std::string refFiles;
145 std::cerr << refFiles <<
"**" <<std::endl;
150 unsigned seed = time(NULL);
155 boost::scoped_ptr<oszfstream> out;
157 for (
unsigned i=0; i<tuneSet.
cachedLats.size(); i++) {
158 fst::VectorFst<Arc> ifst(*tuneSet.
cachedLats[i]);
159 fst::VectorFst<Arc> nfst;
160 if (old != output (i) ) {
161 out.reset(
new oszfstream (output(i)));
164 if (!ifst.NumStates() ) {
169 fst::Project(&ifst, (printOutputLabels?PROJECT_OUTPUT:PROJECT_INPUT));
170 ShortestPath (ifst, &nfst, n,
true );
171 std::vector<HypT> hyps;
172 fst::printStrings<Arc> (nfst, &hyps);
173 std::vector< LabeledFeature< float, typename Arc::Weight> > fea =
174 ProSBLEUSample<typename Arc::Weight, HypT>(bleuScorer, hyps, i, n, ns, alpha, negatives, negate);
175 for (
unsigned s=0; s<fea.size(); s++) {
176 *out << (binarytarget ? (fea[s].value > 0.0 ? 1 : 0) : fea[s].value);
177 *out <<
" " << fea[s].fea << std::endl;
185 int main (
int argc,
const char* argv[] ) {
189 FORCELINFO ( rg.
dump (
"CONFIG parameters:\n=====================",
190 "====================="));
193 SampleWFSAs<fst::StdArc, Hyp<fst::StdArc> > (rg);
197 SampleWFSAs<fst::LexStdArc, Hyp<fst::LexStdArc> > (rg);
200 const std::string& tuplearcWeights =
203 if (tuplearcWeights.empty() ) {
204 LERROR (
"The tuplearc.weights option needs to be specified " 205 "for the tropical sparse tuple weight semiring " 206 "(--semiring=tuplearc)");
210 SampleWFSAs<TupleArc32, Hyp<TupleArc32> > (rg);
212 LERROR (
"Sorry, semiring option not correctly defined");
Wrapper stream class that writes to pipes, text files or gzipped files.
std::string const kHifstSemiring
int SampleWFSAs(ucam::util::RegistryPO const &rg)
bool SampleSortPredicate(const Sample &s1, const Sample &s2)
std::string const kBinaryTarget
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
std::string const kPrintOutputLabels
T get(const std::string &key) const
Returns parsed value associated to key.
static std::vector< T > & Params()
void initLogger(int argc, const char *argv[])
Initializes the logger and parses the parameter options, checking for --logger.verbose.
std::string const kExternalTokenizer
std::string const kIntRefs
std::string const kDontNegate
std::string const kNSamples
std::string const kOutput
std::string const kWordRefs
Struct template that represents a hypothesis in a lattice.
std::string const kRandomSeed
Same as Hyp but the printing will convert integer ids to words.
Sample(const unsigned j1, const unsigned j2, const double scoreDiff)
TropicalSparseTupleWeight< T > Divide(const TropicalSparseTupleWeight< T > &w1, const TropicalSparseTupleWeight< T > &w2, DivideType type=DIVIDE_ANY)
HypW(std::basic_string< unsigned > const &h, typename Arc::Weight const &c)
std::string const kHifstSemiringLexStdArc
std::string const kHifstSemiringStdArc
Static variables for logger. Include only once from main file.
std::string getString(const std::string &key) const
Performs get<string> and checks whether the real value is to be loaded from a file (--param=file://...).
std::string const kHifstSemiringTupleArc
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
std::string const kWordMap
std::basic_string< unsigned > hyp
std::string dump(const std::string &decorator_start="", const std::string &decorator_end="")
Dumps all configuration parameters into a string with a reasonably pretty format. ...
std::string const kNegativeExamples
std::string const kTupleArcWeights
HypW(HypW< Arc > const &h)
ucam::fsttools::Bleu LBleuScorer(ucam::fsttools::BleuScorer &bleuScorer, unsigned const &sid, HypT const &hyp)
int main(int argc, const char *argv[])
vector< LabeledFeature< float, Weight > > ProSBLEUSample(ucam::fsttools::BleuScorer &bleuScorer, std::vector< HypT > const &hyps, unsigned const &sid, unsigned const &n, unsigned const &ns, double const &alpha, bool negatives=false, bool negate=true)
Static variable for custom_assert. Include only once from main file.