35 #include "tropical-sparse-tuple-weight.h" 36 #include "tropical-sparse-tuple-weight-decls.h" 49 #include "lm/model.hh" 71 namespace bfs = boost::filesystem;
117 std::stringstream ss;
118 ss <<
"X 3 3 0" << endl <<
"S S_X S_X 0" << endl;
119 ss <<
"X 4 4 0" << endl <<
"X 5 5 0" << endl <<
"X 1 1 0" << endl <<
"X 2 2 0" 121 ss <<
"X 3_4 3_4 0" << endl <<
"X 3_X_5 3_X_5 0" << endl <<
"S X X 0" << endl;
124 for (
unsigned k = 0; k < 9; ++k ) {
131 d_.
hpinstances[
"1"].push_back ( pair<unsigned, unsigned> ( 0, 0 ) );
132 d_.
hpinstances[
"3"].push_back ( pair<unsigned, unsigned> ( 1, 0 ) );
133 d_.
hpinstances[
"4"].push_back ( pair<unsigned, unsigned> ( 2, 0 ) );
134 d_.
hpinstances[
"5"].push_back ( pair<unsigned, unsigned> ( 3, 0 ) );
135 d_.
hpinstances[
"2"].push_back ( pair<unsigned, unsigned> ( 4, 0 ) );
136 d_.
hpinstances[
"3_4"].push_back ( pair<unsigned, unsigned> ( 1, 1 ) );
137 d_.
hpinstances[
"3_X_5"].push_back ( pair<unsigned, unsigned> ( 1, 2 ) );
138 d_.
hpinstances[
"X_X"].push_back ( pair<unsigned, unsigned> ( 0, 1 ) );
139 d_.
hpinstances[
"X"].push_back ( pair<unsigned, unsigned> ( 0, 0 ) );
146 LINFO (
"Setup finished!" );
154 LINFO (
"Teardown..." );
162 unordered_map<std::string, boost::any>
v_;
177 std::numeric_limits<unsigned>::max() );
187 std::stringstream ss;
190 EXPECT_EQ ( ss.str(),
191 "1 3 4 5 2 || 1 3 4 5 2 || 0,0\n" );
201 std::numeric_limits<unsigned>::max() );
212 std::stringstream ss;
215 EXPECT_EQ ( ss.str(),
216 "1 3 4 5 2 || 1 3 4 5 2 || 0,0\n1 3 4 5 2 || 1 3 4 5 2 || 0,0\n1 3 4 5 2 || 1 3 4 5 2 || 0,0\n" );
228 o <<
"\\data\\" << endl;
229 o <<
"ngram 1=5" << endl;
230 o <<
"ngram 2=2" << endl;
231 o <<
"ngram 3=1" << endl;
233 o <<
"\\1-grams:" << endl;
234 o <<
"-1\t3\t0" << endl;
235 o <<
"-10\t4\t0" << endl;
236 o <<
"-100\t5\t0" << endl;
237 o <<
"-1000\t</s>\t0" << endl;
238 o <<
"0\t<s>\t0" << endl;
240 o <<
"\\2-grams:" << endl;
241 o <<
"-10000\t3 4\t0" << endl;
242 o <<
"-100000\t4 </s>\t0" << endl;
244 o <<
"\\3-grams:" << endl;
245 o <<
"-1000000\t3 4 </s>" << endl;
247 o <<
"\\end\\" << endl;
254 std::numeric_limits<unsigned>::max() );
267 std::stringstream ss;
272 EXPECT_EQ ( ss.str(),
273 "1 3 4 5 2 || 1 3 4 5 2 || 25561,0\n1 3 4 5 2 || 1 3 4 5 2 || 25561,0\n1 3 4 5 2 || 1 3 4 5 2 || 25561,0\n" );
274 bfs::remove ( bfs::path (
"mylm" ) );
284 unordered_map<std::string, boost::any> v =
v_;
287 std::numeric_limits<unsigned>::max() );
307 fst::VectorFst<fst::LexStdArc> *a =
new fst::VectorFst<fst::LexStdArc>;
308 fst::VectorFst<fst::LexStdArc> *b =
new fst::VectorFst<fst::LexStdArc>;
309 fst::VectorFst<fst::LexStdArc> *c =
new fst::VectorFst<fst::LexStdArc>;
310 fst::string2fst<fst::LexStdArc> (
"5 9 1 2 3 2 4 2 6 2", a);
311 fst::string2fst<fst::LexStdArc> (
"5 9 7 2 4 2 6 2", b);
312 fst::string2fst<fst::LexStdArc> (
"5 9 3 8 2 6 2", c);
317 fst::VectorFst<fst::LexStdArc> *j = mur();
318 fst::Determinize (fst::RmEpsilonFst<fst::LexStdArc> (*j), j);
321 fst::Project (
static_cast<fst::VectorFst<fst::LexStdArc> *
> 323 fst::Determinize (fst::RmEpsilonFst<fst::LexStdArc>
324 (*
static_cast<fst::VectorFst<fst::LexStdArc> *
> 326 static_cast<fst::VectorFst<fst::LexStdArc> *>
328 fst::Minimize (
static_cast<fst::VectorFst<fst::LexStdArc> *
> 330 fst::RmEpsilon (
static_cast<fst::VectorFst<fst::LexStdArc> *
> 332 EXPECT_EQ (Equivalent (*
static_cast<fst::VectorFst<fst::LexStdArc> *
> 337 TEST ( HifstTest2, localconditions ) {
350 lpc.
add ( conditions ( 1, 20, 10000, 9.0f ) );
352 lpc.
add ( conditions ( 2, 10, 1000, 8.5f ) );
354 lpc.
add ( conditions ( 3, 5, 50, 10.5f ) );
356 lpc.
add (conditions (3, 5 , 500 , 3.5f ) );
359 EXPECT_EQ ( lpc ( 4, 20, 10000, w ),
false );
360 EXPECT_EQ ( lpc ( 1, 18, 10000, w ),
false );
362 EXPECT_EQ ( lpc ( 1, 19, 10000, w ),
false );
363 EXPECT_EQ ( lpc ( 1, 20, 10000, w ),
true );
364 EXPECT_EQ ( w, 9.0 );
365 EXPECT_EQ ( lpc ( 2, 10, 1000, w ),
true );
366 EXPECT_EQ ( w, 8.5 );
367 EXPECT_EQ ( lpc ( 3, 5, 50, w ),
true );
368 EXPECT_EQ ( w, 10.5 );
369 EXPECT_EQ ( lpc ( 3, 5, 499, w ),
true );
370 EXPECT_EQ ( w, 10.5 );
371 EXPECT_EQ ( lpc ( 3, 5, 500, w ),
true );
372 EXPECT_EQ ( w, 3.5 );
373 EXPECT_EQ ( lpc ( 3, 6, 500, w ),
true );
374 EXPECT_EQ ( w, 3.5 );
375 EXPECT_EQ ( lpc ( 3, 6, 300, w ),
true );
376 EXPECT_EQ ( w, 10.5 );
378 EXPECT_EQ ( lpc ( 3, 5, 49, w ),
false );
380 EXPECT_EQ ( lpc ( 3, 1, 490, w ),
false );
382 EXPECT_EQ ( lpc ( 0, 0, 0, w ),
false );
383 EXPECT_EQ ( lpc ( std::numeric_limits<unsigned>::max(),
384 std::numeric_limits<unsigned>::max(), 0, w ),
false );
385 EXPECT_EQ ( lpc ( -1, -2, 0, w ),
false );
388 EXPECT_EQ ( lpc2 ( 3, 5, 49, w ),
false );
392 TEST ( HifstTest2, expandednumstatesrtn ) {
394 fst::VectorFst<fst::StdArc> a, b;
399 a.SetFinal ( 2, fst::StdArc::Weight::One() );
407 b.SetFinal ( 1, fst::StdArc::Weight::One() );
408 b.AddArc ( 0, fst::StdArc ( 1, 1, 0, 1 ) );
409 test.
update ( 1, 1, 1, &b );
410 test.
update ( 1, 0, 1, &a );
411 EXPECT_EQ ( test ( 1, 0, 1 ), 3 );
414 TEST ( HifstTest2, replacefstbyarc ) {
416 fst::VectorFst<fst::StdArc> a;
417 for (
unsigned k = 0; k < 4; ++k ) a.AddState();
418 fst::VectorFst<fst::StdArc> *aux = x ( a, 1 );
419 ASSERT_TRUE ( aux == NULL );
422 ASSERT_TRUE ( aux != NULL );
423 EXPECT_EQ ( aux->NumStates(), 2 );
424 ASSERT_EQ ( aux->NumArcs ( 0 ), 1 );
428 TEST ( HifstTest2, manualreplacefstbyarc ) {
432 unordered_set<std::string> replacefstbyarc;
433 replacefstbyarc.insert (
"X" );
434 unordered_set<std::string> replacefstbyarcexceptions;
435 replacefstbyarcexceptions.insert (
"S");
436 fst::VectorFst<fst::StdArc> a;
437 for (
unsigned k = 0; k < 2; ++k ) a.AddState();
441 fst::VectorFst<fst::StdArc> *aux = x ( a, label1 );
442 ASSERT_TRUE ( aux == NULL );
443 fst::StdArc::Label label2 =
APBASETAG + 2 * APCCTAG + 1;
444 aux = x ( a, label2 );
445 ASSERT_TRUE ( aux != NULL );
446 EXPECT_EQ ( aux->NumStates(), 2 );
453 fst::VectorFst<fst::StdArc> *aux = x ( a, label1 );
454 ASSERT_TRUE ( aux != NULL );
455 EXPECT_EQ ( aux->NumStates(), 2 );
457 fst::StdArc::Label label2 =
APBASETAG + 2 * APCCTAG + 1;
458 aux = x ( a, label2 );
459 ASSERT_TRUE ( aux != NULL );
460 EXPECT_EQ ( aux->NumStates(), 2 );
466 replacefstbyarcexceptions,
true, 0);
468 fst::VectorFst<fst::StdArc> *aux = x ( a, label1 );
469 ASSERT_TRUE ( aux == NULL );
470 fst::StdArc::Label label2 =
APBASETAG + 2 * APCCTAG + 1;
471 aux = x ( a, label2 );
472 ASSERT_TRUE ( aux != NULL );
473 EXPECT_EQ ( aux->NumStates(), 2 );
478 TEST ( HifstTest2, optimize ) {
481 fst::VectorFst<fst::StdArc> a ;
487 a.SetFinal ( 1, fst::StdArc::Weight::One() );
488 a.SetFinal ( 3, fst::StdArc::Weight::One() );
489 a.AddArc ( 0, fst::StdArc ( 1 , 1 , 0, 1 ) );
490 a.AddArc ( 0, fst::StdArc ( 1 , 1 , 0, 2 ) );
491 a.AddArc ( 2, fst::StdArc ( 0 , 0 , 0, 3 ) );
492 fst::VectorFst<fst::StdArc> d (a);
495 fst::VectorFst<fst::StdArc> e (a);
496 fst::Determinize (fst::RmEpsilonFst<fst::StdArc> (e), &e);
498 fst::VectorFst<fst::StdArc> b (a);
500 EXPECT_EQ ( d.NumStates(), b.NumStates() );
503 EXPECT_TRUE ( Equivalent (e, b) );
505 fst::VectorFst<fst::StdArc> c;
511 c.SetFinal ( 1, fst::StdArc::Weight::One() );
512 c.SetFinal ( 3, fst::StdArc::Weight::One() );
513 c.AddArc ( 0, fst::StdArc ( 1 , 2 , 0, 1 ) );
514 c.AddArc ( 0, fst::StdArc ( 1 , 2 , 0, 2 ) );
515 c.AddArc ( 2, fst::StdArc ( 0 , 0 , 0, 3 ) );
518 EXPECT_EQ (c.NumStates(), 2);
523 int main (
int argc,
char **argv ) {
524 ::testing::InitGoogleTest ( &argc, argv );
525 return RUN_ALL_TESTS();
Wrapper stream class that writes to pipes, text files or gzipped files.
Handles simple wildcard expansion for strings.
unordered_map< std::string, boost::any > v_
Unit testing: google testing common header.
struct containing the elements that trigger local pruning.
Convenience functors/functions for lexicographic<tropical,tropical> semiring.
Contains convenience functions to write and read fsts.
bool run(Data &d)
Runs the parsing algorithm.
Creates FST replacement or not depending on conditions.
Relative to Stats across the pipeline.
Data object for hifst or related tools.
const std::string kHifstLatticeStore
const std::string kHifstLocalpruneLmFeatureweights
const std::string kCykparserHrmaxheight
const std::string kHifstWritertn
const std::string kHifstPrune
Contains structures and classes for GrammarData.
const std::string kCykparserNtexceptionsmaxspan
unordered_map< uint, std::string > grammar_inversecategories_t
Convenience functions for tropical sparse vector weight.
const std::string kGrammarFeatureweights
void Add(Fst< Arc > const *fst)
Adds an fst to the list.
bool run(Data &d)
run method, given a grammar and instantiated patterns, creates and returns the hashes ...
const std::string kHifstUsepdt
Core of Hifst. Implements the lattice-building procedure for a cyk-parsed sentence.
This class creates the Union of an arbitrarily large number of fsts. This implementation uses one RTN...
const std::string kHifstReplacefstbyarcNonterminals
TEST(HifstTest2, localconditions)
Interfaces with basic methods for iteration.
const std::string kHifstAlilatsmode
Implementations of multiple fst unions.
int main(int argc, char **argv)
Describes class GrammarTask.
Contains structures and classes for hifst task (target lattice building)
const std::string kHifstLocalpruneLmLoad
const std::size_t getIdx(std::size_t idx) const
Gets the real position (line) in the (potentially unsorted) file.
Task class that loads a grammar into memory.
const std::string kHifstLocalpruneNumstates
Implementation of a language model task.
uh::GrammarTask< uh::HifstTaskData<> > * gt_
void printstrings(const fst::VectorFst< Arc > &pcostslat, std::ostream *hyps, unsigned s=0)
Trivial function that outputs all the hypothesis in the lattice with its cost.
const std::string kHifstOptimizecells
Contains implementation of ApplyLanguageModelOnTheFly.
uh::CYKParserTask< uh::HifstTaskData<> > * cyk_
const std::string kHifstAlilatsmodeLinks
const std::string kHifstLocalpruneLmWordpenalty
std::string const kLmFeatureweights
Contains structures and classes for GrammarData.
Static variables for logger. Include only once from main file.
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
const std::string kCykparserHmin
Headers for standalone shared library.
Implementation of a language model data structure using kenlm.
const std::string kHifstLocalpruneConditions
const std::string kSsgrammarAddoovsSourcedeletions
void add(const conditions &c)
Add condition.
const std::string kSsgrammarAddoovsEnable
Convenience functions to parse parameters from a string.
const std::string getRule(std::size_t idx) const
Gets a rule indexed by idx. Rule format: LHS RHSSource RHSTarget weight.
convenience class that takes care of local pruning conditions. Conditions are indexed by 1000*cc+y...
void load(const std::string &file)
Loads rules from a grammar file.
This class uses instantiated patterns to analyze the grammar and deliver two hashes providing candida...
Contains implementation for sentence-specific grammar task.
const GrammarData * grammar
Contains translation grammar.
void update(uint cc, uint x, uint y, fst::VectorFst< Arc > *myfst)
Estimates (expanded) number of states of rtn_[cc][x][y]. Traverses arcs and adds number of states o...
bool run(Data &d)
Runs the lattice building procedure.
Contains definitions for cykparser data and task.
hifst test class for google test fixtures
Contains definitions for cykparser data and task.
Contains cyk parser implementation.
Generalized weight mapper functor.
unordered_map< std::string, std::vector< pair< uint, uint > > > hpinstances
uh::SentenceSpecificGrammarTask< uh::HifstTaskData<> > * ssgt_
Contains definitions for sentence-specific grammar data and task.
const std::string kHifstRtnopt
GrammarData * getGrammarData()
Returns GrammarData.
const std::string kSsgrammarStore
Convenience functors that allow transparent handling for weights within hifst.
const std::string kGrammarStorepatterns
unordered_map< std::string, void * > fsts
Pointers to lattices (e.g. translation lattice, lmbr, etc.), and related, accessed by unique keys...
std::string const kLmWordmap
Contains sentence-specific grammar data.
Utility class that, given an RTN with root at (cc,x,y), estimates the number of states of an expanded...
std::string const kLmLoad
const std::string kHifstReplacefstbyarcNumstates
const std::string kGrammarStorentorder
Multiepsilon composition.
const std::string kReferencefilterLoad
TEST_F(HifstTest, basic_translation)
Basic test for HifstTask.
const std::string kHifstReplacefstbyarcExceptions
const std::string kCykparserHmax
Unit testing: google testing common header.
CYKdata * cykdata
cyk data structures
const std::string kGrammarLoad
Creates FST replacement or not depending on conditions including program options. ...
const std::string kHifstLocalpruneEnable
Static variable for custom_assert. Include only once from main file.
void close()
Closes the file.