39 using boost::any_cast;
40 namespace bfs = boost::filesystem;
51 unordered_map<std::size_t, std::string>
oovwmap;
52 unordered_map<std::string, fst::VectorFst<fst::StdArc> *>
fsts;
56 unordered_map<std::string, uu::WordMapper *>
wm;
61 TEST ( HifstPostPro, basic_test ) {
64 fst::VectorFst<fst::StdArc> aux;
75 aux.SetFinal ( 8, fst::StdArc::Weight::One() );
76 aux.AddArc ( 0, fst::StdArc ( 1, 1, fst::StdArc::Weight ( 0 ), 1 ) );
77 aux.AddArc ( 1, fst::StdArc ( 2, 2, fst::StdArc::Weight ( 0 ), 2 ) );
78 aux.AddArc ( 2, fst::StdArc ( 3, 3, fst::StdArc::Weight ( 0 ), 3 ) );
79 aux.AddArc ( 3, fst::StdArc (
OOVID,
OOVID, fst::StdArc::Weight ( 0 ), 4 ) );
80 aux.AddArc ( 4, fst::StdArc (
OOVID,
OOVID, fst::StdArc::Weight ( 0 ), 5 ) );
81 aux.AddArc ( 5, fst::StdArc (
OOVID + 1,
OOVID + 1, fst::StdArc::Weight ( 0 ),
83 aux.AddArc ( 6, fst::StdArc ( 4, 4, fst::StdArc::Weight ( 0 ), 7 ) );
84 aux.AddArc ( 7, fst::StdArc ( 5, 5, fst::StdArc::Weight ( 0 ), 8 ) );
90 ss <<
"potatoes\t4\n";
94 unordered_map<std::string, boost::any> v;
95 v[kPostproWordmapperLoad] = std::string (
"" );
99 uh::PostProTaskData d;
100 d.translation =
new std::string;
101 d.oovwmap[
OOVID] =
"creamy";
102 d.oovwmap[
OOVID + 1] =
"lovely";
103 d.fsts[
"postpro.input"] = &aux;
111 EXPECT_EQ ( *d.translation,
"He's eating creamy creamy lovely potatoes." );
112 delete d.translation;
118 TEST ( hifstpostpro, deletesentencemarkers ) {
119 std::string s =
"1 3 2";
121 EXPECT_EQ ( s,
"3" );
124 EXPECT_EQ ( s,
"hola" );
137 int main (
int argc,
char **argv ) {
138 ::testing::InitGoogleTest ( &argc, argv );
139 return RUN_ALL_TESTS();
Handles simple wildcard expansion for strings.
Unit testing: google testing common header.
unordered_map< std::size_t, std::string > oovwmap
unordered_map< std::string, fst::VectorFst< fst::StdArc > * > fsts
const std::string kPostproDetokenizeLanguage
void setDetokenize(bool detok)
Turn on/off tokenization.
Task that writes translation to a text file. This translation might be recased, wordmapped and tokeni...
Lower casing/Tokenization/Detokenization not available for open source release.
const std::string kPostproWordmapLoad
Interfaces with basic methods for iteration.
test-specific classes and functions
void deleteSentenceMarkers(std::string &sentence)
Deletes sentence markers 1/2 or <s>/</s> for a sentence.
Static variables for logger. Include only once from main file.
Utilities to extract vocabulary, pseudo-determinize lattices and build substring transducers.
bool run(Data &d)
Writes 1-best to file. Optionally, recases, maps back to words, and detokenizes.
unordered_map< std::string, uu::WordMapper * > wm
Wordmap/Integer map objects.
int main(int argc, char **argv)
Task that writes translation to a text file. This translation might be wordmapped and tokenized...
const std::string kPostproDetokenizeEnable
Efficiently loads a wordmap file and provides methods to map word-to-integer or integer-to-word. To keep the memory footprint small, the wordmap entries are not hashed.
std::string * translation
Wrapper stream class that reads pipes, text files or gzipped files.
Unit testing: google testing common header.
Static variable for custom_assert. Include only once from main file.