15 #ifndef TEXTOUTPUTTASK_HPP 16 #define TEXTOUTPUTTASK_HPP 31 template <
class Data,
class Arc = fst::StdArc>
40 const std::string wordmapkey_;
49 std::string detokenizationlanguage_;
51 const std::string inputkey_;
54 bool capitalizeFirstWord_;
62 : inputkey_ ( inputkey )
63 , wordmapkey_ ( wordmapkey )
64 , trgidx2wmap_ ( NULL )
66 , detokenizationlanguage_ ( rg.
exists (
69 , capitalizeFirstWord_ (rg.getBool (
73 LDEBUG (
"Constructor ready..." );
86 bool run ( Data& d ) {
88 "translation lattice not initialized?" ) )
return true;
90 "d.translation not initialized?" ) )
return true;
91 fst::VectorFst<Arc> ofst ( * (
static_cast< fst::VectorFst<Arc> *
> 92 (d.fsts[inputkey_]) ) );
95 LINFO (
"1best is " << text );
96 std::string detokutext;
97 if ( d.wm.find ( wordmapkey_ ) != d.wm.end() )
98 trgidx2wmap_ = d.wm[wordmapkey_];
99 else trgidx2wmap_ = NULL;
100 if ( trgidx2wmap_ ) {
103 ( *trgidx2wmap_ ) ( text, &utext , false );
104 LINFO (
"(unmapped) 1best is:" << utext );
109 LINFO (
"1best (detok) is:" << detokutext );
110 }
else detokutext = utext;
111 if ( capitalizeFirstWord_ ) {
114 }
else detokutext = text;
115 FORCELINFO (
"Translation 1best is: " << detokutext );
116 *d.translation = detokutext;
void FstGetBestStringHypothesis(const fst::VectorFst< Arc > &latfst, std::string &hyp)
const std::string kPostproDetokenizeLanguage
void capitalizeFirstWord(std::vector< std::string > &words)
Simple function that capitalizes the first word of the sentence.
void setDetokenize(bool detok)
Turn on/off detokenization.
void set_oovwmap(unordered_map< std::size_t, std::string > &oovmap)
void detokenize(const std::string &is, std::string *os, std::string languagespecific="")
Not implemented, just pass through.
Task that writes translation to a text file. This translation might be recased, wordmapped and detokenized.
const std::string kPostproWordmapLoad
const std::string kPostproInput
PostProTask(const ucam::util::RegistryPO &rg, const std::string &inputkey=HifstConstants::kPostproInput, const std::string &wordmapkey=HifstConstants::kPostproWordmapLoad)
Constructor with ucam::util::RegistryPO object and keys to access lattice and wordmap.
Templated (hybrid) Interface for Task classes.
void deleteSentenceMarkers(std::string &sentence)
Deletes sentence markers (1/2 or <s>/</s>) from a sentence.
bool exists(const std::string &source, const std::string &needle)
Convenience function to find out whether a needle exists in a text.
bool run(Data &d)
Writes 1-best to file. Optionally, recases, maps back to words, and detokenizes.
const std::string kPostproDetokenizeEnable
const std::string kPostproCapitalizefirstwordEnable
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
Efficiently loads a wordmap file and provides methods to map word-to-integer or integer-to-word. To avoid memory footprint issues, hashing the wordmap entries is avoided.