31 template <
class ArcT = fst::LexicographicArc< fst::StdArc::Weight, fst::StdArc::Weight> >
44 stats ( new StatsData ),
54 unordered_map<std::size_t, std::string>
oovwmap;
67 unordered_map<std::string, std::vector< pair <unsigned, unsigned> > >
hpinstances;
73 unordered_set<std::string>
tvcb;
80 std::vector< fst::VectorFst<ArcT> *>
filters;
84 unordered_map<std::string, void * >
fsts;
86 inline fst::VectorFst<ArcT> *
getFst(std::string
const &key) {
87 unordered_map<std::string, void * >::iterator itx
89 if ( itx == fsts.end() ) {
90 LWARN(
"Empty lattice key=" << key);
93 return reinterpret_cast<fst::VectorFst<ArcT> *
>(itx->second);
99 unordered_map<std::string, std::vector <const KenLMData *> >
klm;
116 unordered_map<std::string, WordMapper *>
wm;
unordered_map< std::size_t, std::string > oovwmap
Contains oovs.
Data structure containing all cyk-related information.
unordered_map< uint, std::string > grammar_inversecategories_t
std::vector< fst::VectorFst< ArcT > * > filters
unordered_map< std::string, std::vector< const KenLMData * > > klm
Collections of language models accessed by keys (e.g. in translation we need a bunch for hifst and on...
boost::shared_ptr< ucam::fsttools::StatsData > stats
To collect statistics across the whole pipeline.
Struct containing grammar rules.
unordered_set< std::string > * recasingvcblm
mixed-case vocabulary of the recasing unigram language model
unordered_map< std::string, std::vector< pair< unsigned, unsigned > > > hpinstances
std::string * translation
Translated sentence will be stored here.
std::string originalsentence
source sentence
unordered_map< std::string, WordMapper * > wm
Wordmap/Integer map objects.
unsigned sidx
Sentence index.
boost::shared_ptr< StatsData > stats
To collect statistics across the whole pipeline.
const GrammarData * grammar
Contains translation grammar.
std::string tokenizedsentence
grammar_inversecategories_t vcat
This information is used for stats.
Efficiently loads a wordmap file and provides methods to map word-to-integer or integer-to-word. To avoid memory footprint issues, the wordmap entries are not hashed.
unordered_map< std::string, void * > fsts
Pointers to lattices (e.g. translation lattice, lmbr, etc.), and related, accessed by unique keys...
std::vector< std::string > pinstances
Pattern instances.
unsigned numlocallm
Number of local language models used in hifst.
Structure for sentence-specific grammar. Rules will be queried by cyk per position and number of eleme...
unordered_set< std::string > tvcb
Target vocabulary.
SentenceSpecificGrammarData * ssgd
Sentence-specific grammar information – hashes to rule indices.
fst::VectorFst< ArcT > * getFst(std::string const &key)
CYKdata * cykdata
cyk data structures