Cambridge SMT System
data-main.createssgrammar.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef TASKDATA_HPP
16 #define TASKDATA_HPP
17 
24 namespace ucam {
25 namespace hifst {
26 
32  typedef fst::LexicographicArc< fst::StdArc::Weight, fst::StdArc::Weight> Arc;
33  typedef fst::LexicographicWeight<fst::StdArc::Weight, fst::StdArc::Weight>
34  Weight;
35 
36  public:
38  sidx ( 0 ),
39  grammar ( NULL ),
40  ssgd ( NULL ),
41  cykdata ( NULL ),
42  stats ( new ucam::fsttools::StatsData ),
43  translation ( NULL ) {
44  };
45 
47  uint sidx;
50 
52  unordered_map<std::size_t, std::string> oovwmap;
53 
55  std::string originalsentence;
56  std::string tokenizedsentence;
57  std::string sentence;
58 
60  std::vector<std::string> pinstances;
61 
64  unordered_map<std::string, std::vector< pair <uint, uint> > > hpinstances;
65 
68 
70  unordered_set<std::string> tvcb;
71 
74 
76  boost::shared_ptr<ucam::fsttools::StatsData> stats;
77 
79  std::string *translation;
80 
82  unordered_set<std::string> *recasingvcblm;
83 
85  unordered_map<std::string, ucam::util::WordMapper *> wm;
86 
87 };
88 
89 }
90 } // end namespaces
91 #endif
92 
Contains data for statistics, i.e. allows timing actions and methods called during execution...
Definition: data.stats.hpp:88
unordered_map< std::size_t, std::string > oovwmap
Contains oovs.
Data structure containing all cyk-related information.
Data class containing relevant variables. To be used as a template for task classes using it...
unordered_map< std::string, ucam::util::WordMapper * > wm
Wordmap/Integer map objects.
boost::shared_ptr< ucam::fsttools::StatsData > stats
To collect statistics across the whole pipeline.
Struct containing grammar rules.
unordered_set< std::string > * recasingvcblm
mixed-case vocabulary of the recasing unigram language model
std::string * translation
Translated sentence will be stored here.
std::string originalsentence
source sentence
const GrammarData * grammar
Contains translation grammar.
unordered_map< std::string, std::vector< pair< uint, uint > > > hpinstances
std::vector< std::string > pinstances
Pattern instances.
Structure for sentence-specific grammar. Rules will be queried by cyk per position and number of elements...
unordered_set< std::string > tvcb
Target vocabulary.
SentenceSpecificGrammarData * ssgd
Sentence-specific grammar information – hashes to rule indices.
Definition: bleu.hpp:14
CYKdata * cykdata
cyk data structures