Cambridge SMT System
main.hifst.init_param_options.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
18 
25 namespace ucam {
26 namespace util {
27 
28 namespace po = boost::program_options;
29 
43 inline void init_param_options ( int argc, const char* argv[],
44  po::variables_map *vm ) {
45  using namespace HifstConstants;
46  using namespace po;
47  try {
48  po::options_description desc ( "Command-line/configuration file options" );
49  initAllCreateSSGrammarOptions (desc); // All createssgrammar options are used
50  initCommonApplylmOptions (desc); // Add generic language model options
51  desc.add_options()
52  ( kServerEnable.c_str()
53  , po::value<std::string>()->default_value ( "no" )
54  , "Run in server mode (yes|no)" )
55  ( kServerPort.c_str()
56  , po::value<short>()->default_value ( 1209 )
57  , "Server port" )
58  ( kTargetStore.c_str()
59  , po::value<std::string>()->default_value ( "-" )
60  , "Source text file -- this option is ignored in server mode" )
61  ( kFeatureweights.c_str()
62  , po::value<std::string>()->default_value ( "" )
63  , "Feature weights applied in hifst. This is a comma-separated sequence "
64  "of language model(s) and grammar feature weights.\n"
65  "IMPORTANT: If this option is not empty string, then it will override "
66  "any values in lm.featureweights and grammar.featureweights"
67  )
68  ( kReferencefilterLoad.c_str()
69  , po::value<std::string>()->default_value ( "" )
70  , "Reference lattice to filter the translation" )
72  , po::value<std::string>()->default_value ( "" )
73  , "Load lattices from other semirings. These will be converted automatically. Possible values: tropical, lexstdarc, tuplearc")
74  ( kReferencefilterWrite.c_str()
75  , po::value<std::string>()->default_value ( "" )
76  , "Write reference lattice" )
78  , po::value<std::string>()->default_value ( "yes" )
79  , "Substring the reference lattice (yes|no)" )
81  , po::value<float>()->default_value ( std::numeric_limits<float>::max() )
82  , "Likelihood beam to prune the reference lattice. Use positive value. Maxfloat (default) turns it off." )
84  , po::value<unsigned>()->default_value (
85  std::numeric_limits<unsigned>::max() )
86  , "Apply a shortest path to the reference lattice to only use n hypotheses. Max unsigned (default) turns it off." )
87  ( kCykparserHrmaxheight.c_str()
88  , po::value<unsigned>()->default_value ( 10 )
89  , "Default maximum span for hierarchical rules" )
90  ( kCykparserHmax.c_str()
91  , po::value<std::string>()->default_value ( "" )
92  , "Maximum span for individual non-terminals, constrained to hrmaxheight : e.g. X,10,V,6" )
93  ( kCykparserHmin.c_str()
94  , po::value<std::string>()->default_value ( "" )
95  , "Minimum span for individual non-terminals, constrained to hrmaxheight: e.g. X,3,V,2" )
97  , po::value<std::string>()->default_value ( "S" )
98  , "List of non-terminals not affected by cykparser.hrmaxheight. S should always be in this list!" )
99  ( kHifstLatticeStore.c_str()
100  , po::value<std::string>()->default_value ( "" )
101  , "Store hifst translation lattice" )
102  ( kHifstLatticeOptimize.c_str()
103  , po::value<std::string>()->default_value ( "no" )
104  , "Optimize translation lattices (yes|no)." )
106  po::value<std::string>()->default_value ( "no" ),
107  "Strip any special Hifst epsilon labels (e.g. oov, deletion rule, ...)."
108  " Option only available if translation lattices are optimized."
109  " Recommended ONLY for forced decoding" )
110  ( kHifstAlilatsmode.c_str()
111  , po::value<std::string>()->default_value ( "no" )
112  , "Include derivations in the left side of transducers (yes|no)" )
113  ( kHifstAlilatsmodeLinks.c_str()
114  , po::value<std::string>()->default_value ( "rules" )
115  , "What kind of alignment info to include (affiliation|rules)" )
116  ( kHifstUsepdt.c_str()
117  , po::value<std::string>()->default_value ( "no" )
118  , "Run hifst using pdt representation, aka hipdt (yes|no)" )
119  ( kHifstRtnopt.c_str()
120  , po::value<std::string>()->default_value ( "yes" )
121  , " Use openfst rtn optimizations (yes|no)" )
122  ( kHifstOptimizecells.c_str()
123  , po::value<std::string>()->default_value ( "yes" )
124  , "Determinize/minimize any FSA component of the RTN (yes|no)" )
126  , po::value<std::string>()->default_value ( "" )
127  , "Determine which cell fsts are always replaced by single arc according to its non-terminals, e.g: replacefstbyarc=X,V" )
129  , po::value<unsigned>()->default_value ( 4 )
130  , "Determine the minimum number of states that triggers replacement by arc." )
132  , po::value<std::string>()->default_value ( "S" )
133  , "Categories that will definitely not be replaced (takes over replacefstbyarc and replacefstbyarc.numstates)" )
134  ( kHifstLocalpruneEnable.c_str()
135  , po::value<std::string>()->default_value ( "no" )
136  , "Apply local pruning strategy based con cyk cells and number of states (yes|no)" )
137  ( kHifstLocalpruneLmLoad.c_str()
138  , po::value<std::string>()->default_value ( "" )
139  , "Load one or more language model files: (gzipped) arpa format or kenlm binary format (uses memory mapping); separated by commas" )
141  , po::value<std::string>()->default_value ( "1.0" )
142  , "Scaling factor(s) applied to the language model: arpa_weight * -log(10) * gscale. Scales separated by commas." )
144  , po::value<std::string>()->default_value ( "0.0" )
145  , "Word penalty applied along the language models (separated by commas). Assumed as 0 if not specified " )
146  ( kHifstLocalpruneNumstates.c_str()
147  , po::value<unsigned>()->default_value ( 10000000 )
148  , "Maximum number of states threshold after cell pruning an FSA, If beneath the threshold, determinization/minimization is applied to pruned lattice. Also applicable in alignment mode when filtering against substring acceptor. Use a big value for HiFST and small value for HiPDT.")
150  , po::value<std::string>()->default_value ( "" )
151  , "Local pruning conditions. These are sequences of 4-tuples separated by commas: category,span,number_of_states,weight. The three first are actual thresholds that trigger local pruning, whereas the weight is the likelihood beam for pruning, IF a language model has been applied." )
152  ( kHifstPrune.c_str()
153  , po::value<float>()->default_value ( std::numeric_limits<float>::max() )
154  , "Likelihood beam to prune the translation lattice. Only applied IF a language model is available." )
155  ( kHifstWritertn.c_str()
156  , po::value<std::string>()->default_value ( "")
157  , "Write the rtn to disk -- long list of FSAs. Use %%rtn_label%% and ? to format file names appropriately, e.g. --hifst.writertn=rtn/?/%%rtn_label%%.fst" )
158  ( kRecaserLmLoad.c_str()
159  , po::value<std::string>()->default_value ( "" )
160  , "Language model for recasing" )
161  ( kRecaserLmFeatureweight.c_str()
162  , po::value<std::string>()->default_value ( "1.0" )
163  , "Scaling factor applied to the language model" )
164  ( kRecaserUnimapLoad.c_str()
165  , po::value<std::string>()->default_value ( "" )
166  , "unigram transduction model " )
167  ( kRecaserUnimapWeight.c_str()
168  , po::value<float>()->default_value ( 1.0f )
169  , "Scaling factors applied to the unigram model " )
170  ( kRecaserPrune.c_str()
171  , po::value<std::string>()->default_value ( "byshortestpath,1" )
172  , "Choose between byshortestpath,numpaths or byweight,weight" )
173  ( kRecaserOutput.c_str()
174  , po::value<std::string>()->default_value ("")
175  , "Output true cased lattice" )
176  ( kPostproWordmapLoad.c_str()
177  , po::value<std::string>()->default_value ( "" )
178  , "Load a reverse integer mapping file so the decoder can map integers to target words" )
179  ( kPostproDetokenizeEnable.c_str()
180  , po::value<std::string>()->default_value ( "no" )
181  , "Detokenize translated 1best (yes|no) -- NOT IMPLEMENTED!" )
183  , po::value<std::string>()->default_value ( "" ), "NOT IMPLEMENTED" )
185  , po::value<std::string>()->default_value ( "no" )
186  , "Capitalize first word (yes|no). Only applies if previously mapped back to words (postpro.wordmap.load)" )
187  ( kStatsHifstWrite.c_str()
188  , po::value<std::string>()->default_value ( "" )
189  , "Dump hifst-specific stats (cyk, local pruning, etc)" )
190  ( kStatsHifstCykgridEnable.c_str()
191  , po::value<std::string>()->default_value ( "no" )
192  , "Write cyk/rtn stats to the file (yes|no)" )
194  , po::value<unsigned>()->default_value ( 30 )
195  , "Width of the printed cyk cell" )
196  ( kStatsWrite.c_str()
197  , po::value<std::string>()->default_value ( "" )
198  , "Dump general stats (speed and general messages)" )
199  ( kHifstSemiring.c_str(),
200  po::value<std::string>()->default_value ("lexstdarc"),
201  "Choose between stdarc, lexstdarc, and tuplearc (for the tropical sparse tuple arc semiring).")
202  ( kHifstDisableRuleFeatures.c_str(),
203  po::value<std::string>()->default_value ("no"),
204  "If using tuplearc, rules are passed in by default as 0-weighted sparse features. Use this parameter to disable (i.e. not pass them)."
205  "This option is ignored for other arc types.")
206  ( kRulesToWeightsEnable.c_str()
207  , po::value<std::string>()->default_value ( "no" )
208  , "Enable postprocessing rule-ids-to-rule-specific-features. This option only works if semiring=tuplearc" )
209  ;
210 
211  initRules2WeightsOptions(desc, false);
212  parseOptionsGeneric (desc, vm, argc, argv);
214 
215 
216 
217  if ( (*vm) [kPatternstoinstancesMaxspan.c_str() ].as<unsigned>()
218  < (*vm) [ kCykparserHrmaxheight.c_str()].as<unsigned>() ) {
220  " cannot be smaller than " << kCykparserHrmaxheight);
221  exit (EXIT_FAILURE );
222  }
223  if ( (*vm) [kFeatureweights.c_str()].as<std::string>() != ""
224  && ( (*vm) [kLmFeatureweights.c_str()].as<std::string>() != ""
225  || (*vm) [kGrammarFeatureweights.c_str()].as<std::string>() !=
226  "" ) ) {
227  LWARN ("Program option featureweights OVERRIDES grammar.featureweights and lm.featureweights!!");
228  }
229  } catch ( std::exception& e ) {
230  cerr << "error: " << e.what() << "\n";
231  exit ( EXIT_FAILURE );
232  } catch ( ... ) {
233  cerr << "Exception of unknown type!\n";
234  exit ( EXIT_FAILURE );
235  }
236  LINFO ( "Configuration loaded" );
237 };
238 
239 }
240 } // end namespaces
void parseOptionsGeneric(bpo::options_description &desc, bpo::variables_map *vm, int argc, const char *argv[])
Definition: registrypo.hpp:58
std::string const kHifstSemiring
const std::string kHifstLatticeStore
const std::string kServerPort
List of constants to be used both across program options and class runners.
void checkCreateSSGrammarOptions(po::variables_map *vm)
const std::string kHifstLocalpruneLmFeatureweights
const std::string kServerEnable
const std::string kCykparserHrmaxheight
void init_param_options(int argc, const char *argv[], po::variables_map *vm)
Function to initialize boost program_options module with command-line and config file options...
const std::string kHifstStripSpecialEpsilonLabels
const std::string kReferencefilterSubstring
const std::string kPostproDetokenizeLanguage
void initRules2WeightsOptions(po::options_description &desc, bool addAllOptions=true)
const std::string kHifstWritertn
std::string const kRecaserPrune
const std::string kHifstPrune
#define LINFO(msg)
std::string const kRecaserUnimapLoad
std::string const kRecaserLmLoad
const std::string kCykparserNtexceptionsmaxspan
const std::string kStatsHifstCykgridCellwidth
const std::string kGrammarFeatureweights
const std::string kTargetStore
To initialize boost parameter options.
const std::string kRulesToWeightsEnable
const std::string kHifstUsepdt
const std::string kStatsHifstCykgridEnable
const std::string kPostproWordmapLoad
const std::string kHifstDisableRuleFeatures
const std::string kHifstReplacefstbyarcNonterminals
const std::string kHifstAlilatsmode
std::string const kRecaserOutput
const std::string kHifstLocalpruneLmLoad
const std::string kReferencefilterLoadSemiring
const std::string kReferencefilterPrunereferenceweight
const std::string kHifstLocalpruneNumstates
const std::string kHifstOptimizecells
const std::string kHifstAlilatsmodeLinks
const std::string kHifstLocalpruneLmWordpenalty
std::string const kLmFeatureweights
const std::string kCykparserHmin
const std::string kHifstLocalpruneConditions
#define LWARN(msg)
void initAllCreateSSGrammarOptions(po::options_description &desc)
std::string const kRecaserUnimapWeight
const std::string kPostproDetokenizeEnable
const std::string kPostproCapitalizefirstwordEnable
std::string const kRecaserLmFeatureweight
const std::string kStatsHifstWrite
const std::string kHifstRtnopt
To initialize boost parameter options.
const std::string kHifstLatticeOptimize
const std::string kReferencefilterWrite
const std::string kReferencefilterPrunereferenceshortestpath
#define LERROR(msg)
const std::string kHifstReplacefstbyarcNumstates
To initialize boost parameter options for createssgrammar tool.
void initCommonApplylmOptions(po::options_description &desc)
const std::string kFeatureweights
const std::string kReferencefilterLoad
const std::string kHifstReplacefstbyarcExceptions
const std::string kCykparserHmax
std::string const kStatsWrite
Definition: bleu.hpp:14
const std::string kPatternstoinstancesMaxspan
const std::string kHifstLocalpruneEnable