16 package uk.ac.cam.eng.util;
18 import com.beust.jcommander.Parameter;
19 import com.beust.jcommander.Parameters;
20 import com.beust.jcommander.ParametersDelegate;
27 public final class CLI {
29 @Parameters(separators =
"=")
30 public static class Features {
32 public static final String FEATURES =
"--features";
34 @Parameter(names = { FEATURES }, description =
"Comma-separated features", required =
true)
35 public String features;
38 @Parameters(separators =
"=")
39 public static class Provenance {
41 public static final String PROV =
"--provenance";
43 @Parameter(names = { PROV }, description =
"Comma-separated provenances", required =
true)
44 public String provenance;
47 @Parameters(separators =
"=")
48 public static class RuleParameters {
49 public static final String MAX_SOURCE_PHRASE =
"--max_source_phrase";
50 @Parameter(names = { MAX_SOURCE_PHRASE }, description =
"Maximum source phrase length in a phrase-based rule")
51 public int maxSourcePhrase = 9;
53 public static final String MAX_SOURCE_ELEMENTS =
"--max_source_elements";
54 @Parameter(names = { MAX_SOURCE_ELEMENTS }, description =
"Maximum number of source elements (terminals and nonterminals) in a hiero rule")
55 public int maxSourceElements = 5;
57 public static final String MAX_TERMINAL_LENGTH =
"--max_terminal_length";
58 @Parameter(names = { MAX_TERMINAL_LENGTH }, description =
"Maximum number of consecutive source terminals in a hiero rule")
59 public int maxTerminalLength = 5;
61 public static final String MAX_NONTERMINAL_SPAN =
"--max_nonterminal_span";
62 @Parameter(names = { MAX_NONTERMINAL_SPAN }, description =
"Maximum number of source terminals covered by a right-hand-side source nonterminal in a hiero rule")
63 public int maxNonTerminalSpan = 10;
66 public Provenance prov =
new Provenance();
72 @Parameters(separators =
"=")
73 public static class ExtractorJobParameters {
74 @Parameter(names = {
"--input",
"-i" }, description =
"Input training data on HDFS", required =
true)
77 @Parameter(names = {
"--output",
"-o" }, description =
"Output rules on HDFS", required =
true)
80 public static final String REMOVE_MONOTONIC_REPEATS =
"--remove_monotonic_repeats";
81 @Parameter(names = { REMOVE_MONOTONIC_REPEATS }, description =
"Gives an " 82 +
"occurrence count of 1 to monotonic hiero rules (e.g. " 83 +
"phrase-pair <a b c, d e f> with alignment 0-0 1-1 2-2 " 84 +
"generates hiero rule <a X, d X> twice but the count is " 86 public boolean removeMonotonicRepeats =
true;
88 public static final String COMPATIBILITY_MODE =
"--compatibility_mode";
89 @Parameter(names = { COMPATIBILITY_MODE }, description =
"Replicates old-style rule extraction")
90 public boolean compability_mode =
false;
93 public RuleParameters rp =
new RuleParameters();
97 @Parameters(separators =
"=")
98 public static class MarginalReducerParameters {
99 @Parameter(names = {
"--input",
"-i" }, description =
"Input rules on HDFS", required =
true)
102 @Parameter(names = {
"--output",
"-o" }, description =
"Output source-to-target probabilities on HDFS", required =
true)
103 public String output;
106 @Parameters(separators =
"=")
107 public static class MergeJobParameters {
108 @Parameter(names = {
"--input_features" }, description =
"Comma separated directories on HDFS with computed features", required =
true)
109 public String inputFeatures;
111 @Parameter(names = {
"--input_rules" }, description =
"HDFS directory with extracted rules", required =
true)
112 public String inputRules;
114 @Parameter(names = {
"--output",
"-o" }, description =
"Output directory on HDFS that will contain rules and features in HFile format", required =
true)
115 public String output;
118 public FilterParams fp =
new FilterParams();
121 @Parameters(separators =
"=")
122 public static class ServerParams {
123 @Parameter(names = {
"--ttable_s2t_server_port" }, description =
"TTable source-to-target server port")
124 public int ttableS2TServerPort = 4949;
126 @Parameter(names = {
"--ttable_s2t_host" }, description =
"TTable source-to-target host name")
127 public String ttableS2THost =
"localhost";
129 @Parameter(names = {
"--ttable_t2s_server_port" }, description =
"TTable target-to-source server port")
130 public int ttableT2SServerPort = 9494;
132 @Parameter(names = {
"--ttable_t2s_host" }, description =
"TTable target-to-source host name")
133 public String ttableT2SHost =
"localhost";
136 @Parameters(separators =
"=")
137 public static class FilterParams {
138 public static final String MIN_SOURCE2TARGET_PHRASE =
"--min_source2target_phrase";
139 @Parameter(names = {MIN_SOURCE2TARGET_PHRASE}, description =
"Minimum source to target probability for phrase based rules", required =
true)
140 public double minSource2TargetPhrase;
142 public static final String MIN_TARGET2SOURCE_PHRASE =
"--min_target2source_phrase";
143 @Parameter(names = {MIN_TARGET2SOURCE_PHRASE }, description =
"Minimum target to source probability for phrase based rules", required =
true)
144 public double minTarget2SourcePhrase;
146 public static final String MIN_SOURCE2TARGET_RULE =
"--min_source2target_rule" ;
147 @Parameter(names = {MIN_SOURCE2TARGET_RULE }, description =
"Minimum source to target probability for hierarchical rules", required =
true)
148 public double minSource2TargetRule;
150 public static final String MIN_TARGET2SOURCE_RULE =
"--min_target2source_rule" ;
151 @Parameter(names = {MIN_TARGET2SOURCE_RULE }, description =
"Minimum target to source probability for hierarchical rules", required =
true)
152 public double minTarget2SourceRule;
154 public static final String PROVENANCE_UNION =
"--provenance_union";
155 @Parameter(names = { PROVENANCE_UNION }, description =
"Union rules extracted from different provenances")
156 public boolean provenanceUnion;
158 public static final String ALLOWED_PATTERNS =
"--allowed_patterns";
159 @Parameter(names = {ALLOWED_PATTERNS }, description =
"File containing a list of allowed rule patterns", required =
true)
160 public String allowedPatternsFile;
162 public static final String SOURCE_PATTERNS =
"--source_patterns";
163 @Parameter(names = { SOURCE_PATTERNS }, description =
"File containing a list of allowed source patterns", required =
true)
164 public String sourcePatterns;
167 @Parameters(separators =
"=")
168 public static class RuleRetrieverParameters {
170 @Parameter(names = {
"--hr_max_height" }, description =
"Maximum number of source terminals covered by the left-hand-side nonterminal in a hiero rule")
171 public int hr_max_height = 10;
174 public Features features =
new Features();
177 public RuleParameters rp =
new RuleParameters();
179 @Parameter(names = {
"--pass_through_rules" }, description =
"File containing pass-through rules")
180 public String passThroughRules;
183 public ServerParams sp =
new ServerParams();
185 @Parameter(names = {
"--retrieval_threads" }, description =
"Number of threads for retrieval, corresponds to the number of hfiles", required=
true)
186 public int retrievalThreads;
188 @Parameter(names = {
"--hfile" }, description =
"Directory containing the hfiles")
191 @Parameter(names = {
"--test_file" }, description =
"File containing the sentences to be translated")
192 public String testFile;
194 @Parameter(names = {
"--rules" }, description =
"Output file containing filtered rules", required=
true)
197 @Parameter(names = {
"--vocab" }, description =
"Output file containing vocab to be used for language model filtering")
201 public FilterParams fp =
new FilterParams();
205 @Parameters(separators =
"=")
206 public static class TTableServerParameters {
208 @Parameter(names = {
"--ttable_server_template" }, description =
"TTable target-to-source host name", required =
true)
209 public String ttableServerTemplate;
211 @Parameter(names = {
"--ttable_direction" }, description =
"TTable direction for the lexical model ('s2t' or 't2s')", required =
true)
212 public String ttableDirection;
214 @Parameter(names = {
"--ttable_language_pair" }, description =
"TTable language pair for the lexical model (e.g. 'en2ru' or 'ru2en')", required =
true)
215 public String ttableLanguagePair;
218 public Provenance prov =
new Provenance();
220 @Parameter(names = {
"--min_lex_prob" }, description =
"Minimum probability for a Model 1 entry. Entries with lower probability are discarded.")
221 public double minLexProb = 0.0;
224 public ServerParams sp =
new ServerParams();
230 @Parameters(separators =
"=")
231 public static class ExtractorDataLoaderParameters {
232 @Parameter(names = {
"--source",
"-src" }, description =
"Source text file", required =
true)
233 public String sourceTextFile;
235 @Parameter(names = {
"--target",
"-trg" }, description =
"Target text file", required =
true)
236 public String targetTextFile;
238 @Parameter(names = {
"--alignment",
"-align" }, description =
"Word alignment file", required =
true)
239 public String alignmentFile;
241 @Parameter(names = {
"--provenance_file",
"-prov_file" }, description =
"Provenance file", required =
true)
242 public String provenanceFile;
244 @Parameter(names = {
"--hdfsout",
"-hdfs" }, description =
"Output file name on HDFS", required =
true)
245 public String hdfsName;