Cambridge SMT System
CLI.java
Go to the documentation of this file.
1 /*******************************************************************************
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use these files except in compliance with the License.
4  * You may obtain a copy of the License at
5  *
6  * http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  *
14  *******************************************************************************/
15 
16 package uk.ac.cam.eng.util;
17 
18 import com.beust.jcommander.Parameter;
19 import com.beust.jcommander.Parameters;
20 import com.beust.jcommander.ParametersDelegate;
21 
27 public final class CLI {
28 
29  @Parameters(separators = "=")
30  public static class Features {
31 
32  public static final String FEATURES = "--features";
33 
34  @Parameter(names = { FEATURES }, description = "Comma-separated features", required = true)
35  public String features;
36  }
37 
38  @Parameters(separators = "=")
39  public static class Provenance {
40 
41  public static final String PROV = "--provenance";
42 
43  @Parameter(names = { PROV }, description = "Comma-separated provenances", required = true)
44  public String provenance;
45  }
46 
47  @Parameters(separators = "=")
48  public static class RuleParameters {
49  public static final String MAX_SOURCE_PHRASE = "--max_source_phrase";
50  @Parameter(names = { MAX_SOURCE_PHRASE }, description = "Maximum source phrase length in a phrase-based rule")
51  public int maxSourcePhrase = 9;
52 
53  public static final String MAX_SOURCE_ELEMENTS = "--max_source_elements";
54  @Parameter(names = { MAX_SOURCE_ELEMENTS }, description = "Maximum number of source elements (terminals and nonterminals) in a hiero rule")
55  public int maxSourceElements = 5;
56 
57  public static final String MAX_TERMINAL_LENGTH = "--max_terminal_length";
58  @Parameter(names = { MAX_TERMINAL_LENGTH }, description = "Maximum number of consecutive source terminals in a hiero rule")
59  public int maxTerminalLength = 5;
60 
61  public static final String MAX_NONTERMINAL_SPAN = "--max_nonterminal_span";
62  @Parameter(names = { MAX_NONTERMINAL_SPAN }, description = "Maximum number of source terminals covered by a right-hand-side source nonterminal in a hiero rule")
63  public int maxNonTerminalSpan = 10;
64 
65  @ParametersDelegate
66  public Provenance prov = new Provenance();
67  }
68 
72  @Parameters(separators = "=")
73  public static class ExtractorJobParameters {
74  @Parameter(names = { "--input", "-i" }, description = "Input training data on HDFS", required = true)
75  public String input;
76 
77  @Parameter(names = { "--output", "-o" }, description = "Output rules on HDFS", required = true)
78  public String output;
79 
80  public static final String REMOVE_MONOTONIC_REPEATS = "--remove_monotonic_repeats";
81  @Parameter(names = { REMOVE_MONOTONIC_REPEATS }, description = "Gives an "
82  + "occurrence count of 1 to monotonic hiero rules (e.g. "
83  + "phrase-pair <a b c, d e f> with alignment 0-0 1-1 2-2 "
84  + "generates hiero rule <a X, d X> twice but the count is "
85  + "still one)")
86  public boolean removeMonotonicRepeats = true;
87 
88  public static final String COMPATIBILITY_MODE = "--compatibility_mode";
89  @Parameter(names = { COMPATIBILITY_MODE }, description = "Replicates old-style rule extraction")
90  public boolean compability_mode = false;
91 
92  @ParametersDelegate
93  public RuleParameters rp = new RuleParameters();
94 
95  }
96 
97  @Parameters(separators = "=")
98  public static class MarginalReducerParameters {
99  @Parameter(names = { "--input", "-i" }, description = "Input rules on HDFS", required = true)
100  public String input;
101 
102  @Parameter(names = { "--output", "-o" }, description = "Output source-to-target probabilities on HDFS", required = true)
103  public String output;
104  }
105 
106  @Parameters(separators = "=")
107  public static class MergeJobParameters {
108  @Parameter(names = { "--input_features" }, description = "Comma separated directories on HDFS with computed features", required = true)
109  public String inputFeatures;
110 
111  @Parameter(names = { "--input_rules" }, description = "HDFS directory with extracted rules", required = true)
112  public String inputRules;
113 
114  @Parameter(names = { "--output", "-o" }, description = "Output directory on HDFS that will contain rules and features in HFile format", required = true)
115  public String output;
116 
117  @ParametersDelegate
118  public FilterParams fp = new FilterParams();
119  }
120 
121  @Parameters(separators = "=")
122  public static class ServerParams {
123  @Parameter(names = { "--ttable_s2t_server_port" }, description = "TTable source-to-target server port")
124  public int ttableS2TServerPort = 4949;
125 
126  @Parameter(names = { "--ttable_s2t_host" }, description = "TTable source-to-target host name")
127  public String ttableS2THost = "localhost";
128 
129  @Parameter(names = { "--ttable_t2s_server_port" }, description = "TTable target-to-source server port")
130  public int ttableT2SServerPort = 9494;
131 
132  @Parameter(names = { "--ttable_t2s_host" }, description = "TTable target-to-source host name")
133  public String ttableT2SHost = "localhost";
134  }
135 
136  @Parameters(separators = "=")
137  public static class FilterParams {
138  public static final String MIN_SOURCE2TARGET_PHRASE = "--min_source2target_phrase";
139  @Parameter(names = {MIN_SOURCE2TARGET_PHRASE}, description = "Minimum source to target probability for phrase based rules", required = true)
140  public double minSource2TargetPhrase;
141 
142  public static final String MIN_TARGET2SOURCE_PHRASE = "--min_target2source_phrase";
143  @Parameter(names = {MIN_TARGET2SOURCE_PHRASE }, description = "Minimum target to source probability for phrase based rules", required = true)
144  public double minTarget2SourcePhrase;
145 
146  public static final String MIN_SOURCE2TARGET_RULE = "--min_source2target_rule" ;
147  @Parameter(names = {MIN_SOURCE2TARGET_RULE }, description = "Minimum source to target probability for hierarchical rules", required = true)
148  public double minSource2TargetRule;
149 
150  public static final String MIN_TARGET2SOURCE_RULE = "--min_target2source_rule" ;
151  @Parameter(names = {MIN_TARGET2SOURCE_RULE }, description = "Minimum target to source probability for hierarchical rules", required = true)
152  public double minTarget2SourceRule;
153 
154  public static final String PROVENANCE_UNION = "--provenance_union";
155  @Parameter(names = { PROVENANCE_UNION }, description = "Union rules extracted from different provenances")
156  public boolean provenanceUnion;
157 
158  public static final String ALLOWED_PATTERNS = "--allowed_patterns";
159  @Parameter(names = {ALLOWED_PATTERNS }, description = "File containing a list of allowed rule patterns", required = true)
160  public String allowedPatternsFile;
161 
162  public static final String SOURCE_PATTERNS = "--source_patterns";
163  @Parameter(names = { SOURCE_PATTERNS }, description = "File containing a list of allowed source patterns", required = true)
164  public String sourcePatterns;
165  }
166 
167  @Parameters(separators = "=")
168  public static class RuleRetrieverParameters {
169 
170  @Parameter(names = { "--hr_max_height" }, description = "Maximum number of source terminals covered by the left-hand-side nonterminal in a hiero rule")
171  public int hr_max_height = 10;
172 
173  @ParametersDelegate
174  public Features features = new Features();
175 
176  @ParametersDelegate
177  public RuleParameters rp = new RuleParameters();
178 
179  @Parameter(names = { "--pass_through_rules" }, description = "File containing pass-through rules")
180  public String passThroughRules;
181 
182  @ParametersDelegate
183  public ServerParams sp = new ServerParams();
184 
185  @Parameter(names = { "--retrieval_threads" }, description = "Number of threads for retrieval, corresponds to the number of hfiles", required=true)
186  public int retrievalThreads;
187 
188  @Parameter(names = { "--hfile" }, description = "Directory containing the hfiles")
189  public String hfile;
190 
191  @Parameter(names = { "--test_file" }, description = "File containing the sentences to be translated")
192  public String testFile;
193 
194  @Parameter(names = { "--rules" }, description = "Output file containing filtered rules", required=true)
195  public String rules;
196 
197  @Parameter(names = { "--vocab" }, description = "Output file containing vocab to be used for language model filtering")
198  public String vocab;
199 
200  @ParametersDelegate
201  public FilterParams fp = new FilterParams();
202 
203  }
204 
205  @Parameters(separators = "=")
206  public static class TTableServerParameters {
207 
208  @Parameter(names = { "--ttable_server_template" }, description = "TTable target-to-source host name", required = true)
209  public String ttableServerTemplate;
210 
211  @Parameter(names = { "--ttable_direction" }, description = "TTable direction for the lexical model ('s2t' or 't2s')", required = true)
212  public String ttableDirection;
213 
214  @Parameter(names = { "--ttable_language_pair" }, description = "TTable language pair for the lexical model (e.g. 'en2ru' or 'ru2en')", required = true)
215  public String ttableLanguagePair;
216 
217  @ParametersDelegate
218  public Provenance prov = new Provenance();
219 
220  @Parameter(names = { "--min_lex_prob" }, description = "Minimum probability for a Model 1 entry. Entries with lower probability are discarded.")
221  public double minLexProb = 0.0;
222 
223  @ParametersDelegate
224  public ServerParams sp = new ServerParams();
225  }
226 
230  @Parameters(separators = "=")
231  public static class ExtractorDataLoaderParameters {
232  @Parameter(names = { "--source", "-src" }, description = "Source text file", required = true)
233  public String sourceTextFile;
234 
235  @Parameter(names = { "--target", "-trg" }, description = "Target text file", required = true)
236  public String targetTextFile;
237 
238  @Parameter(names = { "--alignment", "-align" }, description = "Word alignment file", required = true)
239  public String alignmentFile;
240 
241  @Parameter(names = { "--provenance_file", "-prov_file" }, description = "Provenance file", required = true)
242  public String provenanceFile;
243 
244  @Parameter(names = { "--hdfsout", "-hdfs" }, description = "Output file name on HDFS", required = true)
245  public String hdfsName;
246  }
247 
248 }