21 #ifndef TASK_REFERENCE_FST_HPP 22 #define TASK_REFERENCE_FST_HPP 32 template <
class Data ,
class Arc = fst::LexStdArc >
35 typedef typename Arc::Weight Weight;
36 typedef typename Arc::Label Label;
42 unordered_set<std::string> vocabulary_;
45 fst::VectorFst<Arc> *referencesubstringlattice_;
48 fst::VectorFst<Arc> *referencelattice_;
55 writereferencelatticefile_;
57 std::string translationlatticefilesemiring_;
58 std::string semiring_;
65 unsigned shortestpath_;
67 bool useshortestpath_, useweight_;
69 bool disablesubstring_;
72 const std::string referencelatticekey_;
82 const std::string& referencelatticekey =
84 referencelatticekey_ ( referencelatticekey ),
86 translationlatticefile_ ( rg.get<std::string>
88 translationlatticefilesemiring_ ( rg.get<std::string>
90 semiring_ ( rg.get<std::string>
92 writereferencelatticefile_ ( rg.get<std::string>
96 weight_ ( rg.get<float>
98 shortestpath_ ( rg.get<unsigned>
100 useshortestpath_ ( rg.get<unsigned>
102 std::numeric_limits<unsigned>::max() ),
103 useweight_ ( rg.get<float>
105 std::numeric_limits<float>::max() ),
106 referencesubstringlattice_ ( NULL ),
107 referencelattice_ (NULL) {
111 return disablesubstring_;
120 return shortestpath_;
126 return translationlatticefile_();
137 const std::string& referencelatticekey
139 if ( rg.
exists ( referenceloadkey ) )
140 if ( rg.
get<std::string> ( referenceloadkey ) !=
"" )
return new 150 fst::VectorFst<Arc> pruned, dweight;
151 if ( useshortestpath_ ) {
152 LINFO (
"Using shortestpath with reference lattice n=" << shortestpath_ );
153 fst::ShortestPath<Arc> ( *referencesubstringlattice_, &pruned, shortestpath_,
157 LINFO (
"Pruning reference lattice with weight=" << weight_ );
159 fst::Prune<Arc> ( referencesubstringlattice_, mw ( weight_ ) );
160 LINFO (
"Weighted determinization with weight=" << weight_ );
161 fst::DeterminizeOptions<Arc> dopts;
162 dopts.weight_threshold = mw ( weight_ );
163 fst::Determinize<Arc> ( *referencesubstringlattice_, &dweight, dopts );
165 if ( useshortestpath_ || useweight_ ) {
170 *referencesubstringlattice_ = pruned;
171 fst::Union(referencesubstringlattice_, dweight);
180 fst::Map<Arc> ( referencesubstringlattice_,
181 fst::RmWeightMapper<Arc>() );
182 fst::Determinize<Arc> ( fst::RmEpsilonFst<Arc> ( *referencesubstringlattice_ ),
183 referencesubstringlattice_ );
184 fst::Minimize<Arc> ( referencesubstringlattice_ );
192 void build (
const std::string& file ) {
193 if ( file ==
"" )
return;
194 if ( built_ && oldfile_ == file )
return;
202 referencelattice_ =
new fst::VectorFst<Arc> ( *referencesubstringlattice_ );
204 if ( !disablesubstring_ ) {
205 LINFO (
"building substring reference" );
206 fst::buildSubstringTransducer<Arc>
207 ( referencesubstringlattice_ );
209 LWARN (
"Using lattice as-is... substring will not be implemented!!!" );
211 fst::ArcSort<Arc> ( referencesubstringlattice_, fst::ILabelCompare<Arc>() );
212 fst::extractTargetVocabulary<Arc> ( *referencesubstringlattice_, &vocabulary_ );
218 if ( referencesubstringlattice_ )
delete referencesubstringlattice_;
219 referencesubstringlattice_ = NULL;
221 if ( referencelattice_ )
delete referencelattice_;
222 referencelattice_ = NULL;
227 if ( writereferencelatticefile_ ( d.sidx ) !=
"" )
229 writereferencelatticefile_ ( d.sidx ) );
234 LINFO (
"build reference filter from lattice=" << translationlatticefile_.
get (
236 build ( translationlatticefile_.
get ( d.sidx ) );
237 if ( referencesubstringlattice_ ) {
238 d.filters.push_back ( referencesubstringlattice_ );
239 d.tvcb = vocabulary_;
240 d.fsts[referencelatticekey_] = referencelattice_;
241 LINFO (
"Done! Full lattice stored with key=" 242 << referencelatticekey_
243 <<
", NS=" <<
static_cast<fst::VectorFst<Arc> *
> 244 ( d.fsts[referencelatticekey_])->NumStates() );
252 void loadLattice(std::string
const &file) {
254 if (translationlatticefilesemiring_ ==
"" ) {
255 referencesubstringlattice_ = VectorFstRead<Arc> ( file );
258 if (semiring_ !=
"tuplearc") {
259 LERROR(
"Conversions currently allowed only from lexstdarc,tropical TO tuplearc)");
262 referencesubstringlattice_ =
new VectorFst<Arc>;
264 if (translationlatticefilesemiring_ ==
"lexstdarc") {
265 VectorFst<LexStdArc> *aux= VectorFstRead<LexStdArc> ( file );
266 VectorFst<TupleArc32> *vwfst =
new VectorFst<TupleArc32>;
268 LINFO (
"Mapping Arc Target Lattice to TupleArc32" );
271 Map ( *aux, vwfst, WeightMapper(mwcopy));
274 referencesubstringlattice_ =
reinterpret_cast<VectorFst<Arc> *
>(vwfst);
289 #endif // TASK_REFERENCE_FST_HPP std::string const kHifstSemiring
bool run(Data &d)
Runs the task: loads the substring lattice and adds a pointer to it in the data object.
#define ZDISALLOW_COPY_AND_ASSIGN(TypeName)
void unload(void)
Cleans up the FSTs.
class that expands a wildcard into its actual value. This is useful e.g. for filenames ranging severa...
ReferenceFilterTask(const ucam::util::RegistryPO &rg, const std::string &referencelatticekey=HifstConstants::kReferencefilterNosubstringStore)
Constructor.
const std::string kReferencefilterSubstring
T get(const std::string &key) const
Returns parsed value associated to key.
void reduce()
Removes weights and reduces the reference lattice with determinization and minimization.
Template specialization of MakeSparseVectorWeight functor for LexStdArc.
fst::TropicalWeightTpl< F > Map(double)
Generates a substring version of a reference translation lattice and associated vocabulary. This substring fst is typically used to guide translation towards a particular search space. The associated vocabulary can be used e.g. to restrict parsing algorithms.
static ReferenceFilterTask * init(const ucam::util::RegistryPO &rg, const std::string &referenceloadkey=HifstConstants::kReferencefilterLoad, const std::string &referencelatticekey=HifstConstants::kReferencefilterNosubstringStore)
Static constructor, returns NULL if the substring lattice is not needed (e.g. hifst not in alignment ...
void prune()
Filters the reference lattice using shortest path, weighted determinization, or both (the results are combined by union).
const std::string getTranslationLatticeFile()
const std::string kReferencefilterLoadSemiring
const std::string kReferencefilterPrunereferenceweight
Templated (hybrid) Interface for Task classes.
void FstWrite(const Fst< Arc > &fst, const std::string &filename, const std::string &txtname="txt")
Templated method that writes an fst either in binary or text format.
templated Mapper that modifies weights when copying from one FST to another, passing through the othe...
Templated functor that creates a weight given a float.
~ReferenceFilterTask()
Destructor.
bool exists(const std::string &key) const
Determines whether a program option (key) has been defined by the user.
void build(const std::string &file)
Given an fst file, builds the unweighted substring transducer.
bool getDisableSubString(void)
unsigned getShortestPath(void)
void write(Data &d)
Writes the reference substring lattice to [file].
const std::string kReferencefilterWrite
const std::string kReferencefilterPrunereferenceshortestpath
const std::string kReferencefilterNosubstringStore
const std::string kReferencefilterLoad
const std::string get(T idx)
Expands string and returns.
const unordered_set< std::string > & getVocabulary()