15 #ifndef MAIN_RUN_HIFST_HPP 16 #define MAIN_RUN_HIFST_HPP 28 using boost::asio::ip::tcp;
30 typedef boost::shared_ptr<tcp::socket>
socket_ptr;
35 template <
template <
class>
class DataT
40 typedef DataT<ArcT> Data;
62 std::string textoutput_;
72 : fastforwardread_ ( new iszfstream ( rg.get<std::string>
78 if (!usingTupleArc_)
return;
94 bool run ( Data& d ) {
105 unsigned grammarFeatureweightOffset = 0;
111 boost::scoped_ptr < LoadGrammar> grammartask
112 (
new LoadGrammar ( rg_, grammarFeatureweights, grammarFeatureweightOffset ) );
113 grammartask->appendTask
120 (
new PrePro ( rg_ ) )
121 (
new LoadLanguageModel ( rg_
123 , (usingTupleArc_)?
"":lmFeatureweights ) )
124 (
new LoadLanguageModel ( rg_
128 (
new LoadLanguageModel ( rg_
133 (
new PatternsToInstances ( rg_ ) )
135 (
new SentenceSpecificGrammar ( rg_ ) )
136 (
new Parse ( rg_ ) )
137 (
new HiFST ( rg_ ) )
145 (
new PostPro ( rg_ ) )
146 (
new HifstStats ( rg_ ) )
148 bool finished =
false;
149 oszfstream *fileoutput = NULL;
150 if ( textoutput_ !=
"" ) {
151 fileoutput =
new oszfstream ( textoutput_ );
158 boost::scoped_ptr<std::string> aux (
new std::string (
"" ) );
159 d.translation = aux.get();
161 finished = fastforwardread_ ( d.sidx , &d.originalsentence );
162 boost::algorithm::trim (d.originalsentence);
163 if (finished && d.originalsentence ==
"" )
break;
164 FORCELINFO (
"=====Translate sentence " << d.sidx <<
":" <<
165 d.originalsentence );
166 grammartask->chainrun ( d );
167 if ( fileoutput != NULL )
168 *fileoutput << *d.translation << endl;
169 if ( finished )
break;
171 if ( fileoutput != NULL )
192 template <
template <
class>
class DataT
198 typedef DataT<ArcT> Data;
221 std::string textoutput_;
227 unsigned threadcount_;
235 : fastforwardread_ ( new iszfstream ( rg.get<std::string>
242 if (!usingTupleArc_)
return;
259 bool run ( Data& original_data ) {
264 unsigned grammarFeatureweightOffset = 0;
270 boost::scoped_ptr < LoadGrammar > grammartask
271 (
new LoadGrammar ( rg_, grammarFeatureweights, grammarFeatureweightOffset ) );
272 grammartask->appendTask
273 (
new LoadLanguageModel ( rg_
275 , lmFeatureweights ) )
276 (
new LoadLanguageModel ( rg_
280 (
new LoadLanguageModel ( rg_
290 grammartask->chainrun ( original_data );
291 std::vector < boost::shared_ptr<std::string> >translations;
294 bool finished =
false;
299 d->grammar = original_data.grammar;
301 d->klm = original_data.klm;
302 translations.push_back ( boost::shared_ptr<std::string>
303 (
new std::string (
"" ) ) );
304 d->translation = translations[translations.size() - 1].get();
306 original_data.fsts.end() )
309 d->recasingvcblm = original_data.recasingvcblm;
310 d->wm = original_data.wm;
311 finished = fastforwardread_ ( d->sidx ,
312 & ( d->originalsentence ) );
313 if (finished && d->originalsentence ==
"")
break;
314 FORCELINFO (
"=====Translate sentence " << d->sidx <<
":" <<
315 d->originalsentence );
316 PrePro *p =
new PrePro ( rg_ );
318 (
new PatternsToInstances ( rg_ ) )
320 (
new SentenceSpecificGrammar ( rg_ ) )
321 (
new Parse ( rg_ ) )
322 (
new HiFST ( rg_ ) )
331 (
new PostPro ( rg_ ) )
332 (
new HifstStats ( rg_ ) )
335 if ( finished )
break;
339 if ( textoutput_ ==
"" )
return false;
340 boost::scoped_ptr<oszfstream> fileoutput (
new oszfstream ( textoutput_ ) );
341 for (
unsigned k = 0; k < translations.size(); ++k )
342 *fileoutput << *translations[k] << endl;
362 template <
template <
class>
class DataT
367 typedef DataT<ArcT> Data;
393 boost::scoped_ptr < GrammarTask < Data > >ttask_;
415 LINFO (
"Init new taskdata..." );
416 boost::scoped_ptr<Data> mydata (
new Data );
417 mydata->grammar = d.grammar;
419 mydata->filters.clear();
423 mydata->recasingvcblm = d.recasingvcblm;
425 LINFO (
"Number of wordmaps... " << mydata->wm.size() );
427 char data[max_length + 1];
428 std::size_t query_length = 0;
429 std::size_t query_length1 = boost::asio::read ( *sock,
430 boost::asio::buffer ( &query_length,
sizeof ( std::size_t ) ) );
431 std::size_t query_length2 = boost::asio::read ( *sock,
432 boost::asio::buffer ( data, query_length ) );
433 data[query_length2] = 0;
434 mydata->originalsentence = data;
435 FORCELINFO (
"Query to translate: " << mydata->originalsentence );
436 boost::scoped_ptr<std::string> translation (
new std::string );
437 mydata->translation = translation.get();
439 char datasend[max_length + 1];
441 strcpy ( datasend, (
char * ) translation.get()->c_str() );
442 std::size_t length = strlen ( datasend );
443 boost::asio::write ( *sock, boost::asio::buffer ( &length,
444 sizeof ( std::size_t ) ) );
446 boost::asio::write ( *sock, boost::asio::buffer ( datasend, length ) );
448 }
catch ( std::exception& e ) {
449 std::cerr <<
"Exception in thread! " << e.what() <<
"\n";
463 (
new PatternsToInstances ( rg_ ) )
464 (
new SentenceSpecificGrammar ( rg_ ) )
465 (
new Parse ( rg_ ) )
466 (
new HiFST ( rg_ ) )
473 (
new PostPro ( rg_ ) )
496 unsigned grammarFeatureweightOffset = 0;
501 ttask_.reset (
new LoadGrammar ( rg_, grammarFeatureweights,
502 grammarFeatureweightOffset ) );
504 (
new LoadLanguageModel ( rg_
506 , lmFeatureweights ) )
507 (
new LoadLanguageModel ( rg_
512 (
new LoadLanguageModel ( rg_
521 ttask_->chainrun ( d_ );
534 bool run ( Data& d ) {
535 boost::asio::io_service io_service;
536 tcp::acceptor a ( io_service, tcp::endpoint ( tcp::v4(), port_ ) );
538 LINFO (
"Waiting for a connection at port=" << port_ );
539 socket_ptr sock (
new tcp::socket ( io_service ) );
541 translation tr ( rg_ );
542 boost::thread t ( boost::bind<void> ( tr, sock, d ) );
543 LINFO (
"Connection accepted... Thread created..." );
554 #endif // MAIN_RUN_HIFST_HPP Wrapper stream class that writes to pipes, text files or gzipped files.
std::string const kHifstSemiring
Convenience class that reads "quickly" until a queried line.
const std::string kHifstLatticeStore
const std::string kServerPort
List of constants to be used both across program options and class runners.
const std::string kHifstLocalpruneLmFeatureweights
const std::string kHifstStripSpecialEpsilonLabels
std::vector< std::string > getVectorString(const std::string &key) const
Convenience method that returns a vector of strings taking "," as the separator character.
Reads text file, performs tokenization and integer-mapping.
std::string const kRecaserUnimapLoad
std::string const kRecaserLmLoad
Full multi-threaded Translation system.
Converts patterns to instanced patterns.
const std::string kGrammarFeatureweights
const std::string kTargetStore
const std::string kPreproWordmapLoad
T get(const std::string &key) const
Returns parsed value associated to key.
static std::vector< T > & Params()
Task that writes translation to a text file. This translation might be recased, wordmapped and tokeni...
boost::scoped_ptr< NumberRangeInterface< unsigned > > IntRangePtr
const std::string kPostproWordmapLoad
Generates a substring version of a reference translation lattice and associated vocabulary. This substring fst is typically used to guide translation towards a particular search space. The associated vocabulary can be used e.g. to restrict parsing algorithms.
boost::shared_ptr< tcp::socket > socket_ptr
static ReferenceFilterTask * init(const ucam::util::RegistryPO &rg, const std::string &referenceloadkey=HifstConstants::kReferencefilterLoad, const std::string &referencelatticekey=HifstConstants::kReferencefilterNosubstringStore)
Static constructor, returns NULL if the substring lattice is not needed (e.g. hifst not in alignment ...
Core of Hifst. Implements the lattice-building procedure for a cyk-parsed sentence.
Trivial implementation of a threadpool based on boost::asio methods. When initiated, creates a threadpool of n threads (n <= number of cpus). Jobs should be submitted with the templated operator(). When the object is deleted it will wait for all threads to finish.
const std::string kPostproInput
std::string const kRecaserOutput
const std::string kHifstLocalpruneLmLoad
Task class that loads a grammar into memory.
bool chainrun(Data &d)
Implements chain of responsibility. Calls the run method and, if there is another task, calls its run method too.
Templated (hybrid) Interface for Task classes.
bool operator()()
Runs using its own internal data object.
Simple functor that accepts an interface and pointer to the data object in which it will have to run ...
TaskInterface & appendTask(TaskInterface *t)
Appends a task class. If there is no task, appends here; otherwise delegates to the next task...
const std::string kHifstLocalpruneLmWordpenalty
std::string const kLmFeatureweights
const std::string kNThreads
std::string const kHifstSemiringTupleArc
SingleThreadedHifstTask(const ucam::util::RegistryPO &rg)
Constructor.
bool run(Data &original_data)
Translates an input sentence (multithreaded)
HifstServerTask(const ucam::util::RegistryPO &rg)
Constructor.
This class uses instantiated patterns to analyze the grammar and deliver two hashes providing candida...
Full single-threaded Translation system.
const std::string kSourceLoad
std::string const kRecaserLmFeatureweight
bool run(Data &d)
Translates an input sentence (single threaded)
const std::string kHifstLatticeOptimize
Reads StatsData and dumps all stats to (sentence-specific) file. Provides a special method for cyk da...
std::string const kLmWordmap
std::string const kRecaserLmWps
std::string const kLmLoad
std::string const kRecaserLmWordmap
const std::string kFeatureweights
Wrapper stream class that reads pipes, text files or gzipped files.
MultiThreadedHifstTask(const ucam::util::RegistryPO &rg)
Constructor.