Cambridge SMT System
data.grammar.utilities.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef DATA_GRAMMAR_UTILITIES_HPP
16 #define DATA_GRAMMAR_UTILITIES_HPP
17 
25 namespace ucam {
26 namespace hifst {
27 
33 inline const uint getSize ( const std::string& rhs ) {
34  if ( rhs == "" ) return 0;
35  return ucam::util::count_needles ( rhs, '_', 0, rhs.size() ) + 1;
36 }
37 
/**
 * \brief Tells whether an element is a terminal or a non-terminal.
 *
 * Only the first character is examined: an element whose first character
 * lies in 'A'..'Z' is treated as a non-terminal, anything else as a terminal.
 *
 * \param word Element to classify.
 * \return true if word is a terminal (including the empty string),
 *         false if it starts with an upper-case ASCII letter.
 */
inline bool isTerminal ( const std::string& word ) {
  // Made explicit: an empty element has no leading upper-case letter, so it
  // is classified as a terminal.
  if ( word.empty() ) return true;
  const char first = word[0];
  return first < 'A' || first > 'Z';
}
48 
56 inline void getFilteredNonTerminal ( std::string& word ) {
57  if ( isTerminal ( word ) ) return;
58  if ( word[word.size() - 1] >= '0' && word[word.size() - 1] <= '9' )
59  word.resize ( word.size() - 1 );
60 };
61 
73 inline void getRuleMappings ( const std::vector<std::string>& source,
74  const std::vector<std::string>& translation ,
75  unordered_map<uint, uint> *mappings ) {
76  unordered_map<std::string, uint> partial_mappings;
77  uint nt = 0;
78  for ( uint k = 0; k < source.size(); ++k ) {
79  if ( isTerminal ( source[k] ) ) continue;
80  partial_mappings[source[k]] = nt++;
81  }
82  nt = 0;
83  for ( uint k = 0; k < translation.size(); ++k ) {
84  if ( isTerminal ( translation[k] ) ) continue;
85  USER_CHECK ( partial_mappings.find ( translation[k] ) != partial_mappings.end(),
86  "RHS source and RHS target do not match!" );
87  ( *mappings ) [partial_mappings[translation[k]]] = nt++;
88  }
89  return;
90 };
91 
92 }
93 } // end namespaces
94 
95 #endif
bool isTerminal(const std::string &word)
Determine if the element is a terminal (i.e. a word, represented by a number) or a non-terminal (i...
uint count_needles(const std::string &haystack, const char needle, std::size_t start, std::size_t end)
Convenience function that counts the number of times a needle appears.
#define USER_CHECK(exp, comment)
Tests whether exp is true. If not, comment is printed and program ends.
const uint getSize(const std::string &rhs)
A generic element counter that can be used with any string. It is intended for use with either source or...
Definition: bleu.hpp:14
void getRuleMappings(const std::vector< std::string > &source, const std::vector< std::string > &translation, unordered_map< uint, uint > *mappings)
Given a source and translation of the same rule, sharing the same non-terminals in RHS...
void getFilteredNonTerminal(std::string &word)
Return the filtered non-terminal name. For example, for the rule Z 3_XT2_5 XT2, getFilteredNonTermina...