Cambridge SMT System
data.grammar.comparetool.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use these files except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 // Copyright 2012 - Gonzalo Iglesias, Adrià de Gispert, William Byrne
14 
15 #ifndef DATA_GRAMMAR_COMPARETOOL_HPP
16 #define DATA_GRAMMAR_COMPARETOOL_HPP
17 
25 namespace ucam {
26 namespace hifst {
27 
/// Struct containing rule positions and offsets.
36 struct posindex {
38  std::size_t p;  ///< Position; PosIndexCompare adds it to the start of a character buffer to locate the text to compare.
40  short o;  ///< NOTE(review): presumably an offset related to p; it is not used in this file -- confirm against callers.
42  std::size_t order;  ///< Absolute index.
43 };
44 
/**
 * \brief Class that provides basic string comparison between two const char *.
 *
 * Both operations are virtual so that a subclass can substitute its own
 * comparison; the defaults simply forward to the C library.
 */
class CompareTool {
 public:
  /// Virtual destructor: makes deleting a derived tool through a
  /// CompareTool pointer well defined.
  virtual ~CompareTool() {}

  /**
   * \brief Compares two null-terminated strings with strcmp.
   * \param s1 First string.
   * \param s2 Second string.
   * \return Negative, zero or positive value, exactly as returned by strcmp.
   */
  virtual int compare ( const char *s1, const char *s2 ) {
    return strcmp ( s1, s2 );
  }

  /**
   * \brief Compares at most n characters of two strings with strncmp.
   * \param s1 First string.
   * \param s2 Second string.
   * \param n  Maximum number of characters to compare.
   * \return Negative, zero or positive value, exactly as returned by strncmp.
   */
  virtual int ncompare ( const char *s1, const char *s2, unsigned int n ) {
    return strncmp ( s1, s2, n );
  }
};
61 
69  private:
70  std::string *s_;  ///< String whose character data the posindex positions point into; stored as passed to the constructor.
71  CompareTool *ct_;  ///< Comparison tool invoked by operator(); stored as passed to the constructor.
72 
73  public:
74  inline PosIndexCompare ( std::string *c, CompareTool *myct ) : s_ ( c ),
75  ct_ ( myct ) {};
76  inline bool operator() ( const posindex& lhs, const posindex& rhs ) const {
77  const char *nh = s_->c_str();
78  if ( ct_->compare ( nh + lhs.p,
79  nh + rhs.p ) <= 0 ) return
80  true;
81  return false;
82  };
83 };
84 
94  public:
95  virtual __always_inline int compare ( const char *s1,
96  const char
97  *s2 ) { //IMPORTANT: s1 is portion (source side of rule) of file, s2 is portion of file. DO NOT use for pattern search!
98  for ( ; *s1 == *s2 || ( *s1 >= 'A' && *s1 <= 'Z' && *s2 >= 'A'
99  && *s2 <= 'Z' ); ++s1, ++s2 ) {
100  if ( *s1 == ' ' ) return
101  0; //we don't really care about the order of translations
102  if ( *s1 == 0 ) return 0;
103  if ( *s2 >= 'A' && *s2 <= 'Z' ) while ( * ( s2 + 1 ) != '_'
104  && * ( s2 + 1 ) != ' ' && * ( s2 + 1 ) != '\0' ) s2++; // jump indices.
105  if ( *s1 >= 'A' && *s1 <= 'Z' ) while ( * ( s1 + 1 ) != '_'
106  && * ( s1 + 1 ) != ' ' && * ( s1 + 1 ) != '\0' ) s1++; // jump indices.
107  }
108  return * ( const unsigned char * ) s1 - * ( const unsigned char * ) s2;
109  };
110 
111  // IMPORTANT: s1 is the pattern and MUST use X for generic non-terminals, ending with a space; s2 is a portion of the file; n is the size of the pattern.
112  virtual __always_inline int ncompare ( const char *s1, const char *s2,
113  uint n ) {
114  if ( n == 0 ) return
115  0;
116  while ( n-- > 0 && ( *s1 == *s2 || ( *s1 == 'X' && *s2 >= 'A'
117  && *s2 <= 'Z' ) ) ) { // Loop, comparing bytes.
118  if ( n == 0 || *s1 == '\0' ) return 0;
119  s1++; //it's a pattern, no indices after generic non-terminal, just increment
120  if ( *s2 >= 'A' && *s2 <= 'Z' ) while ( * ( s2 + 1 ) != '_'
121  && * ( s2 + 1 ) != ' ' ) s2++; // jump any indices.
122  s2++;
123  }
124  unsigned char uc1, uc2;
125  uc1 = ( * ( unsigned char * ) s1 );
126  uc2 = ( * ( unsigned char * ) s2 );
127  return uc1 - uc2;
128  }
129 };
130 
131 }
132 } // end namespaces
133 
134 #endif
std::size_t order
absolute index
PosIndexCompare(std::string *c, CompareTool *myct)
Class that provides "pattern" comparison between two const char *. The "patterns" are an abstraction ...
Struct containing rule positions and offsets.
virtual __always_inline int ncompare(const char *s1, const char *s2, uint n)
Functor Class that provides comparison across the posindex structure. This is typically used e...
virtual __always_inline int compare(const char *s1, const char *s2)
virtual __always_inline int ncompare(const char *s1, const char *s2, uint n)
virtual __always_inline int compare(const char *s1, const char *s2)
Class that provides basic string comparison between two const char *.
Definition: bleu.hpp:14