- Timestamp:
- 2007-01-17T11:21:18+13:00 (17 years ago)
- Location:
- trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/indexers/mgpp/text/GSDLQueryParser.cpp
r12321 r13653 141 141 142 142 static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) { 143 // here expect el to contain some of c,s,i,u 144 // stem method 0 = c,u 00 145 // stem method 1 = i,u 01 - default for DL 146 // stem method 2 = c, s 10 147 // stem method 3 = i,s 11 148 143 // here expect el to contain some of c,s,i,u,f,a -- see mg_files.h CHAR_FLAG_STEM_* constants 149 144 unsigned long stem = (unsigned long)defaultStemMethod; 150 145 … … 152 147 UCArray::const_iterator end = el.text.end(); 153 148 154 unsigned char c1 = *here; 155 if (!(c1 == 'c'|| c1 == 'i' || c1 == 'u' || c1 == 's')) 156 return 4; // incorrect format 157 158 ++here; 159 unsigned char c2 = 'a'; 160 if (here !=end) { 161 c2 = *here; 162 if (!(c2 == 'c'|| c2 == 'i' || c2 == 'u' || c2 == 's')) 163 return 4; // incorrect format 164 } 165 166 if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1 167 if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0 168 if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1 169 if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0 149 /* [JFG - Mar 06: Accent folding patch] */ 150 /* Changed to use CHAR_FLAG_STEM* constants from mg_files.h */ 151 while(here != end) { 152 unsigned char ch = *here; 153 if (strchr (CHAR_FLAG_STEM_Validator, ch) == NULL) 154 return STEM_INVALID; // incorrect format 155 156 switch(ch) { 157 case CHAR_FLAG_STEM_CaseFold: // ignore case (fold) 158 stem |= STEM_CaseFolding; 159 break; 160 case CHAR_FLAG_STEM_NoCaseFold: // case sensitive 161 stem &= (~STEM_CaseFolding); 162 break; 163 case CHAR_FLAG_STEM_Stemming: // stem words 164 stem |= STEM_Stemming; 165 break; 166 case CHAR_FLAG_STEM_NoStemming: // do not stem words 167 stem &= (~STEM_Stemming); 168 break; 169 #ifdef ENABLE_ACCENTFOLD 170 case CHAR_FLAG_STEM_AccentFold: // accent fold 171 stem |= STEM_AccentFolding; 172 break; 173 case CHAR_FLAG_STEM_NoAccentFold: // do no accent folding 174 stem &= (~STEM_AccentFolding); 175 break; 176 #endif 177 }; 178 
179 ++here; 180 } 170 181 return stem; 171 182 } … … 190 201 if (ParseLexEl (here, end, stem) && stem.lexType == TermE) { 191 202 termNode.stemMethod = GetStemMethod(stem, defaultStemMethod); 192 if (termNode.stemMethod == 4) { // error so backtrack 203 /* [JFG - Mar 06: Accent folding patch] */ 204 /* use STEM_INVALID instead of hardcoded 4 */ 205 if (termNode.stemMethod == STEM_INVALID) { // error so backtrack 193 206 here = oldHere; 194 207 termNode.stemMethod = (unsigned long)defaultStemMethod; 195 208 } 196 } else here = oldHere; //ignore - wrong syntax209 } else here = oldHere; //ignore - wrong syntax 197 210 198 211 } else if (el.lexType == RangeE) { … … 211 224 212 225 if (partial_match) { 213 termNode.stemMethod |= 4; // set bit 2 to 1 214 termNode.stemMethod &=0xd; // set bit 1 to 0 // we dont have stemming on if doing partial matching. 226 /* [JFG - Mar 06: Accent folding patch] */ 227 /* use STEM_PARTIAL_MATCH flag */ 228 termNode.stemMethod |= STEM_PARTIAL_MATCH; // set partial match flag 229 termNode.stemMethod &= (~STEM_Stemming); // we dont have stemming on if doing partial matching. 230 termNode.stemMethod &= (~STEM_AccentFolding); // we dont have accentfolding on if doing partial matching. 215 231 } 216 232 oldHere = here; … … 510 526 QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine, 511 527 int defaultStemMethod, int maxnumeric) { 512 if (4 < maxnumeric < 512) {528 if (4 < maxnumeric && maxnumeric < 512) { 513 529 MAXNUMERIC = maxnumeric; 514 530 } -
trunk/mgpp/text/GSDLQueryParser.cpp
r12321 r13653 141 141 142 142 static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) { 143 // here expect el to contain some of c,s,i,u 144 // stem method 0 = c,u 00 145 // stem method 1 = i,u 01 - default for DL 146 // stem method 2 = c, s 10 147 // stem method 3 = i,s 11 148 143 // here expect el to contain some of c,s,i,u,f,a -- see mg_files.h CHAR_FLAG_STEM_* constants 149 144 unsigned long stem = (unsigned long)defaultStemMethod; 150 145 … … 152 147 UCArray::const_iterator end = el.text.end(); 153 148 154 unsigned char c1 = *here; 155 if (!(c1 == 'c'|| c1 == 'i' || c1 == 'u' || c1 == 's')) 156 return 4; // incorrect format 157 158 ++here; 159 unsigned char c2 = 'a'; 160 if (here !=end) { 161 c2 = *here; 162 if (!(c2 == 'c'|| c2 == 'i' || c2 == 'u' || c2 == 's')) 163 return 4; // incorrect format 164 } 165 166 if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1 167 if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0 168 if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1 169 if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0 149 /* [JFG - Mar 06: Accent folding patch] */ 150 /* Changed to use CHAR_FLAG_STEM* constants from mg_files.h */ 151 while(here != end) { 152 unsigned char ch = *here; 153 if (strchr (CHAR_FLAG_STEM_Validator, ch) == NULL) 154 return STEM_INVALID; // incorrect format 155 156 switch(ch) { 157 case CHAR_FLAG_STEM_CaseFold: // ignore case (fold) 158 stem |= STEM_CaseFolding; 159 break; 160 case CHAR_FLAG_STEM_NoCaseFold: // case sensitive 161 stem &= (~STEM_CaseFolding); 162 break; 163 case CHAR_FLAG_STEM_Stemming: // stem words 164 stem |= STEM_Stemming; 165 break; 166 case CHAR_FLAG_STEM_NoStemming: // do not stem words 167 stem &= (~STEM_Stemming); 168 break; 169 #ifdef ENABLE_ACCENTFOLD 170 case CHAR_FLAG_STEM_AccentFold: // accent fold 171 stem |= STEM_AccentFolding; 172 break; 173 case CHAR_FLAG_STEM_NoAccentFold: // do no accent folding 174 stem &= (~STEM_AccentFolding); 175 break; 176 #endif 177 }; 178 
179 ++here; 180 } 170 181 return stem; 171 182 } … … 190 201 if (ParseLexEl (here, end, stem) && stem.lexType == TermE) { 191 202 termNode.stemMethod = GetStemMethod(stem, defaultStemMethod); 192 if (termNode.stemMethod == 4) { // error so backtrack 203 /* [JFG - Mar 06: Accent folding patch] */ 204 /* use STEM_INVALID instead of hardcoded 4 */ 205 if (termNode.stemMethod == STEM_INVALID) { // error so backtrack 193 206 here = oldHere; 194 207 termNode.stemMethod = (unsigned long)defaultStemMethod; 195 208 } 196 } else here = oldHere; //ignore - wrong syntax209 } else here = oldHere; //ignore - wrong syntax 197 210 198 211 } else if (el.lexType == RangeE) { … … 211 224 212 225 if (partial_match) { 213 termNode.stemMethod |= 4; // set bit 2 to 1 214 termNode.stemMethod &=0xd; // set bit 1 to 0 // we dont have stemming on if doing partial matching. 226 /* [JFG - Mar 06: Accent folding patch] */ 227 /* use STEM_PARTIAL_MATCH flag */ 228 termNode.stemMethod |= STEM_PARTIAL_MATCH; // set partial match flag 229 termNode.stemMethod &= (~STEM_Stemming); // we dont have stemming on if doing partial matching. 230 termNode.stemMethod &= (~STEM_AccentFolding); // we dont have accentfolding on if doing partial matching. 215 231 } 216 232 oldHere = here; … … 510 526 QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine, 511 527 int defaultStemMethod, int maxnumeric) { 512 if (4 < maxnumeric < 512) {528 if (4 < maxnumeric && maxnumeric < 512) { 513 529 MAXNUMERIC = maxnumeric; 514 530 }
Note: See TracChangeset for help on using the changeset viewer.