Changeset 12884 for trunk/gsdl/src/mgpp
- Timestamp: 2006-09-28T10:44:01+12:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/GSDLQueryParser.cpp
r12313 r12884 141 141 142 142 static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) { 143 // here expect el to contain some of c,s,i,u 144 // stem method 0 = c,u 00 145 // stem method 1 = i,u 01 - default for DL 146 // stem method 2 = c, s 10 147 // stem method 3 = i,s 11 148 143 // here expect el to contain some of c,s,i,u,f,a -- see mg_files.h CHAR_FLAG_STEM_* constants 149 144 unsigned long stem = (unsigned long)defaultStemMethod; 150 145 … … 152 147 UCArray::const_iterator end = el.text.end(); 153 148 154 unsigned char c1 = *here; 155 if (!(c1 == 'c'|| c1 == 'i' || c1 == 'u' || c1 == 's')) 156 return 4; // incorrect format 157 158 ++here; 159 unsigned char c2 = 'a'; 160 if (here !=end) { 161 c2 = *here; 162 if (!(c2 == 'c'|| c2 == 'i' || c2 == 'u' || c2 == 's')) 163 return 4; // incorrect format 164 } 165 166 if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1 167 if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0 168 if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1 169 if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0 149 /* [JFG - Mar 06: Accent folding patch] */ 150 /* Changed to use CHAR_FLAG_STEM* constants from mg_files.h */ 151 while(here != end) { 152 unsigned char ch = *here; 153 if (strchr (CHAR_FLAG_STEM_Validator, ch) == NULL) 154 return STEM_INVALID; // incorrect format 155 156 switch(ch) { 157 case CHAR_FLAG_STEM_CaseFold: // ignore case (fold) 158 stem |= STEM_CaseFolding; 159 break; 160 case CHAR_FLAG_STEM_NoCaseFold: // case sensitive 161 stem &= (~STEM_CaseFolding); 162 break; 163 case CHAR_FLAG_STEM_Stemming: // stem words 164 stem |= STEM_Stemming; 165 break; 166 case CHAR_FLAG_STEM_NoStemming: // do not stem words 167 stem &= (~STEM_Stemming); 168 break; 169 case CHAR_FLAG_STEM_AccentFold: // accent fold 170 stem |= STEM_AccentFolding; 171 break; 172 case CHAR_FLAG_STEM_NoAccentFold: // do no accent folding 173 stem &= (~STEM_AccentFolding); 174 break; 175 }; 176 177 ++here; 178 } 170 179 return stem; 
171 180 } … … 190 199 if (ParseLexEl (here, end, stem) && stem.lexType == TermE) { 191 200 termNode.stemMethod = GetStemMethod(stem, defaultStemMethod); 192 if (termNode.stemMethod == 4) { // error so backtrack 201 /* [JFG - Mar 06: Accent folding patch] */ 202 /* use STEM_INVALID instead of hardcoded 4 */ 203 if (termNode.stemMethod == STEM_INVALID) { // error so backtrack 193 204 here = oldHere; 194 205 termNode.stemMethod = (unsigned long)defaultStemMethod; … … 211 222 212 223 if (partial_match) { 213 termNode.stemMethod |= 4; // set bit 2 to 1 214 termNode.stemMethod &=0xd; // set bit 1 to 0 // we dont have stemming on if doing partial matching. 224 /* [JFG - Mar 06: Accent folding patch] */ 225 /* use STEM_PARTIAL_MATCH flag */ 226 termNode.stemMethod |= STEM_PARTIAL_MATCH; // set partial match flag 227 termNode.stemMethod &= (~STEM_Stemming); // we dont have stemming on if doing partial matching. 215 228 } 216 229 oldHere = here;
Note: See TracChangeset for help on using the changeset viewer.