Changeset 1296 for trunk/gsdl/src
- Timestamp:
- 2000-07-24T14:10:01+12:00 (24 years ago)
- Location:
- trunk/gsdl/src/mgpp/text
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/GSDLQueryParser.cpp
r1127 r1296 27 27 28 28 static QueryNode *ParseExpression (UCArray::const_iterator &here, 29 UCArray::const_iterator end); 29 UCArray::const_iterator end, 30 int defaultStemMethod); 30 31 31 32 static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) { … … 62 63 // and discarded 63 64 static QueryNode *ParseBracketExpression (UCArray::const_iterator &here, 64 UCArray::const_iterator end) { 65 UCArray::const_iterator end, 66 int defaultStemMethod) { 65 67 // get everything in the expression 66 QueryNode *curTree = ParseExpression (here, end );68 QueryNode *curTree = ParseExpression (here, end, defaultStemMethod); 67 69 68 70 // gobble up tokens until a closing bracket is found … … 92 94 UCArray NEAR; SetCStr(NEAR, "NEAR"); 93 95 if (near == NEAR) { // no modifier 94 termNode.startRange = -2 0;96 termNode.startRange = -21; 95 97 termNode.endRange = 20; 96 98 } … … 104 106 } 105 107 cerr <<"size= "<<size<<endl; 106 termNode.startRange = -1 * size;108 termNode.startRange = -1 * (size+1); 107 109 termNode.endRange = size; 108 110 } … … 110 112 111 113 } 112 static unsigned long GetStemMethod(LexEl &el ) {114 static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) { 113 115 // here expect el to contain some of c,s,i,u 114 // stem method 0 = i,u 00 115 // stem method 1 = c,u 01 116 // stem method 2 = i, s 10 117 // stem method 3 = c,s 11 118 119 unsigned long stem = 4; 116 // stem method 0 = c,u 00 117 // stem method 1 = i,u 01 - default for DL 118 // stem method 2 = c, s 10 119 // stem method 3 = i,s 11 120 121 unsigned long stem = (unsigned long)defaultStemMethod; 122 120 123 UCArray::const_iterator here = el.text.begin(); 121 124 UCArray::const_iterator end = el.text.end(); 122 125 123 126 unsigned char c1 = *here; 124 if (c1 == 'c'| c1 == 'i' | c1 == 'u' | c1 == 's') 125 stem = 0; 126 else return 4; // incorrect format 127 if (!(c1 == 'c'| c1 == 'i' | c1 == 'u' | c1 == 's')) 128 return 4; // incorrect format 127 129 128 130 here++; 129 131 unsigned char c2 
= 'a'; 130 if (here !=end) c2 = *here; 131 132 if (c1 == 'c'|| c2=='c') stem |= 1; 133 if (c1 == 's'|| c2 == 's') stem |= 2; 132 if (here !=end) { 133 c2 = *here; 134 if (!(c2 == 'c'| c2 == 'i' | c2 == 'u' | c2 == 's')) 135 return 4; // incorrect format 136 } 137 138 if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1 139 if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0 140 if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1 141 if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0 134 142 cerr << "stem is "<<stem<<endl; 135 143 return stem; … … 139 147 static void ParseTermModifiers (UCArray::const_iterator &here, 140 148 UCArray::const_iterator end, 141 TermNode &termNode) { 149 TermNode &termNode, 150 int defaultStemMethod) { 151 152 termNode.stemMethod = defaultStemMethod; 153 142 154 LexEl el; 143 155 UCArray::const_iterator oldHere = here; … … 147 159 148 160 } else if (el.lexType == StemMethodE) { 149 // termNode.stemMethod = ParseInt (here, end);150 cerr << "in stem method bit"<<endl;151 161 oldHere = here; 152 162 LexEl stem; 153 163 if (ParseLexEl (here, end, stem) && stem.lexType == TermE) { 154 termNode.stemMethod = GetStemMethod(stem );164 termNode.stemMethod = GetStemMethod(stem, defaultStemMethod); 155 165 if (termNode.stemMethod == 4) { // error so backtrack 156 166 here = oldHere; 167 termNode.stemMethod = (unsigned long)defaultStemMethod; 157 168 } 158 169 }else here = oldHere; //ignore - wrong syntax … … 207 218 // expects starting brackets to have been parsed 208 219 static void ParseSquareBrackets(UCArray::const_iterator &here, 209 UCArray::const_iterator end, 210 ProxMatchQueryNode *proxNode) { 220 UCArray::const_iterator end, 221 ProxMatchQueryNode *proxNode, 222 int defaultStemMethod) { 211 223 212 224 LexEl el; … … 215 227 TermNode termNode; 216 228 termNode.term = el.text; 217 ParseTermModifiers (here, end, termNode );229 ParseTermModifiers (here, end, termNode, defaultStemMethod); 218 230 
proxNode->terms.push_back(termNode); 219 231 } 220 232 else if (el.lexType == CloseSquareBracketE) { 221 233 break; 234 } 235 else if (el.lexType == AndOpE) { 236 // ignore, the words are AND'ed anyway 237 cerr << "and inside []\n"; 238 } 239 else if (el.lexType == OrOpE) { 240 cerr << "or inside []\n"; 222 241 } 223 242 else { 224 243 //error 225 break; 244 // just ignore for now 245 cerr <<"bad syntax inside []\n"; 226 246 } 227 247 } // while … … 261 281 262 282 static QueryNode *ParseTerm (UCArray::const_iterator &here, 263 UCArray::const_iterator end) { 283 UCArray::const_iterator end, 284 int defaultStemMethod) { 264 285 LexEl el; 265 286 … … 268 289 269 290 if (el.lexType == OpenBracketE) 270 return ParseBracketExpression (here, end );291 return ParseBracketExpression (here, end, defaultStemMethod); 271 292 272 293 ProxMatchQueryNode *proxNode = new ProxMatchQueryNode; 273 294 274 // check for a tag275 /* if (el.lexType == TagE) {276 oldHere = here; // don't backtrack past here277 if (!ParseLexEl (here, end, el)) return NULL;278 if (el.lexType == TermE) {279 proxNode->tagNodePtr = new TagNode;280 proxNode->tagNodePtr->tagName = el.text;281 if (!ParseLexEl (here, end, el)) return NULL;282 }283 }284 */285 295 if (el.lexType == TermE || el.lexType == IntegerE) { 286 296 TermNode termNode; 287 297 termNode.term = el.text; 288 ParseTermModifiers (here, end, termNode );298 ParseTermModifiers (here, end, termNode, defaultStemMethod); 289 299 oldHere = here; // dont backtrack past here 290 300 if (ParseLexEl(here, end, el) && el.lexType == NearOpE) { 291 301 delete proxNode; 292 proxNode = (ProxMatchQueryNode *)ParseTerm(here, end );302 proxNode = (ProxMatchQueryNode *)ParseTerm(here, end, defaultStemMethod); 293 303 SetRangeValues(termNode, el.text); 294 304 proxNode->terms.push_back (termNode); … … 306 316 } 307 317 else if (el.lexType == OpenSquareBracketE) { 308 ParseSquareBrackets (here, end, proxNode );318 ParseSquareBrackets (here, end, proxNode, 
defaultStemMethod); 309 319 ParseProxModifiers (here, end, proxNode); 310 320 return proxNode; … … 319 329 320 330 static QueryNode *ParseExpression (UCArray::const_iterator &here, 321 UCArray::const_iterator end) { 331 UCArray::const_iterator end, 332 int defaultStemMethod) { 322 333 LexEl el; 323 334 QueryNode *curTree = NULL; … … 330 341 el.lexType == QuoteE || 331 342 el.lexType == IntegerE ) { 332 // el.lexType == TagE) { tag at end of term now343 // el.lexType == TagE) { //tag at end of term now 333 344 // some type of term, back track and parse it 334 345 here = oldHere; 335 curTree = OrAdd (curTree, ParseTerm (here, end ));346 curTree = OrAdd (curTree, ParseTerm (here, end, defaultStemMethod)); 336 347 337 348 } else if (el.lexType == AndOpE) { 338 curTree = AndAdd (curTree, ParseTerm (here, end ));349 curTree = AndAdd (curTree, ParseTerm (here, end, defaultStemMethod)); 339 350 340 351 } else if (el.lexType == OrOpE) { 341 curTree = OrAdd (curTree, ParseTerm (here, end ));352 curTree = OrAdd (curTree, ParseTerm (here, end, defaultStemMethod)); 342 353 343 354 } else if (el.lexType == NotOpE) { 344 curTree = NotAdd (curTree, ParseTerm (here, end ));355 curTree = NotAdd (curTree, ParseTerm (here, end, defaultStemMethod)); 345 356 346 357 } else if (el.lexType == CloseBracketE) { … … 357 368 } 358 369 359 QueryNode *ParseQuery (const UCArray &queryStr ) {370 QueryNode *ParseQuery (const UCArray &queryStr, int defaultStemMethod) { 360 371 UCArray::const_iterator here = queryStr.begin(); 361 372 UCArray::const_iterator end = queryStr.end(); 362 return ParseExpression (here, end );363 } 373 return ParseExpression (here, end, defaultStemMethod); 374 } -
trunk/gsdl/src/mgpp/text/GSDLQueryParser.h
r1127 r1296 29 29 30 30 // returns NULL if the query could not be parsed 31 QueryNode *ParseQuery (const UCArray &queryStr); 31 // defaultStemMethod used to set stemming and casefolding for terms where 32 // it's not set explicitly in the query string. The defaultStemMethod value 33 // should be set using the values from the preferences page 34 QueryNode *ParseQuery (const UCArray &queryStr, int defaultStemMethod); 32 35 33 36 #endif
Note:
See TracChangeset
for help on using the changeset viewer.