[3365] | 1 | /**************************************************************************
|
---|
| 2 | *
|
---|
| 3 | * QueryParser.cpp -- Query parser for a simple query language
|
---|
| 4 | * Copyright (C) 2000 Rodger McNab
|
---|
| 5 | *
|
---|
| 6 | * This program is free software; you can redistribute it and/or modify
|
---|
| 7 | * it under the terms of the GNU General Public License as published by
|
---|
| 8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 9 | * (at your option) any later version.
|
---|
| 10 | *
|
---|
| 11 | * This program is distributed in the hope that it will be useful,
|
---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | * GNU General Public License for more details.
|
---|
| 15 | *
|
---|
| 16 | * You should have received a copy of the GNU General Public License
|
---|
| 17 | * along with this program; if not, write to the Free Software
|
---|
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 19 | *
|
---|
| 20 | **************************************************************************/
|
---|
| 21 |
|
---|
| 22 | #include "GSDLQueryParser.h"
|
---|
| 23 | #include "GSDLQueryLex.h"
|
---|
[12321] | 24 | #include "words.h"
|
---|
[3365] | 25 |
|
---|
| 26 | static QueryNode *ParseExpression (UCArray::const_iterator &here,
|
---|
| 27 | UCArray::const_iterator end,
|
---|
[4210] | 28 | int defaultBoolCombine,
|
---|
[3365] | 29 | int defaultStemMethod);
|
---|
| 30 |
|
---|
| 31 | static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) {
|
---|
| 32 | if (t1 == NULL) return t2;
|
---|
| 33 | if (t2 == NULL) return t1;
|
---|
| 34 |
|
---|
| 35 | AndQueryNode *andNode = new AndQueryNode;
|
---|
| 36 | andNode->leftNode = t1;
|
---|
| 37 | andNode->rightNode = t2;
|
---|
| 38 | return andNode;
|
---|
| 39 | }
|
---|
| 40 |
|
---|
| 41 | static QueryNode *OrAdd (QueryNode *t1, QueryNode *t2) {
|
---|
| 42 | if (t1 == NULL) return t2;
|
---|
| 43 | if (t2 == NULL) return t1;
|
---|
| 44 |
|
---|
| 45 | OrQueryNode *orNode = new OrQueryNode;
|
---|
| 46 | orNode->leftNode = t1;
|
---|
| 47 | orNode->rightNode = t2;
|
---|
| 48 | return orNode;
|
---|
| 49 | }
|
---|
| 50 |
|
---|
| 51 | static QueryNode *NotAdd (QueryNode *t1, QueryNode *t2) {
|
---|
| 52 | if (t1 == NULL) return t2;
|
---|
| 53 | if (t2 == NULL) return t1;
|
---|
| 54 |
|
---|
| 55 | NotQueryNode *notNode = new NotQueryNode;
|
---|
| 56 | notNode->queryNode = t1;
|
---|
| 57 | notNode->notNode = t2;
|
---|
| 58 | return notNode;
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 | // expects the opening bracket to have already been parsed
|
---|
| 62 | // and discarded
|
---|
| 63 | static QueryNode *ParseBracketExpression (UCArray::const_iterator &here,
|
---|
| 64 | UCArray::const_iterator end,
|
---|
| 65 | int defaultBoolCombine,
|
---|
| 66 | int defaultStemMethod) {
|
---|
| 67 | // get everything in the expression
|
---|
| 68 | QueryNode *curTree = ParseExpression (here, end, defaultBoolCombine,
|
---|
| 69 | defaultStemMethod);
|
---|
| 70 |
|
---|
| 71 | // gobble up tokens until a closing bracket is found
|
---|
| 72 | // or the end of the string
|
---|
| 73 | LexEl el;
|
---|
| 74 | while (ParseLexEl (here, end, el)) {
|
---|
| 75 | if (el.lexType == CloseBracketE) break;
|
---|
| 76 | }
|
---|
| 77 |
|
---|
| 78 | return curTree;
|
---|
| 79 | }
|
---|
| 80 |
|
---|
| 81 | static int ParseInt (UCArray::const_iterator &here,
|
---|
| 82 | UCArray::const_iterator end) {
|
---|
| 83 | LexEl el;
|
---|
| 84 | UCArray::const_iterator oldHere = here;
|
---|
| 85 | if (ParseLexEl (here, end, el) && el.lexType == IntegerE)
|
---|
| 86 | return el.num;
|
---|
| 87 |
|
---|
| 88 | here = oldHere; // not an integer
|
---|
| 89 | return 0;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | // default is within 20 words
|
---|
| 93 | static void SetRangeValues (TermNode &termNode,
|
---|
[6119] | 94 | UCArray &nearby,
|
---|
| 95 | bool reverse) {
|
---|
[8692] | 96 | UCArray NEARBY; SetCStr(NEARBY, "NEAR", 4);
|
---|
| 97 | UCArray WITHIN; SetCStr(WITHIN, "WITHIN", 6);
|
---|
[6119] | 98 |
|
---|
[3365] | 99 | if (nearby == NEARBY) { // no modifier
|
---|
[3782] | 100 | termNode.startRange = (NEAR_DEFAULT+1)*-1;
|
---|
| 101 | termNode.endRange = NEAR_DEFAULT;
|
---|
| 102 |
|
---|
[6119] | 103 | } else if (nearby == WITHIN) { // no modifier
|
---|
| 104 | if (reverse) {
|
---|
| 105 | termNode.startRange = (NEAR_DEFAULT+1)*-1;
|
---|
| 106 | termNode.endRange = -1;
|
---|
| 107 | } else {
|
---|
| 108 | termNode.startRange = NEAR_DEFAULT;
|
---|
| 109 | termNode.endRange = 0;
|
---|
| 110 | }
|
---|
[3365] | 111 | }
|
---|
| 112 | else { // extract number
|
---|
[6119] | 113 | UCArray::const_iterator here;
|
---|
| 114 | bool within = false;
|
---|
| 115 | if (PrefixLen(nearby, WITHIN)==6) {
|
---|
| 116 | within=true;
|
---|
| 117 | here = nearby.begin()+6;
|
---|
| 118 | } else {
|
---|
| 119 | here = nearby.begin()+4;
|
---|
| 120 | }
|
---|
[3365] | 121 | UCArray::const_iterator end = nearby.end();
|
---|
| 122 | int size=0;
|
---|
| 123 | while (here != end) {
|
---|
| 124 | size = size*10 + (*here-'0');
|
---|
[8692] | 125 | ++here;
|
---|
[3365] | 126 | }
|
---|
[6119] | 127 | if (within) {
|
---|
| 128 | if (reverse) {
|
---|
| 129 | termNode.startRange = size;
|
---|
| 130 | termNode.endRange = 0;
|
---|
| 131 | } else {
|
---|
| 132 | termNode.startRange = -1 * (size+1);
|
---|
| 133 | termNode.endRange = -1;
|
---|
| 134 | }
|
---|
| 135 | } else {
|
---|
| 136 | termNode.startRange = -1 * (size+1);
|
---|
| 137 | termNode.endRange = size;
|
---|
| 138 | }
|
---|
[3365] | 139 | }
|
---|
| 140 | }
|
---|
| 141 |
|
---|
| 142 | static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) {
|
---|
[13653] | 143 | // here expect el to contain some of c,s,i,u,f,a -- see mg_files.h CHAR_FLAG_STEM_* constants
|
---|
[3365] | 144 | unsigned long stem = (unsigned long)defaultStemMethod;
|
---|
| 145 |
|
---|
| 146 | UCArray::const_iterator here = el.text.begin();
|
---|
| 147 | UCArray::const_iterator end = el.text.end();
|
---|
| 148 |
|
---|
[13653] | 149 | /* [JFG - Mar 06: Accent folding patch] */
|
---|
| 150 | /* Changed to use CHAR_FLAG_STEM* constants from mg_files.h */
|
---|
| 151 | while(here != end) {
|
---|
| 152 | unsigned char ch = *here;
|
---|
| 153 | if (strchr (CHAR_FLAG_STEM_Validator, ch) == NULL)
|
---|
| 154 | return STEM_INVALID; // incorrect format
|
---|
| 155 |
|
---|
| 156 | switch(ch) {
|
---|
| 157 | case CHAR_FLAG_STEM_CaseFold: // ignore case (fold)
|
---|
| 158 | stem |= STEM_CaseFolding;
|
---|
| 159 | break;
|
---|
| 160 | case CHAR_FLAG_STEM_NoCaseFold: // case sensitive
|
---|
| 161 | stem &= (~STEM_CaseFolding);
|
---|
| 162 | break;
|
---|
| 163 | case CHAR_FLAG_STEM_Stemming: // stem words
|
---|
| 164 | stem |= STEM_Stemming;
|
---|
| 165 | break;
|
---|
| 166 | case CHAR_FLAG_STEM_NoStemming: // do not stem words
|
---|
| 167 | stem &= (~STEM_Stemming);
|
---|
| 168 | break;
|
---|
| 169 | #ifdef ENABLE_ACCENTFOLD
|
---|
| 170 | case CHAR_FLAG_STEM_AccentFold: // accent fold
|
---|
| 171 | stem |= STEM_AccentFolding;
|
---|
| 172 | break;
|
---|
| 173 | case CHAR_FLAG_STEM_NoAccentFold: // do no accent folding
|
---|
| 174 | stem &= (~STEM_AccentFolding);
|
---|
| 175 | break;
|
---|
| 176 | #endif
|
---|
| 177 | };
|
---|
| 178 |
|
---|
| 179 | ++here;
|
---|
[3365] | 180 | }
|
---|
| 181 | return stem;
|
---|
| 182 | }
|
---|
| 183 |
|
---|
| 184 |
|
---|
| 185 | static void ParseTermModifiers (UCArray::const_iterator &here,
|
---|
| 186 | UCArray::const_iterator end,
|
---|
| 187 | TermNode &termNode,
|
---|
| 188 | int defaultStemMethod) {
|
---|
| 189 |
|
---|
| 190 | termNode.stemMethod = defaultStemMethod;
|
---|
[8242] | 191 | bool partial_match = false;
|
---|
[3365] | 192 | LexEl el;
|
---|
| 193 | UCArray::const_iterator oldHere = here;
|
---|
| 194 | while (ParseLexEl (here, end, el)) {
|
---|
| 195 | if (el.lexType == TermWeightE) {
|
---|
| 196 | termNode.termWeight = ParseInt (here, end);
|
---|
| 197 |
|
---|
| 198 | } else if (el.lexType == StemMethodE) {
|
---|
| 199 | oldHere = here;
|
---|
| 200 | LexEl stem;
|
---|
| 201 | if (ParseLexEl (here, end, stem) && stem.lexType == TermE) {
|
---|
| 202 | termNode.stemMethod = GetStemMethod(stem, defaultStemMethod);
|
---|
[13653] | 203 | /* [JFG - Mar 06: Accent folding patch] */
|
---|
| 204 | /* use STEM_INVALID instead of hardcoded 4 */
|
---|
| 205 | if (termNode.stemMethod == STEM_INVALID) { // error so backtrack
|
---|
[3365] | 206 | here = oldHere;
|
---|
| 207 | termNode.stemMethod = (unsigned long)defaultStemMethod;
|
---|
| 208 | }
|
---|
[13653] | 209 | } else here = oldHere; //ignore - wrong syntax
|
---|
[3365] | 210 |
|
---|
| 211 | } else if (el.lexType == RangeE) {
|
---|
| 212 | termNode.startRange = ParseInt (here, end);
|
---|
| 213 | termNode.endRange = ParseInt (here, end);
|
---|
| 214 |
|
---|
| 215 | } else if (el.lexType == AtE) {
|
---|
| 216 | termNode.startRange = termNode.endRange = ParseInt (here, end);
|
---|
[8242] | 217 | } else if (el.lexType == StarE) {
|
---|
| 218 | partial_match = true;
|
---|
[3365] | 219 | } else {
|
---|
| 220 | // no term modifiers
|
---|
| 221 | here = oldHere;
|
---|
| 222 | break;
|
---|
| 223 | }
|
---|
| 224 |
|
---|
[8242] | 225 | if (partial_match) {
|
---|
[13653] | 226 | /* [JFG - Mar 06: Accent folding patch] */
|
---|
| 227 | /* use STEM_PARTIAL_MATCH flag */
|
---|
| 228 | termNode.stemMethod |= STEM_PARTIAL_MATCH; // set partial match flag
|
---|
| 229 | termNode.stemMethod &= (~STEM_Stemming); // we dont have stemming on if doing partial matching.
|
---|
| 230 | termNode.stemMethod &= (~STEM_AccentFolding); // we dont have accentfolding on if doing partial matching.
|
---|
[8242] | 231 | }
|
---|
[3365] | 232 | oldHere = here;
|
---|
| 233 | }
|
---|
| 234 | }
|
---|
| 235 |
|
---|
| 236 | static void ParseProxModifiers (UCArray::const_iterator &here,
|
---|
| 237 | UCArray::const_iterator end,
|
---|
| 238 | ProxMatchQueryNode *proxNode) {
|
---|
| 239 | // so far only have one - the tag stuff
|
---|
| 240 | LexEl el;
|
---|
| 241 | UCArray::const_iterator oldHere = here;
|
---|
| 242 | while (ParseLexEl (here, end, el)) {
|
---|
| 243 | if (el.lexType == TagE) {
|
---|
| 244 | oldHere = here; // don't backtrack past here
|
---|
| 245 | if (ParseLexEl (here, end, el) && el.lexType == TermE) {
|
---|
| 246 | proxNode->tagNodePtr = new TagNode;
|
---|
| 247 | proxNode->tagNodePtr->tagName = el.text;
|
---|
| 248 |
|
---|
| 249 | }
|
---|
| 250 | else { // error in tag
|
---|
| 251 | here = oldHere;
|
---|
| 252 | }
|
---|
| 253 | } // TagE
|
---|
| 254 | // add in other cases here
|
---|
| 255 | else {
|
---|
| 256 | // no modifiers
|
---|
| 257 | here = oldHere;
|
---|
| 258 | break;
|
---|
| 259 | }
|
---|
| 260 | oldHere = here;
|
---|
| 261 | }//while
|
---|
| 262 |
|
---|
| 263 |
|
---|
| 264 | }
|
---|
| 265 |
|
---|
| 266 | // expects starting brackets to have been parsed
|
---|
[4210] | 267 | // sets error to true if something has gone wrong
|
---|
[6082] | 268 | static ProxMatchQueryNode *ParseSquareBrackets(UCArray::const_iterator &here,
|
---|
[3365] | 269 | UCArray::const_iterator end,
|
---|
[6082] | 270 | /*ProxMatchQueryNode *proxNode,*/
|
---|
[4210] | 271 | int defaultStemMethod,
|
---|
| 272 | bool & error) {
|
---|
[3365] | 273 |
|
---|
[6082] | 274 | ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;
|
---|
[3365] | 275 | LexEl el;
|
---|
| 276 | bool phrase=false;
|
---|
| 277 | bool first=true;
|
---|
[6129] | 278 | bool prox = false;
|
---|
[6082] | 279 | UCArray near_string;
|
---|
[3365] | 280 | while (ParseLexEl (here, end, el)) {
|
---|
[8692] | 281 | // cant have AND, OR, NOT in square brackets, so assume they are words
|
---|
| 282 | if (el.lexType == TermE || el.lexType == IntegerE || el.lexType == AndOpE || el.lexType == OrOpE || el.lexType == NotOpE) {
|
---|
[3365] | 283 | TermNode termNode;
|
---|
| 284 | termNode.term = el.text;
|
---|
| 285 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 286 | if (phrase) {
|
---|
| 287 | if (first) first=false;
|
---|
| 288 | else {
|
---|
| 289 | termNode.startRange = -2;
|
---|
| 290 | termNode.endRange = -1;
|
---|
| 291 | }
|
---|
[6129] | 292 | } else if (prox) {
|
---|
[6119] | 293 | SetRangeValues(termNode, near_string, false);
|
---|
[6129] | 294 | prox = false;
|
---|
[6082] | 295 | }
|
---|
[3365] | 296 | proxNode->terms.push_back(termNode);
|
---|
| 297 | }
|
---|
| 298 | else if (el.lexType == CloseSquareBracketE) {
|
---|
| 299 | break;
|
---|
| 300 | }
|
---|
| 301 | else if (el.lexType == QuoteE) {
|
---|
| 302 | // phrase inside square brackets
|
---|
[6082] | 303 | if (phrase) { // end of phrase
|
---|
| 304 | phrase=false;
|
---|
| 305 | first = true;
|
---|
| 306 | } else {
|
---|
| 307 | phrase=true; // start of phrase
|
---|
| 308 | }
|
---|
[6119] | 309 | } else if (el.lexType == NearOpE || el.lexType == WithinOpE) {
|
---|
[6082] | 310 | if (phrase) {
|
---|
[6119] | 311 | // cant have proximity op in a phrase - just assume its an actual word
|
---|
[6082] | 312 | TermNode termNode;
|
---|
| 313 | termNode.term = el.text;
|
---|
| 314 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 315 | proxNode->terms.push_back(termNode);
|
---|
| 316 | } else {
|
---|
[6119] | 317 | // its a NEAR or within op
|
---|
[6129] | 318 | prox = true;
|
---|
[6082] | 319 | near_string = el.text;
|
---|
| 320 | }
|
---|
| 321 |
|
---|
| 322 | }
|
---|
| 323 | else if (el.lexType == UnknownE) {
|
---|
[5449] | 324 | // just ignore it
|
---|
[3365] | 325 | }
|
---|
| 326 | else {
|
---|
[4210] | 327 | //error - we set the proxNode to NULL,
|
---|
| 328 | cerr <<"GSDLQueryParser: bad syntax inside []\n";
|
---|
| 329 | error = true;
|
---|
[6082] | 330 | return NULL;
|
---|
[3365] | 331 | }
|
---|
| 332 | } // while
|
---|
[6082] | 333 | return proxNode;
|
---|
[3365] | 334 | }
|
---|
| 335 | // expects the starting quote to have been parsed
|
---|
| 336 | // and discarded
|
---|
| 337 | // now phrases use the case and stem preference options
|
---|
| 338 | // ie can search for a phrase ignoring case
|
---|
| 339 | static void ParsePhrase (UCArray::const_iterator &here,
|
---|
| 340 | UCArray::const_iterator end,
|
---|
| 341 | ProxMatchQueryNode &proxNode,
|
---|
[4210] | 342 | int defaultStemMethod,
|
---|
| 343 | bool &error) {
|
---|
[3365] | 344 | LexEl el;
|
---|
| 345 | bool first = true;
|
---|
| 346 | while (ParseLexEl (here, end, el)) {
|
---|
| 347 | if (el.lexType == TermE || el.lexType == IntegerE) {
|
---|
| 348 | TermNode termNode;
|
---|
| 349 | termNode.term = el.text;
|
---|
| 350 | //termNode.stemMethod = defaultStemMethod;
|
---|
| 351 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 352 | if (first) {
|
---|
| 353 | first = false;
|
---|
| 354 | }
|
---|
| 355 | else {
|
---|
| 356 | termNode.startRange = -2;
|
---|
| 357 | termNode.endRange = -1;
|
---|
| 358 | }
|
---|
| 359 | proxNode.terms.push_back (termNode);
|
---|
| 360 |
|
---|
| 361 | } else if (el.lexType == QuoteE) {
|
---|
| 362 | break;
|
---|
| 363 |
|
---|
[5449] | 364 | } else if (el.lexType == UnknownE) {
|
---|
| 365 | // just ignore it
|
---|
[3365] | 366 | } else {
|
---|
[4210] | 367 | // error
|
---|
| 368 | error = true;
|
---|
| 369 | return;
|
---|
[3365] | 370 | }
|
---|
| 371 | }
|
---|
| 372 | }
|
---|
| 373 |
|
---|
| 374 | static QueryNode *ParseTerm (UCArray::const_iterator &here,
|
---|
| 375 | UCArray::const_iterator end,
|
---|
| 376 | int defaultBoolCombine,
|
---|
| 377 | int defaultStemMethod) {
|
---|
| 378 | LexEl el;
|
---|
| 379 |
|
---|
| 380 | UCArray::const_iterator oldHere = here;
|
---|
| 381 | if (!ParseLexEl (here, end, el)) return NULL;
|
---|
| 382 |
|
---|
| 383 | if (el.lexType == OpenBracketE)
|
---|
| 384 | return ParseBracketExpression (here, end, defaultBoolCombine,
|
---|
| 385 | defaultStemMethod);
|
---|
| 386 |
|
---|
| 387 | ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;
|
---|
| 388 |
|
---|
| 389 | if (el.lexType == TermE || el.lexType == IntegerE) {
|
---|
| 390 | TermNode termNode;
|
---|
| 391 | termNode.term = el.text;
|
---|
| 392 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 393 | oldHere = here; // dont backtrack past here
|
---|
[6119] | 394 | if (ParseLexEl(here, end, el) && (el.lexType == NearOpE || el.lexType == WithinOpE )) {
|
---|
[4210] | 395 | delete proxNode;
|
---|
| 396 | oldHere = here;
|
---|
| 397 | // this is calling ParseTerm again, but only a subset of the things accepted by ParseTerm are appropriate here. add in some hacks to avoid segmentation faults - kjdon, 04/2003
|
---|
| 398 |
|
---|
| 399 | // if the next element is a '(' have a syntax error, return NULL
|
---|
| 400 | LexEl temp_el;
|
---|
| 401 | if (ParseLexEl(here, end, temp_el) && temp_el.lexType == OpenBracketE) {
|
---|
[6119] | 402 | cerr << "GSDLQueryParser: NEAR/WITHIN cannot be followed by a '('\n";
|
---|
[4210] | 403 | return NULL;
|
---|
| 404 | }
|
---|
| 405 | here = oldHere; // else backtrack
|
---|
| 406 |
|
---|
[3365] | 407 | proxNode = (ProxMatchQueryNode *)ParseTerm(here, end, defaultBoolCombine,
|
---|
| 408 | defaultStemMethod);
|
---|
[6119] | 409 | SetRangeValues(termNode, el.text, true);
|
---|
[3365] | 410 | proxNode->terms.push_back (termNode);
|
---|
| 411 | return proxNode;
|
---|
[4210] | 412 |
|
---|
| 413 | } else {
|
---|
[3365] | 414 | here = oldHere; // backtrack
|
---|
| 415 | proxNode->terms.push_back (termNode);
|
---|
| 416 | ParseProxModifiers(here, end, proxNode);
|
---|
| 417 | return proxNode;
|
---|
| 418 | }
|
---|
| 419 | } else if (el.lexType == QuoteE) {
|
---|
[4210] | 420 | bool error = false;
|
---|
| 421 | ParsePhrase (here, end, *proxNode, defaultStemMethod, error);
|
---|
| 422 | if (error) {
|
---|
| 423 | delete proxNode;
|
---|
| 424 | return NULL;
|
---|
| 425 | }
|
---|
[3365] | 426 | return proxNode;
|
---|
| 427 | }
|
---|
| 428 | else if (el.lexType == OpenSquareBracketE) {
|
---|
[4210] | 429 | bool error = false;
|
---|
[6082] | 430 | proxNode = ParseSquareBrackets (here, end, /*proxNode, */defaultStemMethod, error);
|
---|
[4210] | 431 | if (error) {
|
---|
| 432 | delete proxNode;
|
---|
| 433 | return NULL;
|
---|
| 434 | }
|
---|
[3365] | 435 | ParseProxModifiers (here, end, proxNode);
|
---|
| 436 | return proxNode;
|
---|
| 437 | }
|
---|
| 438 |
|
---|
| 439 | // not a term
|
---|
| 440 | here = oldHere;
|
---|
| 441 | delete proxNode;
|
---|
| 442 | return NULL;
|
---|
| 443 | }
|
---|
| 444 |
|
---|
| 445 |
|
---|
| 446 | static QueryNode *ParseExpression (UCArray::const_iterator &here,
|
---|
| 447 | UCArray::const_iterator end,
|
---|
| 448 | int defaultBoolCombine,
|
---|
| 449 | int defaultStemMethod) {
|
---|
| 450 | LexEl el;
|
---|
| 451 | QueryNode *curTree = NULL;
|
---|
| 452 | UCArray::const_iterator oldHere = here;
|
---|
| 453 | while (ParseLexEl (here, end, el)) {
|
---|
[4210] | 454 | if (el.lexType == CloseBracketE) {
|
---|
| 455 | // parsebracketexpression is waiting for the last bracket, so put it back
|
---|
| 456 | here = oldHere;
|
---|
| 457 | break;
|
---|
| 458 |
|
---|
| 459 | } else if (el.lexType == OpenSquareBracketE ||
|
---|
| 460 | el.lexType == OpenBracketE ||
|
---|
| 461 | el.lexType == TermE ||
|
---|
| 462 | el.lexType == QuoteE ||
|
---|
| 463 | el.lexType == IntegerE ) {
|
---|
| 464 |
|
---|
[3365] | 465 | // some type of term, back track and parse it
|
---|
| 466 | here = oldHere;
|
---|
[4210] | 467 |
|
---|
| 468 | // parse the term
|
---|
| 469 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 470 | defaultStemMethod);
|
---|
| 471 | if (newTerm == NULL) {
|
---|
| 472 | delete curTree;
|
---|
| 473 | return NULL;
|
---|
| 474 | }
|
---|
| 475 |
|
---|
[3365] | 476 | // if default==1, AND, else if==0, OR
|
---|
| 477 | if (defaultBoolCombine) {
|
---|
[4210] | 478 | curTree = AndAdd (curTree, newTerm);
|
---|
[3365] | 479 | }
|
---|
| 480 | else {
|
---|
[4210] | 481 | curTree = OrAdd (curTree, newTerm);
|
---|
[3365] | 482 | }
|
---|
[4210] | 483 |
|
---|
[3365] | 484 | } else if (el.lexType == AndOpE) {
|
---|
[4210] | 485 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 486 | defaultStemMethod);
|
---|
| 487 | if (newTerm == NULL) {
|
---|
| 488 | delete curTree;
|
---|
| 489 | return NULL;
|
---|
| 490 | }
|
---|
| 491 | curTree = AndAdd (curTree, newTerm);
|
---|
[3365] | 492 |
|
---|
| 493 | } else if (el.lexType == OrOpE) {
|
---|
[4210] | 494 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 495 | defaultStemMethod);
|
---|
| 496 | if (newTerm == NULL) {
|
---|
| 497 | delete curTree;
|
---|
| 498 | return NULL;
|
---|
| 499 | }
|
---|
| 500 | curTree = OrAdd (curTree, newTerm);
|
---|
[3365] | 501 |
|
---|
| 502 | } else if (el.lexType == NotOpE) {
|
---|
[4210] | 503 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 504 | defaultStemMethod);
|
---|
| 505 | if (newTerm == NULL) {
|
---|
| 506 | delete curTree;
|
---|
| 507 | return NULL;
|
---|
| 508 | }
|
---|
| 509 | curTree = NotAdd (curTree, newTerm);
|
---|
[3365] | 510 |
|
---|
[5449] | 511 | } else if (el.lexType == UnknownE) {
|
---|
| 512 | // just ignore it
|
---|
[4210] | 513 | } else {
|
---|
| 514 |
|
---|
| 515 | // syntax error, return NUll
|
---|
| 516 | delete curTree;
|
---|
| 517 | return NULL;
|
---|
| 518 | }
|
---|
| 519 |
|
---|
[3365] | 520 | oldHere = here;
|
---|
| 521 | }
|
---|
| 522 |
|
---|
| 523 | return curTree;
|
---|
| 524 | }
|
---|
| 525 |
|
---|
| 526 | QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
|
---|
[12321] | 527 | int defaultStemMethod, int maxnumeric) {
|
---|
[13653] | 528 | if (4 < maxnumeric && maxnumeric < 512) {
|
---|
[12321] | 529 | MAXNUMERIC = maxnumeric;
|
---|
| 530 | }
|
---|
[3365] | 531 | UCArray::const_iterator here = queryStr.begin();
|
---|
| 532 | UCArray::const_iterator end = queryStr.end();
|
---|
| 533 | return ParseExpression (here, end, defaultBoolCombine, defaultStemMethod);
|
---|
| 534 | }
|
---|