[3365] | 1 | /**************************************************************************
|
---|
| 2 | *
|
---|
| 3 | * QueryParser.cpp -- Query parser for a simple query language
|
---|
| 4 | * Copyright (C) 2000 Rodger McNab
|
---|
| 5 | *
|
---|
| 6 | * This program is free software; you can redistribute it and/or modify
|
---|
| 7 | * it under the terms of the GNU General Public License as published by
|
---|
| 8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 9 | * (at your option) any later version.
|
---|
| 10 | *
|
---|
| 11 | * This program is distributed in the hope that it will be useful,
|
---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | * GNU General Public License for more details.
|
---|
| 15 | *
|
---|
| 16 | * You should have received a copy of the GNU General Public License
|
---|
| 17 | * along with this program; if not, write to the Free Software
|
---|
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 19 | *
|
---|
| 20 | **************************************************************************/
|
---|
| 21 |
|
---|
| 22 | #include "GSDLQueryParser.h"
|
---|
| 23 | #include "GSDLQueryLex.h"
|
---|
[12321] | 24 | #include "words.h"
|
---|
[3365] | 25 |
|
---|
| 26 | static QueryNode *ParseExpression (UCArray::const_iterator &here,
|
---|
| 27 | UCArray::const_iterator end,
|
---|
[4210] | 28 | int defaultBoolCombine,
|
---|
[3365] | 29 | int defaultStemMethod);
|
---|
| 30 |
|
---|
| 31 | static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) {
|
---|
| 32 | if (t1 == NULL) return t2;
|
---|
| 33 | if (t2 == NULL) return t1;
|
---|
| 34 |
|
---|
| 35 | AndQueryNode *andNode = new AndQueryNode;
|
---|
| 36 | andNode->leftNode = t1;
|
---|
| 37 | andNode->rightNode = t2;
|
---|
| 38 | return andNode;
|
---|
| 39 | }
|
---|
| 40 |
|
---|
| 41 | static QueryNode *OrAdd (QueryNode *t1, QueryNode *t2) {
|
---|
| 42 | if (t1 == NULL) return t2;
|
---|
| 43 | if (t2 == NULL) return t1;
|
---|
| 44 |
|
---|
| 45 | OrQueryNode *orNode = new OrQueryNode;
|
---|
| 46 | orNode->leftNode = t1;
|
---|
| 47 | orNode->rightNode = t2;
|
---|
| 48 | return orNode;
|
---|
| 49 | }
|
---|
| 50 |
|
---|
| 51 | static QueryNode *NotAdd (QueryNode *t1, QueryNode *t2) {
|
---|
| 52 | if (t1 == NULL) return t2;
|
---|
| 53 | if (t2 == NULL) return t1;
|
---|
| 54 |
|
---|
| 55 | NotQueryNode *notNode = new NotQueryNode;
|
---|
| 56 | notNode->queryNode = t1;
|
---|
| 57 | notNode->notNode = t2;
|
---|
| 58 | return notNode;
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 | // expects the opening bracket to have already been parsed
|
---|
| 62 | // and discarded
|
---|
| 63 | static QueryNode *ParseBracketExpression (UCArray::const_iterator &here,
|
---|
| 64 | UCArray::const_iterator end,
|
---|
| 65 | int defaultBoolCombine,
|
---|
| 66 | int defaultStemMethod) {
|
---|
| 67 | // get everything in the expression
|
---|
| 68 | QueryNode *curTree = ParseExpression (here, end, defaultBoolCombine,
|
---|
| 69 | defaultStemMethod);
|
---|
| 70 |
|
---|
| 71 | // gobble up tokens until a closing bracket is found
|
---|
| 72 | // or the end of the string
|
---|
| 73 | LexEl el;
|
---|
| 74 | while (ParseLexEl (here, end, el)) {
|
---|
| 75 | if (el.lexType == CloseBracketE) break;
|
---|
| 76 | }
|
---|
| 77 |
|
---|
| 78 | return curTree;
|
---|
| 79 | }
|
---|
| 80 |
|
---|
| 81 | static int ParseInt (UCArray::const_iterator &here,
|
---|
| 82 | UCArray::const_iterator end) {
|
---|
| 83 | LexEl el;
|
---|
| 84 | UCArray::const_iterator oldHere = here;
|
---|
| 85 | if (ParseLexEl (here, end, el) && el.lexType == IntegerE)
|
---|
| 86 | return el.num;
|
---|
| 87 |
|
---|
| 88 | here = oldHere; // not an integer
|
---|
| 89 | return 0;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | // default is within 20 words
|
---|
| 93 | static void SetRangeValues (TermNode &termNode,
|
---|
[6119] | 94 | UCArray &nearby,
|
---|
| 95 | bool reverse) {
|
---|
[8692] | 96 | UCArray NEARBY; SetCStr(NEARBY, "NEAR", 4);
|
---|
| 97 | UCArray WITHIN; SetCStr(WITHIN, "WITHIN", 6);
|
---|
[6119] | 98 |
|
---|
[3365] | 99 | if (nearby == NEARBY) { // no modifier
|
---|
[3782] | 100 | termNode.startRange = (NEAR_DEFAULT+1)*-1;
|
---|
| 101 | termNode.endRange = NEAR_DEFAULT;
|
---|
| 102 |
|
---|
[6119] | 103 | } else if (nearby == WITHIN) { // no modifier
|
---|
| 104 | if (reverse) {
|
---|
| 105 | termNode.startRange = (NEAR_DEFAULT+1)*-1;
|
---|
| 106 | termNode.endRange = -1;
|
---|
| 107 | } else {
|
---|
| 108 | termNode.startRange = NEAR_DEFAULT;
|
---|
| 109 | termNode.endRange = 0;
|
---|
| 110 | }
|
---|
[3365] | 111 | }
|
---|
| 112 | else { // extract number
|
---|
[6119] | 113 | UCArray::const_iterator here;
|
---|
| 114 | bool within = false;
|
---|
| 115 | if (PrefixLen(nearby, WITHIN)==6) {
|
---|
| 116 | within=true;
|
---|
| 117 | here = nearby.begin()+6;
|
---|
| 118 | } else {
|
---|
| 119 | here = nearby.begin()+4;
|
---|
| 120 | }
|
---|
[3365] | 121 | UCArray::const_iterator end = nearby.end();
|
---|
| 122 | int size=0;
|
---|
| 123 | while (here != end) {
|
---|
| 124 | size = size*10 + (*here-'0');
|
---|
[8692] | 125 | ++here;
|
---|
[3365] | 126 | }
|
---|
[6119] | 127 | if (within) {
|
---|
| 128 | if (reverse) {
|
---|
| 129 | termNode.startRange = size;
|
---|
| 130 | termNode.endRange = 0;
|
---|
| 131 | } else {
|
---|
| 132 | termNode.startRange = -1 * (size+1);
|
---|
| 133 | termNode.endRange = -1;
|
---|
| 134 | }
|
---|
| 135 | } else {
|
---|
| 136 | termNode.startRange = -1 * (size+1);
|
---|
| 137 | termNode.endRange = size;
|
---|
| 138 | }
|
---|
[3365] | 139 | }
|
---|
| 140 | }
|
---|
| 141 |
|
---|
| 142 | static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) {
|
---|
| 143 | // here expect el to contain some of c,s,i,u
|
---|
| 144 | // stem method 0 = c,u 00
|
---|
| 145 | // stem method 1 = i,u 01 - default for DL
|
---|
| 146 | // stem method 2 = c, s 10
|
---|
| 147 | // stem method 3 = i,s 11
|
---|
| 148 |
|
---|
| 149 | unsigned long stem = (unsigned long)defaultStemMethod;
|
---|
| 150 |
|
---|
| 151 | UCArray::const_iterator here = el.text.begin();
|
---|
| 152 | UCArray::const_iterator end = el.text.end();
|
---|
| 153 |
|
---|
| 154 | unsigned char c1 = *here;
|
---|
[9616] | 155 | if (!(c1 == 'c'|| c1 == 'i' || c1 == 'u' || c1 == 's'))
|
---|
[3365] | 156 | return 4; // incorrect format
|
---|
| 157 |
|
---|
[9613] | 158 | ++here;
|
---|
[3365] | 159 | unsigned char c2 = 'a';
|
---|
| 160 | if (here !=end) {
|
---|
| 161 | c2 = *here;
|
---|
[9616] | 162 | if (!(c2 == 'c'|| c2 == 'i' || c2 == 'u' || c2 == 's'))
|
---|
[3365] | 163 | return 4; // incorrect format
|
---|
| 164 | }
|
---|
| 165 |
|
---|
| 166 | if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1
|
---|
| 167 | if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0
|
---|
| 168 | if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1
|
---|
| 169 | if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0
|
---|
| 170 | return stem;
|
---|
| 171 | }
|
---|
| 172 |
|
---|
| 173 |
|
---|
| 174 | static void ParseTermModifiers (UCArray::const_iterator &here,
|
---|
| 175 | UCArray::const_iterator end,
|
---|
| 176 | TermNode &termNode,
|
---|
| 177 | int defaultStemMethod) {
|
---|
| 178 |
|
---|
| 179 | termNode.stemMethod = defaultStemMethod;
|
---|
[8242] | 180 | bool partial_match = false;
|
---|
[3365] | 181 | LexEl el;
|
---|
| 182 | UCArray::const_iterator oldHere = here;
|
---|
| 183 | while (ParseLexEl (here, end, el)) {
|
---|
| 184 | if (el.lexType == TermWeightE) {
|
---|
| 185 | termNode.termWeight = ParseInt (here, end);
|
---|
| 186 |
|
---|
| 187 | } else if (el.lexType == StemMethodE) {
|
---|
| 188 | oldHere = here;
|
---|
| 189 | LexEl stem;
|
---|
| 190 | if (ParseLexEl (here, end, stem) && stem.lexType == TermE) {
|
---|
| 191 | termNode.stemMethod = GetStemMethod(stem, defaultStemMethod);
|
---|
| 192 | if (termNode.stemMethod == 4) { // error so backtrack
|
---|
| 193 | here = oldHere;
|
---|
| 194 | termNode.stemMethod = (unsigned long)defaultStemMethod;
|
---|
| 195 | }
|
---|
| 196 | }else here = oldHere; //ignore - wrong syntax
|
---|
| 197 |
|
---|
| 198 | } else if (el.lexType == RangeE) {
|
---|
| 199 | termNode.startRange = ParseInt (here, end);
|
---|
| 200 | termNode.endRange = ParseInt (here, end);
|
---|
| 201 |
|
---|
| 202 | } else if (el.lexType == AtE) {
|
---|
| 203 | termNode.startRange = termNode.endRange = ParseInt (here, end);
|
---|
[8242] | 204 | } else if (el.lexType == StarE) {
|
---|
| 205 | partial_match = true;
|
---|
[3365] | 206 | } else {
|
---|
| 207 | // no term modifiers
|
---|
| 208 | here = oldHere;
|
---|
| 209 | break;
|
---|
| 210 | }
|
---|
| 211 |
|
---|
[8242] | 212 | if (partial_match) {
|
---|
| 213 | termNode.stemMethod |= 4; // set bit 2 to 1
|
---|
| 214 | termNode.stemMethod &=0xd; // set bit 1 to 0 // we dont have stemming on if doing partial matching.
|
---|
| 215 | }
|
---|
[3365] | 216 | oldHere = here;
|
---|
| 217 | }
|
---|
| 218 | }
|
---|
| 219 |
|
---|
| 220 | static void ParseProxModifiers (UCArray::const_iterator &here,
|
---|
| 221 | UCArray::const_iterator end,
|
---|
| 222 | ProxMatchQueryNode *proxNode) {
|
---|
| 223 | // so far only have one - the tag stuff
|
---|
| 224 | LexEl el;
|
---|
| 225 | UCArray::const_iterator oldHere = here;
|
---|
| 226 | while (ParseLexEl (here, end, el)) {
|
---|
| 227 | if (el.lexType == TagE) {
|
---|
| 228 | oldHere = here; // don't backtrack past here
|
---|
| 229 | if (ParseLexEl (here, end, el) && el.lexType == TermE) {
|
---|
| 230 | proxNode->tagNodePtr = new TagNode;
|
---|
| 231 | proxNode->tagNodePtr->tagName = el.text;
|
---|
| 232 |
|
---|
| 233 | }
|
---|
| 234 | else { // error in tag
|
---|
| 235 | here = oldHere;
|
---|
| 236 | }
|
---|
| 237 | } // TagE
|
---|
| 238 | // add in other cases here
|
---|
| 239 | else {
|
---|
| 240 | // no modifiers
|
---|
| 241 | here = oldHere;
|
---|
| 242 | break;
|
---|
| 243 | }
|
---|
| 244 | oldHere = here;
|
---|
| 245 | }//while
|
---|
| 246 |
|
---|
| 247 |
|
---|
| 248 | }
|
---|
| 249 |
|
---|
| 250 | // expects starting brackets to have been parsed
|
---|
[4210] | 251 | // sets error to true if something has gone wrong
|
---|
[6082] | 252 | static ProxMatchQueryNode *ParseSquareBrackets(UCArray::const_iterator &here,
|
---|
[3365] | 253 | UCArray::const_iterator end,
|
---|
[6082] | 254 | /*ProxMatchQueryNode *proxNode,*/
|
---|
[4210] | 255 | int defaultStemMethod,
|
---|
| 256 | bool & error) {
|
---|
[3365] | 257 |
|
---|
[6082] | 258 | ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;
|
---|
[3365] | 259 | LexEl el;
|
---|
| 260 | bool phrase=false;
|
---|
| 261 | bool first=true;
|
---|
[6129] | 262 | bool prox = false;
|
---|
[6082] | 263 | UCArray near_string;
|
---|
[3365] | 264 | while (ParseLexEl (here, end, el)) {
|
---|
[8692] | 265 | // cant have AND, OR, NOT in square brackets, so assume they are words
|
---|
| 266 | if (el.lexType == TermE || el.lexType == IntegerE || el.lexType == AndOpE || el.lexType == OrOpE || el.lexType == NotOpE) {
|
---|
[3365] | 267 | TermNode termNode;
|
---|
| 268 | termNode.term = el.text;
|
---|
| 269 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 270 | if (phrase) {
|
---|
| 271 | if (first) first=false;
|
---|
| 272 | else {
|
---|
| 273 | termNode.startRange = -2;
|
---|
| 274 | termNode.endRange = -1;
|
---|
| 275 | }
|
---|
[6129] | 276 | } else if (prox) {
|
---|
[6119] | 277 | SetRangeValues(termNode, near_string, false);
|
---|
[6129] | 278 | prox = false;
|
---|
[6082] | 279 | }
|
---|
[3365] | 280 | proxNode->terms.push_back(termNode);
|
---|
| 281 | }
|
---|
| 282 | else if (el.lexType == CloseSquareBracketE) {
|
---|
| 283 | break;
|
---|
| 284 | }
|
---|
| 285 | else if (el.lexType == QuoteE) {
|
---|
| 286 | // phrase inside square brackets
|
---|
[6082] | 287 | if (phrase) { // end of phrase
|
---|
| 288 | phrase=false;
|
---|
| 289 | first = true;
|
---|
| 290 | } else {
|
---|
| 291 | phrase=true; // start of phrase
|
---|
| 292 | }
|
---|
[6119] | 293 | } else if (el.lexType == NearOpE || el.lexType == WithinOpE) {
|
---|
[6082] | 294 | if (phrase) {
|
---|
[6119] | 295 | // cant have proximity op in a phrase - just assume its an actual word
|
---|
[6082] | 296 | TermNode termNode;
|
---|
| 297 | termNode.term = el.text;
|
---|
| 298 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 299 | proxNode->terms.push_back(termNode);
|
---|
| 300 | } else {
|
---|
[6119] | 301 | // its a NEAR or within op
|
---|
[6129] | 302 | prox = true;
|
---|
[6082] | 303 | near_string = el.text;
|
---|
| 304 | }
|
---|
| 305 |
|
---|
| 306 | }
|
---|
| 307 | else if (el.lexType == UnknownE) {
|
---|
[5449] | 308 | // just ignore it
|
---|
[3365] | 309 | }
|
---|
| 310 | else {
|
---|
[4210] | 311 | //error - we set the proxNode to NULL,
|
---|
| 312 | cerr <<"GSDLQueryParser: bad syntax inside []\n";
|
---|
| 313 | error = true;
|
---|
[6082] | 314 | return NULL;
|
---|
[3365] | 315 | }
|
---|
| 316 | } // while
|
---|
[6082] | 317 | return proxNode;
|
---|
[3365] | 318 | }
|
---|
| 319 | // expects the starting quote to have been parsed
|
---|
| 320 | // and discarded
|
---|
| 321 | // now phrases use the case and stem preference options
|
---|
| 322 | // ie can search for a phrase ignoring case
|
---|
| 323 | static void ParsePhrase (UCArray::const_iterator &here,
|
---|
| 324 | UCArray::const_iterator end,
|
---|
| 325 | ProxMatchQueryNode &proxNode,
|
---|
[4210] | 326 | int defaultStemMethod,
|
---|
| 327 | bool &error) {
|
---|
[3365] | 328 | LexEl el;
|
---|
| 329 | bool first = true;
|
---|
| 330 | while (ParseLexEl (here, end, el)) {
|
---|
| 331 | if (el.lexType == TermE || el.lexType == IntegerE) {
|
---|
| 332 | TermNode termNode;
|
---|
| 333 | termNode.term = el.text;
|
---|
| 334 | //termNode.stemMethod = defaultStemMethod;
|
---|
| 335 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 336 | if (first) {
|
---|
| 337 | first = false;
|
---|
| 338 | }
|
---|
| 339 | else {
|
---|
| 340 | termNode.startRange = -2;
|
---|
| 341 | termNode.endRange = -1;
|
---|
| 342 | }
|
---|
| 343 | proxNode.terms.push_back (termNode);
|
---|
| 344 |
|
---|
| 345 | } else if (el.lexType == QuoteE) {
|
---|
| 346 | break;
|
---|
| 347 |
|
---|
[5449] | 348 | } else if (el.lexType == UnknownE) {
|
---|
| 349 | // just ignore it
|
---|
[3365] | 350 | } else {
|
---|
[4210] | 351 | // error
|
---|
| 352 | error = true;
|
---|
| 353 | return;
|
---|
[3365] | 354 | }
|
---|
| 355 | }
|
---|
| 356 | }
|
---|
| 357 |
|
---|
| 358 | static QueryNode *ParseTerm (UCArray::const_iterator &here,
|
---|
| 359 | UCArray::const_iterator end,
|
---|
| 360 | int defaultBoolCombine,
|
---|
| 361 | int defaultStemMethod) {
|
---|
| 362 | LexEl el;
|
---|
| 363 |
|
---|
| 364 | UCArray::const_iterator oldHere = here;
|
---|
| 365 | if (!ParseLexEl (here, end, el)) return NULL;
|
---|
| 366 |
|
---|
| 367 | if (el.lexType == OpenBracketE)
|
---|
| 368 | return ParseBracketExpression (here, end, defaultBoolCombine,
|
---|
| 369 | defaultStemMethod);
|
---|
| 370 |
|
---|
| 371 | ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;
|
---|
| 372 |
|
---|
| 373 | if (el.lexType == TermE || el.lexType == IntegerE) {
|
---|
| 374 | TermNode termNode;
|
---|
| 375 | termNode.term = el.text;
|
---|
| 376 | ParseTermModifiers (here, end, termNode, defaultStemMethod);
|
---|
| 377 | oldHere = here; // dont backtrack past here
|
---|
[6119] | 378 | if (ParseLexEl(here, end, el) && (el.lexType == NearOpE || el.lexType == WithinOpE )) {
|
---|
[4210] | 379 | delete proxNode;
|
---|
| 380 | oldHere = here;
|
---|
| 381 | // this is calling ParseTerm again, but only a subset of the things accepted by ParseTerm are appropriate here. add in some hacks to avoid segmentation faults - kjdon, 04/2003
|
---|
| 382 |
|
---|
| 383 | // if the next element is a '(' have a syntax error, return NULL
|
---|
| 384 | LexEl temp_el;
|
---|
| 385 | if (ParseLexEl(here, end, temp_el) && temp_el.lexType == OpenBracketE) {
|
---|
[6119] | 386 | cerr << "GSDLQueryParser: NEAR/WITHIN cannot be followed by a '('\n";
|
---|
[4210] | 387 | return NULL;
|
---|
| 388 | }
|
---|
| 389 | here = oldHere; // else backtrack
|
---|
| 390 |
|
---|
[3365] | 391 | proxNode = (ProxMatchQueryNode *)ParseTerm(here, end, defaultBoolCombine,
|
---|
| 392 | defaultStemMethod);
|
---|
[6119] | 393 | SetRangeValues(termNode, el.text, true);
|
---|
[3365] | 394 | proxNode->terms.push_back (termNode);
|
---|
| 395 | return proxNode;
|
---|
[4210] | 396 |
|
---|
| 397 | } else {
|
---|
[3365] | 398 | here = oldHere; // backtrack
|
---|
| 399 | proxNode->terms.push_back (termNode);
|
---|
| 400 | ParseProxModifiers(here, end, proxNode);
|
---|
| 401 | return proxNode;
|
---|
| 402 | }
|
---|
| 403 | } else if (el.lexType == QuoteE) {
|
---|
[4210] | 404 | bool error = false;
|
---|
| 405 | ParsePhrase (here, end, *proxNode, defaultStemMethod, error);
|
---|
| 406 | if (error) {
|
---|
| 407 | delete proxNode;
|
---|
| 408 | return NULL;
|
---|
| 409 | }
|
---|
[3365] | 410 | return proxNode;
|
---|
| 411 | }
|
---|
| 412 | else if (el.lexType == OpenSquareBracketE) {
|
---|
[4210] | 413 | bool error = false;
|
---|
[6082] | 414 | proxNode = ParseSquareBrackets (here, end, /*proxNode, */defaultStemMethod, error);
|
---|
[4210] | 415 | if (error) {
|
---|
| 416 | delete proxNode;
|
---|
| 417 | return NULL;
|
---|
| 418 | }
|
---|
[3365] | 419 | ParseProxModifiers (here, end, proxNode);
|
---|
| 420 | return proxNode;
|
---|
| 421 | }
|
---|
| 422 |
|
---|
| 423 | // not a term
|
---|
| 424 | here = oldHere;
|
---|
| 425 | delete proxNode;
|
---|
| 426 | return NULL;
|
---|
| 427 | }
|
---|
| 428 |
|
---|
| 429 |
|
---|
| 430 | static QueryNode *ParseExpression (UCArray::const_iterator &here,
|
---|
| 431 | UCArray::const_iterator end,
|
---|
| 432 | int defaultBoolCombine,
|
---|
| 433 | int defaultStemMethod) {
|
---|
| 434 | LexEl el;
|
---|
| 435 | QueryNode *curTree = NULL;
|
---|
| 436 | UCArray::const_iterator oldHere = here;
|
---|
| 437 | while (ParseLexEl (here, end, el)) {
|
---|
[4210] | 438 | if (el.lexType == CloseBracketE) {
|
---|
| 439 | // parsebracketexpression is waiting for the last bracket, so put it back
|
---|
| 440 | here = oldHere;
|
---|
| 441 | break;
|
---|
| 442 |
|
---|
| 443 | } else if (el.lexType == OpenSquareBracketE ||
|
---|
| 444 | el.lexType == OpenBracketE ||
|
---|
| 445 | el.lexType == TermE ||
|
---|
| 446 | el.lexType == QuoteE ||
|
---|
| 447 | el.lexType == IntegerE ) {
|
---|
| 448 |
|
---|
[3365] | 449 | // some type of term, back track and parse it
|
---|
| 450 | here = oldHere;
|
---|
[4210] | 451 |
|
---|
| 452 | // parse the term
|
---|
| 453 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 454 | defaultStemMethod);
|
---|
| 455 | if (newTerm == NULL) {
|
---|
| 456 | delete curTree;
|
---|
| 457 | return NULL;
|
---|
| 458 | }
|
---|
| 459 |
|
---|
[3365] | 460 | // if default==1, AND, else if==0, OR
|
---|
| 461 | if (defaultBoolCombine) {
|
---|
[4210] | 462 | curTree = AndAdd (curTree, newTerm);
|
---|
[3365] | 463 | }
|
---|
| 464 | else {
|
---|
[4210] | 465 | curTree = OrAdd (curTree, newTerm);
|
---|
[3365] | 466 | }
|
---|
[4210] | 467 |
|
---|
[3365] | 468 | } else if (el.lexType == AndOpE) {
|
---|
[4210] | 469 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 470 | defaultStemMethod);
|
---|
| 471 | if (newTerm == NULL) {
|
---|
| 472 | delete curTree;
|
---|
| 473 | return NULL;
|
---|
| 474 | }
|
---|
| 475 | curTree = AndAdd (curTree, newTerm);
|
---|
[3365] | 476 |
|
---|
| 477 | } else if (el.lexType == OrOpE) {
|
---|
[4210] | 478 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 479 | defaultStemMethod);
|
---|
| 480 | if (newTerm == NULL) {
|
---|
| 481 | delete curTree;
|
---|
| 482 | return NULL;
|
---|
| 483 | }
|
---|
| 484 | curTree = OrAdd (curTree, newTerm);
|
---|
[3365] | 485 |
|
---|
| 486 | } else if (el.lexType == NotOpE) {
|
---|
[4210] | 487 | QueryNode * newTerm = ParseTerm (here, end, defaultBoolCombine,
|
---|
| 488 | defaultStemMethod);
|
---|
| 489 | if (newTerm == NULL) {
|
---|
| 490 | delete curTree;
|
---|
| 491 | return NULL;
|
---|
| 492 | }
|
---|
| 493 | curTree = NotAdd (curTree, newTerm);
|
---|
[3365] | 494 |
|
---|
[5449] | 495 | } else if (el.lexType == UnknownE) {
|
---|
| 496 | // just ignore it
|
---|
[4210] | 497 | } else {
|
---|
| 498 |
|
---|
| 499 | // syntax error, return NUll
|
---|
| 500 | delete curTree;
|
---|
| 501 | return NULL;
|
---|
| 502 | }
|
---|
| 503 |
|
---|
[3365] | 504 | oldHere = here;
|
---|
| 505 | }
|
---|
| 506 |
|
---|
| 507 | return curTree;
|
---|
| 508 | }
|
---|
| 509 |
|
---|
| 510 | QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
|
---|
[12321] | 511 | int defaultStemMethod, int maxnumeric) {
|
---|
| 512 | if (4 < maxnumeric < 512) {
|
---|
| 513 | MAXNUMERIC = maxnumeric;
|
---|
| 514 | }
|
---|
[3365] | 515 | UCArray::const_iterator here = queryStr.begin();
|
---|
| 516 | UCArray::const_iterator end = queryStr.end();
|
---|
| 517 | return ParseExpression (here, end, defaultBoolCombine, defaultStemMethod);
|
---|
| 518 | }
|
---|