/************************************************************************** * * QueryParser.cpp -- Query parser for a simple query language * Copyright (C) 2000 Rodger McNab * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **************************************************************************/ #include "GSDLQueryParser.h" #include "GSDLQueryLex.h" static QueryNode *ParseExpression (UCArray::const_iterator &here, UCArray::const_iterator end, int defaultBoolCOmbine, int defaultStemMethod); static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) { if (t1 == NULL) return t2; if (t2 == NULL) return t1; AndQueryNode *andNode = new AndQueryNode; andNode->leftNode = t1; andNode->rightNode = t2; return andNode; } static QueryNode *OrAdd (QueryNode *t1, QueryNode *t2) { if (t1 == NULL) return t2; if (t2 == NULL) return t1; OrQueryNode *orNode = new OrQueryNode; orNode->leftNode = t1; orNode->rightNode = t2; return orNode; } static QueryNode *NotAdd (QueryNode *t1, QueryNode *t2) { if (t1 == NULL) return t2; if (t2 == NULL) return t1; NotQueryNode *notNode = new NotQueryNode; notNode->queryNode = t1; notNode->notNode = t2; return notNode; } // expects the opening bracket to have already been parsed // and discarded static QueryNode *ParseBracketExpression (UCArray::const_iterator &here, UCArray::const_iterator end, int defaultBoolCombine, int defaultStemMethod) { // get everything in the expression QueryNode *curTree = ParseExpression (here, end, defaultBoolCombine, defaultStemMethod); // gobble up tokens until a closing bracket is found // or the end of the string LexEl el; while (ParseLexEl (here, end, el)) { if (el.lexType == CloseBracketE) break; } return curTree; } static int ParseInt (UCArray::const_iterator &here, UCArray::const_iterator end) { LexEl el; UCArray::const_iterator oldHere = here; if (ParseLexEl (here, end, el) && el.lexType == IntegerE) return el.num; here = oldHere; // not an integer return 0; } // default is within 20 words static void SetRangeValues (TermNode &termNode, UCArray &nearby) { UCArray NEARBY; SetCStr(NEARBY, "NEAR"); if (nearby == NEARBY) { // no modifier termNode.startRange = -21; termNode.endRange = 20; } else { // extract number UCArray::const_iterator here = nearby.begin()+4; UCArray::const_iterator end = nearby.end(); int size=0; while (here != end) { size = size*10 + (*here-'0'); here++; } cerr <<"size= "<tagNodePtr = new TagNode; proxNode->tagNodePtr->tagName = el.text; } else { // error in tag here = oldHere; } } // TagE // add in other cases here else { // no modifiers here = oldHere; break; } oldHere = here; }//while } // expects starting brackets to have been parsed static void ParseSquareBrackets(UCArray::const_iterator &here, UCArray::const_iterator end, ProxMatchQueryNode *proxNode, int defaultStemMethod) { LexEl el; bool phrase=false; bool first=true; while (ParseLexEl (here, end, el)) { if (el.lexType == TermE || el.lexType == IntegerE) { TermNode termNode; termNode.term = el.text; ParseTermModifiers (here, end, termNode, defaultStemMethod); if (phrase) { if (first) first=false; else { termNode.startRange = -2; termNode.endRange = -1; } } proxNode->terms.push_back(termNode); } else if (el.lexType == CloseSquareBracketE) { break; } else if (el.lexType == AndOpE) { // ignore, the words are AND'ed anyway cerr << "and inside []\n"; } else if (el.lexType == OrOpE) { cerr << "or inside []\n"; } else if (el.lexType == QuoteE) { // phrase inside square brackets if (phrase) phrase=false; else phrase=true; } else { //error // just ignore for now cerr <<"bad syntax inside []\n"; } } // while } // expects the starting quote to have been parsed // and discarded // now phrases use the case and stem preference options // ie can search for a phrase ignoring case static void ParsePhrase (UCArray::const_iterator &here, UCArray::const_iterator end, ProxMatchQueryNode &proxNode, int defaultStemMethod) { LexEl el; bool first = true; while (ParseLexEl (here, end, el)) { if (el.lexType == TermE || el.lexType == IntegerE) { TermNode termNode; termNode.term = el.text; //termNode.stemMethod = defaultStemMethod; ParseTermModifiers (here, end, termNode, defaultStemMethod); if (first) { first = false; } else { termNode.startRange = -2; termNode.endRange = -1; } proxNode.terms.push_back (termNode); } else if (el.lexType == QuoteE) { break; } else { // error break; } } } static QueryNode *ParseTerm (UCArray::const_iterator &here, UCArray::const_iterator end, int defaultBoolCombine, int defaultStemMethod) { LexEl el; UCArray::const_iterator oldHere = here; if (!ParseLexEl (here, end, el)) return NULL; if (el.lexType == OpenBracketE) return ParseBracketExpression (here, end, defaultBoolCombine, defaultStemMethod); ProxMatchQueryNode *proxNode = new ProxMatchQueryNode; if (el.lexType == TermE || el.lexType == IntegerE) { TermNode termNode; termNode.term = el.text; ParseTermModifiers (here, end, termNode, defaultStemMethod); oldHere = here; // dont backtrack past here if (ParseLexEl(here, end, el) && el.lexType == NearOpE) { delete proxNode; proxNode = (ProxMatchQueryNode *)ParseTerm(here, end, defaultBoolCombine, defaultStemMethod); SetRangeValues(termNode, el.text); proxNode->terms.push_back (termNode); return proxNode; } else { here = oldHere; // backtrack proxNode->terms.push_back (termNode); ParseProxModifiers(here, end, proxNode); return proxNode; } } else if (el.lexType == QuoteE) { ParsePhrase (here, end, *proxNode, defaultStemMethod); return proxNode; } else if (el.lexType == OpenSquareBracketE) { ParseSquareBrackets (here, end, proxNode, defaultStemMethod); ParseProxModifiers (here, end, proxNode); return proxNode; } // not a term here = oldHere; delete proxNode; return NULL; } static QueryNode *ParseExpression (UCArray::const_iterator &here, UCArray::const_iterator end, int defaultBoolCombine, int defaultStemMethod) { LexEl el; QueryNode *curTree = NULL; UCArray::const_iterator oldHere = here; while (ParseLexEl (here, end, el)) { if (el.lexType == OpenSquareBracketE || el.lexType == OpenBracketE || el.lexType == TermE || el.lexType == QuoteE || el.lexType == IntegerE ) { // el.lexType == TagE) { //tag at end of term now // some type of term, back track and parse it here = oldHere; // if default==1, AND, else if==0, OR if (defaultBoolCombine) { curTree = AndAdd (curTree, ParseTerm (here, end, defaultBoolCombine, defaultStemMethod)); } else { curTree = OrAdd (curTree, ParseTerm (here, end, defaultBoolCombine, defaultStemMethod)); } } else if (el.lexType == AndOpE) { curTree = AndAdd (curTree, ParseTerm (here, end, defaultBoolCombine, defaultStemMethod)); } else if (el.lexType == OrOpE) { curTree = OrAdd (curTree, ParseTerm (here, end, defaultBoolCombine, defaultStemMethod)); } else if (el.lexType == NotOpE) { curTree = NotAdd (curTree, ParseTerm (here, end, defaultBoolCombine, defaultStemMethod)); } else if (el.lexType == CloseBracketE) { // parsebracketexpression is waiting for the last bracket, so put it back here = oldHere; break; } else break; oldHere = here; } return curTree; } QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine, int defaultStemMethod) { UCArray::const_iterator here = queryStr.begin(); UCArray::const_iterator end = queryStr.end(); return ParseExpression (here, end, defaultBoolCombine, defaultStemMethod); }