/************************************************************************** * * QueryParser.cpp -- Query parser for a simple query language * Copyright (C) 2000 Rodger McNab * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **************************************************************************/ #include "QueryParser.h" #include "QueryLex.h" static QueryNode *ParseExpression (UCArray::const_iterator &here, UCArray::const_iterator end); static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) { if (t1 == NULL) return t2; if (t2 == NULL) return t1; AndQueryNode *andNode = new AndQueryNode; andNode->leftNode = t1; andNode->rightNode = t2; return andNode; } static QueryNode *OrAdd (QueryNode *t1, QueryNode *t2) { if (t1 == NULL) return t2; if (t2 == NULL) return t1; OrQueryNode *orNode = new OrQueryNode; orNode->leftNode = t1; orNode->rightNode = t2; return orNode; } static QueryNode *NotAdd (QueryNode *t1, QueryNode *t2) { if (t1 == NULL) return t2; if (t2 == NULL) return t1; NotQueryNode *notNode = new NotQueryNode; notNode->queryNode = t1; notNode->notNode = t2; return notNode; } // expects the opening bracket to have already been parsed // and discarded static QueryNode *ParseBracketExpression (UCArray::const_iterator &here, UCArray::const_iterator end) { // get everything in the expression QueryNode *curTree = ParseExpression (here, end); // gobble up tokens until a closing bracket is found // or the end of the string LexEl el; while (ParseLexEl (here, end, el)) { if (el.lexType == CloseBracketE) break; } return curTree; } static int ParseInt (UCArray::const_iterator &here, UCArray::const_iterator end) { LexEl el; UCArray::const_iterator oldHere = here; if (ParseLexEl (here, end, el) && el.lexType == IntegerE) return el.num; here = oldHere; // not an integer return 0; } static void ParseTermModifiers (UCArray::const_iterator &here, UCArray::const_iterator end, TermNode &termNode) { LexEl el; UCArray::const_iterator oldHere = here; while (ParseLexEl (here, end, el)) { if (el.lexType == TermWeightE) { termNode.termWeight = ParseInt (here, end); } else if (el.lexType == StemMethodE) { termNode.stemMethod = ParseInt (here, end); } else if (el.lexType == RangeE) { termNode.startRange = ParseInt (here, end); termNode.endRange = ParseInt (here, end); } else if (el.lexType == AtE) { termNode.startRange = termNode.endRange = ParseInt (here, end); } else { // no term modifiers here = oldHere; break; } oldHere = here; } } // expects the starting quote to have been parsed // and discarded static void ParsePhrase (UCArray::const_iterator &here, UCArray::const_iterator end, ProxMatchQueryNode &proxNode) { LexEl el; while (ParseLexEl (here, end, el)) { if (el.lexType == TermE || el.lexType == IntegerE) { TermNode termNode; termNode.term = el.text; ParseTermModifiers (here, end, termNode); proxNode.terms.push_back (termNode); } else if (el.lexType == QuoteE) { break; } else { // error break; } } } static QueryNode *ParseTerm (UCArray::const_iterator &here, UCArray::const_iterator end) { LexEl el; UCArray::const_iterator oldHere = here; if (!ParseLexEl (here, end, el)) return NULL; if (el.lexType == OpenBracketE) return ParseBracketExpression (here, end); ProxMatchQueryNode *proxNode = new ProxMatchQueryNode; // check for a tag if (el.lexType == TagE) { oldHere = here; // don't backtrack past here if (!ParseLexEl (here, end, el)) return NULL; if (el.lexType == TermE) { proxNode->tagNodePtr = new TagNode; proxNode->tagNodePtr->tagName = el.text; if (!ParseLexEl (here, end, el)) return NULL; } } if (el.lexType == TermE || el.lexType == IntegerE) { TermNode termNode; termNode.term = el.text; ParseTermModifiers (here, end, termNode); proxNode->terms.push_back (termNode); return proxNode; } else if (el.lexType == QuoteE) { ParsePhrase (here, end, *proxNode); return proxNode; } // not a term here = oldHere; delete proxNode; return NULL; } static QueryNode *ParseExpression (UCArray::const_iterator &here, UCArray::const_iterator end) { LexEl el; QueryNode *curTree = NULL; UCArray::const_iterator oldHere = here; while (ParseLexEl (here, end, el)) { if (el.lexType == OpenBracketE || el.lexType == TermE || el.lexType == QuoteE || el.lexType == IntegerE || el.lexType == TagE) { // some type of term, back track and parse it here = oldHere; curTree = OrAdd (curTree, ParseTerm (here, end)); } else if (el.lexType == AndOpE) { curTree = AndAdd (curTree, ParseTerm (here, end)); } else if (el.lexType == OrOpE) { curTree = OrAdd (curTree, ParseTerm (here, end)); } else if (el.lexType == NotOpE) { curTree = NotAdd (curTree, ParseTerm (here, end)); } else break; oldHere = here; } return curTree; } QueryNode *ParseQuery (const UCArray &queryStr) { UCArray::const_iterator here = queryStr.begin(); UCArray::const_iterator end = queryStr.end(); return ParseExpression (here, end); }