/**************************************************************************
 *
 * QueryLex.cpp -- Lexical analyser for a simple query language
 * Copyright (C) 2000  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: QueryLex.cpp 855 2000-01-14 02:17:52Z sjboddie $
 *
 **************************************************************************/

#include "QueryLex.h"
#include "unitool.h"
#include "words.h"


// Appends the next len elements starting at here to text and advances
// here past them.  Does nothing when len <= 0.  No bounds check is made:
// the caller must ensure at least len elements are available.
inline void AddNChar (UCArray::const_iterator &here,
                      UCArray &text,
                      int len) {
  for (; len > 0; --len) {
    text.push_back (*here);
    ++here;
  }
}


// Reads an optionally signed run of decimal digits starting at here.
//
// On return el.lexType is IntegerE, el.num holds the (signed) value,
// el.text holds the raw characters consumed, and here points just past
// them.  Returns true if at least one character was consumed.
//
// NOTE(review): a lone '+' or '-' with no digits still yields true with
// el.num == 0; this mirrors the long-standing behaviour -- confirm that
// callers expect it before changing.
static bool ParseInteger (UCArray::const_iterator &here,
                          UCArray::const_iterator end,
                          LexEl &el) {
  el.Clear();

  // nothing to read; also avoids forming end-1 on an empty range
  if (here == end) return false;

  // this version of end is used in unitool
  UCArray::const_iterator endMinus1 = end-1;

  unsigned short c; // one character lookahead
  int charLen = parse_utf8_char (here, endMinus1, &c);

  el.num = 0;
  el.lexType = IntegerE;

  // check for positive or negative
  bool neg = false;
  if (c == '+' || c == '-') {
    neg = (c == '-');
    AddNChar (here, el.text, charLen);
    // only decode another character if one is actually left
    if (here != end) charLen = parse_utf8_char (here, endMinus1, &c);
  }

  // read in number part.  The here != end test stops the loop at the
  // end of the input instead of relying on what parse_utf8_char leaves
  // in c when it is asked to decode past the last character.
  while (here != end && c >= '0' && c <= '9') {
    el.num = el.num*10 + c - '0';
    AddNChar (here, el.text, charLen);
    if (here != end) charLen = parse_utf8_char (here, endMinus1, &c);
  }

  if (neg) el.num *= -1;

  return (!el.text.empty());
}


// Reads one term via ParseIndexWord (declared in words.h), storing it in
// text and moving here to the position ParseIndexWord returns.  Returns
// true if text is non-empty afterwards.
//
// The caller must ensure here != end, because end-1 is formed below.
static bool ParseTerm (UCArray::const_iterator &here,
                       UCArray::const_iterator end,
                       UCArray &text) {
  // this version of end is used in unitool
  UCArray::const_iterator endMinus1 = end-1;
  here = ParseIndexWord (here, endMinus1, text);
  return !text.empty();
}


// Extracts the next lexical element from [here, end) into el.
//
// Leading white space is skipped.  The element is then classified as:
//   - a single punctuation character: ( ) " # $ ^ @ :
//   - an integer, if it starts with '+', '-' or a digit
//   - otherwise a term, with the exact words AND, OR and NOT mapped to
//     the corresponding operator types
//
// here is advanced past whatever was consumed.  Returns false if the
// input is empty, contains only white space, or no term could be read.
bool ParseLexEl (UCArray::const_iterator &here,
                 UCArray::const_iterator end,
                 LexEl &el) {
  el.Clear();

  // strange things can happen if here == end == 0
  if (here == end) return false;

  // this version of end is used in unitool
  UCArray::const_iterator endMinus1 = end-1;

  // ignore all white space
  unsigned short c; // one character lookahead
  int charLen = parse_utf8_char (here, endMinus1, &c);
  while (is_unicode_space (c)) {
    here += charLen;
    // stop before asking the decoder to read past the last character
    if (here == end) return false;
    charLen = parse_utf8_char (here, endMinus1, &c);
  }

  // single character tokens
  bool isPunctuation = true;
  switch (c) {
  case '(':  el.lexType = OpenBracketE;  break;
  case ')':  el.lexType = CloseBracketE; break;
  case '\"': el.lexType = QuoteE;        break;
  case '#':  el.lexType = TermWeightE;   break;
  case '$':  el.lexType = StemMethodE;   break;
  case '^':  el.lexType = RangeE;        break;
  case '@':  el.lexType = AtE;           break;
  case ':':  el.lexType = TagE;          break;
  default:   isPunctuation = false;      break;
  }
  if (isPunctuation) {
    AddNChar (here, el.text, charLen);
    return true;
  }

  // numbers, with an optional leading sign
  if (c == '+' || c == '-' || (c >= '0' && c <= '9')) {
    return ParseInteger (here, end, el);
  }

  // assume it is a term of some sort
  if (!ParseTerm (here, end, el.text)) return false;

  // the boolean operators are recognised by exact match
  UCArray AND; SetCStr (AND, "AND");
  if (el.text == AND) {
    el.lexType = AndOpE;
    return true;
  }
  UCArray OR; SetCStr (OR, "OR");
  if (el.text == OR) {
    el.lexType = OrOpE;
    return true;
  }
  UCArray NOT; SetCStr (NOT, "NOT");
  if (el.text == NOT) {
    el.lexType = NotOpE;
    return true;
  }

  el.lexType = TermE;
  return true;
}