Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: trunk/gsdl/src/mgpp/text/QueryLex.cpp@ 879

Last change on this file since 879 was 855, checked in by sjboddie, 24 years ago
Rodgers new C++ mg
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 4.3 KB

Rev	Line
[855]	1	/**************************************************************************
	2	*
	3	* QueryLex.cpp -- Lexical analyser for a simple query language
	4	* Copyright (C) 2000 Rodger McNab
	5	*
	6	* This program is free software; you can redistribute it and/or modify
	7	* it under the terms of the GNU General Public License as published by
	8	* the Free Software Foundation; either version 2 of the License, or
	9	* (at your option) any later version.
	10	*
	11	* This program is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	* GNU General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU General Public License
	17	* along with this program; if not, write to the Free Software
	18	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	19	*
	20	* $Id: QueryLex.cpp 855 2000-01-14 02:17:52Z sjboddie $
	21	*
	22	**************************************************************************/
	23
	24	#include "QueryLex.h"
	25	#include "unitool.h"
	26	#include "words.h"
	27
	28	inline void AddNChar (UCArray::const_iterator &here,
	29	UCArray &text,
	30	int len) {
	31	while (len > 0) {
	32	text.push_back (*here++);
	33	len--;
	34	}
	35	}
	36
	37	static bool ParseInteger (UCArray::const_iterator &here,
	38	UCArray::const_iterator end,
	39	LexEl &el) {
	40	el.Clear();
	41
	42	// this version of end is used in unitool
	43	UCArray::const_iterator endMinus1 = end-1;
	44
	45	int charLen;
	46	unsigned short c; // one character lookahead
	47	charLen = parse_utf8_char (here, endMinus1, &c);
	48
	49	// check for positive or negative
	50	bool neg = false;
	51	if (c == '+') {
	52	AddNChar (here, el.text, charLen);
	53	charLen = parse_utf8_char (here, endMinus1, &c);
	54	} else if (c == '-') {
	55	neg = true;
	56	AddNChar (here, el.text, charLen);
	57	charLen = parse_utf8_char (here, endMinus1, &c);
	58	}
	59
	60	// read in number part
	61	el.num = 0;
	62	el.lexType = IntegerE;
	63	while (c >= '0' && c <= '9') {
	64	el.num = el.num*10 + c - '0';
	65	AddNChar (here, el.text, charLen);
	66	charLen = parse_utf8_char (here, endMinus1, &c);
	67	}
	68
	69	if (neg) el.num *= -1;
	70
	71	return (!el.text.empty());
	72	}
	73
	74	static bool ParseTerm (UCArray::const_iterator &here,
	75	UCArray::const_iterator end,
	76	UCArray &text) {
	77	UCArray::const_iterator endMinus1 = end-1;
	78	here = ParseIndexWord (here, endMinus1, text);
	79	return !text.empty();
	80	}
	81
	82
	83	bool ParseLexEl (UCArray::const_iterator &here,
	84	UCArray::const_iterator end,
	85	LexEl &el) {
	86	el.Clear();
	87
	88	// strange things can happen if here == end == 0
	89	if (here == end) return false;
	90
	91	// this version of end is used in unitool
	92	UCArray::const_iterator endMinus1 = end-1;
	93
	94	// ignore all white space
	95	int charLen;
	96	unsigned short c; // one character lookahead
	97	charLen = parse_utf8_char (here, endMinus1, &c);
	98	while (here != end && is_unicode_space (c)) {
	99	here += charLen;
	100	charLen = parse_utf8_char (here, endMinus1, &c);
	101	}
	102	if (here == end) return false;
	103
	104	if (c == '(') {
	105	el.lexType = OpenBracketE;
	106	AddNChar (here, el.text, charLen);
	107	return true;
	108
	109	} else if (c == ')') {
	110	el.lexType = CloseBracketE;
	111	AddNChar (here, el.text, charLen);
	112	return true;
	113
	114	} else if (c == '\"') {
	115	el.lexType = QuoteE;
	116	AddNChar (here, el.text, charLen);
	117	return true;
	118
	119	} else if (c == '#') {
	120	el.lexType = TermWeightE;
	121	AddNChar (here, el.text, charLen);
	122	return true;
	123
	124	} else if (c == '$') {
	125	el.lexType = StemMethodE;
	126	AddNChar (here, el.text, charLen);
	127	return true;
	128
	129	} else if (c == '^') {
	130	el.lexType = RangeE;
	131	AddNChar (here, el.text, charLen);
	132	return true;
	133
	134	} else if (c == '@') {
	135	el.lexType = AtE;
	136	AddNChar (here, el.text, charLen);
	137	return true;
	138
	139	} else if (c == ':') {
	140	el.lexType = TagE;
	141	AddNChar (here, el.text, charLen);
	142	return true;
	143
	144	} else if (c == '+' \|\| c == '-' \|\|
	145	(c >= '0' && c <= '9')) {
	146	return ParseInteger (here, end, el);
	147	}
	148
	149	// assume it is a term of some sort
	150	if (!ParseTerm (here, end, el.text)) return false;
	151
	152	UCArray AND; SetCStr (AND, "AND");
	153	if (el.text == AND) {
	154	el.lexType = AndOpE;
	155	return true;
	156	}
	157	UCArray OR; SetCStr (OR, "OR");
	158	if (el.text == OR) {
	159	el.lexType = OrOpE;
	160	return true;
	161	}
	162	UCArray NOT; SetCStr (NOT, "NOT");
	163	if (el.text == NOT) {
	164	el.lexType = NotOpE;
	165	return true;
	166	}
	167
	168	el.lexType = TermE;
	169	return true;
	170	}
	171

Note: See TracBrowser for help on using the repository browser.

Download in other formats: