Context Navigation

source: indexers/trunk/mgpp/text/QueryLex.cpp@ 16528

Last change on this file since 16528 was 8692, checked in by kjdon, 19 years ago
Added the changes from Emanuel Dejanu (Simple Words) - mostly efficiency changes. For example, changing i++ to ++i, delete xxx to delete []xxx, some stuff to do with UCArrays...
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 4.5 KB

Line
1	/**************************************************************************
2	*
3	* QueryLex.cpp -- Lexical analyser for a simple query language
4	* Copyright (C) 2000 Rodger McNab
5	*
6	* This program is free software; you can redistribute it and/or modify
7	* it under the terms of the GNU General Public License as published by
8	* the Free Software Foundation; either version 2 of the License, or
9	* (at your option) any later version.
10	*
11	* This program is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	* GNU General Public License for more details.
15	*
16	* You should have received a copy of the GNU General Public License
17	* along with this program; if not, write to the Free Software
18	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19	*
20	**************************************************************************/
21
22	#include "QueryLex.h"
23	#include "unitool.h"
24	#include "words.h"
25
26	inline void AddNChar (UCArray::const_iterator &here,
27	UCArray &text,
28	int len) {
29	if (text.capacity() < text.size() + len + 1) {
30	text.reserve(text.size + len + 1);
31	}
32	while (len > 0) {
33	text.push_back (*here++);
34	--len;
35	}
36	}
37
38	static bool ParseInteger (UCArray::const_iterator &here,
39	UCArray::const_iterator end,
40	LexEl &el) {
41	el.Clear();
42
43	// this version of end is used in unitool
44	UCArray::const_iterator endMinus1 = end-1;
45
46	int charLen;
47	unsigned short c; // one character lookahead
48	charLen = parse_utf8_char (here, endMinus1, &c);
49
50	// check for positive or negative
51	bool neg = false;
52	if (c == '+') {
53	AddNChar (here, el.text, charLen);
54	charLen = parse_utf8_char (here, endMinus1, &c);
55	} else if (c == '-') {
56	neg = true;
57	AddNChar (here, el.text, charLen);
58	charLen = parse_utf8_char (here, endMinus1, &c);
59	}
60
61	// read in number part
62	el.num = 0;
63	el.lexType = IntegerE;
64	while (c >= '0' && c <= '9') {
65	el.num = el.num*10 + c - '0';
66	AddNChar (here, el.text, charLen);
67	charLen = parse_utf8_char (here, endMinus1, &c);
68	}
69
70	if (neg) el.num *= -1;
71
72	return (!el.text.empty());
73	}
74
75	static bool ParseTerm (UCArray::const_iterator &here,
76	UCArray::const_iterator end,
77	UCArray &text) {
78	UCArray::const_iterator endMinus1 = end-1;
79	here = ParseIndexWord (here, endMinus1, text);
80	return !text.empty();
81	}
82
83
84	bool ParseLexEl (UCArray::const_iterator &here,
85	UCArray::const_iterator end,
86	LexEl &el) {
87	el.Clear();
88
89	// strange things can happen if here == end == 0
90	if (here == end) return false;
91
92	// this version of end is used in unitool
93	UCArray::const_iterator endMinus1 = end-1;
94
95	// ignore all white space
96	int charLen;
97	unsigned short c; // one character lookahead
98	charLen = parse_utf8_char (here, endMinus1, &c);
99	while (here != end && is_unicode_space (c)) {
100	here += charLen;
101	charLen = parse_utf8_char (here, endMinus1, &c);
102	}
103	if (here == end) return false;
104
105	if (c == '(') {
106	el.lexType = OpenBracketE;
107	AddNChar (here, el.text, charLen);
108	return true;
109
110	} else if (c == ')') {
111	el.lexType = CloseBracketE;
112	AddNChar (here, el.text, charLen);
113	return true;
114
115	} else if (c == '\"') {
116	el.lexType = QuoteE;
117	AddNChar (here, el.text, charLen);
118	return true;
119
120	} else if (c == '#') {
121	el.lexType = TermWeightE;
122	AddNChar (here, el.text, charLen);
123	return true;
124
125	} else if (c == '$') {
126	el.lexType = StemMethodE;
127	AddNChar (here, el.text, charLen);
128	return true;
129
130	} else if (c == '^') {
131	el.lexType = RangeE;
132	AddNChar (here, el.text, charLen);
133	return true;
134
135	} else if (c == '@') {
136	el.lexType = AtE;
137	AddNChar (here, el.text, charLen);
138	return true;
139
140	} else if (c == ':') {
141	el.lexType = TagE;
142	AddNChar (here, el.text, charLen);
143	return true;
144
145	} else if (c == '+' \|\| c == '-' \|\|
146	(c >= '0' && c <= '9')) {
147	return ParseInteger (here, end, el);
148	}
149
150	// assume it is a term of some sort
151	if (!ParseTerm (here, end, el.text)) return false;
152
153	//UCArray AND; SetCStr (AND, "AND");
154	//if (el.text == AND) {
155	if (UCArrayCStrEquals(el.text, "AND")) {
156	el.lexType = AndOpE;
157	return true;
158	}
159	//UCArray OR; SetCStr (OR, "OR");
160	//if (el.text == OR) {
161	if (UCArrayCStrEquals(el.text, "OR")) {
162	el.lexType = OrOpE;
163	return true;
164	}
165	//UCArray NOT; SetCStr (NOT, "NOT");
166	//if (el.text == NOT) {
167	if (UCArrayCStrEquals(el.text, "NOT")) {
168	el.lexType = NotOpE;
169	return true;
170	}
171
172	el.lexType = TermE;
173	return true;
174	}
175

Note: See TracBrowser for help on using the repository browser.

Download in other formats: