Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: branches/New_Config_Format-branch/gsdl/src/mgpp/text/GSDLQueryLex.cpp@ 1278

Last change on this file since 1278 was 1278, checked in by (none), 24 years ago
This commit was manufactured by cvs2svn to create branch 'New_Config_Format-branch'.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 5.0 KB

Line
1	/**************************************************************************
2	*
3	* GSDLQueryLex.cpp -- Lexical analyser for a simple query language
4	* Copyright (C) 2000 Rodger McNab
5	*
6	* This program is free software; you can redistribute it and/or modify
7	* it under the terms of the GNU General Public License as published by
8	* the Free Software Foundation; either version 2 of the License, or
9	* (at your option) any later version.
10	*
11	* This program is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	* GNU General Public License for more details.
15	*
16	* You should have received a copy of the GNU General Public License
17	* along with this program; if not, write to the Free Software
18	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19	*
20	* $Id: GSDLQueryLex.cpp 1278 2000-07-12 22:03:38Z None $
21	*
22	**************************************************************************/
23
24	#include "GSDLQueryLex.h"
25	#include "unitool.h"
26	#include "words.h"
27
28	inline void AddNChar (UCArray::const_iterator &here,
29	UCArray &text,
30	int len) {
31	while (len > 0) {
32	text.push_back (*here++);
33	len--;
34	}
35	}
36
37	static bool ParseInteger (UCArray::const_iterator &here,
38	UCArray::const_iterator end,
39	LexEl &el) {
40	el.Clear();
41
42	// this version of end is used in unitool
43	UCArray::const_iterator endMinus1 = end-1;
44
45	int charLen;
46	unsigned short c; // one character lookahead
47	charLen = parse_utf8_char (here, endMinus1, &c);
48
49	// check for positive or negative
50	bool neg = false;
51	if (c == '+') {
52	AddNChar (here, el.text, charLen);
53	charLen = parse_utf8_char (here, endMinus1, &c);
54	} else if (c == '-') {
55	neg = true;
56	AddNChar (here, el.text, charLen);
57	charLen = parse_utf8_char (here, endMinus1, &c);
58	}
59
60	// read in number part
61	el.num = 0;
62	el.lexType = IntegerE;
63	while (c >= '0' && c <= '9') {
64	el.num = el.num*10 + c - '0';
65	AddNChar (here, el.text, charLen);
66	charLen = parse_utf8_char (here, endMinus1, &c);
67	}
68
69	if (neg) el.num *= -1;
70
71	return (!el.text.empty());
72	}
73
74	static bool ParseTerm (UCArray::const_iterator &here,
75	UCArray::const_iterator end,
76	UCArray &text) {
77	UCArray::const_iterator endMinus1 = end-1;
78	here = ParseIndexWord (here, endMinus1, text);
79	return !text.empty();
80	}
81
82
83	bool ParseLexEl (UCArray::const_iterator &here,
84	UCArray::const_iterator end,
85	LexEl &el) {
86	el.Clear();
87
88	// strange things can happen if here == end == 0
89	if (here == end) return false;
90
91	// this version of end is used in unitool
92	UCArray::const_iterator endMinus1 = end-1;
93
94	// ignore all white space
95	int charLen;
96	unsigned short c; // one character lookahead
97	charLen = parse_utf8_char (here, endMinus1, &c);
98	while (here != end && is_unicode_space (c)) {
99	here += charLen;
100	charLen = parse_utf8_char (here, endMinus1, &c);
101	}
102	if (here == end) return false;
103
104	if (c == '(') {
105	el.lexType = OpenBracketE;
106	AddNChar (here, el.text, charLen);
107	return true;
108
109	} else if (c == ')') {
110	el.lexType = CloseBracketE;
111	AddNChar (here, el.text, charLen);
112	return true;
113
114	} else if (c =='[') {
115	el.lexType = OpenSquareBracketE;
116	AddNChar (here, el.text, charLen);
117	return true;
118
119	} else if (c ==']') {
120	el.lexType = CloseSquareBracketE;
121	AddNChar (here, el.text, charLen);
122	return true;
123
124	} else if (c == '\"') {
125	el.lexType = QuoteE;
126	AddNChar (here, el.text, charLen);
127	return true;
128
129	} else if (c == '/') {
130	el.lexType = TermWeightE;
131	AddNChar (here, el.text, charLen);
132	return true;
133
134	} else if (c == '#') {
135	el.lexType = StemMethodE;
136	AddNChar (here, el.text, charLen);
137	return true;
138
139	} else if (c == '^') {
140	el.lexType = RangeE;
141	AddNChar (here, el.text, charLen);
142	return true;
143
144	} else if (c == '@') {
145	el.lexType = AtE;
146	AddNChar (here, el.text, charLen);
147	return true;
148
149	} else if (c == ':') {
150	el.lexType = TagE;
151	AddNChar (here, el.text, charLen);
152	return true;
153
154	} else if (c=='&') {
155	el.lexType = AndOpE;
156	AddNChar (here, el.text, charLen);
157	return true;
158
159	} else if (c == '\|') {
160	el.lexType = OrOpE;
161	AddNChar (here, el.text, charLen);
162	return true;
163
164	} else if (c == '!') {
165	el.lexType = NotOpE;
166	AddNChar (here, el.text, charLen);
167	return true;
168
169	} else if (c == '+' \|\| c == '-' \|\|
170	(c >= '0' && c <= '9')) {
171	return ParseInteger (here, end, el);
172	}
173
174	// assume it is a term of some sort
175	if (!ParseTerm (here, end, el.text)) return false;
176
177	UCArray AND; SetCStr (AND, "AND");
178	if (el.text == AND) {
179	el.lexType = AndOpE;
180	return true;
181	}
182	UCArray OR; SetCStr (OR, "OR");
183	if (el.text == OR) {
184	el.lexType = OrOpE;
185	return true;
186	}
187	UCArray NOT; SetCStr (NOT, "NOT");
188	if (el.text == NOT) {
189	el.lexType = NotOpE;
190	return true;
191	}
192	UCArray NEAR; SetCStr (NEAR, "NEAR");
193	if (PrefixLen(el.text, NEAR)==4) {
194	el.lexType = NearOpE;
195	return true;
196	}
197	el.lexType = TermE;
198	return true;
199	}
200

Note: See TracBrowser for help on using the repository browser.

Download in other formats: