source: trunk/gsdl/src/mgpp/text/GSDLQueryParser.cpp@ 1127

Last change on this file since 1127 was 1127, checked in by kjm18, 24 years ago

new parser and lex modules for GSDL syntax (designed for my bibliosystem)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.7 KB
Line 
1/**************************************************************************
2 *
3 * QueryParser.cpp -- Query parser for a simple query language
4 * Copyright (C) 2000 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: GSDLQueryParser.cpp 1127 2000-04-18 04:09:11Z kjm18 $
21 *
22 **************************************************************************/
23
24#include "GSDLQueryParser.h"
25#include "GSDLQueryLex.h"
26
27
28static QueryNode *ParseExpression (UCArray::const_iterator &here,
29 UCArray::const_iterator end);
30
31static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) {
32 if (t1 == NULL) return t2;
33 if (t2 == NULL) return t1;
34
35 AndQueryNode *andNode = new AndQueryNode;
36 andNode->leftNode = t1;
37 andNode->rightNode = t2;
38 return andNode;
39}
40
41static QueryNode *OrAdd (QueryNode *t1, QueryNode *t2) {
42 if (t1 == NULL) return t2;
43 if (t2 == NULL) return t1;
44
45 OrQueryNode *orNode = new OrQueryNode;
46 orNode->leftNode = t1;
47 orNode->rightNode = t2;
48 return orNode;
49}
50
51static QueryNode *NotAdd (QueryNode *t1, QueryNode *t2) {
52 if (t1 == NULL) return t2;
53 if (t2 == NULL) return t1;
54
55 NotQueryNode *notNode = new NotQueryNode;
56 notNode->queryNode = t1;
57 notNode->notNode = t2;
58 return notNode;
59}
60
61// expects the opening bracket to have already been parsed
62// and discarded
63static QueryNode *ParseBracketExpression (UCArray::const_iterator &here,
64 UCArray::const_iterator end) {
65 // get everything in the expression
66 QueryNode *curTree = ParseExpression (here, end);
67
68 // gobble up tokens until a closing bracket is found
69 // or the end of the string
70 LexEl el;
71 while (ParseLexEl (here, end, el)) {
72 if (el.lexType == CloseBracketE) break;
73 }
74
75 return curTree;
76}
77
78static int ParseInt (UCArray::const_iterator &here,
79 UCArray::const_iterator end) {
80 LexEl el;
81 UCArray::const_iterator oldHere = here;
82 if (ParseLexEl (here, end, el) && el.lexType == IntegerE)
83 return el.num;
84
85 here = oldHere; // not an integer
86 return 0;
87}
88
89// default is within 20 words
90static void SetRangeValues (TermNode &termNode,
91 UCArray &near) {
92 UCArray NEAR; SetCStr(NEAR, "NEAR");
93 if (near == NEAR) { // no modifier
94 termNode.startRange = -20;
95 termNode.endRange = 20;
96 }
97 else { // extract number
98 UCArray::const_iterator here = near.begin()+4;
99 UCArray::const_iterator end = near.end();
100 int size=0;
101 while (here != end) {
102 size = size*10 + (*here-'0');
103 here++;
104 }
105 cerr <<"size= "<<size<<endl;
106 termNode.startRange = -1 * size;
107 termNode.endRange = size;
108 }
109
110
111}
112static unsigned long GetStemMethod(LexEl &el) {
113 // here expect el to contain some of c,s,i,u
114 // stem method 0 = i,u 00
115 // stem method 1 = c,u 01
116 // stem method 2 = i, s 10
117 // stem method 3 = c,s 11
118
119 unsigned long stem = 4;
120 UCArray::const_iterator here = el.text.begin();
121 UCArray::const_iterator end = el.text.end();
122
123 unsigned char c1 = *here;
124 if (c1 == 'c'| c1 == 'i' | c1 == 'u' | c1 == 's')
125 stem = 0;
126 else return 4; // incorrect format
127
128 here++;
129 unsigned char c2 = 'a';
130 if (here !=end) c2 = *here;
131
132 if (c1 == 'c'|| c2=='c') stem |= 1;
133 if (c1 == 's'|| c2 == 's') stem |= 2;
134 cerr << "stem is "<<stem<<endl;
135 return stem;
136}
137
138
139static void ParseTermModifiers (UCArray::const_iterator &here,
140 UCArray::const_iterator end,
141 TermNode &termNode) {
142 LexEl el;
143 UCArray::const_iterator oldHere = here;
144 while (ParseLexEl (here, end, el)) {
145 if (el.lexType == TermWeightE) {
146 termNode.termWeight = ParseInt (here, end);
147
148 } else if (el.lexType == StemMethodE) {
149 // termNode.stemMethod = ParseInt (here, end);
150 cerr << "in stem method bit"<<endl;
151 oldHere = here;
152 LexEl stem;
153 if (ParseLexEl (here, end, stem) && stem.lexType == TermE) {
154 termNode.stemMethod = GetStemMethod(stem);
155 if (termNode.stemMethod == 4) { // error so backtrack
156 here = oldHere;
157 }
158 }else here = oldHere; //ignore - wrong syntax
159
160 } else if (el.lexType == RangeE) {
161 termNode.startRange = ParseInt (here, end);
162 termNode.endRange = ParseInt (here, end);
163
164 } else if (el.lexType == AtE) {
165 termNode.startRange = termNode.endRange = ParseInt (here, end);
166
167 } else {
168 // no term modifiers
169 here = oldHere;
170 break;
171 }
172
173 oldHere = here;
174 }
175}
176
177static void ParseProxModifiers (UCArray::const_iterator &here,
178 UCArray::const_iterator end,
179 ProxMatchQueryNode *proxNode) {
180 // so far only have one - the tag stuff
181 LexEl el;
182 UCArray::const_iterator oldHere = here;
183 while (ParseLexEl (here, end, el)) {
184 if (el.lexType == TagE) {
185 oldHere = here; // don't backtrack past here
186 if (ParseLexEl (here, end, el) && el.lexType == TermE) {
187 proxNode->tagNodePtr = new TagNode;
188 proxNode->tagNodePtr->tagName = el.text;
189
190 }
191 else { // error in tag
192 here = oldHere;
193 }
194 } // TagE
195 // add in other cases here
196 else {
197 // no modifiers
198 here = oldHere;
199 break;
200 }
201 oldHere = here;
202 }//while
203
204
205}
206
207// expects starting brackets to have been parsed
208static void ParseSquareBrackets(UCArray::const_iterator &here,
209 UCArray::const_iterator end,
210 ProxMatchQueryNode *proxNode) {
211
212 LexEl el;
213 while (ParseLexEl (here, end, el)) {
214 if (el.lexType == TermE || el.lexType == IntegerE) {
215 TermNode termNode;
216 termNode.term = el.text;
217 ParseTermModifiers (here, end, termNode);
218 proxNode->terms.push_back(termNode);
219 }
220 else if (el.lexType == CloseSquareBracketE) {
221 break;
222 }
223 else {
224 //error
225 break;
226 }
227 } // while
228
229}
230// expects the starting quote to have been parsed
231// and discarded
232// is now for exact phrases
233static void ParsePhrase (UCArray::const_iterator &here,
234 UCArray::const_iterator end,
235 ProxMatchQueryNode &proxNode) {
236 LexEl el;
237 bool first = true;
238 while (ParseLexEl (here, end, el)) {
239 if (el.lexType == TermE || el.lexType == IntegerE) {
240 TermNode termNode;
241 termNode.term = el.text;
242 //ParseTermModifiers (here, end, termNode);
243 if (first) {
244 first = false;
245 }
246 else {
247 termNode.startRange = -2;
248 termNode.endRange = -1;
249 }
250 proxNode.terms.push_back (termNode);
251
252 } else if (el.lexType == QuoteE) {
253 break;
254
255 } else {
256 // error
257 break;
258 }
259 }
260}
261
262static QueryNode *ParseTerm (UCArray::const_iterator &here,
263 UCArray::const_iterator end) {
264 LexEl el;
265
266 UCArray::const_iterator oldHere = here;
267 if (!ParseLexEl (here, end, el)) return NULL;
268
269 if (el.lexType == OpenBracketE)
270 return ParseBracketExpression (here, end);
271
272 ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;
273
274 // check for a tag
275 /* if (el.lexType == TagE) {
276 oldHere = here; // don't backtrack past here
277 if (!ParseLexEl (here, end, el)) return NULL;
278 if (el.lexType == TermE) {
279 proxNode->tagNodePtr = new TagNode;
280 proxNode->tagNodePtr->tagName = el.text;
281 if (!ParseLexEl (here, end, el)) return NULL;
282 }
283 }
284 */
285 if (el.lexType == TermE || el.lexType == IntegerE) {
286 TermNode termNode;
287 termNode.term = el.text;
288 ParseTermModifiers (here, end, termNode);
289 oldHere = here; // dont backtrack past here
290 if (ParseLexEl(here, end, el) && el.lexType == NearOpE) {
291 delete proxNode;
292 proxNode = (ProxMatchQueryNode *)ParseTerm(here, end);
293 SetRangeValues(termNode, el.text);
294 proxNode->terms.push_back (termNode);
295 return proxNode;
296 }
297 else {
298 here = oldHere; // backtrack
299 proxNode->terms.push_back (termNode);
300 ParseProxModifiers(here, end, proxNode);
301 return proxNode;
302 }
303 } else if (el.lexType == QuoteE) {
304 ParsePhrase (here, end, *proxNode);
305 return proxNode;
306 }
307 else if (el.lexType == OpenSquareBracketE) {
308 ParseSquareBrackets (here, end, proxNode);
309 ParseProxModifiers (here, end, proxNode);
310 return proxNode;
311 }
312
313 // not a term
314 here = oldHere;
315 delete proxNode;
316 return NULL;
317}
318
319
320static QueryNode *ParseExpression (UCArray::const_iterator &here,
321 UCArray::const_iterator end) {
322 LexEl el;
323 QueryNode *curTree = NULL;
324
325 UCArray::const_iterator oldHere = here;
326 while (ParseLexEl (here, end, el)) {
327 if (el.lexType == OpenSquareBracketE ||
328 el.lexType == OpenBracketE ||
329 el.lexType == TermE ||
330 el.lexType == QuoteE ||
331 el.lexType == IntegerE ) {
332 // el.lexType == TagE) { tag at end of term now
333 // some type of term, back track and parse it
334 here = oldHere;
335 curTree = OrAdd (curTree, ParseTerm (here, end));
336
337 } else if (el.lexType == AndOpE) {
338 curTree = AndAdd (curTree, ParseTerm (here, end));
339
340 } else if (el.lexType == OrOpE) {
341 curTree = OrAdd (curTree, ParseTerm (here, end));
342
343 } else if (el.lexType == NotOpE) {
344 curTree = NotAdd (curTree, ParseTerm (here, end));
345
346 } else if (el.lexType == CloseBracketE) {
347 // parsebracketexpression is waiting for the last bracket, so put it back
348 here = oldHere;
349 break;
350
351 } else break;
352
353 oldHere = here;
354 }
355
356 return curTree;
357}
358
359QueryNode *ParseQuery (const UCArray &queryStr) {
360 UCArray::const_iterator here = queryStr.begin();
361 UCArray::const_iterator end = queryStr.end();
362 return ParseExpression (here, end);
363}
Note: See TracBrowser for help on using the repository browser.