source: trunk/gsdl/src/mgpp/text/bool_parser.cpp@ 711

Last change on this file since 711 was 711, checked in by cs025, 25 years ago

Changes to eradicate Xmalloc

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
/* Identification string embedded by the parser generator; the #ifndef
   leaves it out of the build when `lint` is defined. */
1#ifndef lint
2static char yysccsid[] = "@(#)yaccpar 1.8 (Berkeley) 01/20/90";
3#endif
4#define YYBYACC 1
5#line 25 "bool_parser.ypp"
6
7#include "sysfuncs.h"
8
9#include "messages.h"
10
11#include "memlib.h"
12#include "words.h"
13#include "stemmer.h"
14#include "term_lists.h"
15#include "bool_tree.h"
16/* [RPAP - Jan 97: Stem Index Change] */
17#include "backend.h" /* for stemmed_dict def */
18#include "stem_search.h"
19
20#include "query_term_list.h" /* [RPAP - Feb 97: Term Frequency] */
21
22/* --- routines --- */
23static int query_lex(char **, const char *);
24static int yyerror(char *);
25int yyparse();
/* The generated parser calls yylex(); route it to the hand written lexer,
   which reads from (and advances) the module level query buffer. */
26#define yylex() query_lex(&ch_buf, end_buf)
27
28/* --- module variables --- */
/* All of these are (re)loaded by ParseBool() before yyparse() runs, or are
   written by query_lex() for the grammar actions to read. */
29static char *ch_buf; /* current read position in the query line buffer */
30static char *end_buf; /* end of the query line: ParseBool sets it to query_line + query_len */
/* Root of the most recently parsed tree; set by the rule 1 action in yyparse. */
31static bool_tree_node *tree_base = NULL;
/* Caller's term list, handed to CreateBoolTermNode by the grammar actions. */
32static TermList **term_list;
/* Stem method applied to a word unless the word carries its own parameter. */
33static int stem_method;
34/* [RPAP - Jan 97: Stem Index Change] */
/* Stemmed dictionary used for the word lookups in query_lex (not static). */
35stemmed_dict *p__sd;
/* Non-zero enables per-word stem parameters and stemmed dictionary expansion. */
36static int indexed;
37/* [RPAP - Feb 97: Term Frequency] */
38static QueryTermList **query_term_list;
/* Dictionary data of the TERM token most recently returned by query_lex;
   the rule 2 action passes them on to CreateBoolTermNode. */
39static int word_num;
40static u_long count;
41static u_long doc_count;
42static u_long invf_ptr;
43static u_long invf_len;
44static int stemmer_num; /* George Buchanan - Aug 99: Stemmer id */
45#line 67 "bool_parser.ypp"
/* Semantic value carried on the parser's value stack: `text` is what
   query_lex stores in yylval for a TERM token, `node` is what the grammar
   actions build. */
46typedef union {
47 char *text;
48 bool_tree_node *node;
49} YYSTYPE;
50#line 51 "y.tab.c"
/* Token code query_lex returns for a query word. */
51#define TERM 257
/* Token code yyparse substitutes for the lookahead during error recovery. */
52#define YYERRCODE 256
/* Generated tables -- do not edit by hand.  The descriptions below are taken
   from how yyparse() indexes them. */
/* yylhs[rule]: index of the nonterminal on the rule's left-hand side. */
53short yylhs[] = { -1,
54 0, 1, 1, 1, 1, 2, 2, 3, 3, 3,
55 4, 4,
56};
/* yylen[rule]: number of stack entries popped when the rule is reduced. */
57short yylen[] = { 2,
58 1, 1, 3, 1, 1, 1, 2, 3, 2, 1,
59 3, 1,
60};
/* yydefred[state]: rule reduced without consulting the lookahead; 0 = none. */
61short yydefred[] = { 0,
62 2, 0, 4, 5, 0, 0, 6, 10, 0, 0,
63 0, 7, 0, 9, 0, 3, 8, 0,
64};
/* yydgoto[nonterminal]: state entered after a reduction when the
   yygindex/yycheck lookup does not match. */
65short yydgoto[] = { 6,
66 7, 8, 9, 10,
67};
/* yysindex[state]: offset added to the token code to find the shift entry
   in yytable/yycheck; 0 = the state has no shifts. */
68short yysindex[] = { -32,
69 0, -32, 0, 0, -32, 0, 0, 0, -33, -118,
70 -37, 0, -32, 0, -32, 0, 0, -33,
71};
/* yyrindex[state]: same as yysindex, but the entry found is a rule to reduce. */
72short yyrindex[] = { 0,
73 0, 0, 0, 0, 0, 0, 0, 0, 2, 12,
74 0, 0, 0, 0, 0, 0, 0, 3,
75};
/* yygindex[nonterminal]: offset added to the exposed state number to find
   the goto entry in yytable/yycheck; 0 = always use yydgoto. */
76short yygindex[] = { 0,
77 0, 6, -2, 14,
78};
/* Highest index yyparse() accepts into yytable[] / yycheck[]. */
79#define YYTABLESIZE 225
/* Generated table -- do not edit by hand.  Shared storage for shift targets,
   reduce rules and goto targets; yyparse() only trusts entry i after
   confirming yycheck[i] holds the token (or state) it looked up with. */
80short yytable[] = { 5,
81 5, 12, 11, 16, 13, 15, 2, 2, 3, 3,
82 12, 1, 18, 0, 14, 11, 0, 0, 17, 0,
83 0, 0, 0, 14, 0, 0, 0, 0, 0, 0,
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
85 0, 0, 12, 11, 0, 0, 0, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 4, 4, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 15, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
91 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
93 0, 0, 0, 0, 0, 12, 11, 0, 0, 0,
94 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 1, 1,
104};
/* Generated table -- do not edit by hand.  yycheck[i] names the token code
   (or state number, for gotos) that yytable[i] belongs to; -1 marks unused
   slots.  There is no YYERRCODE (256) entry in it. */
105short yycheck[] = { 33,
106 33, 0, 0, 41, 38, 124, 40, 40, 42, 42,
107 5, 0, 15, -1, 9, 2, -1, -1, 13, -1,
108 -1, -1, -1, 18, -1, -1, -1, -1, -1, -1,
109 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
110 -1, -1, 41, 41, -1, -1, -1, -1, -1, -1,
111 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
112 -1, 95, 95, -1, -1, -1, -1, -1, -1, -1,
113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114 -1, -1, -1, -1, -1, -1, 124, -1, -1, -1,
115 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
118 -1, -1, -1, -1, -1, 124, 124, -1, -1, -1,
119 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
126 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
127 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
128 -1, -1, -1, 257, 257,
129};
/* State yyparse() enters once the start symbol has been reduced in state 0. */
130#define YYFINAL 6
/* Debug tracing is compiled out unless the build defines YYDEBUG non-zero. */
131#ifndef YYDEBUG
132#define YYDEBUG 0
133#endif
/* Largest token code that has an entry in yyname[]. */
134#define YYMAXTOKEN 257
135#if YYDEBUG
/* Printable token names indexed by token code, used only in trace output;
   a 0 entry is printed as "illegal-symbol". */
136char *yyname[] = {
137"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138"'!'",0,0,0,0,"'&'",0,"'('","')'","'*'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1390,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'_'",0,0,0,0,0,
1400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1410,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1420,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1430,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
144"TERM",
145};
/* Grammar rules as text, indexed by rule number (the same numbers the
   `case` labels in yyparse() use); used only in trace output. */
146char *yyrule[] = {
147"$accept : query",
148"query : or",
149"term : TERM",
150"term : '(' or ')'",
151"term : '*'",
152"term : '_'",
153"not : term",
154"not : '!' not",
155"and : and '&' not",
156"and : and not",
157"and : not",
158"or : or '|' and",
159"or : and",
160};
161#endif
/* Drop the current lookahead token (yyparse treats a negative yychar as
   "no token read yet"). */
162#define yyclearin (yychar=(-1))
/* Leave error-recovery mode. */
163#define yyerrok (yyerrflag=0)
/* Stack depth: YYSTACKSIZE and YYMAXDEPTH are kept equal; whichever one the
   build defines wins, otherwise both default to 500. */
164#ifdef YYSTACKSIZE
165#ifndef YYMAXDEPTH
166#define YYMAXDEPTH YYSTACKSIZE
167#endif
168#else
169#ifdef YYMAXDEPTH
170#define YYSTACKSIZE YYMAXDEPTH
171#else
172#define YYSTACKSIZE 500
173#define YYMAXDEPTH 500
174#endif
175#endif
176int yydebug; /* trace level; only consulted when built with YYDEBUG */
177int yynerrs; /* incremented at yyerrlab; reset at the start of yyparse */
178int yyerrflag; /* > 0 while recovering from an error; decremented on each shift */
179int yychar; /* lookahead token code, negative when none has been read */
180short *yyssp; /* top of the state stack */
181YYSTYPE *yyvsp; /* top of the value stack, moved in step with yyssp */
182YYSTYPE yyval; /* value of the rule being reduced ($$) */
183YYSTYPE yylval; /* value of the token just returned by yylex() */
184short yyss[YYSTACKSIZE]; /* state stack storage */
185YYSTYPE yyvs[YYSTACKSIZE]; /* value stack storage */
186#define yystacksize YYSTACKSIZE
187#line 101 "bool_parser.ypp"
188
189/* Bison on one mips machine defined "const" to be nothing but
190 then did not undef it */
191#ifdef const
192#undef const
193#endif
194
195/**************************************************************************/
196
197
198/* =========================================================================
199 * Function: query_lex
200 * Description:
201 * Hand written lexical analyser for the parser.
202 * Input:
203 * ptr = ptr to a ptr into character query-line buffer
204 * end = ptr to last char in buffer
205 * Output:
206 * yylval.text = the token's text
207 * Notes:
208 * does NOT produce WILD tokens at the moment
209 * ========================================================================= */
210
211/* [RPAP - Jan 97: Stem Index Change]
212 state mode:
213 0 = Read next token
214 1 = Output word
215 2 = Output '|' or ')'
216 */
217static int query_lex(char **ptr, const char *end)
218{
219 char *buf_ptr = *ptr;
220 static int mode = 0;
221 static int termnum = 0;
222 static TermList *Terms = NULL;
223
224 if (mode == 0)
225 {
226 /* jump over whitespace */
227 while (isspace(*buf_ptr))
228 buf_ptr++;
229
230 if (inaword((unsigned char *) buf_ptr, (unsigned char *) end))
231 {
232 static char word[MAXSTEMLEN + 1]; /* [RJM 07/98: Memory Leak] */
233 char *sWord = new char[MAXSTEMLEN + 1];
234 int stem_to_apply, method_using = -1;
235
236 PARSE_STEM_WORD((unsigned char *) word, (unsigned char *) buf_ptr,
237 (unsigned char *) end);
238
239 /* Extract any parameters */
240 stem_to_apply = stem_method;
241 while (buf_ptr <= end)
242 {
243 int stem_param, param_type;
244 char param[MAXPARAMLEN + 1];
245
246 param_type = 0;
247 PARSE_OPT_TERM_PARAM (param, param_type, buf_ptr, end);
248 if (!param_type)
249 break;
250
251 if (param_type == STEMPARAM)
252 {
253 stem_param = atoi (param);
254 if (errno != ERANGE && indexed && stem_param >= 0 && stem_param <= 3)
255 method_using = stem_to_apply = stem_param;
256 }
257 }
258
259 bcopy ((char *) word, (char *) sWord, *word + 1);
260 stemmer (stem_to_apply, stemmer_num, (unsigned char *) sWord);
261
262 if (stem_to_apply == 0 || !indexed || p__sd == NULL)
263 {
264 /* [RPAP - Feb 97: Term Frequency] */
265 word_num = FindWord (p__sd, (unsigned char *) sWord, &count, &doc_count, &invf_ptr, &invf_len);
266 if (word_num == -1)
267 count = doc_count = invf_ptr = invf_len = 0;
268 QueryTermList_AddQueryTerm (query_term_list, (u_char *) word, count, method_using);
269
270 yylval.text = word;
271 *ptr = buf_ptr; /* fix up ptr */
272 delete sWord;
273 return TERM;
274 }
275 else
276 {
277 *ptr = buf_ptr; /* fix up ptr */
278 termnum = 0;
279 TermList_reset (&Terms);
280 if (FindWords (p__sd, (u_char *) sWord, stem_to_apply, &Terms) > 0)
281 {
282 /* [RPAP - Feb 97: Term Frequency] */
283 int i, freq = 0;
284 for (i = 0; i < Terms->num; i++)
285 freq += Terms->TE[i].WE.count;
286 QueryTermList_AddQueryTerm (query_term_list, (unsigned char *) word, freq, method_using);
287
288 delete sWord;
289 mode = 1;
290 return '(';
291 }
292 else
293 {
294 /* Word does not exists - include in tree anyway */
295 delete sWord;
296
297 /* [RPAP - Feb 97: Term Frequency] */
298 word_num = -1;
299 count = doc_count = invf_ptr = invf_len = 0;
300 QueryTermList_AddQueryTerm (query_term_list, (u_char *) word, count, method_using);
301
302 yylval.text = word;
303 return TERM;
304 }
305 }
306 }
307 else /* NON-WORD */
308 {
309 if (*buf_ptr == '\0')
310 {
311 /* return null-char if it is one */
312 *ptr = buf_ptr; /* fix up ptr */
313 return 0;
314 }
315 else
316 {
317 /* return 1st char, and delete from buffer */
318 char c = *buf_ptr++;
319 *ptr = buf_ptr; /* fix up ptr */
320 return c;
321 }
322 }
323 }
324 else if (mode == 1)
325 {
326 yylval.text = (char *) Terms->TE[termnum].Word;
327
328 /* [RPAP - Feb 97: Term Frequency] */
329 word_num = Terms->TE[termnum].WE.word_num;
330 count = Terms->TE[termnum].WE.count;
331 doc_count = Terms->TE[termnum].WE.doc_count;
332 invf_ptr = Terms->TE[termnum].WE.invf_ptr;
333 invf_len = Terms->TE[termnum].WE.invf_len;
334
335 termnum++;
336 mode = 2;
337 return TERM;
338 }
339 else /* mode == 2 */
340 {
341 if (termnum >= Terms->num)
342 {
343 mode = 0;
344 return ')';
345 }
346 else
347 {
348 mode = 1;
349 return '|';
350 }
351 }
352}/*query_lex*/
353
354/* =========================================================================
355 * Function: yyerror
356 * Description:
357 * Input:
358 * Output:
359 * ========================================================================= */
360static int yyerror(char *s)
361{
362 Message("%s", s);
363 return(1);
364}
365
366
367/* =========================================================================
368 * Function: ParseBool
369 * Description:
370 * Parse a boolean query string into a term-list and a boolean parse tree
371 * Input:
372 * query_line = query line string
373 * query_len = query line length
374 * the_stem_method = stem method id used for stemming
375 * Output:
376 * the_term_list = the list of terms
377 * res = parser result code
378 * ========================================================================= */
379
380extern "C" bool_tree_node *
381ParseBool(char *query_line, int query_len,
382 TermList **the_term_list, int the_stem_method, int the_stemmer_num, int *res,
383 stemmed_dict * the_sd, int is_indexed, /* [RPAP - Jan 97: Stem Index Change] */
384 QueryTermList **the_query_term_list) /* [RPAP - Feb 97: Term Frequency] */
385{ /* global variables to be accessed by bison/yacc created parser */
386 term_list = the_term_list;
387 stem_method = the_stem_method;
388 stemmer_num = the_stemmer_num;
389 ch_buf = query_line;
390 end_buf = query_line + query_len;
391 p__sd = the_sd; /* [RPAP - Jan 97: Stem Index Change] */
392 indexed = is_indexed; /* [RPAP - Jan 97: Stem Index Change] */
393 query_term_list = the_query_term_list; /* [RPAP - Feb 97: Term Frequency] */
394
395 FreeBoolTree(&(tree_base));
396
397 TermList_reset(term_list);
398
399 QueryTermList_reset(query_term_list); /* [RPAP - Feb 97: Term Frequency] */
400
401 *res = yyparse();
402
403 return tree_base;
404}
405
406
407#line 408 "y.tab.c"
/* Exits usable from grammar actions: give up, succeed, or enter error handling. */
408#define YYABORT goto yyabort
409#define YYACCEPT goto yyaccept
410#define YYERROR goto yyerrlab
/* Generated table driven parser -- do not edit by hand.
   Pulls tokens from yylex() (i.e. query_lex), runs the grammar actions and
   leaves the resulting tree in tree_base (rule 1).
   Returns 0 when the input is accepted, 1 on a syntax error or when the
   state stack would overflow. */
411int
412yyparse()
413{
414 register int yym, yyn, yystate;
415#if YYDEBUG
416 register char *yys;
417 extern char *getenv();
418
 /* In debug builds the trace level is the first digit of $YYDEBUG. */
419 if (yys = getenv("YYDEBUG"))
420 {
421 yyn = *yys;
422 if (yyn >= '0' && yyn <= '9')
423 yydebug = yyn - '0';
424 }
425#endif
426
427 yynerrs = 0;
428 yyerrflag = 0;
429 yychar = (-1);
430
 /* Both stacks start with state 0 at the bottom. */
431 yyssp = yyss;
432 yyvsp = yyvs;
433 *yyssp = yystate = 0;
434
435yyloop:
 /* A state with a default reduction reduces without reading a token. */
436 if (yyn = yydefred[yystate]) goto yyreduce;
 /* Fetch a lookahead if there is none; a negative lexer result is turned
    into 0, the end-of-input code. */
437 if (yychar < 0)
438 {
439 if ((yychar = yylex()) < 0) yychar = 0;
440#if YYDEBUG
441 if (yydebug)
442 {
443 yys = 0;
444 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
445 if (!yys) yys = "illegal-symbol";
446 printf("yydebug: state %d, reading %d (%s)\n", yystate,
447 yychar, yys);
448 }
449#endif
450 }
 /* Shift: push the target state and the token's value, consume the token. */
451 if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
452 yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
453 {
454#if YYDEBUG
455 if (yydebug)
456 printf("yydebug: state %d, shifting to state %d\n",
457 yystate, yytable[yyn]);
458#endif
459 if (yyssp >= yyss + yystacksize - 1)
460 {
461 goto yyoverflow;
462 }
463 *++yyssp = yystate = yytable[yyn];
464 *++yyvsp = yylval;
465 yychar = (-1);
466 if (yyerrflag > 0) --yyerrflag;
467 goto yyloop;
468 }
 /* Reduce selected by the lookahead. */
469 if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
470 yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
471 {
472 yyn = yytable[yyn];
473 goto yyreduce;
474 }
 /* Neither shift nor reduce applies: syntax error.  It is reported only
    when the parser is not already recovering from an earlier one. */
475 if (yyerrflag) goto yyinrecovery;
476#ifdef lint
477 goto yynewerror;
478#endif
479yynewerror:
480 yyerror("syntax error");
481#ifdef lint
482 goto yyerrlab;
483#endif
484yyerrlab:
485 ++yynerrs;
486yyinrecovery:
487 if (yyerrflag < 3)
488 {
489 yyerrflag = 3;
 /* Pop states until one can shift the error token.  yycheck[] in this
    file has no YYERRCODE entry, so the loop ends at yyabort once the
    stack is empty. */
490 for (;;)
491 {
492 if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
493 yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
494 {
495#if YYDEBUG
496 if (yydebug)
497 printf("yydebug: state %d, error recovery shifting\
498 to state %d\n", *yyssp, yytable[yyn]);
499#endif
500 if (yyssp >= yyss + yystacksize - 1)
501 {
502 goto yyoverflow;
503 }
504 *++yyssp = yystate = yytable[yyn];
505 *++yyvsp = yylval;
506 goto yyloop;
507 }
508 else
509 {
510#if YYDEBUG
511 if (yydebug)
512 printf("yydebug: error recovery discarding state %d\n",
513 *yyssp);
514#endif
515 if (yyssp <= yyss) goto yyabort;
516 --yyssp;
517 --yyvsp;
518 }
519 }
520 }
521 else
522 {
 /* Already recovering: throw the offending token away and retry, unless
    it is end of input. */
523 if (yychar == 0) goto yyabort;
524#if YYDEBUG
525 if (yydebug)
526 {
527 yys = 0;
528 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
529 if (!yys) yys = "illegal-symbol";
530 printf("yydebug: state %d, error recovery discards token %d (%s)\n",
531 yystate, yychar, yys);
532 }
533#endif
534 yychar = (-1);
535 goto yyloop;
536 }
537yyreduce:
538#if YYDEBUG
539 if (yydebug)
540 printf("yydebug: state %d, reducing by rule %d (%s)\n",
541 yystate, yyn, yyrule[yyn]);
542#endif
 /* yym = length of the rule's right-hand side.  The rule's value defaults
    to that of its first right-hand-side symbol; rules without a case
    below (6, 10, 12) rely on that default. */
543 yym = yylen[yyn];
544 yyval = yyvsp[1-yym];
545 switch (yyn)
546 {
547case 1:
548#line 77 "bool_parser.ypp"
549{ tree_base = yyvsp[0].node;}
550break;
551case 2:
552#line 81 "bool_parser.ypp"
553{ yyval.node = CreateBoolTermNode(term_list, yyvsp[0].text, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); }
554break;
555case 3:
556#line 82 "bool_parser.ypp"
557{ yyval.node = yyvsp[-1].node; }
558break;
559case 4:
560#line 83 "bool_parser.ypp"
561{ yyval.node = CreateBoolTreeNode(N_all, NULL, NULL); }
562break;
563case 5:
564#line 84 "bool_parser.ypp"
565{ yyval.node = CreateBoolTreeNode(N_none, NULL, NULL); }
566break;
567case 7:
568#line 88 "bool_parser.ypp"
569{ yyval.node = CreateBoolTreeNode(N_not, yyvsp[0].node, NULL); }
570break;
571case 8:
572#line 91 "bool_parser.ypp"
573{ yyval.node = CreateBoolTreeNode(N_and, yyvsp[-2].node, yyvsp[0].node); }
574break;
575case 9:
576#line 92 "bool_parser.ypp"
577{ yyval.node = CreateBoolTreeNode(N_and, yyvsp[-1].node, yyvsp[0].node); }
578break;
579case 11:
580#line 96 "bool_parser.ypp"
581{ yyval.node = CreateBoolTreeNode(N_or, yyvsp[-2].node, yyvsp[0].node); }
582break;
583#line 584 "y.tab.c"
584 }
 /* Pop the right-hand side; yym is reused for the left-hand-side
    nonterminal from here on. */
585 yyssp -= yym;
586 yystate = *yyssp;
587 yyvsp -= yym;
588 yym = yylhs[yyn];
 /* Start symbol reduced at the bottom of the stack: move to the final
    state and accept if the lookahead is end of input. */
589 if (yystate == 0 && yym == 0)
590 {
591#if YYDEBUG
592 if (yydebug)
593 printf("yydebug: after reduction, shifting from state 0 to\
594 state %d\n", YYFINAL);
595#endif
596 yystate = YYFINAL;
597 *++yyssp = YYFINAL;
598 *++yyvsp = yyval;
599 if (yychar < 0)
600 {
601 if ((yychar = yylex()) < 0) yychar = 0;
602#if YYDEBUG
603 if (yydebug)
604 {
605 yys = 0;
606 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
607 if (!yys) yys = "illegal-symbol";
608 printf("yydebug: state %d, reading %d (%s)\n",
609 YYFINAL, yychar, yys);
610 }
611#endif
612 }
613 if (yychar == 0) goto yyaccept;
614 goto yyloop;
615 }
 /* Goto: table lookup keyed by the exposed state, else the nonterminal's
    default target. */
616 if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
617 yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
618 yystate = yytable[yyn];
619 else
620 yystate = yydgoto[yym];
621#if YYDEBUG
622 if (yydebug)
623 printf("yydebug: after reduction, shifting from state %d \
624to state %d\n", *yyssp, yystate);
625#endif
626 if (yyssp >= yyss + yystacksize - 1)
627 {
628 goto yyoverflow;
629 }
630 *++yyssp = yystate;
631 *++yyvsp = yyval;
632 goto yyloop;
 /* Overflow reports through yyerror and then falls into yyabort. */
633yyoverflow:
634 yyerror("yacc stack overflow");
635yyabort:
636 return (1);
637yyaccept:
638 return (0);
639}
Note: See TracBrowser for help on using the repository browser.