source: trunk/gsdl3/src/packages/mg/src/text/bool_parser.y@ 7431

Last change on this file since 7431 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1/**************************************************************************
2 *
3 * bool_parser - boolean query parser
4 * Copyright (C) 1994 Neil Sharman & Tim Shimmin
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 *
21 **************************************************************************/
22
23/**************************************************************************/
24%{
25
26#include "sysfuncs.h"
27
28#include "messages.h"
29
30#include "memlib.h"
31#include "words.h"
32#include "stemmer.h"
33#include "term_lists.h"
34#include "bool_tree.h"
35/* [RPAP - Jan 97: Stem Index Change] */
36#include "backend.h" /* for stemmed_dict def */
37#include "stem_search.h"
38
39#include "query_term_list.h" /* [RPAP - Feb 97: Term Frequency] */
40
/* --- routines --- */
/* query_lex is declared with its full prototype so the compiler can check
   the arguments the yylex() macro below passes to it.  An empty "()"
   parameter list carries no parameter information in C89-C17 and means
   "(void)" from C23 onwards, which would clash with the definition. */
static int query_lex(char **ptr, const char *end);
static int yyerror(char *);
/* The generated parser calls yylex(); route that call to the hand written
   lexer, handing it the module's query buffer pointers. */
#define yylex() query_lex(&ch_buf, end_buf)
45
46/* --- module variables --- */
/* Everything down to query_term_list is loaded by ParseBool() from its
   arguments before yyparse() is started. */
47static char *ch_buf; /* ptr to the character query line buffer; advanced by query_lex */
48static char *end_buf; /* end of the line buffer: ParseBool sets it to query_line + query_len */
49static bool_tree_node *tree_base = NULL; /* root of the parse tree: set by the `query' rule, returned by ParseBool */
50static TermList **term_list; /* caller's term list, passed to CreateBoolTermNode */
51static int stemmer_num; /* stemmer id passed to stemmer() and CreateBoolTermNode */
52static int stem_method; /* default stem method; a stem parameter on a term may override it */
53/* [RPAP - Jan 97: Stem Index Change] */
54stemmed_dict *p__sd; /* dictionary handed to FindWord / FindWords (note: not static) */
55static int indexed; /* when zero, stem parameters are ignored and only FindWord is used */
56/* [RPAP - Feb 97: Term Frequency] */
57static QueryTermList **query_term_list; /* query_lex adds one entry per query word via AddQueryTerm */
/* Dictionary data for the TERM token most recently returned by query_lex;
   the TERM action in the grammar passes these to CreateBoolTermNode. */
58static int word_num;
59static u_long count;
60static u_long doc_count;
61static u_long invf_ptr;
62static u_long invf_len;
63%}
63%}
64
65
/* Semantic values: a TERM token carries its word text, every non-terminal
   carries a boolean tree node. */
66%union {
67 char *text;
68 bool_tree_node *node;
69}
70
71%token <text> TERM
72%type <node> query term not and or
73
74%%
75
/* Start rule: store the finished tree in the module variable tree_base. */
76query: or { tree_base = $1;}
77;
78
79
/* A term is one of:
     TERM       - a word; the node is built from the word_num / count /
                  doc_count / invf_ptr / invf_len values query_lex set
     '(' or ')' - a bracketed sub-query, passed through unchanged
     '*'        - an N_all node
     '_'        - an N_none node */
80term: TERM { $$ = CreateBoolTermNode(term_list, $1, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); }
81 | '(' or ')' { $$ = $2; }
82 | '*' { $$ = CreateBoolTreeNode(N_all, NULL, NULL); }
83 | '_' { $$ = CreateBoolTreeNode(N_none, NULL, NULL); }
84;
85
/* '!' is a prefix operator and may be repeated; it binds tighter than AND. */
86not: term
87 | '!' not { $$ = CreateBoolTreeNode(N_not, $2, NULL); }
88;
89
/* AND is written either with '&' or by simply placing two operands next to
   each other; both forms build an N_and node.  The rule is left recursive,
   so a chain nests to the left. */
90and: and '&' not { $$ = CreateBoolTreeNode(N_and, $1, $3); }
91 | and not { $$ = CreateBoolTreeNode(N_and, $1, $2); }
92 | not
93;
94
/* OR ('|') has the lowest precedence and also nests to the left. */
95or: or '|' and { $$ = CreateBoolTreeNode(N_or, $1, $3); }
96 | and
97;
98
99%%
100
101/* Bison on one mips machine defined "const" to be nothing but
102 then did not undef it */
103#ifdef const
104#undef const
105#endif
106
107/**************************************************************************/
108
109
110/* =========================================================================
111 * Function: query_lex
112 * Description:
113 * Hand written lexical analyser for the parser.
114 * Input:
115 * ptr = ptr to a ptr into character query-line buffer
116 * end = ptr to last char in buffer
117 * Output:
118 * yylval.text = the token's text
119 * Notes:
120 * does NOT produce WILD tokens at the moment
121 * ========================================================================= */
122
123/* [RPAP - Jan 97: Stem Index Change]
124 state mode:
125 0 = Read next token
126 1 = Output word
127 2 = Output '|' or ')'
128 */
129static int query_lex(char **ptr, const char *end)
130{
131 char *buf_ptr = *ptr;
132 static int mode = 0;
133 static int termnum = 0;
134 static TermList *Terms = NULL;
135
136 if (mode == 0)
137 {
138 /* jump over whitespace */
139 while (isspace(*buf_ptr))
140 buf_ptr++;
141
142 if (inaword(buf_ptr, end))
143 {
144 static char word[MAXSTEMLEN + 1]; /* [RJM 07/98: Memory Leak] */
145 char *sWord = Xmalloc(MAXSTEMLEN + 1);
146 int stem_to_apply, method_using = -1;
147
148 PARSE_STEM_WORD(word, buf_ptr, end);
149
150 /* Extract any parameters */
151 stem_to_apply = stem_method;
152 while (buf_ptr <= end)
153 {
154 int stem_param, param_type;
155 char param[MAXPARAMLEN + 1];
156
157 param_type = 0;
158 PARSE_OPT_TERM_PARAM (param, param_type, buf_ptr, end);
159 if (!param_type)
160 break;
161
162 if (param_type == STEMPARAM)
163 {
164 stem_param = atoi (param);
165 if (errno != ERANGE && indexed && stem_param >= 0 && stem_param <= 3)
166 method_using = stem_to_apply = stem_param;
167 }
168 }
169
170 bcopy ((char *) word, (char *) sWord, *word + 1);
171 stemmer (stem_to_apply, stemmer_num, sWord);
172
173 if (stem_to_apply == 0 || !indexed || p__sd == NULL)
174 {
175 /* [RPAP - Feb 97: Term Frequency] */
176 word_num = FindWord (p__sd, sWord, &count, &doc_count, &invf_ptr, &invf_len);
177 if (word_num == -1)
178 count = doc_count = invf_ptr = invf_len = 0;
179 AddQueryTerm (query_term_list, (u_char *) word, count, method_using);
180
181 yylval.text = word;
182 *ptr = buf_ptr; /* fix up ptr */
183 Xfree (sWord);
184 return TERM;
185 }
186 else
187 {
188 *ptr = buf_ptr; /* fix up ptr */
189 termnum = 0;
190 ResetTermList (&Terms);
191 if (FindWords (p__sd, (u_char *) sWord, stem_to_apply, &Terms) > 0)
192 {
193 /* [RPAP - Feb 97: Term Frequency] */
194 int i, freq = 0;
195 for (i = 0; i < Terms->num; i++)
196 freq += Terms->TE[i].WE.count;
197 AddQueryTerm (query_term_list, word, freq, method_using);
198
199 Xfree (sWord);
200 mode = 1;
201 return '(';
202 }
203 else
204 {
205 /* Word does not exists - include in tree anyway */
206 Xfree (sWord);
207
208 /* [RPAP - Feb 97: Term Frequency] */
209 word_num = -1;
210 count = doc_count = invf_ptr = invf_len = 0;
211 AddQueryTerm (query_term_list, (u_char *) word, count, method_using);
212
213 yylval.text = word;
214 return TERM;
215 }
216 }
217 }
218 else /* NON-WORD */
219 {
220 if (*buf_ptr == '\0')
221 {
222 /* return null-char if it is one */
223 *ptr = buf_ptr; /* fix up ptr */
224 return 0;
225 }
226 else
227 {
228 /* return 1st char, and delete from buffer */
229 char c = *buf_ptr++;
230 *ptr = buf_ptr; /* fix up ptr */
231 return c;
232 }
233 }
234 }
235 else if (mode == 1)
236 {
237 yylval.text = Terms->TE[termnum].Word;
238
239 /* [RPAP - Feb 97: Term Frequency] */
240 word_num = Terms->TE[termnum].WE.word_num;
241 count = Terms->TE[termnum].WE.count;
242 doc_count = Terms->TE[termnum].WE.doc_count;
243 invf_ptr = Terms->TE[termnum].WE.invf_ptr;
244 invf_len = Terms->TE[termnum].WE.invf_len;
245
246 termnum++;
247 mode = 2;
248 return TERM;
249 }
250 else /* mode == 2 */
251 {
252 if (termnum >= Terms->num)
253 {
254 mode = 0;
255 return ')';
256 }
257 else
258 {
259 mode = 1;
260 return '|';
261 }
262 }
263}/*query_lex*/
264
/* =========================================================================
 * Function: yyerror
 * Description:
 *      Error reporting routine for the parser: passes the message text on
 *      to Message().
 * Input:
 *      s = the message text
 * Output:
 *      always returns 1
 * ========================================================================= */
static int yyerror(char *s)
{
  const int status = 1;

  /* print through a fixed "%s" format so the text is never itself
     interpreted as a format string */
  Message("%s", s);

  return status;
}
276
277
278/* =========================================================================
279 * Function: ParseBool
280 * Description:
281 * Parse a boolean query string into a term-list and a boolean parse tree
282 * Input:
283 * query_line = query line string
284 * query_len = query line length
285 * the_stem_method = stem method id used for stemming
286 * Output:
287 * the_term_list = the list of terms
288 * res = parser result code
289 * ========================================================================= */
290
291bool_tree_node *
292ParseBool(char *query_line, int query_len,
293 TermList **the_term_list, int the_stemmer_num, int the_stem_method, int *res,
294 stemmed_dict * the_sd, int is_indexed, /* [RPAP - Jan 97: Stem Index Change] */
295 QueryTermList **the_query_term_list) /* [RPAP - Feb 97: Term Frequency] */
296{
297 /* global variables to be accessed by bison/yacc created parser */
298 term_list = the_term_list;
299 stemmer_num = the_stemmer_num;
300 stem_method = the_stem_method;
301 ch_buf = query_line;
302 end_buf = query_line + query_len;
303 p__sd = the_sd; /* [RPAP - Jan 97: Stem Index Change] */
304 indexed = is_indexed; /* [RPAP - Jan 97: Stem Index Change] */
305 query_term_list = the_query_term_list; /* [RPAP - Feb 97: Term Frequency] */
306
307 FreeBoolTree(&(tree_base));
308
309 ResetTermList(term_list);
310 ResetQueryTermList(query_term_list); /* [RPAP - Feb 97: Term Frequency] */
311
312 *res = yyparse();
313
314 return tree_base;
315}
316
317
Note: See TracBrowser for help on using the repository browser.