source: main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mg/src/text/bool_parser.y@ 23508

Last change on this file since 23508 was 23508, checked in by sjm84, 13 years ago

Committing 64 bit changes into the branch

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1/**************************************************************************
2 *
3 * bool_parser - boolean query parser
4 * Copyright (C) 1994 Neil Sharman & Tim Shimmin
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 *
21 **************************************************************************/
22
23/**************************************************************************/
24%{
25
26#include "sysfuncs.h"
27
28#include "messages.h"
29
30#include "memlib.h"
31#include "words.h"
32#include "stemmer.h"
33#include "term_lists.h"
34#include "bool_tree.h"
35/* [RPAP - Jan 97: Stem Index Change] */
36#include "backend.h" /* for stemmed_dict def */
37#include "stem_search.h"
38
39#include "query_term_list.h" /* [RPAP - Feb 97: Term Frequency] */
40
41/* --- routines --- */
42static int query_lex();
43static int yyerror(char *);
44#define yylex() query_lex(&ch_buf, end_buf)
45
46/* --- module variables --- */
47static char *ch_buf; /* ptr to the character query line buffer */
48static char *end_buf; /* ptr to the last character of the line buffer */
49static bool_tree_node *tree_base = NULL;
50static TermList **term_list;
51static int stemmer_num;
52static int stem_method;
53/* [RPAP - Jan 97: Stem Index Change] */
54stemmed_dict *p__sd;
55static int indexed;
56/* [RPAP - Feb 97: Term Frequency] */
57static QueryTermList **query_term_list;
58static int word_num;
59static mg_u_long count;
60static mg_u_long doc_count;
61static mg_u_long invf_ptr;
62static mg_u_long invf_len;
63%}
64
65
66%union {
67 char *text;
68 bool_tree_node *node;
69}
70
71%token <text> TERM
72%type <node> query term not and or
73
74%%
75
/* A complete query is one or-expression; its tree becomes the parse result. */
query:  or
          { tree_base = $1; }
;


/* A leaf: a single term, a parenthesised sub-query, or one of the special
   symbols '*' (N_all) and '_' (N_none). */
term:   TERM
          { $$ = CreateBoolTermNode(term_list, $1, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); }
      | '(' or ')'
          { $$ = $2; }
      | '*'
          { $$ = CreateBoolTreeNode(N_all, NULL, NULL); }
      | '_'
          { $$ = CreateBoolTreeNode(N_none, NULL, NULL); }
;

/* Negation binds tighter than conjunction and may be repeated. */
not:    term
          { $$ = $1; }
      | '!' not
          { $$ = CreateBoolTreeNode(N_not, $2, NULL); }
;

/* Conjunction is left-associative; the '&' is optional, so two adjacent
   operands are and-ed together. */
and:    and '&' not
          { $$ = CreateBoolTreeNode(N_and, $1, $3); }
      | and not
          { $$ = CreateBoolTreeNode(N_and, $1, $2); }
      | not
          { $$ = $1; }
;

/* Disjunction is left-associative and binds loosest. */
or:     or '|' and
          { $$ = CreateBoolTreeNode(N_or, $1, $3); }
      | and
          { $$ = $1; }
;
98
99%%
100
101/* Bison on one mips machine defined "const" to be nothing but
102 then did not undef it */
103#ifdef const
104#undef const
105#endif
106
107/**************************************************************************/
108
109
110/* =========================================================================
111 * Function: query_lex
112 * Description:
113 * Hand written lexical analyser for the parser.
114 * Input:
115 * ptr = ptr to a ptr into character query-line buffer
116 * end = ptr to last char in buffer
117 * Output:
118 * yylval.text = the token's text
119 * Notes:
120 * does NOT produce WILD tokens at the moment
121 * ========================================================================= */
122
123/* [RPAP - Jan 97: Stem Index Change]
124 state mode:
125 0 = Read next token
126 1 = Output word
127 2 = Output '|' or ')'
128 */
129static int query_lex(char **ptr, const char *end)
130{
131 char *buf_ptr = *ptr;
132 static int mode = 0;
133 static int termnum = 0;
134 static TermList *Terms = NULL;
135
136 if (mode == 0)
137 {
138 /* jump over whitespace */
139 buf_ptr = skipspace(buf_ptr, end);
140
141 if (inaword(buf_ptr, end))
142 {
143 static char word[MAXSTEMLEN + 1]; /* [RJM 07/98: Memory Leak] */
144 char *sWord = Xmalloc(MAXSTEMLEN + 1);
145 int stem_to_apply, method_using = -1;
146
147 PARSE_STEM_WORD(word, buf_ptr, end);
148
149 /* Extract any parameters */
150 stem_to_apply = stem_method;
151 while (buf_ptr <= end)
152 {
153 int stem_param, param_type;
154 char param[MAXPARAMLEN + 1];
155
156 param_type = 0;
157 PARSE_OPT_TERM_PARAM (param, param_type, buf_ptr, end);
158 if (!param_type)
159 break;
160
161 if (param_type == STEMPARAM)
162 {
163 stem_param = atoi (param);
164 if (errno != ERANGE && indexed && stem_param >= 0 && stem_param <= 3)
165 method_using = stem_to_apply = stem_param;
166 }
167 }
168
169 bcopy ((char *) word, (char *) sWord, *word + 1);
170 stemmer (stem_to_apply, stemmer_num, sWord);
171
172 if (stem_to_apply == 0 || !indexed || p__sd == NULL)
173 {
174 /* [RPAP - Feb 97: Term Frequency] */
175 word_num = FindWord (p__sd, sWord, &count, &doc_count, &invf_ptr, &invf_len);
176 if (word_num == -1)
177 count = doc_count = invf_ptr = invf_len = 0;
178 AddQueryTerm (query_term_list, (u_char *) word, count, method_using);
179
180 yylval.text = word;
181 *ptr = buf_ptr; /* fix up ptr */
182 Xfree (sWord);
183 return TERM;
184 }
185 else
186 {
187 *ptr = buf_ptr; /* fix up ptr */
188 termnum = 0;
189 ResetTermList (&Terms);
190 if (FindWords (p__sd, (u_char *) sWord, stem_to_apply, &Terms) > 0)
191 {
192 /* [RPAP - Feb 97: Term Frequency] */
193 int i, freq = 0;
194 for (i = 0; i < Terms->num; i++)
195 freq += Terms->TE[i].WE.count;
196 AddQueryTerm (query_term_list, word, freq, method_using);
197
198 Xfree (sWord);
199 mode = 1;
200 return '(';
201 }
202 else
203 {
204 /* Word does not exists - include in tree anyway */
205 Xfree (sWord);
206
207 /* [RPAP - Feb 97: Term Frequency] */
208 word_num = -1;
209 count = doc_count = invf_ptr = invf_len = 0;
210 AddQueryTerm (query_term_list, (u_char *) word, count, method_using);
211
212 yylval.text = word;
213 return TERM;
214 }
215 }
216 }
217 else /* NON-WORD */
218 {
219 if (*buf_ptr == '\0')
220 {
221 /* return null-char if it is one */
222 *ptr = buf_ptr; /* fix up ptr */
223 return 0;
224 }
225 else
226 {
227 /* return 1st char, and delete from buffer */
228 char c = *buf_ptr++;
229 *ptr = buf_ptr; /* fix up ptr */
230 return c;
231 }
232 }
233 }
234 else if (mode == 1)
235 {
236 yylval.text = Terms->TE[termnum].Word;
237
238 /* [RPAP - Feb 97: Term Frequency] */
239 word_num = Terms->TE[termnum].WE.word_num;
240 count = Terms->TE[termnum].WE.count;
241 doc_count = Terms->TE[termnum].WE.doc_count;
242 invf_ptr = Terms->TE[termnum].WE.invf_ptr;
243 invf_len = Terms->TE[termnum].WE.invf_len;
244
245 termnum++;
246 mode = 2;
247 return TERM;
248 }
249 else /* mode == 2 */
250 {
251 if (termnum >= Terms->num)
252 {
253 mode = 0;
254 return ')';
255 }
256 else
257 {
258 mode = 1;
259 return '|';
260 }
261 }
262}/*query_lex*/
263
/* =========================================================================
 * Function: yyerror
 * Description:
 *      Error callback for the generated parser; hands the parser's
 *      message text to Message().
 * Input:
 *      s = error message text
 * Output:
 *      always returns 1
 * ========================================================================= */
static int yyerror(char *s)
{
  Message("%s", s);
  return 1;
}
275
276
277/* =========================================================================
278 * Function: ParseBool
279 * Description:
280 * Parse a boolean query string into a term-list and a boolean parse tree
281 * Input:
282 * query_line = query line string
283 * query_len = query line length
284 * the_stem_method = stem method id used for stemming
285 * Output:
286 * the_term_list = the list of terms
287 * res = parser result code
288 * ========================================================================= */
289
290bool_tree_node *
291ParseBool(char *query_line, int query_len,
292 TermList **the_term_list, int the_stemmer_num, int the_stem_method, int *res,
293 stemmed_dict * the_sd, int is_indexed, /* [RPAP - Jan 97: Stem Index Change] */
294 QueryTermList **the_query_term_list) /* [RPAP - Feb 97: Term Frequency] */
295{
296 /* global variables to be accessed by bison/yacc created parser */
297 term_list = the_term_list;
298 stemmer_num = the_stemmer_num;
299 stem_method = the_stem_method;
300 ch_buf = query_line;
301 end_buf = query_line + query_len;
302 p__sd = the_sd; /* [RPAP - Jan 97: Stem Index Change] */
303 indexed = is_indexed; /* [RPAP - Jan 97: Stem Index Change] */
304 query_term_list = the_query_term_list; /* [RPAP - Feb 97: Term Frequency] */
305
306 FreeBoolTree(&(tree_base));
307
308 ResetTermList(term_list);
309 ResetQueryTermList(query_term_list); /* [RPAP - Feb 97: Term Frequency] */
310
311 *res = yyparse();
312
313 return tree_base;
314}
315
316
Note: See TracBrowser for help on using the repository browser.