1 | /*
|
---|
2 | * Copyright (c) 1995, the EUROPAGATE consortium (see below).
|
---|
3 | *
|
---|
4 | * The EUROPAGATE consortium members are:
|
---|
5 | *
|
---|
6 | * University College Dublin
|
---|
7 | * Danmarks Teknologiske Videnscenter
|
---|
8 | * An Chomhairle Leabharlanna
|
---|
9 | * Consejo Superior de Investigaciones Cientificas
|
---|
10 | *
|
---|
11 | * Permission to use, copy, modify, distribute, and sell this software and
|
---|
12 | * its documentation, in whole or in part, for any purpose, is hereby granted,
|
---|
13 | * provided that:
|
---|
14 | *
|
---|
15 | * 1. This copyright and permission notice appear in all copies of the
|
---|
16 | * software and its documentation. Notices of copyright or attribution
|
---|
17 | * which appear at the beginning of any file must remain unchanged.
|
---|
18 | *
|
---|
19 | * 2. The names of EUROPAGATE or the project partners may not be used to
|
---|
20 | * endorse or promote products derived from this software without specific
|
---|
21 | * prior written permission.
|
---|
22 | *
|
---|
23 | * 3. Users of this software (implementors and gateway operators) agree to
|
---|
24 | * inform the EUROPAGATE consortium of their use of the software. This
|
---|
25 | * information will be used to evaluate the EUROPAGATE project and the
|
---|
26 | * software, and to plan further developments. The consortium may use
|
---|
27 | * the information in later publications.
|
---|
28 | *
|
---|
29 | * 4. Users of this software agree to make their best efforts, when
|
---|
30 | * documenting their use of the software, to acknowledge the EUROPAGATE
|
---|
31 | * consortium, and the role played by the software in their work.
|
---|
32 | *
|
---|
33 | * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
|
---|
34 | * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
|
---|
35 | * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
---|
36 | * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
|
---|
37 | * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
|
---|
38 | * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
|
---|
39 | * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
|
---|
40 | * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
|
---|
41 | * USE OR PERFORMANCE OF THIS SOFTWARE.
|
---|
42 | *
|
---|
43 | */
|
---|
44 | /* CCL - lexical analysis
|
---|
45 | * Europagate, 1995
|
---|
46 | *
|
---|
47 | * $Log$
|
---|
48 | * Revision 1.1 2000/08/03 03:09:59 johnmcp
|
---|
49 | * Added the YAZ toolkit source to the packages directory (for z39.50 stuff)
|
---|
50 | *
|
---|
51 | * Revision 1.13 2000/02/08 10:39:53 adam
|
---|
52 | * Added a few functions to set name of operands, etc.
|
---|
53 | *
|
---|
54 | * Revision 1.12 2000/01/31 13:15:21 adam
|
---|
55 | * Removed uses of assert(3). Cleanup of ODR. CCL parser update so
|
---|
56 | * that some characters are not surrounded by spaces in resulting term.
|
---|
57 | * ILL-code updates.
|
---|
58 | *
|
---|
59 | * Revision 1.11 1999/11/30 13:47:11 adam
|
---|
60 | * Improved installation. Moved header files to include/yaz.
|
---|
61 | *
|
---|
62 | * Revision 1.10 1998/07/07 15:49:41 adam
|
---|
63 | * Added braces to avoid warning.
|
---|
64 | *
|
---|
65 | * Revision 1.9 1998/02/11 11:53:33 adam
|
---|
66 | * Changed code so that it compiles as C++.
|
---|
67 | *
|
---|
68 | * Revision 1.8 1997/09/29 08:56:38 adam
|
---|
69 | * Changed CCL parser to be thread safe. New type, CCL_parser, declared
|
---|
70 | * and a constructor/destructor pair, ccl_parser_create/ccl_parser_destroy, has
|
---|
71 | * been added.
|
---|
72 | *
|
---|
73 | * Revision 1.7 1997/09/01 08:48:12 adam
|
---|
74 | * New windows NT/95 port using MSV5.0. Only a few changes made
|
---|
75 | * to avoid warnings.
|
---|
76 | *
|
---|
77 | * Revision 1.6 1997/04/30 08:52:07 quinn
|
---|
78 | * Null
|
---|
79 | *
|
---|
80 | * Revision 1.5 1996/10/11 15:00:26 adam
|
---|
81 | * CCL parser from Europagate Email gateway 1.0.
|
---|
82 | *
|
---|
83 | * Revision 1.10 1995/07/11 12:28:31 adam
|
---|
84 | * New function: ccl_token_simple (split into simple tokens) and
|
---|
85 | * ccl_token_del (delete tokens).
|
---|
86 | *
|
---|
87 | * Revision 1.9 1995/05/16 09:39:28 adam
|
---|
88 | * LICENSE.
|
---|
89 | *
|
---|
90 | * Revision 1.8 1995/05/11 14:03:57 adam
|
---|
91 | * Changes in the reading of qualifier(s). New function: ccl_qual_fitem.
|
---|
92 | * New variable ccl_case_sensitive, which controls whether reserved
|
---|
93 | * words and field names are case sensitive or not.
|
---|
94 | *
|
---|
95 | * Revision 1.7 1995/04/19 12:11:24 adam
|
---|
96 | * Minor change.
|
---|
97 | *
|
---|
98 | * Revision 1.6 1995/04/17 09:31:48 adam
|
---|
99 | * Improved handling of qualifiers. Aliases or reserved words.
|
---|
100 | *
|
---|
101 | * Revision 1.5 1995/02/23 08:32:00 adam
|
---|
102 | * Changed header.
|
---|
103 | *
|
---|
104 | * Revision 1.3 1995/02/15 17:42:16 adam
|
---|
105 | * Minor changes of the api of this module. FILE* argument added
|
---|
106 | * to ccl_pr_tree.
|
---|
107 | *
|
---|
108 | * Revision 1.2 1995/02/14 19:55:13 adam
|
---|
109 | * Header files ccl.h/cclp.h are gone! They have been merged and
|
---|
110 | * moved to ../include/ccl.h.
|
---|
111 | * Node kind(s) in ccl_rpn_node have changed names.
|
---|
112 | *
|
---|
113 | * Revision 1.1 1995/02/13 12:35:21 adam
|
---|
114 | * First version of CCL. Qualifiers aren't handled yet.
|
---|
115 | *
|
---|
116 | */
|
---|
117 |
|
---|
118 | #include <stdio.h>
|
---|
119 | #include <string.h>
|
---|
120 | #include <stdlib.h>
|
---|
121 |
|
---|
122 | #include <yaz/ccl.h>
|
---|
123 |
|
---|
124 | /*
|
---|
125 | * token_cmp: Compare token with keyword(s)
|
---|
126 | * kw: Keyword list. Each keyword is separated by space.
|
---|
127 | * token: CCL token.
|
---|
128 | * return: 1 if token string matches one of the keywords in list;
|
---|
129 | * 0 otherwise.
|
---|
130 | */
|
---|
131 | static int token_cmp (CCL_parser cclp, const char *kw, struct ccl_token *token)
|
---|
132 | {
|
---|
133 | const char *cp1 = kw;
|
---|
134 | const char *cp2;
|
---|
135 | if (!kw)
|
---|
136 | return 0;
|
---|
137 | while ((cp2 = strchr (cp1, ' ')))
|
---|
138 | {
|
---|
139 | if (token->len == (size_t) (cp2-cp1))
|
---|
140 | {
|
---|
141 | if (cclp->ccl_case_sensitive)
|
---|
142 | {
|
---|
143 | if (!memcmp (cp1, token->name, token->len))
|
---|
144 | return 1;
|
---|
145 | }
|
---|
146 | else
|
---|
147 | {
|
---|
148 | if (!ccl_memicmp (cp1, token->name, token->len))
|
---|
149 | return 1;
|
---|
150 | }
|
---|
151 | }
|
---|
152 | cp1 = cp2+1;
|
---|
153 | }
|
---|
154 | if (cclp->ccl_case_sensitive)
|
---|
155 | return token->len == strlen(cp1)
|
---|
156 | && !memcmp (cp1, token->name, token->len);
|
---|
157 | return token->len == strlen(cp1) &&
|
---|
158 | !ccl_memicmp (cp1, token->name, token->len);
|
---|
159 | }
|
---|
160 |
|
---|
161 | /*
|
---|
162 | * ccl_token_simple: tokenize CCL raw tokens
|
---|
163 | */
|
---|
164 | struct ccl_token *ccl_token_simple (const char *command)
|
---|
165 | {
|
---|
166 | const char *cp = command;
|
---|
167 | struct ccl_token *first = NULL;
|
---|
168 | struct ccl_token *last = NULL;
|
---|
169 |
|
---|
170 | while (1)
|
---|
171 | {
|
---|
172 | while (*cp && strchr (" \t\r\n", *cp))
|
---|
173 | {
|
---|
174 | cp++;
|
---|
175 | continue;
|
---|
176 | }
|
---|
177 | if (!first)
|
---|
178 | {
|
---|
179 | first = last = (struct ccl_token *)malloc (sizeof (*first));
|
---|
180 | ccl_assert (first);
|
---|
181 | last->prev = NULL;
|
---|
182 | }
|
---|
183 | else
|
---|
184 | {
|
---|
185 | last->next = (struct ccl_token *)malloc (sizeof(*first));
|
---|
186 | ccl_assert (last->next);
|
---|
187 | last->next->prev = last;
|
---|
188 | last = last->next;
|
---|
189 | }
|
---|
190 | last->next = NULL;
|
---|
191 | last->name = cp;
|
---|
192 | last->len = 1;
|
---|
193 | switch (*cp++)
|
---|
194 | {
|
---|
195 | case '\0':
|
---|
196 | last->kind = CCL_TOK_EOL;
|
---|
197 | return first;
|
---|
198 | case '\"':
|
---|
199 | last->kind = CCL_TOK_TERM;
|
---|
200 | last->name = cp;
|
---|
201 | last->len = 0;
|
---|
202 | while (*cp && *cp != '\"')
|
---|
203 | {
|
---|
204 | cp++;
|
---|
205 | ++ last->len;
|
---|
206 | }
|
---|
207 | if (*cp == '\"')
|
---|
208 | cp++;
|
---|
209 | break;
|
---|
210 | default:
|
---|
211 | while (*cp && !strchr (" \t\n\r", *cp))
|
---|
212 | {
|
---|
213 | cp++;
|
---|
214 | ++ last->len;
|
---|
215 | }
|
---|
216 | last->kind = CCL_TOK_TERM;
|
---|
217 | }
|
---|
218 | }
|
---|
219 | return first;
|
---|
220 | }
|
---|
221 |
|
---|
222 |
|
---|
223 | /*
|
---|
224 | * ccl_tokenize: tokenize CCL command string.
|
---|
225 | * return: CCL token list.
|
---|
226 | */
|
---|
227 | struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command)
|
---|
228 | {
|
---|
229 | const char *cp = command;
|
---|
230 | struct ccl_token *first = NULL;
|
---|
231 | struct ccl_token *last = NULL;
|
---|
232 |
|
---|
233 | while (1)
|
---|
234 | {
|
---|
235 | while (*cp && strchr (" \t\r\n", *cp))
|
---|
236 | {
|
---|
237 | cp++;
|
---|
238 | continue;
|
---|
239 | }
|
---|
240 | if (!first)
|
---|
241 | {
|
---|
242 | first = last = (struct ccl_token *)malloc (sizeof (*first));
|
---|
243 | ccl_assert (first);
|
---|
244 | last->prev = NULL;
|
---|
245 | }
|
---|
246 | else
|
---|
247 | {
|
---|
248 | last->next = (struct ccl_token *)malloc (sizeof(*first));
|
---|
249 | ccl_assert (last->next);
|
---|
250 | last->next->prev = last;
|
---|
251 | last = last->next;
|
---|
252 | }
|
---|
253 | last->next = NULL;
|
---|
254 | last->name = cp;
|
---|
255 | last->len = 1;
|
---|
256 | switch (*cp++)
|
---|
257 | {
|
---|
258 | case '\0':
|
---|
259 | last->kind = CCL_TOK_EOL;
|
---|
260 | return first;
|
---|
261 | case '(':
|
---|
262 | last->kind = CCL_TOK_LP;
|
---|
263 | break;
|
---|
264 | case ')':
|
---|
265 | last->kind = CCL_TOK_RP;
|
---|
266 | break;
|
---|
267 | case ',':
|
---|
268 | last->kind = CCL_TOK_COMMA;
|
---|
269 | break;
|
---|
270 | case '%':
|
---|
271 | case '!':
|
---|
272 | last->kind = CCL_TOK_PROX;
|
---|
273 | while (*cp == '%' || *cp == '!')
|
---|
274 | {
|
---|
275 | ++ last->len;
|
---|
276 | cp++;
|
---|
277 | }
|
---|
278 | break;
|
---|
279 | case '>':
|
---|
280 | case '<':
|
---|
281 | case '=':
|
---|
282 | if (*cp == '=' || *cp == '<' || *cp == '>')
|
---|
283 | {
|
---|
284 | cp++;
|
---|
285 | last->kind = CCL_TOK_REL;
|
---|
286 | ++ last->len;
|
---|
287 | }
|
---|
288 | else if (cp[-1] == '=')
|
---|
289 | last->kind = CCL_TOK_EQ;
|
---|
290 | else
|
---|
291 | last->kind = CCL_TOK_REL;
|
---|
292 | break;
|
---|
293 | case '-':
|
---|
294 | last->kind = CCL_TOK_MINUS;
|
---|
295 | break;
|
---|
296 | case '\"':
|
---|
297 | last->kind = CCL_TOK_TERM;
|
---|
298 | last->name = cp;
|
---|
299 | last->len = 0;
|
---|
300 | while (*cp && *cp != '\"')
|
---|
301 | {
|
---|
302 | cp++;
|
---|
303 | ++ last->len;
|
---|
304 | }
|
---|
305 | if (*cp == '\"')
|
---|
306 | cp++;
|
---|
307 | break;
|
---|
308 | default:
|
---|
309 | while (*cp && !strchr ("(),%!><=- \t\n\r", *cp))
|
---|
310 | {
|
---|
311 | cp++;
|
---|
312 | ++ last->len;
|
---|
313 | }
|
---|
314 | if (token_cmp (cclp, cclp->ccl_token_and, last))
|
---|
315 | last->kind = CCL_TOK_AND;
|
---|
316 | else if (token_cmp (cclp, cclp->ccl_token_or, last))
|
---|
317 | last->kind = CCL_TOK_OR;
|
---|
318 | else if (token_cmp (cclp, cclp->ccl_token_not, last))
|
---|
319 | last->kind = CCL_TOK_NOT;
|
---|
320 | else if (token_cmp (cclp, cclp->ccl_token_set, last))
|
---|
321 | last->kind = CCL_TOK_SET;
|
---|
322 | else
|
---|
323 | last->kind = CCL_TOK_TERM;
|
---|
324 | }
|
---|
325 | }
|
---|
326 | return first;
|
---|
327 | }
|
---|
328 |
|
---|
329 | struct ccl_token *ccl_tokenize (const char *command)
|
---|
330 | {
|
---|
331 | CCL_parser cclp = ccl_parser_create ();
|
---|
332 | struct ccl_token *list;
|
---|
333 |
|
---|
334 | list = ccl_parser_tokenize (cclp, command);
|
---|
335 |
|
---|
336 | ccl_parser_destroy (cclp);
|
---|
337 | return list;
|
---|
338 | }
|
---|
339 |
|
---|
340 | /*
|
---|
341 | * ccl_token_del: delete CCL tokens
|
---|
342 | */
|
---|
343 | void ccl_token_del (struct ccl_token *list)
|
---|
344 | {
|
---|
345 | struct ccl_token *list1;
|
---|
346 |
|
---|
347 | while (list)
|
---|
348 | {
|
---|
349 | list1 = list->next;
|
---|
350 | free (list);
|
---|
351 | list = list1;
|
---|
352 | }
|
---|
353 | }
|
---|
354 |
|
---|
/*
 * ccl_strdup: duplicate a string into memory obtained from malloc.
 *   str:    NUL-terminated string to copy; may be NULL.
 *   return: newly allocated copy, to be released with free(), or NULL
 *           if str is NULL or the allocation fails.
 */
static char *ccl_strdup (const char *str)
{
    size_t size;
    char *copy;

    if (!str)
        return NULL;
    size = strlen (str) + 1;            /* include the terminating NUL */
    copy = (char *) malloc (size);
    if (copy)
        memcpy (copy, str, size);
    return copy;
}
|
---|
362 |
|
---|
363 | CCL_parser ccl_parser_create (void)
|
---|
364 | {
|
---|
365 | CCL_parser p = (CCL_parser)malloc (sizeof(*p));
|
---|
366 | if (!p)
|
---|
367 | return p;
|
---|
368 | p->look_token = NULL;
|
---|
369 | p->error_code = 0;
|
---|
370 | p->error_pos = NULL;
|
---|
371 | p->bibset = NULL;
|
---|
372 |
|
---|
373 | p->ccl_token_and = ccl_strdup("and");
|
---|
374 | p->ccl_token_or = ccl_strdup("or");
|
---|
375 | p->ccl_token_not = ccl_strdup("not andnot");
|
---|
376 | p->ccl_token_set = ccl_strdup("set");
|
---|
377 | p->ccl_case_sensitive = 1;
|
---|
378 |
|
---|
379 | return p;
|
---|
380 | }
|
---|
381 |
|
---|
382 | void ccl_parser_destroy (CCL_parser p)
|
---|
383 | {
|
---|
384 | if (!p)
|
---|
385 | return;
|
---|
386 | free (p->ccl_token_and);
|
---|
387 | free (p->ccl_token_or);
|
---|
388 | free (p->ccl_token_not);
|
---|
389 | free (p->ccl_token_set);
|
---|
390 | free (p);
|
---|
391 | }
|
---|
392 |
|
---|
393 | void ccl_parser_set_op_and (CCL_parser p, const char *op)
|
---|
394 | {
|
---|
395 | if (p && op)
|
---|
396 | p->ccl_token_and = ccl_strdup (op);
|
---|
397 | }
|
---|
398 |
|
---|
399 | void ccl_parser_set_op_or (CCL_parser p, const char *op)
|
---|
400 | {
|
---|
401 | if (p && op)
|
---|
402 | p->ccl_token_or = ccl_strdup (op);
|
---|
403 | }
|
---|
404 | void ccl_parser_set_op_not (CCL_parser p, const char *op)
|
---|
405 | {
|
---|
406 | if (p && op)
|
---|
407 | p->ccl_token_not = ccl_strdup (op);
|
---|
408 | }
|
---|
409 | void ccl_parser_set_op_set (CCL_parser p, const char *op)
|
---|
410 | {
|
---|
411 | if (p && op)
|
---|
412 | p->ccl_token_set = ccl_strdup (op);
|
---|
413 | }
|
---|
414 |
|
---|
415 | void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag)
|
---|
416 | {
|
---|
417 | if (p)
|
---|
418 | p->ccl_case_sensitive = case_sensitivity_flag;
|
---|
419 | }
|
---|