[1343] | 1 | /*
|
---|
| 2 | * Copyright (c) 1995, the EUROPAGATE consortium (see below).
|
---|
| 3 | *
|
---|
| 4 | * The EUROPAGATE consortium members are:
|
---|
| 5 | *
|
---|
| 6 | * University College Dublin
|
---|
| 7 | * Danmarks Teknologiske Videnscenter
|
---|
| 8 | * An Chomhairle Leabharlanna
|
---|
| 9 | * Consejo Superior de Investigaciones Cientificas
|
---|
| 10 | *
|
---|
| 11 | * Permission to use, copy, modify, distribute, and sell this software and
|
---|
| 12 | * its documentation, in whole or in part, for any purpose, is hereby granted,
|
---|
| 13 | * provided that:
|
---|
| 14 | *
|
---|
| 15 | * 1. This copyright and permission notice appear in all copies of the
|
---|
| 16 | * software and its documentation. Notices of copyright or attribution
|
---|
| 17 | * which appear at the beginning of any file must remain unchanged.
|
---|
| 18 | *
|
---|
| 19 | * 2. The names of EUROPAGATE or the project partners may not be used to
|
---|
| 20 | * endorse or promote products derived from this software without specific
|
---|
| 21 | * prior written permission.
|
---|
| 22 | *
|
---|
| 23 | * 3. Users of this software (implementors and gateway operators) agree to
|
---|
| 24 | * inform the EUROPAGATE consortium of their use of the software. This
|
---|
| 25 | * information will be used to evaluate the EUROPAGATE project and the
|
---|
| 26 | * software, and to plan further developments. The consortium may use
|
---|
| 27 | * the information in later publications.
|
---|
| 28 | *
|
---|
| 29 | * 4. Users of this software agree to make their best efforts, when
|
---|
| 30 | * documenting their use of the software, to acknowledge the EUROPAGATE
|
---|
| 31 | * consortium, and the role played by the software in their work.
|
---|
| 32 | *
|
---|
| 33 | * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
|
---|
| 34 | * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
|
---|
| 35 | * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
---|
| 36 | * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
|
---|
| 37 | * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
|
---|
| 38 | * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
|
---|
| 39 | * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
|
---|
| 40 | * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
|
---|
| 41 | * USE OR PERFORMANCE OF THIS SOFTWARE.
|
---|
| 42 | *
|
---|
| 43 | */
|
---|
| 44 | /* CCL - lexical analysis
|
---|
| 45 | * Europagate, 1995
|
---|
| 46 | *
|
---|
| 47 | * $Log$
|
---|
| 48 | * Revision 1.1 2000/08/03 03:09:59 johnmcp
|
---|
| 49 | * Added the YAZ toolkit source to the packages directory (for z39.50 stuff)
|
---|
| 50 | *
|
---|
| 51 | * Revision 1.13 2000/02/08 10:39:53 adam
|
---|
| 52 | * Added a few functions to set name of operands, etc.
|
---|
| 53 | *
|
---|
| 54 | * Revision 1.12 2000/01/31 13:15:21 adam
|
---|
| 55 | * Removed uses of assert(3). Cleanup of ODR. CCL parser update so
|
---|
| 56 | * that some characters are not surrounded by spaces in resulting term.
|
---|
| 57 | * ILL-code updates.
|
---|
| 58 | *
|
---|
| 59 | * Revision 1.11 1999/11/30 13:47:11 adam
|
---|
| 60 | * Improved installation. Moved header files to include/yaz.
|
---|
| 61 | *
|
---|
| 62 | * Revision 1.10 1998/07/07 15:49:41 adam
|
---|
| 63 | * Added braces to avoid warning.
|
---|
| 64 | *
|
---|
| 65 | * Revision 1.9 1998/02/11 11:53:33 adam
|
---|
| 66 | * Changed code so that it compiles as C++.
|
---|
| 67 | *
|
---|
| 68 | * Revision 1.8 1997/09/29 08:56:38 adam
|
---|
| 69 | * Changed CCL parser to be thread safe. New type, CCL_parser, declared
|
---|
| 70 | * and a constructor/destructor pair, ccl_parser_create/ccl_parser_destroy, has
|
---|
| 71 | * been added.
|
---|
| 72 | *
|
---|
| 73 | * Revision 1.7 1997/09/01 08:48:12 adam
|
---|
| 74 | * New windows NT/95 port using MSV5.0. Only a few changes made
|
---|
| 75 | * to avoid warnings.
|
---|
| 76 | *
|
---|
| 77 | * Revision 1.6 1997/04/30 08:52:07 quinn
|
---|
| 78 | * Null
|
---|
| 79 | *
|
---|
| 80 | * Revision 1.5 1996/10/11 15:00:26 adam
|
---|
| 81 | * CCL parser from Europagate Email gateway 1.0.
|
---|
| 82 | *
|
---|
| 83 | * Revision 1.10 1995/07/11 12:28:31 adam
|
---|
| 84 | * New function: ccl_token_simple (split into simple tokens) and
|
---|
| 85 | * ccl_token_del (delete tokens).
|
---|
| 86 | *
|
---|
| 87 | * Revision 1.9 1995/05/16 09:39:28 adam
|
---|
| 88 | * LICENSE.
|
---|
| 89 | *
|
---|
| 90 | * Revision 1.8 1995/05/11 14:03:57 adam
|
---|
| 91 | * Changes in the reading of qualifier(s). New function: ccl_qual_fitem.
|
---|
| 92 | * New variable ccl_case_sensitive, which controls whether reserved
|
---|
| 93 | * words and field names are case sensitive or not.
|
---|
| 94 | *
|
---|
| 95 | * Revision 1.7 1995/04/19 12:11:24 adam
|
---|
| 96 | * Minor change.
|
---|
| 97 | *
|
---|
| 98 | * Revision 1.6 1995/04/17 09:31:48 adam
|
---|
| 99 | * Improved handling of qualifiers. Aliases or reserved words.
|
---|
| 100 | *
|
---|
| 101 | * Revision 1.5 1995/02/23 08:32:00 adam
|
---|
| 102 | * Changed header.
|
---|
| 103 | *
|
---|
| 104 | * Revision 1.3 1995/02/15 17:42:16 adam
|
---|
| 105 | * Minor changes of the api of this module. FILE* argument added
|
---|
| 106 | * to ccl_pr_tree.
|
---|
| 107 | *
|
---|
| 108 | * Revision 1.2 1995/02/14 19:55:13 adam
|
---|
| 109 | * Header files ccl.h/cclp.h are gone! They have been merged and
|
---|
| 110 | * moved to ../include/ccl.h.
|
---|
| 111 | * Node kind(s) in ccl_rpn_node have changed names.
|
---|
| 112 | *
|
---|
| 113 | * Revision 1.1 1995/02/13 12:35:21 adam
|
---|
| 114 | * First version of CCL. Qualifiers aren't handled yet.
|
---|
| 115 | *
|
---|
| 116 | */
|
---|
| 117 |
|
---|
| 118 | #include <stdio.h>
|
---|
| 119 | #include <string.h>
|
---|
| 120 | #include <stdlib.h>
|
---|
| 121 |
|
---|
| 122 | #include <yaz/ccl.h>
|
---|
| 123 |
|
---|
| 124 | /*
|
---|
| 125 | * token_cmp: Compare token with keyword(s)
|
---|
| 126 | * kw: Keyword list. Each keyword is separated by space.
|
---|
| 127 | * token: CCL token.
|
---|
| 128 | * return: 1 if token string matches one of the keywords in list;
|
---|
| 129 | * 0 otherwise.
|
---|
| 130 | */
|
---|
| 131 | static int token_cmp (CCL_parser cclp, const char *kw, struct ccl_token *token)
|
---|
| 132 | {
|
---|
| 133 | const char *cp1 = kw;
|
---|
| 134 | const char *cp2;
|
---|
| 135 | if (!kw)
|
---|
| 136 | return 0;
|
---|
| 137 | while ((cp2 = strchr (cp1, ' ')))
|
---|
| 138 | {
|
---|
| 139 | if (token->len == (size_t) (cp2-cp1))
|
---|
| 140 | {
|
---|
| 141 | if (cclp->ccl_case_sensitive)
|
---|
| 142 | {
|
---|
| 143 | if (!memcmp (cp1, token->name, token->len))
|
---|
| 144 | return 1;
|
---|
| 145 | }
|
---|
| 146 | else
|
---|
| 147 | {
|
---|
| 148 | if (!ccl_memicmp (cp1, token->name, token->len))
|
---|
| 149 | return 1;
|
---|
| 150 | }
|
---|
| 151 | }
|
---|
| 152 | cp1 = cp2+1;
|
---|
| 153 | }
|
---|
| 154 | if (cclp->ccl_case_sensitive)
|
---|
| 155 | return token->len == strlen(cp1)
|
---|
| 156 | && !memcmp (cp1, token->name, token->len);
|
---|
| 157 | return token->len == strlen(cp1) &&
|
---|
| 158 | !ccl_memicmp (cp1, token->name, token->len);
|
---|
| 159 | }
|
---|
| 160 |
|
---|
| 161 | /*
|
---|
| 162 | * ccl_token_simple: tokenize CCL raw tokens
|
---|
| 163 | */
|
---|
| 164 | struct ccl_token *ccl_token_simple (const char *command)
|
---|
| 165 | {
|
---|
| 166 | const char *cp = command;
|
---|
| 167 | struct ccl_token *first = NULL;
|
---|
| 168 | struct ccl_token *last = NULL;
|
---|
| 169 |
|
---|
| 170 | while (1)
|
---|
| 171 | {
|
---|
| 172 | while (*cp && strchr (" \t\r\n", *cp))
|
---|
| 173 | {
|
---|
| 174 | cp++;
|
---|
| 175 | continue;
|
---|
| 176 | }
|
---|
| 177 | if (!first)
|
---|
| 178 | {
|
---|
| 179 | first = last = (struct ccl_token *)malloc (sizeof (*first));
|
---|
| 180 | ccl_assert (first);
|
---|
| 181 | last->prev = NULL;
|
---|
| 182 | }
|
---|
| 183 | else
|
---|
| 184 | {
|
---|
| 185 | last->next = (struct ccl_token *)malloc (sizeof(*first));
|
---|
| 186 | ccl_assert (last->next);
|
---|
| 187 | last->next->prev = last;
|
---|
| 188 | last = last->next;
|
---|
| 189 | }
|
---|
| 190 | last->next = NULL;
|
---|
| 191 | last->name = cp;
|
---|
| 192 | last->len = 1;
|
---|
| 193 | switch (*cp++)
|
---|
| 194 | {
|
---|
| 195 | case '\0':
|
---|
| 196 | last->kind = CCL_TOK_EOL;
|
---|
| 197 | return first;
|
---|
| 198 | case '\"':
|
---|
| 199 | last->kind = CCL_TOK_TERM;
|
---|
| 200 | last->name = cp;
|
---|
| 201 | last->len = 0;
|
---|
| 202 | while (*cp && *cp != '\"')
|
---|
| 203 | {
|
---|
| 204 | cp++;
|
---|
| 205 | ++ last->len;
|
---|
| 206 | }
|
---|
| 207 | if (*cp == '\"')
|
---|
| 208 | cp++;
|
---|
| 209 | break;
|
---|
| 210 | default:
|
---|
| 211 | while (*cp && !strchr (" \t\n\r", *cp))
|
---|
| 212 | {
|
---|
| 213 | cp++;
|
---|
| 214 | ++ last->len;
|
---|
| 215 | }
|
---|
| 216 | last->kind = CCL_TOK_TERM;
|
---|
| 217 | }
|
---|
| 218 | }
|
---|
| 219 | return first;
|
---|
| 220 | }
|
---|
| 221 |
|
---|
| 222 |
|
---|
| 223 | /*
|
---|
| 224 | * ccl_tokenize: tokenize CCL command string.
|
---|
| 225 | * return: CCL token list.
|
---|
| 226 | */
|
---|
| 227 | struct ccl_token *ccl_parser_tokenize (CCL_parser cclp, const char *command)
|
---|
| 228 | {
|
---|
| 229 | const char *cp = command;
|
---|
| 230 | struct ccl_token *first = NULL;
|
---|
| 231 | struct ccl_token *last = NULL;
|
---|
| 232 |
|
---|
| 233 | while (1)
|
---|
| 234 | {
|
---|
| 235 | while (*cp && strchr (" \t\r\n", *cp))
|
---|
| 236 | {
|
---|
| 237 | cp++;
|
---|
| 238 | continue;
|
---|
| 239 | }
|
---|
| 240 | if (!first)
|
---|
| 241 | {
|
---|
| 242 | first = last = (struct ccl_token *)malloc (sizeof (*first));
|
---|
| 243 | ccl_assert (first);
|
---|
| 244 | last->prev = NULL;
|
---|
| 245 | }
|
---|
| 246 | else
|
---|
| 247 | {
|
---|
| 248 | last->next = (struct ccl_token *)malloc (sizeof(*first));
|
---|
| 249 | ccl_assert (last->next);
|
---|
| 250 | last->next->prev = last;
|
---|
| 251 | last = last->next;
|
---|
| 252 | }
|
---|
| 253 | last->next = NULL;
|
---|
| 254 | last->name = cp;
|
---|
| 255 | last->len = 1;
|
---|
| 256 | switch (*cp++)
|
---|
| 257 | {
|
---|
| 258 | case '\0':
|
---|
| 259 | last->kind = CCL_TOK_EOL;
|
---|
| 260 | return first;
|
---|
| 261 | case '(':
|
---|
| 262 | last->kind = CCL_TOK_LP;
|
---|
| 263 | break;
|
---|
| 264 | case ')':
|
---|
| 265 | last->kind = CCL_TOK_RP;
|
---|
| 266 | break;
|
---|
| 267 | case ',':
|
---|
| 268 | last->kind = CCL_TOK_COMMA;
|
---|
| 269 | break;
|
---|
| 270 | case '%':
|
---|
| 271 | case '!':
|
---|
| 272 | last->kind = CCL_TOK_PROX;
|
---|
| 273 | while (*cp == '%' || *cp == '!')
|
---|
| 274 | {
|
---|
| 275 | ++ last->len;
|
---|
| 276 | cp++;
|
---|
| 277 | }
|
---|
| 278 | break;
|
---|
| 279 | case '>':
|
---|
| 280 | case '<':
|
---|
| 281 | case '=':
|
---|
| 282 | if (*cp == '=' || *cp == '<' || *cp == '>')
|
---|
| 283 | {
|
---|
| 284 | cp++;
|
---|
| 285 | last->kind = CCL_TOK_REL;
|
---|
| 286 | ++ last->len;
|
---|
| 287 | }
|
---|
| 288 | else if (cp[-1] == '=')
|
---|
| 289 | last->kind = CCL_TOK_EQ;
|
---|
| 290 | else
|
---|
| 291 | last->kind = CCL_TOK_REL;
|
---|
| 292 | break;
|
---|
| 293 | case '-':
|
---|
| 294 | last->kind = CCL_TOK_MINUS;
|
---|
| 295 | break;
|
---|
| 296 | case '\"':
|
---|
| 297 | last->kind = CCL_TOK_TERM;
|
---|
| 298 | last->name = cp;
|
---|
| 299 | last->len = 0;
|
---|
| 300 | while (*cp && *cp != '\"')
|
---|
| 301 | {
|
---|
| 302 | cp++;
|
---|
| 303 | ++ last->len;
|
---|
| 304 | }
|
---|
| 305 | if (*cp == '\"')
|
---|
| 306 | cp++;
|
---|
| 307 | break;
|
---|
| 308 | default:
|
---|
| 309 | while (*cp && !strchr ("(),%!><=- \t\n\r", *cp))
|
---|
| 310 | {
|
---|
| 311 | cp++;
|
---|
| 312 | ++ last->len;
|
---|
| 313 | }
|
---|
| 314 | if (token_cmp (cclp, cclp->ccl_token_and, last))
|
---|
| 315 | last->kind = CCL_TOK_AND;
|
---|
| 316 | else if (token_cmp (cclp, cclp->ccl_token_or, last))
|
---|
| 317 | last->kind = CCL_TOK_OR;
|
---|
| 318 | else if (token_cmp (cclp, cclp->ccl_token_not, last))
|
---|
| 319 | last->kind = CCL_TOK_NOT;
|
---|
| 320 | else if (token_cmp (cclp, cclp->ccl_token_set, last))
|
---|
| 321 | last->kind = CCL_TOK_SET;
|
---|
| 322 | else
|
---|
| 323 | last->kind = CCL_TOK_TERM;
|
---|
| 324 | }
|
---|
| 325 | }
|
---|
| 326 | return first;
|
---|
| 327 | }
|
---|
| 328 |
|
---|
| 329 | struct ccl_token *ccl_tokenize (const char *command)
|
---|
| 330 | {
|
---|
| 331 | CCL_parser cclp = ccl_parser_create ();
|
---|
| 332 | struct ccl_token *list;
|
---|
| 333 |
|
---|
| 334 | list = ccl_parser_tokenize (cclp, command);
|
---|
| 335 |
|
---|
| 336 | ccl_parser_destroy (cclp);
|
---|
| 337 | return list;
|
---|
| 338 | }
|
---|
| 339 |
|
---|
| 340 | /*
|
---|
| 341 | * ccl_token_del: delete CCL tokens
|
---|
| 342 | */
|
---|
| 343 | void ccl_token_del (struct ccl_token *list)
|
---|
| 344 | {
|
---|
| 345 | struct ccl_token *list1;
|
---|
| 346 |
|
---|
| 347 | while (list)
|
---|
| 348 | {
|
---|
| 349 | list1 = list->next;
|
---|
| 350 | free (list);
|
---|
| 351 | list = list1;
|
---|
| 352 | }
|
---|
| 353 | }
|
---|
| 354 |
|
---|
/*
 * ccl_strdup: duplicate a string on the heap
 * str:    NUL-terminated string to copy; may be NULL.
 * return: copy allocated with malloc (release it with free), or NULL
 *         when str is NULL or the allocation failed.
 */
static char *ccl_strdup (const char *str)
{
    size_t size;
    char *copy;

    if (!str)
        return NULL;
    size = strlen (str) + 1;    /* include the terminating NUL */
    copy = (char *) malloc (size);
    if (copy)
        memcpy (copy, str, size);
    return copy;
}
|
---|
| 362 |
|
---|
| 363 | CCL_parser ccl_parser_create (void)
|
---|
| 364 | {
|
---|
| 365 | CCL_parser p = (CCL_parser)malloc (sizeof(*p));
|
---|
| 366 | if (!p)
|
---|
| 367 | return p;
|
---|
| 368 | p->look_token = NULL;
|
---|
| 369 | p->error_code = 0;
|
---|
| 370 | p->error_pos = NULL;
|
---|
| 371 | p->bibset = NULL;
|
---|
| 372 |
|
---|
| 373 | p->ccl_token_and = ccl_strdup("and");
|
---|
| 374 | p->ccl_token_or = ccl_strdup("or");
|
---|
| 375 | p->ccl_token_not = ccl_strdup("not andnot");
|
---|
| 376 | p->ccl_token_set = ccl_strdup("set");
|
---|
| 377 | p->ccl_case_sensitive = 1;
|
---|
| 378 |
|
---|
| 379 | return p;
|
---|
| 380 | }
|
---|
| 381 |
|
---|
| 382 | void ccl_parser_destroy (CCL_parser p)
|
---|
| 383 | {
|
---|
| 384 | if (!p)
|
---|
| 385 | return;
|
---|
| 386 | free (p->ccl_token_and);
|
---|
| 387 | free (p->ccl_token_or);
|
---|
| 388 | free (p->ccl_token_not);
|
---|
| 389 | free (p->ccl_token_set);
|
---|
| 390 | free (p);
|
---|
| 391 | }
|
---|
| 392 |
|
---|
| 393 | void ccl_parser_set_op_and (CCL_parser p, const char *op)
|
---|
| 394 | {
|
---|
| 395 | if (p && op)
|
---|
| 396 | p->ccl_token_and = ccl_strdup (op);
|
---|
| 397 | }
|
---|
| 398 |
|
---|
| 399 | void ccl_parser_set_op_or (CCL_parser p, const char *op)
|
---|
| 400 | {
|
---|
| 401 | if (p && op)
|
---|
| 402 | p->ccl_token_or = ccl_strdup (op);
|
---|
| 403 | }
|
---|
| 404 | void ccl_parser_set_op_not (CCL_parser p, const char *op)
|
---|
| 405 | {
|
---|
| 406 | if (p && op)
|
---|
| 407 | p->ccl_token_not = ccl_strdup (op);
|
---|
| 408 | }
|
---|
| 409 | void ccl_parser_set_op_set (CCL_parser p, const char *op)
|
---|
| 410 | {
|
---|
| 411 | if (p && op)
|
---|
| 412 | p->ccl_token_set = ccl_strdup (op);
|
---|
| 413 | }
|
---|
| 414 |
|
---|
| 415 | void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag)
|
---|
| 416 | {
|
---|
| 417 | if (p)
|
---|
| 418 | p->ccl_case_sensitive = case_sensitivity_flag;
|
---|
| 419 | }
|
---|