Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: trunk/gsdl/packages/yaz/ccl/ccltoken.c@ 1343

Last change on this file since 1343 was 1343, checked in by johnmcp, 24 years ago
Added the YAZ toolkit source to the packages directory (for z39.50 stuff)
Property svn:keywords set to `Author Date Id Revision`
File size: 10.3 KB

Rev	Line
[1343]	1	/*
	2	* Copyright (c) 1995, the EUROPAGATE consortium (see below).
	3	*
	4	* The EUROPAGATE consortium members are:
	5	*
	6	* University College Dublin
	7	* Danmarks Teknologiske Videnscenter
	8	* An Chomhairle Leabharlanna
	9	* Consejo Superior de Investigaciones Cientificas
	10	*
	11	* Permission to use, copy, modify, distribute, and sell this software and
	12	* its documentation, in whole or in part, for any purpose, is hereby granted,
	13	* provided that:
	14	*
	15	* 1. This copyright and permission notice appear in all copies of the
	16	* software and its documentation. Notices of copyright or attribution
	17	* which appear at the beginning of any file must remain unchanged.
	18	*
	19	* 2. The names of EUROPAGATE or the project partners may not be used to
	20	* endorse or promote products derived from this software without specific
	21	* prior written permission.
	22	*
	23	* 3. Users of this software (implementors and gateway operators) agree to
	24	* inform the EUROPAGATE consortium of their use of the software. This
	25	* information will be used to evaluate the EUROPAGATE project and the
	26	* software, and to plan further developments. The consortium may use
	27	* the information in later publications.
	28	*
	29	* 4. Users of this software agree to make their best efforts, when
	30	* documenting their use of the software, to acknowledge the EUROPAGATE
	31	* consortium, and the role played by the software in their work.
	32	*
	33	* THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
	34	* EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
	35	* WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
	36	* IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
	37	* FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
	38	* ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
	39	* OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
	40	* ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
	41	* USE OR PERFORMANCE OF THIS SOFTWARE.
	42	*
	43	*/
	44	/* CCL - lexical analysis
	45	* Europagate, 1995
	46	*
	47	* $Log$
	48	* Revision 1.1 2000/08/03 03:09:59 johnmcp
	49	* Added the YAZ toolkit source to the packages directory (for z39.50 stuff)
	50	*
	51	* Revision 1.13 2000/02/08 10:39:53 adam
	52	* Added a few functions to set name of operands, etc.
	53	*
	54	* Revision 1.12 2000/01/31 13:15:21 adam
	55	* Removed uses of assert(3). Cleanup of ODR. CCL parser update so
	56	* that some characters are not surrounded by spaces in resulting term.
	57	* ILL-code updates.
	58	*
	59	* Revision 1.11 1999/11/30 13:47:11 adam
	60	* Improved installation. Moved header files to include/yaz.
	61	*
	62	* Revision 1.10 1998/07/07 15:49:41 adam
	63	* Added braces to avoid warning.
	64	*
	65	* Revision 1.9 1998/02/11 11:53:33 adam
	66	* Changed code so that it compiles as C++.
	67	*
	68	* Revision 1.8 1997/09/29 08:56:38 adam
	69	* Changed CCL parser to be thread safe. New type, CCL_parser, declared,
	70	* and create/destroy functions ccl_parser_create/ccl_parser_destroy have
	71	* been added.
	72	*
	73	* Revision 1.7 1997/09/01 08:48:12 adam
	74	* New windows NT/95 port using MSV5.0. Only a few changes made
	75	* to avoid warnings.
	76	*
	77	* Revision 1.6 1997/04/30 08:52:07 quinn
	78	* Null
	79	*
	80	* Revision 1.5 1996/10/11 15:00:26 adam
	81	* CCL parser from Europagate Email gateway 1.0.
	82	*
	83	* Revision 1.10 1995/07/11 12:28:31 adam
	84	* New function: ccl_token_simple (split into simple tokens) and
	85	* ccl_token_del (delete tokens).
	86	*
	87	* Revision 1.9 1995/05/16 09:39:28 adam
	88	* LICENSE.
	89	*
	90	* Revision 1.8 1995/05/11 14:03:57 adam
	91	* Changes in the reading of qualifier(s). New function: ccl_qual_fitem.
	92	* New variable ccl_case_sensitive, which controls whether reserved
	93	* words and field names are case sensitive or not.
	94	*
	95	* Revision 1.7 1995/04/19 12:11:24 adam
	96	* Minor change.
	97	*
	98	* Revision 1.6 1995/04/17 09:31:48 adam
	99	* Improved handling of qualifiers. Aliases or reserved words.
	100	*
	101	* Revision 1.5 1995/02/23 08:32:00 adam
	102	* Changed header.
	103	*
	104	* Revision 1.3 1995/02/15 17:42:16 adam
	105	* Minor changes of the api of this module. FILE* argument added
	106	* to ccl_pr_tree.
	107	*
	108	* Revision 1.2 1995/02/14 19:55:13 adam
	109	* Header files ccl.h/cclp.h are gone! They have been merged and
	110	* moved to ../include/ccl.h.
	111	* Node kind(s) in ccl_rpn_node have changed names.
	112	*
	113	* Revision 1.1 1995/02/13 12:35:21 adam
	114	* First version of CCL. Qualifiers aren't handled yet.
	115	*
	116	*/
	117
	118	#include <stdio.h>
	119	#include <string.h>
	120	#include <stdlib.h>
	121
	122	#include <yaz/ccl.h>
	123
	124	/*
	125	* token_cmp: Compare token with keyword(s)
	126	* kw: Keyword list. Each keyword is separated by space.
	127	* token: CCL token.
	128	* return: 1 if token string matches one of the keywords in list;
	129	* 0 otherwise.
	130	*/
	131	static int token_cmp (CCL_parser cclp, const char kw, struct ccl_token token)
	132	{
	133	const char *cp1 = kw;
	134	const char *cp2;
	135	if (!kw)
	136	return 0;
	137	while ((cp2 = strchr (cp1, ' ')))
	138	{
	139	if (token->len == (size_t) (cp2-cp1))
	140	{
	141	if (cclp->ccl_case_sensitive)
	142	{
	143	if (!memcmp (cp1, token->name, token->len))
	144	return 1;
	145	}
	146	else
	147	{
	148	if (!ccl_memicmp (cp1, token->name, token->len))
	149	return 1;
	150	}
	151	}
	152	cp1 = cp2+1;
	153	}
	154	if (cclp->ccl_case_sensitive)
	155	return token->len == strlen(cp1)
	156	&& !memcmp (cp1, token->name, token->len);
	157	return token->len == strlen(cp1) &&
	158	!ccl_memicmp (cp1, token->name, token->len);
	159	}
	160
	161	/*
	162	* ccl_token_simple: tokenize CCL raw tokens
	163	*/
	164	struct ccl_token ccl_token_simple (const char command)
	165	{
	166	const char *cp = command;
	167	struct ccl_token *first = NULL;
	168	struct ccl_token *last = NULL;
	169
	170	while (1)
	171	{
	172	while (cp && strchr (" \t\r\n", cp))
	173	{
	174	cp++;
	175	continue;
	176	}
	177	if (!first)
	178	{
	179	first = last = (struct ccl_token )malloc (sizeof (first));
	180	ccl_assert (first);
	181	last->prev = NULL;
	182	}
	183	else
	184	{
	185	last->next = (struct ccl_token )malloc (sizeof(first));
	186	ccl_assert (last->next);
	187	last->next->prev = last;
	188	last = last->next;
	189	}
	190	last->next = NULL;
	191	last->name = cp;
	192	last->len = 1;
	193	switch (*cp++)
	194	{
	195	case '\0':
	196	last->kind = CCL_TOK_EOL;
	197	return first;
	198	case '\"':
	199	last->kind = CCL_TOK_TERM;
	200	last->name = cp;
	201	last->len = 0;
	202	while (cp && cp != '\"')
	203	{
	204	cp++;
	205	++ last->len;
	206	}
	207	if (*cp == '\"')
	208	cp++;
	209	break;
	210	default:
	211	while (cp && !strchr (" \t\n\r", cp))
	212	{
	213	cp++;
	214	++ last->len;
	215	}
	216	last->kind = CCL_TOK_TERM;
	217	}
	218	}
	219	return first;
	220	}
	221
	222
	223	/*
	224	* ccl_tokenize: tokenize CCL command string.
	225	* return: CCL token list.
	226	*/
	227	struct ccl_token ccl_parser_tokenize (CCL_parser cclp, const char command)
	228	{
	229	const char *cp = command;
	230	struct ccl_token *first = NULL;
	231	struct ccl_token *last = NULL;
	232
	233	while (1)
	234	{
	235	while (cp && strchr (" \t\r\n", cp))
	236	{
	237	cp++;
	238	continue;
	239	}
	240	if (!first)
	241	{
	242	first = last = (struct ccl_token )malloc (sizeof (first));
	243	ccl_assert (first);
	244	last->prev = NULL;
	245	}
	246	else
	247	{
	248	last->next = (struct ccl_token )malloc (sizeof(first));
	249	ccl_assert (last->next);
	250	last->next->prev = last;
	251	last = last->next;
	252	}
	253	last->next = NULL;
	254	last->name = cp;
	255	last->len = 1;
	256	switch (*cp++)
	257	{
	258	case '\0':
	259	last->kind = CCL_TOK_EOL;
	260	return first;
	261	case '(':
	262	last->kind = CCL_TOK_LP;
	263	break;
	264	case ')':
	265	last->kind = CCL_TOK_RP;
	266	break;
	267	case ',':
	268	last->kind = CCL_TOK_COMMA;
	269	break;
	270	case '%':
	271	case '!':
	272	last->kind = CCL_TOK_PROX;
	273	while (cp == '%' \|\| cp == '!')
	274	{
	275	++ last->len;
	276	cp++;
	277	}
	278	break;
	279	case '>':
	280	case '<':
	281	case '=':
	282	if (cp == '=' \|\| cp == '<' \|\| *cp == '>')
	283	{
	284	cp++;
	285	last->kind = CCL_TOK_REL;
	286	++ last->len;
	287	}
	288	else if (cp[-1] == '=')
	289	last->kind = CCL_TOK_EQ;
	290	else
	291	last->kind = CCL_TOK_REL;
	292	break;
	293	case '-':
	294	last->kind = CCL_TOK_MINUS;
	295	break;
	296	case '\"':
	297	last->kind = CCL_TOK_TERM;
	298	last->name = cp;
	299	last->len = 0;
	300	while (cp && cp != '\"')
	301	{
	302	cp++;
	303	++ last->len;
	304	}
	305	if (*cp == '\"')
	306	cp++;
	307	break;
	308	default:
	309	while (cp && !strchr ("(),%!><=- \t\n\r", cp))
	310	{
	311	cp++;
	312	++ last->len;
	313	}
	314	if (token_cmp (cclp, cclp->ccl_token_and, last))
	315	last->kind = CCL_TOK_AND;
	316	else if (token_cmp (cclp, cclp->ccl_token_or, last))
	317	last->kind = CCL_TOK_OR;
	318	else if (token_cmp (cclp, cclp->ccl_token_not, last))
	319	last->kind = CCL_TOK_NOT;
	320	else if (token_cmp (cclp, cclp->ccl_token_set, last))
	321	last->kind = CCL_TOK_SET;
	322	else
	323	last->kind = CCL_TOK_TERM;
	324	}
	325	}
	326	return first;
	327	}
	328
	329	struct ccl_token ccl_tokenize (const char command)
	330	{
	331	CCL_parser cclp = ccl_parser_create ();
	332	struct ccl_token *list;
	333
	334	list = ccl_parser_tokenize (cclp, command);
	335
	336	ccl_parser_destroy (cclp);
	337	return list;
	338	}
	339
	340	/*
	341	* ccl_token_del: delete CCL tokens
	342	*/
	343	void ccl_token_del (struct ccl_token *list)
	344	{
	345	struct ccl_token *list1;
	346
	347	while (list)
	348	{
	349	list1 = list->next;
	350	free (list);
	351	list = list1;
	352	}
	353	}
	354
	/*
	 * ccl_strdup: duplicate a string into freshly allocated memory.
	 * str:    NUL-terminated string to copy; NULL is tolerated.
	 * return: malloc'ed copy (release with free), or NULL if str is NULL
	 *         or the allocation fails.
	 */
	static char *ccl_strdup (const char *str)
	{
	    size_t len;
	    char *p;

	    if (!str)
	        return NULL;
	    len = strlen (str);
	    p = (char *) malloc (len + 1);
	    if (p)
	        memcpy (p, str, len + 1);   /* includes the terminating NUL */
	    return p;
	}
	362
	363	CCL_parser ccl_parser_create (void)
	364	{
	365	CCL_parser p = (CCL_parser)malloc (sizeof(*p));
	366	if (!p)
	367	return p;
	368	p->look_token = NULL;
	369	p->error_code = 0;
	370	p->error_pos = NULL;
	371	p->bibset = NULL;
	372
	373	p->ccl_token_and = ccl_strdup("and");
	374	p->ccl_token_or = ccl_strdup("or");
	375	p->ccl_token_not = ccl_strdup("not andnot");
	376	p->ccl_token_set = ccl_strdup("set");
	377	p->ccl_case_sensitive = 1;
	378
	379	return p;
	380	}
	381
	382	void ccl_parser_destroy (CCL_parser p)
	383	{
	384	if (!p)
	385	return;
	386	free (p->ccl_token_and);
	387	free (p->ccl_token_or);
	388	free (p->ccl_token_not);
	389	free (p->ccl_token_set);
	390	free (p);
	391	}
	392
	393	void ccl_parser_set_op_and (CCL_parser p, const char *op)
	394	{
	395	if (p && op)
	396	p->ccl_token_and = ccl_strdup (op);
	397	}
	398
	399	void ccl_parser_set_op_or (CCL_parser p, const char *op)
	400	{
	401	if (p && op)
	402	p->ccl_token_or = ccl_strdup (op);
	403	}
	404	void ccl_parser_set_op_not (CCL_parser p, const char *op)
	405	{
	406	if (p && op)
	407	p->ccl_token_not = ccl_strdup (op);
	408	}
	409	void ccl_parser_set_op_set (CCL_parser p, const char *op)
	410	{
	411	if (p && op)
	412	p->ccl_token_set = ccl_strdup (op);
	413	}
	414
	415	void ccl_parser_set_case (CCL_parser p, int case_sensitivity_flag)
	416	{
	417	if (p)
	418	p->ccl_case_sensitive = case_sensitivity_flag;
	419	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: