source: trunk/indexers/mg/src/text/environment.c@ 7228

Last change on this file since 7228 was 7228, checked in by kjdon, 20 years ago

added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1/**************************************************************************
2 *
3 * environment.c -- mgquery environment functions
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: environment.c 7228 2004-04-25 23:01:18Z kjdon $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.2 2004/04/25 23:01:18 kjdon
27 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg.
28
29 Revision 1.1 2003/02/20 21:18:23 mdewsnip
30 Addition of MG package for search and retrieval
31
32 Revision 1.1 1999/08/10 21:17:50 sjboddie
33 renamed mg-1.3d directory mg
34
35 Revision 1.1 1998/11/17 09:34:37 rjmcnab
36 *** empty log message ***
37
38 * Revision 1.5 1995/03/14 05:15:26 tes
39 * Updated the boolean query optimiser to do different types of optimisation.
40 * A query environment variable "optimise_type" specifies which one is to be
41 * used. Type 1 is the new one which is faster than 2.
42 *
43 * Revision 1.4 1994/11/25 03:47:43 tes
44 * Committing files before adding the merge stuff.
45 *
46 * Revision 1.3 1994/10/20 03:56:43 tes
47 * I have rewritten the boolean query optimiser and abstracted out the
48 * components of the boolean query.
49 *
50 * Revision 1.2 1994/09/20 04:41:25 tes
51 * For version 1.1
52 *
53 */
54
55static char *RCSID = "$Id: environment.c 7228 2004-04-25 23:01:18Z kjdon $";
56
57#include "sysfuncs.h"
58
59#include "memlib.h"
60#include "locallib.h"
61#include "local_strings.h"
62#include "messages.h"
63
64#include "environment.h"
65
/*
 * Case-insensitive string comparison used throughout this file.
 * Win32 builds map it to stricmp, all other builds to strcasecmp.
 * [RPAP - Feb 97: WIN32 Port]
 */
#if defined (__WIN32__)
#define STRCASECMP stricmp
#else
#define STRCASECMP strcasecmp
#endif
72
/* One environment variable: its name, its value and an optional validator. */
typedef struct
{
  char *Name;			/* variable name (SetEnv stores it in lower case) */
  char *Data;			/* current value, or NULL when the variable is empty */
  /* Called as Constraint (old value, new value); returns the value to
     store, or NULL to reject the change.  NULL means "no constraint". */
  char *(*Constraint) (char *Old, char *New);
}
EEntry;
79
80typedef struct Env
81 {
82 EEntry *Environment;
83 int NumEnv;
84 struct Env *Next;
85 }
86Env;
87
88
89char *ConstraintErrorStr;
90
91static Env Base =
92{NULL, 0, NULL};
93
94
95/*
96 * Add name and data to the environment (data may be NULL)
97 * All names are converted to lower case
98 * returns 0 on success, -1 on memory failure or -2 on constraint failure
99 */
100int
101SetEnv (char *name, char *data, char *(*Constraint) (char *, char *))
102{
103 int i;
104 EEntry *newenv, *env = Base.Environment;
105
106 if (data && *data == '\0')
107 data = NULL;
108
109 /* search for name in the environment */
110 for (i = 0; i < Base.NumEnv && STRCASECMP (name, env->Name); i++, env++); /* [RPAP - Feb 97: WIN32 Port] */
111
112 /* if not found the increase the environment size */
113 if (i >= Base.NumEnv)
114 {
115 if (Constraint)
116 {
117 data = Constraint (NULL, data);
118 if (!data)
119 return (-2);
120 }
121 /* allocate memory for the new environment */
122 newenv = Base.Environment ?
123 Xrealloc (Base.Environment, sizeof (EEntry) * (Base.NumEnv + 1)) :
124 Xmalloc (sizeof (EEntry) * (Base.NumEnv + 1));
125 if (!newenv)
126 return (-1);
127 Base.Environment = newenv;
128 env = &newenv[Base.NumEnv];
129 env->Data = NULL;
130 env->Name = NULL;
131 env->Constraint = Constraint;
132 env->Name = Xstrdup (name);
133 if (!env->Name)
134 return (-1);
135 Base.NumEnv++;
136 for (i = 0; env->Name[i]; i++)
137 env->Name[i] = tolower (env->Name[i]);
138 }
139 if (env->Constraint)
140 {
141 data = env->Constraint (env->Data, data);
142 if (!data)
143 return (-2);
144 }
145 if (env->Data)
146 Xfree (env->Data);
147 env->Data = NULL;
148 if (data && *data != '\0')
149 {
150 env->Data = Xstrdup (data);
151 if (!env->Data)
152 return (-1);
153 }
154 return (0);
155}
156
157
158
159/*
160 * Returns the data associated with name. If Data = NULL then this routine
161 * returns a pointer to "". If the specified name does not exist then this
162 * routine returns NULL.
163 */
164char *
165GetEnv (char *name)
166{
167 int i;
168 EEntry *env = Base.Environment;
169 /* search for name in the environment */
170 for (i = 0; i < Base.NumEnv && STRCASECMP (name, env->Name); i++, env++); /* [RPAP - Feb 97: WIN32 Port] */
171
172 if (i >= Base.NumEnv)
173 return (NULL);
174
175 return (env->Data ? env->Data : "");
176}
177
/*
 * Same as GetEnv, except that def is returned when the variable does not
 * exist.  A variable that exists without data still yields "".
 */
char *
GetDefEnv (char *name, char *def)
{
  char *value = GetEnv (name);

  if (value == NULL)
    return def;
  return value;
}
189
190
191
192/*
193 * This function pushes the environment on to a stack and duplicated it in
194 * the current stack.
195 *
196 */
197int
198PushEnv (void)
199{
200 Env *env = Xmalloc (sizeof (Env));
201 int i;
202 *env = Base;
203 Base.Next = env;
204 Base.Environment = NULL;
205 Base.NumEnv = 0;
206 for (i = 0; i < env->NumEnv; i++)
207 if (SetEnv (env->Environment[i].Name, env->Environment[i].Data,
208 env->Environment[i].Constraint))
209 return (-1);
210 return (0);
211}
212
213int
214PopEnv (void)
215{
216 Env *env;
217 int i;
218 if (!Base.Next)
219 return (-1);
220 for (i = 0; i < Base.NumEnv; i++)
221 {
222 Xfree (Base.Environment[i].Name);
223 if (Base.Environment[i].Data)
224 Xfree (Base.Environment[i].Data);
225 }
226 if (Base.Environment)
227 Xfree (Base.Environment);
228 env = Base.Next;
229 Base = *env;
230 Xfree (env);
231 return (0);
232}
233
234
235/*
236 * Count the number of environments on the stack, including the current one.
237 *
238 */
239int
240EnvStackHeight (void)
241{
242 int i = 1;
243 Env *env = &Base;
244 while (env->Next)
245 {
246 i++;
247 env = env->Next;
248 }
249 return (i);
250}
251
252/*
253 * Delete environment variable name. If name does not exist it returns -1
254 * otherwise it returns 0
255 */
256int
257UnsetEnv (char *name, int Force)
258{
259 int i;
260 EEntry *env = Base.Environment;
261 /* search for name in the environment */
262 for (i = 0; i < Base.NumEnv && STRCASECMP (name, env->Name); i++, env++); /* [RPAP - Feb 97: WIN32 Port] */
263
264 if (i >= Base.NumEnv ||
265 (Base.Environment[i].Constraint && !Force))
266 return (-1);
267 Xfree (Base.Environment[i].Name);
268 if (Base.Environment[i].Data)
269 Xfree (Base.Environment[i].Data);
270 for (i++; i < Base.NumEnv; i++)
271 Base.Environment[i - 1] = Base.Environment[i];
272 Base.NumEnv--;
273 if (Base.NumEnv == 0)
274 {
275 Xfree (Base.Environment);
276 Base.Environment = NULL;
277 }
278 return (0);
279}
280
281
282
283
284
285
286
287
288/*
289 * Returns the name of environment variable number i or NULL if i is greater
290 * than or equal to the number of environment variables. Names start from
291 * zero.
292 */
293char *
294GetEnvName (int i)
295{
296 return (i < Base.NumEnv ? Base.Environment[i].Name : NULL);
297}
298
299static char *BooleanStrs[] =
300{"false", "true", "no", "yes", "off", "on"};
301
302static char *
303BooleanCons (char *Old, char *New)
304{
305 int i;
306 int old = -1, new = -1;
307 if (Old)
308 for (i = 0; i < sizeof (BooleanStrs) / sizeof (char *); i++)
309 if (!STRCASECMP (Old, BooleanStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
310 {
311 old = i;
312 break;
313 }
314 if (New)
315 for (i = 0; i < sizeof (BooleanStrs) / sizeof (char *); i++)
316 if (!STRCASECMP (New, BooleanStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
317 {
318 new = i;
319 break;
320 }
321 if (new >= 0)
322 return (New);
323 if (!New)
324 return (BooleanStrs[Old ? old ^ 1 : 0]);
325 ConstraintErrorStr = "Invalid argument [true|false|yes|no|on|off] required";
326 return (NULL);
327}
328
329/*
330 * returns 0 or 1 for a boolean string
331 * or default on a error
332 */
333int
334BooleanEnv (char *data, int def)
335{
336 int i;
337 if (!data)
338 return (def);
339 for (i = 0; i < sizeof (BooleanStrs) / sizeof (char *); i++)
340 if (!STRCASECMP (data, BooleanStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
341 return (i & 1);
342 return (def);
343}
344
/*
 * Returns the value of a decimal integer string.
 * Returns def when data is NULL, contains no digits (for example ""), or
 * has any characters after the number.
 */
long
IntEnv (char *data, long def)
{
  long val;
  char *end;

  if (!data)
    return (def);
  val = strtol (data, &end, 10);
  /* end == data means strtol consumed nothing, so there was no number */
  if (end == data || *end != '\0')
    return (def);
  return (val);
}
359
360static char *
361NumberCmp (char *num, int min, int max)
362{
363 char *err;
364 int val;
365 static char Err[100];
366 ConstraintErrorStr = "Not a valid number";
367 if (!num)
368 return (NULL);
369 val = strtol (num, &err, 10);
370 if (*err)
371 return (NULL);
372 sprintf (Err, "Not in legal range [%d <= num <= %d]", min, max);
373 ConstraintErrorStr = Err;
374 if (val < min || val > max)
375 return (NULL);
376 return (num);
377}
378
/*
 * Constraint for "maxdocs": accepts "all" (any case) or a number from 1 up
 * to the largest positive int.  Old is not used.  A NULL New is rejected
 * through NumberCmp instead of being passed to the string comparison.
 */
/*ARGSUSED */
static char *
MaxDocsCons (char *Old, char *New)
{
  if (New && !STRCASECMP (New, "all"))	/* [RPAP - Feb 97: WIN32 Port] */
    return (New);
  /* ((unsigned) ~0) >> 1 is the largest positive int */
  return (NumberCmp (New, 1, ((unsigned) (~0)) >> 1));
}
387
/*
 * Constraint for "max_terms": accepts "all" (any case) or a number from 1
 * up to the largest positive int.  Old is not used.  A NULL New is rejected
 * through NumberCmp instead of being passed to the string comparison.
 */
/*ARGSUSED */
static char *
MaxTermsCons (char *Old, char *New)
{
  if (New && !STRCASECMP (New, "all"))	/* [RPAP - Feb 97: WIN32 Port] */
    return (New);
  /* ((unsigned) ~0) >> 1 is the largest positive int */
  return (NumberCmp (New, 1, ((unsigned) (~0)) >> 1));
}
396
/* Constraint for "buffer": a number from 0 to 16 * 1024 * 1024 inclusive.
   Old is not used. */
/* ARGSUSED */
static char *
BufferCons (char *Old, char *New)
{
  int lowest = 0;
  int highest = 16 * 1024 * 1024;

  return NumberCmp (New, lowest, highest);
}
403
/*
 * Constraint for "max_accumulators": accepts "all" (any case) or a number
 * from 8 to 256 * 1024 * 1024 inclusive.  Old is not used.  A NULL New is
 * rejected through NumberCmp instead of being passed to the string
 * comparison.
 */
/* ARGSUSED */
static char *
MaxNodesCons (char *Old, char *New)
{
  if (New && !STRCASECMP (New, "all"))	/* [RPAP - Feb 97: WIN32 Port] */
    return (New);
  return (NumberCmp (New, 8, 256 * 1024 * 1024));
}
412
/* Constraint for "hash_tbl_size": a number from 8 to 256 * 1024 * 1024
   inclusive.  Old is not used. */
/* ARGSUSED */
static char *
MaxHashCons (char *Old, char *New)
{
  int lowest = 8;
  int highest = 256 * 1024 * 1024;

  return NumberCmp (New, lowest, highest);
}
419
/* Constraint for "maxparas": a number from 1 to 256 * 1024 * 1024
   inclusive.  Old is not used. */
/* ARGSUSED */
static char *
MaxParasCons (char *Old, char *New)
{
  int lowest = 1;
  int highest = 256 * 1024 * 1024;

  return NumberCmp (New, lowest, highest);
}
426
/* Constraint for "heads_length": a number from 1 to 1000 inclusive.
   Old is not used. */
/* ARGSUSED */
static char *
MaxHeadsCons (char *Old, char *New)
{
  int lowest = 1;
  int highest = 1000;

  return NumberCmp (New, lowest, highest);
}
433
/* Constraint for "optimise_type": a number from 0 to 2 inclusive.
   Old is not used. */
/* ARGSUSED */
char *
OptimiseCons (char *Old, char *New)
{
  int lowest = 0;
  int highest = 2;

  return NumberCmp (New, lowest, highest);
}
440
/* Constraint for "maxnumeric": a number from 4 to 512 inclusive.
   Old is not used. */
/* ARGSUSED */
char *
MaxNumericCons (char *Old, char *New)
{
  int lowest = 4;
  int highest = 512;

  return NumberCmp (New, lowest, highest);
}
447
448
449/*
450 * Makes sure that New is a valid query type
451 *
452 */
453/* ARGSUSED */
454static char *
455QueryCons (char *Old, char *New)
456{
457 static char *QueryStrs[] =
458 {
459 "boolean", "ranked",
460 "docnums", "approx-ranked"};
461 int i;
462 int new = -1;
463 if (New)
464 for (i = 0; i < sizeof (QueryStrs) / sizeof (char *); i++)
465 if (!STRCASECMP (New, QueryStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
466 {
467 new = i;
468 break;
469 }
470 if (new >= 0)
471 return (New);
472 ConstraintErrorStr = "Invalid argument [boolean|ranked|docnums|approx-ranked] required";
473 return (NULL);
474}
475
476/*
477 * Makes sure that New is a valid accum type
478 *
479 */
480/* ARGSUSED */
481static char *
482AccumCons (char *Old, char *New)
483{
484 static char *AccumStrs[] =
485 {
486 "array", "splay_tree", "hash_table", "list"};
487 int i;
488 int new = -1;
489 if (New)
490 for (i = 0; i < sizeof (AccumStrs) / sizeof (char *); i++)
491 if (!STRCASECMP (New, AccumStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
492 {
493 new = i;
494 break;
495 }
496 if (new >= 0)
497 return (New);
498 ConstraintErrorStr = "Invalid argument "
499 "[array|splay_tree|hash_table|list] required";
500 return (NULL);
501}
502
503
504
505/*
506 * Makes sure that New is a valid output type type
507 *
508 */
509/* ARGSUSED */
510static char *
511OutputTypeCons (char *Old, char *New)
512{
513 static char *OutputTypeStrs[] =
514 {"text", "silent", "docnums",
515 "count", "heads", "hilite"
516#ifdef TREC_MODE
517 ,"extras_for_trec"
518#endif
519 };
520
521 int i;
522 int new = -1;
523 if (New)
524 for (i = 0; i < sizeof (OutputTypeStrs) / sizeof (char *); i++)
525 if (!STRCASECMP (New, OutputTypeStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
526 {
527 new = i;
528 break;
529 }
530 if (new >= 0)
531 return (New);
532 ConstraintErrorStr = "Invalid argument [text|silent|docnums|heads|count|hilite"
533#ifdef TREC_MODE
534 "|extras_for_trec"
535#endif
536 "] required";
537 return (NULL);
538}
539
540/*
541 * Makes sure that New is a valid query type
542 *
543 */
544/* ARGSUSED */
545char *
546HiliteStyleCons (char *Old, char *New)
547{
548 static char *StyleStrs[] =
549 {"bold", "underline"};
550 int i;
551 int new = -1;
552 if (New)
553 for (i = 0; i < sizeof (StyleStrs) / sizeof (char *); i++)
554 if (!STRCASECMP (New, StyleStrs[i])) /* [RPAP - Feb 97: WIN32 Port] */
555 {
556 new = i;
557 break;
558 }
559 if (new >= 0)
560 return (New);
561 ConstraintErrorStr = "Invalid argument [bold|underline] required";
562 return (NULL);
563}
564
565/* =========================================================================
566 * Function: get_output_type
567 * Description:
568 * Map a mode string onto a mode char e.g. OUTPUT_????
569 * Input:
570 * Output:
571 * The mode char which is a subrange value.
572 * ========================================================================= */
573
574char
575get_output_type (void)
576{
577 char *env_str = GetDefEnv ("mode", "text");
578 char ch1 = toupper (env_str[0]);
579 char ch2 = toupper (env_str[1]);
580
581 switch (ch1)
582 {
583 case 'T':
584 return OUTPUT_TEXT;
585 case 'S':
586 return OUTPUT_SILENT;
587 case 'E':
588 return OUTPUT_EXTRAS;
589 case 'C':
590 return OUTPUT_COUNT;
591 case 'D':
592 return OUTPUT_DOCNUMS;
593 case 'H':
594 if (ch2 == 'E')
595 return OUTPUT_HEADERS;
596 else
597 return OUTPUT_HILITE;
598 default:
599 FatalError (1, "Problem in output type switch");
600 }
601
602 return '\0'; /*shouldn't reach here -- keep compiler happy */
603}
604
605/* =========================================================================
606 * Function: get_query_type
607 * Description:
608 * Map a query type string onto a char.
609 * Input:
610 * Output:
611 * Query type char.
612 * ========================================================================= */
613
614char
615get_query_type (void)
616{
617 char *env_str = GetDefEnv ("query", "boolean");
618 char ch1 = toupper (env_str[0]);
619
620 switch (ch1)
621 {
622 case 'R':
623 return QUERY_RANKED;
624 case 'A':
625 return QUERY_APPROX;
626 case 'B':
627 return QUERY_BOOLEAN;
628 case 'D':
629 return QUERY_DOCNUMS;
630 default:
631 FatalError (1, "Problem in query type switch");
632 }
633
634 return '\0'; /*shouldn't reach here */
635}
636
637
638
639/*
640 * This initialises certain environment variables
641 *
642 */
643void
644InitEnv (void)
645{
646 SetEnv ("hilite_style", "underline", HiliteStyleCons); /*[TS:Sep/94] */
647 SetEnv ("briefstats", "off", BooleanCons);
648 SetEnv ("diskstats", "off", BooleanCons);
649 SetEnv ("expert", "false", BooleanCons);
650 SetEnv ("mgdir", getenv ("MGDATA") ? getenv ("MGDATA") : ".", NULL);
651 SetEnv ("mgname", "", NULL);
652 SetEnv ("maxdocs", "all", MaxDocsCons);
653 SetEnv ("memstats", "off", BooleanCons);
654 SetEnv ("mode", "text", OutputTypeCons);
655 SetEnv ("pager", getenv ("PAGER") ? getenv ("PAGER") : "more", NULL);
656 SetEnv ("qfreq", "true", BooleanCons);
657 SetEnv ("query", "boolean", QueryCons);
658 SetEnv ("sizestats", "off", BooleanCons);
659 SetEnv ("timestats", "off", BooleanCons);
660 SetEnv ("verbatim", "off", BooleanCons);
661 SetEnv ("sorted_terms", "on", BooleanCons);
662 SetEnv ("accumulator_method", "array", AccumCons);
663 SetEnv ("stop_at_max_accum", "off", BooleanCons);
664 SetEnv ("buffer", "1048576", BufferCons);
665 SetEnv ("max_accumulators", "50000", MaxNodesCons);
666 SetEnv ("max_terms", "all", MaxTermsCons);
667 SetEnv ("maxparas", "1000", MaxParasCons);
668 SetEnv ("hash_tbl_size", "1000", MaxHashCons);
669 SetEnv ("skip_dump", "skips.%d", NULL);
670 SetEnv ("ranked_doc_sepstr", "---------------------------------- %n %w\\n", NULL);
671 SetEnv ("doc_sepstr", "---------------------------------- %n\\n", NULL);
672 SetEnv ("para_sepstr", "\\n######## PARAGRAPH %n ########\\n", NULL);
673 SetEnv ("para_start", "***** Weight = %w *****\\n", NULL);
674 SetEnv ("terminator", "", NULL);
675 SetEnv ("heads_length", "50", MaxHeadsCons);
676 SetEnv ("optimise_type", "1", OptimiseCons); /*[TS:Mar/95] */
677 SetEnv ("casefold", "on", BooleanCons); /* [RPAP - Jan 97: Stem Index Change] */
678 SetEnv ("stem", "on", BooleanCons); /* [RPAP - Jan 97: Stem Index Change] */
679 SetEnv ("term_freq", "off", BooleanCons); /* [RPAP - Feb 97: Term Frequency] */
680 SetEnv ("maxnumeric", "4", MaxNumericCons); /* [sjboddie - Jun 2002: Max Numeric word length] */
681}
682
683
/*
 * Tear down the whole environment: pop every pushed environment, then
 * forcibly unset each variable left in the bottom one.
 */
void
UninitEnv (void)
{
  char *first;

  /* unwind the stack until PopEnv reports that nothing is left to pop */
  while (PopEnv () == 0)
    ;

  /* always remove entry 0; later entries shift down after each removal */
  for (first = GetEnvName (0); first != 0; first = GetEnvName (0))
    UnsetEnv (first, 1);
}
Note: See TracBrowser for help on using the repository browser.