Changeset 7228 for trunk/indexers/mg


Ignore:
Timestamp:
2004-04-26T11:01:18+12:00 (20 years ago)
Author:
kjdon
Message:

added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg in line with gsdl2 mg.

Location:
trunk/indexers/mg/src/text
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/indexers/mg/src/text/environment.c

    r3745 r7228  
    2424/*
    2525   $Log$
     26   Revision 1.2  2004/04/25 23:01:18  kjdon
     27   added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg.
     28
    2629   Revision 1.1  2003/02/20 21:18:23  mdewsnip
    2730   Addition of MG package for search and retrieval
     
    436439}
    437440
     441/* ARGSUSED */
     442char *
     443MaxNumericCons (char *Old, char *New)
     444{
     445  return (NumberCmp (New, 4, 512));
     446}
    438447
    439448
     
    669678  SetEnv ("stem", "on", BooleanCons);  /* [RPAP - Jan 97: Stem Index Change] */
    670679  SetEnv ("term_freq", "off", BooleanCons);  /* [RPAP - Feb 97: Term Frequency] */
     680  SetEnv ("maxnumeric", "4", MaxNumericCons);  /* [sjboddie - Jun 2002: Max Numeric word length] */
    671681}
    672682
  • trunk/indexers/mg/src/text/mg_passes.c

    r3745 r7228  
    4040#include "stemmer.h"
    4141
     42#include "words.h"
    4243
    4344/*
    4445   $Log$
     46   Revision 1.2  2004/04/25 23:01:18  kjdon
     47   added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg.
     48
    4549   Revision 1.1  2003/02/20 21:18:24  mdewsnip
    4650   Addition of MG package for search and retrieval
     
    155159"  %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n"
    156160"  %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n"
    157 "  %*s [-a stemmer] -f doc-collection-name\n";
    158 
    159 
    160 
    161 
    162 
    163 
    164 
     161"  %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n";
    165162
    166163
     
    493490
    494491  opterr = 0;
    495   while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:")) != -1)
     492  while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1)
    496493    {
    497494      switch (ch)
     
    564561    case 't':
    565562      trace = (unsigned long) (atof (optarg) * 1024 * 1024);
     563      break;
     564    case 'M':
     565      SetEnv ("maxnumeric", optarg, NULL);
    566566      break;
    567567    case 'h':
  • trunk/indexers/mg/src/text/words.h

    r3745 r7228  
    2222 **************************************************************************/
    2323
     24
    2425#include "sysfuncs.h"
    25 
    2626#include "unitool.h"
    27 
    2827
    2928/*
     
    5150       of the program, , i.e., leave MAXSTEMLEN alone... */
    5251
    53 #define MAXNUMERIC  4
     52/*#define MAXNUMERIC    4*/
     53
    5454    /* Maximum number of numeric characters permitted in a word.
    5555       This avoids long sequences of numbers creating just one
     
    100100    register int numeric = 0;                                      \
    101101    unsigned short c;                                              \
     102    register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4);   \
    102103                                                                   \
    103104    charlength = parse_utf8_char((s_in),(end),&c);                 \
     
    105106    while (length+charlength <= MAXWORDLEN && charlength > 0 &&    \
    106107       (is_unicode_letter(c) || (is_unicode_digit(c) &&        \
    107                      ++numeric <= MAXNUMERIC))) {  \
     108                     ++numeric <= maxnumeric))) {  \
    108109      while (charlength-- > 0) {                                   \
    109110        *wptr++ = *(s_in)++; length++;                             \
     
    197198    register int numeric = 0;                                      \
    198199    unsigned short c;                                              \
     200    register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4);   \
    199201                                                                   \
    200202    charlength = parse_utf8_char((s_in),(end),&c);                 \
     
    202204    while (length+charlength <= MAXSTEMLEN && charlength > 0 &&    \
    203205       (is_unicode_letter(c) || (is_unicode_digit(c) &&        \
    204                      ++numeric <= MAXNUMERIC))) {  \
     206                     ++numeric <= maxnumeric))) {  \
    205207      while (charlength-- > 0) {                                   \
    206208        *wptr++ = *(s_in)++; length++;                             \
     
    210212    *(Word) = length;                                              \
    211213  }while(0)
    212 
    213214    /*
    214215#define PARSE_STEM_WORD(Word, s_in, end)                      \
     
    385386  } while (0)
    386387    */
    387 
Note: See TracChangeset for help on using the changeset viewer.