Changeset 4191


Ignore:
Timestamp:
2003-04-18T13:41:46+12:00 (21 years ago)
Author:
sjboddie
Message:

Added a new -M option to mg_passes, allowing "maxnumeric" to be altered.

Location:
trunk/gsdl/packages/mg/src/text
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/packages/mg/src/text/Makefile.in

    r3762 r4191  
    179179# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    180180
    181 HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o
     181HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o environment$o
    182182
    183183mg_hilite_words: $(HILITE_OBJS)
     
    192192BOOL_OBJS = bool_tree$o bool_parser$o bool_optimiser$o bool_tester$o \
    193193            term_lists$o stemmer$o stem_search$o mg_errors$o query_term_list$o \
    194         words$o
     194        words$o environment$o
    195195
    196196bool_tester: $(BOOL_OBJS)
     
    218218    text.pass2$o locallib$o \
    219219    ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o \
    220     words$o
     220    words$o environment$o
    221221
    222222mg_passes: $(PASSES_OBJS)
  • trunk/gsdl/packages/mg/src/text/WIN32.MAK

    r2487 r4191  
    9191all:    $(EXEC) libtextin.lib
    9292
    93 HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o
     93HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o environment$o
    9494mg_hilite_words$e: $(HILITE_OBJS)
    9595    $(LINK) $(HILITE_OBJS) $(LIBS)
     
    9797BOOL_OBJS = bool_tree$o bool_parser$o bool_optimiser$o bool_tester$o \
    9898        term_lists$o stemmer$o stem_search$o mg_errors$o query_term_list$o \
    99         words$o
     99        words$o environment$o
    100100bool_tester$e: $(BOOL_OBJS)
    101101    $(LINK) $(BOOL_OBJS) $(LIBS)
     
    117117    text.pass2$o locallib$o \
    118118    ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o \
    119     words$o
     119    words$o environment$o
    120120mg_passes$e: $(PASSES_OBJS)
    121121    $(LINK) $(PASSES_OBJS) $(LIBS)
     
    124124    words$o mgpass$o text.pass1$o comp_dict$o stemmer$o \
    125125    text.pass2$o locallib$o \
    126     ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o
     126    ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o environment$o
    127127mgpass$e: $(PASS_OBJS)
    128128    $(LINK) $(PASS_OBJS) $(LIBS)
  • trunk/gsdl/packages/mg/src/text/environment.c

    r439 r4191  
    2424/*
    2525   $Log$
     26   Revision 1.2  2003/04/18 01:41:46  sjboddie
     27   Added a new -M option to mg_passes, allowing "maxnumeric" to be altered.
     28
    2629   Revision 1.1  1999/08/10 21:17:50  sjboddie
    2730   renamed mg-1.3d directory mg
     
    433436}
    434437
     438/* ARGSUSED */
     439char *
     440MaxNumericCons (char *Old, char *New)
     441{
     442  return (NumberCmp (New, 4, 512));
     443}
    435444
    436445
     
    666675  SetEnv ("stem", "on", BooleanCons);  /* [RPAP - Jan 97: Stem Index Change] */
    667676  SetEnv ("term_freq", "off", BooleanCons);  /* [RPAP - Feb 97: Term Frequency] */
     677  SetEnv ("maxnumeric", "4", MaxNumericCons);  /* [sjboddie - Jun 2002: Max Numeric word length] */
    668678}
    669679
  • trunk/gsdl/packages/mg/src/text/mg_passes.c

    r2746 r4191  
    4040#include "stemmer.h"
    4141
     42#include "words.h"
    4243
    4344/*
    4445   $Log$
     46   Revision 1.4  2003/04/18 01:41:46  sjboddie
     47   Added a new -M option to mg_passes, allowing "maxnumeric" to be altered.
     48
    4549   Revision 1.3  2001/09/21 12:46:42  kjm18
    4650   updated mg to be in line with mg_1.3f. Now uses long long for some variables
     
    152156"  %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n"
    153157"  %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n"
    154 "  %*s [-a stemmer] -f doc-collection-name\n";
    155 
    156 
    157 
    158 
    159 
    160 
    161 
     158"  %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n";
    162159
    163160
     
    490487
    491488  opterr = 0;
    492   while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:")) != -1)
     489  while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1)
    493490    {
    494491      switch (ch)
     
    561558    case 't':
    562559      trace = (unsigned long) (atof (optarg) * 1024 * 1024);
     560      break;
     561    case 'M':
     562      SetEnv ("maxnumeric", optarg, NULL);
    563563      break;
    564564    case 'h':
  • trunk/gsdl/packages/mg/src/text/words.h

    r439 r4191  
    2222 **************************************************************************/
    2323
     24
    2425#include "sysfuncs.h"
    25 
    2626#include "unitool.h"
    27 
    2827
    2928/*
     
    5150       of the program, i.e., leave MAXSTEMLEN alone... */
    5251
    53 #define MAXNUMERIC  4
     52/*#define MAXNUMERIC    4*/
     53
    5454    /* Maximum number of numeric characters permitted in a word.
    5555       This avoids long sequences of numbers creating just one
     
    100100    register int numeric = 0;                                      \
    101101    unsigned short c;                                              \
     102    register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4);   \
    102103                                                                   \
    103104    charlength = parse_utf8_char((s_in),(end),&c);                 \
     
    105106    while (length+charlength <= MAXWORDLEN && charlength > 0 &&    \
    106107       (is_unicode_letter(c) || (is_unicode_digit(c) &&        \
    107                      ++numeric <= MAXNUMERIC))) {  \
     108                     ++numeric <= maxnumeric))) {  \
    108109      while (charlength-- > 0) {                                   \
    109110        *wptr++ = *(s_in)++; length++;                             \
     
    197198    register int numeric = 0;                                      \
    198199    unsigned short c;                                              \
     200    register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4);   \
    199201                                                                   \
    200202    charlength = parse_utf8_char((s_in),(end),&c);                 \
     
    202204    while (length+charlength <= MAXSTEMLEN && charlength > 0 &&    \
    203205       (is_unicode_letter(c) || (is_unicode_digit(c) &&        \
    204                      ++numeric <= MAXNUMERIC))) {  \
     206                     ++numeric <= maxnumeric))) {  \
    205207      while (charlength-- > 0) {                                   \
    206208        *wptr++ = *(s_in)++; length++;                             \
     
    210212    *(Word) = length;                                              \
    211213  }while(0)
    212 
    213214    /*
    214215#define PARSE_STEM_WORD(Word, s_in, end)                      \
     
    385386  } while (0)
    386387    */
    387 
Note: See TracChangeset for help on using the changeset viewer.