Changeset 4191
- Timestamp: 2003-04-18T13:41:46+12:00 (21 years ago)
- Location: trunk/gsdl/packages/mg/src/text
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/packages/mg/src/text/Makefile.in
r3762 r4191 179 179 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 180 180 181 HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o 181 HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o environment$o 182 182 183 183 mg_hilite_words: $(HILITE_OBJS) … … 192 192 BOOL_OBJS = bool_tree$o bool_parser$o bool_optimiser$o bool_tester$o \ 193 193 term_lists$o stemmer$o stem_search$o mg_errors$o query_term_list$o \ 194 words$o 194 words$o environment$o 195 195 196 196 bool_tester: $(BOOL_OBJS) … … 218 218 text.pass2$o locallib$o \ 219 219 ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o \ 220 words$o 220 words$o environment$o 221 221 222 222 mg_passes: $(PASSES_OBJS) -
trunk/gsdl/packages/mg/src/text/WIN32.MAK
r2487 r4191 91 91 all: $(EXEC) libtextin.lib 92 92 93 HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o 93 HILITE_OBJS = mg_hilite_words$o stemmer$o locallib$o words$o environment$o 94 94 mg_hilite_words$e: $(HILITE_OBJS) 95 95 $(LINK) $(HILITE_OBJS) $(LIBS) … … 97 97 BOOL_OBJS = bool_tree$o bool_parser$o bool_optimiser$o bool_tester$o \ 98 98 term_lists$o stemmer$o stem_search$o mg_errors$o query_term_list$o \ 99 words$o 99 words$o environment$o 100 100 bool_tester$e: $(BOOL_OBJS) 101 101 $(LINK) $(BOOL_OBJS) $(LIBS) … … 117 117 text.pass2$o locallib$o \ 118 118 ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o \ 119 words$o 119 words$o environment$o 120 120 mg_passes$e: $(PASSES_OBJS) 121 121 $(LINK) $(PASSES_OBJS) $(LIBS) … … 124 124 words$o mgpass$o text.pass1$o comp_dict$o stemmer$o \ 125 125 text.pass2$o locallib$o \ 126 ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o 126 ivf.pass1$o ivf.pass2$o mg.special$o mg_files$o environment$o 127 127 mgpass$e: $(PASS_OBJS) 128 128 $(LINK) $(PASS_OBJS) $(LIBS) -
trunk/gsdl/packages/mg/src/text/environment.c
r439 r4191 24 24 /* 25 25 $Log$ 26 Revision 1.2 2003/04/18 01:41:46 sjboddie 27 Added a new -M option to mg_passes, allowing "maxnumeric" to be altered. 28 26 29 Revision 1.1 1999/08/10 21:17:50 sjboddie 27 30 renamed mg-1.3d directory mg … … 433 436 } 434 437 438 /* ARGSUSED */ 439 char * 440 MaxNumericCons (char *Old, char *New) 441 { 442 return (NumberCmp (New, 4, 512)); 443 } 435 444 436 445 … … 666 675 SetEnv ("stem", "on", BooleanCons); /* [RPAP - Jan 97: Stem Index Change] */ 667 676 SetEnv ("term_freq", "off", BooleanCons); /* [RPAP - Feb 97: Term Frequency] */ 677 SetEnv ("maxnumeric", "4", MaxNumericCons); /* [sjboddie - Jun 2002: Max Numeric word length] */ 668 678 } 669 679 -
trunk/gsdl/packages/mg/src/text/mg_passes.c
r2746 r4191 40 40 #include "stemmer.h" 41 41 42 #include "words.h" 42 43 43 44 /* 44 45 $Log$ 46 Revision 1.4 2003/04/18 01:41:46 sjboddie 47 Added a new -M option to mg_passes, allowing "maxnumeric" to be altered. 48 45 49 Revision 1.3 2001/09/21 12:46:42 kjm18 46 50 updated mg to be in line with mg_1.3f. Now uses long long for some variables … … 152 156 " %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n" 153 157 " %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n" 154 " %*s [-a stemmer] -f doc-collection-name\n"; 155 156 157 158 159 160 161 158 " %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n"; 162 159 163 160 … … 490 487 491 488 opterr = 0; 492 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a: ")) != -1)489 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1) 493 490 { 494 491 switch (ch) … … 561 558 case 't': 562 559 trace = (unsigned long) (atof (optarg) * 1024 * 1024); 560 break; 561 case 'M': 562 SetEnv ("maxnumeric", optarg, NULL); 563 563 break; 564 564 case 'h': -
trunk/gsdl/packages/mg/src/text/words.h
r439 r4191 22 22 **************************************************************************/ 23 23 24 24 25 #include "sysfuncs.h" 25 26 26 #include "unitool.h" 27 28 27 29 28 /* … … 51 50 of the program, , i.e., leave MAXSTEMLEN alone... */ 52 51 53 #define MAXNUMERIC 4 52 /*#define MAXNUMERIC 4*/ 53 54 54 /* Maximum number of numeric characters permitted in a word. 55 55 This avoids long sequences of numbers creating just one … … 100 100 register int numeric = 0; \ 101 101 unsigned short c; \ 102 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 102 103 \ 103 104 charlength = parse_utf8_char((s_in),(end),&c); \ … … 105 106 while (length+charlength <= MAXWORDLEN && charlength > 0 && \ 106 107 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 107 ++numeric <= MAXNUMERIC))) { \108 ++numeric <= maxnumeric))) { \ 108 109 while (charlength-- > 0) { \ 109 110 *wptr++ = *(s_in)++; length++; \ … … 197 198 register int numeric = 0; \ 198 199 unsigned short c; \ 200 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 199 201 \ 200 202 charlength = parse_utf8_char((s_in),(end),&c); \ … … 202 204 while (length+charlength <= MAXSTEMLEN && charlength > 0 && \ 203 205 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 204 ++numeric <= MAXNUMERIC))) { \206 ++numeric <= maxnumeric))) { \ 205 207 while (charlength-- > 0) { \ 206 208 *wptr++ = *(s_in)++; length++; \ … … 210 212 *(Word) = length; \ 211 213 }while(0) 212 213 214 /* 214 215 #define PARSE_STEM_WORD(Word, s_in, end) \ … … 385 386 } while (0) 386 387 */ 387
Note: See TracChangeset for help on using the changeset viewer.