Changeset 7228
- Timestamp: 2004-04-26T11:01:18+12:00 (20 years ago)
- Location: trunk
- Files: 9 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl3/packages/mg/src/text/environment.c
r3745 r7228 24 24 /* 25 25 $Log$ 26 Revision 1.2 2004/04/25 23:01:18 kjdon 27 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg. 28 26 29 Revision 1.1 2003/02/20 21:18:23 mdewsnip 27 30 Addition of MG package for search and retrieval … … 436 439 } 437 440 441 /* ARGSUSED */ 442 char * 443 MaxNumericCons (char *Old, char *New) 444 { 445 return (NumberCmp (New, 4, 512)); 446 } 438 447 439 448 … … 669 678 SetEnv ("stem", "on", BooleanCons); /* [RPAP - Jan 97: Stem Index Change] */ 670 679 SetEnv ("term_freq", "off", BooleanCons); /* [RPAP - Feb 97: Term Frequency] */ 680 SetEnv ("maxnumeric", "4", MaxNumericCons); /* [sjboddie - Jun 2002: Max Numeric word length] */ 671 681 } 672 682 -
trunk/gsdl3/packages/mg/src/text/mg_passes.c
r3745 r7228 40 40 #include "stemmer.h" 41 41 42 #include "words.h" 42 43 43 44 /* 44 45 $Log$ 46 Revision 1.2 2004/04/25 23:01:18 kjdon 47 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg. 48 45 49 Revision 1.1 2003/02/20 21:18:24 mdewsnip 46 50 Addition of MG package for search and retrieval … … 155 159 " %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n" 156 160 " %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n" 157 " %*s [-a stemmer] -f doc-collection-name\n"; 158 159 160 161 162 163 164 161 " %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n"; 165 162 166 163 … … 493 490 494 491 opterr = 0; 495 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a: ")) != -1)492 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1) 496 493 { 497 494 switch (ch) … … 564 561 case 't': 565 562 trace = (unsigned long) (atof (optarg) * 1024 * 1024); 563 break; 564 case 'M': 565 SetEnv ("maxnumeric", optarg, NULL); 566 566 break; 567 567 case 'h': -
trunk/gsdl3/packages/mg/src/text/words.h
r3745 r7228 22 22 **************************************************************************/ 23 23 24 24 25 #include "sysfuncs.h" 25 26 26 #include "unitool.h" 27 28 27 29 28 /* … … 51 50 of the program, , i.e., leave MAXSTEMLEN alone... */ 52 51 53 #define MAXNUMERIC 4 52 /*#define MAXNUMERIC 4*/ 53 54 54 /* Maximum number of numeric characters permitted in a word. 55 55 This avoids long sequences of numbers creating just one … … 100 100 register int numeric = 0; \ 101 101 unsigned short c; \ 102 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 102 103 \ 103 104 charlength = parse_utf8_char((s_in),(end),&c); \ … … 105 106 while (length+charlength <= MAXWORDLEN && charlength > 0 && \ 106 107 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 107 ++numeric <= MAXNUMERIC))) { \108 ++numeric <= maxnumeric))) { \ 108 109 while (charlength-- > 0) { \ 109 110 *wptr++ = *(s_in)++; length++; \ … … 197 198 register int numeric = 0; \ 198 199 unsigned short c; \ 200 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 199 201 \ 200 202 charlength = parse_utf8_char((s_in),(end),&c); \ … … 202 204 while (length+charlength <= MAXSTEMLEN && charlength > 0 && \ 203 205 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 204 ++numeric <= MAXNUMERIC))) { \206 ++numeric <= maxnumeric))) { \ 205 207 while (charlength-- > 0) { \ 206 208 *wptr++ = *(s_in)++; length++; \ … … 210 212 *(Word) = length; \ 211 213 }while(0) 212 213 214 /* 214 215 #define PARSE_STEM_WORD(Word, s_in, end) \ … … 385 386 } while (0) 386 387 */ 387 -
trunk/gsdl3/src/packages/mg/src/text/environment.c
r3745 r7228 24 24 /* 25 25 $Log$ 26 Revision 1.2 2004/04/25 23:01:18 kjdon 27 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg. 28 26 29 Revision 1.1 2003/02/20 21:18:23 mdewsnip 27 30 Addition of MG package for search and retrieval … … 436 439 } 437 440 441 /* ARGSUSED */ 442 char * 443 MaxNumericCons (char *Old, char *New) 444 { 445 return (NumberCmp (New, 4, 512)); 446 } 438 447 439 448 … … 669 678 SetEnv ("stem", "on", BooleanCons); /* [RPAP - Jan 97: Stem Index Change] */ 670 679 SetEnv ("term_freq", "off", BooleanCons); /* [RPAP - Feb 97: Term Frequency] */ 680 SetEnv ("maxnumeric", "4", MaxNumericCons); /* [sjboddie - Jun 2002: Max Numeric word length] */ 671 681 } 672 682 -
trunk/gsdl3/src/packages/mg/src/text/mg_passes.c
r3745 r7228 40 40 #include "stemmer.h" 41 41 42 #include "words.h" 42 43 43 44 /* 44 45 $Log$ 46 Revision 1.2 2004/04/25 23:01:18 kjdon 47 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg. 48 45 49 Revision 1.1 2003/02/20 21:18:24 mdewsnip 46 50 Addition of MG package for search and retrieval … … 155 159 " %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n" 156 160 " %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n" 157 " %*s [-a stemmer] -f doc-collection-name\n"; 158 159 160 161 162 163 164 161 " %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n"; 165 162 166 163 … … 493 490 494 491 opterr = 0; 495 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a: ")) != -1)492 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1) 496 493 { 497 494 switch (ch) … … 564 561 case 't': 565 562 trace = (unsigned long) (atof (optarg) * 1024 * 1024); 563 break; 564 case 'M': 565 SetEnv ("maxnumeric", optarg, NULL); 566 566 break; 567 567 case 'h': -
trunk/gsdl3/src/packages/mg/src/text/words.h
r3745 r7228 22 22 **************************************************************************/ 23 23 24 24 25 #include "sysfuncs.h" 25 26 26 #include "unitool.h" 27 28 27 29 28 /* … … 51 50 of the program, , i.e., leave MAXSTEMLEN alone... */ 52 51 53 #define MAXNUMERIC 4 52 /*#define MAXNUMERIC 4*/ 53 54 54 /* Maximum number of numeric characters permitted in a word. 55 55 This avoids long sequences of numbers creating just one … … 100 100 register int numeric = 0; \ 101 101 unsigned short c; \ 102 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 102 103 \ 103 104 charlength = parse_utf8_char((s_in),(end),&c); \ … … 105 106 while (length+charlength <= MAXWORDLEN && charlength > 0 && \ 106 107 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 107 ++numeric <= MAXNUMERIC))) { \108 ++numeric <= maxnumeric))) { \ 108 109 while (charlength-- > 0) { \ 109 110 *wptr++ = *(s_in)++; length++; \ … … 197 198 register int numeric = 0; \ 198 199 unsigned short c; \ 200 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 199 201 \ 200 202 charlength = parse_utf8_char((s_in),(end),&c); \ … … 202 204 while (length+charlength <= MAXSTEMLEN && charlength > 0 && \ 203 205 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 204 ++numeric <= MAXNUMERIC))) { \206 ++numeric <= maxnumeric))) { \ 205 207 while (charlength-- > 0) { \ 206 208 *wptr++ = *(s_in)++; length++; \ … … 210 212 *(Word) = length; \ 211 213 }while(0) 212 213 214 /* 214 215 #define PARSE_STEM_WORD(Word, s_in, end) \ … … 385 386 } while (0) 386 387 */ 387 -
trunk/indexers/mg/src/text/environment.c
r3745 r7228 24 24 /* 25 25 $Log$ 26 Revision 1.2 2004/04/25 23:01:18 kjdon 27 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg. 28 26 29 Revision 1.1 2003/02/20 21:18:23 mdewsnip 27 30 Addition of MG package for search and retrieval … … 436 439 } 437 440 441 /* ARGSUSED */ 442 char * 443 MaxNumericCons (char *Old, char *New) 444 { 445 return (NumberCmp (New, 4, 512)); 446 } 438 447 439 448 … … 669 678 SetEnv ("stem", "on", BooleanCons); /* [RPAP - Jan 97: Stem Index Change] */ 670 679 SetEnv ("term_freq", "off", BooleanCons); /* [RPAP - Feb 97: Term Frequency] */ 680 SetEnv ("maxnumeric", "4", MaxNumericCons); /* [sjboddie - Jun 2002: Max Numeric word length] */ 671 681 } 672 682 -
trunk/indexers/mg/src/text/mg_passes.c
r3745 r7228 40 40 #include "stemmer.h" 41 41 42 #include "words.h" 42 43 43 44 /* 44 45 $Log$ 46 Revision 1.2 2004/04/25 23:01:18 kjdon 47 added a new -M option to mg_passes, allowing maxnumeric to be altered - made this change to keep gsdl3 mg inline with gsdl2 mg. 48 45 49 Revision 1.1 2003/02/20 21:18:24 mdewsnip 46 50 Addition of MG package for search and retrieval … … 155 159 " %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n" 156 160 " %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n" 157 " %*s [-a stemmer] -f doc-collection-name\n"; 158 159 160 161 162 163 164 161 " %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n"; 165 162 166 163 … … 493 490 494 491 opterr = 0; 495 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a: ")) != -1)492 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1) 496 493 { 497 494 switch (ch) … … 564 561 case 't': 565 562 trace = (unsigned long) (atof (optarg) * 1024 * 1024); 563 break; 564 case 'M': 565 SetEnv ("maxnumeric", optarg, NULL); 566 566 break; 567 567 case 'h': -
trunk/indexers/mg/src/text/words.h
r3745 r7228 22 22 **************************************************************************/ 23 23 24 24 25 #include "sysfuncs.h" 25 26 26 #include "unitool.h" 27 28 27 29 28 /* … … 51 50 of the program, , i.e., leave MAXSTEMLEN alone... */ 52 51 53 #define MAXNUMERIC 4 52 /*#define MAXNUMERIC 4*/ 53 54 54 /* Maximum number of numeric characters permitted in a word. 55 55 This avoids long sequences of numbers creating just one … … 100 100 register int numeric = 0; \ 101 101 unsigned short c; \ 102 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 102 103 \ 103 104 charlength = parse_utf8_char((s_in),(end),&c); \ … … 105 106 while (length+charlength <= MAXWORDLEN && charlength > 0 && \ 106 107 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 107 ++numeric <= MAXNUMERIC))) { \108 ++numeric <= maxnumeric))) { \ 108 109 while (charlength-- > 0) { \ 109 110 *wptr++ = *(s_in)++; length++; \ … … 197 198 register int numeric = 0; \ 198 199 unsigned short c; \ 200 register int maxnumeric = IntEnv (GetEnv ("maxnumeric"), 4); \ 199 201 \ 200 202 charlength = parse_utf8_char((s_in),(end),&c); \ … … 202 204 while (length+charlength <= MAXSTEMLEN && charlength > 0 && \ 203 205 (is_unicode_letter(c) || (is_unicode_digit(c) && \ 204 ++numeric <= MAXNUMERIC))) { \206 ++numeric <= maxnumeric))) { \ 205 207 while (charlength-- > 0) { \ 206 208 *wptr++ = *(s_in)++; length++; \ … … 210 212 *(Word) = length; \ 211 213 }while(0) 212 213 214 /* 214 215 #define PARSE_STEM_WORD(Word, s_in, end) \ … … 385 386 } while (0) 386 387 */ 387
Note: See TracChangeset for help on using the changeset viewer.