Changeset 7460 for trunk/gsdl3/src/packages/mg/src
- Timestamp:
- 2004-05-27T13:50:24+12:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/packages/mg/src/text/mg_passes_4jni.c
r7455 r7460 1 1 /************************************************************************** 2 2 * 3 * mg_passes .c -- Driver for the various passes3 * mg_passes_4jni.c -- Driver for the various passes 4 4 * Copyright (C) 1994 Neil Sharman 5 5 * … … 35 35 #include "stemmer.h" 36 36 37 38 37 #include "mg_files.h" 39 38 #include "mg.h" … … 113 112 }; 114 113 115 static char *usage_str = "\nUSAGE:\n" 116 " %s [-h] [-G] [-D] [-1|-2|-3] [-T1] [-T2] [-I1] [-I2] [-N1]\n" 117 " %*s [-N2] [-W] [-S] [-b buffer-size] [-d dictionary-directory]\n" 118 " %*s [-t trace-point Mb] [-m invf-memory] [-c chunk-limit]\n" 119 " %*s [-n trace-name] [-C comp-stat-size] [-s stem_method]\n" 120 " %*s [-a stemmer] [-M max-numeric] -f doc-collection-name\n"; 121 122 123 static void 124 usage (char *err) 125 { 126 if (err) 127 Message (err); 128 fprintf (stderr, usage_str, msg_prefix, strlen (msg_prefix), "", 129 strlen (msg_prefix), "",strlen (msg_prefix), "", 130 strlen (msg_prefix),""); 131 exit (1); 132 } 133 134 135 136 137 #if 0 138 static char * 139 str_comma (unsigned long u) 140 { 141 static char buf[20]; 142 unsigned long a, b, c, d; 143 a = u / 1000000000; 144 u -= a * 1000000000; 145 b = u / 1000000; 146 u -= b * 1000000; 147 c = u / 1000; 148 u -= c * 1000; 149 d = u; 150 151 if (a) 152 sprintf (buf, "%u,%03u,%03u,%03u", a, b, c, d); 153 else if (b) 154 sprintf (buf, "%u,%03u,%03u", b, c, d); 155 else if (c) 156 sprintf (buf, "%u,%03u", c, d); 157 else 158 sprintf (buf, "%u", d); 159 return (buf); 160 } 161 #endif 162 163 164 165 /* 166 int 167 open_next_file (int in_fd) 168 { 169 if (in_fd > 0) 170 close (in_fd); 171 if (num_files == 0) 172 return (-1); 173 if ((in_fd = open (files[0], O_RDONLY)) == -1) 174 FatalError (1, "Cannot open %s", files[0]); 175 files++; 176 num_files--; 177 return (in_fd); 178 } 179 */ 180 114 115 /* clear all the settings from one mg_passes run to the next */ 181 116 void clear_variables() { 182 117 … … 207 142 208 143 } 209 void set_invf_level(char level) { 210 211 switch (level) { 212 case '1': 213 InvfLevel = 1; 214 break; 215 case '2': 216 InvfLevel = 2; 217 break; 218 case '3': 219 InvfLevel = 3; 220 break; 221 } 222 223 } 224 void set_inversion_limit(int limit) { 225 invf_buffer_size = limit * 1024 * 1024; 226 } 227 228 void ignore_sgml_tags(int ignore) { 229 if (ignore) { 230 SkipSGML = 1; 231 } else { 232 SkipSGML = 0; 233 } 234 } 235 236 void set_buffer_size(long size) { 237 buf_size = size * 1024; 238 if (buf_size < MIN_BUF) { 239 buf_size = MIN_BUF; 240 } 241 } 242 243 void set_stem_options(char * stemmer, int method) { 244 stemmer_num = stemmernumber (stemmer); 245 stem_method = method & STEMMER_MASK; 246 247 } 248 249 void set_filename(char * filen) { 250 int len = strlen(filen); 251 if (filename) { 252 Xfree (filename); 253 filename = NULL; 254 } 255 filename = Xstrdup (filen); 256 // put this here for now 257 Dump=1; 258 trace = 512; 259 if (!trace_name) 260 trace_name = make_name (filename, TRACE_SUFFIX, NULL); 261 if (!(Trace = fopen (trace_name, "a"))) 262 Message ("Unable to open \"%s\". No tracing will be done.", trace_name); 263 else 264 setbuf (Trace, NULL); 265 266 } 267 268 144 145 /* ################################################## */ 146 /* the following are methods to set all the variables that used to be 147 set by command line args */ 148 149 /* -S, -T1, -T2, -I1, -I2, args to mg_passes */ 269 150 void add_pass (char pass_type, char pass_num) { 270 151 … … 291 172 break; 292 173 } 293 294 } 174 } 175 176 /* -D arg to mg_passes */ 177 void dump_failed_document(int dump) { 178 Dump = dump; 179 } 180 181 /* -G arg to mg_passes */ 182 void ignore_sgml_tags(int ignore) { 183 if (ignore) { 184 SkipSGML = 1; 185 } else { 186 SkipSGML = 0; 187 } 188 } 189 190 /* -b arg to mg_passes */ 191 void set_buffer_size(long size) { 192 buf_size = size * 1024; 193 if (buf_size < MIN_BUF) { 194 buf_size = MIN_BUF; 195 } 196 } 197 198 /* -c arg to mg_passes */ 199 void set_chunk_limit(long chunk_limit) { 200 ChunkLimit = chunk_limit; 201 } 202 203 /* -C arg to mg_passes */ 204 void set_comp_stat_point(int stat_point) { 205 comp_stat_point = stat_point * 1024; 206 } 207 208 /* -f arg to mg_passes */ 209 void set_filename(char * filen) { 210 int len = strlen(filen); 211 if (filename) { 212 Xfree (filename); 213 filename = NULL; 214 } 215 filename = Xstrdup (filen); 216 } 217 218 /* -m arg to mg_passes */ 219 void set_inversion_limit(int limit) { 220 invf_buffer_size = limit * 1024 * 1024; 221 } 222 223 /* -1, -2, -3 args to mg_passes */ 224 void set_invf_level(char level) { 225 switch (level) { 226 case '1': 227 InvfLevel = 1; 228 break; 229 case '2': 230 InvfLevel = 2; 231 break; 232 case '3': 233 InvfLevel = 3; 234 break; 235 } 236 } 237 238 /* -W arg to mg_passes */ 239 void set_make_weights(int make_w) { 240 MakeWeights = make_w; 241 } 242 243 /* -M arg to mg_passes */ 244 void set_max_numeric(int max_numeric) { 245 SetEnv ("maxnumeric", max_numeric, NULL); 246 } 247 248 /* -a, -s args to mg_passes */ 249 void set_stem_options(char * stemmer, int method) { 250 stemmer_num = stemmernumber (stemmer); 251 stem_method = method & STEMMER_MASK; 252 } 253 254 /* -t arg to mg_passes */ 255 void set_trace_point(int tracepos) { 256 trace = (unsigned long) (tracepos * 1024 * 1024); 257 } 258 259 /* -n arg to mg_passes */ 260 void set_trace_file(char * filen) { 261 int len = strlen(filen); 262 if (trace_name) { 263 Xfree (trace_name); 264 trace_name = NULL; 265 } 266 trace_name = Xstrdup (filen); 267 } 268 269 /* ############################################### */ 270 /* The old driver method has been split into 3: 271 init_driver, process_document (called numdocs times), 272 finalise_driver. 273 The above set vars methods should all be called before init_driver. 274 */ 275 276 295 277 ProgTime StartTime, InitTime, ProcTime, DoneTime; 296 278 … … 298 280 init_driver () 299 281 { 282 if (!filename || *filename == '\0') 283 FatalError (1, "A document collection name must be specified."); 284 285 if ((Passes & (IVF_PASS_1 | IVF_PASS_2)) == (IVF_PASS_1 | IVF_PASS_2)) 286 FatalError (1, "I1 and I2 cannot be done simultaneously."); 287 288 if ((Passes & (TEXT_PASS_1 | TEXT_PASS_2)) == (TEXT_PASS_1 | TEXT_PASS_2)) 289 FatalError (1, "T1 and T2 cannot be done simultaneously."); 290 291 if (!Passes) 292 FatalError (1, "S, T1, T2, I1 or I2 must be specified."); 293 294 if (trace) 295 { 296 if (!trace_name) 297 trace_name = make_name (filename, TRACE_SUFFIX, NULL); 298 if (!(Trace = fopen (trace_name, "a"))) 299 Message ("Unable to open \"%s\". No tracing will be done.", trace_name); 300 else 301 setbuf (Trace, NULL); 302 } 303 else 304 Trace = NULL; 305 306 if (comp_stat_point) 307 { 308 char *name = make_name (filename, COMPRESSION_STATS_SUFFIX, NULL); 309 if (!(Comp_Stats = fopen (name, "wb"))) /* [RPAP - Feb 97: WIN32 Port] */ 310 Message ("Unable to open \"%s\". No comp. stats. will be generated.", 311 name); 312 } 313 314 if (Trace) 315 { 316 int i; 317 fprintf (Trace, "\n\n\t\t-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n\n"); 318 /* print out the args to mg_passes */ 319 fprintf (Trace, "\n\n"); 320 } 321 300 322 int pass; 301 323 … … 338 360 bytes_processed += len; 339 361 340 printf("process doc, len=%d\n",len);341 362 #ifndef QUIET 342 363 if (!len) … … 501 522 (double) bytes_processed / (ProcTime.CPUTime - InitTime.CPUTime) / 1024); 502 523 //free (buffer); 503 } 504 505 506 507 int main (int argc, char **argv) 508 { 509 int ch, in_fd; 510 511 msg_prefix = argv[0]; 512 513 opterr = 0; 514 while ((ch = getopt (argc, argv, "hC:WGSD123f:d:b:T:I:t:m:N:c:n:s:a:M:")) != -1) 515 { 516 switch (ch) 517 { 518 case 'G': 519 SkipSGML = 1; 520 break; 521 case 'S': 522 Passes |= SPECIAL; 523 break; 524 case '1': 525 InvfLevel = 1; 526 break; 527 case '2': 528 InvfLevel = 2; 529 break; 530 case '3': 531 InvfLevel = 3; 532 break; 533 case 'f': 534 filename = optarg; 535 break; 536 case 'n': 537 trace_name = optarg; 538 break; 539 case 'D': 540 Dump = 1; 541 break; 542 case 'W': 543 MakeWeights = 1; 544 break; 545 case 'd': 546 set_basepath (optarg); 547 break; 548 case 'a': 549 stemmer_num = stemmernumber (optarg); 550 break; 551 case 's': 552 stem_method = atoi (optarg) & STEMMER_MASK; 553 break; 554 case 'b': 555 buf_size = atoi (optarg) * 1024; 556 break; 557 case 'C': 558 comp_stat_point = atoi (optarg) * 1024; 559 break; 560 case 'c': 561 ChunkLimit = atoi (optarg); 562 break; 563 case 'm': 564 invf_buffer_size = (int) (atof (optarg) * 1024 * 1024); 565 break; 566 case 'I': 567 case 'N': /* N kept for compatability */ 568 if (*optarg == '1') 569 Passes |= IVF_PASS_1; 570 else if (*optarg == '2') 571 Passes |= IVF_PASS_2; 572 else 573 usage ("Invalid pass number"); 574 break; 575 case 'T': 576 if (*optarg == '1') 577 Passes |= TEXT_PASS_1; 578 else if (*optarg == '2') 579 Passes |= TEXT_PASS_2; 580 else 581 usage ("Invalid pass number"); 582 break; 583 case 't': 584 trace = (unsigned long) (atof (optarg) * 1024 * 1024); 585 break; 586 case 'M': 587 SetEnv ("maxnumeric", optarg, NULL); 588 break; 589 case 'h': 590 case '?': 591 usage (NULL); 592 } 593 } 594 595 if (!filename || *filename == '\0') 596 FatalError (1, "A document collection name must be specified."); 597 598 if (buf_size < MIN_BUF) 599 FatalError (1, "The buffer size must exceed 1024 bytes."); 600 601 if ((Passes & (IVF_PASS_1 | IVF_PASS_2)) == (IVF_PASS_1 | IVF_PASS_2)) 602 FatalError (1, "I1 and I2 cannot be done simultaneously."); 603 604 if ((Passes & (TEXT_PASS_1 | TEXT_PASS_2)) == (TEXT_PASS_1 | TEXT_PASS_2)) 605 FatalError (1, "T1 and T2 cannot be done simultaneously."); 606 607 if (!Passes) 608 FatalError (1, "S, T1, T2, I1 or I2 must be specified."); 609 610 if (optind < argc) 611 { 612 if ((in_fd = open (argv[optind], O_RDONLY)) == -1) 613 FatalError (1, "Cannot open %s", argv[optind]); 614 files = &argv[optind + 1]; 615 num_files = argc - (optind + 1); 616 } 617 else 618 in_fd = 0; /* stdin */ 619 620 621 if (trace) 622 { 623 if (!trace_name) 624 trace_name = make_name (filename, TRACE_SUFFIX, NULL); 625 if (!(Trace = fopen (trace_name, "a"))) 626 Message ("Unable to open \"%s\". No tracing will be done.", trace_name); 627 else 628 setbuf (Trace, NULL); 629 } 630 else 631 Trace = NULL; 632 633 if (comp_stat_point) 634 { 635 char *name = make_name (filename, COMPRESSION_STATS_SUFFIX, NULL); 636 if (!(Comp_Stats = fopen (name, "wb"))) /* [RPAP - Feb 97: WIN32 Port] */ 637 Message ("Unable to open \"%s\". No comp. stats. will be generated.", 638 name); 639 } 640 641 642 if (Trace) 643 { 644 int i; 645 fprintf (Trace, "\n\n\t\t-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n\n"); 646 for (i = 0; i < argc; i++) 647 fprintf (Trace, "%s ", argv[i]); 648 fprintf (Trace, "\n\n"); 649 } 650 651 init_driver (); 652 /* here we have to do something to process docs from stdin */ 653 finalise_driver(); 524 654 525 if (Trace) 655 526 fclose (Trace); … … 658 529 fclose (Comp_Stats); 659 530 660 return 0; 661 } 531 } 532
Note:
See TracChangeset
for help on using the changeset viewer.