Changeset 2746
- Timestamp:
- 2001-09-22T00:46:42+12:00 (23 years ago)
- Location:
- trunk/gsdl/packages/mg/src/text
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/packages/mg/src/text/build.h
r439 r2746 26 26 #define H_BUILD 27 27 28 #include "longlong.h" 28 29 29 30 #define TERMPARAGRAPH '\003' … … 146 147 */ 147 148 148 extern double bytes_processed; /* [RJM 07/97: 4G limit] */ 149 extern mg_ullong bytes_processed; 149 150 /* 150 151 * The number of bytes processed. NOTE: This excludes document separators. 151 152 */ 152 153 153 extern double bytes_received; /* [RJM 07/97: 4G limit] */ 154 extern mg_ullong bytes_received; 154 155 /* 155 156 * The number of bytes processed. NOTE: This includes document separators. -
trunk/gsdl/packages/mg/src/text/ivf.pass1.c
r439 r2746 41 41 #include "hash.h" 42 42 43 #include "longlong.h" 43 44 44 45 /* 45 46 $Log$ 47 Revision 1.2 2001/09/21 12:46:42 kjm18 48 updated mg to be in line with mg_1.3f. Now uses long long for some variables 49 to enable indexing of very large collections. 50 46 51 Revision 1.1 1999/08/10 21:17:54 sjboddie 47 52 renamed mg-1.3d directory mg … … 85 90 */ 86 91 92 /* 93 * Modified: 94 * - long long bit counts for inverted file 95 * (1999-08-05 Tim Bell <[email protected]>) 96 */ 87 97 static char *RCSID = "$Id$"; 88 98 … … 119 129 static unsigned long words_read = 0, words_diff = 0, bytes_diff = 0; 120 130 static unsigned long outputbytes = 0; 121 static double inputbytes = 0; /* [RJM 07/97: 4G limit] */131 static unsigned long inputbytes = 0; 122 132 static unsigned long MaxMemInUse = 0; 123 133 static unsigned long MemInUse = 0; … … 140 150 static int max_first_occr; 141 151 142 static unsigned long L1_bits = 0, L1_ohead = 0; 143 static unsigned long L2_bits = 0, L2_ohead = 0; 144 static unsigned long L3_bits = 0, L3_ohead = 0; 152 static mg_ullong L1_bits = 0; 153 static mg_ullong L2_bits = 0; 154 static mg_ullong L3_bits = 0; 155 static unsigned long L1_ohead = 0; 156 static unsigned long L2_ohead = 0; 157 static unsigned long L3_ohead = 0; 145 158 static unsigned long callnum = 0, lcallnum = 0, wordnum = 0, lwordnum = 0; 146 159 static unsigned long ptrcnt = 0; … … 554 567 { 555 568 int i; 569 mg_ullong oldL12_bits = 0; 556 570 for (i = 0; i < HashUsed; i++) 557 571 { … … 567 581 (1.6 + log2 (1.0 * words_read / (wrd->wcnt + callnum)))); 568 582 L3_ohead += 0; 583 584 /* check for overflow */ 585 if (L1_bits + L2_bits < oldL12_bits) { 586 fprintf(stderr, "ERROR: Inverted file size will probably overflow %d byte unsigned integer\n", sizeof (mg_ullong)); 587 fprintf(stderr, " counter in pass 2.\n"); 588 if (sizeof (mg_ullong) < 8) { 589 fprintf(stderr, " Try compiling with GCC to enable use of 8 bytes for this counter.\n"); 590 } 591 fprintf(stderr, " Build 
aborted.\n"); 592 exit(1); 593 } 594 oldL12_bits = L1_bits + L2_bits; 569 595 } 570 596 L3_bits = (L3_bits + L2_bits + L1_bits + 7) / 8; -
trunk/gsdl/packages/mg/src/text/ivf.pass2.c
r439 r2746 24 24 /* 25 25 $Log$ 26 Revision 1.2 2001/09/21 12:46:42 kjm18 27 updated mg to be in line with mg_1.3f. Now uses long long for some variables 28 to enable indexing of very large collections. 29 30 * Revision 1.2 1997/08/02 05:01:57 wew 31 * changed literal values of 32 for the bit size of magic numbers of 32 * files to sizeof (unsigned long) * 8, increased the gap at the start 33 * of the invf during processing to 200 bytes 34 26 35 Revision 1.1 1999/08/10 21:17:54 sjboddie 27 36 renamed mg-1.3d directory mg … … 52 61 * For version 1.1 53 62 * 63 */ 64 65 /* 66 * Modified: 67 * - long long disk pointers and bit counts for inverted file 68 * (1999-08-03 Tim Bell <[email protected]>) 69 * Code provided by Owen de Kretser <[email protected]> 54 70 */ 55 71 … … 77 93 #include "hash.h" 78 94 95 #include "longlong.h" 96 97 #ifdef USE_LONG_LONG 98 #define BIO_Random_Seek_X BIO_Random_Seek_LL 99 #define BIO_Random_Tell_X BIO_Random_Tell_LL 100 #else 101 #define BIO_Random_Seek_X BIO_Random_Seek 102 #define BIO_Random_Tell_X BIO_Random_Tell 103 #endif 104 79 105 /* [RPAP - Feb 97: WIN32 Port] */ 80 106 #ifdef __WIN32__ … … 104 130 typedef struct invf_state_rec 105 131 { 106 unsignedlong Disk_Ptr;107 unsignedlong Disk_Last;132 mg_ullong Disk_Ptr; 133 mg_ullong Disk_Last; 108 134 unsigned long Disk_B; 109 135 } … … 268 294 269 295 #define ISR_CACHE 1024 270 #define ISR_ENTRY_SIZE (sizeof( unsignedlong)*2 + sizeof(unsigned long))296 #define ISR_ENTRY_SIZE (sizeof(mg_ullong)*2 + sizeof(unsigned long)) 271 297 272 298 invf_state_rec * … … 317 343 return 0; 318 344 } 319 BIO_Random_Seek (32, &rbs);345 BIO_Random_Seek_X (sizeof (unsigned long) * 8, &rbs); 320 346 pos = 0; 321 347 } … … 345 371 u_char prev[MAXSTEMLEN + 1]; 346 372 int i; 347 unsigned long totalIbits; 373 mg_ullong totalIbits; 374 mg_ullong lasttotalIbits; 348 375 double logN = 0.0; 349 376 377 350 378 if (open_files (file_name) == COMPERROR) 351 379 return COMPERROR; … … 419 447 totalDbytes += sizeof 
(chunk) * (ChunkLimit + 2); 420 448 421 totalIbits = 32; /* The magic number */422 totalIbits += 8 * 100; /* A 100 byte gap */449 totalIbits = sizeof (unsigned long) * 8; /* The magic number */ 450 totalIbits += 8 * 200; /* A 200 byte gap */ 423 451 424 452 if (MakeWeights) … … 440 468 unsigned long fcnt, wcnt; 441 469 470 lasttotalIbits = totalIbits; 471 442 472 copy = fgetc (dict); 443 473 suff = fgetc (dict); … … 473 503 totalIbits += BIO_Gamma_Bound (wcnt, fcnt); 474 504 505 #ifdef USE_LONG_LONG 506 totalIbits = (totalIbits + 7ull) & 0xfffffffffffffff8ull; 507 #else 475 508 totalIbits = (totalIbits + 7ul) & 0xfffffff8ul; 476 509 #endif 510 511 if (totalIbits < lasttotalIbits) { 512 fprintf(stderr, "ERROR: The totalIbits counter (%d byte unsigned integer) has overflowed.\n", sizeof (mg_ullong)); 513 if (sizeof (mg_ullong) < 8) { 514 fprintf(stderr, " Try compiling with GCC to enable use of 8 bytes for this counter.\n"); 515 } 516 fprintf(stderr, " Build aborted.\n"); 517 exit(1); 518 } 477 519 } 478 520 479 521 480 522 /* now convert to bytes, and actually get the space */ 523 #ifdef USE_LONG_LONG 524 totalIbytes = (totalIbits + 7ull) >> 3ull; 525 #else 481 526 totalIbytes = (totalIbits + 7ul) >> 3ul; 482 527 #endif 483 528 484 529 return (COMPALLOK); … … 648 693 register int B; 649 694 650 BIO_Random_Seek (isr->Disk_Ptr, &rbs); /* Position in invf file */695 BIO_Random_Seek_X (isr->Disk_Ptr, &rbs); /* Position in invf file */ 651 696 652 697 B = isr->Disk_B; … … 711 756 } 712 757 713 isr->Disk_Ptr = BIO_Random_Tell (&rbs);758 isr->Disk_Ptr = BIO_Random_Tell_X (&rbs); 714 759 715 760 } … … 948 993 { 949 994 long i; 950 unsignedlong totalIbits;995 mg_ullong totalIbits; 951 996 unsigned long invf_len; 952 997 unsigned long bytes_output; … … 981 1026 bytes_output = ftell (invf_out); 982 1027 983 totalIbits = 32; /* The magic number */984 totalIbits += 8 * 100; /* A 100 byte gap */1028 totalIbits = sizeof (unsigned long) * 8; /* The magic number */ 1029 totalIbits 
+= 8 * 200; /* A 200 byte gap */ 985 1030 986 1031 /* find the right place in the file to start reading p values */ … … 1034 1079 if (InvfLevel >= 2) 1035 1080 totalIbits += BIO_Gamma_Bound (wcnt, fcnt); 1081 #ifdef USE_LONG_LONG 1082 totalIbits = (totalIbits + 7ull) & 0xfffffffffffffff8ull; 1083 #else 1036 1084 totalIbits = (totalIbits + 7ul) & 0xfffffff8ul; 1085 #endif 1037 1086 1038 1087 } -
trunk/gsdl/packages/mg/src/text/mg_passes.c
r2538 r2746 32 32 #include "timing.h" 33 33 34 #include "longlong.h" 35 34 36 #include "mg_files.h" 35 37 #include "mg.h" … … 41 43 /* 42 44 $Log$ 45 Revision 1.3 2001/09/21 12:46:42 kjm18 46 updated mg to be in line with mg_1.3f. Now uses long long for some variables 47 to enable indexing of very large collections. 48 43 49 Revision 1.2 2001/06/12 23:23:42 jrm21 44 50 fixed a bug where mg_passes segfaults when trying to print the usage message. … … 95 101 FILE *Comp_Stats = NULL; 96 102 int comp_stat_point = 0; 97 double bytes_processed = 0; /* [RJM 07/97: 4G limit] */ 98 double bytes_received = 0; /* [RJM 07/97: 4G limit] */ 103 mg_ullong bytes_processed = 0; 104 mg_ullong bytes_received = 0; 99 105 int stemmer_num = 0; /* default to the lovin stemmer */ 100 106 int stem_method = 0; … … 349 355 } 350 356 if (Trace) 351 fprintf (Trace, "%10.0f bytes |%7lu docs | %s\n", 357 fprintf (Trace, "%11" ULL_FS " bytes |%7lu docs | %s\n", 352 358 bytes_processed, num_docs, 353 359 ElapsedTime (&StartTime, NULL)); … … 374 380 mi = mallinfo (); 375 381 block_bytes -= trace; 376 fprintf (Trace, "%10.0f bytes |%7lu docs |%7.3f Mb | %s\n", 382 fprintf (Trace, "%11" ULL_FS " bytes |%7lu docs |%7.3f Mb | %s\n", 377 383 bytes_processed, num_docs, mi.arena / 1024.0 / 1024.0, 378 384 ElapsedTime (&StartTime, NULL)); 379 385 #else 380 386 block_bytes -= trace; 381 fprintf (Trace, "%10.0f bytes |%7lu docs | %s\n", 387 fprintf (Trace, "%11" ULL_FS " bytes |%7lu docs | %s\n", 382 388 bytes_processed, num_docs, 383 389 ElapsedTime (&StartTime, NULL)); … … 430 436 struct mallinfo mi; 431 437 mi = mallinfo (); 432 fprintf (Trace, "%10.0f bytes |%7lu docs |%7.3f Mb | %s\n", 438 fprintf (Trace, "%11" ULL_FS " bytes |%7lu docs |%7.3f Mb | %s\n", 433 439 bytes_processed, num_docs, mi.arena / 1024.0 / 1024.0, 434 440 ElapsedTime (&StartTime, NULL)); 435 441 #else 436 fprintf (Trace, "%10.0f bytes |%7lu docs | %s\n", 442 fprintf (Trace, "%11" ULL_FS " bytes |%7lu docs | %s\n", 437 443 bytes_processed, 
num_docs, 438 444 ElapsedTime (&StartTime, NULL)); … … 466 472 Message ("Total time : %s", ElapsedTime (&StartTime, &DoneTime)); 467 473 Message ("Documents : %u", num_docs); 468 Message ("Bytes received : %.0f", bytes_received); 469 Message ("Bytes processed : %.0f", bytes_processed); 474 Message ("Bytes received : %" ULL_FS, bytes_received); 475 Message ("Bytes processed : %" ULL_FS, bytes_processed); 470 476 Message ("Process Rate : %.1f kB per cpu second", 471 477 (double) bytes_processed / (ProcTime.CPUTime - InitTime.CPUTime) / 1024); -
trunk/gsdl/packages/mg/src/text/mgstat.c
r439 r2746 32 32 #include "invf.h" 33 33 #include "text.h" 34 34 #include "longlong.h" 35 35 36 36 /* 37 37 $Log$ 38 Revision 1.2 2001/09/21 12:46:42 kjm18 39 updated mg to be in line with mg_1.3f. Now uses long long for some variables 40 to enable indexing of very large collections. 41 38 42 Revision 1.1 1999/08/10 21:18:19 sjboddie 39 43 renamed mg-1.3d directory mg … … 65 69 long ProcessStemBlk (char *name); /* [RPAP - Jan 97: Stem Index Change] */ 66 70 67 static double inputbytes = 0.0; /* [RJM 07/97: 4G limit] */ 68 static unsigned long total = 0; 71 static mg_ullong inputbytes = 0; 72 static mg_ullong total = 0; 69 73 70 74 … … 74 78 int main (int argc, char **argv) 75 79 { 76 unsigned long sub_total; 80 mg_ullong sub_total; 77 81 int fast; 78 82 char *file_name = ""; … … 164 168 printf ("\n"); 165 169 process_file (NULL, "TOTAL", exact); 166 167 170 return 0; 171 168 172 } 169 173 … … 251 255 { 252 256 inputbytes = cth.num_of_bytes; 253 printf ("Input bytes : %10.0f, %8.2f Mbyte\n", 254 cth.num_of_bytes, cth.num_of_bytes / 1024.0 / 1024.0); /* [RJM 07/97: 4G limit] */ 257 printf ("Input bytes : %10" ULL_FS ", %8.2f Mbyte\n", 258 cth.num_of_bytes, (double) cth.num_of_bytes / 1024 / 1024); 255 259 printf ("Documents : %10lu\n", cth.num_of_docs); 256 260 printf ("Words in collection [dict] : %10lu\n", cth.num_of_words); … … 378 382 { 379 383 if (exact) 380 printf ("%-34s : %10ld bytes %7.3f%%\n", ext, 384 printf ("%-34s : %10" ULL_FS " bytes %7.3f%%\n", ext, 381 385 total, 382 386 100.0 * total / inputbytes); … … 389 393 { 390 394 if (exact) 391 printf ("%-34s : %10ld bytes\n", ext, total); 395 printf ("%-34s : %10" ULL_FS " bytes\n", ext, total); 392 396 else 393 397 printf ("%-34s : %8.2f %s\n", ext,
Note:
See TracChangeset
for help on using the changeset viewer.