Changeset 23508 for main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mgpp/text/ivf.pass1.cpp
- Timestamp: 2010-12-17T14:04:10+13:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/branches/64_bit_Greenstone/greenstone2/common-src/indexers/mgpp/text/ivf.pass1.cpp
r16583 r23508 51 51 // structure to determine level information 52 52 struct LevelWorker { 53 unsignedlong lastLevelDocNum;54 unsignedlong count;53 mg_u_long lastLevelDocNum; 54 mg_u_long count; 55 55 56 56 LevelWorker () { … … 62 62 // note: the word is stored in the map 63 63 struct IvfWordInfo { 64 unsignedlong wordCount; // word frequency65 unsignedlong fragCount; // number of fragments that contain the word66 unsignedlong lastFragNum; // last fragment to contain the word67 unsignedlong chunkWordCount; // word frequency within this chunk68 unsignedlong chunkFragCount; // number of fragments within this chunk that64 mg_u_long wordCount; // word frequency 65 mg_u_long fragCount; // number of fragments that contain the word 66 mg_u_long lastFragNum; // last fragment to contain the word 67 mg_u_long chunkWordCount; // word frequency within this chunk 68 mg_u_long chunkFragCount; // number of fragments within this chunk that 69 69 // contain the word 70 70 … … 81 81 // tags don't require as much information 82 82 struct IvfTagInfo { 83 unsignedlong tagCount; // tag frequency84 unsignedlong fragCount; // number of fragments that contain the tag85 unsignedlong lastFragNum; // last fragment to contain the tag86 unsignedlong chunkFragCount; // number of fragments within this chunk that83 mg_u_long tagCount; // tag frequency 84 mg_u_long fragCount; // number of fragments that contain the tag 85 mg_u_long lastFragNum; // last fragment to contain the tag 86 mg_u_long chunkFragCount; // number of fragments within this chunk that 87 87 // contain the tag 88 88 … … 109 109 IvfTagInfoItArray ivfTagInfoOccurOrder; 110 110 111 static unsignedlong chunksWritten;112 static unsignedlong maxMemNeeded;113 114 static unsignedlong numDocs;115 static unsignedlong numChunkDocs;116 117 static unsignedlong numFrags;118 static unsignedlong numChunkFrags;119 120 static unsignedlong numWords;111 static mg_u_long chunksWritten; 112 static mg_u_long maxMemNeeded; 113 114 static mg_u_long 
numDocs; 115 static mg_u_long numChunkDocs; 116 117 static mg_u_long numFrags; 118 static mg_u_long numChunkFrags; 119 120 static mg_u_long numWords; 121 121 122 122 // the number of document numbers in the inverted file 123 static unsignedlong numChunkEntries;123 static mg_u_long numChunkEntries; 124 124 125 125 // next entry in the inverted file to check memory 126 126 // requirements for the current chunk 127 static unsignedlong entryCheckPoint;127 static mg_u_long entryCheckPoint; 128 128 129 129 // information about all the different levels … … 173 173 MAGIC_CHUNK, MG_MESSAGE))) 174 174 return COMPERROR; 175 fwrite (" ", sizeof ( u_long), 1, ic); // Space for the maxmem175 fwrite (" ", sizeof (mg_u_long), 1, ic); // Space for the maxmem 176 176 icb.attachFile (ic); 177 177 icb.encodeStart(); … … 195 195 196 196 numChunkEntries = 0; 197 entryCheckPoint = ( unsignedlong) ((invf_buffer_size * INIT_CHECK_FRAC) / CHECK_DIV);197 entryCheckPoint = (mg_u_long) ((invf_buffer_size * INIT_CHECK_FRAC) / CHECK_DIV); 198 198 199 199 // init the level information … … 347 347 348 348 349 static unsignedlong MemoryRequired (bool wordLevelIndex) {350 register unsignedlong total = 0;349 static mg_u_long MemoryRequired (bool wordLevelIndex) { 350 register mg_u_long total = 0; 351 351 352 352 // add memory required for word entries … … 372 372 if (info.chunkFragCount > 0) { 373 373 // two d entries for each frag entry 374 unsignedlong pTag = info.chunkFragCount*2;374 mg_u_long pTag = info.chunkFragCount*2; 375 375 total += BIO_Bblock_Bound (numChunkFrags+pTag, pTag); 376 376 } … … 385 385 386 386 /* 387 static void PrintChunkInfo ( unsignedlong mem) {387 static void PrintChunkInfo (mg_u_long mem) { 388 388 cout << "Chunk Number: " << chunksWritten << "\n"; 389 389 cout << "numChunkDocs " << numChunkDocs << "\n"; … … 396 396 IvfTagInfoMap::iterator tagMapHere = ivfTagInfo.begin(); 397 397 IvfTagInfoMap::iterator tagMapEnd = ivfTagInfo.end(); 398 unsignedlong tagNum = 0;398 
mg_u_long tagNum = 0; 399 399 while (tagMapHere != tagMapEnd) { 400 400 cout << (*tagMapHere).first << " " << tagNum … … 406 406 */ 407 407 408 static void OutputChunkInfo ( unsignedlong mem, bool /*wordLevelIndex*/) {408 static void OutputChunkInfo (mg_u_long mem, bool /*wordLevelIndex*/) { 409 409 ++chunksWritten; 410 410 … … 493 493 // check the amount of memory needed for this chunk 494 494 if (numChunkEntries >= entryCheckPoint) { 495 unsignedlong mem = MemoryRequired (wordLevelIndex);495 mg_u_long mem = MemoryRequired (wordLevelIndex); 496 496 if (mem >= invf_buffer_size * CHECK_CLOSE) { 497 497 if (mem > maxMemNeeded) maxMemNeeded = mem; 498 498 OutputChunkInfo (mem, wordLevelIndex); 499 entryCheckPoint = ( unsignedlong)499 entryCheckPoint = (mg_u_long) 500 500 ((invf_buffer_size * INIT_CHECK_FRAC) / CHECK_DIV); 501 501 502 502 } else { 503 entryCheckPoint = ( unsignedlong)503 entryCheckPoint = (mg_u_long) 504 504 (entryCheckPoint * ((CHECK_FRAC * (invf_buffer_size - mem)) / mem) + 505 505 entryCheckPoint); … … 513 513 514 514 515 static void CalcInvfDictSize ( unsignedlong &totalBytes,516 unsignedlong &indexStringBytes) {515 static void CalcInvfDictSize (mg_u_long &totalBytes, 516 mg_u_long &indexStringBytes) { 517 517 totalBytes = 0; // The sum of the length of all words, including 518 518 // the length byte … … 526 526 IvfWordInfoMap::iterator wordEnd = ivfWordInfo.end(); 527 527 while (wordHere != wordEnd) { 528 unsignedlong wordSize = (*wordHere).first.size();528 mg_u_long wordSize = (*wordHere).first.size(); 529 529 totalBytes += wordSize + 1; 530 530 indexStringBytes += wordSize + 2; … … 541 541 IvfTagInfoMap::iterator tagEnd = ivfTagInfo.end(); 542 542 while (tagHere != tagEnd) { 543 unsignedlong tagSize = (*tagHere).first.size();543 mg_u_long tagSize = (*tagHere).first.size(); 544 544 totalBytes += tagSize + 1; 545 545 indexStringBytes += tagSize + 2; … … 642 642 643 643 // write out the updated header 644 fseek (sp, sizeof ( u_long), SEEK_SET);644 
fseek (sp, sizeof (mg_u_long), SEEK_SET); 645 645 if (!idh.Write (sp)) { fclose (sp); return; } 646 646 … … 691 691 692 692 // write out the word translation table 693 unsignedlong wordDictSize = ivfWordInfoOccurOrder.size();693 mg_u_long wordDictSize = ivfWordInfoOccurOrder.size(); 694 694 IvfWordInfoItArray::iterator wordItHere = ivfWordInfoOccurOrder.begin(); 695 695 IvfWordInfoItArray::iterator wordItEnd = ivfWordInfoOccurOrder.end(); 696 unsignedlong oN = 0;696 mg_u_long oN = 0; 697 697 while (wordItHere != wordItEnd) { 698 698 register IvfWordInfo &ivfWordInfo = (*(*wordItHere)).second; … … 703 703 704 704 // write out the tag translation table 705 unsignedlong tagDictSize = ivfTagInfoOccurOrder.size();705 mg_u_long tagDictSize = ivfTagInfoOccurOrder.size(); 706 706 IvfTagInfoItArray::iterator tagItHere = ivfTagInfoOccurOrder.begin(); 707 707 IvfTagInfoItArray::iterator tagItEnd = ivfTagInfoOccurOrder.end(); … … 737 737 738 738 char *temp_str = msg_prefix; 739 msg_prefix = "ivf.pass1";739 msg_prefix = (char*)"ivf.pass1"; 740 740 741 741 // output the last chunk 742 742 if (numChunkDocs > 0) { 743 unsignedlong mem = MemoryRequired (wordLevelIndex);743 mg_u_long mem = MemoryRequired (wordLevelIndex); 744 744 OutputChunkInfo (mem, wordLevelIndex); 745 745 if (mem > maxMemNeeded) maxMemNeeded = mem; … … 751 751 752 752 // write out the maximum memory required and close the file 753 fseek (ic, sizeof ( long), 0);753 fseek (ic, sizeof (mg_u_long), 0); 754 754 WriteUL (ic, maxMemNeeded); 755 755 fclose (ic);
Note: See TracChangeset for help on using the changeset viewer.