Changeset 8692 for trunk/indexers
- Timestamp:
- 2004-11-29T15:43:11+13:00 (19 years ago)
- Location:
- trunk/indexers/mgpp
- Files:
-
- 24 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/indexers/mgpp/lib/bitio_m_random.cpp
r3365 r8692 16 16 17 17 random_bitio_buffer::~random_bitio_buffer() { 18 if (buffer != NULL) delete buffer;18 if (buffer != NULL) delete []buffer; 19 19 } 20 20 … … 22 22 // delete the old buffer 23 23 if (buffer != NULL) { 24 delete buffer;24 delete []buffer; 25 25 buffer = NULL; 26 26 } … … 54 54 // delete the old buffer 55 55 if (buffer != NULL) { 56 delete buffer;56 delete []buffer; 57 57 buffer = NULL; 58 58 } … … 131 131 flush(); 132 132 if (buffer != NULL) { 133 delete buffer;133 delete []buffer; 134 134 buffer = NULL; 135 135 } -
trunk/indexers/mgpp/lib/huffman.cpp
r3365 r8692 56 56 57 57 /* Initialise the pointers to the leaves */ 58 for (count = i = 0; i < num; i++)58 for (count = i = 0; i < num; ++i) 59 59 if (heap[num + i]) 60 60 heap[count++] = num + i; … … 62 62 /* Reorganise the pointers so that it is a heap */ 63 63 HNum = count; 64 for (i = HNum / 2; i > 0; i--)64 for (i = HNum / 2; i > 0; --i) 65 65 { 66 66 register int curr, child; … … 70 70 { 71 71 if (child < HNum && heap[heap[child]] < heap[heap[child - 1]]) 72 child++;72 ++child; 73 73 if (heap[heap[curr - 1]] > heap[heap[child - 1]]) 74 74 { … … 91 91 int pos[2]; 92 92 93 for (i = 0; i < 2; i++)93 for (i = 0; i < 2; ++i) 94 94 { 95 95 register int curr, child; … … 102 102 if (child < HNum && 103 103 heap[heap[child]] < heap[heap[child - 1]]) 104 child++;104 ++child; 105 105 if (heap[heap[curr - 1]] > heap[heap[child - 1]]) 106 106 { … … 124 124 { 125 125 register int parent, curr; 126 HNum++;126 ++HNum; 127 127 curr = HNum; 128 128 parent = curr >> 1; … … 143 143 heap[0] = -1UL; 144 144 heap[1] = 0; 145 for (i = 2; i < num * 2; i++)145 for (i = 2; i < num * 2; ++i) 146 146 heap[i] = heap[heap[i]] + 1; 147 147 … … 152 152 153 153 /* Set the code length of each leaf in the huffman tree */ 154 for (i = 0; i < num; i++)154 for (i = 0; i < num; ++i) 155 155 { 156 156 register u_long codelen = heap[i + num]; … … 162 162 if (codelen < hd->mincodelen) 163 163 hd->mincodelen = codelen; 164 hd->lencount[codelen]++;164 ++hd->lencount[codelen]; 165 165 } 166 166 … … 174 174 /* Calculate the current codes for each different code length */ 175 175 hd->min_code[hd->maxcodelen] = 0; 176 for (i = hd->maxcodelen - 1; i>=0; i--)176 for (i = hd->maxcodelen - 1; i>=0; --i) 177 177 hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1; 178 178 } 179 delete heap;179 delete []heap; 180 180 return (hd); 181 181 182 182 error2: 183 delete heap;183 delete []heap; 184 184 error1: 185 185 if (!data) … … 204 204 *mem += data->num_codes * sizeof (*codes); 205 205 memcpy (mc, data->min_code, sizeof (mc)); 206 for (i = 0; i < data->num_codes; i++)206 for (i = 0; i < data->num_codes; ++i) 207 207 if (data->clens[i]) 208 208 codes[i] = mc[(int) (data->clens[i])]++; … … 226 226 if (!(values = new unsigned long *[MAX_HUFFCODE_LEN + 1])) 227 227 { 228 delete vals;228 delete []vals; 229 229 return (NULL); 230 230 } … … 237 237 238 238 fcode[0] = values[0] = &vals[0]; 239 for (i = 1; i <= data->maxcodelen; i++)239 for (i = 1; i <= data->maxcodelen; ++i) 240 240 fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]]; 241 241 242 for (i = 0; i < data->num_codes; i++)242 for (i = 0; i < data->num_codes; ++i) 243 243 if (data->clens[i]) 244 244 *fcode[(int) (data->clens[i])]++ = i; … … 256 256 if (!Generate_Huffman_Data (num, freqs, &hd, NULL)) 257 257 return -1; 258 for (i = 0; i < num; i++)258 for (i = 0; i < num; ++i) 259 259 size += counts[i] * hd.clens[i]; 260 delete hd.clens;260 delete []hd.clens; 261 261 return size; 262 262 } … … 290 290 /* [RPAP - Jan 97: Endian Ordering] */ 291 291 int i; 292 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)292 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i) 293 293 HTONSI(hd->lencount[i]); 294 for (i = 0; i < hd->maxcodelen + 1; i++)294 for (i = 0; i < hd->maxcodelen + 1; ++i) 295 295 HTONUL(hd->min_code[i]); 296 296 … … 308 308 309 309 /* [RPAP - Jan 97: Endian Ordering] */ 310 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)310 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i) 311 311 NTOHSI(hd->lencount[i]); 312 for (i = 0; i < hd->maxcodelen + 1; i++)312 for (i = 0; i < hd->maxcodelen + 1; ++i) 313 313 NTOHUL(hd->min_code[i]); 314 314 } … … 350 350 351 351 /* [RPAP - Jan 97: Endian Ordering] */ 352 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)352 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i) 353 353 NTOHSI(hd->lencount[i]); 354 354 … … 363 363 364 364 /* [RPAP - Jan 97: Endian Ordering] */ 365 for (i = 0; i < hd->maxcodelen + 1; i++)365 for (i = 0; i < hd->maxcodelen + 1; ++i) 366 366 NTOHUL(hd->min_code[i]); 367 367 -
trunk/indexers/mgpp/text/GSDLQueryLex.cpp
r8242 r8692 27 27 UCArray &text, 28 28 int len) { 29 if (text.capacity() < text.size() + len + 1) { 30 text.reserve(text.size() + len + 1); 31 } 29 32 while (len > 0) { 30 33 text.push_back (*here++); 31 len--;34 --len; 32 35 } 33 36 } … … 238 241 //return false; 239 242 240 UCArray AND; SetCStr (AND, "AND"); 241 if (el.text == AND) { 243 //UCArray AND; SetCStr (AND, "AND"); 244 //if (el.text == AND) { 245 if (UCArrayCStrEquals(el.text, "AND")) { 242 246 el.lexType = AndOpE; 243 247 return true; 244 248 } 245 UCArray OR; SetCStr (OR, "OR"); 246 if (el.text == OR) { 249 //UCArray OR; SetCStr (OR, "OR"); 250 //if (el.text == OR) { 251 if (UCArrayCStrEquals(el.text, "OR")) { 247 252 el.lexType = OrOpE; 248 253 return true; 249 254 } 250 UCArray NOT; SetCStr (NOT, "NOT"); 251 if (el.text == NOT) { 255 //UCArray NOT; SetCStr (NOT, "NOT"); 256 //if (el.text == NOT) { 257 if (UCArrayCStrEquals(el.text, "NOT")) { 252 258 el.lexType = NotOpE; 253 259 return true; 254 260 } 255 UCArray NEAR; SetCStr (NEAR, "NEAR" );261 UCArray NEAR; SetCStr (NEAR, "NEAR", 4); 256 262 if (PrefixLen(el.text, NEAR)==4) { 257 263 el.lexType = NearOpE; 258 264 return true; 259 265 } 260 UCArray WITHIN; SetCStr (WITHIN, "WITHIN" );266 UCArray WITHIN; SetCStr (WITHIN, "WITHIN", 6); 261 267 if (PrefixLen(el.text, WITHIN)==6) { 262 268 el.lexType = WithinOpE; -
trunk/indexers/mgpp/text/GSDLQueryParser.cpp
r8242 r8692 94 94 UCArray &nearby, 95 95 bool reverse) { 96 UCArray NEARBY; SetCStr(NEARBY, "NEAR" );97 UCArray WITHIN; SetCStr(WITHIN, "WITHIN" );96 UCArray NEARBY; SetCStr(NEARBY, "NEAR", 4); 97 UCArray WITHIN; SetCStr(WITHIN, "WITHIN", 6); 98 98 99 99 if (nearby == NEARBY) { // no modifier … … 123 123 while (here != end) { 124 124 size = size*10 + (*here-'0'); 125 here++;125 ++here; 126 126 } 127 127 if (within) { … … 263 263 UCArray near_string; 264 264 while (ParseLexEl (here, end, el)) { 265 if (el.lexType == TermE || el.lexType == IntegerE) { 265 // cant have AND, OR, NOT in square brackets, so assume they are words 266 if (el.lexType == TermE || el.lexType == IntegerE || el.lexType == AndOpE || el.lexType == OrOpE || el.lexType == NotOpE) { 266 267 TermNode termNode; 267 268 termNode.term = el.text; -
trunk/indexers/mgpp/text/IndexData.cpp
r3365 r8692 186 186 IvfLevelInfoMap::const_iterator levelHere, levelEnd; 187 187 for (levelHere=levels.levelInfo.begin(), levelEnd=levels.levelInfo.end(); 188 levelHere!=levelEnd && (*levelHere).first != level; levelHere++)189 curLevelNum++;188 levelHere!=levelEnd && (*levelHere).first != level; ++levelHere) 189 ++curLevelNum; 190 190 191 191 // make sure we found the level -
trunk/indexers/mgpp/text/MGQuery.cpp
r3365 r8692 108 108 rightI < rightResult.docs.size()) { 109 109 if (result.docs[leftI] < rightResult.docs[rightI]) { 110 leftI++;110 ++leftI; 111 111 } else if (result.docs[leftI] > rightResult.docs[rightI]) { 112 rightI++;112 ++rightI; 113 113 } else { 114 114 // the documents are equal … … 116 116 if (haveAccum) 117 117 result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI]; 118 leftI++;119 rightI++;120 outI++;118 ++leftI; 119 ++rightI; 120 ++outI; 121 121 } 122 122 } … … 209 209 if (haveAccum) 210 210 result.ranks.push_back (leftResult.ranks[leftI]); 211 leftI++;211 ++leftI; 212 212 213 213 } else if (leftDocNum > rightDocNum) { … … 215 215 if (haveAccum) 216 216 result.ranks.push_back (rightResult.ranks[rightI]); 217 rightI++;217 ++rightI; 218 218 219 219 } else { // equal … … 222 222 result.ranks.push_back (leftResult.ranks[leftI] + 223 223 rightResult.ranks[rightI]); 224 leftI++;225 rightI++;224 ++leftI; 225 ++rightI; 226 226 } 227 227 } … … 307 307 if (haveAccum) 308 308 result.ranks[outI] = result.ranks[queryI]; 309 queryI++;310 outI++;309 ++queryI; 310 ++outI; 311 311 } else if (result.docs[queryI] > notResult.docs[notI]) { 312 notI++;312 ++notI; 313 313 } else { 314 314 // the documents are equal, ignore both 315 queryI++;316 notI++;315 ++queryI; 316 ++notI; 317 317 } 318 318 } … … 378 378 unsigned long i; 379 379 FragRange thisFrag; 380 for (i=0; i<tagEl.frag_occur; i++) {380 for (i=0; i<tagEl.frag_occur; ++i) { 381 381 // get start 382 382 unsigned long delta = buffer.bblock_decode (B, NULL)-1; … … 446 446 CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData); 447 447 448 here++;448 ++here; 449 449 } 450 450 } … … 521 521 result); 522 522 523 termHere++;523 ++termHere; 524 524 525 525 if (termHere == termEnd) return; // nothing more to do … … 546 546 (*termHere).endRange, 547 547 fragLimitsPtr); 548 termHere++;548 ++termHere; 549 549 } 550 550 … … 572 572 while (here != end) { 573 573 (*here).Print (s, indent+2); 574 here++;574 ++here; 575 575 } 576 576 } … … 633 633 unsigned long i; 634 634 if (queryInfo.sortByRank || queryInfo.needRankInfo) { 635 for (i=0; i<result.ranks.size(); i++) {635 for (i=0; i<result.ranks.size(); ++i) { 636 636 result.ranks[i] /= 637 637 indexData.weightData.GetLowerApproxDocWeight (result.docs[i]); … … 668 668 indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr; 669 669 670 for (i=0; i<resultsSize; i++) {670 for (i=0; i<resultsSize; ++i) { 671 671 result.ranks[i] = result.ranks[i] * 672 672 indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) / … … 739 739 unsigned long DocNum = 0; 740 740 741 for (unsigned long i=0; i<realresult.docs.size(); i++) {741 for (unsigned long i=0; i<realresult.docs.size(); ++i) { 742 742 743 743 // do an if ! here???? -
trunk/indexers/mgpp/text/QueryLex.cpp
r3365 r8692 27 27 UCArray &text, 28 28 int len) { 29 if (text.capacity() < text.size() + len + 1) { 30 text.reserve(text.size + len + 1); 31 } 29 32 while (len > 0) { 30 33 text.push_back (*here++); 31 len--;34 --len; 32 35 } 33 36 } … … 148 151 if (!ParseTerm (here, end, el.text)) return false; 149 152 150 UCArray AND; SetCStr (AND, "AND"); 151 if (el.text == AND) { 153 //UCArray AND; SetCStr (AND, "AND"); 154 //if (el.text == AND) { 155 if (UCArrayCStrEquals(el.text, "AND")) { 152 156 el.lexType = AndOpE; 153 157 return true; 154 158 } 155 UCArray OR; SetCStr (OR, "OR"); 156 if (el.text == OR) { 159 //UCArray OR; SetCStr (OR, "OR"); 160 //if (el.text == OR) { 161 if (UCArrayCStrEquals(el.text, "OR")) { 157 162 el.lexType = OrOpE; 158 163 return true; 159 164 } 160 UCArray NOT; SetCStr (NOT, "NOT"); 161 if (el.text == NOT) { 165 //UCArray NOT; SetCStr (NOT, "NOT"); 166 //if (el.text == NOT) { 167 if (UCArrayCStrEquals(el.text, "NOT")) { 162 168 el.lexType = NotOpE; 163 169 return true; -
trunk/indexers/mgpp/text/QueryTester.cpp
r3365 r8692 49 49 DocNumArray &docSet1 = setNode1->queryResult.docs; 50 50 RankArray &rankSet1 = setNode1->queryResult.ranks; 51 docSet1.push_back (1); rankSet1.push_back (0.1 );52 docSet1.push_back (10); rankSet1.push_back (0.2 );53 docSet1.push_back (15); rankSet1.push_back (0.2 );54 docSet1.push_back (18); rankSet1.push_back (0.4 );55 docSet1.push_back (19); rankSet1.push_back (0.5 );51 docSet1.push_back (1); rankSet1.push_back (0.1f); 52 docSet1.push_back (10); rankSet1.push_back (0.2f); 53 docSet1.push_back (15); rankSet1.push_back (0.2f); 54 docSet1.push_back (18); rankSet1.push_back (0.4f); 55 docSet1.push_back (19); rankSet1.push_back (0.5f); 56 56 57 57 SetQueryNode *setNode2 = new SetQueryNode; 58 58 DocNumArray &docSet2 = setNode2->queryResult.docs; 59 59 RankArray &rankSet2 = setNode2->queryResult.ranks; 60 docSet2.push_back (2); rankSet2.push_back (0.1 );61 docSet2.push_back (11); rankSet2.push_back (0.2 );62 docSet2.push_back (12); rankSet2.push_back (0.3 );63 docSet2.push_back (13); rankSet2.push_back (0.4 );64 docSet2.push_back (14); rankSet2.push_back (0.5 );65 docSet2.push_back (15); rankSet2.push_back (0.6 );66 docSet2.push_back (16); rankSet2.push_back (0.7 );67 docSet2.push_back (17); rankSet2.push_back (0.8 );68 docSet2.push_back (19); rankSet2.push_back (0.9 );69 docSet2.push_back (20); rankSet2.push_back (0.1 );70 docSet2.push_back (21); rankSet2.push_back (0.2 );60 docSet2.push_back (2); rankSet2.push_back (0.1f); 61 docSet2.push_back (11); rankSet2.push_back (0.2f); 62 docSet2.push_back (12); rankSet2.push_back (0.3f); 63 docSet2.push_back (13); rankSet2.push_back (0.4f); 64 docSet2.push_back (14); rankSet2.push_back (0.5f); 65 docSet2.push_back (15); rankSet2.push_back (0.6f); 66 docSet2.push_back (16); rankSet2.push_back (0.7f); 67 docSet2.push_back (17); rankSet2.push_back (0.8f); 68 docSet2.push_back (19); rankSet2.push_back (0.9f); 69 docSet2.push_back (20); rankSet2.push_back (0.1f); 70 docSet2.push_back (21); rankSet2.push_back (0.2f); 71 71 72 72 cout << "\n" << setNode1->queryResult << "AND\n\n" … … 170 170 DocNumArray &docSet1 = setNode1->queryResult.docs; 171 171 RankArray &rankSet1 = setNode1->queryResult.ranks; 172 docSet1.push_back (1); rankSet1.push_back (0.1 );173 docSet1.push_back (10); rankSet1.push_back (0.2 );174 docSet1.push_back (15); rankSet1.push_back (0.2 );175 docSet1.push_back (18); rankSet1.push_back (0.4 );176 docSet1.push_back (19); rankSet1.push_back (0.5 );172 docSet1.push_back (1); rankSet1.push_back (0.1f); 173 docSet1.push_back (10); rankSet1.push_back (0.2f); 174 docSet1.push_back (15); rankSet1.push_back (0.2f); 175 docSet1.push_back (18); rankSet1.push_back (0.4f); 176 docSet1.push_back (19); rankSet1.push_back (0.5f); 177 177 178 178 SetQueryNode *setNode2 = new SetQueryNode; 179 179 DocNumArray &docSet2 = setNode2->queryResult.docs; 180 180 RankArray &rankSet2 = setNode2->queryResult.ranks; 181 docSet2.push_back (2); rankSet2.push_back (0.1 );182 docSet2.push_back (11); rankSet2.push_back (0.2 );183 docSet2.push_back (12); rankSet2.push_back (0.3 );184 docSet2.push_back (13); rankSet2.push_back (0.4 );185 docSet2.push_back (14); rankSet2.push_back (0.5 );186 docSet2.push_back (15); rankSet2.push_back (0.6 );187 docSet2.push_back (16); rankSet2.push_back (0.7 );188 docSet2.push_back (17); rankSet2.push_back (0.8 );189 docSet2.push_back (19); rankSet2.push_back (0.9 );190 docSet2.push_back (20); rankSet2.push_back (0.1 );191 docSet2.push_back (21); rankSet2.push_back (0.2 );181 docSet2.push_back (2); rankSet2.push_back (0.1f); 182 docSet2.push_back (11); rankSet2.push_back (0.2f); 183 docSet2.push_back (12); rankSet2.push_back (0.3f); 184 docSet2.push_back (13); rankSet2.push_back (0.4f); 185 docSet2.push_back (14); rankSet2.push_back (0.5f); 186 docSet2.push_back (15); rankSet2.push_back (0.6f); 187 docSet2.push_back (16); rankSet2.push_back (0.7f); 188 docSet2.push_back (17); rankSet2.push_back (0.8f); 189 docSet2.push_back (19); rankSet2.push_back (0.9f); 190 docSet2.push_back (20); rankSet2.push_back (0.1f); 191 docSet2.push_back (21); rankSet2.push_back (0.2f); 192 192 193 193 cout << "\n" << setNode1->queryResult << "OR\n\n" … … 214 214 DocNumArray &rcDocSet = resultCompare.docs; 215 215 RankArray &rcRankSet = resultCompare.ranks; 216 rcDocSet.push_back (1); rcRankSet.push_back (0.1 );217 rcDocSet.push_back (2); rcRankSet.push_back (0.1 );218 rcDocSet.push_back (10); rcRankSet.push_back (0.2 );219 rcDocSet.push_back (11); rcRankSet.push_back (0.2 );220 rcDocSet.push_back (12); rcRankSet.push_back (0.3 );221 rcDocSet.push_back (13); rcRankSet.push_back (0.4 );222 rcDocSet.push_back (14); rcRankSet.push_back (0.5 );223 rcDocSet.push_back (15); rcRankSet.push_back (0.2 +0.6);224 rcDocSet.push_back (16); rcRankSet.push_back (0.7 );225 rcDocSet.push_back (17); rcRankSet.push_back (0.8 );226 rcDocSet.push_back (18); rcRankSet.push_back (0.4 );227 rcDocSet.push_back (19); rcRankSet.push_back (0.9 +0.5);228 rcDocSet.push_back (20); rcRankSet.push_back (0.1 );229 rcDocSet.push_back (21); rcRankSet.push_back (0.2 );216 rcDocSet.push_back (1); rcRankSet.push_back (0.1f); 217 rcDocSet.push_back (2); rcRankSet.push_back (0.1f); 218 rcDocSet.push_back (10); rcRankSet.push_back (0.2f); 219 rcDocSet.push_back (11); rcRankSet.push_back (0.2f); 220 rcDocSet.push_back (12); rcRankSet.push_back (0.3f); 221 rcDocSet.push_back (13); rcRankSet.push_back (0.4f); 222 rcDocSet.push_back (14); rcRankSet.push_back (0.5f); 223 rcDocSet.push_back (15); rcRankSet.push_back (0.2f+0.6f); 224 rcDocSet.push_back (16); rcRankSet.push_back (0.7f); 225 rcDocSet.push_back (17); rcRankSet.push_back (0.8f); 226 rcDocSet.push_back (18); rcRankSet.push_back (0.4f); 227 rcDocSet.push_back (19); rcRankSet.push_back (0.9f+0.5f); 228 rcDocSet.push_back (20); rcRankSet.push_back (0.1f); 229 rcDocSet.push_back (21); rcRankSet.push_back (0.2f); 230 230 231 231 … … 317 317 DocNumArray &docSet1 = setNode1->queryResult.docs; 318 318 RankArray &rankSet1 = setNode1->queryResult.ranks; 319 docSet1.push_back (1); rankSet1.push_back (0.1 );320 docSet1.push_back (10); rankSet1.push_back (0.2 );321 docSet1.push_back (15); rankSet1.push_back (0.2 );322 docSet1.push_back (18); rankSet1.push_back (0.4 );323 docSet1.push_back (19); rankSet1.push_back (0.5 );319 docSet1.push_back (1); rankSet1.push_back (0.1f); 320 docSet1.push_back (10); rankSet1.push_back (0.2f); 321 docSet1.push_back (15); rankSet1.push_back (0.2f); 322 docSet1.push_back (18); rankSet1.push_back (0.4f); 323 docSet1.push_back (19); rankSet1.push_back (0.5f); 324 324 325 325 SetQueryNode *setNode2 = new SetQueryNode; 326 326 DocNumArray &docSet2 = setNode2->queryResult.docs; 327 327 RankArray &rankSet2 = setNode2->queryResult.ranks; 328 docSet2.push_back (2); rankSet2.push_back (0.1 );329 docSet2.push_back (11); rankSet2.push_back (0.2 );330 docSet2.push_back (12); rankSet2.push_back (0.3 );331 docSet2.push_back (13); rankSet2.push_back (0.4 );332 docSet2.push_back (14); rankSet2.push_back (0.5 );333 docSet2.push_back (15); rankSet2.push_back (0.6 );334 docSet2.push_back (16); rankSet2.push_back (0.7 );335 docSet2.push_back (17); rankSet2.push_back (0.8 );336 docSet2.push_back (19); rankSet2.push_back (0.9 );337 docSet2.push_back (20); rankSet2.push_back (0.1 );338 docSet2.push_back (21); rankSet2.push_back (0.2 );328 docSet2.push_back (2); rankSet2.push_back (0.1f); 329 docSet2.push_back (11); rankSet2.push_back (0.2f); 330 docSet2.push_back (12); rankSet2.push_back (0.3f); 331 docSet2.push_back (13); rankSet2.push_back (0.4f); 332 docSet2.push_back (14); rankSet2.push_back (0.5f); 333 docSet2.push_back (15); rankSet2.push_back (0.6f); 334 docSet2.push_back (16); rankSet2.push_back (0.7f); 335 docSet2.push_back (17); rankSet2.push_back (0.8f); 336 docSet2.push_back (19); rankSet2.push_back (0.9f); 337 docSet2.push_back (20); rankSet2.push_back (0.1f); 338 docSet2.push_back (21); rankSet2.push_back (0.2f); 339 339 340 340 cout << "\n" << setNode1->queryResult << "NOT\n\n" -
trunk/indexers/mgpp/text/Queryer.cpp
r6116 r8692 111 111 // do querying 112 112 QueryInfo queryInfo; 113 SetCStr (queryInfo.docLevel, "Document" );113 SetCStr (queryInfo.docLevel, "Document", 8); 114 114 queryInfo.maxDocs = 50; 115 115 queryInfo.sortByRank = true; … … 125 125 126 126 UCArray docLevel; 127 SetCStr(docLevel, "Document" );127 SetCStr(docLevel, "Document", 8); 128 128 129 129 UCArray level; … … 143 143 cout << "> "; 144 144 cin.getline(query, 2048, '\n'); 145 SetCStr (queryArray, query );145 SetCStr (queryArray, query, strlen(query)); 146 146 147 147 // check for commands … … 155 155 cin >> query; 156 156 UCArrayClear(queryInfo.docLevel); 157 SetCStr(queryInfo.docLevel, query );157 SetCStr(queryInfo.docLevel, query, strlen(query)); 158 158 cout << "index set to " << queryInfo.docLevel <<"\n"; 159 159 cin.getline(query, 2048, '\n'); … … 162 162 cin >> query; 163 163 UCArrayClear(level); 164 SetCStr(level, query );164 SetCStr(level, query, strlen(query)); 165 165 cout << "level set to " << level <<"\n"; 166 166 cin.getline(query, 2048, '\n'); … … 221 221 cin>>query; 222 222 UCArrayClear(browseNode.term); 223 SetCStr(browseNode.term, query );223 SetCStr(browseNode.term, query, strlen(query)); 224 224 cin.getline(query, 2048, '\n'); // get rest of line 225 225 -
trunk/indexers/mgpp/text/TagInfo.cpp
r3365 r8692 24 24 25 25 void TagInfo::SetDocTag (const char *cStr) { 26 SetCStr (docTag, cStr );26 SetCStr (docTag, cStr, strlen(cStr)); 27 27 } 28 28 29 29 void TagInfo::SetIndexLevel (const char *cStr) { 30 SetCStr (indexLevel, cStr );30 SetCStr (indexLevel, cStr, strlen(cStr)); 31 31 } 32 32 void TagInfo::AddLevelTag (const char *cStr) { 33 33 // convert the string 34 34 UCArray cArr; 35 SetCStr (cArr, cStr );35 SetCStr (cArr, cStr, strlen(cStr)); 36 36 37 37 // insert the tag … … 64 64 while (here != end) { 65 65 s << "\"" << (*here) << "\""; 66 here++;66 ++here; 67 67 if (here != end) s << ", "; 68 68 } -
trunk/indexers/mgpp/text/Terms.cpp
r8242 r8692 51 51 52 52 unsigned long i; 53 for (i=0; i<t.equivTerms.size(); i++) {53 for (i=0; i<t.equivTerms.size(); ++i) { 54 54 s << t.equivTerms[i] << ", "; 55 55 } … … 83 83 84 84 s << "termFreqs: "; 85 for (unsigned long i=0; i<termFreqs.size(); i++)85 for (unsigned long i=0; i<termFreqs.size(); ++i) 86 86 s << termFreqs[i] << ", "; 87 87 … … 95 95 s << "docs: "; 96 96 unsigned long i; 97 for (i=0; i<r.docs.size(); i++)97 for (i=0; i<r.docs.size(); ++i) 98 98 s << r.docs[i] << ", "; 99 99 100 100 s << "\nranks: "; 101 for (i=0; i<r.ranks.size(); i++)101 for (i=0; i<r.ranks.size(); ++i) 102 102 s << r.ranks[i] << ", "; 103 103 104 104 s << "\ntermFreqs: "; 105 for (i=0; i<r.termFreqs.size(); i++)105 for (i=0; i<r.termFreqs.size(); ++i) 106 106 s << r.termFreqs[i] << ", "; 107 107 … … 137 137 s << "docs: "; 138 138 unsigned long i; 139 for (i=0; i<r.docs.size(); i++)139 for (i=0; i<r.docs.size(); ++i) 140 140 s << r.docs[i] << ", "; 141 141 142 142 s << "\nlevels: "; 143 for (i=0; i<r.levels.size(); i++)143 for (i=0; i<r.levels.size(); ++i) 144 144 s << r.levels[i] << ", "; 145 145 146 146 147 147 s << "\nranks: "; 148 for (i=0; i<r.ranks.size(); i++)148 for (i=0; i<r.ranks.size(); ++i) 149 149 s << r.ranks[i] << ", "; 150 150 151 151 s << "\ntermFreqs: "; 152 for (i=0; i<r.termFreqs.size(); i++)152 for (i=0; i<r.termFreqs.size(); ++i) 153 153 s << r.termFreqs[i] << ", "; 154 154 s << "\nactual number of docs found: " << r.actualNumDocs; … … 182 182 s << "terms: "; 183 183 unsigned long i; 184 for (i=0; i<r.termFreqs.size(); i++)184 for (i=0; i<r.termFreqs.size(); ++i) 185 185 s << r.termFreqs[i] << ", "; 186 186 s << "\n\n"; … … 324 324 unsigned long fragLimitI = 0; 325 325 unsigned long i; 326 for (i=0; i<wordDictEl.frag_occur; i++) {326 for (i=0; i<wordDictEl.frag_occur; ++i) { 327 327 fragNum += buffer.bblock_decode (B, NULL); 328 328 if (!indexData.ifh.word_level_index) termFreq = buffer.gamma_decode (NULL); … … 333 333 while (fragLimitI+1 < (*fragLimits).size() && 334 334 fragNum > (*fragLimits)[fragLimitI+1].rangeStart) { 335 fragLimitI++;335 ++fragLimitI; 336 336 } 337 337 } … … 375 375 if (needFragFreqs) 376 376 outFragData.fragFreqs.push_back (f2.fragFreqs[f2I]); 377 f2I++;377 ++f2I; 378 378 379 379 } else if (f1I < f1Size && … … 384 384 if (needFragFreqs) 385 385 outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]); 386 f1I++;386 ++f1I; 387 387 388 388 } else { … … 391 391 if (needFragFreqs) 392 392 outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]+f2.fragFreqs[f2I]); 393 f1I++;394 f2I++;393 ++f1I; 394 ++f2I; 395 395 } 396 396 } … … 432 432 while (fragLimitI+1 < fragLimitSize && 433 433 comFragNum > (signed long)(*fragLimits)[fragLimitI+1].rangeStart) { 434 fragLimitI++;434 ++fragLimitI; 435 435 } 436 436 } … … 439 439 (fragLimits!=NULL && 440 440 fragNum<=(signed long)(*fragLimits)[fragLimitI].rangeStart)) { 441 fragDataI++;441 ++fragDataI; 442 442 443 443 } else if (fragNum > comFragNum+endRange || 444 444 (fragLimits!=NULL && 445 445 fragNum>(signed long)(*fragLimits)[fragLimitI].rangeEnd)) { 446 comFragDataI++;446 ++comFragDataI; 447 447 448 448 } else { … … 454 454 fragData.fragFreqs[fragDataI] : comFragData.fragFreqs[comFragDataI]; 455 455 } 456 fragDataI++;457 comFragDataI++;458 outI++;456 ++fragDataI; 457 ++comFragDataI; 458 ++outI; 459 459 } 460 460 } … … 527 527 } 528 528 } 529 termDataI++;529 ++termDataI; 530 530 } 531 531 … … 600 600 while (resultI < resultSize && 601 601 result.docs[resultI] < lastLevelDocNum) 602 resultI++;602 ++resultI; 603 603 604 604 // store the result … … 607 607 if (needRanks) 608 608 result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt; 609 resultI++;610 resultOutI++;609 ++resultI; 610 ++resultOutI; 611 611 } 612 612 } … … 621 621 } 622 622 623 termDataI++;623 ++termDataI; 624 624 } // while 625 625 … … 631 631 while (resultI < resultSize && 632 632 result.docs[resultI] < lastLevelDocNum) 633 resultI++;633 ++resultI; 634 634 635 635 // store the result … … 638 638 if (needRanks) 639 639 result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt; 640 resultI++;641 resultOutI++;640 ++resultI; 641 ++resultOutI; 642 642 } 643 643 } … … 688 688 while (resultI < resultSize && 689 689 result.docs[resultI] < lastLevelDocNum) 690 resultI++;690 ++resultI; 691 691 692 692 // store the result … … 695 695 if (needRanks) 696 696 result.ranks[resultOutI] = result.ranks[resultI]; 697 resultI++;698 resultOutI++;697 ++resultI; 698 ++resultOutI; 699 699 } 700 700 } … … 704 704 } 705 705 706 termDataI++;706 ++termDataI; 707 707 } 708 708 … … 711 711 while (resultI < resultSize && 712 712 result.docs[resultI] < lastLevelDocNum) 713 resultI++;713 ++resultI; 714 714 715 715 // store the result … … 718 718 if (needRanks) 719 719 result.ranks[resultOutI] = result.ranks[resultI]; 720 resultI++;721 resultOutI++;720 ++resultI; 721 ++resultOutI; 722 722 } 723 723 } … … 777 777 termdata.termFreq = (*here).freq; 778 778 terms.push_back(termdata); 779 here++;779 ++here; 780 780 } 781 781 -
trunk/indexers/mgpp/text/TextEl.cpp
r3365 r8692 79 79 80 80 static void ToggleParaTag (TextEl &el, bool &compatInPara) { 81 SetCStr (el.tagName, "Paragraph" );81 SetCStr (el.tagName, "Paragraph", 9); 82 82 el.text.erase (el.text.begin(), el.text.end()); 83 83 if (compatInPara) { … … 90 90 static void SetRecTag (TextEl &el, TextElType elType) { 91 91 el.elType = elType; 92 SetCStr (el.tagName, "Document" );92 SetCStr (el.tagName, "Document", 8); 93 93 el.text.erase (el.text.begin(), el.text.end()); 94 94 if (elType == CloseTagE) -
trunk/indexers/mgpp/text/TextGet.cpp
r3365 r8692 20 20 **************************************************************************/ 21 21 22 // is important to be first, so we escape the truncation warning on VC++ 23 #include "TextGet.h" 22 24 // need this to avoid bizarre compiler problems under VC++ 6.0 23 25 #if defined (__WIN32__) && !defined (GSDL_USE_IOS_H) … … 25 27 #endif 26 28 27 #include "TextGet.h"28 29 #include "mg_files.h" 29 30 #include "netorder.h" … … 49 50 memset (ad, '\0', sizeof (*ad)); 50 51 51 for (i = 0; i <= 1; i++)52 for (i = 0; i <= 1; ++i) 52 53 { 53 54 int j; … … 77 78 78 79 pos = ad->word_data[i]; 79 for (j = 0; j < (int)ad->afh[i].num_frags; j++)80 for (j = 0; j < (int)ad->afh[i].num_frags; ++j) 80 81 { 81 82 ad->words[i][j] = pos; … … 93 94 ad->blk_end[i][num] = ad->blk_start[i][num] + 94 95 (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2; 95 num++;96 ++num; 96 97 } 97 98 } … … 115 116 lookback = cd.cdh.lookback; 116 117 117 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++) {118 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i) { 118 119 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback; 119 120 mem_reqd += cfh->huff_words_size[i]; … … 135 136 values[0] = vals; 136 137 values[0][0] = next_word[0]; 137 for (i = 1; i <= cfh->hd.maxcodelen; i++)138 for (i = 1; i <= cfh->hd.maxcodelen; ++i) 138 139 { 139 140 int next_start = (values[i - 1] - vals) + … … 146 147 memset (num_set, '\0', sizeof (num_set)); 147 148 148 for (i = 0; i < cfh->hd.num_codes; i++)149 for (i = 0; i < cfh->hd.num_codes; ++i) 149 150 { 150 151 register int val, copy; … … 175 176 } 176 177 memcpy (last_word[len], word, *word + 1); 177 num_set[len]++;178 ++num_set[len]; 178 179 } 179 180 if (cfh->hd.clens) 180 delete cfh->hd.clens;181 delete []cfh->hd.clens; 181 182 cfh->hd.clens = NULL; 182 183 return values; … … 196 197 return 3; 197 198 if (hd->clens) 198 delete hd->clens;199 delete []hd->clens; 199 200 hd->clens = NULL; 200 201 if (type == chars) … … 239 240 return false; 240 241 241 for (which = 0; which < 2; which++)242 for (which = 0; which < 2; ++which) 242 243 switch (cd.cdh.dict_type) 243 244 { … … 353 354 fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict); 354 355 355 for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)356 for (p = (u_long *) cd; (u_long) p < (u_long) end; ++p) 356 357 if (IS_FIXUP (p)) 357 358 { … … 364 365 NTOHUL(cd->cdh.dict_type); 365 366 NTOHUL(cd->cdh.novel_method); 366 for (i = 0; i < TEXT_PARAMS; i++)367 for (i = 0; i < TEXT_PARAMS; ++i) 367 368 NTOHUL(cd->cdh.params[i]); 368 369 NTOHUL(cd->cdh.num_words[0]); … … 372 373 NTOHUL(cd->cdh.lookback); 373 374 /* cfh */ 374 for (i = 0; i <= 1; i++)375 for (i = 0; i <= 1; ++i) 375 376 { 376 377 int j; … … 379 380 NTOHSI(cd->cfh[i]->hd.mincodelen); 380 381 NTOHSI(cd->cfh[i]->hd.maxcodelen); 381 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)382 for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j) 382 383 { 383 384 NTOHSI(cd->cfh[i]->hd.lencount[j]); … … 385 386 } 386 387 NTOHUL(cd->cfh[i]->uncompressed_size); 387 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)388 for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j) 388 389 NTOHUL(cd->cfh[i]->huff_words_size[j]); 389 390 } … … 392 393 if (cd->cdh.novel_method == MG_NOVEL_DELTA || 393 394 cd->cdh.novel_method == MG_NOVEL_HYBRID) 394 for (i = 0; i <= 1; i++)395 for (i = 0; i <= 1; ++i) 395 396 { 396 397 int j; … … 398 399 NTOHUL(cd->ad->afh[i].num_frags); 399 400 NTOHUL(cd->ad->afh[i].mem_for_frags); 400 for (j = 0; j < 33; j++)401 for (j = 0; j < 33; ++j) 401 402 { 402 403 NTOHSI(cd->ad->blk_start[i][j]); … … 406 407 NTOHSI(cd->fast_loaded); 407 408 408 delete fixup;409 delete []fixup; 409 410 410 411 // the whole fast comp dict is a bit of a hack so I don't … … 584 585 unsigned long bits = 0; 585 586 587 if (docText.capacity() < docText.size() + num_bits + 1) { 588 docText.reserve(docText.size() + num_bits + 1); 589 } 586 590 // keep decoding bits until enough bits have been decoded 587 591 while (bits < num_bits) { … … 616 620 len = buffer.huff_decode(cd.lens_huff[which]->min_code, 617 621 cd.lens_vals[which], &bits); 618 for (i = 0; i < len; i++) {622 for (i = 0; i < len; ++i) { 619 623 c = buffer.huff_decode(cd.chars_huff[which]->min_code, 620 624 cd.chars_vals[which], &bits); … … 633 637 { 634 638 idx = buffer.delta_decode (&bits); 635 idx--;639 --idx; 636 640 } 637 641 break; … … 640 644 int k; 641 645 k = buffer.gamma_decode (&bits); 642 k--;646 --k; 643 647 idx = buffer.binary_decode(ad->blk_end[which][k] - 644 648 ad->blk_start[which][k] + 1, … … 650 654 base = ad->words[which][idx]; 651 655 len = *base++; 652 for (; len; len--)656 for (; len; --len) 653 657 { 654 658 docText.push_back (*base++); -
trunk/indexers/mgpp/text/UCArray.cpp
r8242 r8692 29 29 while (*cStr != '\0') { 30 30 text.push_back (*cStr); 31 cStr++;31 ++cStr; 32 32 } 33 33 } … … 46 46 } 47 47 48 char * GetCStr( UCArraytext) {48 char * GetCStr(const UCArray& text) { 49 49 50 50 char *cstr = new char[text.size()+1]; … … 55 55 while (here != end) { 56 56 cstr[i] = text[i]; 57 here++; i++;57 ++here; ++i; 58 58 } 59 59 cstr[i]='\0'; 60 60 return cstr; 61 } 62 63 bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr) 64 { 65 if ((cStr == NULL || *cStr == '\0') && text.empty()) return true; 66 UCArray::const_iterator thisUC = text.begin(); 67 UCArray::const_iterator endUC = text.end(); 68 while (thisUC != endUC && *cStr != '\0') { 69 if (*thisUC != *cStr) return false; 70 ++cStr; ++thisUC; 71 } 72 if (thisUC == endUC && *cStr == '\0') return true; 73 return false; 61 74 } 62 75 … … 158 171 a.push_back (b); 159 172 160 arraySize--;173 --arraySize; 161 174 } 162 175 … … 297 310 298 311 while (i < l && *a1Here == *a2Here) { 299 i++; ++a1Here; ++a2Here;312 ++i; ++a1Here; ++a2Here; 300 313 } 301 314 … … 340 353 unsigned char c = fgetc (f); 341 354 a.push_back (c); 342 sufLen--;343 } 344 345 return true; 346 } 347 355 --sufLen; 356 } 357 358 return true; 359 } 360 -
trunk/indexers/mgpp/text/UCArray.h
r8242 r8692 22 22 #ifndef UCARRAY_H 23 23 #define UCARRAY_H 24 25 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) 26 #pragma warning(disable:4786) 27 #endif 24 28 25 29 // need this to avoid bizarre compiler problems under VC++ 6.0 … … 57 61 // same as SetCStr but first tries to allocate nSizeHint space (only if needed) 58 62 void SetCStr (UCArray &text, const char *cStr, size_t nSizeHint); 59 char * GetCStr( UCArraytext);63 char * GetCStr(const UCArray& text); 60 64 inline void UCArrayClear (UCArray &a) { 61 65 a.erase (a.begin(), a.end()); 62 66 } 67 bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr); 68 inline bool UCArrayCStrEquals(const UCArray &text, const char *cStr) { return UCArrayCStrEquals(text, (const unsigned char *)cStr); } 63 69 64 70 // stream operator to print UCArray -
trunk/indexers/mgpp/text/mg_errors.cpp
r3365 r8692 51 51 /* free the current error string, unless it is the null string */ 52 52 if ((mg_error_data != NULL) && (mg_error_data != null_data)) { 53 delete mg_error_data;53 delete []mg_error_data; 54 54 mg_error_data = null_data; 55 55 } … … 67 67 /* free the current error string, unless it is the null string */ 68 68 if ((mg_error_data != NULL) && (mg_error_data != null_data)) { 69 delete mg_error_data;69 delete []mg_error_data; 70 70 mg_error_data = null_data; 71 71 } -
trunk/indexers/mgpp/text/mg_files.cpp
r3365 r8692 45 45 if (basepath) 46 46 { 47 delete basepath;47 delete []basepath; 48 48 basepath = NULL; 49 49 } -
trunk/indexers/mgpp/text/mgpp_compression_dict.cpp
r3365 r8692 233 233 NTOHD(csh.num_bytes); 234 234 235 for (i = 0; i < 2; i++)235 for (i = 0; i < 2; ++i) 236 236 { 237 237 frags_stats_header fsh; … … 253 253 wd = Words[i] = (DictWordData *) Xmalloc (sizeof (DictWordData) * Num[i]); 254 254 unsigned int j; 255 for (j = 0; j < Num[i]; j++, wd++)255 for (j = 0; j < Num[i]; ++j, ++wd) 256 256 { 257 257 int len; … … 306 306 dd->chars = 0; 307 307 wd = dd->wd; 308 for (i = 0; i < dd->num_wds; i++, wd++)308 for (i = 0; i < dd->num_wds; ++i, ++wd) 309 309 dd->chars += (*wd)->word[0]; 310 310 } … … 317 317 Alloc_keep_discard (); 318 318 keep[0].num_wds = Num[0]; 319 for (i = 0; i < Num[0]; i++)319 for (i = 0; i < Num[0]; ++i) 320 320 keep[0].wd[i] = Words[0] + i; 321 321 keep[1].num_wds = Num[1]; 322 for (i = 0; i < Num[1]; i++)322 for (i = 0; i < Num[1]; ++i) 323 323 keep[1].wd[i] = Words[1] + i; 324 324 SortAndCount_DictData (&keep[0]); … … 367 367 num = Num[0] + Num[1]; 368 368 wd = (DictWordData **) Xmalloc (num * sizeof (DictWordData *)); 369 for (i = 0; (unsigned int)i < Num[0]; i++)369 for (i = 0; (unsigned int)i < Num[0]; ++i) 370 370 wd[i] = Words[0] + i; 371 for (i = 0; (unsigned int)i < Num[1]; i++)371 for (i = 0; (unsigned int)i < Num[1]; ++i) 372 372 wd[i + Num[0]] = Words[1] + i; 373 373 … … 390 390 } 391 391 392 for (i = 0; i < num; i++)392 for (i = 0; i < num; ++i) 393 393 { 394 394 DictWordData *word = wd[i]; … … 438 438 memset (char_freqs, '\0', sizeof (char_freqs)); 439 439 memset (len_freqs, '\0', sizeof (len_freqs)); 440 for (i = 0; i < num; i++, wd++)440 for (i = 0; i < num; ++i, ++wd) 441 441 { 442 442 u_long freq = (*wd)->documents(); … … 446 446 len_freqs[idx][len] += freq; 447 447 escape[idx] += freq; 448 for (; len; len--, buf++)448 for (; len; --len, ++buf) 449 449 char_freqs[idx][(u_long) (*buf)] += freq; 450 450 } … … 471 471 int j; 472 472 473 for (j = 0; j < num; j++, word++)473 for (j = 0; j < num; ++j, ++word) 474 474 { 475 475 float cbc, wbc; … … 480 480 481 481 cbc = len_lens[idx][len]; 482 for (; len; len--, buf++)482 for (; len; --len, ++buf) 483 483 cbc += char_lens[idx][(u_long) (*buf)]; 484 484 … … 585 585 586 586 587 for (i = 0; (unsigned int)i < Num[0]; i++)587 for (i = 0; (unsigned int)i < Num[0]; ++i) 588 588 discard_heap[i] = Words[0] + i; 589 for (i = 0; (unsigned int)i < Num[1]; i++)589 for (i = 0; (unsigned int)i < Num[1]; ++i) 590 590 discard_heap[i + Num[0]] = Words[1] + i; 591 591 … … 600 600 keep_heap[keep_num++] = word; 601 601 freqs_trans[KIND (word)] += word->documents(); 602 num_trans++;602 ++num_trans; 603 603 } 604 604 … … 663 663 heap_build (keep_heap, sizeof (keep_heap), keep_num, SmallSaving); 664 664 recalc_reqd = 0; 665 recalcs++;665 ++recalcs; 666 666 } 667 667 } … … 669 669 Alloc_keep_discard (); 670 670 671 for (i = 0; i < discard_num; i++)671 for (i = 0; i < discard_num; ++i) 672 672 { 673 673 DictWordData *word = discard_heap[i]; … … 676 676 discard[idx].wd[discard[idx].num_wds++] = word; 677 677 } 678 for (i = 0; i < keep_num; i++)678 for (i = 0; i < keep_num; ++i) 679 679 { 680 680 DictWordData *word = keep_heap[i]; … … 724 724 HTONUL(tmp.dict_type); 725 725 HTONUL(tmp.novel_method); 726 for (i = 0; i < TEXT_PARAMS; i++)726 for (i = 0; i < TEXT_PARAMS; ++i) 727 727 HTONUL(tmp.params[i]); 728 728 HTONUL(tmp.num_words[0]); … … 741 741 unsigned int i; 742 742 u_char *curr, *prev = NULL; 743 for (i = 0; i < dd->num_wds; i++)743 for (i = 0; i < dd->num_wds; ++i) 744 744 { 745 745 int len; … … 762 762 { 763 763 unsigned int i, us; 764 for (us = i = 0; i < dd->num_wds; i++)764 for (us = i = 0; i < dd->num_wds; ++i) 765 765 us += dd->wd[i]->word[0]; 766 766 return us; … … 783 783 FatalError (1, "Unable to allocate memory for freqs"); 784 784 785 for (i = 0; (unsigned)i < dd->num_wds; i++)785 for (i = 0; (unsigned)i < dd->num_wds; ++i) 786 786 { 787 787 freqs[i] = dd->wd[i]->documents(); … … 792 792 FatalError (1, "Unable to allocate memory for huffman data"); 793 793 794 delete (freqs);794 delete []freqs; 795 795 freqs = NULL; 796 796 … … 813 813 mem_reqd = 0; 814 814 815 for (i = 0; (unsigned)i < dd->num_wds; i++)815 for (i = 0; (unsigned)i < dd->num_wds; ++i) 816 816 { 817 817 int codelen = hd->clens[i]; … … 841 841 lastword[codelen] = word; 842 842 #endif 843 lencounts[codelen]++;843 ++lencounts[codelen]; 844 844 } 845 845 846 846 /* [RPAP - Jan 97: Endian Ordering] */ 847 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)847 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i) 848 848 HTONUL(huff_words_size[i]); 849 849 … … 852 852 853 853 /* [RPAP - Jan 97: Endian Ordering] */ 854 for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)854 for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i) 855 855 NTOHUL(huff_words_size[i]); 856 856 857 857 Write_words (f, dd); 858 858 859 delete hd->clens;859 delete []hd->clens; 860 860 delete hd; 861 861 … … 876 876 memset (freqs, '\0', sizeof (freqs)); 877 877 878 for (j = 0; j < dd->num_wds; j++, wd++)878 for (j = 0; j < dd->num_wds; ++j, ++wd) 879 879 { 880 880 u_char *buf = (*wd)->word; 881 881 int len = *buf++; 882 for (; len; len--, buf++)882 for (; len; --len, ++buf) 883 883 freqs[(u_long) (*buf)] += (*wd)->documents(); 884 884 } 885 885 886 886 if (!zero_freq_permitted) 887 for (j = 0; j < 256; j++)887 for (j = 0; j < 256; ++j) 888 888 if (!freqs[j] && PESINAWORD (j) == words) 889 889 freqs[j] = 1; … … 895 895 FatalError (1, "Unable to write huffman data"); 896 896 897 delete hd->clens;897 delete []hd->clens; 898 898 delete hd; 899 899 } … … 912 912 memset (freqs, '\0', sizeof (freqs)); 913 913 914 for (j = 0; j < dd->num_wds; j++, wd++)914 for (j = 0; j < dd->num_wds; ++j, ++wd) 915 915 freqs[(*wd)->word[0]] += (*wd)->documents(); 916 916 917 917 if (!zero_freq_permitted) 918 for (j = 0; j < 16; j++)918 for (j = 0; j < 16; ++j) 919 919 if (!freqs[j]) 920 920 freqs[j] = 1; … … 927 927 928 928 929 delete hd->clens;929 delete []hd->clens; 930 930 delete hd; 931 931 } … … 958 958 Write_cdh (f, &cdh); 959 959 960 for (i = 0; i < 2; i++)960 for (i = 0; i < 2; ++i) 961 961 switch (type) 962 962 { … … 975 975 esc.word = (u_char *) ""; 976 976 keep[i].wd[keep[i].num_wds++] = &esc; 977 for (j = 0; (unsigned)j < discard[i].num_wds; j++)977 for (j = 0; (unsigned)j < discard[i].num_wds; ++j) 978 978 esc.docCount += discard[i].wd[j]->documents(); 979 979 if (!esc.docCount) 980 esc.docCount++;980 ++esc.docCount; 981 981 mem_reqd += Write_data (f, &keep[i], lookback); 982 982 } … … 994 994 esc.word = (u_char *) ""; 995 995 keep[i].wd[keep[i].num_wds++] = &esc; 996 for (j = 0; (unsigned)j < all[i].num_wds; j++)996 for (j = 0; (unsigned)j < all[i].num_wds; ++j) 997 997 if (all[i].wd[j]->documents() == 1) 998 esc.docCount++;998 ++esc.docCount; 999 999 if (!esc.docCount) 1000 esc.docCount++;1000 ++esc.docCount; 1001 1001 mem_reqd += Write_data (f, &keep[i], lookback); 1002 1002 } -
trunk/indexers/mgpp/text/mgpp_decompress_text.cpp
r3365 r8692 41 41 char *basePath = ""; 42 42 UCArray level; 43 SetCStr (level, "Document" );43 SetCStr (level, "Document", 8); 44 44 45 45 opterr = 0; … … 57 57 break; 58 58 case 'K': 59 SetCStr (level, optarg );59 SetCStr (level, optarg, strlen(optarg)); 60 60 break; 61 61 case 'h': … … 92 92 cout << docText << "\n"; 93 93 94 docNum++;94 ++docNum; 95 95 } 96 96 -
trunk/indexers/mgpp/text/mgpp_fast_comp_dict.cpp
r3365 r8692 60 60 #define FIXUP_VALS(vals) do { \ 61 61 int i; \ 62 for (i=0; i < MAX_HUFFCODE_LEN+1; i++) \62 for (i=0; i < MAX_HUFFCODE_LEN+1; ++i) \ 63 63 FIXUP(&vals[i]); \ 64 64 } while(0) … … 128 128 129 129 /* cfh */ 130 for (which = 0; which <= 1; which++)130 for (which = 0; which <= 1; ++which) 131 131 { 132 132 int j; … … 135 135 HTONSI(cd->cfh[which]->hd.mincodelen); 136 136 HTONSI(cd->cfh[which]->hd.maxcodelen); 137 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)137 for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j) 138 138 { 139 139 HTONSI(cd->cfh[which]->hd.lencount[j]); … … 141 141 } 142 142 HTONUL(cd->cfh[which]->uncompressed_size); 143 for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)143 for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j) 144 144 HTONUL(cd->cfh[which]->huff_words_size[j]); 145 145 } … … 148 148 if (cd->cdh.novel_method == MG_NOVEL_DELTA || 149 149 cd->cdh.novel_method == MG_NOVEL_HYBRID) 150 for (which = 0; which <= 1; which++)150 for (which = 0; which <= 1; ++which) 151 151 { 152 152 int j; … … 154 154 HTONUL(cd->ad->afh[which].num_frags); 155 155 HTONUL(cd->ad->afh[which].mem_for_frags); 156 for (j = 0; j < 33; j++)156 for (j = 0; j < 33; ++j) 157 157 { 158 158 HTONSI(cd->ad->blk_start[which][j]); … … 163 163 HTONUL(cd->cdh.dict_type); 164 164 HTONUL(cd->cdh.novel_method); 165 for (i = 0; i < TEXT_PARAMS; i++)165 for (i = 0; i < TEXT_PARAMS; ++i) 166 166 HTONUL(cd->cdh.params[which]); 167 167 HTONUL(cd->cdh.num_words[0]); … … 186 186 { 187 187 u_long *p; 188 for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)188 for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p) 189 189 { 190 190 if (IS_FIXUP (p)) … … 206 206 MAGIC_AUX_DICT, MG_ABORT); /* [RPAP - Feb 97: WIN32 Port] */ 207 207 208 for (i = 0; i <= 1; i++)208 for (i = 0; i <= 1; ++i) 209 209 { 210 210 aux_frags_header afh; … … 234 234 lookback = cdh->lookback; 235 235 236 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)236 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i) 237 237 { 238 238 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback; … … 244 244 mem += mem_reqd; 245 245 246 for (i = 0; i < cfh->hd.num_codes; i++)246 for (i = 0; i < cfh->hd.num_codes; ++i) 247 247 { 248 248 register int val; 249 for (val = getc (dict) & 0xf; val; val--)249 for (val = getc (dict) & 0xf; val; --val) 250 250 getc (dict); 251 251 } … … 259 259 Read_Huffman_Data (dict, &hd, NULL, NULL); 260 260 if (hd.clens) 261 delete hd.clens;261 delete []hd.clens; 262 262 mem += hd.num_codes * sizeof (unsigned long); 263 263 mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *); … … 275 275 mem += mem_for_words (dict, cdh, cfh); 276 276 if (cfh->hd.clens) 277 delete cfh->hd.clens;277 delete []cfh->hd.clens; 278 278 279 279 return mem; … … 298 298 NTOHUL(cdh.dict_type); 299 299 NTOHUL(cdh.novel_method); 300 for (i = 0; i < TEXT_PARAMS; i++)300 for (i = 0; i < TEXT_PARAMS; ++i) 301 301 NTOHUL(cdh.params[i]); 302 302 NTOHUL(cdh.num_words[0]); … … 306 306 NTOHUL(cdh.lookback); 307 307 308 for (which = 0; which < 2; which++)308 for (which = 0; which < 2; ++which) 309 309 switch (cdh.dict_type) 310 310 { … … 393 393 ad = (auxiliary_dict *) getmem (sizeof (auxiliary_dict), sizeof (u_char *)); 394 394 395 for (i = 0; i <= 1; i++)395 for (i = 0; i <= 1; ++i) 396 396 { 397 397 unsigned int j; … … 414 414 415 415 pos = ad->word_data[i]; 416 for (j = 0; j < ad->afh[i].num_frags; j++)416 for (j = 0; j < ad->afh[i].num_frags; ++j) 417 417 { 418 418 ad->words[i][j] = pos; … … 431 431 ad->blk_end[i][num] = ad->blk_start[i][num] + 432 432 (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2; 433 num++;433 ++num; 434 434 } 435 435 } … … 459 459 lookback = cd->cdh.lookback; 460 460 461 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)461 for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i) 462 462 { 463 463 ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback; … … 479 479 values[0][0] = next_word[0]; 480 480 FIXUP (&values[0][0]); 481 for (i = 1; i <= cfh->hd.maxcodelen; i++)481 for (i = 1; i <= cfh->hd.maxcodelen; ++i) 482 482 { 483 483 int next_start = (values[i - 1] - vals) + … … 492 492 memset (num_set, '\0', sizeof (num_set)); 493 493 494 for (i = 0; i < cfh->hd.num_codes; i++)494 for (i = 0; i < cfh->hd.num_codes; ++i) 495 495 { 496 496 register int val, copy; … … 528 528 } 529 529 memcpy (last_word[len], word, *word + 1); 530 num_set[len]++;530 ++num_set[len]; 531 531 } 532 532 if (cfh->hd.clens) 533 delete cfh->hd.clens;533 delete []cfh->hd.clens; 534 534 cfh->hd.clens = NULL; 535 535 return values; … … 559 559 fcode[0] = values[0] = &vals[0]; 560 560 FIXUP (&values[0]); 561 for (i = 1; i <= data->maxcodelen; i++)561 for (i = 1; i <= data->maxcodelen; ++i) 562 562 { 563 563 fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]]; … … 565 565 } 566 566 567 for (i = 0; i < data->num_codes; i++)567 for (i = 0; i < data->num_codes; ++i) 568 568 if (data->clens[i]) 569 569 *fcode[(int) (data->clens[i])]++ = i; … … 586 586 FIXUP (&cd->chars_vals[which]); 587 587 if (hd->clens) 588 delete hd->clens;588 delete []hd->clens; 589 589 hd->clens = NULL; 590 590 } … … 623 623 Read_cdh (dict, &cd->cdh, NULL, NULL); 624 624 625 for (which = 0; which < 2; which++)625 for (which = 0; which < 2; ++which) 626 626 switch (cd->cdh.dict_type) 627 627 { … … 683 683 { 684 684 u_long *p; 685 for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)685 for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p) 686 686 { 687 687 if (IS_FIXUP (p)) -
trunk/indexers/mgpp/text/mgpp_passes.cpp
r3365 r8692 23 23 #define _XOPEN_SOURCE 1 24 24 #define _XOPEN_SOURCE_EXTENDED 1 25 26 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) 27 #pragma warning(disable:4786) 28 #endif 25 29 26 30 // need this to avoid bizarre compiler problems under VC++ 6.0 -
trunk/indexers/mgpp/text/text.pass2.cpp
r3365 r8692 21 21 **************************************************************************/ 22 22 23 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) 24 #pragma warning(disable:4786) 25 #endif 26 23 27 // need this to avoid bizarre compiler problems under VC++ 6.0 24 28 #if defined (__WIN32__) && !defined (GSDL_USE_IOS_H) … … 135 139 int i; 136 140 if (cdh.novel_method != MG_NOVEL_HUFFMAN_CHARS) 137 for (i = 0; i <= 1; i++)141 for (i = 0; i <= 1; ++i) 138 142 { 139 143 nht[i].HashSize = INITIAL_HASH_SIZE; … … 158 162 blk_end[i][num] = blk_start[i][num] + 159 163 (blk_end[i][num - 1] - blk_start[i][num - 1]) * 2; 160 num++;164 ++num; 161 165 } 162 166 } … … 176 180 int res; 177 181 178 if (which) numWords++;182 if (which) ++numWords; 179 183 180 184 /* First parse a word or non-word out of the string */ … … 202 206 s2 = *wptr; 203 207 len = *s1 + 1; 204 for (; len; len--)208 for (; len; --len) 205 209 if (*s1++ != *s2++) break; 206 210 … … 234 238 } 235 239 buffer.huff_encode (Word[0], lens_codes[which], lens_huff[which].clens, NULL); 236 for (i = 0; i < Word[0]; i++)240 for (i = 0; i < Word[0]; ++i) 237 241 buffer.huff_encode (Word[i + 1], char_codes[which], 238 242 char_huff[which].clens, NULL); … … 252 256 buffer.huff_encode (Word[0], lens_codes[which], 253 257 lens_huff[which].clens, NULL); 254 for (i = 0; i < Word[0]; i++)258 for (i = 0; i < Word[0]; ++i) 255 259 buffer.huff_encode (Word[i + 1], char_codes[which], 256 260 char_huff[which].clens, NULL); … … 280 284 h->pool->ptr += len; 281 285 h->pool->left -= len; 282 h->HashUsed++;286 ++h->HashUsed; 283 287 break; 284 288 } … … 287 291 s2 = ent->word; 288 292 len = *s1 + 1; 289 for (; len; len--)293 for (; len; --len) 290 294 if (*s1++ != *s2++) 291 295 break; … … 306 310 int j = ent->ordinal_num - 1; 307 311 while (j > blk_end[which][k]) 308 k++;312 ++k; 309 313 assert (j - blk_start[which][k] + 1 >= 1 && 310 314 j - blk_start[which][k] + 1 <= … … 331 335 memset (ht, '\0', sizeof (novel_hash_rec) * size); 332 336 333 for (i = 0; i < h->HashSize; i++)337 for (i = 0; i < h->HashSize; ++i) 334 338 if (h->HashTable[i].word) 335 339 { … … 409 413 410 414 cth.num_of_bytes += (*here).text.size(); 411 here++;415 ++here; 412 416 } 413 417 … … 417 421 while (tiHere != tiEnd) { 418 422 if ((*tiHere).second.inDoc) (*tiHere).second.SetEnd (endPos, endBit); 419 tiHere++;423 ++tiHere; 420 424 } 421 425 422 426 // we've processed one more document 423 cth.num_of_docs++;427 ++cth.num_of_docs; 424 428 425 429 return COMPALLOK; … … 434 438 return COMPERROR; 435 439 436 for (i = 0; i <= 1; i++)440 for (i = 0; i <= 1; ++i) 437 441 { 438 442 aux_frags_header afh; … … 461 465 int i; 462 466 u_long aux_compressed = 0, total_uncomp = 0; 463 for (i = 0; i <= 1; i++)467 for (i = 0; i <= 1; ++i) 464 468 { 465 469 int j; … … 475 479 { 476 480 int len = *buf++; 477 lens[len]++;481 ++lens[len]; 478 482 total_uncomp += len + 4; 479 for (; len; len--)480 chars[*buf++]++;483 for (; len; --len) 484 ++chars[*buf++]; 481 485 } 482 486 } 483 for (j = 0; j < 256; j++)487 for (j = 0; j < 256; ++j) 484 488 if (!chars[j] && PESINAWORD (j) == i) 485 489 fchars[j] = 1; 486 490 else 487 491 fchars[j] = chars[j]; 488 for (j = 0; j < 16; j++)492 for (j = 0; j < 16; ++j) 489 493 if (!lens[j]) 490 494 flens[j] = 1; … … 522 526 return false; 523 527 524 tiHere++;528 ++tiHere; 525 529 } 526 530 -
trunk/indexers/mgpp/text/words.cpp
r3365 r8692 37 37 } 38 38 39 /* It determines whether a given place in a UTF-8 encoded Unicode string is a unicode space. */ 40 int isaspace (const u_char *here, const u_char *end) 41 { 42 unsigned short c; 43 if (parse_utf8_char(here, end, &c) > 0) return is_unicode_space(c); 44 return 0; 45 } 46 47 /* Return a the UTF-8 encoded Unicode string with begining 48 unicode spaces skippend. */ 49 u_char *skipspace(u_char *here, u_char *end) 50 { 51 unsigned short c; 52 int length; 53 while(here != end) { 54 length = parse_utf8_char(here, end, &c); 55 if (length == 0 || !is_unicode_space(c)) break; 56 here += length; 57 } 58 return here; 59 } 60 39 61 const unsigned char *ParseIndexWord (const unsigned char *textHere, 40 62 const unsigned char *textEnd, … … 53 75 ++numeric <= MAXNUMERIC))) { 54 76 while (charlength-- > 0) { 55 word.push_back (*textHere++); length++;77 word.push_back (*textHere++); ++length; 56 78 } 57 79 charlength = parse_utf8_char (textHere, textEnd, &c); -
trunk/indexers/mgpp/text/words.h
r3365 r8692 76 76 77 77 #ifdef __cplusplus 78 extern "C" 78 extern "C" { 79 79 #endif 80 80 int inaword (const u_char *here, const u_char *end); … … 83 83 is part of a word. */ 84 84 85 int isaspace (const u_char *here, const u_char *end); 86 /* It determines whether a given place in a UTF-8 encoded Unicode string is a unicode space. */ 87 88 u_char *skipspace(u_char *here, u_char *end); 89 /* Return a the UTF-8 encoded Unicode string with begining unicode spaces skippend. */ 90 91 #ifdef __cplusplus 92 } 93 #endif 85 94 86 95 const unsigned char *ParseIndexWord (const unsigned char *textHere,
Note:
See TracChangeset
for help on using the changeset viewer.