Changeset 8692 for trunk/indexers

trunk/indexers/mgpp/lib/bitio_m_random.cpp

-              r3365
+              r8692
 random_bitio_buffer::~random_bitio_buffer() {
   if (buffer != NULL) delete buffer;
+  if (buffer != NULL) delete []buffer;
+}
 …
   // delete the old buffer
   if (buffer != NULL) {
     delete buffer;
+    delete []buffer;
     buffer = NULL;
+  }
 …
   // delete the old buffer
   if (buffer != NULL) {
     delete buffer;
+    delete []buffer;
     buffer = NULL;
+  }
 …
   flush();
   if (buffer != NULL) {
     delete buffer;
+    delete []buffer;
     buffer = NULL;
+  }

trunk/indexers/mgpp/lib/huffman.cpp

-              r3365
+              r8692
   /* Initialise the pointers to the leaves */
   for (count = i = 0; i < num; i++)
+  for (count = i = 0; i < num; ++i)
     if (heap[num + i])
       heap[count++] = num + i;
 …
   /* Reorganise the pointers so that it is a heap */
   HNum = count;
   for (i = HNum / 2; i > 0; i--)
+  for (i = HNum / 2; i > 0; --i)
+    {
       register int curr, child;
 …
+    {
       if (child < HNum && heap[heap[child]] < heap[heap[child - 1]])
         child++;
+        ++child;
       if (heap[heap[curr - 1]] > heap[heap[child - 1]])
+        {
 …
       int pos[2];
       for (i = 0; i < 2; i++)
+      for (i = 0; i < 2; ++i)
+    {
       register int curr, child;
 …
           if (child < HNum &&
           heap[heap[child]] < heap[heap[child - 1]])
         child++;
+        ++child;
           if (heap[heap[curr - 1]] > heap[heap[child - 1]])
+        {
 …
+      {
     register int parent, curr;
     HNum++;
+    ++HNum;
     curr = HNum;
     parent = curr >> 1;
 …
   heap[0] = -1UL;
   heap[1] = 0;
   for (i = 2; i < num * 2; i++)
+  for (i = 2; i < num * 2; ++i)
     heap[i] = heap[heap[i]] + 1;
 …
   /* Set the code length of each leaf in the huffman tree */
   for (i = 0; i < num; i++)
+  for (i = 0; i < num; ++i)
+    {
       register u_long codelen = heap[i + num];
 …
       if (codelen < hd->mincodelen)
     hd->mincodelen = codelen;
       hd->lencount[codelen]++;
+      ++hd->lencount[codelen];
+    }
 …
       /* Calculate the current codes for each different code length */
       hd->min_code[hd->maxcodelen] = 0;
       for (i = hd->maxcodelen - 1; i>=0; i--)
+      for (i = hd->maxcodelen - 1; i>=0; --i)
     hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1;
+    }
   delete heap;
+  delete []heap;
   return (hd);
 error2:
   delete heap;
+  delete []heap;
 error1:
   if (!data)
 …
     *mem += data->num_codes * sizeof (*codes);
   memcpy (mc, data->min_code, sizeof (mc));
   for (i = 0; i < data->num_codes; i++)
+  for (i = 0; i < data->num_codes; ++i)
     if (data->clens[i])
       codes[i] = mc[(int) (data->clens[i])]++;
 …
   if (!(values = new unsigned long *[MAX_HUFFCODE_LEN + 1]))
+    {
       delete vals;
+      delete []vals;
       return (NULL);
+    }
 …
   fcode[0] = values[0] = &vals[0];
   for (i = 1; i <= data->maxcodelen; i++)
+  for (i = 1; i <= data->maxcodelen; ++i)
     fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
   for (i = 0; i < data->num_codes; i++)
+  for (i = 0; i < data->num_codes; ++i)
     if (data->clens[i])
       *fcode[(int) (data->clens[i])]++ = i;
 …
   if (!Generate_Huffman_Data (num, freqs, &hd, NULL))
     return -1;
   for (i = 0; i < num; i++)
+  for (i = 0; i < num; ++i)
     size += counts[i] * hd.clens[i];
   delete hd.clens;
+  delete []hd.clens;
   return size;
+}
 …
       /* [RPAP - Jan 97: Endian Ordering] */
       int i;
       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
+      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
     HTONSI(hd->lencount[i]);
       for (i = 0; i < hd->maxcodelen + 1; i++)
+      for (i = 0; i < hd->maxcodelen + 1; ++i)
     HTONUL(hd->min_code[i]);
 …
       /* [RPAP - Jan 97: Endian Ordering] */
       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
+      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
     NTOHSI(hd->lencount[i]);
       for (i = 0; i < hd->maxcodelen + 1; i++)
+      for (i = 0; i < hd->maxcodelen + 1; ++i)
     NTOHUL(hd->min_code[i]);
+    }
 …
       /* [RPAP - Jan 97: Endian Ordering] */
       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
+      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
     NTOHSI(hd->lencount[i]);
 …
       /* [RPAP - Jan 97: Endian Ordering] */
       for (i = 0; i < hd->maxcodelen + 1; i++)
+      for (i = 0; i < hd->maxcodelen + 1; ++i)
     NTOHUL(hd->min_code[i]);

trunk/indexers/mgpp/text/GSDLQueryLex.cpp

-              r8242
+              r8692
               UCArray &text,
               int len) {
+  if (text.capacity() < text.size() + len + 1) {
+    text.reserve(text.size() + len + 1);
+  }
   while (len > 0) {
     text.push_back (*here++);
     len--;
+    --len;
+  }
+}
 …
   //return false;
+  UCArray AND; SetCStr (AND, "AND");
+  if (el.text == AND) {
+  //UCArray AND; SetCStr (AND, "AND");
+  //if (el.text == AND) {
+  if (UCArrayCStrEquals(el.text, "AND")) {
     el.lexType = AndOpE;
     return true;
+  }
+  UCArray OR; SetCStr (OR, "OR");
+  if (el.text == OR) {
+  //UCArray OR; SetCStr (OR, "OR");
+  //if (el.text == OR) {
+  if (UCArrayCStrEquals(el.text, "OR")) {
     el.lexType = OrOpE;
     return true;
+  }
+  UCArray NOT; SetCStr (NOT, "NOT");
+  if (el.text == NOT) {
+  //UCArray NOT; SetCStr (NOT, "NOT");
+  //if (el.text == NOT) {
+  if (UCArrayCStrEquals(el.text, "NOT")) {
     el.lexType = NotOpE;
     return true;
+  }
   UCArray NEAR; SetCStr (NEAR, "NEAR");
+  UCArray NEAR; SetCStr (NEAR, "NEAR", 4);
   if (PrefixLen(el.text, NEAR)==4) {
     el.lexType = NearOpE;
     return true;
+  }
   UCArray WITHIN; SetCStr (WITHIN, "WITHIN");
+  UCArray WITHIN; SetCStr (WITHIN, "WITHIN", 6);
   if (PrefixLen(el.text, WITHIN)==6) {
     el.lexType = WithinOpE;

trunk/indexers/mgpp/text/GSDLQueryParser.cpp

-              r8242
+              r8692
                 UCArray &nearby,
                 bool reverse) {
   UCArray NEARBY; SetCStr(NEARBY, "NEAR");
   UCArray WITHIN; SetCStr(WITHIN, "WITHIN");
+  UCArray NEARBY; SetCStr(NEARBY, "NEAR", 4);
+  UCArray WITHIN; SetCStr(WITHIN, "WITHIN", 6);
   if (nearby == NEARBY) { // no modifier
 …
     while (here != end) {
       size = size*10 + (*here-'0');
       here++;
+      ++here;
+    }
     if (within) {
 …
   UCArray near_string;
   while (ParseLexEl (here, end, el)) {
+    if (el.lexType == TermE || el.lexType == IntegerE) {
+    // can't have AND, OR, NOT in square brackets, so assume they are words
+    if (el.lexType == TermE || el.lexType == IntegerE || el.lexType == AndOpE || el.lexType == OrOpE || el.lexType == NotOpE) {
       TermNode termNode;
       termNode.term = el.text;

trunk/indexers/mgpp/text/IndexData.cpp

-              r3365
+              r8692
   IvfLevelInfoMap::const_iterator levelHere, levelEnd;
   for (levelHere=levels.levelInfo.begin(), levelEnd=levels.levelInfo.end();
        levelHere!=levelEnd && (*levelHere).first != level; levelHere++)
     curLevelNum++;
+       levelHere!=levelEnd && (*levelHere).first != level; ++levelHere)
+    ++curLevelNum;
   // make sure we found the level

trunk/indexers/mgpp/text/MGQuery.cpp

-              r3365
+              r8692
      rightI < rightResult.docs.size()) {
     if (result.docs[leftI] < rightResult.docs[rightI]) {
       leftI++;
+      ++leftI;
     } else if (result.docs[leftI] > rightResult.docs[rightI]) {
       rightI++;
+      ++rightI;
     } else {
       // the documents are equal
 …
       if (haveAccum)
     result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI];
       leftI++;
       rightI++;
       outI++;
+      ++leftI;
+      ++rightI;
+      ++outI;
+    }
+  }
 …
       if (haveAccum)
     result.ranks.push_back (leftResult.ranks[leftI]);
       leftI++;
+      ++leftI;
     } else if (leftDocNum > rightDocNum) {
 …
       if (haveAccum)
     result.ranks.push_back (rightResult.ranks[rightI]);
       rightI++;
+      ++rightI;
     } else { // equal
 …
     result.ranks.push_back (leftResult.ranks[leftI] +
                 rightResult.ranks[rightI]);
       leftI++;
       rightI++;
+      ++leftI;
+      ++rightI;
+    }
+  }
 …
       if (haveAccum)
     result.ranks[outI] = result.ranks[queryI];
       queryI++;
       outI++;
+      ++queryI;
+      ++outI;
     } else if (result.docs[queryI] > notResult.docs[notI]) {
       notI++;
+      ++notI;
     } else {
       // the documents are equal, ignore both
       queryI++;
       notI++;
+      ++queryI;
+      ++notI;
+    }
+  }
 …
   unsigned long i;
   FragRange thisFrag;
   for (i=0; i<tagEl.frag_occur; i++) {
+  for (i=0; i<tagEl.frag_occur; ++i) {
     // get start
     unsigned long delta = buffer.bblock_decode (B, NULL)-1;
 …
     CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData);
     here++;
+    ++here;
+  }
+}
 …
             result);
     termHere++;
+    ++termHere;
     if (termHere == termEnd) return; // nothing more to do
 …
             (*termHere).endRange,
             fragLimitsPtr);
     termHere++;
+    ++termHere;
+  }
 …
   while (here != end) {
     (*here).Print (s, indent+2);
     here++;
+    ++here;
+  }
+}
 …
   unsigned long i;
   if (queryInfo.sortByRank || queryInfo.needRankInfo) {
     for (i=0; i<result.ranks.size(); i++) {
+    for (i=0; i<result.ranks.size(); ++i) {
       result.ranks[i] /=
     indexData.weightData.GetLowerApproxDocWeight (result.docs[i]);
 …
       indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;
     for (i=0; i<resultsSize; i++) {
+    for (i=0; i<resultsSize; ++i) {
       result.ranks[i] =  result.ranks[i] *
     indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) /
 …
   unsigned long DocNum = 0;
   for (unsigned long i=0; i<realresult.docs.size(); i++) {
+  for (unsigned long i=0; i<realresult.docs.size(); ++i) {
     // do an if ! here????

trunk/indexers/mgpp/text/QueryLex.cpp

-              r3365
+              r8692
               UCArray &text,
               int len) {
+  if (text.capacity() < text.size() + len + 1) {
+    text.reserve(text.size + len + 1);
+  }
   while (len > 0) {
     text.push_back (*here++);
     len--;
+    --len;
+  }
+}
 …
   if (!ParseTerm (here, end, el.text)) return false;
+  UCArray AND; SetCStr (AND, "AND");
+  if (el.text == AND) {
+  //UCArray AND; SetCStr (AND, "AND");
+  //if (el.text == AND) {
+  if (UCArrayCStrEquals(el.text, "AND")) {
     el.lexType = AndOpE;
     return true;
+  }
+  UCArray OR; SetCStr (OR, "OR");
+  if (el.text == OR) {
+  //UCArray OR; SetCStr (OR, "OR");
+  //if (el.text == OR) {
+  if (UCArrayCStrEquals(el.text, "OR")) {
     el.lexType = OrOpE;
     return true;
+  }
+  UCArray NOT; SetCStr (NOT, "NOT");
+  if (el.text == NOT) {
+  //UCArray NOT; SetCStr (NOT, "NOT");
+  //if (el.text == NOT) {
+  if (UCArrayCStrEquals(el.text, "NOT")) {
     el.lexType = NotOpE;
     return true;

trunk/indexers/mgpp/text/QueryTester.cpp

-              r3365
+              r8692
   DocNumArray &docSet1 = setNode1->queryResult.docs;
   RankArray &rankSet1 = setNode1->queryResult.ranks;
   docSet1.push_back (1);  rankSet1.push_back (0.1);
   docSet1.push_back (10); rankSet1.push_back (0.2);
   docSet1.push_back (15); rankSet1.push_back (0.2);
   docSet1.push_back (18); rankSet1.push_back (0.4);
   docSet1.push_back (19); rankSet1.push_back (0.5);
+  docSet1.push_back (1);  rankSet1.push_back (0.1f);
+  docSet1.push_back (10); rankSet1.push_back (0.2f);
+  docSet1.push_back (15); rankSet1.push_back (0.2f);
+  docSet1.push_back (18); rankSet1.push_back (0.4f);
+  docSet1.push_back (19); rankSet1.push_back (0.5f);
   SetQueryNode *setNode2 = new SetQueryNode;
   DocNumArray &docSet2 = setNode2->queryResult.docs;
   RankArray &rankSet2 = setNode2->queryResult.ranks;
   docSet2.push_back (2);  rankSet2.push_back (0.1);
   docSet2.push_back (11); rankSet2.push_back (0.2);
   docSet2.push_back (12); rankSet2.push_back (0.3);
   docSet2.push_back (13); rankSet2.push_back (0.4);
   docSet2.push_back (14); rankSet2.push_back (0.5);
   docSet2.push_back (15); rankSet2.push_back (0.6);
   docSet2.push_back (16); rankSet2.push_back (0.7);
   docSet2.push_back (17); rankSet2.push_back (0.8);
   docSet2.push_back (19); rankSet2.push_back (0.9);
   docSet2.push_back (20); rankSet2.push_back (0.1);
   docSet2.push_back (21); rankSet2.push_back (0.2);
+  docSet2.push_back (2);  rankSet2.push_back (0.1f);
+  docSet2.push_back (11); rankSet2.push_back (0.2f);
+  docSet2.push_back (12); rankSet2.push_back (0.3f);
+  docSet2.push_back (13); rankSet2.push_back (0.4f);
+  docSet2.push_back (14); rankSet2.push_back (0.5f);
+  docSet2.push_back (15); rankSet2.push_back (0.6f);
+  docSet2.push_back (16); rankSet2.push_back (0.7f);
+  docSet2.push_back (17); rankSet2.push_back (0.8f);
+  docSet2.push_back (19); rankSet2.push_back (0.9f);
+  docSet2.push_back (20); rankSet2.push_back (0.1f);
+  docSet2.push_back (21); rankSet2.push_back (0.2f);
   cout << "\n" << setNode1->queryResult << "AND\n\n"
 …
   DocNumArray &docSet1 = setNode1->queryResult.docs;
   RankArray &rankSet1 = setNode1->queryResult.ranks;
   docSet1.push_back (1);  rankSet1.push_back (0.1);
   docSet1.push_back (10); rankSet1.push_back (0.2);
   docSet1.push_back (15); rankSet1.push_back (0.2);
   docSet1.push_back (18); rankSet1.push_back (0.4);
   docSet1.push_back (19); rankSet1.push_back (0.5);
+  docSet1.push_back (1);  rankSet1.push_back (0.1f);
+  docSet1.push_back (10); rankSet1.push_back (0.2f);
+  docSet1.push_back (15); rankSet1.push_back (0.2f);
+  docSet1.push_back (18); rankSet1.push_back (0.4f);
+  docSet1.push_back (19); rankSet1.push_back (0.5f);
   SetQueryNode *setNode2 = new SetQueryNode;
   DocNumArray &docSet2 = setNode2->queryResult.docs;
   RankArray &rankSet2 = setNode2->queryResult.ranks;
   docSet2.push_back (2);  rankSet2.push_back (0.1);
   docSet2.push_back (11); rankSet2.push_back (0.2);
   docSet2.push_back (12); rankSet2.push_back (0.3);
   docSet2.push_back (13); rankSet2.push_back (0.4);
   docSet2.push_back (14); rankSet2.push_back (0.5);
   docSet2.push_back (15); rankSet2.push_back (0.6);
   docSet2.push_back (16); rankSet2.push_back (0.7);
   docSet2.push_back (17); rankSet2.push_back (0.8);
   docSet2.push_back (19); rankSet2.push_back (0.9);
   docSet2.push_back (20); rankSet2.push_back (0.1);
   docSet2.push_back (21); rankSet2.push_back (0.2);
+  docSet2.push_back (2);  rankSet2.push_back (0.1f);
+  docSet2.push_back (11); rankSet2.push_back (0.2f);
+  docSet2.push_back (12); rankSet2.push_back (0.3f);
+  docSet2.push_back (13); rankSet2.push_back (0.4f);
+  docSet2.push_back (14); rankSet2.push_back (0.5f);
+  docSet2.push_back (15); rankSet2.push_back (0.6f);
+  docSet2.push_back (16); rankSet2.push_back (0.7f);
+  docSet2.push_back (17); rankSet2.push_back (0.8f);
+  docSet2.push_back (19); rankSet2.push_back (0.9f);
+  docSet2.push_back (20); rankSet2.push_back (0.1f);
+  docSet2.push_back (21); rankSet2.push_back (0.2f);
   cout << "\n" << setNode1->queryResult << "OR\n\n"
 …
   DocNumArray &rcDocSet = resultCompare.docs;
   RankArray &rcRankSet = resultCompare.ranks;
   rcDocSet.push_back (1);  rcRankSet.push_back (0.1);
   rcDocSet.push_back (2);  rcRankSet.push_back (0.1);
   rcDocSet.push_back (10); rcRankSet.push_back (0.2);
   rcDocSet.push_back (11); rcRankSet.push_back (0.2);
   rcDocSet.push_back (12); rcRankSet.push_back (0.3);
   rcDocSet.push_back (13); rcRankSet.push_back (0.4);
   rcDocSet.push_back (14); rcRankSet.push_back (0.5);
   rcDocSet.push_back (15); rcRankSet.push_back (0.2+0.6);
   rcDocSet.push_back (16); rcRankSet.push_back (0.7);
   rcDocSet.push_back (17); rcRankSet.push_back (0.8);
   rcDocSet.push_back (18); rcRankSet.push_back (0.4);
   rcDocSet.push_back (19); rcRankSet.push_back (0.9+0.5);
   rcDocSet.push_back (20); rcRankSet.push_back (0.1);
   rcDocSet.push_back (21); rcRankSet.push_back (0.2);
+  rcDocSet.push_back (1);  rcRankSet.push_back (0.1f);
+  rcDocSet.push_back (2);  rcRankSet.push_back (0.1f);
+  rcDocSet.push_back (10); rcRankSet.push_back (0.2f);
+  rcDocSet.push_back (11); rcRankSet.push_back (0.2f);
+  rcDocSet.push_back (12); rcRankSet.push_back (0.3f);
+  rcDocSet.push_back (13); rcRankSet.push_back (0.4f);
+  rcDocSet.push_back (14); rcRankSet.push_back (0.5f);
+  rcDocSet.push_back (15); rcRankSet.push_back (0.2f+0.6f);
+  rcDocSet.push_back (16); rcRankSet.push_back (0.7f);
+  rcDocSet.push_back (17); rcRankSet.push_back (0.8f);
+  rcDocSet.push_back (18); rcRankSet.push_back (0.4f);
+  rcDocSet.push_back (19); rcRankSet.push_back (0.9f+0.5f);
+  rcDocSet.push_back (20); rcRankSet.push_back (0.1f);
+  rcDocSet.push_back (21); rcRankSet.push_back (0.2f);
 …
   DocNumArray &docSet1 = setNode1->queryResult.docs;
   RankArray &rankSet1 = setNode1->queryResult.ranks;
   docSet1.push_back (1);  rankSet1.push_back (0.1);
   docSet1.push_back (10); rankSet1.push_back (0.2);
   docSet1.push_back (15); rankSet1.push_back (0.2);
   docSet1.push_back (18); rankSet1.push_back (0.4);
   docSet1.push_back (19); rankSet1.push_back (0.5);
+  docSet1.push_back (1);  rankSet1.push_back (0.1f);
+  docSet1.push_back (10); rankSet1.push_back (0.2f);
+  docSet1.push_back (15); rankSet1.push_back (0.2f);
+  docSet1.push_back (18); rankSet1.push_back (0.4f);
+  docSet1.push_back (19); rankSet1.push_back (0.5f);
   SetQueryNode *setNode2 = new SetQueryNode;
   DocNumArray &docSet2 = setNode2->queryResult.docs;
   RankArray &rankSet2 = setNode2->queryResult.ranks;
   docSet2.push_back (2);  rankSet2.push_back (0.1);
   docSet2.push_back (11); rankSet2.push_back (0.2);
   docSet2.push_back (12); rankSet2.push_back (0.3);
   docSet2.push_back (13); rankSet2.push_back (0.4);
   docSet2.push_back (14); rankSet2.push_back (0.5);
   docSet2.push_back (15); rankSet2.push_back (0.6);
   docSet2.push_back (16); rankSet2.push_back (0.7);
   docSet2.push_back (17); rankSet2.push_back (0.8);
   docSet2.push_back (19); rankSet2.push_back (0.9);
   docSet2.push_back (20); rankSet2.push_back (0.1);
   docSet2.push_back (21); rankSet2.push_back (0.2);
+  docSet2.push_back (2);  rankSet2.push_back (0.1f);
+  docSet2.push_back (11); rankSet2.push_back (0.2f);
+  docSet2.push_back (12); rankSet2.push_back (0.3f);
+  docSet2.push_back (13); rankSet2.push_back (0.4f);
+  docSet2.push_back (14); rankSet2.push_back (0.5f);
+  docSet2.push_back (15); rankSet2.push_back (0.6f);
+  docSet2.push_back (16); rankSet2.push_back (0.7f);
+  docSet2.push_back (17); rankSet2.push_back (0.8f);
+  docSet2.push_back (19); rankSet2.push_back (0.9f);
+  docSet2.push_back (20); rankSet2.push_back (0.1f);
+  docSet2.push_back (21); rankSet2.push_back (0.2f);
   cout << "\n" << setNode1->queryResult << "NOT\n\n"

trunk/indexers/mgpp/text/Queryer.cpp

-              r6116
+              r8692
   // do querying
   QueryInfo queryInfo;
   SetCStr (queryInfo.docLevel, "Document");
+  SetCStr (queryInfo.docLevel, "Document", 8);
   queryInfo.maxDocs = 50;
   queryInfo.sortByRank = true;
 …
   UCArray docLevel;
   SetCStr(docLevel, "Document");
+  SetCStr(docLevel, "Document", 8);
   UCArray level;
 …
     cout << "> ";
     cin.getline(query, 2048, '\n');
     SetCStr (queryArray, query);
+    SetCStr (queryArray, query, strlen(query));
     // check for commands
 …
     cin >> query;
     UCArrayClear(queryInfo.docLevel);
     SetCStr(queryInfo.docLevel, query);
+    SetCStr(queryInfo.docLevel, query, strlen(query));
     cout << "index set to " << queryInfo.docLevel <<"\n";
     cin.getline(query, 2048, '\n');
 …
     cin >> query;
     UCArrayClear(level);
     SetCStr(level, query);
+    SetCStr(level, query, strlen(query));
     cout << "level set to " << level <<"\n";
     cin.getline(query, 2048, '\n');
 …
     cin>>query;
     UCArrayClear(browseNode.term);
     SetCStr(browseNode.term, query);
+    SetCStr(browseNode.term, query, strlen(query));
     cin.getline(query, 2048, '\n'); // get rest of line

trunk/indexers/mgpp/text/TagInfo.cpp

-              r3365
+              r8692
 void TagInfo::SetDocTag (const char *cStr) {
   SetCStr (docTag, cStr);
+  SetCStr (docTag, cStr, strlen(cStr));
+}
 void TagInfo::SetIndexLevel (const char *cStr) {
   SetCStr (indexLevel, cStr);
+  SetCStr (indexLevel, cStr, strlen(cStr));
+}
 void TagInfo::AddLevelTag (const char *cStr) {
   // convert the string
   UCArray cArr;
   SetCStr (cArr, cStr);
+  SetCStr (cArr, cStr, strlen(cStr));
   // insert the tag
 …
   while (here != end) {
     s << "\"" << (*here) << "\"";
     here++;
+    ++here;
     if (here != end) s << ", ";
+  }

trunk/indexers/mgpp/text/Terms.cpp

-              r8242
+              r8692
   unsigned long i;
   for (i=0; i<t.equivTerms.size(); i++) {
+  for (i=0; i<t.equivTerms.size(); ++i) {
     s << t.equivTerms[i] << ", ";
+  }
 …
   s << "termFreqs: ";
   for (unsigned long i=0; i<termFreqs.size(); i++)
+  for (unsigned long i=0; i<termFreqs.size(); ++i)
     s << termFreqs[i] << ", ";
 …
   s << "docs: ";
   unsigned long i;
   for (i=0; i<r.docs.size(); i++)
+  for (i=0; i<r.docs.size(); ++i)
     s << r.docs[i] << ", ";
   s << "\nranks: ";
   for (i=0; i<r.ranks.size(); i++)
+  for (i=0; i<r.ranks.size(); ++i)
     s << r.ranks[i] << ", ";
   s << "\ntermFreqs: ";
   for (i=0; i<r.termFreqs.size(); i++)
+  for (i=0; i<r.termFreqs.size(); ++i)
     s << r.termFreqs[i] << ", ";
 …
   s << "docs: ";
   unsigned long i;
   for (i=0; i<r.docs.size(); i++)
+  for (i=0; i<r.docs.size(); ++i)
     s << r.docs[i] << ", ";
   s << "\nlevels: ";
   for (i=0; i<r.levels.size(); i++)
+  for (i=0; i<r.levels.size(); ++i)
     s << r.levels[i] << ", ";
   s << "\nranks: ";
   for (i=0; i<r.ranks.size(); i++)
+  for (i=0; i<r.ranks.size(); ++i)
     s << r.ranks[i] << ", ";
   s << "\ntermFreqs: ";
   for (i=0; i<r.termFreqs.size(); i++)
+  for (i=0; i<r.termFreqs.size(); ++i)
     s << r.termFreqs[i] << ", ";
   s << "\nactual number of docs found: " << r.actualNumDocs;
 …
   s << "terms: ";
   unsigned long i;
   for (i=0; i<r.termFreqs.size(); i++)
+  for (i=0; i<r.termFreqs.size(); ++i)
     s << r.termFreqs[i] << ", ";
     s << "\n\n";
 …
   unsigned long fragLimitI = 0;
   unsigned long i;
   for (i=0; i<wordDictEl.frag_occur; i++) {
+  for (i=0; i<wordDictEl.frag_occur; ++i) {
     fragNum += buffer.bblock_decode (B, NULL);
     if (!indexData.ifh.word_level_index) termFreq = buffer.gamma_decode (NULL);
 …
       while (fragLimitI+1 < (*fragLimits).size() &&
          fragNum > (*fragLimits)[fragLimitI+1].rangeStart) {
     fragLimitI++;
+    ++fragLimitI;
+      }
+    }
 …
       if (needFragFreqs)
     outFragData.fragFreqs.push_back (f2.fragFreqs[f2I]);
       f2I++;
+      ++f2I;
     } else if (f1I < f1Size &&
 …
       if (needFragFreqs)
     outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]);
       f1I++;
+      ++f1I;
     } else {
 …
       if (needFragFreqs)
     outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]+f2.fragFreqs[f2I]);
       f1I++;
       f2I++;
+      ++f1I;
+      ++f2I;
+    }
+  }
 …
       while (fragLimitI+1 < fragLimitSize &&
          comFragNum > (signed long)(*fragLimits)[fragLimitI+1].rangeStart) {
     fragLimitI++;
+    ++fragLimitI;
+      }
+    }
 …
     (fragLimits!=NULL &&
      fragNum<=(signed long)(*fragLimits)[fragLimitI].rangeStart)) {
       fragDataI++;
+      ++fragDataI;
     } else if (fragNum > comFragNum+endRange ||
            (fragLimits!=NULL &&
         fragNum>(signed long)(*fragLimits)[fragLimitI].rangeEnd)) {
       comFragDataI++;
+      ++comFragDataI;
     } else {
 …
       fragData.fragFreqs[fragDataI] : comFragData.fragFreqs[comFragDataI];
+      }
       fragDataI++;
       comFragDataI++;
       outI++;
+      ++fragDataI;
+      ++comFragDataI;
+      ++outI;
+    }
+  }
 …
+      }
+    }
     termDataI++;
+    ++termDataI;
+  }
 …
       while (resultI < resultSize &&
          result.docs[resultI] < lastLevelDocNum)
         resultI++;
+        ++resultI;
       // store the result
 …
         if (needRanks)
           result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
         resultI++;
         resultOutI++;
+        ++resultI;
+        ++resultOutI;
+      }
+    }
 …
+    }
     termDataI++;
+    ++termDataI;
   } // while
 …
     while (resultI < resultSize &&
        result.docs[resultI] < lastLevelDocNum)
       resultI++;
+      ++resultI;
     // store the result
 …
       if (needRanks)
     result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
       resultI++;
       resultOutI++;
+      ++resultI;
+      ++resultOutI;
+    }
+  }
 …
       while (resultI < resultSize &&
          result.docs[resultI] < lastLevelDocNum)
         resultI++;
+        ++resultI;
       // store the result
 …
         if (needRanks)
           result.ranks[resultOutI] = result.ranks[resultI];
         resultI++;
       resultOutI++;
+        ++resultI;
+        ++resultOutI;
+      }
+    }
 …
+    }
     termDataI++;
+    ++termDataI;
+  }
 …
     while (resultI < resultSize &&
        result.docs[resultI] < lastLevelDocNum)
       resultI++;
+      ++resultI;
     // store the result
 …
       if (needRanks)
     result.ranks[resultOutI] = result.ranks[resultI];
       resultI++;
       resultOutI++;
+      ++resultI;
+      ++resultOutI;
+    }
+  }
 …
     termdata.termFreq = (*here).freq;
     terms.push_back(termdata);
     here++;
+    ++here;
+  }

trunk/indexers/mgpp/text/TextEl.cpp

-              r3365
+              r8692
 static void ToggleParaTag (TextEl &el, bool &compatInPara) {
   SetCStr (el.tagName, "Paragraph");
+  SetCStr (el.tagName, "Paragraph", 9);
   el.text.erase (el.text.begin(), el.text.end());
   if (compatInPara) {
 …
 static void SetRecTag (TextEl &el, TextElType elType) {
   el.elType = elType;
   SetCStr (el.tagName, "Document");
+  SetCStr (el.tagName, "Document", 8);
   el.text.erase (el.text.begin(), el.text.end());
   if (elType == CloseTagE)

trunk/indexers/mgpp/text/TextGet.cpp

-              r3365
+              r8692
  **************************************************************************/
+// this include must come first, so that we avoid the truncation warning on VC++
+#include "TextGet.h"
 // need this to avoid bizarre compiler problems under VC++ 6.0
 #if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
 …
 #endif
-#include "TextGet.h"
 #include "mg_files.h"
 #include "netorder.h"
 …
   memset (ad, '\0', sizeof (*ad));
   for (i = 0; i <= 1; i++)
+  for (i = 0; i <= 1; ++i)
+    {
       int j;
 …
       pos = ad->word_data[i];
       for (j = 0; j < (int)ad->afh[i].num_frags; j++)
+      for (j = 0; j < (int)ad->afh[i].num_frags; ++j)
+    {
       ad->words[i][j] = pos;
 …
           ad->blk_end[i][num] = ad->blk_start[i][num] +
         (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
           num++;
+          ++num;
+        }
+    }
 …
   lookback = cd.cdh.lookback;
   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++) {
+  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i) {
     ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
     mem_reqd += cfh->huff_words_size[i];
 …
   values[0] = vals;
   values[0][0] = next_word[0];
   for (i = 1; i <= cfh->hd.maxcodelen; i++)
+  for (i = 1; i <= cfh->hd.maxcodelen; ++i)
+    {
       int next_start = (values[i - 1] - vals) +
 …
   memset (num_set, '\0', sizeof (num_set));
   for (i = 0; i < cfh->hd.num_codes; i++)
+  for (i = 0; i < cfh->hd.num_codes; ++i)
+    {
       register int val, copy;
 …
+    }
       memcpy (last_word[len], word, *word + 1);
       num_set[len]++;
+      ++num_set[len];
+    }
   if (cfh->hd.clens)
     delete cfh->hd.clens;
+    delete []cfh->hd.clens;
   cfh->hd.clens = NULL;
   return values;
 …
     return 3;
   if (hd->clens)
     delete hd->clens;
+    delete []hd->clens;
   hd->clens = NULL;
   if (type == chars)
 …
     return false;
   for (which = 0; which < 2; which++)
+  for (which = 0; which < 2; ++which)
     switch (cd.cdh.dict_type)
+      {
 …
   fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict);
   for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)
+  for (p = (u_long *) cd; (u_long) p < (u_long) end; ++p)
     if (IS_FIXUP (p))
+      {
 …
   NTOHUL(cd->cdh.dict_type);
   NTOHUL(cd->cdh.novel_method);
   for (i = 0; i < TEXT_PARAMS; i++)
+  for (i = 0; i < TEXT_PARAMS; ++i)
     NTOHUL(cd->cdh.params[i]);
   NTOHUL(cd->cdh.num_words[0]);
 …
   NTOHUL(cd->cdh.lookback);
   /* cfh */
   for (i = 0; i <= 1; i++)
+  for (i = 0; i <= 1; ++i)
+    {
       int j;
 …
       NTOHSI(cd->cfh[i]->hd.mincodelen);
       NTOHSI(cd->cfh[i]->hd.maxcodelen);
       for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
+      for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
+    {
       NTOHSI(cd->cfh[i]->hd.lencount[j]);
 …
+    }
       NTOHUL(cd->cfh[i]->uncompressed_size);
       for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
+      for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
     NTOHUL(cd->cfh[i]->huff_words_size[j]);
+    }
 …
   if (cd->cdh.novel_method == MG_NOVEL_DELTA ||
       cd->cdh.novel_method == MG_NOVEL_HYBRID)
     for (i = 0; i <= 1; i++)
+    for (i = 0; i <= 1; ++i)
+      {
     int j;
 …
     NTOHUL(cd->ad->afh[i].num_frags);
     NTOHUL(cd->ad->afh[i].mem_for_frags);
     for (j = 0; j < 33; j++)
+    for (j = 0; j < 33; ++j)
+      {
         NTOHSI(cd->ad->blk_start[i][j]);
 …
   NTOHSI(cd->fast_loaded);
   delete fixup;
+  delete []fixup;
   // the whole fast comp dict is a bit of a hack so I don't
 …
   unsigned long bits = 0;
+  if (docText.capacity() < docText.size() + num_bits + 1) {
+    docText.reserve(docText.size() + num_bits + 1);
+  }
   // keep decoding bits until enough bits have been decoded
   while (bits < num_bits) {
 …
       len = buffer.huff_decode(cd.lens_huff[which]->min_code,
                    cd.lens_vals[which], &bits);
       for (i = 0; i < len; i++) {
+      for (i = 0; i < len; ++i) {
         c = buffer.huff_decode(cd.chars_huff[which]->min_code,
                    cd.chars_vals[which], &bits);
 …
+          {
         idx = buffer.delta_decode (&bits);
         idx--;
+        --idx;
+          }
           break;
 …
         int k;
         k = buffer.gamma_decode (&bits);
         k--;
+        --k;
         idx = buffer.binary_decode(ad->blk_end[which][k] -
                        ad->blk_start[which][k] + 1,
 …
       base = ad->words[which][idx];
       len = *base++;
       for (; len; len--)
+      for (; len; --len)
+        {
           docText.push_back (*base++);

trunk/indexers/mgpp/text/UCArray.cpp

-              r8242
+              r8692
   while (*cStr != '\0') {
     text.push_back (*cStr);
     cStr++;
+    ++cStr;
+  }
+}
 …
+}
 char * GetCStr(UCArray text) {
+char * GetCStr(const UCArray& text) {
   char *cstr = new char[text.size()+1];
 …
   while (here != end) {
     cstr[i] = text[i];
     here++; i++;
+    ++here; ++i;
+  }
   cstr[i]='\0';
   return cstr;
+}
+bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr)
+{
+  if ((cStr == NULL || *cStr == '\0') && text.empty()) return true;
+  UCArray::const_iterator thisUC = text.begin();
+  UCArray::const_iterator endUC = text.end();
+  while (thisUC != endUC && *cStr != '\0') {
+    if (*thisUC != *cStr) return false;
+    ++cStr; ++thisUC;
+  }
+  if (thisUC == endUC && *cStr == '\0') return true;
+  return false;
+}
 …
     a.push_back (b);
     arraySize--;
+    --arraySize;
+  }
 …
   while (i < l && *a1Here == *a2Here) {
     i++; ++a1Here; ++a2Here;
+    ++i; ++a1Here; ++a2Here;
+  }
 …
     unsigned char c = fgetc (f);
     a.push_back (c);
     sufLen--;
+  }
   return true;
+}
+    --sufLen;
+  }
+  return true;
+}

trunk/indexers/mgpp/text/UCArray.h

-              r8242
+              r8692
 #ifndef UCARRAY_H
 #define UCARRAY_H
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
+#pragma warning(disable:4786)
+#endif
 // need this to avoid bizarre compiler problems under VC++ 6.0
 …
 // same as SetCStr but first tries to allocate nSizeHint space (only if needed)
 void SetCStr (UCArray &text, const char *cStr, size_t nSizeHint);
 char * GetCStr(UCArray text);
+char * GetCStr(const UCArray& text);
 inline void UCArrayClear (UCArray &a) {
   a.erase (a.begin(), a.end());
+}
+bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr);
+inline bool UCArrayCStrEquals(const UCArray &text, const char *cStr) { return UCArrayCStrEquals(text, (const unsigned char *)cStr); }
 // stream operator to print UCArray

trunk/indexers/mgpp/text/mg_errors.cpp

-              r3365
+              r8692
   /* free the current error string, unless it is the null string */
   if ((mg_error_data != NULL) && (mg_error_data != null_data)) {
     delete mg_error_data;
+    delete []mg_error_data;
     mg_error_data = null_data;
+  }
 …
   /* free the current error string, unless it is the null string */
   if ((mg_error_data != NULL) && (mg_error_data != null_data)) {
     delete mg_error_data;
+    delete []mg_error_data;
     mg_error_data = null_data;
+  }

trunk/indexers/mgpp/text/mg_files.cpp

r3365	r8692
45	45	if (basepath)
46	46	{
47		delete basepath;
	47	delete []basepath;
48	48	basepath = NULL;
49	49	}

trunk/indexers/mgpp/text/mgpp_compression_dict.cpp

-              r3365
+              r8692
   NTOHD(csh.num_bytes);
   for (i = 0; i < 2; i++)
+  for (i = 0; i < 2; ++i)
+    {
       frags_stats_header fsh;
 …
       wd = Words[i] = (DictWordData *) Xmalloc (sizeof (DictWordData) * Num[i]);
       unsigned int j;
       for (j = 0; j < Num[i]; j++, wd++)
+      for (j = 0; j < Num[i]; ++j, ++wd)
+    {
       int len;
 …
   dd->chars = 0;
   wd = dd->wd;
   for (i = 0; i < dd->num_wds; i++, wd++)
+  for (i = 0; i < dd->num_wds; ++i, ++wd)
     dd->chars += (*wd)->word[0];
+}
 …
   Alloc_keep_discard ();
   keep[0].num_wds = Num[0];
   for (i = 0; i < Num[0]; i++)
+  for (i = 0; i < Num[0]; ++i)
     keep[0].wd[i] = Words[0] + i;
   keep[1].num_wds = Num[1];
   for (i = 0; i < Num[1]; i++)
+  for (i = 0; i < Num[1]; ++i)
     keep[1].wd[i] = Words[1] + i;
   SortAndCount_DictData (&keep[0]);
 …
   num = Num[0] + Num[1];
   wd = (DictWordData **) Xmalloc (num * sizeof (DictWordData *));
   for (i = 0; (unsigned int)i < Num[0]; i++)
+  for (i = 0; (unsigned int)i < Num[0]; ++i)
     wd[i] = Words[0] + i;
   for (i = 0; (unsigned int)i < Num[1]; i++)
+  for (i = 0; (unsigned int)i < Num[1]; ++i)
     wd[i + Num[0]] = Words[1] + i;
 …
+    }
   for (i = 0; i < num; i++)
+  for (i = 0; i < num; ++i)
+    {
       DictWordData *word = wd[i];
 …
   memset (char_freqs, '\0', sizeof (char_freqs));
   memset (len_freqs, '\0', sizeof (len_freqs));
   for (i = 0; i < num; i++, wd++)
+  for (i = 0; i < num; ++i, ++wd)
+    {
       u_long freq = (*wd)->documents();
 …
       len_freqs[idx][len] += freq;
       escape[idx] += freq;
       for (; len; len--, buf++)
+      for (; len; --len, ++buf)
     char_freqs[idx][(u_long) (*buf)] += freq;
+    }
 …
   int j;
   for (j = 0; j < num; j++, word++)
+  for (j = 0; j < num; ++j, ++word)
+    {
       float   cbc, wbc;
 …
       cbc = len_lens[idx][len];
       for (; len; len--, buf++)
+      for (; len; --len, ++buf)
     cbc += char_lens[idx][(u_long) (*buf)];
 …
   for (i = 0; (unsigned int)i < Num[0]; i++)
+  for (i = 0; (unsigned int)i < Num[0]; ++i)
     discard_heap[i] = Words[0] + i;
   for (i = 0; (unsigned int)i < Num[1]; i++)
+  for (i = 0; (unsigned int)i < Num[1]; ++i)
     discard_heap[i + Num[0]] = Words[1] + i;
 …
       keep_heap[keep_num++] = word;
       freqs_trans[KIND (word)] += word->documents();
       num_trans++;
+      ++num_trans;
+    }
 …
       heap_build (keep_heap, sizeof (keep_heap), keep_num, SmallSaving);
       recalc_reqd = 0;
       recalcs++;
+      ++recalcs;
+    }
+    }
 …
   Alloc_keep_discard ();
   for (i = 0; i < discard_num; i++)
+  for (i = 0; i < discard_num; ++i)
+    {
       DictWordData *word = discard_heap[i];
 …
       discard[idx].wd[discard[idx].num_wds++] = word;
+    }
   for (i = 0; i < keep_num; i++)
+  for (i = 0; i < keep_num; ++i)
+    {
       DictWordData *word = keep_heap[i];
 …
   HTONUL(tmp.dict_type);
   HTONUL(tmp.novel_method);
   for (i = 0; i < TEXT_PARAMS; i++)
+  for (i = 0; i < TEXT_PARAMS; ++i)
     HTONUL(tmp.params[i]);
   HTONUL(tmp.num_words[0]);
 …
   unsigned int i;
   u_char *curr, *prev = NULL;
   for (i = 0; i < dd->num_wds; i++)
+  for (i = 0; i < dd->num_wds; ++i)
+    {
       int len;
 …
+{
   unsigned int i, us;
   for (us = i = 0; i < dd->num_wds; i++)
+  for (us = i = 0; i < dd->num_wds; ++i)
     us += dd->wd[i]->word[0];
   return us;
 …
     FatalError (1, "Unable to allocate memory for freqs");
   for (i = 0; (unsigned)i < dd->num_wds; i++)
+  for (i = 0; (unsigned)i < dd->num_wds; ++i)
+    {
       freqs[i] = dd->wd[i]->documents();
 …
     FatalError (1, "Unable to allocate memory for huffman data");
   delete (freqs);
+  delete []freqs;
   freqs = NULL;
 …
   mem_reqd = 0;
   for (i = 0; (unsigned)i < dd->num_wds; i++)
+  for (i = 0; (unsigned)i < dd->num_wds; ++i)
+    {
       int codelen = hd->clens[i];
 …
       lastword[codelen] = word;
 #endif
       lencounts[codelen]++;
+      ++lencounts[codelen];
+    }
   /* [RPAP - Jan 97: Endian Ordering] */
   for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
+  for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
     HTONUL(huff_words_size[i]);
 …
   /* [RPAP - Jan 97: Endian Ordering] */
   for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
+  for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
     NTOHUL(huff_words_size[i]);
   Write_words (f, dd);
   delete hd->clens;
+  delete []hd->clens;
   delete hd;
 …
   memset (freqs, '\0', sizeof (freqs));
   for (j = 0; j < dd->num_wds; j++, wd++)
+  for (j = 0; j < dd->num_wds; ++j, ++wd)
+    {
       u_char *buf = (*wd)->word;
       int len = *buf++;
       for (; len; len--, buf++)
+      for (; len; --len, ++buf)
     freqs[(u_long) (*buf)] += (*wd)->documents();
+    }
   if (!zero_freq_permitted)
     for (j = 0; j < 256; j++)
+    for (j = 0; j < 256; ++j)
       if (!freqs[j] && PESINAWORD (j) == words)
     freqs[j] = 1;
 …
     FatalError (1, "Unable to write huffman data");
   delete hd->clens;
+  delete []hd->clens;
   delete hd;
+}
 …
   memset (freqs, '\0', sizeof (freqs));
   for (j = 0; j < dd->num_wds; j++, wd++)
+  for (j = 0; j < dd->num_wds; ++j, ++wd)
     freqs[(*wd)->word[0]] += (*wd)->documents();
   if (!zero_freq_permitted)
     for (j = 0; j < 16; j++)
+    for (j = 0; j < 16; ++j)
       if (!freqs[j])
     freqs[j] = 1;
 …
   delete hd->clens;
+  delete []hd->clens;
   delete hd;
+}
 …
   Write_cdh (f, &cdh);
   for (i = 0; i < 2; i++)
+  for (i = 0; i < 2; ++i)
     switch (type)
+      {
 …
           esc.word = (u_char *) "";
           keep[i].wd[keep[i].num_wds++] = &esc;
           for (j = 0; (unsigned)j < discard[i].num_wds; j++)
+          for (j = 0; (unsigned)j < discard[i].num_wds; ++j)
         esc.docCount += discard[i].wd[j]->documents();
           if (!esc.docCount)
         esc.docCount++;
+        ++esc.docCount;
           mem_reqd += Write_data (f, &keep[i], lookback);
+        }
 …
           esc.word = (u_char *) "";
           keep[i].wd[keep[i].num_wds++] = &esc;
           for (j = 0; (unsigned)j < all[i].num_wds; j++)
+          for (j = 0; (unsigned)j < all[i].num_wds; ++j)
         if (all[i].wd[j]->documents() == 1)
           esc.docCount++;
+          ++esc.docCount;
           if (!esc.docCount)
         esc.docCount++;
+        ++esc.docCount;
           mem_reqd += Write_data (f, &keep[i], lookback);
+        }

trunk/indexers/mgpp/text/mgpp_decompress_text.cpp

-              r3365
+              r8692
   char *basePath = "";
   UCArray level;
   SetCStr (level, "Document");
+  SetCStr (level, "Document", 8);
   opterr = 0;
 …
       break;
     case 'K':
       SetCStr (level, optarg);
+      SetCStr (level, optarg, strlen(optarg));
       break;
     case 'h':
 …
     cout << docText << "\n";
     docNum++;
+    ++docNum;
+  }

trunk/indexers/mgpp/text/mgpp_fast_comp_dict.cpp

-              r3365
+              r8692
 #define FIXUP_VALS(vals) do {                       \
     int i;                              \
     for (i=0; i < MAX_HUFFCODE_LEN+1; i++)              \
+    for (i=0; i < MAX_HUFFCODE_LEN+1; ++i)              \
       FIXUP(&vals[i]);                      \
       } while(0)
 …
     /* cfh */
     for (which = 0; which <= 1; which++)
+    for (which = 0; which <= 1; ++which)
+      {
     int j;
 …
     HTONSI(cd->cfh[which]->hd.mincodelen);
     HTONSI(cd->cfh[which]->hd.maxcodelen);
     for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
+    for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
+      {
         HTONSI(cd->cfh[which]->hd.lencount[j]);
 …
+      }
     HTONUL(cd->cfh[which]->uncompressed_size);
     for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
+    for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
       HTONUL(cd->cfh[which]->huff_words_size[j]);
+      }
 …
     if (cd->cdh.novel_method == MG_NOVEL_DELTA ||
     cd->cdh.novel_method == MG_NOVEL_HYBRID)
       for (which = 0; which <= 1; which++)
+      for (which = 0; which <= 1; ++which)
+    {
       int j;
 …
       HTONUL(cd->ad->afh[which].num_frags);
       HTONUL(cd->ad->afh[which].mem_for_frags);
       for (j = 0; j < 33; j++)
+      for (j = 0; j < 33; ++j)
+        {
           HTONSI(cd->ad->blk_start[which][j]);
 …
     HTONUL(cd->cdh.dict_type);
     HTONUL(cd->cdh.novel_method);
     for (i = 0; i < TEXT_PARAMS; i++)
+    for (i = 0; i < TEXT_PARAMS; ++i)
       HTONUL(cd->cdh.params[which]);
     HTONUL(cd->cdh.num_words[0]);
 …
+{
   u_long *p;
   for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)
+  for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p)
+    {
       if (IS_FIXUP (p))
 …
            MAGIC_AUX_DICT, MG_ABORT);  /* [RPAP - Feb 97: WIN32 Port] */
   for (i = 0; i <= 1; i++)
+  for (i = 0; i <= 1; ++i)
+    {
       aux_frags_header afh;
 …
   lookback = cdh->lookback;
   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
+  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i)
+    {
       ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
 …
   mem += mem_reqd;
   for (i = 0; i < cfh->hd.num_codes; i++)
+  for (i = 0; i < cfh->hd.num_codes; ++i)
+    {
       register int val;
       for (val = getc (dict) & 0xf; val; val--)
+      for (val = getc (dict) & 0xf; val; --val)
     getc (dict);
+    }
 …
   Read_Huffman_Data (dict, &hd, NULL, NULL);
   if (hd.clens)
     delete hd.clens;
+    delete []hd.clens;
   mem += hd.num_codes * sizeof (unsigned long);
   mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
 …
   mem += mem_for_words (dict, cdh, cfh);
   if (cfh->hd.clens)
     delete cfh->hd.clens;
+    delete []cfh->hd.clens;
   return mem;
 …
   NTOHUL(cdh.dict_type);
   NTOHUL(cdh.novel_method);
   for (i = 0; i < TEXT_PARAMS; i++)
+  for (i = 0; i < TEXT_PARAMS; ++i)
     NTOHUL(cdh.params[i]);
   NTOHUL(cdh.num_words[0]);
 …
   NTOHUL(cdh.lookback);
   for (which = 0; which < 2; which++)
+  for (which = 0; which < 2; ++which)
     switch (cdh.dict_type)
+      {
 …
   ad = (auxiliary_dict *) getmem (sizeof (auxiliary_dict), sizeof (u_char *));
   for (i = 0; i <= 1; i++)
+  for (i = 0; i <= 1; ++i)
+    {
       unsigned int j;
 …
       pos = ad->word_data[i];
       for (j = 0; j < ad->afh[i].num_frags; j++)
+      for (j = 0; j < ad->afh[i].num_frags; ++j)
+    {
       ad->words[i][j] = pos;
 …
           ad->blk_end[i][num] = ad->blk_start[i][num] +
         (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
           num++;
+          ++num;
+        }
+    }
 …
   lookback = cd->cdh.lookback;
   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
+  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i)
+    {
       ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
 …
   values[0][0] = next_word[0];
   FIXUP (&values[0][0]);
   for (i = 1; i <= cfh->hd.maxcodelen; i++)
+  for (i = 1; i <= cfh->hd.maxcodelen; ++i)
+    {
       int next_start = (values[i - 1] - vals) +
 …
   memset (num_set, '\0', sizeof (num_set));
   for (i = 0; i < cfh->hd.num_codes; i++)
+  for (i = 0; i < cfh->hd.num_codes; ++i)
+    {
       register int val, copy;
 …
+    }
       memcpy (last_word[len], word, *word + 1);
       num_set[len]++;
+      ++num_set[len];
+    }
   if (cfh->hd.clens)
     delete cfh->hd.clens;
+    delete []cfh->hd.clens;
   cfh->hd.clens = NULL;
   return values;
 …
   fcode[0] = values[0] = &vals[0];
   FIXUP (&values[0]);
   for (i = 1; i <= data->maxcodelen; i++)
+  for (i = 1; i <= data->maxcodelen; ++i)
+    {
       fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
 …
+    }
   for (i = 0; i < data->num_codes; i++)
+  for (i = 0; i < data->num_codes; ++i)
     if (data->clens[i])
       *fcode[(int) (data->clens[i])]++ = i;
 …
   FIXUP (&cd->chars_vals[which]);
   if (hd->clens)
     delete hd->clens;
+    delete []hd->clens;
   hd->clens = NULL;
+}
 …
   Read_cdh (dict, &cd->cdh, NULL, NULL);
   for (which = 0; which < 2; which++)
+  for (which = 0; which < 2; ++which)
     switch (cd->cdh.dict_type)
+      {
 …
+  {
     u_long *p;
     for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)
+    for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p)
+      {
     if (IS_FIXUP (p))

trunk/indexers/mgpp/text/mgpp_passes.cpp

-              r3365
+              r8692
 #define _XOPEN_SOURCE 1
 #define _XOPEN_SOURCE_EXTENDED 1
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
+#pragma warning(disable:4786)
+#endif
 // need this to avoid bizarre compiler problems under VC++ 6.0

trunk/indexers/mgpp/text/text.pass2.cpp

-              r3365
+              r8692
  **************************************************************************/
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
+#pragma warning(disable:4786)
+#endif
 // need this to avoid bizarre compiler problems under VC++ 6.0
 #if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
 …
   int i;
   if (cdh.novel_method != MG_NOVEL_HUFFMAN_CHARS)
     for (i = 0; i <= 1; i++)
+    for (i = 0; i <= 1; ++i)
+      {
     nht[i].HashSize = INITIAL_HASH_SIZE;
 …
         blk_end[i][num] = blk_start[i][num] +
           (blk_end[i][num - 1] - blk_start[i][num - 1]) * 2;
         num++;
+        ++num;
+          }
+      }
 …
     int res;
     if (which) numWords++;
+    if (which) ++numWords;
     /* First parse a word or non-word out of the string */
 …
     s2 = *wptr;
     len = *s1 + 1;
     for (; len; len--)
+    for (; len; --len)
       if (*s1++ != *s2++) break;
 …
+          }
         buffer.huff_encode (Word[0], lens_codes[which], lens_huff[which].clens, NULL);
         for (i = 0; i < Word[0]; i++)
+        for (i = 0; i < Word[0]; ++i)
           buffer.huff_encode (Word[i + 1], char_codes[which],
                   char_huff[which].clens, NULL);
 …
           buffer.huff_encode (Word[0], lens_codes[which],
                       lens_huff[which].clens, NULL);
           for (i = 0; i < Word[0]; i++)
+          for (i = 0; i < Word[0]; ++i)
             buffer.huff_encode (Word[i + 1], char_codes[which],
                     char_huff[which].clens, NULL);
 …
               h->pool->ptr += len;
               h->pool->left -= len;
               h->HashUsed++;
+              ++h->HashUsed;
               break;
+            }
 …
               s2 = ent->word;
               len = *s1 + 1;
               for (; len; len--)
+              for (; len; --len)
             if (*s1++ != *s2++)
               break;
 …
             int j = ent->ordinal_num - 1;
             while (j > blk_end[which][k])
               k++;
+              ++k;
             assert (j - blk_start[which][k] + 1 >= 1 &&
                 j - blk_start[which][k] + 1 <=
 …
               memset (ht, '\0', sizeof (novel_hash_rec) * size);
               for (i = 0; i < h->HashSize; i++)
+              for (i = 0; i < h->HashSize; ++i)
             if (h->HashTable[i].word)
+              {
 …
     cth.num_of_bytes += (*here).text.size();
     here++;
+    ++here;
+  }
 …
   while (tiHere != tiEnd) {
     if ((*tiHere).second.inDoc) (*tiHere).second.SetEnd (endPos, endBit);
     tiHere++;
+    ++tiHere;
+  }
   // we've processed one more document
   cth.num_of_docs++;
+  ++cth.num_of_docs;
   return COMPALLOK;
 …
     return COMPERROR;
   for (i = 0; i <= 1; i++)
+  for (i = 0; i <= 1; ++i)
+    {
       aux_frags_header afh;
 …
   int i;
   u_long aux_compressed = 0, total_uncomp = 0;
   for (i = 0; i <= 1; i++)
+  for (i = 0; i <= 1; ++i)
+    {
       int j;
 …
+        {
           int len = *buf++;
           lens[len]++;
+          ++lens[len];
           total_uncomp += len + 4;
           for (; len; len--)
         chars[*buf++]++;
+          for (; len; --len)
+        ++chars[*buf++];
+        }
+    }
       for (j = 0; j < 256; j++)
+      for (j = 0; j < 256; ++j)
     if (!chars[j] && PESINAWORD (j) == i)
       fchars[j] = 1;
     else
       fchars[j] = chars[j];
       for (j = 0; j < 16; j++)
+      for (j = 0; j < 16; ++j)
     if (!lens[j])
       flens[j] = 1;
 …
       return false;
     tiHere++;
+    ++tiHere;
+  }

trunk/indexers/mgpp/text/words.cpp

-              r3365
+              r8692
+}
+/* It determines whether a given place in a UTF-8 encoded Unicode string is a unicode space. */
+int isaspace (const u_char *here, const u_char *end)
+{
+  unsigned short c;
+  if (parse_utf8_char(here, end, &c) > 0) return is_unicode_space(c);
+  return 0;
+}
+/* Return a pointer into the UTF-8 encoded Unicode string with leading
+   Unicode spaces skipped. */
+u_char *skipspace(u_char *here, u_char *end)
+{
+  unsigned short c;
+  int length;
+  while(here != end) {
+    length = parse_utf8_char(here, end, &c);
+    if (length == 0 || !is_unicode_space(c)) break;
+    here += length;
+  }
+  return here;
+}
 const unsigned char *ParseIndexWord (const unsigned char *textHere,
                      const unsigned char *textEnd,
 …
                    ++numeric <= MAXNUMERIC))) {
     while (charlength-- > 0) {
       word.push_back (*textHere++); length++;
+      word.push_back (*textHere++); ++length;
+    }
     charlength = parse_utf8_char (textHere, textEnd, &c);

trunk/indexers/mgpp/text/words.h

-              r3365
+              r8692
 #ifdef __cplusplus
 extern "C"
+extern "C" {
 #endif
 int inaword (const u_char *here, const u_char *end);
 …
        is part of a word. */
+int isaspace (const u_char *here, const u_char *end);
+        /* It determines whether a given place in a UTF-8 encoded Unicode string is a unicode space. */
+u_char *skipspace(u_char *here, u_char *end);
+        /* Return a pointer into the UTF-8 encoded Unicode string with leading Unicode spaces skipped. */
+#ifdef __cplusplus
+}
+#endif
 const unsigned char *ParseIndexWord (const unsigned char *textHere,

Context Navigation

Legend:

trunk/indexers/mgpp/lib/bitio_m_random.cpp

trunk/indexers/mgpp/lib/huffman.cpp

trunk/indexers/mgpp/text/GSDLQueryLex.cpp

trunk/indexers/mgpp/text/GSDLQueryParser.cpp

trunk/indexers/mgpp/text/IndexData.cpp

trunk/indexers/mgpp/text/MGQuery.cpp

trunk/indexers/mgpp/text/QueryLex.cpp

trunk/indexers/mgpp/text/QueryTester.cpp

trunk/indexers/mgpp/text/Queryer.cpp

trunk/indexers/mgpp/text/TagInfo.cpp

trunk/indexers/mgpp/text/Terms.cpp

trunk/indexers/mgpp/text/TextEl.cpp

trunk/indexers/mgpp/text/TextGet.cpp

trunk/indexers/mgpp/text/UCArray.cpp

trunk/indexers/mgpp/text/UCArray.h

trunk/indexers/mgpp/text/mg_errors.cpp

trunk/indexers/mgpp/text/mg_files.cpp

trunk/indexers/mgpp/text/mgpp_compression_dict.cpp

trunk/indexers/mgpp/text/mgpp_decompress_text.cpp

trunk/indexers/mgpp/text/mgpp_fast_comp_dict.cpp

trunk/indexers/mgpp/text/mgpp_passes.cpp

trunk/indexers/mgpp/text/text.pass2.cpp

trunk/indexers/mgpp/text/words.cpp

trunk/indexers/mgpp/text/words.h

Download in other formats: