Changeset 8692


Ignore:
Timestamp:
2004-11-29T15:43:11+13:00 (19 years ago)
Author:
kjdon
Message:

Added the changes from Emanuel Dejanu (Simple Words), mostly efficiency changes: for example, changing i++ to ++i, changing delete xxx to delete []xxx, and some changes to do with UCArrays.

Location:
trunk
Files:
48 edited

Legend:

Unmodified
Added
Removed
  • trunk/indexers/mgpp/lib/bitio_m_random.cpp

    r3365 r8692  
    1616
    1717random_bitio_buffer::~random_bitio_buffer() {
    18   if (buffer != NULL) delete buffer;
     18  if (buffer != NULL) delete []buffer;
    1919}
    2020
     
    2222  // delete the old buffer
    2323  if (buffer != NULL) {
    24     delete buffer;
     24    delete []buffer;
    2525    buffer = NULL;
    2626  }
     
    5454  // delete the old buffer
    5555  if (buffer != NULL) {
    56     delete buffer;
     56    delete []buffer;
    5757    buffer = NULL;
    5858  }
     
    131131  flush();
    132132  if (buffer != NULL) {
    133     delete buffer;
     133    delete []buffer;
    134134    buffer = NULL;
    135135  }
  • trunk/indexers/mgpp/lib/huffman.cpp

    r3365 r8692  
    5656
    5757  /* Initialise the pointers to the leaves */
    58   for (count = i = 0; i < num; i++)
     58  for (count = i = 0; i < num; ++i)
    5959    if (heap[num + i])
    6060      heap[count++] = num + i;
     
    6262  /* Reorganise the pointers so that it is a heap */
    6363  HNum = count;
    64   for (i = HNum / 2; i > 0; i--)
     64  for (i = HNum / 2; i > 0; --i)
    6565    {
    6666      register int curr, child;
     
    7070    {
    7171      if (child < HNum && heap[heap[child]] < heap[heap[child - 1]])
    72         child++;
     72        ++child;
    7373      if (heap[heap[curr - 1]] > heap[heap[child - 1]])
    7474        {
     
    9191      int pos[2];
    9292
    93       for (i = 0; i < 2; i++)
     93      for (i = 0; i < 2; ++i)
    9494    {
    9595      register int curr, child;
     
    102102          if (child < HNum &&
    103103          heap[heap[child]] < heap[heap[child - 1]])
    104         child++;
     104        ++child;
    105105          if (heap[heap[curr - 1]] > heap[heap[child - 1]])
    106106        {
     
    124124      {
    125125    register int parent, curr;
    126     HNum++;
     126    ++HNum;
    127127    curr = HNum;
    128128    parent = curr >> 1;
     
    143143  heap[0] = -1UL;
    144144  heap[1] = 0;
    145   for (i = 2; i < num * 2; i++)
     145  for (i = 2; i < num * 2; ++i)
    146146    heap[i] = heap[heap[i]] + 1;
    147147
     
    152152
    153153  /* Set the code length of each leaf in the huffman tree */
    154   for (i = 0; i < num; i++)
     154  for (i = 0; i < num; ++i)
    155155    {
    156156      register u_long codelen = heap[i + num];
     
    162162      if (codelen < hd->mincodelen)
    163163    hd->mincodelen = codelen;
    164       hd->lencount[codelen]++;
     164      ++hd->lencount[codelen];
    165165    }
    166166
     
    174174      /* Calculate the current codes for each different code length */
    175175      hd->min_code[hd->maxcodelen] = 0;
    176       for (i = hd->maxcodelen - 1; i>=0; i--)
     176      for (i = hd->maxcodelen - 1; i>=0; --i)
    177177    hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1;
    178178    }
    179   delete heap;
     179  delete []heap;
    180180  return (hd);
    181181
    182182error2:
    183   delete heap;
     183  delete []heap;
    184184error1:
    185185  if (!data)
     
    204204    *mem += data->num_codes * sizeof (*codes);
    205205  memcpy (mc, data->min_code, sizeof (mc));
    206   for (i = 0; i < data->num_codes; i++)
     206  for (i = 0; i < data->num_codes; ++i)
    207207    if (data->clens[i])
    208208      codes[i] = mc[(int) (data->clens[i])]++;
     
    226226  if (!(values = new unsigned long *[MAX_HUFFCODE_LEN + 1]))
    227227    {
    228       delete vals;
     228      delete []vals;
    229229      return (NULL);
    230230    }
     
    237237
    238238  fcode[0] = values[0] = &vals[0];
    239   for (i = 1; i <= data->maxcodelen; i++)
     239  for (i = 1; i <= data->maxcodelen; ++i)
    240240    fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
    241241
    242   for (i = 0; i < data->num_codes; i++)
     242  for (i = 0; i < data->num_codes; ++i)
    243243    if (data->clens[i])
    244244      *fcode[(int) (data->clens[i])]++ = i;
     
    256256  if (!Generate_Huffman_Data (num, freqs, &hd, NULL))
    257257    return -1;
    258   for (i = 0; i < num; i++)
     258  for (i = 0; i < num; ++i)
    259259    size += counts[i] * hd.clens[i];
    260   delete hd.clens;
     260  delete []hd.clens;
    261261  return size;
    262262}
     
    290290      /* [RPAP - Jan 97: Endian Ordering] */
    291291      int i;
    292       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     292      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    293293    HTONSI(hd->lencount[i]);
    294       for (i = 0; i < hd->maxcodelen + 1; i++)
     294      for (i = 0; i < hd->maxcodelen + 1; ++i)
    295295    HTONUL(hd->min_code[i]);
    296296
     
    308308
    309309      /* [RPAP - Jan 97: Endian Ordering] */
    310       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     310      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    311311    NTOHSI(hd->lencount[i]);
    312       for (i = 0; i < hd->maxcodelen + 1; i++)
     312      for (i = 0; i < hd->maxcodelen + 1; ++i)
    313313    NTOHUL(hd->min_code[i]);
    314314    }
     
    350350
    351351      /* [RPAP - Jan 97: Endian Ordering] */
    352       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     352      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    353353    NTOHSI(hd->lencount[i]);
    354354
     
    363363
    364364      /* [RPAP - Jan 97: Endian Ordering] */
    365       for (i = 0; i < hd->maxcodelen + 1; i++)
     365      for (i = 0; i < hd->maxcodelen + 1; ++i)
    366366    NTOHUL(hd->min_code[i]);
    367367
  • trunk/indexers/mgpp/text/GSDLQueryLex.cpp

    r8242 r8692  
    2727              UCArray &text,
    2828              int len) {
     29  if (text.capacity() < text.size() + len + 1) {
     30    text.reserve(text.size() + len + 1);
     31  }
    2932  while (len > 0) {
    3033    text.push_back (*here++);
    31     len--;
     34    --len;
    3235  }
    3336}
     
    238241  //return false;
    239242
    240   UCArray AND; SetCStr (AND, "AND");
    241   if (el.text == AND) {
     243  //UCArray AND; SetCStr (AND, "AND");
     244  //if (el.text == AND) {
     245  if (UCArrayCStrEquals(el.text, "AND")) {
    242246    el.lexType = AndOpE;
    243247    return true;
    244248  }
    245   UCArray OR; SetCStr (OR, "OR");
    246   if (el.text == OR) {
     249  //UCArray OR; SetCStr (OR, "OR");
     250  //if (el.text == OR) {
     251  if (UCArrayCStrEquals(el.text, "OR")) {
    247252    el.lexType = OrOpE;
    248253    return true;
    249254  }
    250   UCArray NOT; SetCStr (NOT, "NOT");
    251   if (el.text == NOT) {
     255  //UCArray NOT; SetCStr (NOT, "NOT");
     256  //if (el.text == NOT) {
     257  if (UCArrayCStrEquals(el.text, "NOT")) {
    252258    el.lexType = NotOpE;
    253259    return true;
    254260  }
    255   UCArray NEAR; SetCStr (NEAR, "NEAR");
     261  UCArray NEAR; SetCStr (NEAR, "NEAR", 4);
    256262  if (PrefixLen(el.text, NEAR)==4) {
    257263    el.lexType = NearOpE;
    258264    return true;
    259265  }
    260   UCArray WITHIN; SetCStr (WITHIN, "WITHIN");
     266  UCArray WITHIN; SetCStr (WITHIN, "WITHIN", 6);
    261267  if (PrefixLen(el.text, WITHIN)==6) {
    262268    el.lexType = WithinOpE;
  • trunk/indexers/mgpp/text/GSDLQueryParser.cpp

    r8242 r8692  
    9494                UCArray &nearby,
    9595                bool reverse) {
    96   UCArray NEARBY; SetCStr(NEARBY, "NEAR");
    97   UCArray WITHIN; SetCStr(WITHIN, "WITHIN");
     96  UCArray NEARBY; SetCStr(NEARBY, "NEAR", 4);
     97  UCArray WITHIN; SetCStr(WITHIN, "WITHIN", 6);
    9898 
    9999  if (nearby == NEARBY) { // no modifier
     
    123123    while (here != end) {
    124124      size = size*10 + (*here-'0');
    125       here++;
     125      ++here;
    126126    }
    127127    if (within) {
     
    263263  UCArray near_string;
    264264  while (ParseLexEl (here, end, el)) {
    265     if (el.lexType == TermE || el.lexType == IntegerE) {
     265    // can't have AND, OR, NOT in square brackets, so assume they are words
     266    if (el.lexType == TermE || el.lexType == IntegerE || el.lexType == AndOpE || el.lexType == OrOpE || el.lexType == NotOpE) {
    266267      TermNode termNode;
    267268      termNode.term = el.text;
  • trunk/indexers/mgpp/text/IndexData.cpp

    r3365 r8692  
    186186  IvfLevelInfoMap::const_iterator levelHere, levelEnd;
    187187  for (levelHere=levels.levelInfo.begin(), levelEnd=levels.levelInfo.end();
    188        levelHere!=levelEnd && (*levelHere).first != level; levelHere++)
    189     curLevelNum++;
     188       levelHere!=levelEnd && (*levelHere).first != level; ++levelHere)
     189    ++curLevelNum;
    190190
    191191  // make sure we found the level
  • trunk/indexers/mgpp/text/MGQuery.cpp

    r3365 r8692  
    108108     rightI < rightResult.docs.size()) {
    109109    if (result.docs[leftI] < rightResult.docs[rightI]) {
    110       leftI++;
     110      ++leftI;
    111111    } else if (result.docs[leftI] > rightResult.docs[rightI]) {
    112       rightI++;
     112      ++rightI;
    113113    } else {
    114114      // the documents are equal
     
    116116      if (haveAccum)
    117117    result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI];
    118       leftI++;
    119       rightI++;
    120       outI++;
     118      ++leftI;
     119      ++rightI;
     120      ++outI;
    121121    }
    122122  }
     
    209209      if (haveAccum)
    210210    result.ranks.push_back (leftResult.ranks[leftI]);
    211       leftI++;
     211      ++leftI;
    212212     
    213213    } else if (leftDocNum > rightDocNum) {
     
    215215      if (haveAccum)
    216216    result.ranks.push_back (rightResult.ranks[rightI]);
    217       rightI++;
     217      ++rightI;
    218218     
    219219    } else { // equal
     
    222222    result.ranks.push_back (leftResult.ranks[leftI] +
    223223                rightResult.ranks[rightI]);
    224       leftI++;
    225       rightI++;
     224      ++leftI;
     225      ++rightI;
    226226    }
    227227  }
     
    307307      if (haveAccum)
    308308    result.ranks[outI] = result.ranks[queryI];
    309       queryI++;
    310       outI++;
     309      ++queryI;
     310      ++outI;
    311311    } else if (result.docs[queryI] > notResult.docs[notI]) {
    312       notI++;
     312      ++notI;
    313313    } else {
    314314      // the documents are equal, ignore both
    315       queryI++;
    316       notI++;
     315      ++queryI;
     316      ++notI;
    317317    }
    318318  }
     
    378378  unsigned long i;
    379379  FragRange thisFrag;
    380   for (i=0; i<tagEl.frag_occur; i++) {
     380  for (i=0; i<tagEl.frag_occur; ++i) {
    381381    // get start
    382382    unsigned long delta = buffer.bblock_decode (B, NULL)-1;
     
    446446    CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData);
    447447   
    448     here++;
     448    ++here;
    449449  }
    450450}
     
    521521            result);
    522522 
    523     termHere++;
     523    ++termHere;
    524524
    525525    if (termHere == termEnd) return; // nothing more to do
     
    546546            (*termHere).endRange,
    547547            fragLimitsPtr);
    548     termHere++;
     548    ++termHere;
    549549  }
    550550
     
    572572  while (here != end) {
    573573    (*here).Print (s, indent+2);
    574     here++;
     574    ++here;
    575575  }
    576576}
     
    633633  unsigned long i;
    634634  if (queryInfo.sortByRank || queryInfo.needRankInfo) {
    635     for (i=0; i<result.ranks.size(); i++) {
     635    for (i=0; i<result.ranks.size(); ++i) {
    636636      result.ranks[i] /=
    637637    indexData.weightData.GetLowerApproxDocWeight (result.docs[i]);
     
    668668      indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;
    669669   
    670     for (i=0; i<resultsSize; i++) {
     670    for (i=0; i<resultsSize; ++i) {
    671671      result.ranks[i] =  result.ranks[i] *
    672672    indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) /
     
    739739  unsigned long DocNum = 0;
    740740 
    741   for (unsigned long i=0; i<realresult.docs.size(); i++) {
     741  for (unsigned long i=0; i<realresult.docs.size(); ++i) {
    742742
    743743    // do an if ! here????
  • trunk/indexers/mgpp/text/QueryLex.cpp

    r3365 r8692  
    2727              UCArray &text,
    2828              int len) {
     29  if (text.capacity() < text.size() + len + 1) {
     30    text.reserve(text.size + len + 1);
     31  }
    2932  while (len > 0) {
    3033    text.push_back (*here++);
    31     len--;
     34    --len;
    3235  }
    3336}
     
    148151  if (!ParseTerm (here, end, el.text)) return false;
    149152
    150   UCArray AND; SetCStr (AND, "AND");
    151   if (el.text == AND) {
     153  //UCArray AND; SetCStr (AND, "AND");
     154  //if (el.text == AND) {
     155  if (UCArrayCStrEquals(el.text, "AND")) { 
    152156    el.lexType = AndOpE;
    153157    return true;
    154158  }
    155   UCArray OR; SetCStr (OR, "OR");
    156   if (el.text == OR) {
     159  //UCArray OR; SetCStr (OR, "OR");
     160  //if (el.text == OR) {
     161  if (UCArrayCStrEquals(el.text, "OR")) {
    157162    el.lexType = OrOpE;
    158163    return true;
    159164  }
    160   UCArray NOT; SetCStr (NOT, "NOT");
    161   if (el.text == NOT) {
     165  //UCArray NOT; SetCStr (NOT, "NOT");
     166  //if (el.text == NOT) {
     167  if (UCArrayCStrEquals(el.text, "NOT")) {
    162168    el.lexType = NotOpE;
    163169    return true;
  • trunk/indexers/mgpp/text/QueryTester.cpp

    r3365 r8692  
    4949  DocNumArray &docSet1 = setNode1->queryResult.docs;
    5050  RankArray &rankSet1 = setNode1->queryResult.ranks;
    51   docSet1.push_back (1);  rankSet1.push_back (0.1);
    52   docSet1.push_back (10); rankSet1.push_back (0.2);
    53   docSet1.push_back (15); rankSet1.push_back (0.2);
    54   docSet1.push_back (18); rankSet1.push_back (0.4);
    55   docSet1.push_back (19); rankSet1.push_back (0.5);
     51  docSet1.push_back (1);  rankSet1.push_back (0.1f);
     52  docSet1.push_back (10); rankSet1.push_back (0.2f);
     53  docSet1.push_back (15); rankSet1.push_back (0.2f);
     54  docSet1.push_back (18); rankSet1.push_back (0.4f);
     55  docSet1.push_back (19); rankSet1.push_back (0.5f);
    5656
    5757  SetQueryNode *setNode2 = new SetQueryNode;
    5858  DocNumArray &docSet2 = setNode2->queryResult.docs;
    5959  RankArray &rankSet2 = setNode2->queryResult.ranks;
    60   docSet2.push_back (2);  rankSet2.push_back (0.1);
    61   docSet2.push_back (11); rankSet2.push_back (0.2);
    62   docSet2.push_back (12); rankSet2.push_back (0.3);
    63   docSet2.push_back (13); rankSet2.push_back (0.4);
    64   docSet2.push_back (14); rankSet2.push_back (0.5);
    65   docSet2.push_back (15); rankSet2.push_back (0.6);
    66   docSet2.push_back (16); rankSet2.push_back (0.7);
    67   docSet2.push_back (17); rankSet2.push_back (0.8);
    68   docSet2.push_back (19); rankSet2.push_back (0.9);
    69   docSet2.push_back (20); rankSet2.push_back (0.1);
    70   docSet2.push_back (21); rankSet2.push_back (0.2);
     60  docSet2.push_back (2);  rankSet2.push_back (0.1f);
     61  docSet2.push_back (11); rankSet2.push_back (0.2f);
     62  docSet2.push_back (12); rankSet2.push_back (0.3f);
     63  docSet2.push_back (13); rankSet2.push_back (0.4f);
     64  docSet2.push_back (14); rankSet2.push_back (0.5f);
     65  docSet2.push_back (15); rankSet2.push_back (0.6f);
     66  docSet2.push_back (16); rankSet2.push_back (0.7f);
     67  docSet2.push_back (17); rankSet2.push_back (0.8f);
     68  docSet2.push_back (19); rankSet2.push_back (0.9f);
     69  docSet2.push_back (20); rankSet2.push_back (0.1f);
     70  docSet2.push_back (21); rankSet2.push_back (0.2f);
    7171
    7272  cout << "\n" << setNode1->queryResult << "AND\n\n"
     
    170170  DocNumArray &docSet1 = setNode1->queryResult.docs;
    171171  RankArray &rankSet1 = setNode1->queryResult.ranks;
    172   docSet1.push_back (1);  rankSet1.push_back (0.1);
    173   docSet1.push_back (10); rankSet1.push_back (0.2);
    174   docSet1.push_back (15); rankSet1.push_back (0.2);
    175   docSet1.push_back (18); rankSet1.push_back (0.4);
    176   docSet1.push_back (19); rankSet1.push_back (0.5);
     172  docSet1.push_back (1);  rankSet1.push_back (0.1f);
     173  docSet1.push_back (10); rankSet1.push_back (0.2f);
     174  docSet1.push_back (15); rankSet1.push_back (0.2f);
     175  docSet1.push_back (18); rankSet1.push_back (0.4f);
     176  docSet1.push_back (19); rankSet1.push_back (0.5f);
    177177
    178178  SetQueryNode *setNode2 = new SetQueryNode;
    179179  DocNumArray &docSet2 = setNode2->queryResult.docs;
    180180  RankArray &rankSet2 = setNode2->queryResult.ranks;
    181   docSet2.push_back (2);  rankSet2.push_back (0.1);
    182   docSet2.push_back (11); rankSet2.push_back (0.2);
    183   docSet2.push_back (12); rankSet2.push_back (0.3);
    184   docSet2.push_back (13); rankSet2.push_back (0.4);
    185   docSet2.push_back (14); rankSet2.push_back (0.5);
    186   docSet2.push_back (15); rankSet2.push_back (0.6);
    187   docSet2.push_back (16); rankSet2.push_back (0.7);
    188   docSet2.push_back (17); rankSet2.push_back (0.8);
    189   docSet2.push_back (19); rankSet2.push_back (0.9);
    190   docSet2.push_back (20); rankSet2.push_back (0.1);
    191   docSet2.push_back (21); rankSet2.push_back (0.2);
     181  docSet2.push_back (2);  rankSet2.push_back (0.1f);
     182  docSet2.push_back (11); rankSet2.push_back (0.2f);
     183  docSet2.push_back (12); rankSet2.push_back (0.3f);
     184  docSet2.push_back (13); rankSet2.push_back (0.4f);
     185  docSet2.push_back (14); rankSet2.push_back (0.5f);
     186  docSet2.push_back (15); rankSet2.push_back (0.6f);
     187  docSet2.push_back (16); rankSet2.push_back (0.7f);
     188  docSet2.push_back (17); rankSet2.push_back (0.8f);
     189  docSet2.push_back (19); rankSet2.push_back (0.9f);
     190  docSet2.push_back (20); rankSet2.push_back (0.1f);
     191  docSet2.push_back (21); rankSet2.push_back (0.2f);
    192192
    193193  cout << "\n" << setNode1->queryResult << "OR\n\n"
     
    214214  DocNumArray &rcDocSet = resultCompare.docs;
    215215  RankArray &rcRankSet = resultCompare.ranks;
    216   rcDocSet.push_back (1);  rcRankSet.push_back (0.1);
    217   rcDocSet.push_back (2);  rcRankSet.push_back (0.1);
    218   rcDocSet.push_back (10); rcRankSet.push_back (0.2);
    219   rcDocSet.push_back (11); rcRankSet.push_back (0.2);
    220   rcDocSet.push_back (12); rcRankSet.push_back (0.3);
    221   rcDocSet.push_back (13); rcRankSet.push_back (0.4);
    222   rcDocSet.push_back (14); rcRankSet.push_back (0.5);
    223   rcDocSet.push_back (15); rcRankSet.push_back (0.2+0.6);
    224   rcDocSet.push_back (16); rcRankSet.push_back (0.7);
    225   rcDocSet.push_back (17); rcRankSet.push_back (0.8);
    226   rcDocSet.push_back (18); rcRankSet.push_back (0.4);
    227   rcDocSet.push_back (19); rcRankSet.push_back (0.9+0.5);
    228   rcDocSet.push_back (20); rcRankSet.push_back (0.1);
    229   rcDocSet.push_back (21); rcRankSet.push_back (0.2);
     216  rcDocSet.push_back (1);  rcRankSet.push_back (0.1f);
     217  rcDocSet.push_back (2);  rcRankSet.push_back (0.1f);
     218  rcDocSet.push_back (10); rcRankSet.push_back (0.2f);
     219  rcDocSet.push_back (11); rcRankSet.push_back (0.2f);
     220  rcDocSet.push_back (12); rcRankSet.push_back (0.3f);
     221  rcDocSet.push_back (13); rcRankSet.push_back (0.4f);
     222  rcDocSet.push_back (14); rcRankSet.push_back (0.5f);
     223  rcDocSet.push_back (15); rcRankSet.push_back (0.2f+0.6f);
     224  rcDocSet.push_back (16); rcRankSet.push_back (0.7f);
     225  rcDocSet.push_back (17); rcRankSet.push_back (0.8f);
     226  rcDocSet.push_back (18); rcRankSet.push_back (0.4f);
     227  rcDocSet.push_back (19); rcRankSet.push_back (0.9f+0.5f);
     228  rcDocSet.push_back (20); rcRankSet.push_back (0.1f);
     229  rcDocSet.push_back (21); rcRankSet.push_back (0.2f);
    230230 
    231231
     
    317317  DocNumArray &docSet1 = setNode1->queryResult.docs;
    318318  RankArray &rankSet1 = setNode1->queryResult.ranks;
    319   docSet1.push_back (1);  rankSet1.push_back (0.1);
    320   docSet1.push_back (10); rankSet1.push_back (0.2);
    321   docSet1.push_back (15); rankSet1.push_back (0.2);
    322   docSet1.push_back (18); rankSet1.push_back (0.4);
    323   docSet1.push_back (19); rankSet1.push_back (0.5);
     319  docSet1.push_back (1);  rankSet1.push_back (0.1f);
     320  docSet1.push_back (10); rankSet1.push_back (0.2f);
     321  docSet1.push_back (15); rankSet1.push_back (0.2f);
     322  docSet1.push_back (18); rankSet1.push_back (0.4f);
     323  docSet1.push_back (19); rankSet1.push_back (0.5f);
    324324
    325325  SetQueryNode *setNode2 = new SetQueryNode;
    326326  DocNumArray &docSet2 = setNode2->queryResult.docs;
    327327  RankArray &rankSet2 = setNode2->queryResult.ranks;
    328   docSet2.push_back (2);  rankSet2.push_back (0.1);
    329   docSet2.push_back (11); rankSet2.push_back (0.2);
    330   docSet2.push_back (12); rankSet2.push_back (0.3);
    331   docSet2.push_back (13); rankSet2.push_back (0.4);
    332   docSet2.push_back (14); rankSet2.push_back (0.5);
    333   docSet2.push_back (15); rankSet2.push_back (0.6);
    334   docSet2.push_back (16); rankSet2.push_back (0.7);
    335   docSet2.push_back (17); rankSet2.push_back (0.8);
    336   docSet2.push_back (19); rankSet2.push_back (0.9);
    337   docSet2.push_back (20); rankSet2.push_back (0.1);
    338   docSet2.push_back (21); rankSet2.push_back (0.2);
     328  docSet2.push_back (2);  rankSet2.push_back (0.1f);
     329  docSet2.push_back (11); rankSet2.push_back (0.2f);
     330  docSet2.push_back (12); rankSet2.push_back (0.3f);
     331  docSet2.push_back (13); rankSet2.push_back (0.4f);
     332  docSet2.push_back (14); rankSet2.push_back (0.5f);
     333  docSet2.push_back (15); rankSet2.push_back (0.6f);
     334  docSet2.push_back (16); rankSet2.push_back (0.7f);
     335  docSet2.push_back (17); rankSet2.push_back (0.8f);
     336  docSet2.push_back (19); rankSet2.push_back (0.9f);
     337  docSet2.push_back (20); rankSet2.push_back (0.1f);
     338  docSet2.push_back (21); rankSet2.push_back (0.2f);
    339339
    340340  cout << "\n" << setNode1->queryResult << "NOT\n\n"
  • trunk/indexers/mgpp/text/Queryer.cpp

    r6116 r8692  
    111111  // do querying
    112112  QueryInfo queryInfo;
    113   SetCStr (queryInfo.docLevel, "Document");
     113  SetCStr (queryInfo.docLevel, "Document", 8);
    114114  queryInfo.maxDocs = 50;
    115115  queryInfo.sortByRank = true;
     
    125125
    126126  UCArray docLevel;
    127   SetCStr(docLevel, "Document");
     127  SetCStr(docLevel, "Document", 8);
    128128
    129129  UCArray level;
     
    143143    cout << "> ";
    144144    cin.getline(query, 2048, '\n');
    145     SetCStr (queryArray, query);
     145    SetCStr (queryArray, query, strlen(query));
    146146
    147147    // check for commands
     
    155155    cin >> query;
    156156    UCArrayClear(queryInfo.docLevel);
    157     SetCStr(queryInfo.docLevel, query);
     157    SetCStr(queryInfo.docLevel, query, strlen(query));
    158158    cout << "index set to " << queryInfo.docLevel <<"\n";
    159159    cin.getline(query, 2048, '\n');
     
    162162    cin >> query;
    163163    UCArrayClear(level);
    164     SetCStr(level, query);
     164    SetCStr(level, query, strlen(query));
    165165    cout << "level set to " << level <<"\n";
    166166    cin.getline(query, 2048, '\n');
     
    221221    cin>>query;
    222222    UCArrayClear(browseNode.term);
    223     SetCStr(browseNode.term, query);
     223    SetCStr(browseNode.term, query, strlen(query));
    224224    cin.getline(query, 2048, '\n'); // get rest of line
    225225
  • trunk/indexers/mgpp/text/TagInfo.cpp

    r3365 r8692  
    2424
    2525void TagInfo::SetDocTag (const char *cStr) {
    26   SetCStr (docTag, cStr);
     26  SetCStr (docTag, cStr, strlen(cStr));
    2727}
    2828
    2929void TagInfo::SetIndexLevel (const char *cStr) {
    30   SetCStr (indexLevel, cStr);
     30  SetCStr (indexLevel, cStr, strlen(cStr));
    3131}
    3232void TagInfo::AddLevelTag (const char *cStr) {
    3333  // convert the string
    3434  UCArray cArr;
    35   SetCStr (cArr, cStr);
     35  SetCStr (cArr, cStr, strlen(cStr));
    3636
    3737  // insert the tag
     
    6464  while (here != end) {
    6565    s << "\"" << (*here) << "\"";
    66     here++;
     66    ++here;
    6767    if (here != end) s << ", ";
    6868  }
  • trunk/indexers/mgpp/text/Terms.cpp

    r8242 r8692  
    5151 
    5252  unsigned long i;
    53   for (i=0; i<t.equivTerms.size(); i++) {
     53  for (i=0; i<t.equivTerms.size(); ++i) {
    5454    s << t.equivTerms[i] << ", ";
    5555  }
     
    8383
    8484  s << "termFreqs: ";
    85   for (unsigned long i=0; i<termFreqs.size(); i++)
     85  for (unsigned long i=0; i<termFreqs.size(); ++i)
    8686    s << termFreqs[i] << ", ";
    8787 
     
    9595  s << "docs: ";
    9696  unsigned long i;
    97   for (i=0; i<r.docs.size(); i++)
     97  for (i=0; i<r.docs.size(); ++i)
    9898    s << r.docs[i] << ", ";
    9999 
    100100  s << "\nranks: ";
    101   for (i=0; i<r.ranks.size(); i++)
     101  for (i=0; i<r.ranks.size(); ++i)
    102102    s << r.ranks[i] << ", ";
    103103
    104104  s << "\ntermFreqs: ";
    105   for (i=0; i<r.termFreqs.size(); i++)
     105  for (i=0; i<r.termFreqs.size(); ++i)
    106106    s << r.termFreqs[i] << ", ";
    107107
     
    137137  s << "docs: ";
    138138  unsigned long i;
    139   for (i=0; i<r.docs.size(); i++)
     139  for (i=0; i<r.docs.size(); ++i)
    140140    s << r.docs[i] << ", ";
    141141
    142142  s << "\nlevels: ";
    143   for (i=0; i<r.levels.size(); i++)
     143  for (i=0; i<r.levels.size(); ++i)
    144144    s << r.levels[i] << ", ";
    145145
    146146 
    147147  s << "\nranks: ";
    148   for (i=0; i<r.ranks.size(); i++)
     148  for (i=0; i<r.ranks.size(); ++i)
    149149    s << r.ranks[i] << ", ";
    150150
    151151  s << "\ntermFreqs: ";
    152   for (i=0; i<r.termFreqs.size(); i++)
     152  for (i=0; i<r.termFreqs.size(); ++i)
    153153    s << r.termFreqs[i] << ", ";
    154154  s << "\nactual number of docs found: " << r.actualNumDocs;
     
    182182  s << "terms: ";
    183183  unsigned long i;
    184   for (i=0; i<r.termFreqs.size(); i++)
     184  for (i=0; i<r.termFreqs.size(); ++i)
    185185    s << r.termFreqs[i] << ", ";
    186186    s << "\n\n";
     
    324324  unsigned long fragLimitI = 0;
    325325  unsigned long i;
    326   for (i=0; i<wordDictEl.frag_occur; i++) {
     326  for (i=0; i<wordDictEl.frag_occur; ++i) {
    327327    fragNum += buffer.bblock_decode (B, NULL);
    328328    if (!indexData.ifh.word_level_index) termFreq = buffer.gamma_decode (NULL);
     
    333333      while (fragLimitI+1 < (*fragLimits).size() &&
    334334         fragNum > (*fragLimits)[fragLimitI+1].rangeStart) {
    335     fragLimitI++;
     335    ++fragLimitI;
    336336      }
    337337    }
     
    375375      if (needFragFreqs)
    376376    outFragData.fragFreqs.push_back (f2.fragFreqs[f2I]);
    377       f2I++;
     377      ++f2I;
    378378     
    379379    } else if (f1I < f1Size &&
     
    384384      if (needFragFreqs)
    385385    outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]);
    386       f1I++;
     386      ++f1I;
    387387     
    388388    } else {
     
    391391      if (needFragFreqs)
    392392    outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]+f2.fragFreqs[f2I]);
    393       f1I++;
    394       f2I++;
     393      ++f1I;
     394      ++f2I;
    395395    }
    396396  }
     
    432432      while (fragLimitI+1 < fragLimitSize &&
    433433         comFragNum > (signed long)(*fragLimits)[fragLimitI+1].rangeStart) {
    434     fragLimitI++;
     434    ++fragLimitI;
    435435      }
    436436    }
     
    439439    (fragLimits!=NULL &&
    440440     fragNum<=(signed long)(*fragLimits)[fragLimitI].rangeStart)) {
    441       fragDataI++;
     441      ++fragDataI;
    442442     
    443443    } else if (fragNum > comFragNum+endRange ||
    444444           (fragLimits!=NULL &&
    445445        fragNum>(signed long)(*fragLimits)[fragLimitI].rangeEnd)) {
    446       comFragDataI++;
     446      ++comFragDataI;
    447447     
    448448    } else {
     
    454454      fragData.fragFreqs[fragDataI] : comFragData.fragFreqs[comFragDataI];
    455455      }
    456       fragDataI++;
    457       comFragDataI++;
    458       outI++;
     456      ++fragDataI;
     457      ++comFragDataI;
     458      ++outI;
    459459    }
    460460  }
     
    527527      }
    528528    }
    529     termDataI++;
     529    ++termDataI;
    530530  }
    531531
     
    600600      while (resultI < resultSize &&
    601601         result.docs[resultI] < lastLevelDocNum)
    602         resultI++;
     602        ++resultI;
    603603     
    604604      // store the result
     
    607607        if (needRanks)
    608608          result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
    609         resultI++;
    610         resultOutI++;
     609        ++resultI;
     610        ++resultOutI;
    611611      }
    612612    }
     
    621621    }
    622622   
    623     termDataI++;
     623    ++termDataI;
    624624  } // while
    625625
     
    631631    while (resultI < resultSize &&
    632632       result.docs[resultI] < lastLevelDocNum)
    633       resultI++;
     633      ++resultI;
    634634   
    635635    // store the result
     
    638638      if (needRanks)
    639639    result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
    640       resultI++;
    641       resultOutI++;
     640      ++resultI;
     641      ++resultOutI;
    642642    }
    643643  }
     
    688688      while (resultI < resultSize &&
    689689         result.docs[resultI] < lastLevelDocNum)
    690         resultI++;
     690        ++resultI;
    691691     
    692692      // store the result
     
    695695        if (needRanks)
    696696          result.ranks[resultOutI] = result.ranks[resultI];
    697         resultI++;
    698       resultOutI++;
     697        ++resultI;
     698        ++resultOutI;
    699699      }
    700700    }
     
    704704    }
    705705   
    706     termDataI++;
     706    ++termDataI;
    707707  }
    708708
     
    711711    while (resultI < resultSize &&
    712712       result.docs[resultI] < lastLevelDocNum)
    713       resultI++;
     713      ++resultI;
    714714   
    715715    // store the result
     
    718718      if (needRanks)
    719719    result.ranks[resultOutI] = result.ranks[resultI];
    720       resultI++;
    721       resultOutI++;
     720      ++resultI;
     721      ++resultOutI;
    722722    }
    723723  }
     
    777777    termdata.termFreq = (*here).freq;
    778778    terms.push_back(termdata);
    779     here++;
     779    ++here;
    780780  }
    781781
  • trunk/indexers/mgpp/text/TextEl.cpp

    r3365 r8692  
    7979
    8080static void ToggleParaTag (TextEl &el, bool &compatInPara) {
    81   SetCStr (el.tagName, "Paragraph");
     81  SetCStr (el.tagName, "Paragraph", 9);
    8282  el.text.erase (el.text.begin(), el.text.end());
    8383  if (compatInPara) {
     
    9090static void SetRecTag (TextEl &el, TextElType elType) {
    9191  el.elType = elType;
    92   SetCStr (el.tagName, "Document");
     92  SetCStr (el.tagName, "Document", 8);
    9393  el.text.erase (el.text.begin(), el.text.end());
    9494  if (elType == CloseTagE)
  • trunk/indexers/mgpp/text/TextGet.cpp

    r3365 r8692  
    2020 **************************************************************************/
    2121
     22// TextGet.h must be included first, so that we avoid the truncation warning on VC++
     23#include "TextGet.h"
    2224// need this to avoid bizarre compiler problems under VC++ 6.0
    2325#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
     
    2527#endif
    2628
    27 #include "TextGet.h"
    2829#include "mg_files.h"
    2930#include "netorder.h"
     
    4950  memset (ad, '\0', sizeof (*ad));
    5051
    51   for (i = 0; i <= 1; i++)
     52  for (i = 0; i <= 1; ++i)
    5253    {
    5354      int j;
     
    7778
    7879      pos = ad->word_data[i];
    79       for (j = 0; j < (int)ad->afh[i].num_frags; j++)
     80      for (j = 0; j < (int)ad->afh[i].num_frags; ++j)
    8081    {
    8182      ad->words[i][j] = pos;
     
    9394          ad->blk_end[i][num] = ad->blk_start[i][num] +
    9495        (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
    95           num++;
     96          ++num;
    9697        }
    9798    }
     
    115116  lookback = cd.cdh.lookback;
    116117
    117   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++) {
     118  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i) {
    118119    ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
    119120    mem_reqd += cfh->huff_words_size[i];
     
    135136  values[0] = vals;
    136137  values[0][0] = next_word[0];
    137   for (i = 1; i <= cfh->hd.maxcodelen; i++)
     138  for (i = 1; i <= cfh->hd.maxcodelen; ++i)
    138139    {
    139140      int next_start = (values[i - 1] - vals) +
     
    146147  memset (num_set, '\0', sizeof (num_set));
    147148
    148   for (i = 0; i < cfh->hd.num_codes; i++)
     149  for (i = 0; i < cfh->hd.num_codes; ++i)
    149150    {
    150151      register int val, copy;
     
    175176    }
    176177      memcpy (last_word[len], word, *word + 1);
    177       num_set[len]++;
     178      ++num_set[len];
    178179    }
    179180  if (cfh->hd.clens)
    180     delete cfh->hd.clens;
     181    delete []cfh->hd.clens;
    181182  cfh->hd.clens = NULL;
    182183  return values;
     
    196197    return 3;
    197198  if (hd->clens)
    198     delete hd->clens;
     199    delete []hd->clens;
    199200  hd->clens = NULL;
    200201  if (type == chars)
     
    239240    return false;
    240241
    241   for (which = 0; which < 2; which++)
     242  for (which = 0; which < 2; ++which)
    242243    switch (cd.cdh.dict_type)
    243244      {
     
    353354  fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict);
    354355
    355   for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)
     356  for (p = (u_long *) cd; (u_long) p < (u_long) end; ++p)
    356357    if (IS_FIXUP (p))
    357358      {
     
    364365  NTOHUL(cd->cdh.dict_type);
    365366  NTOHUL(cd->cdh.novel_method);
    366   for (i = 0; i < TEXT_PARAMS; i++)
     367  for (i = 0; i < TEXT_PARAMS; ++i)
    367368    NTOHUL(cd->cdh.params[i]);
    368369  NTOHUL(cd->cdh.num_words[0]);
     
    372373  NTOHUL(cd->cdh.lookback);
    373374  /* cfh */
    374   for (i = 0; i <= 1; i++)
     375  for (i = 0; i <= 1; ++i)
    375376    {
    376377      int j;
     
    379380      NTOHSI(cd->cfh[i]->hd.mincodelen);
    380381      NTOHSI(cd->cfh[i]->hd.maxcodelen);
    381       for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     382      for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    382383    {
    383384      NTOHSI(cd->cfh[i]->hd.lencount[j]);
     
    385386    }
    386387      NTOHUL(cd->cfh[i]->uncompressed_size);
    387       for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     388      for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    388389    NTOHUL(cd->cfh[i]->huff_words_size[j]);
    389390    }
     
    392393  if (cd->cdh.novel_method == MG_NOVEL_DELTA ||
    393394      cd->cdh.novel_method == MG_NOVEL_HYBRID)
    394     for (i = 0; i <= 1; i++)
     395    for (i = 0; i <= 1; ++i)
    395396      {
    396397    int j;
     
    398399    NTOHUL(cd->ad->afh[i].num_frags);
    399400    NTOHUL(cd->ad->afh[i].mem_for_frags);
    400     for (j = 0; j < 33; j++)
     401    for (j = 0; j < 33; ++j)
    401402      {
    402403        NTOHSI(cd->ad->blk_start[i][j]);
     
    406407  NTOHSI(cd->fast_loaded);
    407408
    408   delete fixup;
     409  delete []fixup;
    409410
    410411  // the whole fast comp dict is a bit of a hack so I don't
     
    584585  unsigned long bits = 0;
    585586
     587  if (docText.capacity() < docText.size() + num_bits + 1) {
     588    docText.reserve(docText.size() + num_bits + 1);
     589  }
    586590  // keep decoding bits until enough bits have been decoded
    587591  while (bits < num_bits) {
     
    616620      len = buffer.huff_decode(cd.lens_huff[which]->min_code,
    617621                   cd.lens_vals[which], &bits);
    618       for (i = 0; i < len; i++) {
     622      for (i = 0; i < len; ++i) {
    619623        c = buffer.huff_decode(cd.chars_huff[which]->min_code,
    620624                   cd.chars_vals[which], &bits);
     
    633637          {
    634638        idx = buffer.delta_decode (&bits);
    635         idx--;
     639        --idx;
    636640          }
    637641          break;
     
    640644        int k;
    641645        k = buffer.gamma_decode (&bits);
    642         k--;
     646        --k;
    643647        idx = buffer.binary_decode(ad->blk_end[which][k] -
    644648                       ad->blk_start[which][k] + 1,
     
    650654      base = ad->words[which][idx];
    651655      len = *base++;
    652       for (; len; len--)
     656      for (; len; --len)
    653657        {
    654658          docText.push_back (*base++);
  • trunk/indexers/mgpp/text/UCArray.cpp

    r8242 r8692  
    2929  while (*cStr != '\0') {
    3030    text.push_back (*cStr);
    31     cStr++;
     31    ++cStr;
    3232  }
    3333}
     
    4646}
    4747
    48 char * GetCStr(UCArray text) {
     48char * GetCStr(const UCArray& text) {
    4949
    5050  char *cstr = new char[text.size()+1];
     
    5555  while (here != end) {
    5656    cstr[i] = text[i];
    57     here++; i++;
     57    ++here; ++i;
    5858  }
    5959  cstr[i]='\0';
    6060  return cstr;
     61}
     62
     63bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr)
     64{
     65  if ((cStr == NULL || *cStr == '\0') && text.empty()) return true;
     66  UCArray::const_iterator thisUC = text.begin();
     67  UCArray::const_iterator endUC = text.end();
     68  while (thisUC != endUC && *cStr != '\0') {
     69    if (*thisUC != *cStr) return false;
     70    ++cStr; ++thisUC;
     71  }
     72  if (thisUC == endUC && *cStr == '\0') return true;
     73  return false;
    6174}
    6275
     
    158171    a.push_back (b);
    159172   
    160     arraySize--;
     173    --arraySize;
    161174  }
    162175 
     
    297310
    298311  while (i < l && *a1Here == *a2Here) {
    299     i++; ++a1Here; ++a2Here;
     312    ++i; ++a1Here; ++a2Here;
    300313  }
    301314 
     
    340353    unsigned char c = fgetc (f);
    341354    a.push_back (c);
    342     sufLen--;
    343   }
    344 
    345   return true;
    346 }
    347 
     355    --sufLen;
     356  }
     357
     358  return true;
     359}
     360
  • trunk/indexers/mgpp/text/UCArray.h

    r8242 r8692  
    2222#ifndef UCARRAY_H
    2323#define UCARRAY_H
     24
     25#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
     26#pragma warning(disable:4786)
     27#endif
    2428
    2529// need this to avoid bizarre compiler problems under VC++ 6.0
     
    5761// same as SetCStr but first tries to allocate nSizeHint space (only if needed)
    5862void SetCStr (UCArray &text, const char *cStr, size_t nSizeHint);
    59 char * GetCStr(UCArray text);
     63char * GetCStr(const UCArray& text);
    6064inline void UCArrayClear (UCArray &a) {
    6165  a.erase (a.begin(), a.end());
    6266}
     67bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr);
     68inline bool UCArrayCStrEquals(const UCArray &text, const char *cStr) { return UCArrayCStrEquals(text, (const unsigned char *)cStr); }
    6369
    6470// stream operator to print UCArray
  • trunk/indexers/mgpp/text/mg_errors.cpp

    r3365 r8692  
    5151  /* free the current error string, unless it is the null string */
    5252  if ((mg_error_data != NULL) && (mg_error_data != null_data)) {
    53     delete mg_error_data;
     53    delete []mg_error_data;
    5454    mg_error_data = null_data;
    5555  }
     
    6767  /* free the current error string, unless it is the null string */
    6868  if ((mg_error_data != NULL) && (mg_error_data != null_data)) {
    69     delete mg_error_data;
     69    delete []mg_error_data;
    7070    mg_error_data = null_data;
    7171  }
  • trunk/indexers/mgpp/text/mg_files.cpp

    r3365 r8692  
    4545  if (basepath)
    4646    {
    47       delete basepath;
     47      delete []basepath;
    4848      basepath = NULL;
    4949    }
  • trunk/indexers/mgpp/text/mgpp_compression_dict.cpp

    r3365 r8692  
    233233  NTOHD(csh.num_bytes);
    234234
    235   for (i = 0; i < 2; i++)
     235  for (i = 0; i < 2; ++i)
    236236    {
    237237      frags_stats_header fsh;
     
    253253      wd = Words[i] = (DictWordData *) Xmalloc (sizeof (DictWordData) * Num[i]);
    254254      unsigned int j;
    255       for (j = 0; j < Num[i]; j++, wd++)
     255      for (j = 0; j < Num[i]; ++j, ++wd)
    256256    {
    257257      int len;
     
    306306  dd->chars = 0;
    307307  wd = dd->wd;
    308   for (i = 0; i < dd->num_wds; i++, wd++)
     308  for (i = 0; i < dd->num_wds; ++i, ++wd)
    309309    dd->chars += (*wd)->word[0];
    310310}
     
    317317  Alloc_keep_discard ();
    318318  keep[0].num_wds = Num[0];
    319   for (i = 0; i < Num[0]; i++)
     319  for (i = 0; i < Num[0]; ++i)
    320320    keep[0].wd[i] = Words[0] + i;
    321321  keep[1].num_wds = Num[1];
    322   for (i = 0; i < Num[1]; i++)
     322  for (i = 0; i < Num[1]; ++i)
    323323    keep[1].wd[i] = Words[1] + i;
    324324  SortAndCount_DictData (&keep[0]);
     
    367367  num = Num[0] + Num[1];
    368368  wd = (DictWordData **) Xmalloc (num * sizeof (DictWordData *));
    369   for (i = 0; (unsigned int)i < Num[0]; i++)
     369  for (i = 0; (unsigned int)i < Num[0]; ++i)
    370370    wd[i] = Words[0] + i;
    371   for (i = 0; (unsigned int)i < Num[1]; i++)
     371  for (i = 0; (unsigned int)i < Num[1]; ++i)
    372372    wd[i + Num[0]] = Words[1] + i;
    373373
     
    390390    }
    391391
    392   for (i = 0; i < num; i++)
     392  for (i = 0; i < num; ++i)
    393393    {
    394394      DictWordData *word = wd[i];
     
    438438  memset (char_freqs, '\0', sizeof (char_freqs));
    439439  memset (len_freqs, '\0', sizeof (len_freqs));
    440   for (i = 0; i < num; i++, wd++)
     440  for (i = 0; i < num; ++i, ++wd)
    441441    {
    442442      u_long freq = (*wd)->documents();
     
    446446      len_freqs[idx][len] += freq;
    447447      escape[idx] += freq;
    448       for (; len; len--, buf++)
     448      for (; len; --len, ++buf)
    449449    char_freqs[idx][(u_long) (*buf)] += freq;
    450450    }
     
    471471  int j;
    472472
    473   for (j = 0; j < num; j++, word++)
     473  for (j = 0; j < num; ++j, ++word)
    474474    {
    475475      float   cbc, wbc;
     
    480480
    481481      cbc = len_lens[idx][len];
    482       for (; len; len--, buf++)
     482      for (; len; --len, ++buf)
    483483    cbc += char_lens[idx][(u_long) (*buf)];
    484484
     
    585585
    586586
    587   for (i = 0; (unsigned int)i < Num[0]; i++)
     587  for (i = 0; (unsigned int)i < Num[0]; ++i)
    588588    discard_heap[i] = Words[0] + i;
    589   for (i = 0; (unsigned int)i < Num[1]; i++)
     589  for (i = 0; (unsigned int)i < Num[1]; ++i)
    590590    discard_heap[i + Num[0]] = Words[1] + i;
    591591
     
    600600      keep_heap[keep_num++] = word;
    601601      freqs_trans[KIND (word)] += word->documents();
    602       num_trans++;
     602      ++num_trans;
    603603    }
    604604
     
    663663      heap_build (keep_heap, sizeof (keep_heap), keep_num, SmallSaving);
    664664      recalc_reqd = 0;
    665       recalcs++;
     665      ++recalcs;
    666666    }
    667667    }
     
    669669  Alloc_keep_discard ();
    670670
    671   for (i = 0; i < discard_num; i++)
     671  for (i = 0; i < discard_num; ++i)
    672672    {
    673673      DictWordData *word = discard_heap[i];
     
    676676      discard[idx].wd[discard[idx].num_wds++] = word;
    677677    }
    678   for (i = 0; i < keep_num; i++)
     678  for (i = 0; i < keep_num; ++i)
    679679    {
    680680      DictWordData *word = keep_heap[i];
     
    724724  HTONUL(tmp.dict_type);
    725725  HTONUL(tmp.novel_method);
    726   for (i = 0; i < TEXT_PARAMS; i++)
     726  for (i = 0; i < TEXT_PARAMS; ++i)
    727727    HTONUL(tmp.params[i]);
    728728  HTONUL(tmp.num_words[0]);
     
    741741  unsigned int i;
    742742  u_char *curr, *prev = NULL;
    743   for (i = 0; i < dd->num_wds; i++)
     743  for (i = 0; i < dd->num_wds; ++i)
    744744    {
    745745      int len;
     
    762762{
    763763  unsigned int i, us;
    764   for (us = i = 0; i < dd->num_wds; i++)
     764  for (us = i = 0; i < dd->num_wds; ++i)
    765765    us += dd->wd[i]->word[0];
    766766  return us;
     
    783783    FatalError (1, "Unable to allocate memory for freqs");
    784784
    785   for (i = 0; (unsigned)i < dd->num_wds; i++)
     785  for (i = 0; (unsigned)i < dd->num_wds; ++i)
    786786    {
    787787      freqs[i] = dd->wd[i]->documents();
     
    792792    FatalError (1, "Unable to allocate memory for huffman data");
    793793
    794   delete (freqs);
     794  delete []freqs;
    795795  freqs = NULL;
    796796
     
    813813  mem_reqd = 0;
    814814
    815   for (i = 0; (unsigned)i < dd->num_wds; i++)
     815  for (i = 0; (unsigned)i < dd->num_wds; ++i)
    816816    {
    817817      int codelen = hd->clens[i];
     
    841841      lastword[codelen] = word;
    842842#endif
    843       lencounts[codelen]++;
     843      ++lencounts[codelen];
    844844    }
    845845
    846846  /* [RPAP - Jan 97: Endian Ordering] */
    847   for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     847  for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    848848    HTONUL(huff_words_size[i]);
    849849
     
    852852
    853853  /* [RPAP - Jan 97: Endian Ordering] */
    854   for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     854  for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    855855    NTOHUL(huff_words_size[i]);
    856856
    857857  Write_words (f, dd);
    858858
    859   delete hd->clens;
     859  delete []hd->clens;
    860860  delete hd;
    861861
     
    876876  memset (freqs, '\0', sizeof (freqs));
    877877
    878   for (j = 0; j < dd->num_wds; j++, wd++)
     878  for (j = 0; j < dd->num_wds; ++j, ++wd)
    879879    {
    880880      u_char *buf = (*wd)->word;
    881881      int len = *buf++;
    882       for (; len; len--, buf++)
     882      for (; len; --len, ++buf)
    883883    freqs[(u_long) (*buf)] += (*wd)->documents();
    884884    }
    885885
    886886  if (!zero_freq_permitted)
    887     for (j = 0; j < 256; j++)
     887    for (j = 0; j < 256; ++j)
    888888      if (!freqs[j] && PESINAWORD (j) == words)
    889889    freqs[j] = 1;
     
    895895    FatalError (1, "Unable to write huffman data");
    896896
    897   delete hd->clens;
     897  delete []hd->clens;
    898898  delete hd;
    899899}
     
    912912  memset (freqs, '\0', sizeof (freqs));
    913913
    914   for (j = 0; j < dd->num_wds; j++, wd++)
     914  for (j = 0; j < dd->num_wds; ++j, ++wd)
    915915    freqs[(*wd)->word[0]] += (*wd)->documents();
    916916
    917917  if (!zero_freq_permitted)
    918     for (j = 0; j < 16; j++)
     918    for (j = 0; j < 16; ++j)
    919919      if (!freqs[j])
    920920    freqs[j] = 1;
     
    927927
    928928
    929   delete hd->clens;
     929  delete []hd->clens;
    930930  delete hd;
    931931}
     
    958958  Write_cdh (f, &cdh);
    959959
    960   for (i = 0; i < 2; i++)
     960  for (i = 0; i < 2; ++i)
    961961    switch (type)
    962962      {
     
    975975          esc.word = (u_char *) "";
    976976          keep[i].wd[keep[i].num_wds++] = &esc;
    977           for (j = 0; (unsigned)j < discard[i].num_wds; j++)
     977          for (j = 0; (unsigned)j < discard[i].num_wds; ++j)
    978978        esc.docCount += discard[i].wd[j]->documents();
    979979          if (!esc.docCount)
    980         esc.docCount++;
     980        ++esc.docCount;
    981981          mem_reqd += Write_data (f, &keep[i], lookback);
    982982        }
     
    994994          esc.word = (u_char *) "";
    995995          keep[i].wd[keep[i].num_wds++] = &esc;
    996           for (j = 0; (unsigned)j < all[i].num_wds; j++)
     996          for (j = 0; (unsigned)j < all[i].num_wds; ++j)
    997997        if (all[i].wd[j]->documents() == 1)
    998           esc.docCount++;
     998          ++esc.docCount;
    999999          if (!esc.docCount)
    1000         esc.docCount++;
     1000        ++esc.docCount;
    10011001          mem_reqd += Write_data (f, &keep[i], lookback);
    10021002        }
  • trunk/indexers/mgpp/text/mgpp_decompress_text.cpp

    r3365 r8692  
    4141  char *basePath = "";
    4242  UCArray level;
    43   SetCStr (level, "Document");
     43  SetCStr (level, "Document", 8);
    4444
    4545  opterr = 0;
     
    5757      break;
    5858    case 'K':
    59       SetCStr (level, optarg);
     59      SetCStr (level, optarg, strlen(optarg));
    6060      break;
    6161    case 'h':
     
    9292    cout << docText << "\n";
    9393   
    94     docNum++;
     94    ++docNum;
    9595  }
    9696 
  • trunk/indexers/mgpp/text/mgpp_fast_comp_dict.cpp

    r3365 r8692  
    6060#define FIXUP_VALS(vals) do {                       \
    6161    int i;                              \
    62     for (i=0; i < MAX_HUFFCODE_LEN+1; i++)              \
     62    for (i=0; i < MAX_HUFFCODE_LEN+1; ++i)              \
    6363      FIXUP(&vals[i]);                      \
    6464      } while(0)
     
    128128
    129129    /* cfh */
    130     for (which = 0; which <= 1; which++)
     130    for (which = 0; which <= 1; ++which)
    131131      {
    132132    int j;
     
    135135    HTONSI(cd->cfh[which]->hd.mincodelen);
    136136    HTONSI(cd->cfh[which]->hd.maxcodelen);
    137     for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     137    for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    138138      {
    139139        HTONSI(cd->cfh[which]->hd.lencount[j]);
     
    141141      }
    142142    HTONUL(cd->cfh[which]->uncompressed_size);
    143     for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     143    for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    144144      HTONUL(cd->cfh[which]->huff_words_size[j]);
    145145      }
     
    148148    if (cd->cdh.novel_method == MG_NOVEL_DELTA ||
    149149    cd->cdh.novel_method == MG_NOVEL_HYBRID)
    150       for (which = 0; which <= 1; which++)
     150      for (which = 0; which <= 1; ++which)
    151151    {
    152152      int j;
     
    154154      HTONUL(cd->ad->afh[which].num_frags);
    155155      HTONUL(cd->ad->afh[which].mem_for_frags);
    156       for (j = 0; j < 33; j++)
     156      for (j = 0; j < 33; ++j)
    157157        {
    158158          HTONSI(cd->ad->blk_start[which][j]);
     
    163163    HTONUL(cd->cdh.dict_type);
    164164    HTONUL(cd->cdh.novel_method);
    165     for (i = 0; i < TEXT_PARAMS; i++)
     165    for (i = 0; i < TEXT_PARAMS; ++i)
    166166      HTONUL(cd->cdh.params[which]);
    167167    HTONUL(cd->cdh.num_words[0]);
     
    186186{
    187187  u_long *p;
    188   for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)
     188  for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p)
    189189    {
    190190      if (IS_FIXUP (p))
     
    206206           MAGIC_AUX_DICT, MG_ABORT);  /* [RPAP - Feb 97: WIN32 Port] */
    207207
    208   for (i = 0; i <= 1; i++)
     208  for (i = 0; i <= 1; ++i)
    209209    {
    210210      aux_frags_header afh;
     
    234234  lookback = cdh->lookback;
    235235
    236   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
     236  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i)
    237237    {
    238238      ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
     
    244244  mem += mem_reqd;
    245245
    246   for (i = 0; i < cfh->hd.num_codes; i++)
     246  for (i = 0; i < cfh->hd.num_codes; ++i)
    247247    {
    248248      register int val;
    249       for (val = getc (dict) & 0xf; val; val--)
     249      for (val = getc (dict) & 0xf; val; --val)
    250250    getc (dict);
    251251    }
     
    259259  Read_Huffman_Data (dict, &hd, NULL, NULL);
    260260  if (hd.clens)
    261     delete hd.clens;
     261    delete []hd.clens;
    262262  mem += hd.num_codes * sizeof (unsigned long);
    263263  mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
     
    275275  mem += mem_for_words (dict, cdh, cfh);
    276276  if (cfh->hd.clens)
    277     delete cfh->hd.clens;
     277    delete []cfh->hd.clens;
    278278
    279279  return mem;
     
    298298  NTOHUL(cdh.dict_type);
    299299  NTOHUL(cdh.novel_method);
    300   for (i = 0; i < TEXT_PARAMS; i++)
     300  for (i = 0; i < TEXT_PARAMS; ++i)
    301301    NTOHUL(cdh.params[i]);
    302302  NTOHUL(cdh.num_words[0]);
     
    306306  NTOHUL(cdh.lookback);
    307307
    308   for (which = 0; which < 2; which++)
     308  for (which = 0; which < 2; ++which)
    309309    switch (cdh.dict_type)
    310310      {
     
    393393  ad = (auxiliary_dict *) getmem (sizeof (auxiliary_dict), sizeof (u_char *));
    394394
    395   for (i = 0; i <= 1; i++)
     395  for (i = 0; i <= 1; ++i)
    396396    {
    397397      unsigned int j;
     
    414414
    415415      pos = ad->word_data[i];
    416       for (j = 0; j < ad->afh[i].num_frags; j++)
     416      for (j = 0; j < ad->afh[i].num_frags; ++j)
    417417    {
    418418      ad->words[i][j] = pos;
     
    431431          ad->blk_end[i][num] = ad->blk_start[i][num] +
    432432        (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
    433           num++;
     433          ++num;
    434434        }
    435435    }
     
    459459  lookback = cd->cdh.lookback;
    460460
    461   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
     461  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i)
    462462    {
    463463      ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
     
    479479  values[0][0] = next_word[0];
    480480  FIXUP (&values[0][0]);
    481   for (i = 1; i <= cfh->hd.maxcodelen; i++)
     481  for (i = 1; i <= cfh->hd.maxcodelen; ++i)
    482482    {
    483483      int next_start = (values[i - 1] - vals) +
     
    492492  memset (num_set, '\0', sizeof (num_set));
    493493
    494   for (i = 0; i < cfh->hd.num_codes; i++)
     494  for (i = 0; i < cfh->hd.num_codes; ++i)
    495495    {
    496496      register int val, copy;
     
    528528    }
    529529      memcpy (last_word[len], word, *word + 1);
    530       num_set[len]++;
     530      ++num_set[len];
    531531    }
    532532  if (cfh->hd.clens)
    533     delete cfh->hd.clens;
     533    delete []cfh->hd.clens;
    534534  cfh->hd.clens = NULL;
    535535  return values;
     
    559559  fcode[0] = values[0] = &vals[0];
    560560  FIXUP (&values[0]);
    561   for (i = 1; i <= data->maxcodelen; i++)
     561  for (i = 1; i <= data->maxcodelen; ++i)
    562562    {
    563563      fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
     
    565565    }
    566566
    567   for (i = 0; i < data->num_codes; i++)
     567  for (i = 0; i < data->num_codes; ++i)
    568568    if (data->clens[i])
    569569      *fcode[(int) (data->clens[i])]++ = i;
     
    586586  FIXUP (&cd->chars_vals[which]);
    587587  if (hd->clens)
    588     delete hd->clens;
     588    delete []hd->clens;
    589589  hd->clens = NULL;
    590590}
     
    623623  Read_cdh (dict, &cd->cdh, NULL, NULL);
    624624
    625   for (which = 0; which < 2; which++)
     625  for (which = 0; which < 2; ++which)
    626626    switch (cd->cdh.dict_type)
    627627      {
     
    683683  {
    684684    u_long *p;
    685     for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)
     685    for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p)
    686686      {
    687687    if (IS_FIXUP (p))
  • trunk/indexers/mgpp/text/mgpp_passes.cpp

    r3365 r8692  
    2323#define _XOPEN_SOURCE 1
    2424#define _XOPEN_SOURCE_EXTENDED 1
     25
     26#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
     27#pragma warning(disable:4786)
     28#endif
    2529
    2630// need this to avoid bizarre compiler problems under VC++ 6.0
  • trunk/indexers/mgpp/text/text.pass2.cpp

    r3365 r8692  
    2121 **************************************************************************/
    2222
     23#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
     24#pragma warning(disable:4786)
     25#endif
     26
    2327// need this to avoid bizarre compiler problems under VC++ 6.0
    2428#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
     
    135139  int i;
    136140  if (cdh.novel_method != MG_NOVEL_HUFFMAN_CHARS)
    137     for (i = 0; i <= 1; i++)
     141    for (i = 0; i <= 1; ++i)
    138142      {
    139143    nht[i].HashSize = INITIAL_HASH_SIZE;
     
    158162        blk_end[i][num] = blk_start[i][num] +
    159163          (blk_end[i][num - 1] - blk_start[i][num - 1]) * 2;
    160         num++;
     164        ++num;
    161165          }
    162166      }
     
    176180    int res;
    177181   
    178     if (which) numWords++;
     182    if (which) ++numWords;
    179183
    180184    /* First parse a word or non-word out of the string */
     
    202206    s2 = *wptr;
    203207    len = *s1 + 1;
    204     for (; len; len--)
     208    for (; len; --len)
    205209      if (*s1++ != *s2++) break;
    206210   
     
    234238          }
    235239        buffer.huff_encode (Word[0], lens_codes[which], lens_huff[which].clens, NULL);
    236         for (i = 0; i < Word[0]; i++)
     240        for (i = 0; i < Word[0]; ++i)
    237241          buffer.huff_encode (Word[i + 1], char_codes[which],
    238242                  char_huff[which].clens, NULL);
     
    252256          buffer.huff_encode (Word[0], lens_codes[which],
    253257                      lens_huff[which].clens, NULL);
    254           for (i = 0; i < Word[0]; i++)
     258          for (i = 0; i < Word[0]; ++i)
    255259            buffer.huff_encode (Word[i + 1], char_codes[which],
    256260                    char_huff[which].clens, NULL);
     
    280284              h->pool->ptr += len;
    281285              h->pool->left -= len;
    282               h->HashUsed++;
     286              ++h->HashUsed;
    283287              break;
    284288            }
     
    287291              s2 = ent->word;
    288292              len = *s1 + 1;
    289               for (; len; len--)
     293              for (; len; --len)
    290294            if (*s1++ != *s2++)
    291295              break;
     
    306310            int j = ent->ordinal_num - 1;
    307311            while (j > blk_end[which][k])
    308               k++;
     312              ++k;
    309313            assert (j - blk_start[which][k] + 1 >= 1 &&
    310314                j - blk_start[which][k] + 1 <=
     
    331335              memset (ht, '\0', sizeof (novel_hash_rec) * size);
    332336             
    333               for (i = 0; i < h->HashSize; i++)
     337              for (i = 0; i < h->HashSize; ++i)
    334338            if (h->HashTable[i].word)
    335339              {
     
    409413   
    410414    cth.num_of_bytes += (*here).text.size();
    411     here++;
     415    ++here;
    412416  }
    413417
     
    417421  while (tiHere != tiEnd) {
    418422    if ((*tiHere).second.inDoc) (*tiHere).second.SetEnd (endPos, endBit);
    419     tiHere++;
     423    ++tiHere;
    420424  }
    421425 
    422426  // we've processed one more document
    423   cth.num_of_docs++;
     427  ++cth.num_of_docs;
    424428
    425429  return COMPALLOK;
     
    434438    return COMPERROR;
    435439 
    436   for (i = 0; i <= 1; i++)
     440  for (i = 0; i <= 1; ++i)
    437441    {
    438442      aux_frags_header afh;
     
    461465  int i;
    462466  u_long aux_compressed = 0, total_uncomp = 0;
    463   for (i = 0; i <= 1; i++)
     467  for (i = 0; i <= 1; ++i)
    464468    {
    465469      int j;
     
    475479        {
    476480          int len = *buf++;
    477           lens[len]++;
     481          ++lens[len];
    478482          total_uncomp += len + 4;
    479           for (; len; len--)
    480         chars[*buf++]++;
     483          for (; len; --len)
     484        ++chars[*buf++];
    481485        }
    482486    }
    483       for (j = 0; j < 256; j++)
     487      for (j = 0; j < 256; ++j)
    484488    if (!chars[j] && PESINAWORD (j) == i)
    485489      fchars[j] = 1;
    486490    else
    487491      fchars[j] = chars[j];
    488       for (j = 0; j < 16; j++)
     492      for (j = 0; j < 16; ++j)
    489493    if (!lens[j])
    490494      flens[j] = 1;
     
    522526      return false;
    523527   
    524     tiHere++;
     528    ++tiHere;
    525529  }
    526530
  • trunk/indexers/mgpp/text/words.cpp

    r3365 r8692  
    3737}
    3838
     39/* It determines whether a given place in a UTF-8 encoded Unicode string is a unicode space. */
     40int isaspace (const u_char *here, const u_char *end)
     41{
     42  unsigned short c;
     43  if (parse_utf8_char(here, end, &c) > 0) return is_unicode_space(c);
     44  return 0;
     45}
     46
     47/* Returns the UTF-8 encoded Unicode string with leading
     48   Unicode spaces skipped. */
     49u_char *skipspace(u_char *here, u_char *end)
     50{
     51  unsigned short c;
     52  int length;
     53  while(here != end) {
     54    length = parse_utf8_char(here, end, &c);
     55    if (length == 0 || !is_unicode_space(c)) break;
     56    here += length;
     57  }
     58  return here;
     59}
     60 
    3961const unsigned char *ParseIndexWord (const unsigned char *textHere,
    4062                     const unsigned char *textEnd,
     
    5375                   ++numeric <= MAXNUMERIC))) {
    5476    while (charlength-- > 0) {
    55       word.push_back (*textHere++); length++;
     77      word.push_back (*textHere++); ++length;
    5678    }
    5779    charlength = parse_utf8_char (textHere, textEnd, &c);
  • trunk/indexers/mgpp/text/words.h

    r3365 r8692  
    7676
    7777#ifdef __cplusplus
    78 extern "C"
     78extern "C" {
    7979#endif
    8080int inaword (const u_char *here, const u_char *end);
     
    8383       is part of a word. */
    8484
     85int isaspace (const u_char *here, const u_char *end);
     86        /* Determines whether the character at a given place in a UTF-8 encoded Unicode string is a Unicode space. */
     87
     88u_char *skipspace(u_char *here, u_char *end);
     89        /* Return a pointer into the UTF-8 encoded Unicode string with the leading Unicode spaces skipped. */
     90
     91#ifdef __cplusplus
     92}
     93#endif
    8594
    8695const unsigned char *ParseIndexWord (const unsigned char *textHere,
  • trunk/mgpp/lib/bitio_m_random.cpp

    r3365 r8692  
    1616
    1717random_bitio_buffer::~random_bitio_buffer() {
    18   if (buffer != NULL) delete buffer;
     18  if (buffer != NULL) delete []buffer;
    1919}
    2020
     
    2222  // delete the old buffer
    2323  if (buffer != NULL) {
    24     delete buffer;
     24    delete []buffer;
    2525    buffer = NULL;
    2626  }
     
    5454  // delete the old buffer
    5555  if (buffer != NULL) {
    56     delete buffer;
     56    delete []buffer;
    5757    buffer = NULL;
    5858  }
     
    131131  flush();
    132132  if (buffer != NULL) {
    133     delete buffer;
     133    delete []buffer;
    134134    buffer = NULL;
    135135  }
  • trunk/mgpp/lib/huffman.cpp

    r3365 r8692  
    5656
    5757  /* Initialise the pointers to the leaves */
    58   for (count = i = 0; i < num; i++)
     58  for (count = i = 0; i < num; ++i)
    5959    if (heap[num + i])
    6060      heap[count++] = num + i;
     
    6262  /* Reorganise the pointers so that it is a heap */
    6363  HNum = count;
    64   for (i = HNum / 2; i > 0; i--)
     64  for (i = HNum / 2; i > 0; --i)
    6565    {
    6666      register int curr, child;
     
    7070    {
    7171      if (child < HNum && heap[heap[child]] < heap[heap[child - 1]])
    72         child++;
     72        ++child;
    7373      if (heap[heap[curr - 1]] > heap[heap[child - 1]])
    7474        {
     
    9191      int pos[2];
    9292
    93       for (i = 0; i < 2; i++)
     93      for (i = 0; i < 2; ++i)
    9494    {
    9595      register int curr, child;
     
    102102          if (child < HNum &&
    103103          heap[heap[child]] < heap[heap[child - 1]])
    104         child++;
     104        ++child;
    105105          if (heap[heap[curr - 1]] > heap[heap[child - 1]])
    106106        {
     
    124124      {
    125125    register int parent, curr;
    126     HNum++;
     126    ++HNum;
    127127    curr = HNum;
    128128    parent = curr >> 1;
     
    143143  heap[0] = -1UL;
    144144  heap[1] = 0;
    145   for (i = 2; i < num * 2; i++)
     145  for (i = 2; i < num * 2; ++i)
    146146    heap[i] = heap[heap[i]] + 1;
    147147
     
    152152
    153153  /* Set the code length of each leaf in the huffman tree */
    154   for (i = 0; i < num; i++)
     154  for (i = 0; i < num; ++i)
    155155    {
    156156      register u_long codelen = heap[i + num];
     
    162162      if (codelen < hd->mincodelen)
    163163    hd->mincodelen = codelen;
    164       hd->lencount[codelen]++;
     164      ++hd->lencount[codelen];
    165165    }
    166166
     
    174174      /* Calculate the current codes for each different code length */
    175175      hd->min_code[hd->maxcodelen] = 0;
    176       for (i = hd->maxcodelen - 1; i>=0; i--)
     176      for (i = hd->maxcodelen - 1; i>=0; --i)
    177177    hd->min_code[i] = (hd->min_code[i + 1] + hd->lencount[i + 1]) >> 1;
    178178    }
    179   delete heap;
     179  delete []heap;
    180180  return (hd);
    181181
    182182error2:
    183   delete heap;
     183  delete []heap;
    184184error1:
    185185  if (!data)
     
    204204    *mem += data->num_codes * sizeof (*codes);
    205205  memcpy (mc, data->min_code, sizeof (mc));
    206   for (i = 0; i < data->num_codes; i++)
     206  for (i = 0; i < data->num_codes; ++i)
    207207    if (data->clens[i])
    208208      codes[i] = mc[(int) (data->clens[i])]++;
     
    226226  if (!(values = new unsigned long *[MAX_HUFFCODE_LEN + 1]))
    227227    {
    228       delete vals;
     228      delete []vals;
    229229      return (NULL);
    230230    }
     
    237237
    238238  fcode[0] = values[0] = &vals[0];
    239   for (i = 1; i <= data->maxcodelen; i++)
     239  for (i = 1; i <= data->maxcodelen; ++i)
    240240    fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
    241241
    242   for (i = 0; i < data->num_codes; i++)
     242  for (i = 0; i < data->num_codes; ++i)
    243243    if (data->clens[i])
    244244      *fcode[(int) (data->clens[i])]++ = i;
     
    256256  if (!Generate_Huffman_Data (num, freqs, &hd, NULL))
    257257    return -1;
    258   for (i = 0; i < num; i++)
     258  for (i = 0; i < num; ++i)
    259259    size += counts[i] * hd.clens[i];
    260   delete hd.clens;
     260  delete []hd.clens;
    261261  return size;
    262262}
     
    290290      /* [RPAP - Jan 97: Endian Ordering] */
    291291      int i;
    292       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     292      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    293293    HTONSI(hd->lencount[i]);
    294       for (i = 0; i < hd->maxcodelen + 1; i++)
     294      for (i = 0; i < hd->maxcodelen + 1; ++i)
    295295    HTONUL(hd->min_code[i]);
    296296
     
    308308
    309309      /* [RPAP - Jan 97: Endian Ordering] */
    310       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     310      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    311311    NTOHSI(hd->lencount[i]);
    312       for (i = 0; i < hd->maxcodelen + 1; i++)
     312      for (i = 0; i < hd->maxcodelen + 1; ++i)
    313313    NTOHUL(hd->min_code[i]);
    314314    }
     
    350350
    351351      /* [RPAP - Jan 97: Endian Ordering] */
    352       for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     352      for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    353353    NTOHSI(hd->lencount[i]);
    354354
     
    363363
    364364      /* [RPAP - Jan 97: Endian Ordering] */
    365       for (i = 0; i < hd->maxcodelen + 1; i++)
     365      for (i = 0; i < hd->maxcodelen + 1; ++i)
    366366    NTOHUL(hd->min_code[i]);
    367367
  • trunk/mgpp/text/GSDLQueryLex.cpp

    r8242 r8692  
    2727              UCArray &text,
    2828              int len) {
     29  if (text.capacity() < text.size() + len + 1) {
     30    text.reserve(text.size() + len + 1);
     31  }
    2932  while (len > 0) {
    3033    text.push_back (*here++);
    31     len--;
     34    --len;
    3235  }
    3336}
     
    238241  //return false;
    239242
    240   UCArray AND; SetCStr (AND, "AND");
    241   if (el.text == AND) {
     243  //UCArray AND; SetCStr (AND, "AND");
     244  //if (el.text == AND) {
     245  if (UCArrayCStrEquals(el.text, "AND")) {
    242246    el.lexType = AndOpE;
    243247    return true;
    244248  }
    245   UCArray OR; SetCStr (OR, "OR");
    246   if (el.text == OR) {
     249  //UCArray OR; SetCStr (OR, "OR");
     250  //if (el.text == OR) {
     251  if (UCArrayCStrEquals(el.text, "OR")) {
    247252    el.lexType = OrOpE;
    248253    return true;
    249254  }
    250   UCArray NOT; SetCStr (NOT, "NOT");
    251   if (el.text == NOT) {
     255  //UCArray NOT; SetCStr (NOT, "NOT");
     256  //if (el.text == NOT) {
     257  if (UCArrayCStrEquals(el.text, "NOT")) {
    252258    el.lexType = NotOpE;
    253259    return true;
    254260  }
    255   UCArray NEAR; SetCStr (NEAR, "NEAR");
     261  UCArray NEAR; SetCStr (NEAR, "NEAR", 4);
    256262  if (PrefixLen(el.text, NEAR)==4) {
    257263    el.lexType = NearOpE;
    258264    return true;
    259265  }
    260   UCArray WITHIN; SetCStr (WITHIN, "WITHIN");
     266  UCArray WITHIN; SetCStr (WITHIN, "WITHIN", 6);
    261267  if (PrefixLen(el.text, WITHIN)==6) {
    262268    el.lexType = WithinOpE;
  • trunk/mgpp/text/GSDLQueryParser.cpp

    r8242 r8692  
    9494                UCArray &nearby,
    9595                bool reverse) {
    96   UCArray NEARBY; SetCStr(NEARBY, "NEAR");
    97   UCArray WITHIN; SetCStr(WITHIN, "WITHIN");
     96  UCArray NEARBY; SetCStr(NEARBY, "NEAR", 4);
     97  UCArray WITHIN; SetCStr(WITHIN, "WITHIN", 6);
    9898 
    9999  if (nearby == NEARBY) { // no modifier
     
    123123    while (here != end) {
    124124      size = size*10 + (*here-'0');
    125       here++;
     125      ++here;
    126126    }
    127127    if (within) {
     
    263263  UCArray near_string;
    264264  while (ParseLexEl (here, end, el)) {
    265     if (el.lexType == TermE || el.lexType == IntegerE) {
     265    // can't have AND, OR, NOT in square brackets, so assume they are words
     266    if (el.lexType == TermE || el.lexType == IntegerE || el.lexType == AndOpE || el.lexType == OrOpE || el.lexType == NotOpE) {
    266267      TermNode termNode;
    267268      termNode.term = el.text;
  • trunk/mgpp/text/IndexData.cpp

    r3365 r8692  
    186186  IvfLevelInfoMap::const_iterator levelHere, levelEnd;
    187187  for (levelHere=levels.levelInfo.begin(), levelEnd=levels.levelInfo.end();
    188        levelHere!=levelEnd && (*levelHere).first != level; levelHere++)
    189     curLevelNum++;
     188       levelHere!=levelEnd && (*levelHere).first != level; ++levelHere)
     189    ++curLevelNum;
    190190
    191191  // make sure we found the level
  • trunk/mgpp/text/MGQuery.cpp

    r3365 r8692  
    108108     rightI < rightResult.docs.size()) {
    109109    if (result.docs[leftI] < rightResult.docs[rightI]) {
    110       leftI++;
     110      ++leftI;
    111111    } else if (result.docs[leftI] > rightResult.docs[rightI]) {
    112       rightI++;
     112      ++rightI;
    113113    } else {
    114114      // the documents are equal
     
    116116      if (haveAccum)
    117117    result.ranks[outI] = result.ranks[leftI] + rightResult.ranks[rightI];
    118       leftI++;
    119       rightI++;
    120       outI++;
     118      ++leftI;
     119      ++rightI;
     120      ++outI;
    121121    }
    122122  }
     
    209209      if (haveAccum)
    210210    result.ranks.push_back (leftResult.ranks[leftI]);
    211       leftI++;
     211      ++leftI;
    212212     
    213213    } else if (leftDocNum > rightDocNum) {
     
    215215      if (haveAccum)
    216216    result.ranks.push_back (rightResult.ranks[rightI]);
    217       rightI++;
     217      ++rightI;
    218218     
    219219    } else { // equal
     
    222222    result.ranks.push_back (leftResult.ranks[leftI] +
    223223                rightResult.ranks[rightI]);
    224       leftI++;
    225       rightI++;
     224      ++leftI;
     225      ++rightI;
    226226    }
    227227  }
     
    307307      if (haveAccum)
    308308    result.ranks[outI] = result.ranks[queryI];
    309       queryI++;
    310       outI++;
     309      ++queryI;
     310      ++outI;
    311311    } else if (result.docs[queryI] > notResult.docs[notI]) {
    312       notI++;
     312      ++notI;
    313313    } else {
    314314      // the documents are equal, ignore both
    315       queryI++;
    316       notI++;
     315      ++queryI;
     316      ++notI;
    317317    }
    318318  }
     
    378378  unsigned long i;
    379379  FragRange thisFrag;
    380   for (i=0; i<tagEl.frag_occur; i++) {
     380  for (i=0; i<tagEl.frag_occur; ++i) {
    381381    // get start
    382382    unsigned long delta = buffer.bblock_decode (B, NULL)-1;
     
    446446    CombineFragData (needFragFreqs, tempFragData1, tempFragData2, fragData);
    447447   
    448     here++;
     448    ++here;
    449449  }
    450450}
     
    521521            result);
    522522 
    523     termHere++;
     523    ++termHere;
    524524
    525525    if (termHere == termEnd) return; // nothing more to do
     
    546546            (*termHere).endRange,
    547547            fragLimitsPtr);
    548     termHere++;
     548    ++termHere;
    549549  }
    550550
     
    572572  while (here != end) {
    573573    (*here).Print (s, indent+2);
    574     here++;
     574    ++here;
    575575  }
    576576}
     
    633633  unsigned long i;
    634634  if (queryInfo.sortByRank || queryInfo.needRankInfo) {
    635     for (i=0; i<result.ranks.size(); i++) {
     635    for (i=0; i<result.ranks.size(); ++i) {
    636636      result.ranks[i] /=
    637637    indexData.weightData.GetLowerApproxDocWeight (result.docs[i]);
     
    668668      indexData.levels.levelInfo[indexData.curLevel].exactWeightsDiskPtr;
    669669   
    670     for (i=0; i<resultsSize; i++) {
     670    for (i=0; i<resultsSize; ++i) {
    671671      result.ranks[i] =  result.ranks[i] *
    672672    indexData.weightData.GetLowerApproxDocWeight (result.docs[i]) /
     
    739739  unsigned long DocNum = 0;
    740740 
    741   for (unsigned long i=0; i<realresult.docs.size(); i++) {
     741  for (unsigned long i=0; i<realresult.docs.size(); ++i) {
    742742
    743743    // do an if ! here????
  • trunk/mgpp/text/QueryLex.cpp

    r3365 r8692  
    2727              UCArray &text,
    2828              int len) {
     29  if (text.capacity() < text.size() + len + 1) {
     30    text.reserve(text.size + len + 1);
     31  }
    2932  while (len > 0) {
    3033    text.push_back (*here++);
    31     len--;
     34    --len;
    3235  }
    3336}
     
    148151  if (!ParseTerm (here, end, el.text)) return false;
    149152
    150   UCArray AND; SetCStr (AND, "AND");
    151   if (el.text == AND) {
     153  //UCArray AND; SetCStr (AND, "AND");
     154  //if (el.text == AND) {
     155  if (UCArrayCStrEquals(el.text, "AND")) { 
    152156    el.lexType = AndOpE;
    153157    return true;
    154158  }
    155   UCArray OR; SetCStr (OR, "OR");
    156   if (el.text == OR) {
     159  //UCArray OR; SetCStr (OR, "OR");
     160  //if (el.text == OR) {
     161  if (UCArrayCStrEquals(el.text, "OR")) {
    157162    el.lexType = OrOpE;
    158163    return true;
    159164  }
    160   UCArray NOT; SetCStr (NOT, "NOT");
    161   if (el.text == NOT) {
     165  //UCArray NOT; SetCStr (NOT, "NOT");
     166  //if (el.text == NOT) {
     167  if (UCArrayCStrEquals(el.text, "NOT")) {
    162168    el.lexType = NotOpE;
    163169    return true;
  • trunk/mgpp/text/QueryTester.cpp

    r3365 r8692  
    4949  DocNumArray &docSet1 = setNode1->queryResult.docs;
    5050  RankArray &rankSet1 = setNode1->queryResult.ranks;
    51   docSet1.push_back (1);  rankSet1.push_back (0.1);
    52   docSet1.push_back (10); rankSet1.push_back (0.2);
    53   docSet1.push_back (15); rankSet1.push_back (0.2);
    54   docSet1.push_back (18); rankSet1.push_back (0.4);
    55   docSet1.push_back (19); rankSet1.push_back (0.5);
     51  docSet1.push_back (1);  rankSet1.push_back (0.1f);
     52  docSet1.push_back (10); rankSet1.push_back (0.2f);
     53  docSet1.push_back (15); rankSet1.push_back (0.2f);
     54  docSet1.push_back (18); rankSet1.push_back (0.4f);
     55  docSet1.push_back (19); rankSet1.push_back (0.5f);
    5656
    5757  SetQueryNode *setNode2 = new SetQueryNode;
    5858  DocNumArray &docSet2 = setNode2->queryResult.docs;
    5959  RankArray &rankSet2 = setNode2->queryResult.ranks;
    60   docSet2.push_back (2);  rankSet2.push_back (0.1);
    61   docSet2.push_back (11); rankSet2.push_back (0.2);
    62   docSet2.push_back (12); rankSet2.push_back (0.3);
    63   docSet2.push_back (13); rankSet2.push_back (0.4);
    64   docSet2.push_back (14); rankSet2.push_back (0.5);
    65   docSet2.push_back (15); rankSet2.push_back (0.6);
    66   docSet2.push_back (16); rankSet2.push_back (0.7);
    67   docSet2.push_back (17); rankSet2.push_back (0.8);
    68   docSet2.push_back (19); rankSet2.push_back (0.9);
    69   docSet2.push_back (20); rankSet2.push_back (0.1);
    70   docSet2.push_back (21); rankSet2.push_back (0.2);
     60  docSet2.push_back (2);  rankSet2.push_back (0.1f);
     61  docSet2.push_back (11); rankSet2.push_back (0.2f);
     62  docSet2.push_back (12); rankSet2.push_back (0.3f);
     63  docSet2.push_back (13); rankSet2.push_back (0.4f);
     64  docSet2.push_back (14); rankSet2.push_back (0.5f);
     65  docSet2.push_back (15); rankSet2.push_back (0.6f);
     66  docSet2.push_back (16); rankSet2.push_back (0.7f);
     67  docSet2.push_back (17); rankSet2.push_back (0.8f);
     68  docSet2.push_back (19); rankSet2.push_back (0.9f);
     69  docSet2.push_back (20); rankSet2.push_back (0.1f);
     70  docSet2.push_back (21); rankSet2.push_back (0.2f);
    7171
    7272  cout << "\n" << setNode1->queryResult << "AND\n\n"
     
    170170  DocNumArray &docSet1 = setNode1->queryResult.docs;
    171171  RankArray &rankSet1 = setNode1->queryResult.ranks;
    172   docSet1.push_back (1);  rankSet1.push_back (0.1);
    173   docSet1.push_back (10); rankSet1.push_back (0.2);
    174   docSet1.push_back (15); rankSet1.push_back (0.2);
    175   docSet1.push_back (18); rankSet1.push_back (0.4);
    176   docSet1.push_back (19); rankSet1.push_back (0.5);
     172  docSet1.push_back (1);  rankSet1.push_back (0.1f);
     173  docSet1.push_back (10); rankSet1.push_back (0.2f);
     174  docSet1.push_back (15); rankSet1.push_back (0.2f);
     175  docSet1.push_back (18); rankSet1.push_back (0.4f);
     176  docSet1.push_back (19); rankSet1.push_back (0.5f);
    177177
    178178  SetQueryNode *setNode2 = new SetQueryNode;
    179179  DocNumArray &docSet2 = setNode2->queryResult.docs;
    180180  RankArray &rankSet2 = setNode2->queryResult.ranks;
    181   docSet2.push_back (2);  rankSet2.push_back (0.1);
    182   docSet2.push_back (11); rankSet2.push_back (0.2);
    183   docSet2.push_back (12); rankSet2.push_back (0.3);
    184   docSet2.push_back (13); rankSet2.push_back (0.4);
    185   docSet2.push_back (14); rankSet2.push_back (0.5);
    186   docSet2.push_back (15); rankSet2.push_back (0.6);
    187   docSet2.push_back (16); rankSet2.push_back (0.7);
    188   docSet2.push_back (17); rankSet2.push_back (0.8);
    189   docSet2.push_back (19); rankSet2.push_back (0.9);
    190   docSet2.push_back (20); rankSet2.push_back (0.1);
    191   docSet2.push_back (21); rankSet2.push_back (0.2);
     181  docSet2.push_back (2);  rankSet2.push_back (0.1f);
     182  docSet2.push_back (11); rankSet2.push_back (0.2f);
     183  docSet2.push_back (12); rankSet2.push_back (0.3f);
     184  docSet2.push_back (13); rankSet2.push_back (0.4f);
     185  docSet2.push_back (14); rankSet2.push_back (0.5f);
     186  docSet2.push_back (15); rankSet2.push_back (0.6f);
     187  docSet2.push_back (16); rankSet2.push_back (0.7f);
     188  docSet2.push_back (17); rankSet2.push_back (0.8f);
     189  docSet2.push_back (19); rankSet2.push_back (0.9f);
     190  docSet2.push_back (20); rankSet2.push_back (0.1f);
     191  docSet2.push_back (21); rankSet2.push_back (0.2f);
    192192
    193193  cout << "\n" << setNode1->queryResult << "OR\n\n"
     
    214214  DocNumArray &rcDocSet = resultCompare.docs;
    215215  RankArray &rcRankSet = resultCompare.ranks;
    216   rcDocSet.push_back (1);  rcRankSet.push_back (0.1);
    217   rcDocSet.push_back (2);  rcRankSet.push_back (0.1);
    218   rcDocSet.push_back (10); rcRankSet.push_back (0.2);
    219   rcDocSet.push_back (11); rcRankSet.push_back (0.2);
    220   rcDocSet.push_back (12); rcRankSet.push_back (0.3);
    221   rcDocSet.push_back (13); rcRankSet.push_back (0.4);
    222   rcDocSet.push_back (14); rcRankSet.push_back (0.5);
    223   rcDocSet.push_back (15); rcRankSet.push_back (0.2+0.6);
    224   rcDocSet.push_back (16); rcRankSet.push_back (0.7);
    225   rcDocSet.push_back (17); rcRankSet.push_back (0.8);
    226   rcDocSet.push_back (18); rcRankSet.push_back (0.4);
    227   rcDocSet.push_back (19); rcRankSet.push_back (0.9+0.5);
    228   rcDocSet.push_back (20); rcRankSet.push_back (0.1);
    229   rcDocSet.push_back (21); rcRankSet.push_back (0.2);
     216  rcDocSet.push_back (1);  rcRankSet.push_back (0.1f);
     217  rcDocSet.push_back (2);  rcRankSet.push_back (0.1f);
     218  rcDocSet.push_back (10); rcRankSet.push_back (0.2f);
     219  rcDocSet.push_back (11); rcRankSet.push_back (0.2f);
     220  rcDocSet.push_back (12); rcRankSet.push_back (0.3f);
     221  rcDocSet.push_back (13); rcRankSet.push_back (0.4f);
     222  rcDocSet.push_back (14); rcRankSet.push_back (0.5f);
     223  rcDocSet.push_back (15); rcRankSet.push_back (0.2f+0.6f);
     224  rcDocSet.push_back (16); rcRankSet.push_back (0.7f);
     225  rcDocSet.push_back (17); rcRankSet.push_back (0.8f);
     226  rcDocSet.push_back (18); rcRankSet.push_back (0.4f);
     227  rcDocSet.push_back (19); rcRankSet.push_back (0.9f+0.5f);
     228  rcDocSet.push_back (20); rcRankSet.push_back (0.1f);
     229  rcDocSet.push_back (21); rcRankSet.push_back (0.2f);
    230230 
    231231
     
    317317  DocNumArray &docSet1 = setNode1->queryResult.docs;
    318318  RankArray &rankSet1 = setNode1->queryResult.ranks;
    319   docSet1.push_back (1);  rankSet1.push_back (0.1);
    320   docSet1.push_back (10); rankSet1.push_back (0.2);
    321   docSet1.push_back (15); rankSet1.push_back (0.2);
    322   docSet1.push_back (18); rankSet1.push_back (0.4);
    323   docSet1.push_back (19); rankSet1.push_back (0.5);
     319  docSet1.push_back (1);  rankSet1.push_back (0.1f);
     320  docSet1.push_back (10); rankSet1.push_back (0.2f);
     321  docSet1.push_back (15); rankSet1.push_back (0.2f);
     322  docSet1.push_back (18); rankSet1.push_back (0.4f);
     323  docSet1.push_back (19); rankSet1.push_back (0.5f);
    324324
    325325  SetQueryNode *setNode2 = new SetQueryNode;
    326326  DocNumArray &docSet2 = setNode2->queryResult.docs;
    327327  RankArray &rankSet2 = setNode2->queryResult.ranks;
    328   docSet2.push_back (2);  rankSet2.push_back (0.1);
    329   docSet2.push_back (11); rankSet2.push_back (0.2);
    330   docSet2.push_back (12); rankSet2.push_back (0.3);
    331   docSet2.push_back (13); rankSet2.push_back (0.4);
    332   docSet2.push_back (14); rankSet2.push_back (0.5);
    333   docSet2.push_back (15); rankSet2.push_back (0.6);
    334   docSet2.push_back (16); rankSet2.push_back (0.7);
    335   docSet2.push_back (17); rankSet2.push_back (0.8);
    336   docSet2.push_back (19); rankSet2.push_back (0.9);
    337   docSet2.push_back (20); rankSet2.push_back (0.1);
    338   docSet2.push_back (21); rankSet2.push_back (0.2);
     328  docSet2.push_back (2);  rankSet2.push_back (0.1f);
     329  docSet2.push_back (11); rankSet2.push_back (0.2f);
     330  docSet2.push_back (12); rankSet2.push_back (0.3f);
     331  docSet2.push_back (13); rankSet2.push_back (0.4f);
     332  docSet2.push_back (14); rankSet2.push_back (0.5f);
     333  docSet2.push_back (15); rankSet2.push_back (0.6f);
     334  docSet2.push_back (16); rankSet2.push_back (0.7f);
     335  docSet2.push_back (17); rankSet2.push_back (0.8f);
     336  docSet2.push_back (19); rankSet2.push_back (0.9f);
     337  docSet2.push_back (20); rankSet2.push_back (0.1f);
     338  docSet2.push_back (21); rankSet2.push_back (0.2f);
    339339
    340340  cout << "\n" << setNode1->queryResult << "NOT\n\n"
  • trunk/mgpp/text/Queryer.cpp

    r6116 r8692  
    111111  // do querying
    112112  QueryInfo queryInfo;
    113   SetCStr (queryInfo.docLevel, "Document");
     113  SetCStr (queryInfo.docLevel, "Document", 8);
    114114  queryInfo.maxDocs = 50;
    115115  queryInfo.sortByRank = true;
     
    125125
    126126  UCArray docLevel;
    127   SetCStr(docLevel, "Document");
     127  SetCStr(docLevel, "Document", 8);
    128128
    129129  UCArray level;
     
    143143    cout << "> ";
    144144    cin.getline(query, 2048, '\n');
    145     SetCStr (queryArray, query);
     145    SetCStr (queryArray, query, strlen(query));
    146146
    147147    // check for commands
     
    155155    cin >> query;
    156156    UCArrayClear(queryInfo.docLevel);
    157     SetCStr(queryInfo.docLevel, query);
     157    SetCStr(queryInfo.docLevel, query, strlen(query));
    158158    cout << "index set to " << queryInfo.docLevel <<"\n";
    159159    cin.getline(query, 2048, '\n');
     
    162162    cin >> query;
    163163    UCArrayClear(level);
    164     SetCStr(level, query);
     164    SetCStr(level, query, strlen(query));
    165165    cout << "level set to " << level <<"\n";
    166166    cin.getline(query, 2048, '\n');
     
    221221    cin>>query;
    222222    UCArrayClear(browseNode.term);
    223     SetCStr(browseNode.term, query);
     223    SetCStr(browseNode.term, query, strlen(query));
    224224    cin.getline(query, 2048, '\n'); // get rest of line
    225225
  • trunk/mgpp/text/TagInfo.cpp

    r3365 r8692  
    2424
    2525void TagInfo::SetDocTag (const char *cStr) {
    26   SetCStr (docTag, cStr);
     26  SetCStr (docTag, cStr, strlen(cStr));
    2727}
    2828
    2929void TagInfo::SetIndexLevel (const char *cStr) {
    30   SetCStr (indexLevel, cStr);
     30  SetCStr (indexLevel, cStr, strlen(cStr));
    3131}
    3232void TagInfo::AddLevelTag (const char *cStr) {
    3333  // convert the string
    3434  UCArray cArr;
    35   SetCStr (cArr, cStr);
     35  SetCStr (cArr, cStr, strlen(cStr));
    3636
    3737  // insert the tag
     
    6464  while (here != end) {
    6565    s << "\"" << (*here) << "\"";
    66     here++;
     66    ++here;
    6767    if (here != end) s << ", ";
    6868  }
  • trunk/mgpp/text/Terms.cpp

    r8242 r8692  
    5151 
    5252  unsigned long i;
    53   for (i=0; i<t.equivTerms.size(); i++) {
     53  for (i=0; i<t.equivTerms.size(); ++i) {
    5454    s << t.equivTerms[i] << ", ";
    5555  }
     
    8383
    8484  s << "termFreqs: ";
    85   for (unsigned long i=0; i<termFreqs.size(); i++)
     85  for (unsigned long i=0; i<termFreqs.size(); ++i)
    8686    s << termFreqs[i] << ", ";
    8787 
     
    9595  s << "docs: ";
    9696  unsigned long i;
    97   for (i=0; i<r.docs.size(); i++)
     97  for (i=0; i<r.docs.size(); ++i)
    9898    s << r.docs[i] << ", ";
    9999 
    100100  s << "\nranks: ";
    101   for (i=0; i<r.ranks.size(); i++)
     101  for (i=0; i<r.ranks.size(); ++i)
    102102    s << r.ranks[i] << ", ";
    103103
    104104  s << "\ntermFreqs: ";
    105   for (i=0; i<r.termFreqs.size(); i++)
     105  for (i=0; i<r.termFreqs.size(); ++i)
    106106    s << r.termFreqs[i] << ", ";
    107107
     
    137137  s << "docs: ";
    138138  unsigned long i;
    139   for (i=0; i<r.docs.size(); i++)
     139  for (i=0; i<r.docs.size(); ++i)
    140140    s << r.docs[i] << ", ";
    141141
    142142  s << "\nlevels: ";
    143   for (i=0; i<r.levels.size(); i++)
     143  for (i=0; i<r.levels.size(); ++i)
    144144    s << r.levels[i] << ", ";
    145145
    146146 
    147147  s << "\nranks: ";
    148   for (i=0; i<r.ranks.size(); i++)
     148  for (i=0; i<r.ranks.size(); ++i)
    149149    s << r.ranks[i] << ", ";
    150150
    151151  s << "\ntermFreqs: ";
    152   for (i=0; i<r.termFreqs.size(); i++)
     152  for (i=0; i<r.termFreqs.size(); ++i)
    153153    s << r.termFreqs[i] << ", ";
    154154  s << "\nactual number of docs found: " << r.actualNumDocs;
     
    182182  s << "terms: ";
    183183  unsigned long i;
    184   for (i=0; i<r.termFreqs.size(); i++)
     184  for (i=0; i<r.termFreqs.size(); ++i)
    185185    s << r.termFreqs[i] << ", ";
    186186    s << "\n\n";
     
    324324  unsigned long fragLimitI = 0;
    325325  unsigned long i;
    326   for (i=0; i<wordDictEl.frag_occur; i++) {
     326  for (i=0; i<wordDictEl.frag_occur; ++i) {
    327327    fragNum += buffer.bblock_decode (B, NULL);
    328328    if (!indexData.ifh.word_level_index) termFreq = buffer.gamma_decode (NULL);
     
    333333      while (fragLimitI+1 < (*fragLimits).size() &&
    334334         fragNum > (*fragLimits)[fragLimitI+1].rangeStart) {
    335     fragLimitI++;
     335    ++fragLimitI;
    336336      }
    337337    }
     
    375375      if (needFragFreqs)
    376376    outFragData.fragFreqs.push_back (f2.fragFreqs[f2I]);
    377       f2I++;
     377      ++f2I;
    378378     
    379379    } else if (f1I < f1Size &&
     
    384384      if (needFragFreqs)
    385385    outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]);
    386       f1I++;
     386      ++f1I;
    387387     
    388388    } else {
     
    391391      if (needFragFreqs)
    392392    outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]+f2.fragFreqs[f2I]);
    393       f1I++;
    394       f2I++;
     393      ++f1I;
     394      ++f2I;
    395395    }
    396396  }
     
    432432      while (fragLimitI+1 < fragLimitSize &&
    433433         comFragNum > (signed long)(*fragLimits)[fragLimitI+1].rangeStart) {
    434     fragLimitI++;
     434    ++fragLimitI;
    435435      }
    436436    }
     
    439439    (fragLimits!=NULL &&
    440440     fragNum<=(signed long)(*fragLimits)[fragLimitI].rangeStart)) {
    441       fragDataI++;
     441      ++fragDataI;
    442442     
    443443    } else if (fragNum > comFragNum+endRange ||
    444444           (fragLimits!=NULL &&
    445445        fragNum>(signed long)(*fragLimits)[fragLimitI].rangeEnd)) {
    446       comFragDataI++;
     446      ++comFragDataI;
    447447     
    448448    } else {
     
    454454      fragData.fragFreqs[fragDataI] : comFragData.fragFreqs[comFragDataI];
    455455      }
    456       fragDataI++;
    457       comFragDataI++;
    458       outI++;
     456      ++fragDataI;
     457      ++comFragDataI;
     458      ++outI;
    459459    }
    460460  }
     
    527527      }
    528528    }
    529     termDataI++;
     529    ++termDataI;
    530530  }
    531531
     
    600600      while (resultI < resultSize &&
    601601         result.docs[resultI] < lastLevelDocNum)
    602         resultI++;
     602        ++resultI;
    603603     
    604604      // store the result
     
    607607        if (needRanks)
    608608          result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
    609         resultI++;
    610         resultOutI++;
     609        ++resultI;
     610        ++resultOutI;
    611611      }
    612612    }
     
    621621    }
    622622   
    623     termDataI++;
     623    ++termDataI;
    624624  } // while
    625625
     
    631631    while (resultI < resultSize &&
    632632       result.docs[resultI] < lastLevelDocNum)
    633       resultI++;
     633      ++resultI;
    634634   
    635635    // store the result
     
    638638      if (needRanks)
    639639    result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
    640       resultI++;
    641       resultOutI++;
     640      ++resultI;
     641      ++resultOutI;
    642642    }
    643643  }
     
    688688      while (resultI < resultSize &&
    689689         result.docs[resultI] < lastLevelDocNum)
    690         resultI++;
     690        ++resultI;
    691691     
    692692      // store the result
     
    695695        if (needRanks)
    696696          result.ranks[resultOutI] = result.ranks[resultI];
    697         resultI++;
    698       resultOutI++;
     697        ++resultI;
     698        ++resultOutI;
    699699      }
    700700    }
     
    704704    }
    705705   
    706     termDataI++;
     706    ++termDataI;
    707707  }
    708708
     
    711711    while (resultI < resultSize &&
    712712       result.docs[resultI] < lastLevelDocNum)
    713       resultI++;
     713      ++resultI;
    714714   
    715715    // store the result
     
    718718      if (needRanks)
    719719    result.ranks[resultOutI] = result.ranks[resultI];
    720       resultI++;
    721       resultOutI++;
     720      ++resultI;
     721      ++resultOutI;
    722722    }
    723723  }
     
    777777    termdata.termFreq = (*here).freq;
    778778    terms.push_back(termdata);
    779     here++;
     779    ++here;
    780780  }
    781781
  • trunk/mgpp/text/TextEl.cpp

    r3365 r8692  
    7979
    8080static void ToggleParaTag (TextEl &el, bool &compatInPara) {
    81   SetCStr (el.tagName, "Paragraph");
     81  SetCStr (el.tagName, "Paragraph", 9);
    8282  el.text.erase (el.text.begin(), el.text.end());
    8383  if (compatInPara) {
     
    9090static void SetRecTag (TextEl &el, TextElType elType) {
    9191  el.elType = elType;
    92   SetCStr (el.tagName, "Document");
     92  SetCStr (el.tagName, "Document", 8);
    9393  el.text.erase (el.text.begin(), el.text.end());
    9494  if (elType == CloseTagE)
  • trunk/mgpp/text/TextGet.cpp

    r3365 r8692  
    2020 **************************************************************************/
    2121
     22// it is important that this include comes first, so we avoid the truncation warning on VC++
     23#include "TextGet.h"
    2224// need this to avoid bizarre compiler problems under VC++ 6.0
    2325#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
     
    2527#endif
    2628
    27 #include "TextGet.h"
    2829#include "mg_files.h"
    2930#include "netorder.h"
     
    4950  memset (ad, '\0', sizeof (*ad));
    5051
    51   for (i = 0; i <= 1; i++)
     52  for (i = 0; i <= 1; ++i)
    5253    {
    5354      int j;
     
    7778
    7879      pos = ad->word_data[i];
    79       for (j = 0; j < (int)ad->afh[i].num_frags; j++)
     80      for (j = 0; j < (int)ad->afh[i].num_frags; ++j)
    8081    {
    8182      ad->words[i][j] = pos;
     
    9394          ad->blk_end[i][num] = ad->blk_start[i][num] +
    9495        (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
    95           num++;
     96          ++num;
    9697        }
    9798    }
     
    115116  lookback = cd.cdh.lookback;
    116117
    117   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++) {
     118  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i) {
    118119    ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
    119120    mem_reqd += cfh->huff_words_size[i];
     
    135136  values[0] = vals;
    136137  values[0][0] = next_word[0];
    137   for (i = 1; i <= cfh->hd.maxcodelen; i++)
     138  for (i = 1; i <= cfh->hd.maxcodelen; ++i)
    138139    {
    139140      int next_start = (values[i - 1] - vals) +
     
    146147  memset (num_set, '\0', sizeof (num_set));
    147148
    148   for (i = 0; i < cfh->hd.num_codes; i++)
     149  for (i = 0; i < cfh->hd.num_codes; ++i)
    149150    {
    150151      register int val, copy;
     
    175176    }
    176177      memcpy (last_word[len], word, *word + 1);
    177       num_set[len]++;
     178      ++num_set[len];
    178179    }
    179180  if (cfh->hd.clens)
    180     delete cfh->hd.clens;
     181    delete []cfh->hd.clens;
    181182  cfh->hd.clens = NULL;
    182183  return values;
     
    196197    return 3;
    197198  if (hd->clens)
    198     delete hd->clens;
     199    delete []hd->clens;
    199200  hd->clens = NULL;
    200201  if (type == chars)
     
    239240    return false;
    240241
    241   for (which = 0; which < 2; which++)
     242  for (which = 0; which < 2; ++which)
    242243    switch (cd.cdh.dict_type)
    243244      {
     
    353354  fread (fixup, fixup_mem, sizeof (u_char), text_fast_comp_dict);
    354355
    355   for (p = (u_long *) cd; (u_long) p < (u_long) end; p++)
     356  for (p = (u_long *) cd; (u_long) p < (u_long) end; ++p)
    356357    if (IS_FIXUP (p))
    357358      {
     
    364365  NTOHUL(cd->cdh.dict_type);
    365366  NTOHUL(cd->cdh.novel_method);
    366   for (i = 0; i < TEXT_PARAMS; i++)
     367  for (i = 0; i < TEXT_PARAMS; ++i)
    367368    NTOHUL(cd->cdh.params[i]);
    368369  NTOHUL(cd->cdh.num_words[0]);
     
    372373  NTOHUL(cd->cdh.lookback);
    373374  /* cfh */
    374   for (i = 0; i <= 1; i++)
     375  for (i = 0; i <= 1; ++i)
    375376    {
    376377      int j;
     
    379380      NTOHSI(cd->cfh[i]->hd.mincodelen);
    380381      NTOHSI(cd->cfh[i]->hd.maxcodelen);
    381       for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     382      for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    382383    {
    383384      NTOHSI(cd->cfh[i]->hd.lencount[j]);
     
    385386    }
    386387      NTOHUL(cd->cfh[i]->uncompressed_size);
    387       for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     388      for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    388389    NTOHUL(cd->cfh[i]->huff_words_size[j]);
    389390    }
     
    392393  if (cd->cdh.novel_method == MG_NOVEL_DELTA ||
    393394      cd->cdh.novel_method == MG_NOVEL_HYBRID)
    394     for (i = 0; i <= 1; i++)
     395    for (i = 0; i <= 1; ++i)
    395396      {
    396397    int j;
     
    398399    NTOHUL(cd->ad->afh[i].num_frags);
    399400    NTOHUL(cd->ad->afh[i].mem_for_frags);
    400     for (j = 0; j < 33; j++)
     401    for (j = 0; j < 33; ++j)
    401402      {
    402403        NTOHSI(cd->ad->blk_start[i][j]);
     
    406407  NTOHSI(cd->fast_loaded);
    407408
    408   delete fixup;
     409  delete []fixup;
    409410
    410411  // the whole fast comp dict is a bit of a hack so I don't
     
    584585  unsigned long bits = 0;
    585586
     587  if (docText.capacity() < docText.size() + num_bits + 1) {
     588    docText.reserve(docText.size() + num_bits + 1);
     589  }
    586590  // keep decoding bits until enough bits have been decoded
    587591  while (bits < num_bits) {
     
    616620      len = buffer.huff_decode(cd.lens_huff[which]->min_code,
    617621                   cd.lens_vals[which], &bits);
    618       for (i = 0; i < len; i++) {
     622      for (i = 0; i < len; ++i) {
    619623        c = buffer.huff_decode(cd.chars_huff[which]->min_code,
    620624                   cd.chars_vals[which], &bits);
     
    633637          {
    634638        idx = buffer.delta_decode (&bits);
    635         idx--;
     639        --idx;
    636640          }
    637641          break;
     
    640644        int k;
    641645        k = buffer.gamma_decode (&bits);
    642         k--;
     646        --k;
    643647        idx = buffer.binary_decode(ad->blk_end[which][k] -
    644648                       ad->blk_start[which][k] + 1,
     
    650654      base = ad->words[which][idx];
    651655      len = *base++;
    652       for (; len; len--)
     656      for (; len; --len)
    653657        {
    654658          docText.push_back (*base++);
  • trunk/mgpp/text/UCArray.cpp

    r8242 r8692  
    2929  while (*cStr != '\0') {
    3030    text.push_back (*cStr);
    31     cStr++;
     31    ++cStr;
    3232  }
    3333}
     
    4646}
    4747
    48 char * GetCStr(UCArray text) {
     48char * GetCStr(const UCArray& text) {
    4949
    5050  char *cstr = new char[text.size()+1];
     
    5555  while (here != end) {
    5656    cstr[i] = text[i];
    57     here++; i++;
     57    ++here; ++i;
    5858  }
    5959  cstr[i]='\0';
    6060  return cstr;
     61}
     62
     63bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr)
     64{
     65  if ((cStr == NULL || *cStr == '\0') && text.empty()) return true;
     66  UCArray::const_iterator thisUC = text.begin();
     67  UCArray::const_iterator endUC = text.end();
     68  while (thisUC != endUC && *cStr != '\0') {
     69    if (*thisUC != *cStr) return false;
     70    ++cStr; ++thisUC;
     71  }
     72  if (thisUC == endUC && *cStr == '\0') return true;
     73  return false;
    6174}
    6275
     
    158171    a.push_back (b);
    159172   
    160     arraySize--;
     173    --arraySize;
    161174  }
    162175 
     
    297310
    298311  while (i < l && *a1Here == *a2Here) {
    299     i++; ++a1Here; ++a2Here;
     312    ++i; ++a1Here; ++a2Here;
    300313  }
    301314 
     
    340353    unsigned char c = fgetc (f);
    341354    a.push_back (c);
    342     sufLen--;
    343   }
    344 
    345   return true;
    346 }
    347 
     355    --sufLen;
     356  }
     357
     358  return true;
     359}
     360
  • trunk/mgpp/text/UCArray.h

    r8242 r8692  
    2222#ifndef UCARRAY_H
    2323#define UCARRAY_H
     24
     25#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
     26#pragma warning(disable:4786)
     27#endif
    2428
    2529// need this to avoid bizarre compiler problems under VC++ 6.0
     
    5761// same as SetCStr but first tries to allocate nSizeHint space (only if needed)
    5862void SetCStr (UCArray &text, const char *cStr, size_t nSizeHint);
    59 char * GetCStr(UCArray text);
     63char * GetCStr(const UCArray& text);
    6064inline void UCArrayClear (UCArray &a) {
    6165  a.erase (a.begin(), a.end());
    6266}
     67bool UCArrayCStrEquals(const UCArray &text, const unsigned char *cStr);
     68inline bool UCArrayCStrEquals(const UCArray &text, const char *cStr) { return UCArrayCStrEquals(text, (const unsigned char *)cStr); }
    6369
    6470// stream operator to print UCArray
  • trunk/mgpp/text/mg_errors.cpp

    r3365 r8692  
    5151  /* free the current error string, unless it is the null string */
    5252  if ((mg_error_data != NULL) && (mg_error_data != null_data)) {
    53     delete mg_error_data;
     53    delete []mg_error_data;
    5454    mg_error_data = null_data;
    5555  }
     
    6767  /* free the current error string, unless it is the null string */
    6868  if ((mg_error_data != NULL) && (mg_error_data != null_data)) {
    69     delete mg_error_data;
     69    delete []mg_error_data;
    7070    mg_error_data = null_data;
    7171  }
  • trunk/mgpp/text/mg_files.cpp

    r3365 r8692  
    4545  if (basepath)
    4646    {
    47       delete basepath;
     47      delete []basepath;
    4848      basepath = NULL;
    4949    }
  • trunk/mgpp/text/mgpp_compression_dict.cpp

    r3365 r8692  
    233233  NTOHD(csh.num_bytes);
    234234
    235   for (i = 0; i < 2; i++)
     235  for (i = 0; i < 2; ++i)
    236236    {
    237237      frags_stats_header fsh;
     
    253253      wd = Words[i] = (DictWordData *) Xmalloc (sizeof (DictWordData) * Num[i]);
    254254      unsigned int j;
    255       for (j = 0; j < Num[i]; j++, wd++)
     255      for (j = 0; j < Num[i]; ++j, ++wd)
    256256    {
    257257      int len;
     
    306306  dd->chars = 0;
    307307  wd = dd->wd;
    308   for (i = 0; i < dd->num_wds; i++, wd++)
     308  for (i = 0; i < dd->num_wds; ++i, ++wd)
    309309    dd->chars += (*wd)->word[0];
    310310}
     
    317317  Alloc_keep_discard ();
    318318  keep[0].num_wds = Num[0];
    319   for (i = 0; i < Num[0]; i++)
     319  for (i = 0; i < Num[0]; ++i)
    320320    keep[0].wd[i] = Words[0] + i;
    321321  keep[1].num_wds = Num[1];
    322   for (i = 0; i < Num[1]; i++)
     322  for (i = 0; i < Num[1]; ++i)
    323323    keep[1].wd[i] = Words[1] + i;
    324324  SortAndCount_DictData (&keep[0]);
     
    367367  num = Num[0] + Num[1];
    368368  wd = (DictWordData **) Xmalloc (num * sizeof (DictWordData *));
    369   for (i = 0; (unsigned int)i < Num[0]; i++)
     369  for (i = 0; (unsigned int)i < Num[0]; ++i)
    370370    wd[i] = Words[0] + i;
    371   for (i = 0; (unsigned int)i < Num[1]; i++)
     371  for (i = 0; (unsigned int)i < Num[1]; ++i)
    372372    wd[i + Num[0]] = Words[1] + i;
    373373
     
    390390    }
    391391
    392   for (i = 0; i < num; i++)
     392  for (i = 0; i < num; ++i)
    393393    {
    394394      DictWordData *word = wd[i];
     
    438438  memset (char_freqs, '\0', sizeof (char_freqs));
    439439  memset (len_freqs, '\0', sizeof (len_freqs));
    440   for (i = 0; i < num; i++, wd++)
     440  for (i = 0; i < num; ++i, ++wd)
    441441    {
    442442      u_long freq = (*wd)->documents();
     
    446446      len_freqs[idx][len] += freq;
    447447      escape[idx] += freq;
    448       for (; len; len--, buf++)
     448      for (; len; --len, ++buf)
    449449    char_freqs[idx][(u_long) (*buf)] += freq;
    450450    }
     
    471471  int j;
    472472
    473   for (j = 0; j < num; j++, word++)
     473  for (j = 0; j < num; ++j, ++word)
    474474    {
    475475      float   cbc, wbc;
     
    480480
    481481      cbc = len_lens[idx][len];
    482       for (; len; len--, buf++)
     482      for (; len; --len, ++buf)
    483483    cbc += char_lens[idx][(u_long) (*buf)];
    484484
     
    585585
    586586
    587   for (i = 0; (unsigned int)i < Num[0]; i++)
     587  for (i = 0; (unsigned int)i < Num[0]; ++i)
    588588    discard_heap[i] = Words[0] + i;
    589   for (i = 0; (unsigned int)i < Num[1]; i++)
     589  for (i = 0; (unsigned int)i < Num[1]; ++i)
    590590    discard_heap[i + Num[0]] = Words[1] + i;
    591591
     
    600600      keep_heap[keep_num++] = word;
    601601      freqs_trans[KIND (word)] += word->documents();
    602       num_trans++;
     602      ++num_trans;
    603603    }
    604604
     
    663663      heap_build (keep_heap, sizeof (keep_heap), keep_num, SmallSaving);
    664664      recalc_reqd = 0;
    665       recalcs++;
     665      ++recalcs;
    666666    }
    667667    }
     
    669669  Alloc_keep_discard ();
    670670
    671   for (i = 0; i < discard_num; i++)
     671  for (i = 0; i < discard_num; ++i)
    672672    {
    673673      DictWordData *word = discard_heap[i];
     
    676676      discard[idx].wd[discard[idx].num_wds++] = word;
    677677    }
    678   for (i = 0; i < keep_num; i++)
     678  for (i = 0; i < keep_num; ++i)
    679679    {
    680680      DictWordData *word = keep_heap[i];
     
    724724  HTONUL(tmp.dict_type);
    725725  HTONUL(tmp.novel_method);
    726   for (i = 0; i < TEXT_PARAMS; i++)
     726  for (i = 0; i < TEXT_PARAMS; ++i)
    727727    HTONUL(tmp.params[i]);
    728728  HTONUL(tmp.num_words[0]);
     
    741741  unsigned int i;
    742742  u_char *curr, *prev = NULL;
    743   for (i = 0; i < dd->num_wds; i++)
     743  for (i = 0; i < dd->num_wds; ++i)
    744744    {
    745745      int len;
     
    762762{
    763763  unsigned int i, us;
    764   for (us = i = 0; i < dd->num_wds; i++)
     764  for (us = i = 0; i < dd->num_wds; ++i)
    765765    us += dd->wd[i]->word[0];
    766766  return us;
     
    783783    FatalError (1, "Unable to allocate memory for freqs");
    784784
    785   for (i = 0; (unsigned)i < dd->num_wds; i++)
     785  for (i = 0; (unsigned)i < dd->num_wds; ++i)
    786786    {
    787787      freqs[i] = dd->wd[i]->documents();
     
    792792    FatalError (1, "Unable to allocate memory for huffman data");
    793793
    794   delete (freqs);
     794  delete []freqs;
    795795  freqs = NULL;
    796796
     
    813813  mem_reqd = 0;
    814814
    815   for (i = 0; (unsigned)i < dd->num_wds; i++)
     815  for (i = 0; (unsigned)i < dd->num_wds; ++i)
    816816    {
    817817      int codelen = hd->clens[i];
     
    841841      lastword[codelen] = word;
    842842#endif
    843       lencounts[codelen]++;
     843      ++lencounts[codelen];
    844844    }
    845845
    846846  /* [RPAP - Jan 97: Endian Ordering] */
    847   for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     847  for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    848848    HTONUL(huff_words_size[i]);
    849849
     
    852852
    853853  /* [RPAP - Jan 97: Endian Ordering] */
    854   for (i = hd->mincodelen; i < hd->maxcodelen + 1; i++)
     854  for (i = hd->mincodelen; i < hd->maxcodelen + 1; ++i)
    855855    NTOHUL(huff_words_size[i]);
    856856
    857857  Write_words (f, dd);
    858858
    859   delete hd->clens;
     859  delete []hd->clens;
    860860  delete hd;
    861861
     
    876876  memset (freqs, '\0', sizeof (freqs));
    877877
    878   for (j = 0; j < dd->num_wds; j++, wd++)
     878  for (j = 0; j < dd->num_wds; ++j, ++wd)
    879879    {
    880880      u_char *buf = (*wd)->word;
    881881      int len = *buf++;
    882       for (; len; len--, buf++)
     882      for (; len; --len, ++buf)
    883883    freqs[(u_long) (*buf)] += (*wd)->documents();
    884884    }
    885885
    886886  if (!zero_freq_permitted)
    887     for (j = 0; j < 256; j++)
     887    for (j = 0; j < 256; ++j)
    888888      if (!freqs[j] && PESINAWORD (j) == words)
    889889    freqs[j] = 1;
     
    895895    FatalError (1, "Unable to write huffman data");
    896896
    897   delete hd->clens;
     897  delete []hd->clens;
    898898  delete hd;
    899899}
     
    912912  memset (freqs, '\0', sizeof (freqs));
    913913
    914   for (j = 0; j < dd->num_wds; j++, wd++)
     914  for (j = 0; j < dd->num_wds; ++j, ++wd)
    915915    freqs[(*wd)->word[0]] += (*wd)->documents();
    916916
    917917  if (!zero_freq_permitted)
    918     for (j = 0; j < 16; j++)
     918    for (j = 0; j < 16; ++j)
    919919      if (!freqs[j])
    920920    freqs[j] = 1;
     
    927927
    928928
    929   delete hd->clens;
     929  delete []hd->clens;
    930930  delete hd;
    931931}
     
    958958  Write_cdh (f, &cdh);
    959959
    960   for (i = 0; i < 2; i++)
     960  for (i = 0; i < 2; ++i)
    961961    switch (type)
    962962      {
     
    975975          esc.word = (u_char *) "";
    976976          keep[i].wd[keep[i].num_wds++] = &esc;
    977           for (j = 0; (unsigned)j < discard[i].num_wds; j++)
     977          for (j = 0; (unsigned)j < discard[i].num_wds; ++j)
    978978        esc.docCount += discard[i].wd[j]->documents();
    979979          if (!esc.docCount)
    980         esc.docCount++;
     980        ++esc.docCount;
    981981          mem_reqd += Write_data (f, &keep[i], lookback);
    982982        }
     
    994994          esc.word = (u_char *) "";
    995995          keep[i].wd[keep[i].num_wds++] = &esc;
    996           for (j = 0; (unsigned)j < all[i].num_wds; j++)
     996          for (j = 0; (unsigned)j < all[i].num_wds; ++j)
    997997        if (all[i].wd[j]->documents() == 1)
    998           esc.docCount++;
     998          ++esc.docCount;
    999999          if (!esc.docCount)
    1000         esc.docCount++;
     1000        ++esc.docCount;
    10011001          mem_reqd += Write_data (f, &keep[i], lookback);
    10021002        }
  • trunk/mgpp/text/mgpp_decompress_text.cpp

    r3365 r8692  
    4141  char *basePath = "";
    4242  UCArray level;
    43   SetCStr (level, "Document");
     43  SetCStr (level, "Document", 8);
    4444
    4545  opterr = 0;
     
    5757      break;
    5858    case 'K':
    59       SetCStr (level, optarg);
     59      SetCStr (level, optarg, strlen(optarg));
    6060      break;
    6161    case 'h':
     
    9292    cout << docText << "\n";
    9393   
    94     docNum++;
     94    ++docNum;
    9595  }
    9696 
  • trunk/mgpp/text/mgpp_fast_comp_dict.cpp

    r3365 r8692  
    6060#define FIXUP_VALS(vals) do {                       \
    6161    int i;                              \
    62     for (i=0; i < MAX_HUFFCODE_LEN+1; i++)              \
     62    for (i=0; i < MAX_HUFFCODE_LEN+1; ++i)              \
    6363      FIXUP(&vals[i]);                      \
    6464      } while(0)
     
    128128
    129129    /* cfh */
    130     for (which = 0; which <= 1; which++)
     130    for (which = 0; which <= 1; ++which)
    131131      {
    132132    int j;
     
    135135    HTONSI(cd->cfh[which]->hd.mincodelen);
    136136    HTONSI(cd->cfh[which]->hd.maxcodelen);
    137     for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     137    for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    138138      {
    139139        HTONSI(cd->cfh[which]->hd.lencount[j]);
     
    141141      }
    142142    HTONUL(cd->cfh[which]->uncompressed_size);
    143     for (j = 0; j < MAX_HUFFCODE_LEN + 1; j++)
     143    for (j = 0; j < MAX_HUFFCODE_LEN + 1; ++j)
    144144      HTONUL(cd->cfh[which]->huff_words_size[j]);
    145145      }
     
    148148    if (cd->cdh.novel_method == MG_NOVEL_DELTA ||
    149149    cd->cdh.novel_method == MG_NOVEL_HYBRID)
    150       for (which = 0; which <= 1; which++)
     150      for (which = 0; which <= 1; ++which)
    151151    {
    152152      int j;
     
    154154      HTONUL(cd->ad->afh[which].num_frags);
    155155      HTONUL(cd->ad->afh[which].mem_for_frags);
    156       for (j = 0; j < 33; j++)
     156      for (j = 0; j < 33; ++j)
    157157        {
    158158          HTONSI(cd->ad->blk_start[which][j]);
     
    163163    HTONUL(cd->cdh.dict_type);
    164164    HTONUL(cd->cdh.novel_method);
    165     for (i = 0; i < TEXT_PARAMS; i++)
     165    for (i = 0; i < TEXT_PARAMS; ++i)
    166166      HTONUL(cd->cdh.params[which]);
    167167    HTONUL(cd->cdh.num_words[0]);
     
    186186{
    187187  u_long *p;
    188   for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)
     188  for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p)
    189189    {
    190190      if (IS_FIXUP (p))
     
    206206           MAGIC_AUX_DICT, MG_ABORT);  /* [RPAP - Feb 97: WIN32 Port] */
    207207
    208   for (i = 0; i <= 1; i++)
     208  for (i = 0; i <= 1; ++i)
    209209    {
    210210      aux_frags_header afh;
     
    234234  lookback = cdh->lookback;
    235235
    236   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
     236  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i)
    237237    {
    238238      ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
     
    244244  mem += mem_reqd;
    245245
    246   for (i = 0; i < cfh->hd.num_codes; i++)
     246  for (i = 0; i < cfh->hd.num_codes; ++i)
    247247    {
    248248      register int val;
    249       for (val = getc (dict) & 0xf; val; val--)
     249      for (val = getc (dict) & 0xf; val; --val)
    250250    getc (dict);
    251251    }
     
    259259  Read_Huffman_Data (dict, &hd, NULL, NULL);
    260260  if (hd.clens)
    261     delete hd.clens;
     261    delete []hd.clens;
    262262  mem += hd.num_codes * sizeof (unsigned long);
    263263  mem += (MAX_HUFFCODE_LEN + 1) * sizeof (unsigned long *);
     
    275275  mem += mem_for_words (dict, cdh, cfh);
    276276  if (cfh->hd.clens)
    277     delete cfh->hd.clens;
     277    delete []cfh->hd.clens;
    278278
    279279  return mem;
     
    298298  NTOHUL(cdh.dict_type);
    299299  NTOHUL(cdh.novel_method);
    300   for (i = 0; i < TEXT_PARAMS; i++)
     300  for (i = 0; i < TEXT_PARAMS; ++i)
    301301    NTOHUL(cdh.params[i]);
    302302  NTOHUL(cdh.num_words[0]);
     
    306306  NTOHUL(cdh.lookback);
    307307
    308   for (which = 0; which < 2; which++)
     308  for (which = 0; which < 2; ++which)
    309309    switch (cdh.dict_type)
    310310      {
     
    393393  ad = (auxiliary_dict *) getmem (sizeof (auxiliary_dict), sizeof (u_char *));
    394394
    395   for (i = 0; i <= 1; i++)
     395  for (i = 0; i <= 1; ++i)
    396396    {
    397397      unsigned int j;
     
    414414
    415415      pos = ad->word_data[i];
    416       for (j = 0; j < ad->afh[i].num_frags; j++)
     416      for (j = 0; j < ad->afh[i].num_frags; ++j)
    417417    {
    418418      ad->words[i][j] = pos;
     
    431431          ad->blk_end[i][num] = ad->blk_start[i][num] +
    432432        (ad->blk_end[i][num - 1] - ad->blk_start[i][num - 1]) * 2;
    433           num++;
     433          ++num;
    434434        }
    435435    }
     
    459459  lookback = cd->cdh.lookback;
    460460
    461   for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; i++)
     461  for (i = cfh->hd.mincodelen; i <= cfh->hd.maxcodelen; ++i)
    462462    {
    463463      ptrs_reqd += (cfh->hd.lencount[i] + ((1 << lookback) - 1)) >> lookback;
     
    479479  values[0][0] = next_word[0];
    480480  FIXUP (&values[0][0]);
    481   for (i = 1; i <= cfh->hd.maxcodelen; i++)
     481  for (i = 1; i <= cfh->hd.maxcodelen; ++i)
    482482    {
    483483      int next_start = (values[i - 1] - vals) +
     
    492492  memset (num_set, '\0', sizeof (num_set));
    493493
    494   for (i = 0; i < cfh->hd.num_codes; i++)
     494  for (i = 0; i < cfh->hd.num_codes; ++i)
    495495    {
    496496      register int val, copy;
     
    528528    }
    529529      memcpy (last_word[len], word, *word + 1);
    530       num_set[len]++;
     530      ++num_set[len];
    531531    }
    532532  if (cfh->hd.clens)
    533     delete cfh->hd.clens;
     533    delete []cfh->hd.clens;
    534534  cfh->hd.clens = NULL;
    535535  return values;
     
    559559  fcode[0] = values[0] = &vals[0];
    560560  FIXUP (&values[0]);
    561   for (i = 1; i <= data->maxcodelen; i++)
     561  for (i = 1; i <= data->maxcodelen; ++i)
    562562    {
    563563      fcode[i] = values[i] = &vals[(values[i - 1] - vals) + data->lencount[i - 1]];
     
    565565    }
    566566
    567   for (i = 0; i < data->num_codes; i++)
     567  for (i = 0; i < data->num_codes; ++i)
    568568    if (data->clens[i])
    569569      *fcode[(int) (data->clens[i])]++ = i;
     
    586586  FIXUP (&cd->chars_vals[which]);
    587587  if (hd->clens)
    588     delete hd->clens;
     588    delete []hd->clens;
    589589  hd->clens = NULL;
    590590}
     
    623623  Read_cdh (dict, &cd->cdh, NULL, NULL);
    624624
    625   for (which = 0; which < 2; which++)
     625  for (which = 0; which < 2; ++which)
    626626    switch (cd->cdh.dict_type)
    627627      {
     
    683683  {
    684684    u_long *p;
    685     for (p = (u_long *) buffer; (u_long) p < (u_long) cur; p++)
     685    for (p = (u_long *) buffer; (u_long) p < (u_long) cur; ++p)
    686686      {
    687687    if (IS_FIXUP (p))
  • trunk/mgpp/text/mgpp_passes.cpp

    r3365 r8692  
    2323#define _XOPEN_SOURCE 1
    2424#define _XOPEN_SOURCE_EXTENDED 1
     25
     26#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
     27#pragma warning(disable:4786)
     28#endif
    2529
    2630// need this to avoid bizarre compiler problems under VC++ 6.0
  • trunk/mgpp/text/text.pass2.cpp

    r3365 r8692  
    2121 **************************************************************************/
    2222
     23#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
     24#pragma warning(disable:4786)
     25#endif
     26
    2327// need this to avoid bizarre compiler problems under VC++ 6.0
    2428#if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
     
    135139  int i;
    136140  if (cdh.novel_method != MG_NOVEL_HUFFMAN_CHARS)
    137     for (i = 0; i <= 1; i++)
     141    for (i = 0; i <= 1; ++i)
    138142      {
    139143    nht[i].HashSize = INITIAL_HASH_SIZE;
     
    158162        blk_end[i][num] = blk_start[i][num] +
    159163          (blk_end[i][num - 1] - blk_start[i][num - 1]) * 2;
    160         num++;
     164        ++num;
    161165          }
    162166      }
     
    176180    int res;
    177181   
    178     if (which) numWords++;
     182    if (which) ++numWords;
    179183
    180184    /* First parse a word or non-word out of the string */
     
    202206    s2 = *wptr;
    203207    len = *s1 + 1;
    204     for (; len; len--)
     208    for (; len; --len)
    205209      if (*s1++ != *s2++) break;
    206210   
     
    234238          }
    235239        buffer.huff_encode (Word[0], lens_codes[which], lens_huff[which].clens, NULL);
    236         for (i = 0; i < Word[0]; i++)
     240        for (i = 0; i < Word[0]; ++i)
    237241          buffer.huff_encode (Word[i + 1], char_codes[which],
    238242                  char_huff[which].clens, NULL);
     
    252256          buffer.huff_encode (Word[0], lens_codes[which],
    253257                      lens_huff[which].clens, NULL);
    254           for (i = 0; i < Word[0]; i++)
     258          for (i = 0; i < Word[0]; ++i)
    255259            buffer.huff_encode (Word[i + 1], char_codes[which],
    256260                    char_huff[which].clens, NULL);
     
    280284              h->pool->ptr += len;
    281285              h->pool->left -= len;
    282               h->HashUsed++;
     286              ++h->HashUsed;
    283287              break;
    284288            }
     
    287291              s2 = ent->word;
    288292              len = *s1 + 1;
    289               for (; len; len--)
     293              for (; len; --len)
    290294            if (*s1++ != *s2++)
    291295              break;
     
    306310            int j = ent->ordinal_num - 1;
    307311            while (j > blk_end[which][k])
    308               k++;
     312              ++k;
    309313            assert (j - blk_start[which][k] + 1 >= 1 &&
    310314                j - blk_start[which][k] + 1 <=
     
    331335              memset (ht, '\0', sizeof (novel_hash_rec) * size);
    332336             
    333               for (i = 0; i < h->HashSize; i++)
     337              for (i = 0; i < h->HashSize; ++i)
    334338            if (h->HashTable[i].word)
    335339              {
     
    409413   
    410414    cth.num_of_bytes += (*here).text.size();
    411     here++;
     415    ++here;
    412416  }
    413417
     
    417421  while (tiHere != tiEnd) {
    418422    if ((*tiHere).second.inDoc) (*tiHere).second.SetEnd (endPos, endBit);
    419     tiHere++;
     423    ++tiHere;
    420424  }
    421425 
    422426  // we've processed one more document
    423   cth.num_of_docs++;
     427  ++cth.num_of_docs;
    424428
    425429  return COMPALLOK;
     
    434438    return COMPERROR;
    435439 
    436   for (i = 0; i <= 1; i++)
     440  for (i = 0; i <= 1; ++i)
    437441    {
    438442      aux_frags_header afh;
     
    461465  int i;
    462466  u_long aux_compressed = 0, total_uncomp = 0;
    463   for (i = 0; i <= 1; i++)
     467  for (i = 0; i <= 1; ++i)
    464468    {
    465469      int j;
     
    475479        {
    476480          int len = *buf++;
    477           lens[len]++;
     481          ++lens[len];
    478482          total_uncomp += len + 4;
    479           for (; len; len--)
    480         chars[*buf++]++;
     483          for (; len; --len)
     484        ++chars[*buf++];
    481485        }
    482486    }
    483       for (j = 0; j < 256; j++)
     487      for (j = 0; j < 256; ++j)
    484488    if (!chars[j] && PESINAWORD (j) == i)
    485489      fchars[j] = 1;
    486490    else
    487491      fchars[j] = chars[j];
    488       for (j = 0; j < 16; j++)
     492      for (j = 0; j < 16; ++j)
    489493    if (!lens[j])
    490494      flens[j] = 1;
     
    522526      return false;
    523527   
    524     tiHere++;
     528    ++tiHere;
    525529  }
    526530
  • trunk/mgpp/text/words.cpp

    r3365 r8692  
    3737}
    3838
     39/* It determines whether a given place in a UTF-8 encoded Unicode string is a unicode space. */
     40int isaspace (const u_char *here, const u_char *end)
     41{
     42  unsigned short c;
     43  if (parse_utf8_char(here, end, &c) > 0) return is_unicode_space(c);
     44  return 0;
     45}
     46
     47/* Return a pointer into the UTF-8 encoded Unicode string with the leading
     48   Unicode spaces skipped. */
     49u_char *skipspace(u_char *here, u_char *end)
     50{
     51  unsigned short c;
     52  int length;
     53  while(here != end) {
     54    length = parse_utf8_char(here, end, &c);
     55    if (length == 0 || !is_unicode_space(c)) break;
     56    here += length;
     57  }
     58  return here;
     59}
     60 
    3961const unsigned char *ParseIndexWord (const unsigned char *textHere,
    4062                     const unsigned char *textEnd,
     
    5375                   ++numeric <= MAXNUMERIC))) {
    5476    while (charlength-- > 0) {
    55       word.push_back (*textHere++); length++;
     77      word.push_back (*textHere++); ++length;
    5678    }
    5779    charlength = parse_utf8_char (textHere, textEnd, &c);
  • trunk/mgpp/text/words.h

    r3365 r8692  
    7676
    7777#ifdef __cplusplus
    78 extern "C"
     78extern "C" {
    7979#endif
    8080int inaword (const u_char *here, const u_char *end);
     
    8383       is part of a word. */
    8484
     85int isaspace (const u_char *here, const u_char *end);
     86        /* Determines whether a given place in a UTF-8 encoded Unicode string is a Unicode space. */
     87
     88u_char *skipspace(u_char *here, u_char *end);
     89        /* Return a pointer into the UTF-8 encoded Unicode string with the leading Unicode spaces skipped. */
     90
     91#ifdef __cplusplus
     92}
     93#endif
    8594
    8695const unsigned char *ParseIndexWord (const unsigned char *textHere,
Note: See TracChangeset for help on using the changeset viewer.