Changeset 12887


Ignore:
Timestamp:
2006-09-28T11:03:42+12:00 (18 years ago)
Author:
kjdon
Message:

Accent folding patch thanks to Juan Grigera. Added in support for extra stem methods.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/mgpp/text/Terms.cpp

    r8691 r12887  
    210210              vector<unsigned long> &equivWords) {
    211211  equivWords.erase (equivWords.begin(), equivWords.end());
    212  
    213   if (stemMethod == 0 || stemMethod==4 || stemMethod==5) {
     212
     213  /* [JFG - Mar 06: Accent folding patch] */
     214  /* use flag PARTIAL_MATCH */ 
     215  if (stemMethod == 0 || (stemMethod & STEM_PARTIAL_MATCH)) {
    214216    // don't need to stem the word,
    215217    // find the word number(s) for this term
     
    218220    word_block_dict_el wordDictEl;
    219221    wordDictEl.SetNumLevels (numLevels);
    220     if (stemMethod ==0) {
     222    if (stemMethod == 0) {
    221223      if (SearchWordBlockDictEl (indexData.dictFile, indexData.biWords,
    222224                 indexData.bdh.entries_per_wblk,
     
    228230    } else {
    229231      // partial matching,
    230       PartialMatchSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, indexData.bdh.entries_per_wblk, indexData.bdh.word_dict_size, numLevels, term, wordDictEl, equivWords, (stemMethod==5?true:false) );
     232      PartialMatchSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, indexData.bdh.entries_per_wblk, indexData.bdh.word_dict_size, numLevels, term, wordDictEl, equivWords,
     233            (stemMethod & STEM_CaseFolding)? true : false);
     234            // TODO: Accent Folding is not handled here!!
    231235      return;
    232236    }
     
    234238             
    235239  // need to stem this word and find it in the blocked stem index
    236  
    237240  unsigned char  mgWord[MAXSTEMLEN + 1];
    238241  UCArray stemTerm;
    239242  unsigned long stemmerNum = 0;
    240   if (stemMethod == 1) stemmerNum = indexData.sih1.stemmer_num;
    241   else if (stemMethod == 2) stemmerNum = indexData.sih2.stemmer_num;
    242   else if (stemMethod == 3) stemmerNum = indexData.sih3.stemmer_num;
    243    
     243
     244  /* [JFG - Mar 06: Accent folding patch] */
     245  if(stemMethod > STEM_MAX) {
     246    return;
     247    //TODO: throw an error here
     248  }
     249  stemmerNum = indexData.sih[stemMethod-1].stemmer_num;
     250 
    244251  // convert the word to an "mg word"
    245252  mgWord[0] = term.size();
     
    247254 
    248255  // stem the word
    249   stemmer (stemMethod, stemmerNum, mgWord);
    250 
     256  mgpp_stemmer (stemMethod, stemmerNum, mgWord);
    251257  // convert the result back to a UCArray
    252258  stemTerm.insert (stemTerm.end(), &mgWord[1], &mgWord[1] + mgWord[0]);
     
    256262  unsigned long stemElNum;
    257263  bool result = false;
    258   if (stemMethod == 1) {
    259     result = SearchStemBlockDictEl (indexData.stem1File,
    260                indexData.sii1,
    261                indexData.sih1.entries_per_block,
    262                indexData.sih1.dict_size,
     264 
     265  /* [JFG - Mar 06: Accent folding patch] */
     266  result = SearchStemBlockDictEl (indexData.stemFile[stemMethod-1],
     267               indexData.sii[stemMethod-1],
     268               indexData.sih[stemMethod-1].entries_per_block,
     269               indexData.sih[stemMethod-1].dict_size,
    263270               stemTerm,
    264271               stemDictEl,
    265272               stemElNum);
    266 
    267   } else if (stemMethod == 2) {
    268     result = SearchStemBlockDictEl (indexData.stem2File,
    269                indexData.sii2,
    270                indexData.sih2.entries_per_block,
    271                indexData.sih2.dict_size,
    272                stemTerm,
    273                stemDictEl,
    274                stemElNum);
    275 
    276   } else if (stemMethod == 3) {
    277     result = SearchStemBlockDictEl (indexData.stem3File,
    278                indexData.sii3,
    279                indexData.sih3.entries_per_block,
    280                indexData.sih3.dict_size,
    281                stemTerm,
    282                stemDictEl,
    283                stemElNum);
    284   }
    285 
     273 
    286274  if (result) {
    287275    equivWords = stemDictEl.equivWords; 
Note: See TracChangeset for help on using the changeset viewer.