Changeset 12884


Ignore:
Timestamp:
2006-09-28T10:44:01+12:00 (18 years ago)
Author:
kjdon
Message:

Accent folding patch thanks to Juan Grigera. Parsing of stem/case/accent term modifiers now uses defines from mg_files.h.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/mgpp/text/GSDLQueryParser.cpp

    r12313 r12884  
    141141
    142142static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) {
    143   // here expect el to contain some of c,s,i,u
    144   // stem method 0 = c,u  00
    145   // stem method 1 = i,u  01  - default for DL
    146   // stem method 2 = c, s  10
    147   // stem method 3 = i,s  11
    148 
     143  // here expect el to contain some of c,s,i,u,f,a -- see mg_files.h CHAR_FLAG_STEM_* constants
    149144  unsigned long stem = (unsigned long)defaultStemMethod;
    150145     
     
    152147  UCArray::const_iterator end = el.text.end();
    153148
    154   unsigned char c1 = *here;
    155   if (!(c1 == 'c'|| c1 == 'i' || c1 == 'u' || c1 == 's'))
    156     return 4; // incorrect format
    157  
    158   ++here;
    159   unsigned char c2 = 'a';
    160   if (here !=end) {
    161     c2 = *here;
    162     if (!(c2 == 'c'|| c2 == 'i' || c2 == 'u' || c2 == 's'))
    163       return 4; // incorrect format
    164   }
    165  
    166   if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1
    167   if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0
    168   if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1
    169   if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0
     149  /* [JFG - Mar 06: Accent folding patch] */
     150  /* Changed to use CHAR_FLAG_STEM* constants from mg_files.h */
     151  while(here != end) {
     152    unsigned char ch = *here;
     153    if (strchr (CHAR_FLAG_STEM_Validator, ch) == NULL)
     154        return STEM_INVALID; // incorrect format
     155
     156    switch(ch) {
     157        case CHAR_FLAG_STEM_CaseFold:       // ignore case (fold)
     158            stem |= STEM_CaseFolding;
     159            break;
     160        case CHAR_FLAG_STEM_NoCaseFold:     // case sensitive
     161            stem &= (~STEM_CaseFolding);
     162            break;
     163        case CHAR_FLAG_STEM_Stemming:       // stem words
     164            stem |= STEM_Stemming;
     165            break;
     166        case CHAR_FLAG_STEM_NoStemming:     // do not stem words
     167            stem &= (~STEM_Stemming);
     168            break;
     169        case CHAR_FLAG_STEM_AccentFold:     // accent fold
     170            stem |= STEM_AccentFolding;
     171            break;
     172        case CHAR_FLAG_STEM_NoAccentFold:   // do no accent folding
     173            stem &= (~STEM_AccentFolding);
     174            break;
     175    };
     176
     177    ++here;     
     178  }
    170179  return stem;
    171180}
     
    190199      if (ParseLexEl (here, end, stem) && stem.lexType == TermE) {
    191200    termNode.stemMethod = GetStemMethod(stem, defaultStemMethod);
    192     if (termNode.stemMethod == 4) { // error so backtrack
     201    /* [JFG - Mar 06: Accent folding patch] */
     202    /* use STEM_INVALID instead of hardcoded 4 */
     203    if (termNode.stemMethod == STEM_INVALID) { // error so backtrack
    193204      here = oldHere;
    194205      termNode.stemMethod = (unsigned long)defaultStemMethod;
     
    211222   
    212223    if (partial_match) {
    213       termNode.stemMethod |= 4; // set bit 2 to 1
    214       termNode.stemMethod &=0xd; // set bit 1 to 0 // we dont have stemming on if doing partial matching.
     224      /* [JFG - Mar 06: Accent folding patch] */
     225      /* use STEM_PARTIAL_MATCH flag */
     226      termNode.stemMethod |= STEM_PARTIAL_MATCH;    // set partial match flag
     227      termNode.stemMethod &= (~STEM_Stemming);      // we dont have stemming on if doing partial matching.
    215228    }
    216229    oldHere = here;
Note: See TracChangeset for help on using the changeset viewer.