Ignore:
Timestamp:
2006-12-11T11:22:20+13:00 (17 years ago)
Author:
shaoqun
Message:

added code for accentfolding

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/mgpp/text/stemmer.h

    r3365 r13477  
    2323#define STEMMER_H
    2424
    25 #include "sysfuncs.h"
     25/* [RPAP - Jan 97: Stem Index Change] */
     26/* [JFG - Mar 06: Accent folding patch] */
     27enum stemMethods {
     28    STEM_None       =   0,
     29    STEM_CaseFolding    =   0x1,
     30    STEM_Stemming       =   0x2,
     31    STEM_AccentFolding  =   0x4,
     32};
     33
     34/* This is for the QueryParser */
     35#define CHAR_FLAG_STEM_CaseFold    'i'  // ignore case
     36#define CHAR_FLAG_STEM_NoCaseFold  'c'  // case sensitive
     37#define CHAR_FLAG_STEM_Stemming    's'  // stem words
     38#define CHAR_FLAG_STEM_NoStemming  'u'  // do not stem words
     39#define CHAR_FLAG_STEM_AccentFold  'f'  // accent fold
     40#define CHAR_FLAG_STEM_NoAccentFold  'a'    // do no accent folding
     41#define CHAR_FLAG_STEM_Validator  "icsufa"  // all of the above
     42
     43
     44#define STEM_MIN 1
     45#define STEM_MAX (STEM_CaseFolding | STEM_Stemming | STEM_AccentFolding)
     46#define STEM_PARTIAL_MATCH  (STEM_MAX+1)
     47#define STEM_INVALID        (STEM_MAX+2)
    2648
    2749#define STEMMER_MASK 3
     
    4769 * stemmer description.
    4870 */
    49 int stemmernumber (u_char *stemmerdescription);
     71int mgpp_stemmernumber (unsigned char *stemmerdescription);
    5072
    5173/*
     
    5476 * Method 2 - Stem.
    5577 * Method 3 - Case fold and stem.
    56  *
     78 * Method 4 - Accent fold
     79 * Method 5 - Case fold and accent fold
     80 * Method 6 - Stem and accent fold
     81 * Method 7 - Case fold, stem and accent fold
    5782 * The stemmer number should be obtained using function
    5883 * mgpp_stemmernumber above.
     
    6186extern "C"
    6287#endif
    63 void stemmer (int method, int stemmer, u_char * word);
     88void mgpp_stemmer (int method, int stemmer, unsigned char * word);
    6489
    6590#endif
Note: See TracChangeset for help on using the changeset viewer.