/**************************************************************************
 *
 * stemmer.h -- The stemmer/case folder
 * Copyright (C) 1994  Neil Sharman
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#ifndef STEMMER_H
#define STEMMER_H

/* [RPAP - Jan 97: Stem Index Change] */
/* [JFG - Mar 06: Accent folding patch] */

/*
 * Bit flags selecting which transformations are applied to a word.
 * The flags can be OR-ed together; the resulting values 0..7 are the
 * "method" numbers listed in the table above mgpp_stemmer below.
 */
enum stemMethods {
  STEM_None          = 0,
  STEM_CaseFolding   = 0x1,
  STEM_Stemming      = 0x2,
  STEM_AccentFolding = 0x4
};

/* This is for the QueryParser */
#define CHAR_FLAG_STEM_CaseFold     'i'       /* ignore case */
#define CHAR_FLAG_STEM_NoCaseFold   'c'       /* case sensitive */
#define CHAR_FLAG_STEM_Stemming     's'       /* stem words */
#define CHAR_FLAG_STEM_NoStemming   'u'       /* do not stem words */
#define CHAR_FLAG_STEM_AccentFold   'f'       /* accent fold */
#define CHAR_FLAG_STEM_NoAccentFold 'a'       /* do no accent folding */
#define CHAR_FLAG_STEM_Validator    "icsufa"  /* all of the above */

/* Smallest non-zero method value (equal to STEM_CaseFolding). */
#define STEM_MIN 1
/* All three stemMethods flags OR-ed together (evaluates to 7). */
#define STEM_MAX (STEM_CaseFolding | STEM_Stemming | STEM_AccentFolding)
/*
 * Two values just above STEM_MAX (8 and 9), so they can never collide
 * with a real flag combination.
 * NOTE(review): judging by the names these are out-of-band markers for
 * "partial match" and "invalid method"; their users are not visible in
 * this header -- confirm against the callers.
 */
#define STEM_PARTIAL_MATCH (STEM_MAX+1)
#define STEM_INVALID (STEM_MAX+2)

/*
 * NOTE(review): a mask of 3 covers only the two low bits; how it is
 * applied is not visible in this header -- confirm against the callers.
 */
#define STEMMER_MASK 3

/*
 * NOTE(review): presumably the maximum length of a stemmer description
 * string such as those accepted by mgpp_stemmernumber -- confirm whether
 * the terminating NUL is included.
 */
#define MAX_STEM_DESCRIPTION_LEN 16

/* mgpp_stemmernumber will return the stemmer for
 * a description of the stemmer. Stemmer descriptions
 * are not case sensitive. Valid descriptions are:
 *
 *   'English'
 *   'Lovin'
 *   'French'
 *   'SimpleFrench'
 *
 * More than one description might result in the same
 * stemmer number (for example, for stemming 'English'
 * we currently use the 'Lovin' stemmer).
 *
 * stemmerdescription is a normal C, null-terminated,
 * string.
 *
 * mgpp_stemmernumber will return -1 if it doesn't know the
 * stemmer description.
 *
 * NOTE(review): unlike mgpp_stemmer below, this declaration is not
 * wrapped in extern "C" -- confirm that the difference is intentional.
 */
int mgpp_stemmernumber (unsigned char *stemmerdescription);

/*
 * Apply the transformations selected by 'method' to 'word', using the
 * stemmer identified by 'stemmer'.
 *
 * Method 0 - Do not stem or case fold.
 * Method 1 - Case fold.
 * Method 2 - Stem.
 * Method 3 - Case fold and stem.
 * Method 4 - Accent fold
 * Method 5 - Case fold and accent fold
 * Method 6 - Stem and accent fold
 * Method 7 - Case fold, stem and accent fold
 *
 * The stemmer number should be obtained using function
 * mgpp_stemmernumber above.
 *
 * NOTE(review): 'word' is a non-const pointer, so it is presumably
 * rewritten in place; its exact string format is not visible in this
 * header -- confirm against the implementation.
 */
#ifdef __cplusplus
extern "C"
#endif
void mgpp_stemmer (int method, int stemmer, unsigned char * word);

#endif