/************************************************************************** * * stemmer.cpp -- The stemmer/case folder * Copyright (C) 1994 Neil Sharman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **************************************************************************/ #include "sysfuncs.h" #include "stemmer.h" #include "lovinstem.h" #include "simplefrenchstem.h" #include "unitool.h" #ifdef ENABLE_ACCENTFOLD /* [JFG - Mar 06: Accent folding patch] */ #include "unac.h" #endif #define LOVINSTEMMER 0 #define SIMPLEFRENCHSTEMMER 1 /* decode the utf-8 encoded unicode, casefold and then recode * making sure the final length doesn't exceed the original * length */ static void mgpp_unicode_casefold (u_char *word) { unsigned short out[256]; /* temp space */ int i; int len; /* decode */ utf8_word_to_unicode (word, out, 255); len = out[0]; /* casefold and simplify-fold */ for (i=0; i