source: indexers/trunk/mgpp/text/stemmer.h@ 17930

Last change on this file since 17930 was 17930, checked in by mdewsnip, 15 years ago

Removed comma after last item in enum list (caused problems with some compilers).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 2.8 KB
Line 
/**************************************************************************
 *
 * stemmer.h -- The stemmer/case folder
 * Copyright (C) 1994 Neil Sharman
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/
21
22#ifndef STEMMER_H
23#define STEMMER_H
24
/* [RPAP - Jan 97: Stem Index Change] */
/* [JFG - Mar 06: Accent folding patch] */
/*
 * Bit flags naming the individual word normalisations.  Each flag
 * occupies its own bit, so flags can be OR-ed together; the resulting
 * values 0..7 correspond to the "Method 0".."Method 7" list in the
 * comment on mgpp_stemmer().
 *
 * Do not put a comma after the last enumerator: it caused problems
 * with some compilers.
 */
enum stemMethods {
  STEM_None          = 0,
  STEM_CaseFolding   = 0x1,
  STEM_Stemming      = 0x2,
  STEM_AccentFolding = 0x4
};
33
/*
 * This is for the QueryParser: single-character flags, one pair
 * (on / off) per normalisation.  CHAR_FLAG_STEM_Validator is the string
 * made of all six flag characters.
 */
#define CHAR_FLAG_STEM_CaseFold     'i'      /* ignore case */
#define CHAR_FLAG_STEM_NoCaseFold   'c'      /* case sensitive */
#define CHAR_FLAG_STEM_Stemming     's'      /* stem words */
#define CHAR_FLAG_STEM_NoStemming   'u'      /* do not stem words */
#define CHAR_FLAG_STEM_AccentFold   'f'      /* accent fold */
#define CHAR_FLAG_STEM_NoAccentFold 'a'      /* do not accent fold */
#define CHAR_FLAG_STEM_Validator    "icsufa" /* all of the above */
42
43
/*
 * Bounds of the stem-method values.  STEM_MAX is every stemMethods flag
 * OR-ed together (7); STEM_PARTIAL_MATCH and STEM_INVALID are the two
 * values immediately above that range (8 and 9).
 */
#define STEM_MIN 1
#define STEM_MAX (STEM_CaseFolding | STEM_Stemming | STEM_AccentFolding)
#define STEM_PARTIAL_MATCH (STEM_MAX + 1)
#define STEM_INVALID (STEM_MAX + 2)

/*
 * NOTE(review): 3 equals STEM_CaseFolding | STEM_Stemming, so this mask
 * does not include the STEM_AccentFolding bit (0x4).  Confirm against
 * the users of STEMMER_MASK whether that is intended.
 */
#define STEMMER_MASK 3
/* Presumably a length limit for stemmer description strings -- confirm against its users. */
#define MAX_STEM_DESCRIPTION_LEN 16
51
/* mgpp_stemmernumber will return the stemmer number for
 * a description of the stemmer. Stemmer descriptions
 * are not case sensitive. Valid descriptions are:
 *
 * 'English'
 * 'Lovin'
 * 'French'
 * 'SimpleFrench'
 *
 * More than one description might result in the same
 * stemmer number (for example, for stemming 'English'
 * we currently use the 'Lovin' stemmer).
 *
 * stemmerdescription is a normal C, null-terminated,
 * string.
 *
 * mgpp_stemmernumber will return -1 if it doesn't know the
 * stemmer description.
 *
 * NOTE(review): unlike mgpp_stemmer below, this declaration is
 * not wrapped in extern "C" for C++ includers -- confirm that
 * the difference is intended.
 */
int mgpp_stemmernumber (unsigned char *stemmerdescription);
72
/*
 * Process word according to method:
 *
 * Method 0 - Do not stem or case fold.
 * Method 1 - Case fold.
 * Method 2 - Stem.
 * Method 3 - Case fold and stem.
 * Method 4 - Accent fold.
 * Method 5 - Case fold and accent fold.
 * Method 6 - Stem and accent fold.
 * Method 7 - Case fold, stem and accent fold.
 *
 * The stemmer number should be obtained using function
 * mgpp_stemmernumber above.
 *
 * NOTE(review): the function returns void and takes word as a
 * non-const pointer, so the result is presumably written back
 * into word. The expected layout of word (null-terminated or
 * length-prefixed) is not stated in this header -- confirm
 * against the implementation.
 */
#ifdef __cplusplus
extern "C"
#endif
void mgpp_stemmer (int method, int stemmer, unsigned char *word);
89
90#endif
Note: See TracBrowser for help on using the repository browser.