source: trunk/gsdl/packages/kea/kea-3.0/Stemmer.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 1.7 KB
Line 
1/*
2 * Stemmer.java
3 * Copyright (C) 2001 Eibe Frank
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20import java.util.*;
21import java.io.*;
22
23/**
24 * Abstract class for stemmers.
25 *
26 * @author Eibe Frank ([email protected])
27 * @version 1.0
28 */
public abstract class Stemmer implements Serializable {

  /**
   * Iterated stemming of the given word.
   *
   * @param str the word to stem
   * @return the stemmed form of the word
   */
  public abstract String stem(String str);

  /**
   * Stems everything in the given string.
   *
   * <p>The input is scanned code point by code point. Each maximal run that
   * starts with a letter or digit is treated as a word and replaced by the
   * result of {@link #stem(String)}; every other character is copied to the
   * output unchanged.
   *
   * <p>Apostrophes get special treatment: an apostrophe that occurs while a
   * word is open does not end the word and is passed to {@code stem} as part
   * of it (so {@code "don't"} is stemmed as a single word, and a trailing
   * apostrophe as in {@code "dogs'"} stays attached to the word). An
   * apostrophe that occurs outside a word is copied to the output as is.
   *
   * <p>Scanning by code point rather than by UTF-16 {@code char} means that
   * letters outside the Basic Multilingual Plane are recognised as letters
   * instead of being split into two surrogates, which
   * {@code Character.isLetterOrDigit(char)} would classify as separators and
   * thereby break the surrounding word in two.
   *
   * @param str the text to process
   * @return the text with every word replaced by its stem
   * @throws NullPointerException if {@code str} is {@code null}
   */
  public String stemString(String str) {

    final int length = str.length();
    // Only ever touched by this call, so the unsynchronised builder suffices.
    StringBuilder result = new StringBuilder(length);

    // Index of the first char of the word currently being read, or -1 when
    // the scanner is between words.
    int start = -1;

    int j = 0;
    while (j < length) {
      int codePoint = str.codePointAt(j);

      if (Character.isLetterOrDigit(codePoint)) {
        if (start == -1) {
          start = j;
        }
      } else if (codePoint == '\'') {
        // Inside a word the apostrophe is simply kept as part of the word;
        // outside a word it is ordinary punctuation.
        if (start == -1) {
          result.append('\'');
        }
      } else {
        // Any other character terminates the pending word, if there is one.
        if (start != -1) {
          result.append(stem(str.substring(start, j)));
          start = -1;
        }
        // appendCodePoint writes a BMP value (including an unpaired
        // surrogate) as one char and a supplementary value as its pair, so
        // the separator is reproduced exactly as it appeared in the input.
        result.appendCodePoint(codePoint);
      }

      // Advance by one or two chars depending on the code point's width.
      j += Character.charCount(codePoint);
    }

    // Flush a word that runs up to the end of the input.
    if (start != -1) {
      result.append(stem(str.substring(start, length)));
    }
    return result.toString();
  }
}
67
68
Note: See TracBrowser for help on using the repository browser.