source: trunk/gsdl/packages/kea/kea-3.0/StopwordsGerman.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.4 KB
Line 
1/*
2 * StopwordsGerman.java
3 * Copyright (C) 2001 Eibe Frank
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20import java.util.*;
21
22/**
23 * Class that can test whether a given string is a stop word.
24 * Lowercases all words before the test.
25 *
26 * This list of German stop words has been obtained from
27 * http://snowball.tartarus.org/german/stop.txt
28 *
29 * But I have deleted/changed some words that I haven't seen before.
30 *
31 * @author Eibe Frank ([email protected])
32 * @version 1.0
33 */
34public class StopwordsGerman extends Stopwords {
35
36 /** The hashtable containing the list of stopwords */
37 private static Hashtable m_Stopwords = null;
38
39 static {
40
41 if (m_Stopwords == null) {
42 m_Stopwords = new Hashtable();
43 Double dummy = new Double(0);
44
45 m_Stopwords.put("aber", dummy);
46
47 m_Stopwords.put("alle", dummy);
48 m_Stopwords.put("allem", dummy);
49 m_Stopwords.put("allen", dummy);
50 m_Stopwords.put("aller", dummy);
51 m_Stopwords.put("alles", dummy);
52
53 m_Stopwords.put("als", dummy);
54 m_Stopwords.put("also", dummy);
55 m_Stopwords.put("am", dummy);
56 m_Stopwords.put("an", dummy);
57
58 m_Stopwords.put("ander", dummy);
59 m_Stopwords.put("andere", dummy);
60 m_Stopwords.put("anderem", dummy);
61 m_Stopwords.put("anderen", dummy);
62 m_Stopwords.put("anderer", dummy);
63 m_Stopwords.put("anderes", dummy);
64 m_Stopwords.put("anderm", dummy);
65 m_Stopwords.put("andern", dummy);
66 m_Stopwords.put("anders", dummy);
67
68 m_Stopwords.put("auch", dummy);
69 m_Stopwords.put("auf", dummy);
70 m_Stopwords.put("aus", dummy);
71 m_Stopwords.put("bei", dummy);
72 m_Stopwords.put("bin", dummy);
73 m_Stopwords.put("bis", dummy);
74 m_Stopwords.put("bist", dummy);
75 m_Stopwords.put("da", dummy);
76 m_Stopwords.put("damit", dummy);
77 m_Stopwords.put("dann", dummy);
78
79 m_Stopwords.put("der", dummy);
80 m_Stopwords.put("den", dummy);
81 m_Stopwords.put("des", dummy);
82 m_Stopwords.put("dem", dummy);
83 m_Stopwords.put("die", dummy);
84 m_Stopwords.put("das", dummy);
85
86 m_Stopwords.put("da\u00df", dummy);
87
88 m_Stopwords.put("derselbe", dummy);
89 m_Stopwords.put("derselben", dummy);
90 m_Stopwords.put("denselben", dummy);
91 m_Stopwords.put("desselben", dummy);
92 m_Stopwords.put("demselben", dummy);
93 m_Stopwords.put("dieselbe", dummy);
94 m_Stopwords.put("dieselben", dummy);
95 m_Stopwords.put("dasselbe", dummy);
96
97 m_Stopwords.put("dazu", dummy);
98
99 m_Stopwords.put("dein", dummy);
100 m_Stopwords.put("deine", dummy);
101 m_Stopwords.put("deinem", dummy);
102 m_Stopwords.put("deinen", dummy);
103 m_Stopwords.put("deiner", dummy);
104 m_Stopwords.put("deines", dummy);
105
106 m_Stopwords.put("denn", dummy);
107
108 m_Stopwords.put("derer", dummy);
109 m_Stopwords.put("dessen", dummy);
110
111 m_Stopwords.put("dich", dummy);
112 m_Stopwords.put("dir", dummy);
113 m_Stopwords.put("du", dummy);
114
115 m_Stopwords.put("dies", dummy);
116 m_Stopwords.put("diese", dummy);
117 m_Stopwords.put("diesem", dummy);
118 m_Stopwords.put("diesen", dummy);
119 m_Stopwords.put("dieser", dummy);
120 m_Stopwords.put("dieses", dummy);
121
122 m_Stopwords.put("doch", dummy);
123 m_Stopwords.put("dort", dummy);
124
125 m_Stopwords.put("durch", dummy);
126
127 m_Stopwords.put("ein", dummy);
128 m_Stopwords.put("eine", dummy);
129 m_Stopwords.put("einem", dummy);
130 m_Stopwords.put("einen", dummy);
131 m_Stopwords.put("einer", dummy);
132 m_Stopwords.put("eines", dummy);
133
134 m_Stopwords.put("einig", dummy);
135 m_Stopwords.put("einige", dummy);
136 m_Stopwords.put("einigem", dummy);
137 m_Stopwords.put("einigen", dummy);
138 m_Stopwords.put("einiger", dummy);
139 m_Stopwords.put("einiges", dummy);
140
141 m_Stopwords.put("einmal", dummy);
142
143 m_Stopwords.put("er", dummy);
144 m_Stopwords.put("ihn", dummy);
145 m_Stopwords.put("ihm", dummy);
146
147 m_Stopwords.put("es", dummy);
148 m_Stopwords.put("etwas", dummy);
149
150 m_Stopwords.put("euer", dummy);
151 m_Stopwords.put("eure", dummy);
152 m_Stopwords.put("eurem", dummy);
153 m_Stopwords.put("euren", dummy);
154 m_Stopwords.put("eurer", dummy);
155 m_Stopwords.put("eures", dummy);
156
157 m_Stopwords.put("f\u00fcr", dummy);
158 m_Stopwords.put("gegen", dummy);
159 m_Stopwords.put("gewesen", dummy);
160 m_Stopwords.put("hab", dummy);
161 m_Stopwords.put("habe", dummy);
162 m_Stopwords.put("haben", dummy);
163 m_Stopwords.put("hat", dummy);
164 m_Stopwords.put("hatte", dummy);
165 m_Stopwords.put("hatten", dummy);
166 m_Stopwords.put("hier", dummy);
167 m_Stopwords.put("hin", dummy);
168 m_Stopwords.put("hinter", dummy);
169
170 m_Stopwords.put("ich", dummy);
171 m_Stopwords.put("mich", dummy);
172 m_Stopwords.put("mir", dummy);
173
174 m_Stopwords.put("ihr", dummy);
175 m_Stopwords.put("ihre", dummy);
176 m_Stopwords.put("ihrem", dummy);
177 m_Stopwords.put("ihren", dummy);
178 m_Stopwords.put("ihrer", dummy);
179 m_Stopwords.put("ihres", dummy);
180 m_Stopwords.put("euch", dummy);
181
182 m_Stopwords.put("im", dummy);
183 m_Stopwords.put("in", dummy);
184 m_Stopwords.put("indem", dummy);
185 m_Stopwords.put("ins", dummy);
186 m_Stopwords.put("ist", dummy);
187
188 m_Stopwords.put("jede", dummy);
189 m_Stopwords.put("jedem", dummy);
190 m_Stopwords.put("jeden", dummy);
191 m_Stopwords.put("jeder", dummy);
192 m_Stopwords.put("jedes", dummy);
193
194 m_Stopwords.put("jene", dummy);
195 m_Stopwords.put("jenem", dummy);
196 m_Stopwords.put("jenen", dummy);
197 m_Stopwords.put("jener", dummy);
198 m_Stopwords.put("jenes", dummy);
199
200 m_Stopwords.put("jetzt", dummy);
201 m_Stopwords.put("kann", dummy);
202
203 m_Stopwords.put("kein", dummy);
204 m_Stopwords.put("keine", dummy);
205 m_Stopwords.put("keinem", dummy);
206 m_Stopwords.put("keinen", dummy);
207 m_Stopwords.put("keiner", dummy);
208 m_Stopwords.put("keines", dummy);
209
210 m_Stopwords.put("k\u00f6nnen", dummy);
211 m_Stopwords.put("k\u00f6nnte", dummy);
212 m_Stopwords.put("machen", dummy);
213 m_Stopwords.put("man", dummy);
214
215 m_Stopwords.put("manche", dummy);
216 m_Stopwords.put("manchem", dummy);
217 m_Stopwords.put("manchen", dummy);
218 m_Stopwords.put("mancher", dummy);
219 m_Stopwords.put("manches", dummy);
220
221 m_Stopwords.put("mein", dummy);
222 m_Stopwords.put("meine", dummy);
223 m_Stopwords.put("meinem", dummy);
224 m_Stopwords.put("meinen", dummy);
225 m_Stopwords.put("meiner", dummy);
226 m_Stopwords.put("meines", dummy);
227
228 m_Stopwords.put("mit", dummy);
229 m_Stopwords.put("muss", dummy);
230 m_Stopwords.put("musste", dummy);
231 m_Stopwords.put("nach", dummy);
232 m_Stopwords.put("nicht", dummy);
233 m_Stopwords.put("nichts", dummy);
234 m_Stopwords.put("noch", dummy);
235 m_Stopwords.put("nun", dummy);
236 m_Stopwords.put("nur", dummy);
237 m_Stopwords.put("ob", dummy);
238 m_Stopwords.put("oder", dummy);
239 m_Stopwords.put("ohne", dummy);
240 m_Stopwords.put("sehr", dummy);
241
242 m_Stopwords.put("sein", dummy);
243 m_Stopwords.put("seine", dummy);
244 m_Stopwords.put("seinem", dummy);
245 m_Stopwords.put("seinen", dummy);
246 m_Stopwords.put("seiner", dummy);
247 m_Stopwords.put("seines", dummy);
248
249 m_Stopwords.put("selbst", dummy);
250 m_Stopwords.put("sich", dummy);
251
252 m_Stopwords.put("sie", dummy);
253 m_Stopwords.put("ihnen", dummy);
254
255 m_Stopwords.put("sind", dummy);
256 m_Stopwords.put("so", dummy);
257
258 m_Stopwords.put("solche", dummy);
259 m_Stopwords.put("solchem", dummy);
260 m_Stopwords.put("solchen", dummy);
261 m_Stopwords.put("solcher", dummy);
262 m_Stopwords.put("solches", dummy);
263
264 m_Stopwords.put("soll", dummy);
265 m_Stopwords.put("sollte", dummy);
266 m_Stopwords.put("sondern", dummy);
267 m_Stopwords.put("sonst", dummy);
268 m_Stopwords.put("\00fcber", dummy);
269 m_Stopwords.put("um", dummy);
270 m_Stopwords.put("und", dummy);
271
272 m_Stopwords.put("uns", dummy);
273 m_Stopwords.put("unser", dummy);
274 m_Stopwords.put("unserem", dummy);
275 m_Stopwords.put("unseren", dummy);
276 m_Stopwords.put("unsere", dummy);
277 m_Stopwords.put("unseres", dummy);
278
279 m_Stopwords.put("unter", dummy);
280 m_Stopwords.put("viel", dummy);
281 m_Stopwords.put("vom", dummy);
282 m_Stopwords.put("von", dummy);
283 m_Stopwords.put("vor", dummy);
284 m_Stopwords.put("w\u00e4hrend", dummy);
285 m_Stopwords.put("war", dummy);
286 m_Stopwords.put("waren", dummy);
287 m_Stopwords.put("warst", dummy);
288 m_Stopwords.put("was", dummy);
289 m_Stopwords.put("weg", dummy);
290 m_Stopwords.put("weil", dummy);
291 m_Stopwords.put("weiter", dummy);
292
293 m_Stopwords.put("welche", dummy);
294 m_Stopwords.put("welchem", dummy);
295 m_Stopwords.put("welchen", dummy);
296 m_Stopwords.put("welcher", dummy);
297 m_Stopwords.put("welches", dummy);
298
299 m_Stopwords.put("wenn", dummy);
300 m_Stopwords.put("werde", dummy);
301 m_Stopwords.put("werden", dummy);
302 m_Stopwords.put("wie", dummy);
303 m_Stopwords.put("wieder", dummy);
304 m_Stopwords.put("will", dummy);
305 m_Stopwords.put("wir", dummy);
306 m_Stopwords.put("wird", dummy);
307 m_Stopwords.put("wirst", dummy);
308 m_Stopwords.put("wo", dummy);
309 m_Stopwords.put("wollen", dummy);
310 m_Stopwords.put("wollte", dummy);
311 m_Stopwords.put("w\u00fcrde", dummy);
312 m_Stopwords.put("w\u00fcrden", dummy);
313 m_Stopwords.put("zu", dummy);
314 m_Stopwords.put("zum", dummy);
315 m_Stopwords.put("zur", dummy);
316 m_Stopwords.put("zwar", dummy);
317 m_Stopwords.put("zwischen", dummy);
318 }
319 }
320
321 /**
322 * Returns true if the given string is a stop word.
323 */
324 public boolean isStopword(String str) {
325
326 return m_Stopwords.containsKey(str.toLowerCase());
327 }
328}
329
330
Note: See TracBrowser for help on using the repository browser.