1 | /*
|
---|
2 | * StopwordsGerman.java
|
---|
3 | * Copyright (C) 2001 Eibe Frank
|
---|
4 | *
|
---|
5 | * This program is free software; you can redistribute it and/or modify
|
---|
6 | * it under the terms of the GNU General Public License as published by
|
---|
7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
8 | * (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This program is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | * GNU General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU General Public License
|
---|
16 | * along with this program; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
18 | */
|
---|
19 |
|
---|
20 | import java.util.*;
|
---|
21 |
|
---|
22 | /**
|
---|
23 | * Class that can test whether a given string is a stop word.
|
---|
24 | * Lowercases all words before the test.
|
---|
25 | *
|
---|
26 | * This list of German stop words has been obtained from
|
---|
27 | * http://snowball.tartarus.org/german/stop.txt
|
---|
28 | *
|
---|
29 | * But I have deleted/changed some words that I haven't seen before.
|
---|
30 | *
|
---|
31 | * @author Eibe Frank ([email protected])
|
---|
32 | * @version 1.0
|
---|
33 | */
|
---|
34 | public class StopwordsGerman extends Stopwords {
|
---|
35 |
|
---|
36 | /** The hashtable containing the list of stopwords */
|
---|
37 | private static Hashtable m_Stopwords = null;
|
---|
38 |
|
---|
39 | static {
|
---|
40 |
|
---|
41 | if (m_Stopwords == null) {
|
---|
42 | m_Stopwords = new Hashtable();
|
---|
43 | Double dummy = new Double(0);
|
---|
44 |
|
---|
45 | m_Stopwords.put("aber", dummy);
|
---|
46 |
|
---|
47 | m_Stopwords.put("alle", dummy);
|
---|
48 | m_Stopwords.put("allem", dummy);
|
---|
49 | m_Stopwords.put("allen", dummy);
|
---|
50 | m_Stopwords.put("aller", dummy);
|
---|
51 | m_Stopwords.put("alles", dummy);
|
---|
52 |
|
---|
53 | m_Stopwords.put("als", dummy);
|
---|
54 | m_Stopwords.put("also", dummy);
|
---|
55 | m_Stopwords.put("am", dummy);
|
---|
56 | m_Stopwords.put("an", dummy);
|
---|
57 |
|
---|
58 | m_Stopwords.put("ander", dummy);
|
---|
59 | m_Stopwords.put("andere", dummy);
|
---|
60 | m_Stopwords.put("anderem", dummy);
|
---|
61 | m_Stopwords.put("anderen", dummy);
|
---|
62 | m_Stopwords.put("anderer", dummy);
|
---|
63 | m_Stopwords.put("anderes", dummy);
|
---|
64 | m_Stopwords.put("anderm", dummy);
|
---|
65 | m_Stopwords.put("andern", dummy);
|
---|
66 | m_Stopwords.put("anders", dummy);
|
---|
67 |
|
---|
68 | m_Stopwords.put("auch", dummy);
|
---|
69 | m_Stopwords.put("auf", dummy);
|
---|
70 | m_Stopwords.put("aus", dummy);
|
---|
71 | m_Stopwords.put("bei", dummy);
|
---|
72 | m_Stopwords.put("bin", dummy);
|
---|
73 | m_Stopwords.put("bis", dummy);
|
---|
74 | m_Stopwords.put("bist", dummy);
|
---|
75 | m_Stopwords.put("da", dummy);
|
---|
76 | m_Stopwords.put("damit", dummy);
|
---|
77 | m_Stopwords.put("dann", dummy);
|
---|
78 |
|
---|
79 | m_Stopwords.put("der", dummy);
|
---|
80 | m_Stopwords.put("den", dummy);
|
---|
81 | m_Stopwords.put("des", dummy);
|
---|
82 | m_Stopwords.put("dem", dummy);
|
---|
83 | m_Stopwords.put("die", dummy);
|
---|
84 | m_Stopwords.put("das", dummy);
|
---|
85 |
|
---|
86 | m_Stopwords.put("da\u00df", dummy);
|
---|
87 |
|
---|
88 | m_Stopwords.put("derselbe", dummy);
|
---|
89 | m_Stopwords.put("derselben", dummy);
|
---|
90 | m_Stopwords.put("denselben", dummy);
|
---|
91 | m_Stopwords.put("desselben", dummy);
|
---|
92 | m_Stopwords.put("demselben", dummy);
|
---|
93 | m_Stopwords.put("dieselbe", dummy);
|
---|
94 | m_Stopwords.put("dieselben", dummy);
|
---|
95 | m_Stopwords.put("dasselbe", dummy);
|
---|
96 |
|
---|
97 | m_Stopwords.put("dazu", dummy);
|
---|
98 |
|
---|
99 | m_Stopwords.put("dein", dummy);
|
---|
100 | m_Stopwords.put("deine", dummy);
|
---|
101 | m_Stopwords.put("deinem", dummy);
|
---|
102 | m_Stopwords.put("deinen", dummy);
|
---|
103 | m_Stopwords.put("deiner", dummy);
|
---|
104 | m_Stopwords.put("deines", dummy);
|
---|
105 |
|
---|
106 | m_Stopwords.put("denn", dummy);
|
---|
107 |
|
---|
108 | m_Stopwords.put("derer", dummy);
|
---|
109 | m_Stopwords.put("dessen", dummy);
|
---|
110 |
|
---|
111 | m_Stopwords.put("dich", dummy);
|
---|
112 | m_Stopwords.put("dir", dummy);
|
---|
113 | m_Stopwords.put("du", dummy);
|
---|
114 |
|
---|
115 | m_Stopwords.put("dies", dummy);
|
---|
116 | m_Stopwords.put("diese", dummy);
|
---|
117 | m_Stopwords.put("diesem", dummy);
|
---|
118 | m_Stopwords.put("diesen", dummy);
|
---|
119 | m_Stopwords.put("dieser", dummy);
|
---|
120 | m_Stopwords.put("dieses", dummy);
|
---|
121 |
|
---|
122 | m_Stopwords.put("doch", dummy);
|
---|
123 | m_Stopwords.put("dort", dummy);
|
---|
124 |
|
---|
125 | m_Stopwords.put("durch", dummy);
|
---|
126 |
|
---|
127 | m_Stopwords.put("ein", dummy);
|
---|
128 | m_Stopwords.put("eine", dummy);
|
---|
129 | m_Stopwords.put("einem", dummy);
|
---|
130 | m_Stopwords.put("einen", dummy);
|
---|
131 | m_Stopwords.put("einer", dummy);
|
---|
132 | m_Stopwords.put("eines", dummy);
|
---|
133 |
|
---|
134 | m_Stopwords.put("einig", dummy);
|
---|
135 | m_Stopwords.put("einige", dummy);
|
---|
136 | m_Stopwords.put("einigem", dummy);
|
---|
137 | m_Stopwords.put("einigen", dummy);
|
---|
138 | m_Stopwords.put("einiger", dummy);
|
---|
139 | m_Stopwords.put("einiges", dummy);
|
---|
140 |
|
---|
141 | m_Stopwords.put("einmal", dummy);
|
---|
142 |
|
---|
143 | m_Stopwords.put("er", dummy);
|
---|
144 | m_Stopwords.put("ihn", dummy);
|
---|
145 | m_Stopwords.put("ihm", dummy);
|
---|
146 |
|
---|
147 | m_Stopwords.put("es", dummy);
|
---|
148 | m_Stopwords.put("etwas", dummy);
|
---|
149 |
|
---|
150 | m_Stopwords.put("euer", dummy);
|
---|
151 | m_Stopwords.put("eure", dummy);
|
---|
152 | m_Stopwords.put("eurem", dummy);
|
---|
153 | m_Stopwords.put("euren", dummy);
|
---|
154 | m_Stopwords.put("eurer", dummy);
|
---|
155 | m_Stopwords.put("eures", dummy);
|
---|
156 |
|
---|
157 | m_Stopwords.put("f\u00fcr", dummy);
|
---|
158 | m_Stopwords.put("gegen", dummy);
|
---|
159 | m_Stopwords.put("gewesen", dummy);
|
---|
160 | m_Stopwords.put("hab", dummy);
|
---|
161 | m_Stopwords.put("habe", dummy);
|
---|
162 | m_Stopwords.put("haben", dummy);
|
---|
163 | m_Stopwords.put("hat", dummy);
|
---|
164 | m_Stopwords.put("hatte", dummy);
|
---|
165 | m_Stopwords.put("hatten", dummy);
|
---|
166 | m_Stopwords.put("hier", dummy);
|
---|
167 | m_Stopwords.put("hin", dummy);
|
---|
168 | m_Stopwords.put("hinter", dummy);
|
---|
169 |
|
---|
170 | m_Stopwords.put("ich", dummy);
|
---|
171 | m_Stopwords.put("mich", dummy);
|
---|
172 | m_Stopwords.put("mir", dummy);
|
---|
173 |
|
---|
174 | m_Stopwords.put("ihr", dummy);
|
---|
175 | m_Stopwords.put("ihre", dummy);
|
---|
176 | m_Stopwords.put("ihrem", dummy);
|
---|
177 | m_Stopwords.put("ihren", dummy);
|
---|
178 | m_Stopwords.put("ihrer", dummy);
|
---|
179 | m_Stopwords.put("ihres", dummy);
|
---|
180 | m_Stopwords.put("euch", dummy);
|
---|
181 |
|
---|
182 | m_Stopwords.put("im", dummy);
|
---|
183 | m_Stopwords.put("in", dummy);
|
---|
184 | m_Stopwords.put("indem", dummy);
|
---|
185 | m_Stopwords.put("ins", dummy);
|
---|
186 | m_Stopwords.put("ist", dummy);
|
---|
187 |
|
---|
188 | m_Stopwords.put("jede", dummy);
|
---|
189 | m_Stopwords.put("jedem", dummy);
|
---|
190 | m_Stopwords.put("jeden", dummy);
|
---|
191 | m_Stopwords.put("jeder", dummy);
|
---|
192 | m_Stopwords.put("jedes", dummy);
|
---|
193 |
|
---|
194 | m_Stopwords.put("jene", dummy);
|
---|
195 | m_Stopwords.put("jenem", dummy);
|
---|
196 | m_Stopwords.put("jenen", dummy);
|
---|
197 | m_Stopwords.put("jener", dummy);
|
---|
198 | m_Stopwords.put("jenes", dummy);
|
---|
199 |
|
---|
200 | m_Stopwords.put("jetzt", dummy);
|
---|
201 | m_Stopwords.put("kann", dummy);
|
---|
202 |
|
---|
203 | m_Stopwords.put("kein", dummy);
|
---|
204 | m_Stopwords.put("keine", dummy);
|
---|
205 | m_Stopwords.put("keinem", dummy);
|
---|
206 | m_Stopwords.put("keinen", dummy);
|
---|
207 | m_Stopwords.put("keiner", dummy);
|
---|
208 | m_Stopwords.put("keines", dummy);
|
---|
209 |
|
---|
210 | m_Stopwords.put("k\u00f6nnen", dummy);
|
---|
211 | m_Stopwords.put("k\u00f6nnte", dummy);
|
---|
212 | m_Stopwords.put("machen", dummy);
|
---|
213 | m_Stopwords.put("man", dummy);
|
---|
214 |
|
---|
215 | m_Stopwords.put("manche", dummy);
|
---|
216 | m_Stopwords.put("manchem", dummy);
|
---|
217 | m_Stopwords.put("manchen", dummy);
|
---|
218 | m_Stopwords.put("mancher", dummy);
|
---|
219 | m_Stopwords.put("manches", dummy);
|
---|
220 |
|
---|
221 | m_Stopwords.put("mein", dummy);
|
---|
222 | m_Stopwords.put("meine", dummy);
|
---|
223 | m_Stopwords.put("meinem", dummy);
|
---|
224 | m_Stopwords.put("meinen", dummy);
|
---|
225 | m_Stopwords.put("meiner", dummy);
|
---|
226 | m_Stopwords.put("meines", dummy);
|
---|
227 |
|
---|
228 | m_Stopwords.put("mit", dummy);
|
---|
229 | m_Stopwords.put("muss", dummy);
|
---|
230 | m_Stopwords.put("musste", dummy);
|
---|
231 | m_Stopwords.put("nach", dummy);
|
---|
232 | m_Stopwords.put("nicht", dummy);
|
---|
233 | m_Stopwords.put("nichts", dummy);
|
---|
234 | m_Stopwords.put("noch", dummy);
|
---|
235 | m_Stopwords.put("nun", dummy);
|
---|
236 | m_Stopwords.put("nur", dummy);
|
---|
237 | m_Stopwords.put("ob", dummy);
|
---|
238 | m_Stopwords.put("oder", dummy);
|
---|
239 | m_Stopwords.put("ohne", dummy);
|
---|
240 | m_Stopwords.put("sehr", dummy);
|
---|
241 |
|
---|
242 | m_Stopwords.put("sein", dummy);
|
---|
243 | m_Stopwords.put("seine", dummy);
|
---|
244 | m_Stopwords.put("seinem", dummy);
|
---|
245 | m_Stopwords.put("seinen", dummy);
|
---|
246 | m_Stopwords.put("seiner", dummy);
|
---|
247 | m_Stopwords.put("seines", dummy);
|
---|
248 |
|
---|
249 | m_Stopwords.put("selbst", dummy);
|
---|
250 | m_Stopwords.put("sich", dummy);
|
---|
251 |
|
---|
252 | m_Stopwords.put("sie", dummy);
|
---|
253 | m_Stopwords.put("ihnen", dummy);
|
---|
254 |
|
---|
255 | m_Stopwords.put("sind", dummy);
|
---|
256 | m_Stopwords.put("so", dummy);
|
---|
257 |
|
---|
258 | m_Stopwords.put("solche", dummy);
|
---|
259 | m_Stopwords.put("solchem", dummy);
|
---|
260 | m_Stopwords.put("solchen", dummy);
|
---|
261 | m_Stopwords.put("solcher", dummy);
|
---|
262 | m_Stopwords.put("solches", dummy);
|
---|
263 |
|
---|
264 | m_Stopwords.put("soll", dummy);
|
---|
265 | m_Stopwords.put("sollte", dummy);
|
---|
266 | m_Stopwords.put("sondern", dummy);
|
---|
267 | m_Stopwords.put("sonst", dummy);
|
---|
268 | m_Stopwords.put("\00fcber", dummy);
|
---|
269 | m_Stopwords.put("um", dummy);
|
---|
270 | m_Stopwords.put("und", dummy);
|
---|
271 |
|
---|
272 | m_Stopwords.put("uns", dummy);
|
---|
273 | m_Stopwords.put("unser", dummy);
|
---|
274 | m_Stopwords.put("unserem", dummy);
|
---|
275 | m_Stopwords.put("unseren", dummy);
|
---|
276 | m_Stopwords.put("unsere", dummy);
|
---|
277 | m_Stopwords.put("unseres", dummy);
|
---|
278 |
|
---|
279 | m_Stopwords.put("unter", dummy);
|
---|
280 | m_Stopwords.put("viel", dummy);
|
---|
281 | m_Stopwords.put("vom", dummy);
|
---|
282 | m_Stopwords.put("von", dummy);
|
---|
283 | m_Stopwords.put("vor", dummy);
|
---|
284 | m_Stopwords.put("w\u00e4hrend", dummy);
|
---|
285 | m_Stopwords.put("war", dummy);
|
---|
286 | m_Stopwords.put("waren", dummy);
|
---|
287 | m_Stopwords.put("warst", dummy);
|
---|
288 | m_Stopwords.put("was", dummy);
|
---|
289 | m_Stopwords.put("weg", dummy);
|
---|
290 | m_Stopwords.put("weil", dummy);
|
---|
291 | m_Stopwords.put("weiter", dummy);
|
---|
292 |
|
---|
293 | m_Stopwords.put("welche", dummy);
|
---|
294 | m_Stopwords.put("welchem", dummy);
|
---|
295 | m_Stopwords.put("welchen", dummy);
|
---|
296 | m_Stopwords.put("welcher", dummy);
|
---|
297 | m_Stopwords.put("welches", dummy);
|
---|
298 |
|
---|
299 | m_Stopwords.put("wenn", dummy);
|
---|
300 | m_Stopwords.put("werde", dummy);
|
---|
301 | m_Stopwords.put("werden", dummy);
|
---|
302 | m_Stopwords.put("wie", dummy);
|
---|
303 | m_Stopwords.put("wieder", dummy);
|
---|
304 | m_Stopwords.put("will", dummy);
|
---|
305 | m_Stopwords.put("wir", dummy);
|
---|
306 | m_Stopwords.put("wird", dummy);
|
---|
307 | m_Stopwords.put("wirst", dummy);
|
---|
308 | m_Stopwords.put("wo", dummy);
|
---|
309 | m_Stopwords.put("wollen", dummy);
|
---|
310 | m_Stopwords.put("wollte", dummy);
|
---|
311 | m_Stopwords.put("w\u00fcrde", dummy);
|
---|
312 | m_Stopwords.put("w\u00fcrden", dummy);
|
---|
313 | m_Stopwords.put("zu", dummy);
|
---|
314 | m_Stopwords.put("zum", dummy);
|
---|
315 | m_Stopwords.put("zur", dummy);
|
---|
316 | m_Stopwords.put("zwar", dummy);
|
---|
317 | m_Stopwords.put("zwischen", dummy);
|
---|
318 | }
|
---|
319 | }
|
---|
320 |
|
---|
321 | /**
|
---|
322 | * Returns true if the given string is a stop word.
|
---|
323 | */
|
---|
324 | public boolean isStopword(String str) {
|
---|
325 |
|
---|
326 | return m_Stopwords.containsKey(str.toLowerCase());
|
---|
327 | }
|
---|
328 | }
|
---|
329 |
|
---|
330 |
|
---|