source: trunk/gsdl/packages/kea/kea-3.0/StopwordsEnglish.java@ 8815

Last change on this file since 8815 was 8815, checked in by mdewsnip, 19 years ago

Kea 3.0, as downloaded from http://www.nzdl.org/kea but with CSTR_abstracts_test, CSTR_abstracts_train, Chinese_test, and Chinese_train directories removed.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.7 KB
Line 
1/*
2 * StopwordsEnglish.java
3 * Copyright (C) 2001 Eibe Frank
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20import java.util.*;
21
22/**
23 * Class that can test whether a given string is a stop word.
24 * Lowercases all words before the test.
25 *
26 * @author Eibe Frank ([email protected])
27 * @version 1.0
28 */
29public class StopwordsEnglish extends Stopwords {
30
31 /** The hashtable containing the list of stopwords */
32 private static Hashtable m_Stopwords = null;
33
34 static {
35
36 if (m_Stopwords == null) {
37 m_Stopwords = new Hashtable();
38 Double dummy = new Double(0);
39
40 m_Stopwords.put("a", dummy);
41 m_Stopwords.put("abaft", dummy);
42 m_Stopwords.put("aboard", dummy);
43 m_Stopwords.put("about", dummy);
44 m_Stopwords.put("above", dummy);
45 m_Stopwords.put("across", dummy);
46 m_Stopwords.put("afore", dummy);
47 m_Stopwords.put("aforesaid", dummy);
48 m_Stopwords.put("after", dummy);
49 m_Stopwords.put("again", dummy);
50 m_Stopwords.put("against", dummy);
51 m_Stopwords.put("agin", dummy);
52 m_Stopwords.put("ago", dummy);
53 m_Stopwords.put("aint", dummy);
54 m_Stopwords.put("albeit", dummy);
55 m_Stopwords.put("all", dummy);
56 m_Stopwords.put("almost", dummy);
57 m_Stopwords.put("alone", dummy);
58 m_Stopwords.put("along", dummy);
59 m_Stopwords.put("alongside", dummy);
60 m_Stopwords.put("already", dummy);
61 m_Stopwords.put("also", dummy);
62 m_Stopwords.put("although", dummy);
63 m_Stopwords.put("always", dummy);
64 m_Stopwords.put("am", dummy);
65 m_Stopwords.put("american", dummy);
66 m_Stopwords.put("amid", dummy);
67 m_Stopwords.put("amidst", dummy);
68 m_Stopwords.put("among", dummy);
69 m_Stopwords.put("amongst", dummy);
70 m_Stopwords.put("an", dummy);
71 m_Stopwords.put("and", dummy);
72 m_Stopwords.put("anent", dummy);
73 m_Stopwords.put("another", dummy);
74 m_Stopwords.put("any", dummy);
75 m_Stopwords.put("anybody", dummy);
76 m_Stopwords.put("anyone", dummy);
77 m_Stopwords.put("anything", dummy);
78 m_Stopwords.put("are", dummy);
79 m_Stopwords.put("aren't", dummy);
80 m_Stopwords.put("around", dummy);
81 m_Stopwords.put("as", dummy);
82 m_Stopwords.put("aslant", dummy);
83 m_Stopwords.put("astride", dummy);
84 m_Stopwords.put("at", dummy);
85 m_Stopwords.put("athwart", dummy);
86 m_Stopwords.put("away", dummy);
87 m_Stopwords.put("b", dummy);
88 m_Stopwords.put("back", dummy);
89 m_Stopwords.put("bar", dummy);
90 m_Stopwords.put("barring", dummy);
91 m_Stopwords.put("be", dummy);
92 m_Stopwords.put("because", dummy);
93 m_Stopwords.put("been", dummy);
94 m_Stopwords.put("before", dummy);
95 m_Stopwords.put("behind", dummy);
96 m_Stopwords.put("being", dummy);
97 m_Stopwords.put("below", dummy);
98 m_Stopwords.put("beneath", dummy);
99 m_Stopwords.put("beside", dummy);
100 m_Stopwords.put("besides", dummy);
101 m_Stopwords.put("best", dummy);
102 m_Stopwords.put("better", dummy);
103 m_Stopwords.put("between", dummy);
104 m_Stopwords.put("betwixt", dummy);
105 m_Stopwords.put("beyond", dummy);
106 m_Stopwords.put("both", dummy);
107 m_Stopwords.put("but", dummy);
108 m_Stopwords.put("by", dummy);
109 m_Stopwords.put("c", dummy);
110 m_Stopwords.put("can", dummy);
111 m_Stopwords.put("cannot", dummy);
112 m_Stopwords.put("can't", dummy);
113 m_Stopwords.put("certain", dummy);
114 m_Stopwords.put("circa", dummy);
115 m_Stopwords.put("close", dummy);
116 m_Stopwords.put("concerning", dummy);
117 m_Stopwords.put("considering", dummy);
118 m_Stopwords.put("cos", dummy);
119 m_Stopwords.put("could", dummy);
120 m_Stopwords.put("couldn't", dummy);
121 m_Stopwords.put("couldst", dummy);
122 m_Stopwords.put("d", dummy);
123 m_Stopwords.put("dare", dummy);
124 m_Stopwords.put("dared", dummy);
125 m_Stopwords.put("daren't", dummy);
126 m_Stopwords.put("dares", dummy);
127 m_Stopwords.put("daring", dummy);
128 m_Stopwords.put("despite", dummy);
129 m_Stopwords.put("did", dummy);
130 m_Stopwords.put("didn't", dummy);
131 m_Stopwords.put("different", dummy);
132 m_Stopwords.put("directly", dummy);
133 m_Stopwords.put("do", dummy);
134 m_Stopwords.put("does", dummy);
135 m_Stopwords.put("doesn't", dummy);
136 m_Stopwords.put("doing", dummy);
137 m_Stopwords.put("done", dummy);
138 m_Stopwords.put("don't", dummy);
139 m_Stopwords.put("dost", dummy);
140 m_Stopwords.put("doth", dummy);
141 m_Stopwords.put("down", dummy);
142 m_Stopwords.put("during", dummy);
143 m_Stopwords.put("durst", dummy);
144 m_Stopwords.put("e", dummy);
145 m_Stopwords.put("each", dummy);
146 m_Stopwords.put("early", dummy);
147 m_Stopwords.put("either", dummy);
148 m_Stopwords.put("em", dummy);
149 m_Stopwords.put("english", dummy);
150 m_Stopwords.put("enough", dummy);
151 m_Stopwords.put("ere", dummy);
152 m_Stopwords.put("even", dummy);
153 m_Stopwords.put("ever", dummy);
154 m_Stopwords.put("every", dummy);
155 m_Stopwords.put("everybody", dummy);
156 m_Stopwords.put("everyone", dummy);
157 m_Stopwords.put("everything", dummy);
158 m_Stopwords.put("except", dummy);
159 m_Stopwords.put("excepting", dummy);
160 m_Stopwords.put("f", dummy);
161 m_Stopwords.put("failing", dummy);
162 m_Stopwords.put("far", dummy);
163 m_Stopwords.put("few", dummy);
164 m_Stopwords.put("first", dummy);
165 m_Stopwords.put("five", dummy);
166 m_Stopwords.put("following", dummy);
167 m_Stopwords.put("for", dummy);
168 m_Stopwords.put("four", dummy);
169 m_Stopwords.put("from", dummy);
170 m_Stopwords.put("g", dummy);
171 m_Stopwords.put("gonna", dummy);
172 m_Stopwords.put("gotta", dummy);
173 m_Stopwords.put("h", dummy);
174 m_Stopwords.put("had", dummy);
175 m_Stopwords.put("hadn't", dummy);
176 m_Stopwords.put("hard", dummy);
177 m_Stopwords.put("has", dummy);
178 m_Stopwords.put("hasn't", dummy);
179 m_Stopwords.put("hast", dummy);
180 m_Stopwords.put("hath", dummy);
181 m_Stopwords.put("have", dummy);
182 m_Stopwords.put("haven't", dummy);
183 m_Stopwords.put("having", dummy);
184 m_Stopwords.put("he", dummy);
185 m_Stopwords.put("he'd", dummy);
186 m_Stopwords.put("he'll", dummy);
187 m_Stopwords.put("her", dummy);
188 m_Stopwords.put("here", dummy);
189 m_Stopwords.put("here's", dummy);
190 m_Stopwords.put("hers", dummy);
191 m_Stopwords.put("herself", dummy);
192 m_Stopwords.put("he's", dummy);
193 m_Stopwords.put("high", dummy);
194 m_Stopwords.put("him", dummy);
195 m_Stopwords.put("himself", dummy);
196 m_Stopwords.put("his", dummy);
197 m_Stopwords.put("home", dummy);
198 m_Stopwords.put("how", dummy);
199 m_Stopwords.put("howbeit", dummy);
200 m_Stopwords.put("however", dummy);
201 m_Stopwords.put("how's", dummy);
202 m_Stopwords.put("i", dummy);
203 m_Stopwords.put("id", dummy);
204 m_Stopwords.put("if", dummy);
205 m_Stopwords.put("ill", dummy);
206 m_Stopwords.put("i'm", dummy);
207 m_Stopwords.put("immediately", dummy);
208 m_Stopwords.put("important", dummy);
209 m_Stopwords.put("in", dummy);
210 m_Stopwords.put("inside", dummy);
211 m_Stopwords.put("instantly", dummy);
212 m_Stopwords.put("into", dummy);
213 m_Stopwords.put("is", dummy);
214 m_Stopwords.put("isn't", dummy);
215 m_Stopwords.put("it", dummy);
216 m_Stopwords.put("it'll", dummy);
217 m_Stopwords.put("it's", dummy);
218 m_Stopwords.put("its", dummy);
219 m_Stopwords.put("itself", dummy);
220 m_Stopwords.put("i've", dummy);
221 m_Stopwords.put("j", dummy);
222 m_Stopwords.put("just", dummy);
223 m_Stopwords.put("k", dummy);
224 m_Stopwords.put("l", dummy);
225 m_Stopwords.put("large", dummy);
226 m_Stopwords.put("last", dummy);
227 m_Stopwords.put("later", dummy);
228 m_Stopwords.put("least", dummy);
229 m_Stopwords.put("left", dummy);
230 m_Stopwords.put("less", dummy);
231 m_Stopwords.put("lest", dummy);
232 m_Stopwords.put("let's", dummy);
233 m_Stopwords.put("like", dummy);
234 m_Stopwords.put("likewise", dummy);
235 m_Stopwords.put("little", dummy);
236 m_Stopwords.put("living", dummy);
237 m_Stopwords.put("long", dummy);
238 m_Stopwords.put("m", dummy);
239 m_Stopwords.put("many", dummy);
240 m_Stopwords.put("may", dummy);
241 m_Stopwords.put("mayn't", dummy);
242 m_Stopwords.put("me", dummy);
243 m_Stopwords.put("mid", dummy);
244 m_Stopwords.put("midst", dummy);
245 m_Stopwords.put("might", dummy);
246 m_Stopwords.put("mightn't", dummy);
247 m_Stopwords.put("mine", dummy);
248 m_Stopwords.put("minus", dummy);
249 m_Stopwords.put("more", dummy);
250 m_Stopwords.put("most", dummy);
251 m_Stopwords.put("much", dummy);
252 m_Stopwords.put("must", dummy);
253 m_Stopwords.put("mustn't", dummy);
254 m_Stopwords.put("my", dummy);
255 m_Stopwords.put("myself", dummy);
256 m_Stopwords.put("n", dummy);
257 m_Stopwords.put("near", dummy);
258 m_Stopwords.put("'neath", dummy);
259 m_Stopwords.put("need", dummy);
260 m_Stopwords.put("needed", dummy);
261 m_Stopwords.put("needing", dummy);
262 m_Stopwords.put("needn't", dummy);
263 m_Stopwords.put("needs", dummy);
264 m_Stopwords.put("neither", dummy);
265 m_Stopwords.put("never", dummy);
266 m_Stopwords.put("nevertheless", dummy);
267 m_Stopwords.put("new", dummy);
268 m_Stopwords.put("next", dummy);
269 m_Stopwords.put("nigh", dummy);
270 m_Stopwords.put("nigher", dummy);
271 m_Stopwords.put("nighest", dummy);
272 m_Stopwords.put("nisi", dummy);
273 m_Stopwords.put("no", dummy);
274 m_Stopwords.put("no-one", dummy);
275 m_Stopwords.put("nobody", dummy);
276 m_Stopwords.put("none", dummy);
277 m_Stopwords.put("nor", dummy);
278 m_Stopwords.put("not", dummy);
279 m_Stopwords.put("nothing", dummy);
280 m_Stopwords.put("notwithstanding", dummy);
281 m_Stopwords.put("now", dummy);
282 m_Stopwords.put("o", dummy);
283 m_Stopwords.put("o'er", dummy);
284 m_Stopwords.put("of", dummy);
285 m_Stopwords.put("off", dummy);
286 m_Stopwords.put("often", dummy);
287 m_Stopwords.put("on", dummy);
288 m_Stopwords.put("once", dummy);
289 m_Stopwords.put("one", dummy);
290 m_Stopwords.put("oneself", dummy);
291 m_Stopwords.put("only", dummy);
292 m_Stopwords.put("onto", dummy);
293 m_Stopwords.put("open", dummy);
294 m_Stopwords.put("or", dummy);
295 m_Stopwords.put("other", dummy);
296 m_Stopwords.put("otherwise", dummy);
297 m_Stopwords.put("ought", dummy);
298 m_Stopwords.put("oughtn't", dummy);
299 m_Stopwords.put("our", dummy);
300 m_Stopwords.put("ours", dummy);
301 m_Stopwords.put("ourselves", dummy);
302 m_Stopwords.put("out", dummy);
303 m_Stopwords.put("outside", dummy);
304 m_Stopwords.put("over", dummy);
305 m_Stopwords.put("own", dummy);
306 m_Stopwords.put("p", dummy);
307 m_Stopwords.put("past", dummy);
308 m_Stopwords.put("pending", dummy);
309 m_Stopwords.put("per", dummy);
310 m_Stopwords.put("perhaps", dummy);
311 m_Stopwords.put("plus", dummy);
312 m_Stopwords.put("possible", dummy);
313 m_Stopwords.put("present", dummy);
314 m_Stopwords.put("probably", dummy);
315 m_Stopwords.put("provided", dummy);
316 m_Stopwords.put("providing", dummy);
317 m_Stopwords.put("public", dummy);
318 m_Stopwords.put("q", dummy);
319 m_Stopwords.put("qua", dummy);
320 m_Stopwords.put("quite", dummy);
321 m_Stopwords.put("r", dummy);
322 m_Stopwords.put("rather", dummy);
323 m_Stopwords.put("re", dummy);
324 m_Stopwords.put("real", dummy);
325 m_Stopwords.put("really", dummy);
326 m_Stopwords.put("respecting", dummy);
327 m_Stopwords.put("right", dummy);
328 m_Stopwords.put("round", dummy);
329 m_Stopwords.put("s", dummy);
330 m_Stopwords.put("same", dummy);
331 m_Stopwords.put("sans", dummy);
332 m_Stopwords.put("save", dummy);
333 m_Stopwords.put("saving", dummy);
334 m_Stopwords.put("second", dummy);
335 m_Stopwords.put("several", dummy);
336 m_Stopwords.put("shall", dummy);
337 m_Stopwords.put("shalt", dummy);
338 m_Stopwords.put("shan't", dummy);
339 m_Stopwords.put("she", dummy);
340 m_Stopwords.put("shed", dummy);
341 m_Stopwords.put("shell", dummy);
342 m_Stopwords.put("she's", dummy);
343 m_Stopwords.put("short", dummy);
344 m_Stopwords.put("should", dummy);
345 m_Stopwords.put("shouldn't", dummy);
346 m_Stopwords.put("since", dummy);
347 m_Stopwords.put("six", dummy);
348 m_Stopwords.put("small", dummy);
349 m_Stopwords.put("so", dummy);
350 m_Stopwords.put("some", dummy);
351 m_Stopwords.put("somebody", dummy);
352 m_Stopwords.put("someone", dummy);
353 m_Stopwords.put("something", dummy);
354 m_Stopwords.put("sometimes", dummy);
355 m_Stopwords.put("soon", dummy);
356 m_Stopwords.put("special", dummy);
357 m_Stopwords.put("still", dummy);
358 m_Stopwords.put("such", dummy);
359 m_Stopwords.put("summat", dummy);
360 m_Stopwords.put("supposing", dummy);
361 m_Stopwords.put("sure", dummy);
362 m_Stopwords.put("t", dummy);
363 m_Stopwords.put("than", dummy);
364 m_Stopwords.put("that", dummy);
365 m_Stopwords.put("that'd", dummy);
366 m_Stopwords.put("that'll", dummy);
367 m_Stopwords.put("that's", dummy);
368 m_Stopwords.put("the", dummy);
369 m_Stopwords.put("thee", dummy);
370 m_Stopwords.put("their", dummy);
371 m_Stopwords.put("theirs", dummy);
372 m_Stopwords.put("their's", dummy);
373 m_Stopwords.put("them", dummy);
374 m_Stopwords.put("themselves", dummy);
375 m_Stopwords.put("then", dummy);
376 m_Stopwords.put("there", dummy);
377 m_Stopwords.put("there's", dummy);
378 m_Stopwords.put("these", dummy);
379 m_Stopwords.put("they", dummy);
380 m_Stopwords.put("they'd", dummy);
381 m_Stopwords.put("they'll", dummy);
382 m_Stopwords.put("they're", dummy);
383 m_Stopwords.put("they've", dummy);
384 m_Stopwords.put("thine", dummy);
385 m_Stopwords.put("this", dummy);
386 m_Stopwords.put("tho", dummy);
387 m_Stopwords.put("those", dummy);
388 m_Stopwords.put("thou", dummy);
389 m_Stopwords.put("though", dummy);
390 m_Stopwords.put("three", dummy);
391 m_Stopwords.put("thro'", dummy);
392 m_Stopwords.put("through", dummy);
393 m_Stopwords.put("throughout", dummy);
394 m_Stopwords.put("thru", dummy);
395 m_Stopwords.put("thyself", dummy);
396 m_Stopwords.put("till", dummy);
397 m_Stopwords.put("to", dummy);
398 m_Stopwords.put("today", dummy);
399 m_Stopwords.put("together", dummy);
400 m_Stopwords.put("too", dummy);
401 m_Stopwords.put("touching", dummy);
402 m_Stopwords.put("toward", dummy);
403 m_Stopwords.put("towards", dummy);
404 m_Stopwords.put("true", dummy);
405 m_Stopwords.put("'twas", dummy);
406 m_Stopwords.put("'tween", dummy);
407 m_Stopwords.put("'twere", dummy);
408 m_Stopwords.put("'twill", dummy);
409 m_Stopwords.put("'twixt", dummy);
410 m_Stopwords.put("two", dummy);
411 m_Stopwords.put("'twould", dummy);
412 m_Stopwords.put("u", dummy);
413 m_Stopwords.put("under", dummy);
414 m_Stopwords.put("underneath", dummy);
415 m_Stopwords.put("unless", dummy);
416 m_Stopwords.put("unlike", dummy);
417 m_Stopwords.put("until", dummy);
418 m_Stopwords.put("unto", dummy);
419 m_Stopwords.put("up", dummy);
420 m_Stopwords.put("upon", dummy);
421 m_Stopwords.put("us", dummy);
422 m_Stopwords.put("used", dummy);
423 m_Stopwords.put("usually", dummy);
424 m_Stopwords.put("v", dummy);
425 m_Stopwords.put("versus", dummy);
426 m_Stopwords.put("very", dummy);
427 m_Stopwords.put("via", dummy);
428 m_Stopwords.put("vice", dummy);
429 m_Stopwords.put("vis-a-vis", dummy);
430 m_Stopwords.put("w", dummy);
431 m_Stopwords.put("wanna", dummy);
432 m_Stopwords.put("wanting", dummy);
433 m_Stopwords.put("was", dummy);
434 m_Stopwords.put("wasn't", dummy);
435 m_Stopwords.put("way", dummy);
436 m_Stopwords.put("we", dummy);
437 m_Stopwords.put("we'd", dummy);
438 m_Stopwords.put("well", dummy);
439 m_Stopwords.put("were", dummy);
440 m_Stopwords.put("weren't", dummy);
441 m_Stopwords.put("wert", dummy);
442 m_Stopwords.put("we've", dummy);
443 m_Stopwords.put("what", dummy);
444 m_Stopwords.put("whatever", dummy);
445 m_Stopwords.put("what'll", dummy);
446 m_Stopwords.put("what's", dummy);
447 m_Stopwords.put("when", dummy);
448 m_Stopwords.put("whencesoever", dummy);
449 m_Stopwords.put("whenever", dummy);
450 m_Stopwords.put("when's", dummy);
451 m_Stopwords.put("whereas", dummy);
452 m_Stopwords.put("where's", dummy);
453 m_Stopwords.put("whether", dummy);
454 m_Stopwords.put("which", dummy);
455 m_Stopwords.put("whichever", dummy);
456 m_Stopwords.put("whichsoever", dummy);
457 m_Stopwords.put("while", dummy);
458 m_Stopwords.put("whilst", dummy);
459 m_Stopwords.put("who", dummy);
460 m_Stopwords.put("who'd", dummy);
461 m_Stopwords.put("whoever", dummy);
462 m_Stopwords.put("whole", dummy);
463 m_Stopwords.put("who'll", dummy);
464 m_Stopwords.put("whom", dummy);
465 m_Stopwords.put("whore", dummy);
466 m_Stopwords.put("who's", dummy);
467 m_Stopwords.put("whose", dummy);
468 m_Stopwords.put("whoso", dummy);
469 m_Stopwords.put("whosoever", dummy);
470 m_Stopwords.put("will", dummy);
471 m_Stopwords.put("with", dummy);
472 m_Stopwords.put("within", dummy);
473 m_Stopwords.put("without", dummy);
474 m_Stopwords.put("wont", dummy);
475 m_Stopwords.put("would", dummy);
476 m_Stopwords.put("wouldn't", dummy);
477 m_Stopwords.put("wouldst", dummy);
478 m_Stopwords.put("x", dummy);
479 m_Stopwords.put("y", dummy);
480 m_Stopwords.put("ye", dummy);
481 m_Stopwords.put("yet", dummy);
482 m_Stopwords.put("you", dummy);
483 m_Stopwords.put("you'd", dummy);
484 m_Stopwords.put("you'll", dummy);
485 m_Stopwords.put("your", dummy);
486 m_Stopwords.put("you're", dummy);
487 m_Stopwords.put("yours", dummy);
488 m_Stopwords.put("yourself", dummy);
489 m_Stopwords.put("yourselves", dummy);
490 m_Stopwords.put("you've", dummy);
491 m_Stopwords.put("z", dummy);
492 }
493 }
494
495 /**
496 * Returns true if the given string is a stop word.
497 */
498 public boolean isStopword(String str) {
499
500 return m_Stopwords.containsKey(str.toLowerCase());
501 }
502}
503
504
Note: See TracBrowser for help on using the repository browser.