1 | /*
|
---|
2 | * StopwordsEnglish.java
|
---|
3 | * Copyright (C) 2001 Eibe Frank
|
---|
4 | *
|
---|
5 | * This program is free software; you can redistribute it and/or modify
|
---|
6 | * it under the terms of the GNU General Public License as published by
|
---|
7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
8 | * (at your option) any later version.
|
---|
9 | *
|
---|
10 | * This program is distributed in the hope that it will be useful,
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
13 | * GNU General Public License for more details.
|
---|
14 | *
|
---|
15 | * You should have received a copy of the GNU General Public License
|
---|
16 | * along with this program; if not, write to the Free Software
|
---|
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
18 | */
|
---|
19 |
|
---|
20 | import java.util.*;
|
---|
21 |
|
---|
22 | /**
|
---|
23 | * Class that can test whether a given string is a stop word.
|
---|
24 | * Lowercases all words before the test.
|
---|
25 | *
|
---|
26 | * @author Eibe Frank ([email protected])
|
---|
27 | * @version 1.0
|
---|
28 | */
|
---|
29 | public class StopwordsEnglish extends Stopwords {
|
---|
30 |
|
---|
31 | /** The hashtable containing the list of stopwords */
|
---|
32 | private static Hashtable m_Stopwords = null;
|
---|
33 |
|
---|
34 | static {
|
---|
35 |
|
---|
36 | if (m_Stopwords == null) {
|
---|
37 | m_Stopwords = new Hashtable();
|
---|
38 | Double dummy = new Double(0);
|
---|
39 |
|
---|
40 | m_Stopwords.put("a", dummy);
|
---|
41 | m_Stopwords.put("abaft", dummy);
|
---|
42 | m_Stopwords.put("aboard", dummy);
|
---|
43 | m_Stopwords.put("about", dummy);
|
---|
44 | m_Stopwords.put("above", dummy);
|
---|
45 | m_Stopwords.put("across", dummy);
|
---|
46 | m_Stopwords.put("afore", dummy);
|
---|
47 | m_Stopwords.put("aforesaid", dummy);
|
---|
48 | m_Stopwords.put("after", dummy);
|
---|
49 | m_Stopwords.put("again", dummy);
|
---|
50 | m_Stopwords.put("against", dummy);
|
---|
51 | m_Stopwords.put("agin", dummy);
|
---|
52 | m_Stopwords.put("ago", dummy);
|
---|
53 | m_Stopwords.put("aint", dummy);
|
---|
54 | m_Stopwords.put("albeit", dummy);
|
---|
55 | m_Stopwords.put("all", dummy);
|
---|
56 | m_Stopwords.put("almost", dummy);
|
---|
57 | m_Stopwords.put("alone", dummy);
|
---|
58 | m_Stopwords.put("along", dummy);
|
---|
59 | m_Stopwords.put("alongside", dummy);
|
---|
60 | m_Stopwords.put("already", dummy);
|
---|
61 | m_Stopwords.put("also", dummy);
|
---|
62 | m_Stopwords.put("although", dummy);
|
---|
63 | m_Stopwords.put("always", dummy);
|
---|
64 | m_Stopwords.put("am", dummy);
|
---|
65 | m_Stopwords.put("american", dummy);
|
---|
66 | m_Stopwords.put("amid", dummy);
|
---|
67 | m_Stopwords.put("amidst", dummy);
|
---|
68 | m_Stopwords.put("among", dummy);
|
---|
69 | m_Stopwords.put("amongst", dummy);
|
---|
70 | m_Stopwords.put("an", dummy);
|
---|
71 | m_Stopwords.put("and", dummy);
|
---|
72 | m_Stopwords.put("anent", dummy);
|
---|
73 | m_Stopwords.put("another", dummy);
|
---|
74 | m_Stopwords.put("any", dummy);
|
---|
75 | m_Stopwords.put("anybody", dummy);
|
---|
76 | m_Stopwords.put("anyone", dummy);
|
---|
77 | m_Stopwords.put("anything", dummy);
|
---|
78 | m_Stopwords.put("are", dummy);
|
---|
79 | m_Stopwords.put("aren't", dummy);
|
---|
80 | m_Stopwords.put("around", dummy);
|
---|
81 | m_Stopwords.put("as", dummy);
|
---|
82 | m_Stopwords.put("aslant", dummy);
|
---|
83 | m_Stopwords.put("astride", dummy);
|
---|
84 | m_Stopwords.put("at", dummy);
|
---|
85 | m_Stopwords.put("athwart", dummy);
|
---|
86 | m_Stopwords.put("away", dummy);
|
---|
87 | m_Stopwords.put("b", dummy);
|
---|
88 | m_Stopwords.put("back", dummy);
|
---|
89 | m_Stopwords.put("bar", dummy);
|
---|
90 | m_Stopwords.put("barring", dummy);
|
---|
91 | m_Stopwords.put("be", dummy);
|
---|
92 | m_Stopwords.put("because", dummy);
|
---|
93 | m_Stopwords.put("been", dummy);
|
---|
94 | m_Stopwords.put("before", dummy);
|
---|
95 | m_Stopwords.put("behind", dummy);
|
---|
96 | m_Stopwords.put("being", dummy);
|
---|
97 | m_Stopwords.put("below", dummy);
|
---|
98 | m_Stopwords.put("beneath", dummy);
|
---|
99 | m_Stopwords.put("beside", dummy);
|
---|
100 | m_Stopwords.put("besides", dummy);
|
---|
101 | m_Stopwords.put("best", dummy);
|
---|
102 | m_Stopwords.put("better", dummy);
|
---|
103 | m_Stopwords.put("between", dummy);
|
---|
104 | m_Stopwords.put("betwixt", dummy);
|
---|
105 | m_Stopwords.put("beyond", dummy);
|
---|
106 | m_Stopwords.put("both", dummy);
|
---|
107 | m_Stopwords.put("but", dummy);
|
---|
108 | m_Stopwords.put("by", dummy);
|
---|
109 | m_Stopwords.put("c", dummy);
|
---|
110 | m_Stopwords.put("can", dummy);
|
---|
111 | m_Stopwords.put("cannot", dummy);
|
---|
112 | m_Stopwords.put("can't", dummy);
|
---|
113 | m_Stopwords.put("certain", dummy);
|
---|
114 | m_Stopwords.put("circa", dummy);
|
---|
115 | m_Stopwords.put("close", dummy);
|
---|
116 | m_Stopwords.put("concerning", dummy);
|
---|
117 | m_Stopwords.put("considering", dummy);
|
---|
118 | m_Stopwords.put("cos", dummy);
|
---|
119 | m_Stopwords.put("could", dummy);
|
---|
120 | m_Stopwords.put("couldn't", dummy);
|
---|
121 | m_Stopwords.put("couldst", dummy);
|
---|
122 | m_Stopwords.put("d", dummy);
|
---|
123 | m_Stopwords.put("dare", dummy);
|
---|
124 | m_Stopwords.put("dared", dummy);
|
---|
125 | m_Stopwords.put("daren't", dummy);
|
---|
126 | m_Stopwords.put("dares", dummy);
|
---|
127 | m_Stopwords.put("daring", dummy);
|
---|
128 | m_Stopwords.put("despite", dummy);
|
---|
129 | m_Stopwords.put("did", dummy);
|
---|
130 | m_Stopwords.put("didn't", dummy);
|
---|
131 | m_Stopwords.put("different", dummy);
|
---|
132 | m_Stopwords.put("directly", dummy);
|
---|
133 | m_Stopwords.put("do", dummy);
|
---|
134 | m_Stopwords.put("does", dummy);
|
---|
135 | m_Stopwords.put("doesn't", dummy);
|
---|
136 | m_Stopwords.put("doing", dummy);
|
---|
137 | m_Stopwords.put("done", dummy);
|
---|
138 | m_Stopwords.put("don't", dummy);
|
---|
139 | m_Stopwords.put("dost", dummy);
|
---|
140 | m_Stopwords.put("doth", dummy);
|
---|
141 | m_Stopwords.put("down", dummy);
|
---|
142 | m_Stopwords.put("during", dummy);
|
---|
143 | m_Stopwords.put("durst", dummy);
|
---|
144 | m_Stopwords.put("e", dummy);
|
---|
145 | m_Stopwords.put("each", dummy);
|
---|
146 | m_Stopwords.put("early", dummy);
|
---|
147 | m_Stopwords.put("either", dummy);
|
---|
148 | m_Stopwords.put("em", dummy);
|
---|
149 | m_Stopwords.put("english", dummy);
|
---|
150 | m_Stopwords.put("enough", dummy);
|
---|
151 | m_Stopwords.put("ere", dummy);
|
---|
152 | m_Stopwords.put("even", dummy);
|
---|
153 | m_Stopwords.put("ever", dummy);
|
---|
154 | m_Stopwords.put("every", dummy);
|
---|
155 | m_Stopwords.put("everybody", dummy);
|
---|
156 | m_Stopwords.put("everyone", dummy);
|
---|
157 | m_Stopwords.put("everything", dummy);
|
---|
158 | m_Stopwords.put("except", dummy);
|
---|
159 | m_Stopwords.put("excepting", dummy);
|
---|
160 | m_Stopwords.put("f", dummy);
|
---|
161 | m_Stopwords.put("failing", dummy);
|
---|
162 | m_Stopwords.put("far", dummy);
|
---|
163 | m_Stopwords.put("few", dummy);
|
---|
164 | m_Stopwords.put("first", dummy);
|
---|
165 | m_Stopwords.put("five", dummy);
|
---|
166 | m_Stopwords.put("following", dummy);
|
---|
167 | m_Stopwords.put("for", dummy);
|
---|
168 | m_Stopwords.put("four", dummy);
|
---|
169 | m_Stopwords.put("from", dummy);
|
---|
170 | m_Stopwords.put("g", dummy);
|
---|
171 | m_Stopwords.put("gonna", dummy);
|
---|
172 | m_Stopwords.put("gotta", dummy);
|
---|
173 | m_Stopwords.put("h", dummy);
|
---|
174 | m_Stopwords.put("had", dummy);
|
---|
175 | m_Stopwords.put("hadn't", dummy);
|
---|
176 | m_Stopwords.put("hard", dummy);
|
---|
177 | m_Stopwords.put("has", dummy);
|
---|
178 | m_Stopwords.put("hasn't", dummy);
|
---|
179 | m_Stopwords.put("hast", dummy);
|
---|
180 | m_Stopwords.put("hath", dummy);
|
---|
181 | m_Stopwords.put("have", dummy);
|
---|
182 | m_Stopwords.put("haven't", dummy);
|
---|
183 | m_Stopwords.put("having", dummy);
|
---|
184 | m_Stopwords.put("he", dummy);
|
---|
185 | m_Stopwords.put("he'd", dummy);
|
---|
186 | m_Stopwords.put("he'll", dummy);
|
---|
187 | m_Stopwords.put("her", dummy);
|
---|
188 | m_Stopwords.put("here", dummy);
|
---|
189 | m_Stopwords.put("here's", dummy);
|
---|
190 | m_Stopwords.put("hers", dummy);
|
---|
191 | m_Stopwords.put("herself", dummy);
|
---|
192 | m_Stopwords.put("he's", dummy);
|
---|
193 | m_Stopwords.put("high", dummy);
|
---|
194 | m_Stopwords.put("him", dummy);
|
---|
195 | m_Stopwords.put("himself", dummy);
|
---|
196 | m_Stopwords.put("his", dummy);
|
---|
197 | m_Stopwords.put("home", dummy);
|
---|
198 | m_Stopwords.put("how", dummy);
|
---|
199 | m_Stopwords.put("howbeit", dummy);
|
---|
200 | m_Stopwords.put("however", dummy);
|
---|
201 | m_Stopwords.put("how's", dummy);
|
---|
202 | m_Stopwords.put("i", dummy);
|
---|
203 | m_Stopwords.put("id", dummy);
|
---|
204 | m_Stopwords.put("if", dummy);
|
---|
205 | m_Stopwords.put("ill", dummy);
|
---|
206 | m_Stopwords.put("i'm", dummy);
|
---|
207 | m_Stopwords.put("immediately", dummy);
|
---|
208 | m_Stopwords.put("important", dummy);
|
---|
209 | m_Stopwords.put("in", dummy);
|
---|
210 | m_Stopwords.put("inside", dummy);
|
---|
211 | m_Stopwords.put("instantly", dummy);
|
---|
212 | m_Stopwords.put("into", dummy);
|
---|
213 | m_Stopwords.put("is", dummy);
|
---|
214 | m_Stopwords.put("isn't", dummy);
|
---|
215 | m_Stopwords.put("it", dummy);
|
---|
216 | m_Stopwords.put("it'll", dummy);
|
---|
217 | m_Stopwords.put("it's", dummy);
|
---|
218 | m_Stopwords.put("its", dummy);
|
---|
219 | m_Stopwords.put("itself", dummy);
|
---|
220 | m_Stopwords.put("i've", dummy);
|
---|
221 | m_Stopwords.put("j", dummy);
|
---|
222 | m_Stopwords.put("just", dummy);
|
---|
223 | m_Stopwords.put("k", dummy);
|
---|
224 | m_Stopwords.put("l", dummy);
|
---|
225 | m_Stopwords.put("large", dummy);
|
---|
226 | m_Stopwords.put("last", dummy);
|
---|
227 | m_Stopwords.put("later", dummy);
|
---|
228 | m_Stopwords.put("least", dummy);
|
---|
229 | m_Stopwords.put("left", dummy);
|
---|
230 | m_Stopwords.put("less", dummy);
|
---|
231 | m_Stopwords.put("lest", dummy);
|
---|
232 | m_Stopwords.put("let's", dummy);
|
---|
233 | m_Stopwords.put("like", dummy);
|
---|
234 | m_Stopwords.put("likewise", dummy);
|
---|
235 | m_Stopwords.put("little", dummy);
|
---|
236 | m_Stopwords.put("living", dummy);
|
---|
237 | m_Stopwords.put("long", dummy);
|
---|
238 | m_Stopwords.put("m", dummy);
|
---|
239 | m_Stopwords.put("many", dummy);
|
---|
240 | m_Stopwords.put("may", dummy);
|
---|
241 | m_Stopwords.put("mayn't", dummy);
|
---|
242 | m_Stopwords.put("me", dummy);
|
---|
243 | m_Stopwords.put("mid", dummy);
|
---|
244 | m_Stopwords.put("midst", dummy);
|
---|
245 | m_Stopwords.put("might", dummy);
|
---|
246 | m_Stopwords.put("mightn't", dummy);
|
---|
247 | m_Stopwords.put("mine", dummy);
|
---|
248 | m_Stopwords.put("minus", dummy);
|
---|
249 | m_Stopwords.put("more", dummy);
|
---|
250 | m_Stopwords.put("most", dummy);
|
---|
251 | m_Stopwords.put("much", dummy);
|
---|
252 | m_Stopwords.put("must", dummy);
|
---|
253 | m_Stopwords.put("mustn't", dummy);
|
---|
254 | m_Stopwords.put("my", dummy);
|
---|
255 | m_Stopwords.put("myself", dummy);
|
---|
256 | m_Stopwords.put("n", dummy);
|
---|
257 | m_Stopwords.put("near", dummy);
|
---|
258 | m_Stopwords.put("'neath", dummy);
|
---|
259 | m_Stopwords.put("need", dummy);
|
---|
260 | m_Stopwords.put("needed", dummy);
|
---|
261 | m_Stopwords.put("needing", dummy);
|
---|
262 | m_Stopwords.put("needn't", dummy);
|
---|
263 | m_Stopwords.put("needs", dummy);
|
---|
264 | m_Stopwords.put("neither", dummy);
|
---|
265 | m_Stopwords.put("never", dummy);
|
---|
266 | m_Stopwords.put("nevertheless", dummy);
|
---|
267 | m_Stopwords.put("new", dummy);
|
---|
268 | m_Stopwords.put("next", dummy);
|
---|
269 | m_Stopwords.put("nigh", dummy);
|
---|
270 | m_Stopwords.put("nigher", dummy);
|
---|
271 | m_Stopwords.put("nighest", dummy);
|
---|
272 | m_Stopwords.put("nisi", dummy);
|
---|
273 | m_Stopwords.put("no", dummy);
|
---|
274 | m_Stopwords.put("no-one", dummy);
|
---|
275 | m_Stopwords.put("nobody", dummy);
|
---|
276 | m_Stopwords.put("none", dummy);
|
---|
277 | m_Stopwords.put("nor", dummy);
|
---|
278 | m_Stopwords.put("not", dummy);
|
---|
279 | m_Stopwords.put("nothing", dummy);
|
---|
280 | m_Stopwords.put("notwithstanding", dummy);
|
---|
281 | m_Stopwords.put("now", dummy);
|
---|
282 | m_Stopwords.put("o", dummy);
|
---|
283 | m_Stopwords.put("o'er", dummy);
|
---|
284 | m_Stopwords.put("of", dummy);
|
---|
285 | m_Stopwords.put("off", dummy);
|
---|
286 | m_Stopwords.put("often", dummy);
|
---|
287 | m_Stopwords.put("on", dummy);
|
---|
288 | m_Stopwords.put("once", dummy);
|
---|
289 | m_Stopwords.put("one", dummy);
|
---|
290 | m_Stopwords.put("oneself", dummy);
|
---|
291 | m_Stopwords.put("only", dummy);
|
---|
292 | m_Stopwords.put("onto", dummy);
|
---|
293 | m_Stopwords.put("open", dummy);
|
---|
294 | m_Stopwords.put("or", dummy);
|
---|
295 | m_Stopwords.put("other", dummy);
|
---|
296 | m_Stopwords.put("otherwise", dummy);
|
---|
297 | m_Stopwords.put("ought", dummy);
|
---|
298 | m_Stopwords.put("oughtn't", dummy);
|
---|
299 | m_Stopwords.put("our", dummy);
|
---|
300 | m_Stopwords.put("ours", dummy);
|
---|
301 | m_Stopwords.put("ourselves", dummy);
|
---|
302 | m_Stopwords.put("out", dummy);
|
---|
303 | m_Stopwords.put("outside", dummy);
|
---|
304 | m_Stopwords.put("over", dummy);
|
---|
305 | m_Stopwords.put("own", dummy);
|
---|
306 | m_Stopwords.put("p", dummy);
|
---|
307 | m_Stopwords.put("past", dummy);
|
---|
308 | m_Stopwords.put("pending", dummy);
|
---|
309 | m_Stopwords.put("per", dummy);
|
---|
310 | m_Stopwords.put("perhaps", dummy);
|
---|
311 | m_Stopwords.put("plus", dummy);
|
---|
312 | m_Stopwords.put("possible", dummy);
|
---|
313 | m_Stopwords.put("present", dummy);
|
---|
314 | m_Stopwords.put("probably", dummy);
|
---|
315 | m_Stopwords.put("provided", dummy);
|
---|
316 | m_Stopwords.put("providing", dummy);
|
---|
317 | m_Stopwords.put("public", dummy);
|
---|
318 | m_Stopwords.put("q", dummy);
|
---|
319 | m_Stopwords.put("qua", dummy);
|
---|
320 | m_Stopwords.put("quite", dummy);
|
---|
321 | m_Stopwords.put("r", dummy);
|
---|
322 | m_Stopwords.put("rather", dummy);
|
---|
323 | m_Stopwords.put("re", dummy);
|
---|
324 | m_Stopwords.put("real", dummy);
|
---|
325 | m_Stopwords.put("really", dummy);
|
---|
326 | m_Stopwords.put("respecting", dummy);
|
---|
327 | m_Stopwords.put("right", dummy);
|
---|
328 | m_Stopwords.put("round", dummy);
|
---|
329 | m_Stopwords.put("s", dummy);
|
---|
330 | m_Stopwords.put("same", dummy);
|
---|
331 | m_Stopwords.put("sans", dummy);
|
---|
332 | m_Stopwords.put("save", dummy);
|
---|
333 | m_Stopwords.put("saving", dummy);
|
---|
334 | m_Stopwords.put("second", dummy);
|
---|
335 | m_Stopwords.put("several", dummy);
|
---|
336 | m_Stopwords.put("shall", dummy);
|
---|
337 | m_Stopwords.put("shalt", dummy);
|
---|
338 | m_Stopwords.put("shan't", dummy);
|
---|
339 | m_Stopwords.put("she", dummy);
|
---|
340 | m_Stopwords.put("shed", dummy);
|
---|
341 | m_Stopwords.put("shell", dummy);
|
---|
342 | m_Stopwords.put("she's", dummy);
|
---|
343 | m_Stopwords.put("short", dummy);
|
---|
344 | m_Stopwords.put("should", dummy);
|
---|
345 | m_Stopwords.put("shouldn't", dummy);
|
---|
346 | m_Stopwords.put("since", dummy);
|
---|
347 | m_Stopwords.put("six", dummy);
|
---|
348 | m_Stopwords.put("small", dummy);
|
---|
349 | m_Stopwords.put("so", dummy);
|
---|
350 | m_Stopwords.put("some", dummy);
|
---|
351 | m_Stopwords.put("somebody", dummy);
|
---|
352 | m_Stopwords.put("someone", dummy);
|
---|
353 | m_Stopwords.put("something", dummy);
|
---|
354 | m_Stopwords.put("sometimes", dummy);
|
---|
355 | m_Stopwords.put("soon", dummy);
|
---|
356 | m_Stopwords.put("special", dummy);
|
---|
357 | m_Stopwords.put("still", dummy);
|
---|
358 | m_Stopwords.put("such", dummy);
|
---|
359 | m_Stopwords.put("summat", dummy);
|
---|
360 | m_Stopwords.put("supposing", dummy);
|
---|
361 | m_Stopwords.put("sure", dummy);
|
---|
362 | m_Stopwords.put("t", dummy);
|
---|
363 | m_Stopwords.put("than", dummy);
|
---|
364 | m_Stopwords.put("that", dummy);
|
---|
365 | m_Stopwords.put("that'd", dummy);
|
---|
366 | m_Stopwords.put("that'll", dummy);
|
---|
367 | m_Stopwords.put("that's", dummy);
|
---|
368 | m_Stopwords.put("the", dummy);
|
---|
369 | m_Stopwords.put("thee", dummy);
|
---|
370 | m_Stopwords.put("their", dummy);
|
---|
371 | m_Stopwords.put("theirs", dummy);
|
---|
372 | m_Stopwords.put("their's", dummy);
|
---|
373 | m_Stopwords.put("them", dummy);
|
---|
374 | m_Stopwords.put("themselves", dummy);
|
---|
375 | m_Stopwords.put("then", dummy);
|
---|
376 | m_Stopwords.put("there", dummy);
|
---|
377 | m_Stopwords.put("there's", dummy);
|
---|
378 | m_Stopwords.put("these", dummy);
|
---|
379 | m_Stopwords.put("they", dummy);
|
---|
380 | m_Stopwords.put("they'd", dummy);
|
---|
381 | m_Stopwords.put("they'll", dummy);
|
---|
382 | m_Stopwords.put("they're", dummy);
|
---|
383 | m_Stopwords.put("they've", dummy);
|
---|
384 | m_Stopwords.put("thine", dummy);
|
---|
385 | m_Stopwords.put("this", dummy);
|
---|
386 | m_Stopwords.put("tho", dummy);
|
---|
387 | m_Stopwords.put("those", dummy);
|
---|
388 | m_Stopwords.put("thou", dummy);
|
---|
389 | m_Stopwords.put("though", dummy);
|
---|
390 | m_Stopwords.put("three", dummy);
|
---|
391 | m_Stopwords.put("thro'", dummy);
|
---|
392 | m_Stopwords.put("through", dummy);
|
---|
393 | m_Stopwords.put("throughout", dummy);
|
---|
394 | m_Stopwords.put("thru", dummy);
|
---|
395 | m_Stopwords.put("thyself", dummy);
|
---|
396 | m_Stopwords.put("till", dummy);
|
---|
397 | m_Stopwords.put("to", dummy);
|
---|
398 | m_Stopwords.put("today", dummy);
|
---|
399 | m_Stopwords.put("together", dummy);
|
---|
400 | m_Stopwords.put("too", dummy);
|
---|
401 | m_Stopwords.put("touching", dummy);
|
---|
402 | m_Stopwords.put("toward", dummy);
|
---|
403 | m_Stopwords.put("towards", dummy);
|
---|
404 | m_Stopwords.put("true", dummy);
|
---|
405 | m_Stopwords.put("'twas", dummy);
|
---|
406 | m_Stopwords.put("'tween", dummy);
|
---|
407 | m_Stopwords.put("'twere", dummy);
|
---|
408 | m_Stopwords.put("'twill", dummy);
|
---|
409 | m_Stopwords.put("'twixt", dummy);
|
---|
410 | m_Stopwords.put("two", dummy);
|
---|
411 | m_Stopwords.put("'twould", dummy);
|
---|
412 | m_Stopwords.put("u", dummy);
|
---|
413 | m_Stopwords.put("under", dummy);
|
---|
414 | m_Stopwords.put("underneath", dummy);
|
---|
415 | m_Stopwords.put("unless", dummy);
|
---|
416 | m_Stopwords.put("unlike", dummy);
|
---|
417 | m_Stopwords.put("until", dummy);
|
---|
418 | m_Stopwords.put("unto", dummy);
|
---|
419 | m_Stopwords.put("up", dummy);
|
---|
420 | m_Stopwords.put("upon", dummy);
|
---|
421 | m_Stopwords.put("us", dummy);
|
---|
422 | m_Stopwords.put("used", dummy);
|
---|
423 | m_Stopwords.put("usually", dummy);
|
---|
424 | m_Stopwords.put("v", dummy);
|
---|
425 | m_Stopwords.put("versus", dummy);
|
---|
426 | m_Stopwords.put("very", dummy);
|
---|
427 | m_Stopwords.put("via", dummy);
|
---|
428 | m_Stopwords.put("vice", dummy);
|
---|
429 | m_Stopwords.put("vis-a-vis", dummy);
|
---|
430 | m_Stopwords.put("w", dummy);
|
---|
431 | m_Stopwords.put("wanna", dummy);
|
---|
432 | m_Stopwords.put("wanting", dummy);
|
---|
433 | m_Stopwords.put("was", dummy);
|
---|
434 | m_Stopwords.put("wasn't", dummy);
|
---|
435 | m_Stopwords.put("way", dummy);
|
---|
436 | m_Stopwords.put("we", dummy);
|
---|
437 | m_Stopwords.put("we'd", dummy);
|
---|
438 | m_Stopwords.put("well", dummy);
|
---|
439 | m_Stopwords.put("were", dummy);
|
---|
440 | m_Stopwords.put("weren't", dummy);
|
---|
441 | m_Stopwords.put("wert", dummy);
|
---|
442 | m_Stopwords.put("we've", dummy);
|
---|
443 | m_Stopwords.put("what", dummy);
|
---|
444 | m_Stopwords.put("whatever", dummy);
|
---|
445 | m_Stopwords.put("what'll", dummy);
|
---|
446 | m_Stopwords.put("what's", dummy);
|
---|
447 | m_Stopwords.put("when", dummy);
|
---|
448 | m_Stopwords.put("whencesoever", dummy);
|
---|
449 | m_Stopwords.put("whenever", dummy);
|
---|
450 | m_Stopwords.put("when's", dummy);
|
---|
451 | m_Stopwords.put("whereas", dummy);
|
---|
452 | m_Stopwords.put("where's", dummy);
|
---|
453 | m_Stopwords.put("whether", dummy);
|
---|
454 | m_Stopwords.put("which", dummy);
|
---|
455 | m_Stopwords.put("whichever", dummy);
|
---|
456 | m_Stopwords.put("whichsoever", dummy);
|
---|
457 | m_Stopwords.put("while", dummy);
|
---|
458 | m_Stopwords.put("whilst", dummy);
|
---|
459 | m_Stopwords.put("who", dummy);
|
---|
460 | m_Stopwords.put("who'd", dummy);
|
---|
461 | m_Stopwords.put("whoever", dummy);
|
---|
462 | m_Stopwords.put("whole", dummy);
|
---|
463 | m_Stopwords.put("who'll", dummy);
|
---|
464 | m_Stopwords.put("whom", dummy);
|
---|
465 | m_Stopwords.put("whore", dummy);
|
---|
466 | m_Stopwords.put("who's", dummy);
|
---|
467 | m_Stopwords.put("whose", dummy);
|
---|
468 | m_Stopwords.put("whoso", dummy);
|
---|
469 | m_Stopwords.put("whosoever", dummy);
|
---|
470 | m_Stopwords.put("will", dummy);
|
---|
471 | m_Stopwords.put("with", dummy);
|
---|
472 | m_Stopwords.put("within", dummy);
|
---|
473 | m_Stopwords.put("without", dummy);
|
---|
474 | m_Stopwords.put("wont", dummy);
|
---|
475 | m_Stopwords.put("would", dummy);
|
---|
476 | m_Stopwords.put("wouldn't", dummy);
|
---|
477 | m_Stopwords.put("wouldst", dummy);
|
---|
478 | m_Stopwords.put("x", dummy);
|
---|
479 | m_Stopwords.put("y", dummy);
|
---|
480 | m_Stopwords.put("ye", dummy);
|
---|
481 | m_Stopwords.put("yet", dummy);
|
---|
482 | m_Stopwords.put("you", dummy);
|
---|
483 | m_Stopwords.put("you'd", dummy);
|
---|
484 | m_Stopwords.put("you'll", dummy);
|
---|
485 | m_Stopwords.put("your", dummy);
|
---|
486 | m_Stopwords.put("you're", dummy);
|
---|
487 | m_Stopwords.put("yours", dummy);
|
---|
488 | m_Stopwords.put("yourself", dummy);
|
---|
489 | m_Stopwords.put("yourselves", dummy);
|
---|
490 | m_Stopwords.put("you've", dummy);
|
---|
491 | m_Stopwords.put("z", dummy);
|
---|
492 | }
|
---|
493 | }
|
---|
494 |
|
---|
495 | /**
|
---|
496 | * Returns true if the given string is a stop word.
|
---|
497 | */
|
---|
498 | public boolean isStopword(String str) {
|
---|
499 |
|
---|
500 | return m_Stopwords.containsKey(str.toLowerCase());
|
---|
501 | }
|
---|
502 | }
|
---|
503 |
|
---|
504 |
|
---|