source: main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/lucene-version-3.3/GS2Analyzer.java@ 24716

Last change on this file since 24716 was 24716, checked in by davidb, 13 years ago

A version of the LuceneWrapper code that works with Lucene version 3.3

  • Property svn:executable set to *
File size: 2.5 KB
Line 
1/**********************************************************************
2 *
3 * GS2Analyzer.java
4 *
5 * Copyright 2004 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26package org.greenstone.LuceneWrapper;
27
28
29import java.io.*;
30import java.util.Set;
31
32import org.apache.lucene.analysis.*;
33import org.apache.lucene.analysis.standard.*;
34
35import org.apache.lucene.analysis.ASCIIFoldingFilter;
36
37import org.apache.lucene.util.Version;
38
39
40class GS2Analyzer extends GS2StandardAnalyzer
41{
42
43 static Version matchVersion = Version.LUCENE_24;
44
45
46 public GS2Analyzer()
47 {
48 super(matchVersion);
49 }
50
51
52 public GS2Analyzer(Set stopWords)
53 {
54 super(matchVersion,stopWords);
55 }
56
57
58 public GS2Analyzer(String [] stopwords)
59 {
60 super(matchVersion,StopFilter.makeStopSet(stopwords));
61 }
62
63 @Override
64 protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
65 final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
66 src.setMaxTokenLength(maxTokenLength);
67 src.setReplaceInvalidAcronym(replaceInvalidAcronym);
68 TokenStream tok = new StandardFilter(matchVersion, src);
69 tok = new LowerCaseFilter(matchVersion, tok);
70 tok = new StopFilter(matchVersion, tok, stopwords);
71
72 // top it up with accent folding
73 tok = new ASCIIFoldingFilter(tok);
74
75 return new TokenStreamComponents(src, tok) {
76 @Override
77 protected boolean reset(final Reader reader) throws IOException {
78 src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
79 return super.reset(reader);
80 }
81 };
82 }
83
84}
85
86
Note: See TracBrowser for help on using the repository browser.