source: gs3-extensions/solr/trunk/src/src/java/org/greenstone/LuceneWrapper/GS2Analyzer.java@ 24641

Last change on this file since 24641 was 24641, checked in by davidb, 13 years ago

Initial cut at Greenstone3 runtime code to support Solr. Solr code based on version 3.3, so this also includes an upgraded version of the LuceneWrapper code (gs2build/common-src/indexers/lucene-gs) that works with this version of the support jar files

  • Property svn:executable set to *
File size: 2.5 KB
Line 
1/**********************************************************************
2 *
3 * GS2Analyzer.java
4 *
5 * Copyright 2004 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26package org.greenstone.LuceneWrapper;
27
28
29import java.io.*;
30import java.util.Set;
31
32import org.apache.lucene.analysis.*;
33import org.apache.lucene.analysis.standard.*;
34
35import org.apache.lucene.analysis.ASCIIFoldingFilter;
36
37import org.apache.lucene.util.Version;
38
39
40class GS2Analyzer extends GS2StandardAnalyzer
41{
42
43 static Version matchVersion = Version.LUCENE_24;
44
45
46 public GS2Analyzer()
47 {
48 super(matchVersion);
49 }
50
51
52 public GS2Analyzer(Set stopWords)
53 {
54 super(matchVersion,stopWords);
55 }
56
57
58 public GS2Analyzer(String [] stopwords)
59 {
60 super(matchVersion,StopFilter.makeStopSet(stopwords));
61 }
62
63 @Override
64 protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
65 final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
66 src.setMaxTokenLength(maxTokenLength);
67 src.setReplaceInvalidAcronym(replaceInvalidAcronym);
68 TokenStream tok = new StandardFilter(matchVersion, src);
69 tok = new LowerCaseFilter(matchVersion, tok);
70 tok = new StopFilter(matchVersion, tok, stopwords);
71
72 // top it up with accent folding
73 tok = new ASCIIFoldingFilter(tok);
74
75 return new TokenStreamComponents(src, tok) {
76 @Override
77 protected boolean reset(final Reader reader) throws IOException {
78 src.setMaxTokenLength(GS2Analyzer.this.maxTokenLength);
79 return super.reset(reader);
80 }
81 };
82 }
83
84}
85
86
Note: See TracBrowser for help on using the repository browser.