[29148] | 1 | package org.greenstone.LuceneWrapper4;
|
---|
| 2 |
|
---|
| 3 | import java.io.File;
|
---|
| 4 | import java.io.IOException;
|
---|
| 5 |
|
---|
| 6 | import org.apache.lucene.analysis.Analyzer;
|
---|
| 7 | //import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
|
---|
| 8 | import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
|
---|
| 9 | import org.apache.lucene.index.IndexWriter;
|
---|
| 10 | import org.apache.lucene.index.IndexWriterConfig;
|
---|
| 11 | import org.apache.lucene.store.FSDirectory; //import org.apache.lucene.store.SimpleFSDirectory;
|
---|
| 12 | import org.apache.lucene.util.Version;
|
---|
| 13 | //import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
---|
| 14 |
|
---|
| 15 |
|
---|
| 16 | /**
|
---|
| 17 | * Commonly used static functions, saves some repetition.
|
---|
| 18 | * The OpenMode flags CREATE, APPEND, or CREATE_OR_APPEND passed around in each case are to keep
|
---|
| 19 | * the behaviour in the Lucene 4.7.2. upgrade similar to the default behaviour in Lucene 3.3.
|
---|
| 20 | */
|
---|
| 21 | public final class GSLuceneUtil {
|
---|
| 22 |
|
---|
| 23 | /**
|
---|
| 24 | * Having a private constructor prevents instantiation and this being a final class prevents subclassing,
|
---|
| 25 | * indicating that this is a class with purely static constants (and/or methods).
|
---|
| 26 | * See http://stackoverflow.com/questions/320588/interfaces-with-static-fields-in-java-for-sharing-constants
|
---|
| 27 | */
|
---|
| 28 | private GSLuceneUtil() {}
|
---|
| 29 |
|
---|
| 30 | public static IndexWriter getIndexWriter(String index_path)
|
---|
| 31 | throws IOException
|
---|
| 32 | {
|
---|
| 33 | return GSLuceneUtil.getIndexWriter(index_path, new GS2Analyzer(), IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
|
---|
| 34 | }
|
---|
| 35 |
|
---|
| 36 | public static IndexWriter getIndexWriter(String index_path, boolean create)
|
---|
| 37 | throws IOException
|
---|
| 38 | {
|
---|
| 39 | if(create) {
|
---|
| 40 | return GSLuceneUtil.getIndexWriter(index_path, new GS2Analyzer(), IndexWriterConfig.OpenMode.CREATE);
|
---|
| 41 | } else {
|
---|
| 42 | return GSLuceneUtil.getIndexWriter(index_path, new GS2Analyzer(), IndexWriterConfig.OpenMode.APPEND);
|
---|
| 43 | }
|
---|
| 44 | }
|
---|
| 45 |
|
---|
| 46 | public static IndexWriter getIndexWriter(String index_path, Analyzer analyzer, boolean create)
|
---|
| 47 | throws IOException
|
---|
| 48 | {
|
---|
| 49 | if(create) {
|
---|
| 50 | return GSLuceneUtil.getIndexWriter(index_path, analyzer, IndexWriterConfig.OpenMode.CREATE);
|
---|
| 51 | } else {
|
---|
| 52 | return GSLuceneUtil.getIndexWriter(index_path, analyzer, IndexWriterConfig.OpenMode.APPEND);
|
---|
| 53 | }
|
---|
| 54 | }
|
---|
| 55 |
|
---|
| 56 | public static IndexWriter getIndexWriter(String index_path, Analyzer analyzer,
|
---|
| 57 | IndexWriterConfig.OpenMode openMode)
|
---|
| 58 | throws IOException
|
---|
| 59 | {
|
---|
| 60 | IndexWriter index_writer = null;
|
---|
| 61 |
|
---|
| 62 | //SimpleFSDirectory index_path_dir = new SimpleFSDirectory(new File(index_path));
|
---|
| 63 | FSDirectory index_path_dir = FSDirectory.open(new File(index_path));
|
---|
| 64 |
|
---|
| 65 |
|
---|
| 66 | //if(GSLuceneConstants.MATCH_VERSION.compareTo(Version.LUCENE_36) < 0) {
|
---|
| 67 | // index_writer = new IndexWriter(index_path_dir, new GS2Analyzer(),
|
---|
| 68 | // org.apache.lucene.index.IndexWriter.MaxFieldLength.UNLIMITED);
|
---|
| 69 | //} else { // lucene 3.6 or greater
|
---|
| 70 |
|
---|
| 71 | //index_writer = new IndexWriter(index_path_dir, new IndexWriterConfig(GS_LUCENE_VERSION, new GS2Analyzer()));
|
---|
| 72 |
|
---|
| 73 | // see p.10 and 11 of http://alias-i.com/lingpipe-book/lucene-3-tutorial-0.5.pdf
|
---|
| 74 | // "In order to index all the text in a field, however long that field may be,
|
---|
| 75 | // we need to wrap the StandardAnalyzer (here GS2Analyzer) in a LimitTokenCountAnalyzer
|
---|
| 76 | // We set the maximum field length to Integer.MAX_VALUE, the largest possible value available."
|
---|
| 77 | Analyzer ltcAn = new LimitTokenCountAnalyzer(analyzer,Integer.MAX_VALUE);
|
---|
| 78 | IndexWriterConfig indexWriterConfig = new IndexWriterConfig(GSLuceneConstants.MATCH_VERSION, ltcAn);
|
---|
| 79 | if(openMode == null) {
|
---|
| 80 | indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
|
---|
| 81 | } else {
|
---|
| 82 | indexWriterConfig.setOpenMode(openMode);
|
---|
| 83 | }
|
---|
| 84 | // setOpenMode() only takes effect when IndexWriter is first created.
|
---|
| 85 | index_writer = new IndexWriter(index_path_dir, indexWriterConfig);
|
---|
| 86 |
|
---|
| 87 | //}
|
---|
| 88 |
|
---|
| 89 | return index_writer;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | } |
---|