- Timestamp: 2018-09-25T10:56:42+12:00 (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Indexer.java
r20341 r32485 1 1 import org.greenstone.gsdl3.util.GSEntityResolver; 2 import org.greenstone.LuceneWrapper4.GSLuceneUtil; 3 import org.greenstone.LuceneWrapper4.GSLuceneConstants; 2 4 3 5 import org.xml.sax.Attributes; … … 9 11 import javax.xml.parsers.SAXParserFactory; 10 12 13 14 import org.apache.lucene.analysis.Analyzer; 15 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer; 16 import org.apache.lucene.index.IndexWriter; 17 import org.apache.lucene.index.IndexWriterConfig; 18 import org.apache.lucene.store.FSDirectory; 19 import org.apache.lucene.util.Version; 20 11 21 import org.apache.lucene.document.Document; 12 22 import org.apache.lucene.document.Field; 23 import org.apache.lucene.document.StoredField; 24 import org.apache.lucene.document.TextField; 13 25 //import org.apache.lucene.document.DateField; 14 26 import org.apache.lucene.index.IndexWriter; 15 27 import org.apache.lucene.analysis.standard.StandardAnalyzer; 28 import org.apache.lucene.util.Version; 16 29 //import org.apache.lucene.analysis.SimpleAnalyzer; 17 30 … … 40 53 SAXParserFactory sax_factory = SAXParserFactory.newInstance(); 41 54 sax_parser = sax_factory.newSAXParser(); 42 writer = new IndexWriter(index_dir.getPath(), new StandardAnalyzer(), create); 55 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); 56 Analyzer ltcAn = new LimitTokenCountAnalyzer(analyzer,Integer.MAX_VALUE); 57 58 IndexWriterConfig.OpenMode open_mode; 43 59 if (create) { 44 writer.optimize(); 45 } 60 open_mode = IndexWriterConfig.OpenMode.CREATE; 61 } else { 62 open_mode = IndexWriterConfig.OpenMode.APPEND; 63 } 64 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, ltcAn); 65 indexWriterConfig.setOpenMode(open_mode); 66 67 FSDirectory index_fs_dir = FSDirectory.open(index_dir); 68 writer = new IndexWriter(index_fs_dir, indexWriterConfig); 46 69 47 70 } catch (Exception e) { … … 68 91 public void finish() { 69 92 try { 70 writer.optimize();71 93 writer.close(); 
72 94 } catch (Exception e) {} … … 104 126 id += "/>"; 105 127 128 String value; 106 129 if (scope.equals(qName)) { 107 108 current_doc.add(new Field("nodeID", this.file_id+"."+qName, 109 Field.Store.YES,Field.Index.NO)); 130 value = this.file_id+"."+qName; 110 131 } else { 111 current_doc.add(new Field("nodeID", this.file_id+"."+scope+"."+qName+"."+node_id, 112 Field.Store.YES,Field.Index.NO)); 113 } 132 value = this.file_id+"."+scope+"."+qName+"."+node_id; 133 } 134 current_doc.add(new StoredField("nodeID", value)); 135 114 136 } 115 137 } 116 138 public void endElement(String uri, String localName, String qName) throws SAXException { 117 139 if (XMLTagInfo.isIndexable(qName) && qName.equals(current_node)) { 118 current_doc.add(new Field("content", current_contents, 119 Field.Store.NO,Field.Index.TOKENIZED)); 140 current_doc.add(new TextField("content", current_contents, Field.Store.NO)); 120 141 try { 121 142 writer.addDocument(current_doc);
Note: See TracChangeset for help on using the changeset viewer.