Changeset 24731 for main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java
- Timestamp: 2011-10-07T11:36:07+13:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java
--- GS2LuceneIndexer.java (r24725)
+++ GS2LuceneIndexer.java (r24731)
@@ -45,7 +45,4 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.analysis.Analyzer;
-
-import org.apache.lucene.store.SimpleFSDirectory;
-import org.apache.lucene.index.IndexWriter.MaxFieldLength;

 import java.util.Stack;
@@ -193,4 +190,7 @@
 protected String file_id_ = null;

+static private String[] stop_words = GS2Analyzer.STOP_WORDS;
+
+
 /** pass in true if want to create a new index, false if want to use the existing one */
 public Indexer (String doc_tag_level, File index_dir, boolean create)
@@ -206,10 +206,7 @@
 reader.setFeature("http://xml.org/sax/features/validation", false);

-SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath()));
-
-analyzer_ = new GS2Analyzer(); // uses build in stop_word_set
-
-writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED);
-
+analyzer_ = new GS2Analyzer(stop_words);
+
+writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
 // by default, will only index 10,000 words per document
 // Can throw out_of_memory errors
@@ -321,9 +318,9 @@
 //String node_id = atts.getValue("gs2:id");
 //print(" " + qName + ": " + node_id + " (" + mode_ + ")" );
-//current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED));
+//current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));

 current_doc_oid_ = atts.getValue("gs2:docOID");
 print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" );
-current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED));
+current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
 }

@@ -362,5 +359,5 @@
 if (qName.equals(indexable_current_node_))
 {
-current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
+current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
 // The byXX fields are used for sorting search results
 // We don't want to do that for Text or AllFields fields
@@ -368,5 +365,5 @@
 if (!qName.equals("TX") && !qName.equals("ZZ"))
 {
-current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
+current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
 }

@@ -475,7 +472,7 @@
 {
 debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")");
-debug("- Initial number of documents in index: " + writer_.numDocs());
+debug("- Initial number of documents in index: " + writer_.docCount());
 writer_.deleteDocuments(new Term("docOID", doc_id));
-debug("- Final number of documents in index: " + writer_.numDocs());
+debug("- Final number of documents in index: " + writer_.docCount());
 }

Note: See TracChangeset for help on using the changeset viewer.