Ignore:
Timestamp:
2011-10-05T15:44:19+13:00 (13 years ago)
Author:
davidb
Message:

Restructuring of Lucene version 2.x and 3.x to make it easier to control which one is used

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/common-src/indexers/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java

    r23194 r24725  
    4545import org.apache.lucene.index.Term;
    4646import org.apache.lucene.analysis.Analyzer;
     47
     48import org.apache.lucene.store.SimpleFSDirectory;
     49import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    4750
    4851import java.util.Stack;
     
    190193    protected String file_id_ = null;
    191194
    192     static private String[] stop_words = GS2Analyzer.STOP_WORDS;
    193 
    194 
    195195    /** pass in true if want to create a new index, false if want to use the existing one */
    196196    public Indexer (String doc_tag_level, File index_dir, boolean create)
     
    206206        reader.setFeature("http://xml.org/sax/features/validation", false);
    207207
    208         analyzer_ = new GS2Analyzer(stop_words);
    209 
    210         writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
     208        SimpleFSDirectory index_dir_dir = new SimpleFSDirectory(new File(index_dir.getPath()));
     209
     210        analyzer_ = new GS2Analyzer(); // uses build in stop_word_set
     211
     212        writer_ = new IndexWriter(index_dir_dir, analyzer_, create, MaxFieldLength.UNLIMITED);
     213                       
    211214        // by default, will only index 10,000 words per document
    212215        // Can throw out_of_memory errors
     
    318321        //String node_id = atts.getValue("gs2:id");
    319322        //print(" " + qName + ": " + node_id + " (" + mode_ + ")" );
    320         //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));
     323        //current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.NOT_ANALYZED));
    321324       
    322325        current_doc_oid_ = atts.getValue("gs2:docOID");
    323326        print(" " + qName + ": " + current_doc_oid_ + " (" + mode_ + ")" );
    324         current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
     327        current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.NOT_ANALYZED));
    325328        }
    326329       
     
    359362        if (qName.equals(indexable_current_node_))
    360363            {
    361             current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
     364            current_doc_.add(new Field(qName, current_contents_, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
    362365            // The byXX fields are used for sorting search results
    363366            // We don't want to do that for Text or AllFields fields
     
    365368            if (!qName.equals("TX") && !qName.equals("ZZ"))
    366369                {
    367                 current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
     370                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
    368371                }
    369372           
     
    472475    {
    473476        debug("GS2LuceneDelete.deleteDocument(" + doc_id + ")");
    474         debug("- Initial number of documents in index: " + writer_.docCount());
     477        debug("- Initial number of documents in index: " + writer_.numDocs());
    475478        writer_.deleteDocuments(new Term("docOID", doc_id));
    476         debug("- Final number of documents in index: " + writer_.docCount());
     479        debug("- Final number of documents in index: " + writer_.numDocs());
    477480    }
    478481
Note: See TracChangeset for help on using the changeset viewer.