Changeset 17804 for indexers/trunk/lucene-gs
- Timestamp: 2008-11-10T20:46:44+13:00
- Location: indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper
- Files: 1 added, 2 edited
indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneIndexer.java
--- GS2LuceneIndexer.java (r16583)
+++ GS2LuceneIndexer.java (r17804)
@@ -44,5 +44,5 @@
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.Analyzer;
 
 import java.util.Stack;
@@ -61,5 +61,4 @@
     public static void main (String args[]) throws Exception
     {
-
         int verbosity = 1;
         // Default is to edit the existing index
@@ -150,4 +149,5 @@
     {
         IndexWriter writer_ = null;
+        Analyzer analyzer_ = null;
         SAXParser sax_parser_ = null;
         String doc_tag_level_ = null;
@@ -164,4 +164,6 @@
         protected String file_id_ = null;
 
+        static private String[] stop_words = GS2Analyzer.STOP_WORDS;
+
         /** pass in true if want to create a new index, false if want to use the existing one */
         public Indexer (String doc_tag_level, File index_dir, boolean create)
@@ -177,5 +179,7 @@
             reader.setFeature("http://xml.org/sax/features/validation", false);
 
-            writer_ = new IndexWriter(index_dir.getPath(), new StandardAnalyzer(), create);
+            analyzer_ = new GS2Analyzer(stop_words);
+
+            writer_ = new IndexWriter(index_dir.getPath(), analyzer_, create);
             // by default, will only index 10,000 words per document
             // Can throw out_of_memory errors
@@ -267,7 +271,7 @@
             String node_id = atts.getValue("gs2:id");
             print(" " + qName + ": " + node_id );
-            current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.UN_TOKENIZED));
+            current_doc_.add(new Field("nodeID", node_id, Field.Store.YES, Field.Index.TOKENIZED));
 
             current_doc_oid_ = atts.getValue("gs2:docOID");
-            current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.UN_TOKENIZED));
+            current_doc_.add(new Field("docOID", current_doc_oid_, Field.Store.YES, Field.Index.TOKENIZED));
         }
@@ -302,4 +306,4 @@
             if (!qName.equals("TX"))
             {
-                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
+                current_doc_.add(new Field("by" + qName, current_contents_, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
 
@@ -310,5 +314,5 @@
             if (qName.equals(doc_tag_level_)) {
                 try {
-                    writer_.updateDocument(new Term("docOID", current_doc_oid_), current_doc_);
+                    writer_.updateDocument(new Term("docOID", current_doc_oid_), current_doc_, analyzer_);
                 }
                 catch (java.io.IOException e) {
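The changeset's "1 added" file is not shown above, but both diffs reference a new GS2Analyzer class, via its STOP_WORDS constant and a GS2Analyzer(String[]) constructor. Purely as a hypothetical sketch of what such a class could look like against the Lucene 2.x API of the era: nothing below beyond the class name, the constructor signature, and the STOP_WORDS field is confirmed by the changeset.

    import java.io.Reader;
    import java.util.Set;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.StopFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.standard.StandardFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    // Hypothetical reconstruction: the real GS2Analyzer.java is the added
    // file in this changeset and its contents are not shown here.
    public class GS2Analyzer extends Analyzer
    {
        // The diffs reference GS2Analyzer.STOP_WORDS; re-exporting Lucene's
        // standard English list is one plausible definition.
        public static final String[] STOP_WORDS = StandardAnalyzer.STOP_WORDS;

        private Set stop_set;

        public GS2Analyzer(String[] stop_words)
        {
            stop_set = StopFilter.makeStopSet(stop_words);
        }

        // Conventional Lucene 2.x analysis chain: tokenize, normalise,
        // lower-case, then drop the configured stop words.
        public TokenStream tokenStream(String fieldName, Reader reader)
        {
            TokenStream result = new StandardTokenizer(reader);
            result = new StandardFilter(result);
            result = new LowerCaseFilter(result);
            result = new StopFilter(result, stop_set);
            return result;
        }
    }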
indexers/trunk/lucene-gs/src/org/greenstone/LuceneWrapper/GS2LuceneQuery.java
--- GS2LuceneQuery.java (r16947)
+++ GS2LuceneQuery.java (r17804)
@@ -59,5 +59,5 @@
 
     // Use the standard set of English stop words by default
-    static private String[] stop_words = StandardAnalyzer.STOP_WORDS;
+    static private String[] stop_words = GS2Analyzer.STOP_WORDS;
 
     private String full_indexdir="";
@@ -94,5 +94,5 @@
         // Create one query parser with the standard set of stop words, and one with none
 
-        query_parser = new QueryParser(TEXTFIELD, new StandardAnalyzer(stop_words));
-        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new StandardAnalyzer(new String[] { }));
+        query_parser = new QueryParser(TEXTFIELD, new GS2Analyzer(stop_words));
+        query_parser_no_stop_words = new QueryParser(TEXTFIELD, new GS2Analyzer(new String[] { }));
     }
@@ -134,4 +134,6 @@
         query_including_stop_words = query_including_stop_words.rewrite(reader);
 
+        // System.err.println("********* query_string " + query_string + "****");
+
         Query query = parseQuery(reader, query_parser, query_string, fuzziness);
         query = query.rewrite(reader);
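Why both files must switch together: Lucene only finds terms when the query is analyzed the same way the index was built, so replacing StandardAnalyzer on the indexing side (GS2LuceneIndexer) forces the same replacement on the query side (GS2LuceneQuery). A minimal sketch of that contract, assuming Lucene 2.x and the hypothetical GS2Analyzer sketched earlier; the index path and the "TX" field name are illustrative stand-ins, not taken from the changeset.

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Query;

    public class AnalyzerConsistencyDemo
    {
        public static void main(String[] args) throws Exception
        {
            // One analyzer shared by both sides; mixing analyzers makes
            // stop-word and tokenization behaviour diverge, so queries can
            // silently miss indexed terms.
            Analyzer analyzer = new GS2Analyzer(GS2Analyzer.STOP_WORDS);

            // Index side, as in GS2LuceneIndexer.Indexer (path is illustrative).
            IndexWriter writer = new IndexWriter("/tmp/demo-index", analyzer, true);
            writer.close();

            // Query side, as in GS2LuceneQuery ("TX" stands in for TEXTFIELD).
            QueryParser parser = new QueryParser("TX", analyzer);
            Query query = parser.parse("some search terms");
            System.out.println(query.toString("TX"));
        }
    }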