- Timestamp:
- 2018-09-25T10:56:42+12:00 (6 years ago)
- Location:
- main/trunk/greenstone3/web/sites/localsite/collect/gberg/java
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Indexer.java
r20341 r32485 1 1 import org.greenstone.gsdl3.util.GSEntityResolver; 2 import org.greenstone.LuceneWrapper4.GSLuceneUtil; 3 import org.greenstone.LuceneWrapper4.GSLuceneConstants; 2 4 3 5 import org.xml.sax.Attributes; … … 9 11 import javax.xml.parsers.SAXParserFactory; 10 12 13 14 import org.apache.lucene.analysis.Analyzer; 15 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer; 16 import org.apache.lucene.index.IndexWriter; 17 import org.apache.lucene.index.IndexWriterConfig; 18 import org.apache.lucene.store.FSDirectory; 19 import org.apache.lucene.util.Version; 20 11 21 import org.apache.lucene.document.Document; 12 22 import org.apache.lucene.document.Field; 23 import org.apache.lucene.document.StoredField; 24 import org.apache.lucene.document.TextField; 13 25 //import org.apache.lucene.document.DateField; 14 26 import org.apache.lucene.index.IndexWriter; 15 27 import org.apache.lucene.analysis.standard.StandardAnalyzer; 28 import org.apache.lucene.util.Version; 16 29 //import org.apache.lucene.analysis.SimpleAnalyzer; 17 30 … … 40 53 SAXParserFactory sax_factory = SAXParserFactory.newInstance(); 41 54 sax_parser = sax_factory.newSAXParser(); 42 writer = new IndexWriter(index_dir.getPath(), new StandardAnalyzer(), create); 55 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); 56 Analyzer ltcAn = new LimitTokenCountAnalyzer(analyzer,Integer.MAX_VALUE); 57 58 IndexWriterConfig.OpenMode open_mode; 43 59 if (create) { 44 writer.optimize(); 45 } 60 open_mode = IndexWriterConfig.OpenMode.CREATE; 61 } else { 62 open_mode = IndexWriterConfig.OpenMode.APPEND; 63 } 64 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, ltcAn); 65 indexWriterConfig.setOpenMode(open_mode); 66 67 FSDirectory index_fs_dir = FSDirectory.open(index_dir); 68 writer = new IndexWriter(index_fs_dir, indexWriterConfig); 46 69 47 70 } catch (Exception e) { … … 68 91 public void finish() { 69 92 try { 70 writer.optimize();71 93 writer.close(); 72 94 } catch (Exception e) {} … … 104 126 id += "/>"; 105 127 128 String value; 106 129 if (scope.equals(qName)) { 107 108 current_doc.add(new Field("nodeID", this.file_id+"."+qName, 109 Field.Store.YES,Field.Index.NO)); 130 value = this.file_id+"."+qName; 110 131 } else { 111 current_doc.add(new Field("nodeID", this.file_id+"."+scope+"."+qName+"."+node_id, 112 Field.Store.YES,Field.Index.NO)); 113 } 132 value = this.file_id+"."+scope+"."+qName+"."+node_id; 133 } 134 current_doc.add(new StoredField("nodeID", value)); 135 114 136 } 115 137 } 116 138 public void endElement(String uri, String localName, String qName) throws SAXException { 117 139 if (XMLTagInfo.isIndexable(qName) && qName.equals(current_node)) { 118 current_doc.add(new Field("content", current_contents, 119 Field.Store.NO,Field.Index.TOKENIZED)); 140 current_doc.add(new TextField("content", current_contents, Field.Store.NO)); 120 141 try { 121 142 writer.addDocument(current_doc); -
main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Search.java
r5956 r32485 2 2 * search.java 3 3 * 4 * Created on 25 February 2003, 02:25 5 */ 4 * Simple command line program to search the lucene index. 5 * Run like: java -classpath $CLASSPATH:<path-to-gberg>/java Search <index directory> 6 * where the index directory is the index/idx folder. 7 * 8 * Copyright 2003 The New Zealand Digital Library Project 9 * 10 * A component of the Greenstone digital library software 11 * from the New Zealand Digital Library Project at the 12 * University of Waikato, New Zealand. 13 * 14 * This program is free software; you can redistribute it and/or modify 15 * it under the terms of the GNU General Public License as published by 16 * the Free Software Foundation; either version 2 of the License, or 17 * (at your option) any later version. 18 * 19 * This program is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 * GNU General Public License for more details. 23 * 24 * You should have received a copy of the GNU General Public License 25 * along with this program; if not, write to the Free Software 26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 27 * 28 *********************************************************************/ 6 29 7 8 /**9 *10 * @author [email protected]11 * @version12 */13 30 14 31 import java.io.IOException; 15 32 import java.io.BufferedReader; 16 33 import java.io.InputStreamReader; 34 import java.io.File; 17 35 18 import org.apache.lucene.analysis.Analyzer;19 import org.apache.lucene.analysis.standard.StandardAnalyzer;36 //import org.apache.lucene.analysis.Analyzer; 37 //import org.apache.lucene.analysis.standard.StandardAnalyzer; 20 38 import org.apache.lucene.document.Document; 21 39 import org.apache.lucene.search.Searcher; 22 40 import org.apache.lucene.search.IndexSearcher; 23 41 import org.apache.lucene.search.Query; 24 import org.apache.lucene.search.Hits; 42 //import org.apache.lucene.search.Hits; 43 import org.apache.lucene.search.TopDocs; 25 44 import org.apache.lucene.queryParser.QueryParser; 26 45 import org.apache.lucene.search.TermQuery; 27 46 import org.apache.lucene.index.Term; 47 import org.apache.lucene.store.Directory; 48 import org.apache.lucene.store.FSDirectory; 49 import org.apache.lucene.index.DirectoryReader; 50 import org.apache.lucene.index.IndexReader; 51 import org.apache.lucene.util.Version; 28 52 29 53 public class Search { … … 36 60 } 37 61 try { 38 Searcher searcher = new IndexSearcher(args[0]);39 Analyzer analyzer = new StandardAnalyzer();40 62 41 BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); 42 while (true) { 43 System.out.print("Query: "); 44 String line = in.readLine(); 63 Directory indexdir_dir = FSDirectory.open(new File(args[0])); 64 IndexReader reader = DirectoryReader.open(indexdir_dir); 65 IndexSearcher searcher = new IndexSearcher(reader); 66 67 BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); 68 System.out.println("Type .q to quit"); 69 while (true) { 70 System.out.print("Query: "); 71 String line = in.readLine(); 72 line.trim(); 73 if (line.equals(".q")) 74 break; 45 75 46 if (line.length() == -1) 47 break; 76 Term term = new Term("content",line); 48 77 49 Term term = new Term("content",line); 78 Query query = new TermQuery(term); 79 System.out.println("Searching for: " + query.toString("content")); 80 final int HITS_PER_PAGE=10; 50 81 51 Query query = new TermQuery(term); 52 System.out.println("Searching for: " + query.toString("content")); 82 TopDocs hits = searcher.search(query, Integer.MAX_VALUE); 83 System.out.println(hits.totalHits + " total matching documents"); 84 for (int start = 0; start < hits.totalHits; start += HITS_PER_PAGE) { 85 86 int end = Math.min(hits.totalHits, start + HITS_PER_PAGE); 87 for (int i = start; i < end; i++) { 88 int docnum = hits.scoreDocs[i].doc; 89 Document doc = reader.document(docnum); 90 String node_id = doc.get("nodeID"); 91 System.out.println(i + ". ID: "+node_id); 92 } 93 if (hits.totalHits > end) { 94 System.out.print("more (y/n) ? "); 95 line = in.readLine(); 96 if (line.length() == 0 || line.charAt(0) == 'n') 97 break; 98 } 99 } 53 100 54 Hits hits = searcher.search(query); 55 System.out.println(hits.length() + " total matching documents"); 56 57 final int HITS_PER_PAGE=10; 58 59 for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) { 60 int end = Math.min(hits.length(), start + HITS_PER_PAGE); 61 for (int i = start; i < end; i++) { 62 Document doc = hits.doc(i); 63 String node_id= doc.get("nodeID"); 64 System.out.println(i + ". ID: "+node_id); 65 } 66 67 if (hits.length() > end) { 68 System.out.print("more (y/n) ? "); 69 line = in.readLine(); 70 if (line.length() == 0 || line.charAt(0) == 'n') 71 break; 72 } 73 } 74 75 } 101 } 76 102 77 searcher.close();103 reader.close(); 78 104 } 79 105 catch (Exception e) { 80 81 106 System.out.println(" caught a " + e.getClass() + 107 "\n with message: " + e.getMessage()); 82 108 } 83 109 }
Note:
See TracChangeset
for help on using the changeset viewer.