Changeset 32485 for main/trunk

Show
Ignore:
Timestamp:
25.09.2018 10:56:42 (12 months ago)
Author:
kjdon
Message:

Greenstone now uses Lucene 4.7.2, so this code has been upgraded to match.

Location:
main/trunk/greenstone3/web/sites/localsite/collect/gberg/java
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Indexer.java

    r20341 r32485  
    11import org.greenstone.gsdl3.util.GSEntityResolver; 
     2import org.greenstone.LuceneWrapper4.GSLuceneUtil; 
     3import org.greenstone.LuceneWrapper4.GSLuceneConstants; 
    24 
    35import org.xml.sax.Attributes; 
     
    911import javax.xml.parsers.SAXParserFactory; 
    1012 
     13 
     14import org.apache.lucene.analysis.Analyzer; 
     15import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer; 
     16import org.apache.lucene.index.IndexWriter; 
     17import org.apache.lucene.index.IndexWriterConfig; 
     18import org.apache.lucene.store.FSDirectory; 
     19import org.apache.lucene.util.Version; 
     20 
    1121import org.apache.lucene.document.Document; 
    1222import org.apache.lucene.document.Field; 
     23import org.apache.lucene.document.StoredField; 
     24import org.apache.lucene.document.TextField; 
    1325//import org.apache.lucene.document.DateField; 
    1426import org.apache.lucene.index.IndexWriter; 
    1527import org.apache.lucene.analysis.standard.StandardAnalyzer; 
     28import org.apache.lucene.util.Version; 
    1629//import org.apache.lucene.analysis.SimpleAnalyzer; 
    1730 
     
    4053        SAXParserFactory sax_factory = SAXParserFactory.newInstance(); 
    4154        sax_parser = sax_factory.newSAXParser(); 
    42         writer = new IndexWriter(index_dir.getPath(), new StandardAnalyzer(), create); 
     55        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); 
     56        Analyzer ltcAn = new LimitTokenCountAnalyzer(analyzer,Integer.MAX_VALUE); 
     57 
     58        IndexWriterConfig.OpenMode open_mode; 
    4359        if (create) { 
    44         writer.optimize(); 
    45         } 
     60          open_mode = IndexWriterConfig.OpenMode.CREATE; 
     61        } else { 
     62          open_mode = IndexWriterConfig.OpenMode.APPEND; 
     63        } 
     64        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, ltcAn); 
     65        indexWriterConfig.setOpenMode(open_mode); 
     66 
     67        FSDirectory index_fs_dir = FSDirectory.open(index_dir); 
     68        writer = new IndexWriter(index_fs_dir, indexWriterConfig); 
    4669 
    4770    } catch (Exception e) { 
     
    6891    public void finish() { 
    6992    try { 
    70         writer.optimize(); 
    7193        writer.close(); 
    7294    } catch (Exception e) {} 
     
    104126        id += "/>"; 
    105127 
     128        String value; 
    106129        if (scope.equals(qName)) { 
    107  
    108             current_doc.add(new Field("nodeID", this.file_id+"."+qName, 
    109                       Field.Store.YES,Field.Index.NO)); 
     130          value = this.file_id+"."+qName; 
    110131        } else { 
    111         current_doc.add(new Field("nodeID", this.file_id+"."+scope+"."+qName+"."+node_id, 
    112                       Field.Store.YES,Field.Index.NO)); 
    113         } 
     132          value = this.file_id+"."+scope+"."+qName+"."+node_id; 
     133        } 
     134        current_doc.add(new StoredField("nodeID", value)); 
     135                   
    114136    } 
    115137    } 
    116138    public void endElement(String uri, String localName, String qName) throws SAXException { 
    117139    if (XMLTagInfo.isIndexable(qName) && qName.equals(current_node)) { 
    118         current_doc.add(new Field("content", current_contents, 
    119                       Field.Store.NO,Field.Index.TOKENIZED)); 
     140      current_doc.add(new TextField("content", current_contents, Field.Store.NO)); 
    120141        try { 
    121142        writer.addDocument(current_doc); 
  • main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Search.java

    r5956 r32485  
    22 * search.java 
    33 * 
    4  * Created on 25 February 2003, 02:25 
    5  */ 
     4 * Simple command line program to search the lucene index. 
     5 * Run like: java -classpath $CLASSPATH:<path-to-gberg>/java Search <index directory> 
     6 * where the index directory is the index/idx folder. 
     7 * 
     8 * Copyright 2003 The New Zealand Digital Library Project 
     9 * 
     10 * A component of the Greenstone digital library software 
     11 * from the New Zealand Digital Library Project at the 
     12 * University of Waikato, New Zealand. 
     13 * 
     14 * This program is free software; you can redistribute it and/or modify 
     15 * it under the terms of the GNU General Public License as published by 
     16 * the Free Software Foundation; either version 2 of the License, or 
     17 * (at your option) any later version. 
     18 * 
     19 * This program is distributed in the hope that it will be useful, 
     20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
     21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
     22 * GNU General Public License for more details. 
     23 * 
     24 * You should have received a copy of the GNU General Public License 
     25 * along with this program; if not, write to the Free Software 
     26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
     27 * 
     28 *********************************************************************/ 
    629 
    7  
    8 /** 
    9  * 
    10  * @author  kjdon@cs.waikato.ac.nz 
    11  * @version  
    12  */ 
    1330 
    1431import java.io.IOException; 
    1532import java.io.BufferedReader; 
    1633import java.io.InputStreamReader; 
     34import java.io.File; 
    1735 
    18 import org.apache.lucene.analysis.Analyzer; 
    19 import org.apache.lucene.analysis.standard.StandardAnalyzer; 
     36//import org.apache.lucene.analysis.Analyzer; 
     37//import org.apache.lucene.analysis.standard.StandardAnalyzer; 
    2038import org.apache.lucene.document.Document; 
    2139import org.apache.lucene.search.Searcher; 
    2240import org.apache.lucene.search.IndexSearcher; 
    2341import org.apache.lucene.search.Query; 
    24 import org.apache.lucene.search.Hits; 
     42//import org.apache.lucene.search.Hits; 
     43import org.apache.lucene.search.TopDocs; 
    2544import org.apache.lucene.queryParser.QueryParser; 
    2645import org.apache.lucene.search.TermQuery; 
    2746import org.apache.lucene.index.Term; 
     47import org.apache.lucene.store.Directory; 
     48import org.apache.lucene.store.FSDirectory; 
     49import org.apache.lucene.index.DirectoryReader; 
     50import org.apache.lucene.index.IndexReader; 
     51import org.apache.lucene.util.Version; 
    2852 
    2953public class Search  { 
     
    3660    } 
    3761        try { 
    38         Searcher searcher = new IndexSearcher(args[0]); 
    39         Analyzer analyzer = new StandardAnalyzer(); 
    4062 
    41         BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); 
    42         while (true) { 
    43         System.out.print("Query: "); 
    44         String line = in.readLine(); 
     63      Directory indexdir_dir = FSDirectory.open(new File(args[0])); 
     64      IndexReader reader = DirectoryReader.open(indexdir_dir); 
     65      IndexSearcher searcher = new IndexSearcher(reader); 
     66 
     67      BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); 
     68      System.out.println("Type .q to quit"); 
     69      while (true) { 
     70        System.out.print("Query: "); 
     71        String line = in.readLine(); 
     72        line.trim(); 
     73        if (line.equals(".q")) 
     74          break; 
    4575         
    46         if (line.length() == -1) 
    47             break; 
     76        Term term = new Term("content",line); 
    4877         
    49         Term term = new Term("content",line); 
     78        Query query = new TermQuery(term); 
     79        System.out.println("Searching for: " + query.toString("content")); 
     80        final int HITS_PER_PAGE=10; 
    5081         
    51         Query query = new TermQuery(term); 
    52         System.out.println("Searching for: " + query.toString("content")); 
     82        TopDocs hits = searcher.search(query, Integer.MAX_VALUE); 
     83        System.out.println(hits.totalHits + " total matching documents"); 
     84        for (int start = 0; start < hits.totalHits; start += HITS_PER_PAGE) { 
     85           
     86          int end = Math.min(hits.totalHits, start + HITS_PER_PAGE); 
     87          for (int i = start; i < end; i++) { 
     88        int docnum = hits.scoreDocs[i].doc; 
     89        Document doc = reader.document(docnum); 
     90        String node_id = doc.get("nodeID"); 
     91        System.out.println(i + ". ID: "+node_id); 
     92          } 
     93          if (hits.totalHits > end) { 
     94        System.out.print("more (y/n) ? "); 
     95        line = in.readLine(); 
     96        if (line.length() == 0 || line.charAt(0) == 'n') 
     97          break; 
     98          } 
     99        } 
    53100         
    54         Hits hits = searcher.search(query); 
    55         System.out.println(hits.length() + " total matching documents"); 
    56          
    57         final int HITS_PER_PAGE=10; 
    58          
    59         for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) { 
    60             int end = Math.min(hits.length(), start + HITS_PER_PAGE); 
    61             for (int i = start; i < end; i++) { 
    62             Document doc = hits.doc(i); 
    63             String node_id= doc.get("nodeID"); 
    64             System.out.println(i + ". ID: "+node_id); 
    65             } 
    66              
    67             if (hits.length() > end) { 
    68             System.out.print("more (y/n) ? "); 
    69             line = in.readLine(); 
    70             if (line.length() == 0 || line.charAt(0) == 'n') 
    71                 break; 
    72             } 
    73         } 
    74          
    75         } 
     101      } 
    76102         
    77         searcher.close(); 
     103      reader.close(); 
    78104    } 
    79105    catch (Exception e) { 
    80         System.out.println(" caught a " + e.getClass() + 
    81                    "\n with message: " + e.getMessage()); 
     106      System.out.println(" caught a " + e.getClass() + 
     107                 "\n with message: " + e.getMessage()); 
    82108        } 
    83109    }