Changeset 32485


Ignore:
Timestamp:
2018-09-25T10:56:42+12:00 (3 years ago)
Author:
kjdon
Message:

Greenstone now uses Lucene 4.7.2, so this code has been upgraded to match.

Location:
main/trunk/greenstone3/web/sites/localsite/collect/gberg/java
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Indexer.java

    r20341 r32485  
    11import org.greenstone.gsdl3.util.GSEntityResolver;
     2import org.greenstone.LuceneWrapper4.GSLuceneUtil;
     3import org.greenstone.LuceneWrapper4.GSLuceneConstants;
    24
    35import org.xml.sax.Attributes;
     
    911import javax.xml.parsers.SAXParserFactory;
    1012
     13
     14import org.apache.lucene.analysis.Analyzer;
     15import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
     16import org.apache.lucene.index.IndexWriter;
     17import org.apache.lucene.index.IndexWriterConfig;
     18import org.apache.lucene.store.FSDirectory;
     19import org.apache.lucene.util.Version;
     20
    1121import org.apache.lucene.document.Document;
    1222import org.apache.lucene.document.Field;
     23import org.apache.lucene.document.StoredField;
     24import org.apache.lucene.document.TextField;
    1325//import org.apache.lucene.document.DateField;
    1426import org.apache.lucene.index.IndexWriter;
    1527import org.apache.lucene.analysis.standard.StandardAnalyzer;
     28import org.apache.lucene.util.Version;
    1629//import org.apache.lucene.analysis.SimpleAnalyzer;
    1730
     
    4053        SAXParserFactory sax_factory = SAXParserFactory.newInstance();
    4154        sax_parser = sax_factory.newSAXParser();
    42         writer = new IndexWriter(index_dir.getPath(), new StandardAnalyzer(), create);
     55        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
     56        Analyzer ltcAn = new LimitTokenCountAnalyzer(analyzer,Integer.MAX_VALUE);
     57
     58        IndexWriterConfig.OpenMode open_mode;
    4359        if (create) {
    44         writer.optimize();
    45         }
     60          open_mode = IndexWriterConfig.OpenMode.CREATE;
     61        } else {
     62          open_mode = IndexWriterConfig.OpenMode.APPEND;
     63        }
     64        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, ltcAn);
     65        indexWriterConfig.setOpenMode(open_mode);
     66
     67        FSDirectory index_fs_dir = FSDirectory.open(index_dir);
     68        writer = new IndexWriter(index_fs_dir, indexWriterConfig);
    4669
    4770    } catch (Exception e) {
     
    6891    public void finish() {
    6992    try {
    70         writer.optimize();
    7193        writer.close();
    7294    } catch (Exception e) {}
     
    104126        id += "/>";
    105127
     128        String value;
    106129        if (scope.equals(qName)) {
    107 
    108             current_doc.add(new Field("nodeID", this.file_id+"."+qName,
    109                       Field.Store.YES,Field.Index.NO));
     130          value = this.file_id+"."+qName;
    110131        } else {
    111         current_doc.add(new Field("nodeID", this.file_id+"."+scope+"."+qName+"."+node_id,
    112                       Field.Store.YES,Field.Index.NO));
    113         }
     132          value = this.file_id+"."+scope+"."+qName+"."+node_id;
     133        }
     134        current_doc.add(new StoredField("nodeID", value));
     135                 
    114136    }
    115137    }
    116138    public void endElement(String uri, String localName, String qName) throws SAXException {
    117139    if (XMLTagInfo.isIndexable(qName) && qName.equals(current_node)) {
    118         current_doc.add(new Field("content", current_contents,
    119                       Field.Store.NO,Field.Index.TOKENIZED));
     140      current_doc.add(new TextField("content", current_contents, Field.Store.NO));
    120141        try {
    121142        writer.addDocument(current_doc);
  • main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Search.java

    r5956 r32485  
    22 * search.java
    33 *
    4  * Created on 25 February 2003, 02:25
    5  */
     4 * Simple command line program to search the lucene index.
     5 * Run like: java -classpath $CLASSPATH:<path-to-gberg>/java Search <index directory>
     6 * where the index directory is the index/idx folder.
     7 *
     8 * Copyright 2003 The New Zealand Digital Library Project
     9 *
     10 * A component of the Greenstone digital library software
     11 * from the New Zealand Digital Library Project at the
     12 * University of Waikato, New Zealand.
     13 *
     14 * This program is free software; you can redistribute it and/or modify
     15 * it under the terms of the GNU General Public License as published by
     16 * the Free Software Foundation; either version 2 of the License, or
     17 * (at your option) any later version.
     18 *
     19 * This program is distributed in the hope that it will be useful,
     20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     22 * GNU General Public License for more details.
     23 *
     24 * You should have received a copy of the GNU General Public License
     25 * along with this program; if not, write to the Free Software
     26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     27 *
     28 *********************************************************************/
    629
    7 
    8 /**
    9  *
    10  * @author  kjdon@cs.waikato.ac.nz
    11  * @version
    12  */
    1330
    1431import java.io.IOException;
    1532import java.io.BufferedReader;
    1633import java.io.InputStreamReader;
     34import java.io.File;
    1735
    18 import org.apache.lucene.analysis.Analyzer;
    19 import org.apache.lucene.analysis.standard.StandardAnalyzer;
     36//import org.apache.lucene.analysis.Analyzer;
     37//import org.apache.lucene.analysis.standard.StandardAnalyzer;
    2038import org.apache.lucene.document.Document;
    2139import org.apache.lucene.search.Searcher;
    2240import org.apache.lucene.search.IndexSearcher;
    2341import org.apache.lucene.search.Query;
    24 import org.apache.lucene.search.Hits;
     42//import org.apache.lucene.search.Hits;
     43import org.apache.lucene.search.TopDocs;
    2544import org.apache.lucene.queryParser.QueryParser;
    2645import org.apache.lucene.search.TermQuery;
    2746import org.apache.lucene.index.Term;
     47import org.apache.lucene.store.Directory;
     48import org.apache.lucene.store.FSDirectory;
     49import org.apache.lucene.index.DirectoryReader;
     50import org.apache.lucene.index.IndexReader;
     51import org.apache.lucene.util.Version;
    2852
    2953public class Search  {
     
    3660    }
    3761        try {
    38         Searcher searcher = new IndexSearcher(args[0]);
    39         Analyzer analyzer = new StandardAnalyzer();
    4062
    41         BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    42         while (true) {
    43         System.out.print("Query: ");
    44         String line = in.readLine();
     63      Directory indexdir_dir = FSDirectory.open(new File(args[0]));
     64      IndexReader reader = DirectoryReader.open(indexdir_dir);
     65      IndexSearcher searcher = new IndexSearcher(reader);
     66
     67      BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     68      System.out.println("Type .q to quit");
     69      while (true) {
     70        System.out.print("Query: ");
     71        String line = in.readLine();
     72        line.trim();
     73        if (line.equals(".q"))
     74          break;
    4575       
    46         if (line.length() == -1)
    47             break;
     76        Term term = new Term("content",line);
    4877       
    49         Term term = new Term("content",line);
     78        Query query = new TermQuery(term);
     79        System.out.println("Searching for: " + query.toString("content"));
     80        final int HITS_PER_PAGE=10;
    5081       
    51         Query query = new TermQuery(term);
    52         System.out.println("Searching for: " + query.toString("content"));
     82        TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
     83        System.out.println(hits.totalHits + " total matching documents");
     84        for (int start = 0; start < hits.totalHits; start += HITS_PER_PAGE) {
     85         
     86          int end = Math.min(hits.totalHits, start + HITS_PER_PAGE);
     87          for (int i = start; i < end; i++) {
     88        int docnum = hits.scoreDocs[i].doc;
     89        Document doc = reader.document(docnum);
     90        String node_id = doc.get("nodeID");
     91        System.out.println(i + ". ID: "+node_id);
     92          }
     93          if (hits.totalHits > end) {
     94        System.out.print("more (y/n) ? ");
     95        line = in.readLine();
     96        if (line.length() == 0 || line.charAt(0) == 'n')
     97          break;
     98          }
     99        }
    53100       
    54         Hits hits = searcher.search(query);
    55         System.out.println(hits.length() + " total matching documents");
    56        
    57         final int HITS_PER_PAGE=10;
    58        
    59         for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
    60             int end = Math.min(hits.length(), start + HITS_PER_PAGE);
    61             for (int i = start; i < end; i++) {
    62             Document doc = hits.doc(i);
    63             String node_id= doc.get("nodeID");
    64             System.out.println(i + ". ID: "+node_id);
    65             }
    66            
    67             if (hits.length() > end) {
    68             System.out.print("more (y/n) ? ");
    69             line = in.readLine();
    70             if (line.length() == 0 || line.charAt(0) == 'n')
    71                 break;
    72             }
    73         }
    74        
    75         }
     101      }
    76102       
    77         searcher.close();
     103      reader.close();
    78104    }
    79105    catch (Exception e) {
    80         System.out.println(" caught a " + e.getClass() +
    81                    "\n with message: " + e.getMessage());
     106      System.out.println(" caught a " + e.getClass() +
     107                 "\n with message: " + e.getMessage());
    82108        }
    83109    }
Note: See TracChangeset for help on using the changeset viewer.