Context Navigation

← Previous Change
Next Change →

gberg

Timestamp:

2018-09-25T10:56:42+12:00 (6 years ago)

Author:

kjdon

Message:

greenstone now uses lucene 4.7.2, so upgrading this code to match

Location:

main/trunk/greenstone3/web/sites/localsite/collect/gberg/java

Files:

2 edited

Indexer.java (modified) (5 diffs)
Search.java (modified) (2 diffs)

Legend:

Unmodified
Added
Removed

main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Indexer.java

-              r20341
+              r32485
 import org.greenstone.gsdl3.util.GSEntityResolver;
+import org.greenstone.LuceneWrapper4.GSLuceneUtil;
+import org.greenstone.LuceneWrapper4.GSLuceneConstants;
 import org.xml.sax.Attributes;
 …
 import javax.xml.parsers.SAXParserFactory;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.TextField;
 //import org.apache.lucene.document.DateField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.util.Version;
 //import org.apache.lucene.analysis.SimpleAnalyzer;
 …
         SAXParserFactory sax_factory = SAXParserFactory.newInstance();
         sax_parser = sax_factory.newSAXParser();
+        writer = new IndexWriter(index_dir.getPath(), new StandardAnalyzer(), create);
+        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
+        Analyzer ltcAn = new LimitTokenCountAnalyzer(analyzer,Integer.MAX_VALUE);
+        IndexWriterConfig.OpenMode open_mode;
         if (create) {
+        writer.optimize();
+        }
+          open_mode = IndexWriterConfig.OpenMode.CREATE;
+        } else {
+          open_mode = IndexWriterConfig.OpenMode.APPEND;
+        }
+        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, ltcAn);
+        indexWriterConfig.setOpenMode(open_mode);
+        FSDirectory index_fs_dir = FSDirectory.open(index_dir);
+        writer = new IndexWriter(index_fs_dir, indexWriterConfig);
     } catch (Exception e) {
 …
     public void finish() {
     try {
-        writer.optimize();
         writer.close();
     } catch (Exception e) {}
 …
         id += "/>";
+        String value;
         if (scope.equals(qName)) {
+            current_doc.add(new Field("nodeID", this.file_id+"."+qName,
+                      Field.Store.YES,Field.Index.NO));
+          value = this.file_id+"."+qName;
         } else {
+        current_doc.add(new Field("nodeID", this.file_id+"."+scope+"."+qName+"."+node_id,
+                      Field.Store.YES,Field.Index.NO));
+        }
+          value = this.file_id+"."+scope+"."+qName+"."+node_id;
+        }
+        current_doc.add(new StoredField("nodeID", value));
+    }
+    }
     public void endElement(String uri, String localName, String qName) throws SAXException {
     if (XMLTagInfo.isIndexable(qName) && qName.equals(current_node)) {
+        current_doc.add(new Field("content", current_contents,
+                      Field.Store.NO,Field.Index.TOKENIZED));
+      current_doc.add(new TextField("content", current_contents, Field.Store.NO));
         try {
         writer.addDocument(current_doc);

main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Search.java

-              r5956
+              r32485
  * search.java
+ *
+ * Created on 25 February 2003, 02:25
+ */
+ * Simple command line program to search the lucene index.
+ * Run like: java -classpath $CLASSPATH:<path-to-gberg>/java Search <index directory>
+ * where the index directory is the index/idx folder.
+ *
+ * Copyright 2003 The New Zealand Digital Library Project
+ *
+ * A component of the Greenstone digital library software
+ * from the New Zealand Digital Library Project at the
+ * University of Waikato, New Zealand.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *********************************************************************/
-/**
+ *
- * @author  [email protected]
- * @version
- */
 import java.io.IOException;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
+import java.io.File;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+//import org.apache.lucene.analysis.Analyzer;
+//import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Hits;
+//import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.Version;
 public class Search  {
 …
+    }
         try {
-        Searcher searcher = new IndexSearcher(args[0]);
-        Analyzer analyzer = new StandardAnalyzer();
+        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+        while (true) {
+        System.out.print("Query: ");
+        String line = in.readLine();
+      Directory indexdir_dir = FSDirectory.open(new File(args[0]));
+      IndexReader reader = DirectoryReader.open(indexdir_dir);
+      IndexSearcher searcher = new IndexSearcher(reader);
+      BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+      System.out.println("Type .q to quit");
+      while (true) {
+        System.out.print("Query: ");
+        String line = in.readLine();
+        line.trim();
+        if (line.equals(".q"))
+          break;
+        if (line.length() == -1)
+            break;
+        Term term = new Term("content",line);
+        Term term = new Term("content",line);
+        Query query = new TermQuery(term);
+        System.out.println("Searching for: " + query.toString("content"));
+        final int HITS_PER_PAGE=10;
+        Query query = new TermQuery(term);
+        System.out.println("Searching for: " + query.toString("content"));
+        TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
+        System.out.println(hits.totalHits + " total matching documents");
+        for (int start = 0; start < hits.totalHits; start += HITS_PER_PAGE) {
+          int end = Math.min(hits.totalHits, start + HITS_PER_PAGE);
+          for (int i = start; i < end; i++) {
+        int docnum = hits.scoreDocs[i].doc;
+        Document doc = reader.document(docnum);
+        String node_id = doc.get("nodeID");
+        System.out.println(i + ". ID: "+node_id);
+          }
+          if (hits.totalHits > end) {
+        System.out.print("more (y/n) ? ");
+        line = in.readLine();
+        if (line.length() == 0 || line.charAt(0) == 'n')
+          break;
+          }
+        }
+        Hits hits = searcher.search(query);
+        System.out.println(hits.length() + " total matching documents");
+        final int HITS_PER_PAGE=10;
+        for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
+            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
+            for (int i = start; i < end; i++) {
+            Document doc = hits.doc(i);
+            String node_id= doc.get("nodeID");
+            System.out.println(i + ". ID: "+node_id);
+            }
+            if (hits.length() > end) {
+            System.out.print("more (y/n) ? ");
+            line = in.readLine();
+            if (line.length() == 0 || line.charAt(0) == 'n')
+                break;
+            }
+        }
+        }
+      }
         searcher.close();
+      reader.close();
+    }
     catch (Exception e) {
         System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
+      System.out.println(" caught a " + e.getClass() +
+                 "\n with message: " + e.getMessage());
+        }
+    }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 32485 for main/trunk/greenstone3/web/sites/localsite/collect/gberg

Legend:

main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Indexer.java

main/trunk/greenstone3/web/sites/localsite/collect/gberg/java/Search.java

Download in other formats: