source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 22306

Last change on this file since 22306 was 20295, checked in by kjdon, 15 years ago

removed my email address

  • Property svn:keywords set to Author Date Id Revision
File size: 4.3 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gsdl3.util.*;
5
6// XML classes
7import org.w3c.dom.Element;
8import org.w3c.dom.Document;
9import org.w3c.dom.NodeList;
10
11import java.util.HashMap;
12import java.util.ArrayList;
13
14import org.apache.lucene.analysis.Analyzer;
15import org.apache.lucene.analysis.standard.StandardAnalyzer;
16import org.apache.lucene.document.*; //Document;
17import org.apache.lucene.search.Searcher;
18import org.apache.lucene.search.IndexSearcher;
19import org.apache.lucene.search.Query;
20import org.apache.lucene.search.Hits;
21import org.apache.lucene.queryParser.QueryParser;
22import org.apache.lucene.search.TermQuery;
23import org.apache.lucene.index.Term;
24
25import java.io.File;
26
27import org.apache.log4j.*;
28
29/**
30 *
31 */
32
33public class LuceneSearch
34 extends AbstractSearch {
35
36 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
37
38 protected static final String INDEX_ELEM = "index";
39
40 public boolean configure(Element info, Element extra_info) {
41 if (!super.configure(info, extra_info)){
42 return false;
43 }
44
45 default_index = "idx";
46 return true;
47 }
48
49 protected void getIndexData(ArrayList index_ids, ArrayList index_names, String lang)
50 {
51 // the index info - read from config file - cache it??
52 Element index_list = (Element)GSXML.getChildByTagName(this.config_info, INDEX_ELEM+GSXML.LIST_MODIFIER);
53 if (index_list != null) {
54 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
55 int len = indexes.getLength();
56 // now add even if there is only one
57 for (int i=0; i<len; i++) {
58 Element index = (Element)indexes.item(i);
59 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
60 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
61
62 }
63 } else {
64 // there is only one index, so we assume the default
65 index_ids.add(this.default_index);
66 index_names.add("default index");
67 }
68
69 }
70
71 /** Process a text query - implemented by concrete subclasses */
72 protected Element processTextQuery(Element request) {
73
74 // Create a new (empty) result message
75 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
76 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
77 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
78 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
79 result.appendChild(doc_node_list);
80 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
81 result.appendChild(metadata_list);
82 // Get the parameters of the request
83 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
84 if (param_list == null) {
85 logger.error("TextQuery request had no paramList.");
86 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
87 return result; // Return the empty result
88 }
89
90 // Process the request parameters
91 HashMap params = GSXML.extractParams(param_list, false);
92
93 // Make sure a query has been specified
94 String query_string = (String) params.get(QUERY_PARAM);
95 if (query_string == null || query_string.equals("")) {
96 logger.error("TextQuery request had no query string.");
97 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
98 return result; // Return the empty result
99 }
100
101 // Get the index
102 String index = (String) params.get(INDEX_PARAM);
103 if (index == null || index.equals("")) {
104 index = this.default_index; // assume the default
105 }
106 try {
107 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
108 index_dir += File.separator+index;
109 Searcher searcher = new IndexSearcher(index_dir);
110 Analyzer analyzer = new StandardAnalyzer();
111
112 Term term = new Term("content", query_string);
113
114 Query query = new TermQuery(term);
115
116 Hits hits = searcher.search(query);
117 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+hits.length());
118
119 for (int i=0; i<hits.length(); i++) {
120 org.apache.lucene.document.Document luc_doc = hits.doc(i);
121 String node_id = luc_doc.get("nodeID");
122 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
123 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
124 doc_node_list.appendChild(node);
125 }
126 } catch (Exception e) {
127 e.printStackTrace();
128 }
129
130 return result;
131
132 }
133
134
135}
Note: See TracBrowser for help on using the repository browser.