source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics, this was done automatically so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

  • Property svn:keywords set to Author Date Id Revision
File size: 6.4 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import org.greenstone.gsdl3.util.*;
5
6// XML classes
7import org.w3c.dom.Element;
8import org.w3c.dom.Document;
9import org.w3c.dom.NodeList;
10
11import java.util.HashMap;
12import java.util.ArrayList;
13
14import org.apache.lucene.analysis.Analyzer;
15import org.apache.lucene.analysis.standard.StandardAnalyzer;
16import org.apache.lucene.document.*; //Document;
17import org.apache.lucene.search.Searcher;
18import org.apache.lucene.search.IndexSearcher;
19import org.apache.lucene.index.IndexReader;
20import org.apache.lucene.search.Query;
21import org.apache.lucene.queryParser.QueryParser;
22//import org.apache.lucene.search.Hits;
23import org.apache.lucene.search.TermQuery;
24import org.apache.lucene.search.TopDocs;
25import org.apache.lucene.index.Term;
26import org.apache.lucene.store.FSDirectory;
27import org.apache.lucene.store.Directory;
28
29import org.greenstone.LuceneWrapper3.GS2Analyzer;
30import java.io.File;
31import java.io.Serializable;
32
33import org.apache.log4j.*;
34
35/**
36 *
37 */
38
39public class LuceneSearch
40 extends AbstractTextSearch {
41
42 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
43
44 protected static final String INDEX_ELEM = "index";
45
46 protected ArrayList<String> index_ids;
47
48 public LuceneSearch()
49 {
50 index_ids = new ArrayList<String>();
51 }
52
53 public boolean configure(Element info, Element extra_info) {
54 if (!super.configure(info, extra_info)){
55 return false;
56 }
57
58 default_index = "idx";
59
60 // cache index info read from config file
61 Element index_list
62 = (Element)GSXML.getChildByTagName(this.config_info,
63 INDEX_ELEM+GSXML.LIST_MODIFIER);
64 if (index_list != null) {
65 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
66 int len = indexes.getLength();
67 // now add even if there is only one
68 for (int i=0; i<len; i++) {
69 Element index = (Element)indexes.item(i);
70 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
71 }
72 } else {
73 // there is only one index, so we assume the default
74 index_ids.add(this.default_index);
75 }
76
77 return true;
78 }
79
80 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
81 {
82 // copying exercise for index_ids,
83 for (int i=0; i<this.index_ids.size(); i++) {
84 index_ids.add(this.index_ids.get(i));
85 }
86
87 // But need to work out display name from scratch as this uses
88 // the 'lang' parameter
89
90 Element index_list
91 = (Element)GSXML.getChildByTagName(this.config_info,
92 INDEX_ELEM+GSXML.LIST_MODIFIER);
93 if (index_list != null) {
94 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
95 int len = indexes.getLength();
96 // now add even if there is only one
97 for (int i=0; i<len; i++) {
98 Element index = (Element)indexes.item(i);
99 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
100
101 }
102 } else {
103 // there is only one index, so we assume the default
104 index_names.add("default index");
105 }
106 }
107
108
109 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list)
110 {
111
112 // Create a new (empty) result message
113 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
114 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
115 result.appendChild(doc_node_list);
116 result.appendChild(metadata_list);
117 }
118
119 protected boolean hasParamList(Element request, Element metadata_list)
120 {
121 // Get the parameters of the request
122 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
123 if (param_list == null) {
124 logger.error("TextQuery request had no paramList.");
125 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
126 return false; // signal that an empty result should be return
127 }
128
129 return true;
130 }
131
132 protected boolean hasQueryString(Element param_list, Element metadata_list)
133 {
134
135 // Process the request parameters to make sure a query has been specified
136 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
137 String query_string = (String) params.get(QUERY_PARAM);
138
139 if (query_string == null || query_string.equals("")) {
140 logger.error("TextQuery request had no query string.");
141 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
142 return false; // signal that an empty result should be return
143 }
144
145 return true;
146 }
147
148
149
150 /** Process a text query - implemented by concrete subclasses */
151 protected Element processTextQuery(Element request) {
152
153 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
154 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
155 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
156 initResultElement(result,doc_node_list,metadata_list);
157
158 if (!hasParamList(request,metadata_list)) {
159 return result;
160 }
161
162 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
163 if (!hasQueryString(param_list,metadata_list)) {
164 return result;
165 }
166
167 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
168 String query_string = (String) params.get(QUERY_PARAM);
169
170 // Get the index
171 String index = (String) params.get(INDEX_PARAM);
172 if (index == null || index.equals("")) {
173 index = this.default_index; // assume the default
174 }
175
176 try {
177 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
178 index_dir += File.separator+index;
179 Directory index_dir_dir = FSDirectory.open(new File(index_dir));
180 Searcher searcher = new IndexSearcher(index_dir_dir);
181 Analyzer analyzer = new GS2Analyzer();
182
183 Term term = new Term("content", query_string);
184
185 Query query = new TermQuery(term);
186
187 TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
188
189 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+hits.scoreDocs.length);
190
191 IndexReader reader = ((IndexSearcher) searcher).getIndexReader();
192
193 for (int i=0; i<hits.scoreDocs.length; i++) {
194 int lucene_doc_num = hits.scoreDocs[i].doc;
195 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num);
196 String node_id = luc_doc.get("nodeID");
197 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
198 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
199 doc_node_list.appendChild(node);
200 }
201 } catch (Exception e) {
202 e.printStackTrace();
203 }
204
205 return result;
206 }
207
208
209}
Note: See TracBrowser for help on using the repository browser.