source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 25763

Last change on this file since 25763 was 25763, checked in by sjm84, 12 years ago

Fixed some deprecated stuff in LuceneSearch

  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import java.io.File;
5import java.io.Serializable;
6import java.util.ArrayList;
7import java.util.HashMap;
8
9import org.apache.log4j.Logger;
10import org.apache.lucene.index.IndexReader;
11import org.apache.lucene.index.Term;
12import org.apache.lucene.search.IndexSearcher;
13import org.apache.lucene.search.Query;
14import org.apache.lucene.search.TermQuery;
15import org.apache.lucene.search.TopDocs;
16import org.apache.lucene.store.Directory;
17import org.apache.lucene.store.FSDirectory;
18import org.greenstone.gsdl3.util.GSFile;
19import org.greenstone.gsdl3.util.GSXML;
20import org.w3c.dom.Element;
21import org.w3c.dom.NodeList;
22
23public class LuceneSearch extends AbstractTextSearch
24{
25 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
26
27 protected static final String INDEX_ELEM = "index";
28
29 protected ArrayList<String> index_ids;
30
31 public LuceneSearch()
32 {
33 index_ids = new ArrayList<String>();
34 }
35
36 public boolean configure(Element info, Element extra_info)
37 {
38 if (!super.configure(info, extra_info))
39 {
40 return false;
41 }
42
43 default_index = "idx";
44
45 // cache index info read from config file
46 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
47 if (index_list != null)
48 {
49 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
50 int len = indexes.getLength();
51 // now add even if there is only one
52 for (int i = 0; i < len; i++)
53 {
54 Element index = (Element) indexes.item(i);
55 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
56 }
57 }
58 else
59 {
60 // there is only one index, so we assume the default
61 index_ids.add(this.default_index);
62 }
63
64 return true;
65 }
66
67 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
68 {
69 // copying exercise for index_ids,
70 for (int i = 0; i < this.index_ids.size(); i++)
71 {
72 index_ids.add(this.index_ids.get(i));
73 }
74
75 // But need to work out display name from scratch as this uses
76 // the 'lang' parameter
77
78 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
79 if (index_list != null)
80 {
81 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
82 int len = indexes.getLength();
83 // now add even if there is only one
84 for (int i = 0; i < len; i++)
85 {
86 Element index = (Element) indexes.item(i);
87 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
88
89 }
90 }
91 else
92 {
93 // there is only one index, so we assume the default
94 index_names.add("default index");
95 }
96 }
97
98 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list)
99 {
100 // Create a new (empty) result message
101 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
102 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
103 result.appendChild(doc_node_list);
104 result.appendChild(metadata_list);
105 }
106
107 protected boolean hasParamList(Element request, Element metadata_list)
108 {
109 // Get the parameters of the request
110 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
111 if (param_list == null)
112 {
113 logger.error("TextQuery request had no paramList.");
114 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
115 return false; // signal that an empty result should be return
116 }
117
118 return true;
119 }
120
121 protected boolean hasQueryString(Element param_list, Element metadata_list)
122 {
123 // Process the request parameters to make sure a query has been specified
124 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
125 String query_string = (String) params.get(QUERY_PARAM);
126
127 if (query_string == null || query_string.equals(""))
128 {
129 logger.error("TextQuery request had no query string.");
130 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "0");
131 return false; // signal that an empty result should be return
132 }
133
134 return true;
135 }
136
137 /** Process a text query - implemented by concrete subclasses */
138 protected Element processTextQuery(Element request)
139 {
140 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
141 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
142 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
143 initResultElement(result, doc_node_list, metadata_list);
144
145 if (!hasParamList(request, metadata_list))
146 {
147 return result;
148 }
149
150 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
151 if (!hasQueryString(param_list, metadata_list))
152 {
153 return result;
154 }
155
156 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
157 String query_string = (String) params.get(QUERY_PARAM);
158
159 // Get the index
160 String index = (String) params.get(INDEX_PARAM);
161 if (index == null || index.equals(""))
162 {
163 index = this.default_index; // assume the default
164 }
165
166 try
167 {
168 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
169 index_dir += File.separator + index;
170 Directory index_dir_dir = FSDirectory.open(new File(index_dir));
171 IndexSearcher searcher = new IndexSearcher(index_dir_dir);
172
173 Term term = new Term("content", query_string);
174
175 Query query = new TermQuery(term);
176
177 TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
178
179 GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", "" + hits.scoreDocs.length);
180
181 IndexReader reader = searcher.getIndexReader();
182
183 for (int i = 0; i < hits.scoreDocs.length; i++)
184 {
185 int lucene_doc_num = hits.scoreDocs[i].doc;
186 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num);
187 String node_id = luc_doc.get("nodeID");
188 Element node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
189 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
190 doc_node_list.appendChild(node);
191 }
192 }
193 catch (Exception e)
194 {
195 e.printStackTrace();
196 }
197
198 return result;
199 }
200}
Note: See TracBrowser for help on using the repository browser.