source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 29143

Last change on this file since 29143 was 29143, checked in by ak19, 10 years ago

Part of the port from lucene3.3.0 to lucene4.7.2 (LuceneWrapper related). Changes to Greenstone3's main src code, build.xml and .classpath file, to switch over from using Lucene3Wrapper to Lucene4Wrapper

  • Property svn:keywords set to Author Date Id Revision
File size: 6.1 KB
Line 
1package org.greenstone.gsdl3.service;
2
3// Greenstone classes
4import java.io.File;
5import java.io.Serializable;
6import java.util.ArrayList;
7import java.util.HashMap;
8
9import org.apache.log4j.Logger;
10import org.apache.lucene.index.DirectoryReader;
11import org.apache.lucene.index.IndexReader;
12import org.apache.lucene.index.Term;
13import org.apache.lucene.search.IndexSearcher;
14import org.apache.lucene.search.Query;
15import org.apache.lucene.search.TermQuery;
16import org.apache.lucene.search.TopDocs;
17import org.apache.lucene.store.Directory;
18import org.apache.lucene.store.FSDirectory;
19import org.greenstone.gsdl3.util.GSFile;
20import org.greenstone.gsdl3.util.GSXML;
21import org.greenstone.gsdl3.util.XMLConverter;
22
23import org.w3c.dom.Document;
24import org.w3c.dom.Element;
25import org.w3c.dom.NodeList;
26
27public class LuceneSearch extends AbstractTextSearch
28{
29 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
30
31 protected static final String INDEX_ELEM = "index";
32
33 protected ArrayList<String> index_ids;
34
35 public LuceneSearch()
36 {
37 index_ids = new ArrayList<String>();
38 }
39
40 public boolean configure(Element info, Element extra_info)
41 {
42 if (!super.configure(info, extra_info))
43 {
44 return false;
45 }
46
47 default_index = "idx";
48
49 // cache index info read from config file
50 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
51 if (index_list != null)
52 {
53 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
54 int len = indexes.getLength();
55 // now add even if there is only one
56 for (int i = 0; i < len; i++)
57 {
58 Element index = (Element) indexes.item(i);
59 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
60 }
61 }
62 else
63 {
64 // there is only one index, so we assume the default
65 index_ids.add(this.default_index);
66 }
67
68 return true;
69 }
70
71 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
72 {
73 // copying exercise for index_ids,
74 for (int i = 0; i < this.index_ids.size(); i++)
75 {
76 index_ids.add(this.index_ids.get(i));
77 }
78
79 // But need to work out display name from scratch as this uses
80 // the 'lang' parameter
81
82 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
83 if (index_list != null)
84 {
85 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
86 int len = indexes.getLength();
87 // now add even if there is only one
88 for (int i = 0; i < len; i++)
89 {
90 Element index = (Element) indexes.item(i);
91 index_names.add(GSXML.getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
92
93 }
94 }
95 else
96 {
97 // there is only one index, so we assume the default
98 index_names.add("default index");
99 }
100 }
101
102 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list)
103 {
104 // Create a new (empty) result message
105 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
106 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
107 result.appendChild(doc_node_list);
108 result.appendChild(metadata_list);
109 }
110
111 protected boolean hasParamList(Element request, Element metadata_list)
112 {
113 // Get the parameters of the request
114 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
115 if (param_list == null)
116 {
117 logger.error("TextQuery request had no paramList.");
118 GSXML.addMetadata(metadata_list, "numDocsMatched", "0");
119 return false; // signal that an empty result should be return
120 }
121
122 return true;
123 }
124
125 protected boolean hasQueryString(Element param_list, Element metadata_list)
126 {
127 // Process the request parameters to make sure a query has been specified
128 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
129 String query_string = (String) params.get(QUERY_PARAM);
130
131 if (query_string == null || query_string.equals(""))
132 {
133 logger.error("TextQuery request had no query string.");
134 GSXML.addMetadata(metadata_list, "numDocsMatched", "0");
135 return false; // signal that an empty result should be return
136 }
137
138 return true;
139 }
140
141 /** Process a text query - implemented by concrete subclasses */
142 protected Element processTextQuery(Element request)
143 {
144 Document result_doc = XMLConverter.newDOM();
145 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
146 Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
147 Element metadata_list = result_doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
148 initResultElement(result, doc_node_list, metadata_list);
149
150 if (!hasParamList(request, metadata_list))
151 {
152 return result;
153 }
154
155 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
156 if (!hasQueryString(param_list, metadata_list))
157 {
158 return result;
159 }
160
161 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
162 String query_string = (String) params.get(QUERY_PARAM);
163
164 // Get the index
165 String index = (String) params.get(INDEX_PARAM);
166 if (index == null || index.equals(""))
167 {
168 index = this.default_index; // assume the default
169 }
170
171 try
172 {
173 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
174 index_dir += File.separator + index;
175 Directory index_dir_dir = FSDirectory.open(new File(index_dir));
176 IndexReader reader = DirectoryReader.open(index_dir_dir); //deprecated: IndexReader.open(index_dir_dir);
177 IndexSearcher searcher = new IndexSearcher(reader);
178
179 Term term = new Term("content", query_string);
180
181 Query query = new TermQuery(term);
182
183 TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
184
185 GSXML.addMetadata(metadata_list, "numDocsMatched", "" + hits.scoreDocs.length);
186
187 for (int i = 0; i < hits.scoreDocs.length; i++)
188 {
189 int lucene_doc_num = hits.scoreDocs[i].doc;
190 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num);
191 String node_id = luc_doc.get("nodeID");
192 Element node = result_doc.createElement(GSXML.DOC_NODE_ELEM);
193 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
194 doc_node_list.appendChild(node);
195 }
196 }
197 catch (Exception e)
198 {
199 e.printStackTrace();
200 }
201
202 return result;
203 }
204}
Note: See TracBrowser for help on using the repository browser.