source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/LuceneSearch.java@ 30631

Last change on this file since 30631 was 30631, checked in by kjdon, 8 years ago

getDisplayText has moved classes

  • Property svn:keywords set to Author Date Id Revision
File size: 6.3 KB
Line 
/** This is a simple Lucene XML Search class. This is NOT the one
 used by Greenstone Lucene collections; it is used by the custom
 gberg collection. */
4
5package org.greenstone.gsdl3.service;
6
7// Greenstone classes
8import java.io.File;
9import java.io.Serializable;
10import java.util.ArrayList;
11import java.util.HashMap;
12
13import org.apache.log4j.Logger;
14import org.apache.lucene.index.DirectoryReader;
15import org.apache.lucene.index.IndexReader;
16import org.apache.lucene.index.Term;
17import org.apache.lucene.search.IndexSearcher;
18import org.apache.lucene.search.Query;
19import org.apache.lucene.search.TermQuery;
20import org.apache.lucene.search.TopDocs;
21import org.apache.lucene.store.Directory;
22import org.apache.lucene.store.FSDirectory;
23import org.greenstone.gsdl3.util.GSFile;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.XMLConverter;
26
27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.NodeList;
30
31public class LuceneSearch extends AbstractTextSearch
32{
33 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.LuceneSearch.class.getName());
34
35 protected static final String INDEX_ELEM = "index";
36
37 protected ArrayList<String> index_ids;
38
39 public LuceneSearch()
40 {
41 index_ids = new ArrayList<String>();
42 }
43
44 public boolean configure(Element info, Element extra_info)
45 {
46 if (!super.configure(info, extra_info))
47 {
48 return false;
49 }
50
51 default_index = "idx";
52
53 // cache index info read from config file
54 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
55 if (index_list != null)
56 {
57 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
58 int len = indexes.getLength();
59 // now add even if there is only one
60 for (int i = 0; i < len; i++)
61 {
62 Element index = (Element) indexes.item(i);
63 index_ids.add(index.getAttribute(GSXML.NAME_ATT));
64 }
65 }
66 else
67 {
68 // there is only one index, so we assume the default
69 index_ids.add(this.default_index);
70 }
71
72 return true;
73 }
74
75 protected void getIndexData(ArrayList<String> index_ids, ArrayList<String> index_names, String lang)
76 {
77 // copying exercise for index_ids,
78 for (int i = 0; i < this.index_ids.size(); i++)
79 {
80 index_ids.add(this.index_ids.get(i));
81 }
82
83 // But need to work out display name from scratch as this uses
84 // the 'lang' parameter
85
86 Element index_list = (Element) GSXML.getChildByTagName(this.config_info, INDEX_ELEM + GSXML.LIST_MODIFIER);
87 if (index_list != null)
88 {
89 NodeList indexes = index_list.getElementsByTagName(INDEX_ELEM);
90 int len = indexes.getLength();
91 // now add even if there is only one
92 for (int i = 0; i < len; i++)
93 {
94 Element index = (Element) indexes.item(i);
95 index_names.add(getDisplayText(index, GSXML.DISPLAY_TEXT_NAME, lang, "en"));
96
97 }
98 }
99 else
100 {
101 // there is only one index, so we assume the default
102 index_names.add("default index");
103 }
104 }
105
106 protected void initResultElement(Element result, Element doc_node_list, Element metadata_list)
107 {
108 // Create a new (empty) result message
109 result.setAttribute(GSXML.FROM_ATT, QUERY_SERVICE);
110 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
111 result.appendChild(doc_node_list);
112 result.appendChild(metadata_list);
113 }
114
115 protected boolean hasParamList(Element request, Element metadata_list)
116 {
117 // Get the parameters of the request
118 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
119 if (param_list == null)
120 {
121 logger.error("TextQuery request had no paramList.");
122 GSXML.addMetadata(metadata_list, "numDocsMatched", "0");
123 return false; // signal that an empty result should be return
124 }
125
126 return true;
127 }
128
129 protected boolean hasQueryString(Element param_list, Element metadata_list)
130 {
131 // Process the request parameters to make sure a query has been specified
132 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
133 String query_string = (String) params.get(QUERY_PARAM);
134
135 if (query_string == null || query_string.equals(""))
136 {
137 logger.error("TextQuery request had no query string.");
138 GSXML.addMetadata(metadata_list, "numDocsMatched", "0");
139 return false; // signal that an empty result should be return
140 }
141
142 return true;
143 }
144
145 /** Process a text query - implemented by concrete subclasses */
146 protected Element processTextQuery(Element request)
147 {
148 Document result_doc = XMLConverter.newDOM();
149 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
150 Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
151 Element metadata_list = result_doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
152 initResultElement(result, doc_node_list, metadata_list);
153
154 if (!hasParamList(request, metadata_list))
155 {
156 return result;
157 }
158
159 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
160 if (!hasQueryString(param_list, metadata_list))
161 {
162 return result;
163 }
164
165 HashMap<String, Serializable> params = GSXML.extractParams(param_list, false);
166 String query_string = (String) params.get(QUERY_PARAM);
167
168 // Get the index
169 String index = (String) params.get(INDEX_PARAM);
170 if (index == null || index.equals(""))
171 {
172 index = this.default_index; // assume the default
173 }
174
175 try
176 {
177 String index_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name);
178 index_dir += File.separator + index;
179 Directory index_dir_dir = FSDirectory.open(new File(index_dir));
180 IndexReader reader = DirectoryReader.open(index_dir_dir); //deprecated: IndexReader.open(index_dir_dir);
181 IndexSearcher searcher = new IndexSearcher(reader);
182
183 Term term = new Term("content", query_string);
184
185 Query query = new TermQuery(term);
186
187 TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
188
189 GSXML.addMetadata(metadata_list, "numDocsMatched", "" + hits.scoreDocs.length);
190
191 for (int i = 0; i < hits.scoreDocs.length; i++)
192 {
193 int lucene_doc_num = hits.scoreDocs[i].doc;
194 org.apache.lucene.document.Document luc_doc = reader.document(lucene_doc_num);
195 String node_id = luc_doc.get("nodeID");
196 Element node = result_doc.createElement(GSXML.DOC_NODE_ELEM);
197 node.setAttribute(GSXML.NODE_ID_ATT, node_id);
198 doc_node_list.appendChild(node);
199 }
200 }
201 catch (Exception e)
202 {
203 e.printStackTrace();
204 }
205
206 return result;
207 }
208}
Note: See TracBrowser for help on using the repository browser.