1 | package org.greenstone.gsdl3.action;
|
---|
2 |
|
---|
3 | import java.io.Serializable;
|
---|
4 | import java.util.HashMap;
|
---|
5 | import java.util.HashSet;
|
---|
6 |
|
---|
7 | import org.apache.log4j.Logger;
|
---|
8 | import org.greenstone.gsdl3.util.GSParams;
|
---|
9 | import org.greenstone.gsdl3.util.GSPath;
|
---|
10 | import org.greenstone.gsdl3.util.GSXML;
|
---|
11 | import org.greenstone.gsdl3.util.GSXSLT;
|
---|
12 | import org.greenstone.gsdl3.util.UserContext;
|
---|
13 | import org.greenstone.gsdl3.util.XMLConverter;
|
---|
14 | import org.w3c.dom.Document;
|
---|
15 | import org.w3c.dom.Element;
|
---|
16 | import org.w3c.dom.Node;
|
---|
17 | import org.w3c.dom.NodeList;
|
---|
18 |
|
---|
19 | /** action class for queries */
|
---|
20 | public class QueryAction extends Action
|
---|
21 | {
|
---|
22 |
|
---|
23 | public static final String HITS_PER_PAGE = "hitsPerPage";
|
---|
24 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.QueryAction.class.getName());
|
---|
25 |
|
---|
26 | /**
|
---|
27 | * process - processes a request.
|
---|
28 | */
|
---|
29 | public Node process(Node message_node)
|
---|
30 | {
|
---|
31 | Element message = GSXML.nodeToElement(message_node);
|
---|
32 | Document doc = message.getOwnerDocument();
|
---|
33 |
|
---|
34 | // get the request - assume there is only one
|
---|
35 | Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
|
---|
36 |
|
---|
37 | // create the return message
|
---|
38 | Element result = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
39 | Element response = basicQuery(request);
|
---|
40 | result.appendChild(doc.importNode(response, true));
|
---|
41 | return result;
|
---|
42 | }
|
---|
43 |
|
---|
44 | /**
|
---|
45 | * a generic query handler this gets the service description, does the query
|
---|
46 | * (just passes all the params to the service, then gets the titles for any
|
---|
47 | * results
|
---|
48 | */
|
---|
49 | protected Element basicQuery(Element request)
|
---|
50 | {
|
---|
51 | // the result
|
---|
52 | Document doc = request.getOwnerDocument();
|
---|
53 | Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
|
---|
54 |
|
---|
55 | // extract the params from the cgi-request, and check that we have a coll specified
|
---|
56 | Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
|
---|
57 | HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
|
---|
58 |
|
---|
59 | String request_type = (String) params.get(GSParams.REQUEST_TYPE);
|
---|
60 | String service_name = (String) params.get(GSParams.SERVICE);
|
---|
61 | String collection = (String) params.get(GSParams.COLLECTION);
|
---|
62 | String lang = request.getAttribute(GSXML.LANG_ATT);
|
---|
63 | // collection may be null or empty when we are doing cross coll services
|
---|
64 | if (collection == null || collection.equals(""))
|
---|
65 | {
|
---|
66 | collection = null;
|
---|
67 | }
|
---|
68 |
|
---|
69 | UserContext userContext = new UserContext(request);
|
---|
70 | String to = service_name;
|
---|
71 | if (collection != null)
|
---|
72 | {
|
---|
73 | to = GSPath.prependLink(to, collection);
|
---|
74 | }
|
---|
75 |
|
---|
76 | // get the format info - there may be global format info in the collection that searching needs
|
---|
77 | Element format_elem = getFormatInfo(to, userContext);
|
---|
78 | if (format_elem != null) {
|
---|
79 | // set the format type
|
---|
80 | format_elem.setAttribute(GSXML.TYPE_ATT, "search");
|
---|
81 | // for now just add to the response
|
---|
82 | page_response.appendChild(doc.importNode(format_elem, true));
|
---|
83 | }
|
---|
84 | // get the service description
|
---|
85 | // we have been asked for the service description
|
---|
86 | Element mr_info_message = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
87 | Element mr_info_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, to, userContext);
|
---|
88 | mr_info_message.appendChild(mr_info_request);
|
---|
89 |
|
---|
90 | // process the message
|
---|
91 | Element mr_info_response = (Element) this.mr.process(mr_info_message);
|
---|
92 |
|
---|
93 | boolean does_paging = false;
|
---|
94 |
|
---|
95 | // the response
|
---|
96 | Element service_response = (Element) GSXML.getChildByTagName(mr_info_response, GSXML.RESPONSE_ELEM);
|
---|
97 |
|
---|
98 | Element service_description = (Element)GSXML.getChildByTagName(service_response, GSXML.SERVICE_ELEM);
|
---|
99 | if (service_description != null) {
|
---|
100 | service_description = (Element) doc.importNode(service_description, true);
|
---|
101 |
|
---|
102 | Element meta_list =(Element) GSXML.getChildByTagName(service_description, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
|
---|
103 | if (meta_list != null) {
|
---|
104 | String value = GSXML.getMetadataValue(meta_list, "does_paging");
|
---|
105 | if (value.equals("true")) {
|
---|
106 | does_paging = true;
|
---|
107 | }
|
---|
108 | }
|
---|
109 |
|
---|
110 | if (does_paging == false) {
|
---|
111 | // we will do the paging, so lets add in a hitsPerPage param to the service
|
---|
112 | addHitsParamToService(doc, service_description, lang);
|
---|
113 | }
|
---|
114 | }
|
---|
115 | //Element service_description = (Element) doc.importNode(GSXML.getChildByTagName(service_response, GSXML.SERVICE_ELEM), true);
|
---|
116 |
|
---|
117 |
|
---|
118 | // have we been asked to return the service description
|
---|
119 | // as part of the response?
|
---|
120 | if (request_type.indexOf("d") != -1) {
|
---|
121 | if (service_description != null) {
|
---|
122 | page_response.appendChild(service_description);
|
---|
123 | }
|
---|
124 | addCollectionsHierarchy(page_response,userContext);
|
---|
125 | }
|
---|
126 |
|
---|
127 | if (request_type.indexOf("r") == -1)
|
---|
128 | {
|
---|
129 | // just a display request, no actual processing to do
|
---|
130 | //append site metadata
|
---|
131 | addSiteMetadata(page_response, userContext);
|
---|
132 | addInterfaceOptions(page_response);
|
---|
133 | return page_response;
|
---|
134 | }
|
---|
135 |
|
---|
136 | // check that we have some service params
|
---|
137 | HashMap service_params = (HashMap) params.get("s1");
|
---|
138 | if (service_params == null)
|
---|
139 | { // no query
|
---|
140 | //append site metadata
|
---|
141 | addSiteMetadata(page_response, userContext);
|
---|
142 | addInterfaceOptions(page_response);
|
---|
143 | return page_response;
|
---|
144 | }
|
---|
145 |
|
---|
146 | // create the query request
|
---|
147 | Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
148 | Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
|
---|
149 | mr_query_message.appendChild(mr_query_request);
|
---|
150 |
|
---|
151 | Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
|
---|
152 | GSXML.addParametersToList(query_param_list, service_params);
|
---|
153 | mr_query_request.appendChild(query_param_list);
|
---|
154 |
|
---|
155 | // do the query
|
---|
156 | Element mr_query_response = (Element) this.mr.process(mr_query_message);
|
---|
157 |
|
---|
158 | // check for errors
|
---|
159 | if (processErrorElements(mr_query_response, page_response))
|
---|
160 | {
|
---|
161 | //append site metadata
|
---|
162 | addSiteMetadata(page_response, userContext);
|
---|
163 | addInterfaceOptions(page_response);
|
---|
164 | return page_response;
|
---|
165 | }
|
---|
166 |
|
---|
167 | Element query_response = (Element) GSXML.getChildByTagName(mr_query_response, GSXML.RESPONSE_ELEM);
|
---|
168 | Element query_result_metadata_list = (Element) GSXML.getChildByTagName(query_response, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
|
---|
169 |
|
---|
170 | if (query_result_metadata_list == null)
|
---|
171 | {
|
---|
172 | logger.error("No query result metadata.\n");
|
---|
173 | }
|
---|
174 | else
|
---|
175 | { // add it into the page response
|
---|
176 | page_response.appendChild(doc.importNode(query_result_metadata_list, true));
|
---|
177 | }
|
---|
178 |
|
---|
179 | Element query_term_info_list = (Element) GSXML.getChildByTagName(query_response, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
|
---|
180 | if (query_term_info_list == null)
|
---|
181 | {
|
---|
182 | logger.error("No query term information.\n");
|
---|
183 | }
|
---|
184 | else
|
---|
185 | { // add it into the page response
|
---|
186 | page_response.appendChild(doc.importNode(query_term_info_list, true));
|
---|
187 | }
|
---|
188 |
|
---|
189 | Element facet_list = (Element) GSXML.getChildByTagName(query_response, GSXML.FACET_ELEM + GSXML.LIST_MODIFIER);
|
---|
190 | if (facet_list == null)
|
---|
191 | {
|
---|
192 | logger.error("No query term information.\n");
|
---|
193 | }
|
---|
194 | else
|
---|
195 | { // add it into the page response
|
---|
196 | page_response.appendChild(doc.importNode(facet_list, true));
|
---|
197 | }
|
---|
198 |
|
---|
199 | // check that there are some documents - for now check the list, but later should use a numdocs metadata elem
|
---|
200 | Element document_list = (Element) GSXML.getChildByTagName(query_response, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
|
---|
201 | // documentList not present if no docs found
|
---|
202 | if (document_list == null)
|
---|
203 | {
|
---|
204 | // add in a dummy doc node list - used by the display. need to think about this
|
---|
205 | page_response.appendChild(doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER));
|
---|
206 | //append site metadata
|
---|
207 | addSiteMetadata(page_response, userContext);
|
---|
208 | addInterfaceOptions(page_response);
|
---|
209 | return page_response;
|
---|
210 | }
|
---|
211 |
|
---|
212 | // now we check to see if there is metadata already - some search services return predefined metadata. if there is some, don't do a metadata request
|
---|
213 | NodeList doc_metadata = document_list.getElementsByTagName(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
|
---|
214 | if (doc_metadata.getLength() > 0)
|
---|
215 | {
|
---|
216 | // why are we not paging these results?????
|
---|
217 | // append the doc list to the result
|
---|
218 | page_response.appendChild(doc.importNode(document_list, true));
|
---|
219 | //append site metadata
|
---|
220 | addSiteMetadata(page_response, userContext);
|
---|
221 | addInterfaceOptions(page_response);
|
---|
222 | return page_response;
|
---|
223 | }
|
---|
224 |
|
---|
225 | // get the metadata elements needed from the format statement if any
|
---|
226 | HashSet<String> metadata_names = new HashSet<String>();
|
---|
227 | metadata_names.add("Title");
|
---|
228 | // we already got the format element earlier
|
---|
229 | if (format_elem != null)
|
---|
230 | {
|
---|
231 | getRequiredMetadataNames(format_elem, metadata_names);
|
---|
232 | }
|
---|
233 |
|
---|
234 | // paging of the results is done here - we filter the list to remove unwanted entries before retrieving metadata
|
---|
235 | Element filtered_doc_list;
|
---|
236 | if (does_paging) {
|
---|
237 | filtered_doc_list = (Element)doc.importNode(document_list, true);
|
---|
238 | } else {
|
---|
239 | filtered_doc_list = filterDocList(doc, params, service_params, document_list);
|
---|
240 | }
|
---|
241 | // do the metadata request on the filtered list
|
---|
242 | Element mr_metadata_message = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
243 | to = "DocumentMetadataRetrieve";
|
---|
244 | if (collection != null)
|
---|
245 | {
|
---|
246 | to = GSPath.prependLink(to, collection);
|
---|
247 | }
|
---|
248 | Element mr_metadata_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
|
---|
249 | mr_metadata_message.appendChild(mr_metadata_request);
|
---|
250 |
|
---|
251 | // just get all for now - the receptionist should perhaps pass in some
|
---|
252 | // metadata that it wants, and QueryAction should look through the format stuff to see if there is any other?
|
---|
253 |
|
---|
254 | Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
|
---|
255 | if(extraMetaListElem != null)
|
---|
256 | {
|
---|
257 | NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
|
---|
258 | for(int i = 0; i < extraMetaList.getLength(); i++)
|
---|
259 | {
|
---|
260 | metadata_names.add(((Element)extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
|
---|
261 | }
|
---|
262 | }
|
---|
263 |
|
---|
264 | Element dm_param_list = createMetadataParamList(doc,metadata_names);
|
---|
265 |
|
---|
266 | mr_metadata_request.appendChild(dm_param_list);
|
---|
267 |
|
---|
268 | // add in the doc node list too
|
---|
269 | mr_metadata_request.appendChild(filtered_doc_list);
|
---|
270 |
|
---|
271 | Element mr_metadata_response = (Element) this.mr.process(mr_metadata_message);
|
---|
272 |
|
---|
273 | Element query_result_snippet_list = (Element) GSXML.getChildByTagName(query_response, GSXML.HL_SNIPPET_ELEM + GSXML.LIST_MODIFIER);
|
---|
274 |
|
---|
275 | // check for errors
|
---|
276 | processErrorElements(mr_metadata_response, page_response);
|
---|
277 |
|
---|
278 | Element metadata_response = (Element) GSXML.getChildByTagName(mr_metadata_response, GSXML.RESPONSE_ELEM);
|
---|
279 |
|
---|
280 | Element query_result_document_list = (Element) GSXML.getChildByTagName(metadata_response, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
|
---|
281 |
|
---|
282 | if (query_result_document_list != null)
|
---|
283 | {
|
---|
284 | page_response.appendChild(doc.importNode(query_result_document_list, true));
|
---|
285 | if (query_result_snippet_list != null)
|
---|
286 | {
|
---|
287 | page_response.appendChild(doc.importNode(query_result_snippet_list,true));
|
---|
288 | }
|
---|
289 | }
|
---|
290 |
|
---|
291 | //logger.debug("Query page:\n" + this.converter.getPrettyString(page_response));
|
---|
292 | //append site metadata
|
---|
293 | addSiteMetadata(page_response, userContext);
|
---|
294 | addInterfaceOptions(page_response);
|
---|
295 | return page_response;
|
---|
296 | }
|
---|
297 |
|
---|
298 | private void addCollectionsHierarchy(Element page_response, UserContext userContext) {
|
---|
299 | Document doc = page_response.getOwnerDocument();
|
---|
300 | String collectionsHierarchy = "CollectionsHierarchy";
|
---|
301 | if (checkServiceAvailable(userContext, collectionsHierarchy)){
|
---|
302 | Element groupQueryMessage = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
303 | Element groupQueryRequest = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, collectionsHierarchy, userContext);
|
---|
304 | groupQueryMessage.appendChild(groupQueryRequest);
|
---|
305 | Element groupQueryResult = (Element) this.mr.process(groupQueryMessage);
|
---|
306 | if (groupQueryResult == null){
|
---|
307 | return;
|
---|
308 | }
|
---|
309 | Element groupQueryResponse = (Element) GSXML.getChildByTagName(groupQueryResult, GSXML.RESPONSE_ELEM);
|
---|
310 | if (groupQueryResponse == null){
|
---|
311 | return;
|
---|
312 | }
|
---|
313 | Element hierarchy = (Element) GSXML.getChildByTagName(groupQueryResponse, GSXML.HIERARCHY_ELEM);
|
---|
314 | page_response.appendChild(doc.importNode(hierarchy, true));
|
---|
315 | }
|
---|
316 |
|
---|
317 |
|
---|
318 | }
|
---|
319 |
|
---|
320 | private boolean checkServiceAvailable(UserContext userContext, String collectionsHierarchy) {
|
---|
321 |
|
---|
322 | Document doc = XMLConverter.newDOM();
|
---|
323 | Element infoMessage = doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
324 | Element infoRequest = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
|
---|
325 | Element paramList = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
|
---|
326 | infoRequest.appendChild(paramList);
|
---|
327 | GSXML.addParameterToList(paramList, GSXML.SUBSET_PARAM, GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
|
---|
328 | infoMessage.appendChild(infoRequest);
|
---|
329 | Element responseMessage = (Element) this.mr.process(infoMessage);
|
---|
330 | if (responseMessage == null)
|
---|
331 | {
|
---|
332 | logger.error("couldn't query the message router!");
|
---|
333 | return false;
|
---|
334 | }
|
---|
335 | NodeList serviceLists = responseMessage.getElementsByTagName(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
|
---|
336 | if (serviceLists == null || serviceLists.getLength() == 0){
|
---|
337 | logger.error("No service List in response message from message router!");
|
---|
338 | return false;
|
---|
339 | }
|
---|
340 | Element serviceList = (Element) serviceLists.item(0);
|
---|
341 | Element groupInfoService = GSXML.getNamedElement(serviceList, GSXML.SERVICE_ELEM, GSXML.NAME_ATT, collectionsHierarchy);
|
---|
342 | if (groupInfoService == null){
|
---|
343 | logger.error("service " + collectionsHierarchy + " unavailable");
|
---|
344 | return false;
|
---|
345 | }
|
---|
346 | return true;
|
---|
347 | }
|
---|
348 |
|
---|
349 | /** this filters out some of the doc results for result paging */
|
---|
350 | protected Element filterDocList(Document doc, HashMap<String, Serializable> params, HashMap service_params, Element orig_doc_list)
|
---|
351 | {
|
---|
352 |
|
---|
353 | String hits_pp = (String) service_params.get(HITS_PER_PAGE);
|
---|
354 |
|
---|
355 | int hits = 20;
|
---|
356 | if (hits_pp != null && !hits_pp.equals(""))
|
---|
357 | {
|
---|
358 | if (hits_pp.equals("all")) {
|
---|
359 | hits = -1;
|
---|
360 | } else {
|
---|
361 | try
|
---|
362 | {
|
---|
363 | hits = Integer.parseInt(hits_pp);
|
---|
364 | }
|
---|
365 | catch (Exception e)
|
---|
366 | {
|
---|
367 | hits = 20;
|
---|
368 | }
|
---|
369 | }
|
---|
370 | }
|
---|
371 | if (hits == -1)
|
---|
372 | { // all
|
---|
373 | return (Element) doc.importNode(orig_doc_list, true);
|
---|
374 | }
|
---|
375 | NodeList result_docs = orig_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM);
|
---|
376 |
|
---|
377 | int num_docs = result_docs.getLength();
|
---|
378 | if (num_docs <= hits)
|
---|
379 | {
|
---|
380 | // too few docs to do paging
|
---|
381 | return (Element) doc.importNode(orig_doc_list, true);
|
---|
382 | }
|
---|
383 |
|
---|
384 | // now we need our own doc list
|
---|
385 | Element result_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
|
---|
386 |
|
---|
387 | String start_p = (String) service_params.get("startPage");
|
---|
388 | if (start_p == null)
|
---|
389 | {
|
---|
390 | start_p = (String) params.get("startPage");
|
---|
391 | }
|
---|
392 |
|
---|
393 | int start = 1;
|
---|
394 | if (start_p != null && !start_p.equals(""))
|
---|
395 | {
|
---|
396 | try
|
---|
397 | {
|
---|
398 | start = Integer.parseInt(start_p);
|
---|
399 | }
|
---|
400 | catch (Exception e)
|
---|
401 | {
|
---|
402 | start = 1;
|
---|
403 | }
|
---|
404 | }
|
---|
405 | int start_from = (start - 1) * hits;
|
---|
406 | int end_at = (start * hits) - 1;
|
---|
407 |
|
---|
408 | if (start_from > num_docs)
|
---|
409 | {
|
---|
410 | // something has gone wrong
|
---|
411 | return result_list;
|
---|
412 | }
|
---|
413 |
|
---|
414 | if (end_at > num_docs)
|
---|
415 | {
|
---|
416 | end_at = num_docs - 1;
|
---|
417 | }
|
---|
418 | // now we finally have the docs numbers to use
|
---|
419 | for (int i = start_from; i <= end_at; i++)
|
---|
420 | {
|
---|
421 | result_list.appendChild(doc.importNode(result_docs.item(i), true));
|
---|
422 | }
|
---|
423 |
|
---|
424 | return result_list;
|
---|
425 | }
|
---|
426 |
|
---|
427 | protected boolean addHitsParamToService(Document doc, Element service_description, String lang) {
|
---|
428 | Element param_list = (Element)GSXML.getChildByTagName(service_description, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
429 | Element param = GSXML.createParameterDescription(doc, HITS_PER_PAGE, getTextString("param." + HITS_PER_PAGE, lang, "ServiceRack", null), GSXML.PARAM_TYPE_INTEGER, "20", null, null);
|
---|
430 | Element query_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, "query");
|
---|
431 | if (query_param != null) {
|
---|
432 | param_list.insertBefore(param, query_param);
|
---|
433 | } else {
|
---|
434 | param_list.appendChild(param);
|
---|
435 | }
|
---|
436 | return true;
|
---|
437 | }
|
---|
438 |
|
---|
439 | }
|
---|