source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/QueryAction.java@ 37514

Last change on this file since 37514 was 37514, checked in by kjdon, 14 months ago

Using the new getRequiredMetadataNames - it has an extra arg, and we no longer need to do the extraMetadataList bit ourselves, as it's now in getRequiredMetadataNames

  • Property svn:keywords set to Author Date Id Revision
File size: 15.4 KB
Line 
1package org.greenstone.gsdl3.action;
2
3import java.io.Serializable;
4import java.util.HashMap;
5import java.util.HashSet;
6
7import org.apache.log4j.Logger;
8import org.greenstone.gsdl3.util.GSParams;
9import org.greenstone.gsdl3.util.GSPath;
10import org.greenstone.gsdl3.util.GSXML;
11import org.greenstone.gsdl3.util.GSXSLT;
12import org.greenstone.gsdl3.util.UserContext;
13import org.greenstone.gsdl3.util.XMLConverter;
14import org.w3c.dom.Document;
15import org.w3c.dom.Element;
16import org.w3c.dom.Node;
17import org.w3c.dom.NodeList;
18
19/** action class for queries */
20public class QueryAction extends Action
21{
22
/** Name of the parameter that carries the number of results to show per page. */
23 public static final String HITS_PER_PAGE_ARG = "hitsPerPage";
/** log4j logger for this action, named after the QueryAction class. */
24 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.QueryAction.class.getName());
25
26 /**
27 * tell the param class what its arguments are.
28 * if an action has its own arguments, this should add them to
29 * the params object - particularly
30 * important for args that need to be saved to the session
31 */
32 public boolean addActionParameters(GSParams params)
33 {
34 // some query services don't do paging themselves, in shich case we do it here. Let tell servlet to save the param just in case nothing else is doing that.
35 params.addServiceParameter(HITS_PER_PAGE_ARG, "20", true, false);
36 return true;
37 }
38
39 /**
40 * process - processes a request.
41 */
42 public Node process(Node message_node)
43 {
44 Element message = GSXML.nodeToElement(message_node);
45 Document doc = message.getOwnerDocument();
46
47 // get the request - assume there is only one
48 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
49
50 // create the return message
51 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
52 Element response = basicQuery(request);
53 result.appendChild(doc.importNode(response, true));
54 return result;
55 }
56
57 /**
58 * a generic query handler this gets the service description, does the query
59 * (just passes all the params to the service, then gets the titles for any
60 * results
61 */
62 protected Element basicQuery(Element request)
63 {
64 // the result
65 Document doc = request.getOwnerDocument();
66 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
67
68 // extract the params from the cgi-request, and check that we have a coll specified
69 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
70 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
71
72 String request_type = (String) params.get(GSParams.REQUEST_TYPE);
73 String service_name = (String) params.get(GSParams.SERVICE);
74 String collection = (String) params.get(GSParams.COLLECTION);
75 String lang = request.getAttribute(GSXML.LANG_ATT);
76 // collection may be null or empty when we are doing cross coll services
77 if (collection == null || collection.equals(""))
78 {
79 collection = null;
80 }
81
82 UserContext userContext = new UserContext(request);
83 String to = service_name;
84 if (collection != null)
85 {
86 to = GSPath.prependLink(to, collection);
87 }
88
89 // get the format info - there may be global format info in the collection that searching needs
90 Element format_elem = getFormatInfo(to, userContext);
91 if (format_elem != null) {
92 // set the format type
93 format_elem.setAttribute(GSXML.TYPE_ATT, "search");
94 // for now just add to the response
95 page_response.appendChild(doc.importNode(format_elem, true));
96 }
97 // get the service description
98 // we have been asked for the service description
99 Element mr_info_message = doc.createElement(GSXML.MESSAGE_ELEM);
100 Element mr_info_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, to, userContext);
101 mr_info_message.appendChild(mr_info_request);
102
103 // process the message
104 Element mr_info_response = (Element) this.mr.process(mr_info_message);
105
106 boolean does_paging = false;
107
108 // the response
109 Element service_response = (Element) GSXML.getChildByTagName(mr_info_response, GSXML.RESPONSE_ELEM);
110
111 Element service_description = (Element)GSXML.getChildByTagName(service_response, GSXML.SERVICE_ELEM);
112 if (service_description != null) {
113 service_description = (Element) doc.importNode(service_description, true);
114
115 Element meta_list =(Element) GSXML.getChildByTagName(service_description, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
116 if (meta_list != null) {
117 String value = GSXML.getMetadataValue(meta_list, "does_paging");
118 if (value.equals("true")) {
119 does_paging = true;
120 }
121 }
122
123 if (does_paging == false) {
124 // we will do the paging, so lets add in a hitsPerPage param to the service
125 addHitsParamToService(doc, service_description, lang);
126 }
127 }
128 //Element service_description = (Element) doc.importNode(GSXML.getChildByTagName(service_response, GSXML.SERVICE_ELEM), true);
129
130
131 // have we been asked to return the service description
132 // as part of the response?
133 if (request_type.indexOf("d") != -1) {
134 if (service_description != null) {
135 page_response.appendChild(service_description);
136 }
137 addCollectionsHierarchy(page_response,userContext);
138 }
139
140 if (request_type.indexOf("r") == -1)
141 {
142 // just a display request, no actual processing to do
143 //append site metadata
144 addSiteMetadata(page_response, userContext);
145 addInterfaceOptions(page_response);
146 return page_response;
147 }
148
149 // check that we have some service params
150 HashMap service_params = (HashMap) params.get("s1");
151 if (service_params == null)
152 { // no query
153 //append site metadata
154 addSiteMetadata(page_response, userContext);
155 addInterfaceOptions(page_response);
156 return page_response;
157 }
158
159 // create the query request
160 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
161 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
162 mr_query_message.appendChild(mr_query_request);
163
164 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
165 GSXML.addParametersToList(query_param_list, service_params);
166 mr_query_request.appendChild(query_param_list);
167
168 // do the query
169 Element mr_query_response = (Element) this.mr.process(mr_query_message);
170
171 // check for errors
172 if (processErrorElements(mr_query_response, page_response))
173 {
174 //append site metadata
175 addSiteMetadata(page_response, userContext);
176 addInterfaceOptions(page_response);
177 return page_response;
178 }
179
180 Element query_response = (Element) GSXML.getChildByTagName(mr_query_response, GSXML.RESPONSE_ELEM);
181 Element query_result_metadata_list = (Element) GSXML.getChildByTagName(query_response, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
182
183 if (query_result_metadata_list == null)
184 {
185 logger.error("No query result metadata.\n");
186 }
187 else
188 { // add it into the page response
189 page_response.appendChild(doc.importNode(query_result_metadata_list, true));
190 }
191
192 Element query_term_info_list = (Element) GSXML.getChildByTagName(query_response, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
193 if (query_term_info_list == null)
194 {
195 logger.error("No query term information.\n");
196 }
197 else
198 { // add it into the page response
199 page_response.appendChild(doc.importNode(query_term_info_list, true));
200 }
201
202 Element facet_list = (Element) GSXML.getChildByTagName(query_response, GSXML.FACET_ELEM + GSXML.LIST_MODIFIER);
203 if (facet_list == null)
204 {
205 logger.error("No query term information.\n");
206 }
207 else
208 { // add it into the page response
209 page_response.appendChild(doc.importNode(facet_list, true));
210 }
211
212 // check that there are some documents - for now check the list, but later should use a numdocs metadata elem
213 Element document_list = (Element) GSXML.getChildByTagName(query_response, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
214 // documentList not present if no docs found
215 if (document_list == null)
216 {
217 // add in a dummy doc node list - used by the display. need to think about this
218 page_response.appendChild(doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER));
219 //append site metadata
220 addSiteMetadata(page_response, userContext);
221 addInterfaceOptions(page_response);
222 return page_response;
223 }
224
225 // now we check to see if there is metadata already - some search services return predefined metadata. if there is some, don't do a metadata request
226 NodeList doc_metadata = document_list.getElementsByTagName(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
227 if (doc_metadata.getLength() > 0)
228 {
229 // why are we not paging these results?????
230 // append the doc list to the result
231 page_response.appendChild(doc.importNode(document_list, true));
232 //append site metadata
233 addSiteMetadata(page_response, userContext);
234 addInterfaceOptions(page_response);
235 return page_response;
236 }
237
238 // get the metadata elements needed from the format statement if any
239 HashSet<String> metadata_names = new HashSet<String>();
240 metadata_names.add("Title");
241 // we already got the format element earlier
242 // find names from format and from extraMetadataList in the request
243 getRequiredMetadataNames(metadata_names, format_elem, request);
244
245
246 // paging of the results is done here - we filter the list to remove unwanted entries before retrieving metadata
247 Element filtered_doc_list;
248 if (does_paging) {
249 filtered_doc_list = (Element)doc.importNode(document_list, true);
250 } else {
251 filtered_doc_list = filterDocList(doc, params, service_params, document_list);
252 }
253 // do the metadata request on the filtered list
254 Element mr_metadata_message = doc.createElement(GSXML.MESSAGE_ELEM);
255 to = "DocumentMetadataRetrieve";
256 if (collection != null)
257 {
258 to = GSPath.prependLink(to, collection);
259 }
260 Element mr_metadata_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
261 mr_metadata_message.appendChild(mr_metadata_request);
262
263 Element dm_param_list = createMetadataParamList(doc,metadata_names);
264
265 mr_metadata_request.appendChild(dm_param_list);
266
267 // add in the doc node list too
268 mr_metadata_request.appendChild(filtered_doc_list);
269
270 Element mr_metadata_response = (Element) this.mr.process(mr_metadata_message);
271
272 Element query_result_snippet_list = (Element) GSXML.getChildByTagName(query_response, GSXML.HL_SNIPPET_ELEM + GSXML.LIST_MODIFIER);
273
274 // check for errors
275 processErrorElements(mr_metadata_response, page_response);
276
277 Element metadata_response = (Element) GSXML.getChildByTagName(mr_metadata_response, GSXML.RESPONSE_ELEM);
278
279 Element query_result_document_list = (Element) GSXML.getChildByTagName(metadata_response, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
280
281 if (query_result_document_list != null)
282 {
283 page_response.appendChild(doc.importNode(query_result_document_list, true));
284 if (query_result_snippet_list != null)
285 {
286 page_response.appendChild(doc.importNode(query_result_snippet_list,true));
287 }
288 }
289
290 //logger.debug("Query page:\n" + this.converter.getPrettyString(page_response));
291 //append site metadata
292 addSiteMetadata(page_response, userContext);
293 addInterfaceOptions(page_response);
294 return page_response;
295 }
296
297 private void addCollectionsHierarchy(Element page_response, UserContext userContext) {
298 Document doc = page_response.getOwnerDocument();
299 String collectionsHierarchy = "CollectionsHierarchy";
300 if (checkServiceAvailable(userContext, collectionsHierarchy)){
301 Element groupQueryMessage = doc.createElement(GSXML.MESSAGE_ELEM);
302 Element groupQueryRequest = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, collectionsHierarchy, userContext);
303 groupQueryMessage.appendChild(groupQueryRequest);
304 Element groupQueryResult = (Element) this.mr.process(groupQueryMessage);
305 if (groupQueryResult == null){
306 return;
307 }
308 Element groupQueryResponse = (Element) GSXML.getChildByTagName(groupQueryResult, GSXML.RESPONSE_ELEM);
309 if (groupQueryResponse == null){
310 return;
311 }
312 Element hierarchy = (Element) GSXML.getChildByTagName(groupQueryResponse, GSXML.HIERARCHY_ELEM);
313 page_response.appendChild(doc.importNode(hierarchy, true));
314 }
315
316
317 }
318
319 private boolean checkServiceAvailable(UserContext userContext, String collectionsHierarchy) {
320
321 Document doc = XMLConverter.newDOM();
322 Element infoMessage = doc.createElement(GSXML.MESSAGE_ELEM);
323 Element infoRequest = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
324 Element paramList = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
325 infoRequest.appendChild(paramList);
326 GSXML.addParameterToList(paramList, GSXML.SUBSET_PARAM, GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
327 infoMessage.appendChild(infoRequest);
328 Element responseMessage = (Element) this.mr.process(infoMessage);
329 if (responseMessage == null)
330 {
331 logger.error("couldn't query the message router!");
332 return false;
333 }
334 NodeList serviceLists = responseMessage.getElementsByTagName(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
335 if (serviceLists == null || serviceLists.getLength() == 0){
336 logger.error("No service List in response message from message router!");
337 return false;
338 }
339 Element serviceList = (Element) serviceLists.item(0);
340 Element groupInfoService = GSXML.getNamedElement(serviceList, GSXML.SERVICE_ELEM, GSXML.NAME_ATT, collectionsHierarchy);
341 if (groupInfoService == null){
342 logger.error("service " + collectionsHierarchy + " unavailable");
343 return false;
344 }
345 return true;
346 }
347
348 /** this filters out some of the doc results for result paging */
349 protected Element filterDocList(Document doc, HashMap<String, Serializable> params, HashMap service_params, Element orig_doc_list)
350 {
351
352 String hits_pp = (String) service_params.get(HITS_PER_PAGE_ARG);
353
354 int hits = 20;
355 if (hits_pp != null && !hits_pp.equals(""))
356 {
357 if (hits_pp.equals("all")) {
358 hits = -1;
359 } else {
360 try
361 {
362 hits = Integer.parseInt(hits_pp);
363 }
364 catch (Exception e)
365 {
366 hits = 20;
367 }
368 }
369 }
370 if (hits == -1)
371 { // all
372 return (Element) doc.importNode(orig_doc_list, true);
373 }
374 NodeList result_docs = orig_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM);
375
376 int num_docs = result_docs.getLength();
377 if (num_docs <= hits)
378 {
379 // too few docs to do paging
380 return (Element) doc.importNode(orig_doc_list, true);
381 }
382
383 // now we need our own doc list
384 Element result_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
385
386 String start_p = (String) service_params.get("startPage");
387 if (start_p == null)
388 {
389 start_p = (String) params.get("startPage");
390 }
391
392 int start = 1;
393 if (start_p != null && !start_p.equals(""))
394 {
395 try
396 {
397 start = Integer.parseInt(start_p);
398 }
399 catch (Exception e)
400 {
401 start = 1;
402 }
403 }
404 int start_from = (start - 1) * hits;
405 int end_at = (start * hits) - 1;
406
407 if (start_from > num_docs)
408 {
409 // something has gone wrong
410 return result_list;
411 }
412
413 if (end_at > num_docs)
414 {
415 end_at = num_docs - 1;
416 }
417 // now we finally have the docs numbers to use
418 for (int i = start_from; i <= end_at; i++)
419 {
420 result_list.appendChild(doc.importNode(result_docs.item(i), true));
421 }
422
423 return result_list;
424 }
425
426 protected boolean addHitsParamToService(Document doc, Element service_description, String lang) {
427 Element param_list = (Element)GSXML.getChildByTagName(service_description, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
428 Element param = GSXML.createParameterDescription(doc, HITS_PER_PAGE_ARG, getTextString("param." + HITS_PER_PAGE_ARG, lang, null), GSXML.PARAM_TYPE_INTEGER, "20", null, null);
429 Element query_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, "query");
430 if (query_param != null) {
431 param_list.insertBefore(param, query_param);
432 } else {
433 param_list.appendChild(param);
434 }
435 return true;
436 }
437
438}
Note: See TracBrowser for help on using the repository browser.