source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/QueryAction.java@ 28854

Last change on this file since 28854 was 28382, checked in by davidb, 11 years ago

Elimination of the 'this.doc' field from the Action base class and the subclasses that rely on it. For Greenstone3 purposes it is unsafe to create this object in the action's constructor and then store it for other methods to access. This is because Greenstone 3 (and in particular calls to 'process') operates in a multi-threaded context that is managed by the servlet server (e.g. Tomcat by default). Calls to DOM methods are not guaranteed to be thread safe; this became apparent when we started looking into an exception that was being thrown, which centred around use of the DOM method 'item(i)'. The change this commit makes is to remove 'this.doc' being stored as a field. A document is now created in the top level of a call to 'process()', and when a DOM reference is needed in a subsequent method, an Element variable (typically passed in as a parameter to the method) is used (through 'Document doc = element.getOwnerDocument()') to gain access to the DOM.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.3 KB
Line 
1package org.greenstone.gsdl3.action;
2
3import java.io.Serializable;
4import java.util.HashMap;
5import java.util.HashSet;
6
7import org.apache.log4j.Logger;
8import org.greenstone.gsdl3.util.GSParams;
9import org.greenstone.gsdl3.util.GSPath;
10import org.greenstone.gsdl3.util.GSXML;
11import org.greenstone.gsdl3.util.GSXSLT;
12import org.greenstone.gsdl3.util.UserContext;
13import org.w3c.dom.Document;
14import org.w3c.dom.Element;
15import org.w3c.dom.Node;
16import org.w3c.dom.NodeList;
17
18/** action class for queries */
19public class QueryAction extends Action
20{
21
22 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.QueryAction.class.getName());
23
24 /**
25 * process - processes a request.
26 */
27 public Node process(Node message_node)
28 {
29 Element message = this.converter.nodeToElement(message_node);
30 Document doc = message.getOwnerDocument();
31
32 // get the request - assume there is only one
33 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
34
35 // create the return message
36 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
37 Element response = basicQuery(request);
38 result.appendChild(doc.importNode(response, true));
39 return result;
40 }
41
42 /**
43 * a generic query handler this gets the service description, does the query
44 * (just passes all the params to the service, then gets the titles for any
45 * results
46 */
47 protected Element basicQuery(Element request)
48 {
49 // the result
50 Document doc = request.getOwnerDocument();
51 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
52
53 // extract the params from the cgi-request, and check that we have a coll specified
54 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
55 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
56
57 String request_type = (String) params.get(GSParams.REQUEST_TYPE);
58 String service_name = (String) params.get(GSParams.SERVICE);
59 String collection = (String) params.get(GSParams.COLLECTION);
60
61 // collection may be null or empty when we are doing cross coll services
62 if (collection == null || collection.equals(""))
63 {
64 collection = null;
65 }
66
67 UserContext userContext = new UserContext(request);
68 String to = service_name;
69 if (collection != null)
70 {
71 to = GSPath.prependLink(to, collection);
72 }
73
74 // get the format info - there may be global format info in the collection that searching needs
75 Element format_elem = getFormatInfo(to, userContext);
76 // set the format type
77 format_elem.setAttribute(GSXML.TYPE_ATT, "search");
78 // for now just add to the response
79 page_response.appendChild(doc.importNode(format_elem, true));
80
81 if (request_type.indexOf("d") != -1)
82 {
83 // we have been asked for the service description
84 Element mr_info_message = doc.createElement(GSXML.MESSAGE_ELEM);
85 Element mr_info_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, to, userContext);
86 mr_info_message.appendChild(mr_info_request);
87
88 // process the message
89 Element mr_info_response = (Element) this.mr.process(mr_info_message);
90 // the response
91
92 Element service_response = (Element) GSXML.getChildByTagName(mr_info_response, GSXML.RESPONSE_ELEM);
93
94 Element service_description = (Element) doc.importNode(GSXML.getChildByTagName(service_response, GSXML.SERVICE_ELEM), true);
95 page_response.appendChild(service_description);
96 }
97
98 if (request_type.indexOf("r") == -1)
99 {
100 // just a display request, no actual processing to do
101 //append site metadata
102 addSiteMetadata(page_response, userContext);
103 addInterfaceOptions(page_response);
104 return page_response;
105 }
106
107 // check that we have some service params
108 HashMap service_params = (HashMap) params.get("s1");
109 if (service_params == null)
110 { // no query
111 //append site metadata
112 addSiteMetadata(page_response, userContext);
113 addInterfaceOptions(page_response);
114 return page_response;
115 }
116
117 // create the query request
118 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
119 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
120 mr_query_message.appendChild(mr_query_request);
121
122 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
123 GSXML.addParametersToList(doc, query_param_list, service_params);
124 mr_query_request.appendChild(query_param_list);
125
126 logger.debug(GSXML.xmlNodeToString(mr_query_message));
127
128 // do the query
129 Element mr_query_response = (Element) this.mr.process(mr_query_message);
130
131 // check for errors
132 if (processErrorElements(mr_query_response, page_response))
133 {
134 //append site metadata
135 addSiteMetadata(page_response, userContext);
136 addInterfaceOptions(page_response);
137 return page_response;
138 }
139
140 Element query_response = (Element) GSXML.getChildByTagName(mr_query_response, GSXML.RESPONSE_ELEM);
141 Element query_result_metadata_list = (Element) GSXML.getChildByTagName(query_response, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
142 if (query_result_metadata_list == null)
143 {
144 logger.error("No query result metadata.\n");
145 }
146 else
147 { // add it into the page response
148 page_response.appendChild(doc.importNode(query_result_metadata_list, true));
149 }
150
151 Element query_term_info_list = (Element) GSXML.getChildByTagName(query_response, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
152 if (query_term_info_list == null)
153 {
154 logger.error("No query term information.\n");
155 }
156 else
157 { // add it into the page response
158 page_response.appendChild(doc.importNode(query_term_info_list, true));
159 }
160
161 Element facet_list = (Element) GSXML.getChildByTagName(query_response, GSXML.FACET_ELEM + GSXML.LIST_MODIFIER);
162 if (facet_list == null)
163 {
164 logger.error("No query term information.\n");
165 }
166 else
167 { // add it into the page response
168 page_response.appendChild(doc.importNode(facet_list, true));
169 }
170
171 // check that there are some documents - for now check the list, but later should use a numdocs metadata elem
172 Element document_list = (Element) GSXML.getChildByTagName(query_response, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
173 // documentList not present if no docs found
174 if (document_list == null)
175 {
176 // add in a dummy doc node list - used by the display. need to think about this
177 page_response.appendChild(doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER));
178 //append site metadata
179 addSiteMetadata(page_response, userContext);
180 addInterfaceOptions(page_response);
181 return page_response;
182 }
183
184 // now we check to see if there is metadata already - some search services return predefined metadata. if there is some, don't do a metadata request
185 NodeList doc_metadata = document_list.getElementsByTagName(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
186 if (doc_metadata.getLength() > 0)
187 {
188 logger.error("have already found metadata!");
189 // append the doc list to the result
190 page_response.appendChild(doc.importNode(document_list, true));
191 //append site metadata
192 addSiteMetadata(page_response, userContext);
193 addInterfaceOptions(page_response);
194 return page_response;
195 }
196
197 // get the metadata elements needed from the format statement if any
198 HashSet<String> metadata_names = new HashSet<String>();
199 metadata_names.add("Title");
200 // we already got the format element earlier
201 if (format_elem != null)
202 {
203 getRequiredMetadataNames(format_elem, metadata_names);
204 }
205
206 // paging of the results is done here - we filter the list to remove unwanted entries before retrieving metadata
207 Element filtered_doc_list = filterDocList(doc, params, service_params, document_list);
208
209 // do the metadata request on the filtered list
210 Element mr_metadata_message = doc.createElement(GSXML.MESSAGE_ELEM);
211 to = "DocumentMetadataRetrieve";
212 if (collection != null)
213 {
214 to = GSPath.prependLink(to, collection);
215 }
216 Element mr_metadata_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
217 mr_metadata_message.appendChild(mr_metadata_request);
218
219 // just get all for now - the receptionist should perhaps pass in some
220 // metadata that it wants, and QueryAction should look through the format stuff to see if there is any other?
221
222 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
223 if(extraMetaListElem != null)
224 {
225 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
226 for(int i = 0; i < extraMetaList.getLength(); i++)
227 {
228 metadata_names.add(((Element)extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
229 }
230 }
231
232 Element dm_param_list = createMetadataParamList(doc,metadata_names);
233
234 mr_metadata_request.appendChild(dm_param_list);
235
236 // add in the doc node list too
237 mr_metadata_request.appendChild(filtered_doc_list);
238
239 Element mr_metadata_response = (Element) this.mr.process(mr_metadata_message);
240
241 // check for errors
242 processErrorElements(mr_metadata_response, page_response);
243
244 Element metadata_response = (Element) GSXML.getChildByTagName(mr_metadata_response, GSXML.RESPONSE_ELEM);
245
246 Element query_result_document_list = (Element) GSXML.getChildByTagName(metadata_response, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
247
248 if (query_result_document_list != null)
249 {
250 page_response.appendChild(doc.importNode(query_result_document_list, true));
251 }
252
253 logger.debug("Query page:\n" + this.converter.getPrettyString(page_response));
254 //append site metadata
255 addSiteMetadata(page_response, userContext);
256 addInterfaceOptions(page_response);
257 return page_response;
258 }
259
260 /** this filters out some of the doc results for result paging */
261 protected Element filterDocList(Document doc, HashMap<String, Serializable> params, HashMap service_params, Element orig_doc_list)
262 {
263
264 // check the hits_per_page param - is it a service param??
265 String hits_pp = (String) service_params.get("hitsPerPage");
266 if (hits_pp == null)
267 {
268 // the service is doing the paging, so we want to display all of the returned docs(???)
269 // return (Element)doc.importNode(orig_doc_list, true);
270 // try hitsPerPage in the globle param
271 hits_pp = (String) params.get("hitsPerPage");
272 }
273
274 int hits = 20;
275 if (hits_pp != null && !hits_pp.equals(""))
276 {
277 try
278 {
279 hits = Integer.parseInt(hits_pp);
280 }
281 catch (Exception e)
282 {
283 hits = 20;
284 }
285 }
286
287 if (hits == -1)
288 { // all
289 return (Element) doc.importNode(orig_doc_list, true);
290 }
291 NodeList result_docs = orig_doc_list.getElementsByTagName(GSXML.DOC_NODE_ELEM);
292
293 int num_docs = result_docs.getLength();
294 if (num_docs <= hits)
295 {
296 // too few docs to do paging
297 return (Element) doc.importNode(orig_doc_list, true);
298 }
299
300 // now we need our own doc list
301 Element result_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
302
303 String start_p = (String) service_params.get("startPage");
304 if (start_p == null)
305 {
306 start_p = (String) params.get("startPage");
307 }
308
309 int start = 1;
310 if (start_p != null && !start_p.equals(""))
311 {
312 try
313 {
314 start = Integer.parseInt(start_p);
315 }
316 catch (Exception e)
317 {
318 start = 1;
319 }
320 }
321
322 int start_from = (start - 1) * hits;
323 int end_at = (start * hits) - 1;
324
325 if (start_from > num_docs)
326 {
327 // something has gone wrong
328 return result_list;
329 }
330
331 if (end_at > num_docs)
332 {
333 end_at = num_docs - 1;
334 }
335
336 // now we finally have the docs numbers to use
337 for (int i = start_from; i <= end_at; i++)
338 {
339 result_list.appendChild(doc.importNode(result_docs.item(i), true));
340 }
341
342 return result_list;
343 }
344
345}
Note: See TracBrowser for help on using the repository browser.