source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/CrossCollectionSearch.java@ 28966

Last change on this file since 28966 was 28966, checked in by kjdon, 10 years ago

Lots of changes, mainly to do with removing this.doc from everywhere. Document is not thread safe, so we now tend to create a new Document every time we start a new page/message etc. In a service, this.desc_doc is available as the document for creating service info, but it should only be used for that and not for other messages. newDOM is now static for XMLConverter. Method parameter changes for some GSXML methods.

  • Property svn:keywords set to Author Date Id Revision
File size: 19.7 KB
Line 
1/*
2 * CrossCollectionSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import java.util.ArrayList;
22import java.util.HashMap;
23import java.util.Iterator;
24import java.util.Map;
25import java.util.Set;
26
27import org.apache.log4j.Logger;
28import org.greenstone.gsdl3.util.GSPath;
29import org.greenstone.gsdl3.util.GSXML;
30import org.greenstone.gsdl3.util.UserContext;
31import org.greenstone.gsdl3.util.XMLConverter;
32
33import org.w3c.dom.Document;
34import org.w3c.dom.Element;
35import org.w3c.dom.Node;
36import org.w3c.dom.NodeList;
37
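/**
 * Service rack offering a TextQuery service and a DocumentMetadataRetrieve
 * service that proxy the corresponding services of the individual
 * collections, so that several collections can be searched in one request.
 */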
41
42public class CrossCollectionSearch extends ServiceRack
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.CrossCollectionSearch.class.getName());
46 protected static final String QUERY_PARAM = "query";
47 protected static final String COLLECTION_PARAM = "collection";
48
49 // the services on offer - these proxy the actual collection ones
50 protected static final String TEXT_QUERY_SERVICE = "TextQuery";
51 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
52
53 protected String[] coll_ids_list = null;
54 protected String[] coll_ids_list_no_all = null;
55 // maps lang to coll names list
56 protected HashMap<String, String[]> coll_names_map = null;
57
58 //protected String[] coll_names_list = null;
59
60 /** constructor */
61 public CrossCollectionSearch()
62 {
63 }
64
65 public boolean configure(Element info, Element extra_info)
66 {
67 // any parameters? colls to include??
68 logger.info("Configuring CrossCollectionSearch...");
69 // query service
70 Element ccs_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
71 ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
72 ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
73 this.short_service_info.appendChild(ccs_service);
74
75 // metadata service
76 Element dmr_service = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
77 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
78 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
79 this.short_service_info.appendChild(dmr_service);
80
81 // get any format info
82 Element format_info = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
83 if (format_info != null)
84 {
85 this.format_info_map.put(TEXT_QUERY_SERVICE, this.desc_doc.importNode(format_info, true));
86 }
87 else
88 {
89 // add in a default format statement
90 String format_string = "<format xmlns:gsf='" + GSXML.GSF_NAMESPACE + "' xmlns:xsl='" + GSXML.XSL_NAMESPACE + "'><gsf:template match='documentNode'><td><a><xsl:attribute name='href'>?a=d&amp;c=<xsl:value-of select='@collection'/>&amp;d=<xsl:value-of select='@nodeID'/><xsl:if test=\"@nodeType='leaf'\">&amp;sib=1</xsl:if>&amp;dt=<xsl:value-of select='@docType'/>&amp;p.a=q&amp;p.s=" + TEXT_QUERY_SERVICE + "&amp;p.c=";
91 if (this.cluster_name != null)
92 {
93 format_string += this.cluster_name;
94 }
95 format_string += "</xsl:attribute><gsf:icon/></a></td><td><gsf:metadata name='Title'/> (<xsl:value-of select='@collection'/>) </td></gsf:template></format>";
96 this.format_info_map.put(TEXT_QUERY_SERVICE, this.desc_doc.importNode(this.converter.getDOM(format_string).getDocumentElement(), true));
97 }
98 return true;
99 }
100
101 protected Element getServiceDescription(Document doc, String service, String lang, String subset)
102 {
103 if (service.equals(TEXT_QUERY_SERVICE))
104 {
105
106 Element ccs_service = doc.createElement(GSXML.SERVICE_ELEM);
107 ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
108 ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
109
110 // display info
111 if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER))
112 {
113 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE + ".name", lang)));
114 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE + ".submit", lang)));
115 ccs_service.appendChild(GSXML.createDisplayTextElement(doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE + ".description", lang)));
116 }
117 // param info
118 if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER))
119 {
120 Element param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
121 // collection list
122 if (coll_ids_list == null)
123 {
124 initCollectionList(lang);
125 }
126 if (!coll_names_map.containsKey(lang))
127 {
128 addCollectionNames(lang);
129 }
130 Element param = GSXML.createParameterDescription(doc, COLLECTION_PARAM, getTextString("param." + COLLECTION_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "all", coll_ids_list, coll_names_map.get(lang));
131 param_list.appendChild(param);
132 // query param
133 param = GSXML.createParameterDescription(doc, QUERY_PARAM, getTextString("param." + QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
134 param_list.appendChild(param);
135 ccs_service.appendChild(param_list);
136 }
137
138 logger.debug("service description=" + this.converter.getPrettyString(ccs_service));
139 return ccs_service;
140 }
141 // these ones are probably never called, but put them here just in case
142 Element service_elem = doc.createElement(GSXML.SERVICE_ELEM);
143 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
144 service_elem.setAttribute(GSXML.NAME_ATT, service);
145 return service_elem;
146
147 }
148
149 protected Element processTextQuery(Element request)
150 {
151 // Create a new (empty) result message
152 Document result_doc = XMLConverter.newDOM();
153 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
154 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
155 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
156
157 UserContext userContext = new UserContext(request);
158
159 // Get the parameters of the request
160 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
161 if (param_list == null)
162 {
163 logger.error("TextQuery request had no paramList.");
164 return result; // Return the empty result
165 }
166
167 // get the collection list
168 String[] colls_list = coll_ids_list_no_all;
169 Element coll_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, COLLECTION_PARAM);
170 if (coll_param != null)
171 {
172 String coll_list = GSXML.getValue(coll_param);
173 if (!coll_list.equals("all") && !coll_list.equals(""))
174 {
175 colls_list = coll_list.split(",");
176 }
177 }
178
179 Document msg_doc = XMLConverter.newDOM();
180 Element query_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
181 // we are sending the same request to each collection - build up the to
182 // attribute for the request
183 StringBuffer to_att = new StringBuffer();
184 for (int i = 0; i < colls_list.length; i++)
185 {
186 if (i > 0)
187 {
188 to_att.append(",");
189 }
190 to_att.append(GSPath.appendLink(colls_list[i], "TextQuery"));
191
192 }
193 // send the query to all colls
194 Element query_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_PROCESS, to_att.toString(), userContext);
195 query_message.appendChild(query_request);
196 // should we add params individually?
197 Element new_param_list = msg_doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
198 query_request.appendChild(new_param_list);
199 new_param_list.appendChild(msg_doc.importNode(GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, QUERY_PARAM), true));
200 Element query_result = (Element) this.router.process(query_message);
201
202 // gather up the data from each response
203 int numDocsMatched = 0;
204 int numDocsReturned = 0;
205
206 //term info??
207
208 NodeList metadata = query_result.getElementsByTagName(GSXML.METADATA_ELEM);
209 for (int j = 0; j < metadata.getLength(); j++)
210 {
211 Element meta = (Element) metadata.item(j);
212 if (meta.getAttribute(GSXML.NAME_ATT).equals("numDocsReturned"))
213 {
214 numDocsReturned += Integer.parseInt(GSXML.getValue(meta));
215 }
216 else if (meta.getAttribute(GSXML.NAME_ATT).equals("numDocsMatched"))
217 {
218 numDocsMatched += Integer.parseInt(GSXML.getValue(meta));
219 }
220 }
221
222 Element metadata_list = result_doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
223 result.appendChild(metadata_list);
224 GSXML.addMetadata(metadata_list, "numDocsReturned", "" + numDocsReturned);
225 //GSXML.addMetadata(metadata_list, "numDocsMatched", ""+numDocsMatched);
226
227 Element doc_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
228 result.appendChild(doc_node_list);
229
230 NodeList responses = query_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
231
232 for (int k = 0; k < responses.getLength(); k++)
233 {
234 String coll_name = GSPath.removeLastLink(((Element) responses.item(k)).getAttribute(GSXML.FROM_ATT));
235 NodeList nodes = ((Element) responses.item(k)).getElementsByTagName(GSXML.DOC_NODE_ELEM);
236 if (nodes == null || nodes.getLength() == 0)
237 continue;
238 Element last_node = null;
239 Element this_node = null;
240 for (int n = 0; n < nodes.getLength(); n++)
241 {
242 this_node = (Element) nodes.item(n);
243 this_node.setAttribute("collection", coll_name);
244 if (k == 0)
245 {
246
247 doc_node_list.appendChild(result_doc.importNode(this_node, true));
248 }
249 else
250 {
251 if (last_node == null)
252 {
253 last_node = (Element) GSXML.getChildByTagName(doc_node_list, GSXML.DOC_NODE_ELEM);
254 }
255 last_node = GSXML.insertIntoOrderedList(doc_node_list, GSXML.DOC_NODE_ELEM, last_node, this_node, "rank", true);
256 }
257
258 }
259 }
260 return result;
261 }
262
263 // protected Element processAdvTextQuery(Element request)
264 // {
265
266 // }
267 protected boolean initCollectionList(String lang)
268 {
269 UserContext userContext = new UserContext();
270 userContext.setLanguage(lang);
271 userContext.setUserID("");
272
273 // first, get the message router info
274 Document msg_doc = XMLConverter.newDOM();
275 Element coll_list_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
276 Element coll_list_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext); // uid
277 coll_list_message.appendChild(coll_list_request);
278 logger.debug("coll list request = " + this.converter.getPrettyString(coll_list_request));
279 Element coll_list_response = (Element) this.router.process(coll_list_message);
280 if (coll_list_response == null)
281 {
282 logger.error("couldn't query the message router!");
283 return false;
284 }
285 logger.debug("coll list response = " + this.converter.getPrettyString(coll_list_response));
286 // second, get some info from each collection. we want the coll name
287 // and whether its got a text query service
288
289 NodeList colls = coll_list_response.getElementsByTagName(GSXML.COLLECTION_ELEM);
290 // we can send the same request to multiple collections at once by using a comma separated list
291 Element metadata_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
292 StringBuffer colls_sb = new StringBuffer();
293 for (int i = 0; i < colls.getLength(); i++)
294 {
295 Element c = (Element) colls.item(i);
296 String name = c.getAttribute(GSXML.NAME_ATT);
297 if (i != 0)
298 {
299 colls_sb.append(",");
300 }
301 colls_sb.append(name);
302 //Element metadata_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, name, userContext);
303 //metadata_message.appendChild(metadata_request);
304 }
305
306 Element metadata_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, colls_sb.toString(), userContext);
307 metadata_message.appendChild(metadata_request);
308 logger.debug("metadata request = " + this.converter.getPrettyString(metadata_message));
309 Element metadata_response = (Element) this.router.process(metadata_message);
310 logger.debug("metadata response = " + this.converter.getPrettyString(metadata_response));
311 NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
312 ArrayList<String> valid_colls = new ArrayList<String>();
313 ArrayList<String> valid_coll_names = new ArrayList<String>();
314 for (int i = 0; i < coll_responses.getLength(); i++)
315 {
316 Element response = (Element) coll_responses.item(i);
317 Element coll = (Element) GSXML.getChildByTagName(response, GSXML.COLLECTION_ELEM);
318 Element service_list = (Element) GSXML.getChildByTagName(coll, GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
319 if (service_list == null)
320 continue;
321 Element query_service = GSXML.getNamedElement(service_list, GSXML.SERVICE_ELEM, GSXML.NAME_ATT, TEXT_QUERY_SERVICE); // should be AbstractTextSearch.TEXT_QUERY_SERVICE
322 if (query_service == null)
323 continue;
324 // use the name of the response in case we are talking to a remote collection, not the name of the collection.
325 String coll_id = response.getAttribute(GSXML.FROM_ATT);
326 String coll_name = GSXML.getDisplayText(coll, GSXML.DISPLAY_TEXT_NAME, lang, "en");
327 valid_colls.add(coll_id);
328 valid_coll_names.add(coll_name);
329 }
330
331 this.coll_names_map = new HashMap<String, String[]>();
332
333 // ids no all has the list without 'all' option.
334 this.coll_ids_list_no_all = new String[1];
335 this.coll_ids_list_no_all = valid_colls.toArray(coll_ids_list_no_all);
336
337 valid_colls.add(0, "all");
338 valid_coll_names.add(0, getTextString("param." + COLLECTION_PARAM + ".all", lang));
339
340 this.coll_ids_list = new String[1];
341 this.coll_ids_list = valid_colls.toArray(coll_ids_list);
342
343 String[] coll_names_list = new String[1];
344 coll_names_list = valid_coll_names.toArray(coll_names_list);
345 this.coll_names_map.put(lang, coll_names_list);
346 return true;
347 }
348
349 protected void addCollectionNames(String lang)
350 {
351
352 UserContext userContext = new UserContext();
353 userContext.setLanguage(lang);
354 userContext.setUserID("");
355
356 ArrayList<String> coll_names = new ArrayList<String>();
357 coll_names.add(getTextString("param." + COLLECTION_PARAM + ".all", lang));
358
359 // need to request MR for collection descriptions
360 Document msg_doc = XMLConverter.newDOM();
361 Element metadata_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
362
363 // get a comma separated list of coll ids to send to MR
364 // the first item is the place holder for 'all'
365 StringBuffer colls_sb = new StringBuffer();
366 for (int i = 1; i < coll_ids_list.length; i++)
367 {
368 if (i != 1)
369 {
370 colls_sb.append(",");
371 }
372 colls_sb.append(coll_ids_list[i]);
373 }
374 Element metadata_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_DESCRIBE, colls_sb.toString(), userContext);
375 // param_list to request just displayTextList
376 Element param_list = msg_doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
377 Element param = GSXML.createParameter(msg_doc, GSXML.SUBSET_PARAM, GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER);
378 param_list.appendChild(param);
379 metadata_request.appendChild(param_list);
380 metadata_message.appendChild(metadata_request);
381 logger.debug("coll names metadata request = " + this.converter.getPrettyString(metadata_message));
382 Element metadata_response = (Element) this.router.process(metadata_message);
383 logger.debug("coll names metadata response = " + this.converter.getPrettyString(metadata_response));
384 NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
385 for (int i = 0; i < coll_responses.getLength(); i++)
386 {
387 Element response = (Element) coll_responses.item(i);
388 Element coll = (Element) GSXML.getChildByTagName(response, GSXML.COLLECTION_ELEM);
389 String coll_name = GSXML.getDisplayText(coll, GSXML.DISPLAY_TEXT_NAME, lang, "en");
390 coll_names.add(coll_name);
391 }
392
393 String[] coll_names_list = new String[1];
394 coll_names_list = coll_names.toArray(coll_names_list);
395 this.coll_names_map.put(lang, coll_names_list);
396
397 }
398
399 protected Element processDocumentMetadataRetrieve(Element request)
400 {
401 // Create a new (empty) result message
402 Document result_doc = XMLConverter.newDOM();
403 Element result = result_doc.createElement(GSXML.RESPONSE_ELEM);
404 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
405 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
406
407 UserContext userContext = new UserContext(request);
408 // Get the parameters of the request
409 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
410 if (param_list == null)
411 {
412 logger.error("DocumentMetadataRetrieve request had no paramList.");
413 return result; // Return the empty result
414 }
415
416 NodeList query_doc_list = request.getElementsByTagName(GSXML.DOC_NODE_ELEM);
417 if (query_doc_list.getLength() == 0)
418 {
419 logger.error("DocumentMetadataRetrieve request had no documentNodes.");
420 return result; // Return the empty result
421 }
422
423 // the resulting doc node list
424 Element result_node_list = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
425 result.appendChild(result_node_list);
426
427
428 // organise the nodes into collection lists
429 HashMap<String, Node> coll_map = new HashMap<String, Node>();
430
431 for (int i = 0; i < query_doc_list.getLength(); i++)
432 {
433 Element doc_node = (Element) query_doc_list.item(i);
434 String coll_name = doc_node.getAttribute("collection");
435 Element coll_items = (Element) coll_map.get(coll_name);
436 if (coll_items == null)
437 {
438 coll_items = result_doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
439 coll_map.put(coll_name, coll_items);
440 }
441 coll_items.appendChild(result_doc.importNode(doc_node, true));
442 }
443
444 // create teh individual requests
445 Document msg_doc = XMLConverter.newDOM();
446 Element meta_request_message = msg_doc.createElement(GSXML.MESSAGE_ELEM);
447 // get all the metadata params
448 Element new_param_list = msg_doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
449 Element param = GSXML.createParameter(msg_doc, "metadata", "Title");
450 new_param_list.appendChild(param);
451
452 Set mapping_set = coll_map.entrySet();
453 Iterator iter = mapping_set.iterator();
454
455 while (iter.hasNext())
456 {
457 Map.Entry e = (Map.Entry) iter.next();
458 String cname = (String) e.getKey();
459 Element doc_nodes = (Element) e.getValue();
460 Element meta_request = GSXML.createBasicRequest(msg_doc, GSXML.REQUEST_TYPE_PROCESS, GSPath.appendLink(cname, DOCUMENT_METADATA_RETRIEVE_SERVICE), userContext);
461 meta_request.appendChild(msg_doc.importNode(doc_nodes, true));
462 meta_request.appendChild(new_param_list.cloneNode(true));
463 meta_request_message.appendChild(meta_request);
464
465 }
466
467 Node meta_result_node = this.router.process(meta_request_message);
468 Element meta_result = GSXML.nodeToElement(meta_result_node);
469
470 // now need to put the doc nodes back in the right order
471 // go through the original list again. keep an element pointer to
472 // the next element in each collections list
473 NodeList meta_responses = meta_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
474 for (int i = 0; i < meta_responses.getLength(); i++)
475 {
476 String collname = GSPath.removeLastLink(((Element) meta_responses.item(i)).getAttribute(GSXML.FROM_ATT));
477 Element first_elem = (Element) GSXML.getNodeByPath(meta_responses.item(i), "documentNodeList/documentNode");
478 coll_map.put(collname, first_elem);
479 }
480
481 for (int i = 0; i < query_doc_list.getLength(); i++)
482 {
483 Element doc_node = (Element) query_doc_list.item(i);
484 Element new_node = (Element) result_doc.importNode(doc_node, false);
485 result_node_list.appendChild(new_node);
486 String coll_name = doc_node.getAttribute("collection");
487
488 Element meta_elem = (Element) coll_map.get(coll_name);
489 GSXML.mergeMetadataLists(new_node, meta_elem);
490 coll_map.put(coll_name, meta_elem.getNextSibling());
491 }
492 return result;
493 }
494}
Note: See TracBrowser for help on using the repository browser.