source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/CrossCollectionSearch.java@ 25635

Last change on this file since 25635 was 25635, checked in by sjm84, 12 years ago

Fixing Greenstone 3's use (or lack thereof) of generics, this was done automatically so we may want to change it over time. This change will also auto-format any files that have not already been formatted.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.0 KB
Line 
1/*
2 * CrossCollectionSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import org.greenstone.gsdl3.util.GSXML;
22import org.greenstone.gsdl3.util.GSPath;
23import org.greenstone.gsdl3.util.UserContext;
24
25import org.w3c.dom.Element;
26import org.w3c.dom.Node;
27import org.w3c.dom.NodeList;
28
29import java.util.ArrayList;
30import java.util.Map;
31import java.util.Iterator;
32import java.util.Set;
33import java.util.HashMap;
34
35import org.apache.log4j.*;
36
37/**
38 *
39 * @author <a href="mailto:[email protected]">Katherine Don</a>
40 */
41
42public class CrossCollectionSearch extends ServiceRack
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.CrossCollectionSearch.class.getName());
46 protected static final String QUERY_PARAM = "query";
47 protected static final String COLLECTION_PARAM = "collection";
48
49 // the services on offer - these proxy the actual collection ones
50 protected static final String TEXT_QUERY_SERVICE = "TextQuery";
51 protected static final String ADV_QUERY_SERVICE = "AdvTextQuery";
52 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
53
54 protected String[] coll_ids_list = null;
55 protected String[] coll_ids_list_no_all = null;
56 protected String[] coll_names_list = null;
57
/**
 * Creates an unconfigured service rack. No state is set up here; the
 * collection lists are filled in later by initCollectionList().
 */
public CrossCollectionSearch()
{
    super();
}
62
63 public boolean configure(Element info, Element extra_info)
64 {
65 // any parameters? colls to include??
66 logger.info("Configuring CrossCollectionSearch...");
67 // query service
68 Element ccs_service = this.doc.createElement(GSXML.SERVICE_ELEM);
69 ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
70 ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
71 this.short_service_info.appendChild(ccs_service);
72
73 // metadata service
74 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
75 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
76 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
77 this.short_service_info.appendChild(dmr_service);
78
79 // get any format info
80 Element format_info = (Element) GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
81 if (format_info != null)
82 {
83 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format_info, true));
84 }
85 else
86 {
87 // add in a default format statement
88 String format_string = "<format xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'><gsf:template match='documentNode'><td><a><xsl:attribute name='href'>?a=d&amp;c=<xsl:value-of select='@collection'/>&amp;d=<xsl:value-of select='@nodeID'/><xsl:if test=\"@nodeType='leaf'\">&amp;sib=1</xsl:if>&amp;dt=<xsl:value-of select='@docType'/>&amp;p.a=q&amp;p.s=" + TEXT_QUERY_SERVICE + "&amp;p.c=";
89 if (this.cluster_name != null)
90 {
91 format_string += this.cluster_name;
92 }
93 format_string += "</xsl:attribute><gsf:icon/></a></td><td><gsf:metadata name='Title'/> (<xsl:value-of select='@collection'/>) </td></gsf:template></format>";
94 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(this.converter.getDOM(format_string).getDocumentElement(), true));
95 }
96 return true;
97 }
98
99 protected Element getServiceDescription(String service, String lang, String subset)
100 {
101 if (service.equals(TEXT_QUERY_SERVICE))
102 {
103
104 Element ccs_service = this.doc.createElement(GSXML.SERVICE_ELEM);
105 ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
106 ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
107
108 // display info
109 if (subset == null || subset.equals(GSXML.DISPLAY_TEXT_ELEM + GSXML.LIST_MODIFIER))
110 {
111 ccs_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE + ".name", lang)));
112 ccs_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE + ".submit", lang)));
113 ccs_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE + ".description", lang)));
114 }
115 // param info
116 if (subset == null || subset.equals(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER))
117 {
118 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
119 // collection list
120 if (coll_ids_list == null)
121 {
122 initCollectionList();
123 }
124 Element param = GSXML.createParameterDescription(this.doc, COLLECTION_PARAM, getTextString("param." + COLLECTION_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "all", coll_ids_list, coll_names_list);
125 param_list.appendChild(param);
126 // query param
127 param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param." + QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
128 param_list.appendChild(param);
129 ccs_service.appendChild(param_list);
130 }
131
132 logger.debug("service description=" + this.converter.getPrettyString(ccs_service));
133 return ccs_service;
134 }
135 // these ones are probably never called, but put them here just in case
136 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
137 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
138 service_elem.setAttribute(GSXML.NAME_ATT, service);
139 return service_elem;
140
141 }
142
143 protected Element processTextQuery(Element request)
144 {
145 // Create a new (empty) result message
146 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
147 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
148 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
149
150 UserContext userContext = new UserContext(request);
151
152 // Get the parameters of the request
153 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
154 if (param_list == null)
155 {
156 logger.error("TextQuery request had no paramList.");
157 return result; // Return the empty result
158 }
159
160 // get the collection list
161 String[] colls_list = coll_ids_list_no_all;
162 Element coll_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, COLLECTION_PARAM);
163 if (coll_param != null)
164 {
165 String coll_list = GSXML.getValue(coll_param);
166 if (!coll_list.equals("all") && !coll_list.equals(""))
167 {
168 colls_list = coll_list.split(",");
169 }
170 }
171
172 Element query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
173 // we are sending the same request to each collection - build up the to
174 // attribute for the request
175 StringBuffer to_att = new StringBuffer();
176 for (int i = 0; i < colls_list.length; i++)
177 {
178 if (i > 0)
179 {
180 to_att.append(",");
181 }
182 to_att.append(GSPath.appendLink(colls_list[i], "TextQuery"));
183
184 }
185 // send the query to all colls
186 Element query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to_att.toString(), userContext);
187 query_message.appendChild(query_request);
188 // should we add params individually?
189 Element new_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
190 query_request.appendChild(new_param_list);
191 new_param_list.appendChild(this.doc.importNode(GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, QUERY_PARAM), true));
192 Element query_result = (Element) this.router.process(query_message);
193
194 // gather up the data from each response
195 int numDocsMatched = 0;
196 int numDocsReturned = 0;
197
198 //term info??
199
200 NodeList metadata = query_result.getElementsByTagName(GSXML.METADATA_ELEM);
201 for (int j = 0; j < metadata.getLength(); j++)
202 {
203 Element meta = (Element) metadata.item(j);
204 if (meta.getAttribute(GSXML.NAME_ATT).equals("numDocsReturned"))
205 {
206 numDocsReturned += Integer.parseInt(GSXML.getValue(meta));
207 }
208 else if (meta.getAttribute(GSXML.NAME_ATT).equals("numDocsMatched"))
209 {
210 numDocsMatched += Integer.parseInt(GSXML.getValue(meta));
211 }
212 }
213
214 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
215 result.appendChild(metadata_list);
216 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", "" + numDocsReturned);
217 //GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+numDocsMatched);
218
219 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
220 result.appendChild(doc_node_list);
221
222 NodeList responses = query_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
223
224 for (int k = 0; k < responses.getLength(); k++)
225 {
226 String coll_name = GSPath.removeLastLink(((Element) responses.item(k)).getAttribute(GSXML.FROM_ATT));
227 NodeList nodes = ((Element) responses.item(k)).getElementsByTagName(GSXML.DOC_NODE_ELEM);
228 if (nodes == null || nodes.getLength() == 0)
229 continue;
230 Element last_node = null;
231 Element this_node = null;
232 for (int n = 0; n < nodes.getLength(); n++)
233 {
234 this_node = (Element) nodes.item(n);
235 this_node.setAttribute("collection", coll_name);
236 if (k == 0)
237 {
238
239 doc_node_list.appendChild(this.doc.importNode(this_node, true));
240 }
241 else
242 {
243 if (last_node == null)
244 {
245 last_node = (Element) GSXML.getChildByTagName(doc_node_list, GSXML.DOC_NODE_ELEM);
246 }
247 last_node = GSXML.insertIntoOrderedList(doc_node_list, GSXML.DOC_NODE_ELEM, last_node, this_node, "rank", true);
248 }
249
250 }
251 }
252 return result;
253 }
254
255 // protected Element processAdvTextQuery(Element request)
256 // {
257
258 // }
259 protected boolean initCollectionList()
260 {
261 UserContext userContext = new UserContext();
262 userContext.setLanguage("en");
263 userContext.setUserID("");
264
265 // first, get the message router info
266 Element coll_list_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
267 Element coll_list_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext); // uid
268 coll_list_message.appendChild(coll_list_request);
269 logger.debug("coll list request = " + this.converter.getPrettyString(coll_list_request));
270 Element coll_list_response = (Element) this.router.process(coll_list_message);
271 if (coll_list_response == null)
272 {
273 logger.error("couldn't query the message router!");
274 return false;
275 }
276 logger.debug("coll list response = " + this.converter.getPrettyString(coll_list_response));
277 // second, get some info from each collection. we want the coll name
278 // and whether its got a text query service
279
280 NodeList colls = coll_list_response.getElementsByTagName(GSXML.COLLECTION_ELEM);
281 // we will send all the requests in a single message
282 Element metadata_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
283 for (int i = 0; i < colls.getLength(); i++)
284 {
285 Element c = (Element) colls.item(i);
286 String name = c.getAttribute(GSXML.NAME_ATT);
287 Element metadata_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, name, userContext);
288 metadata_message.appendChild(metadata_request);
289 }
290 logger.debug("metadata request = " + this.converter.getPrettyString(metadata_message));
291 Element metadata_response = (Element) this.router.process(metadata_message);
292 logger.debug("metadata response = " + this.converter.getPrettyString(metadata_response));
293 NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
294 ArrayList<String> valid_colls = new ArrayList<String>();
295 ArrayList<String> valid_coll_names = new ArrayList<String>();
296 for (int i = 0; i < coll_responses.getLength(); i++)
297 {
298 Element response = (Element) coll_responses.item(i);
299 Element coll = (Element) GSXML.getChildByTagName(response, GSXML.COLLECTION_ELEM);
300 Element service_list = (Element) GSXML.getChildByTagName(coll, GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
301 if (service_list == null)
302 continue;
303 Element query_service = GSXML.getNamedElement(service_list, GSXML.SERVICE_ELEM, GSXML.NAME_ATT, TEXT_QUERY_SERVICE); // should be AbstractTextSearch.TEXT_QUERY_SERVICE
304 if (query_service == null)
305 continue;
306 // use the name of the response in case we are talking to a remote collection, not the name of the collection.
307 String coll_id = response.getAttribute(GSXML.FROM_ATT);
308 String coll_name = coll_id + ": " + GSXML.getDisplayText(coll, GSXML.DISPLAY_TEXT_NAME, "en", "en"); // just use english for now until we do some caching or something
309 valid_colls.add(coll_id);
310 valid_coll_names.add(coll_name);
311 }
312
313 if (valid_colls.size() == 0)
314 {
315 return false;
316 }
317 // ids no all has the list without 'all' option.
318 this.coll_ids_list_no_all = new String[1];
319 this.coll_ids_list_no_all = valid_colls.toArray(coll_ids_list_no_all);
320
321 valid_colls.add(0, "all");
322 valid_coll_names.add(0, getTextString("param." + COLLECTION_PARAM + ".all", "en"));
323 this.coll_ids_list = new String[1];
324 this.coll_names_list = new String[1];
325 this.coll_ids_list = valid_colls.toArray(coll_ids_list);
326 this.coll_names_list = valid_coll_names.toArray(coll_names_list);
327 return true;
328 }
329
330 protected Element processDocumentMetadataRetrieve(Element request)
331 {
332 // Create a new (empty) result message
333 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
334 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
335 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
336
337 UserContext userContext = new UserContext(request);
338 // Get the parameters of the request
339 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
340 if (param_list == null)
341 {
342 logger.error("DocumentMetadataRetrieve request had no paramList.");
343 return result; // Return the empty result
344 }
345
346 NodeList query_doc_list = request.getElementsByTagName(GSXML.DOC_NODE_ELEM);
347 if (query_doc_list.getLength() == 0)
348 {
349 logger.error("DocumentMetadataRetrieve request had no documentNodes.");
350 return result; // Return the empty result
351 }
352
353 // the resulting doc node list
354 Element result_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
355 result.appendChild(result_node_list);
356
357 // get all the metadata params
358 Element new_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
359 Element param = GSXML.createParameter(this.doc, "metadata", "Title");
360 new_param_list.appendChild(param);
361
362 // organise the nodes into collection lists
363 HashMap<String, Node> coll_map = new HashMap<String, Node>();
364
365 for (int i = 0; i < query_doc_list.getLength(); i++)
366 {
367 Element doc_node = (Element) query_doc_list.item(i);
368 String coll_name = doc_node.getAttribute("collection");
369 Element coll_items = (Element) coll_map.get(coll_name);
370 if (coll_items == null)
371 {
372 coll_items = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
373 coll_map.put(coll_name, coll_items);
374 }
375 coll_items.appendChild(this.doc.importNode(doc_node, true));
376 }
377
378 // create teh individual requests
379 Element meta_request_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
380 Set mapping_set = coll_map.entrySet();
381 Iterator iter = mapping_set.iterator();
382
383 while (iter.hasNext())
384 {
385 Map.Entry e = (Map.Entry) iter.next();
386 String cname = (String) e.getKey();
387 Element doc_nodes = (Element) e.getValue();
388 Element meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, GSPath.appendLink(cname, DOCUMENT_METADATA_RETRIEVE_SERVICE), userContext);
389 meta_request.appendChild(doc_nodes);
390 meta_request.appendChild(new_param_list.cloneNode(true));
391 meta_request_message.appendChild(meta_request);
392
393 }
394
395 Node meta_result_node = this.router.process(meta_request_message);
396 Element meta_result = this.converter.nodeToElement(meta_result_node);
397
398 // now need to put the doc nodes back in the right order
399 // go through the original list again. keep an element pointer to
400 // the next element in each collections list
401 NodeList meta_responses = meta_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
402 for (int i = 0; i < meta_responses.getLength(); i++)
403 {
404 String collname = GSPath.removeLastLink(((Element) meta_responses.item(i)).getAttribute(GSXML.FROM_ATT));
405 Element first_elem = (Element) GSXML.getNodeByPath(meta_responses.item(i), "documentNodeList/documentNode");
406 coll_map.put(collname, first_elem);
407 }
408
409 for (int i = 0; i < query_doc_list.getLength(); i++)
410 {
411 Element doc_node = (Element) query_doc_list.item(i);
412 Element new_node = (Element) this.doc.importNode(doc_node, false);
413 result_node_list.appendChild(new_node);
414 String coll_name = doc_node.getAttribute("collection");
415
416 Element meta_elem = (Element) coll_map.get(coll_name);
417 GSXML.mergeMetadataLists(new_node, meta_elem);
418 coll_map.put(coll_name, meta_elem.getNextSibling());
419 }
420 return result;
421 }
422}
Note: See TracBrowser for help on using the repository browser.