source: greenstone3/trunk/src/java/org/greenstone/gsdl3/service/CrossCollectionSearch.java@ 16688

Last change on this file since 16688 was 16688, checked in by davidb, 16 years ago

Changed 'Element process(Element)' in ModuleInterface to 'Node process(Node)'. After some deliberation it was decided that this is a more useful (generic) layer of the DOM to pass information around in. It helps with the DocType problem when producing XSL-transformed pages, for example: when this was an Element, it would lose track of its DocType. A supporting method, 'Element nodeToElement(Node)', is provided in XMLConverter; it checks a node's type and casts it to Element if appropriate, or, if it is a Document, typecasts to that and then extracts the top-level Element. With this fundamental change in ModuleInterface, around 20 files that build on top of 'process()' (Actions, Services, etc.) needed to be updated to reflect this change and to use nodeToElement where necessary.

  • Property svn:keywords set to Author Date Id Revision
File size: 16.7 KB
Line 
1/*
2 * CrossCollectionSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21import org.greenstone.gsdl3.util.GSXML;
22import org.greenstone.gsdl3.util.GSPath;
23
24import org.w3c.dom.Element;
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27
28import java.util.ArrayList;
29import java.util.Map;
30import java.util.Iterator;
31import java.util.Set;
32import java.util.HashMap;
33
34import org.apache.log4j.*;
35
36/**
37 *
38 * @author <a href="mailto:[email protected]">Katherine Don</a>
39 */
40
41public class CrossCollectionSearch
42 extends ServiceRack {
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.CrossCollectionSearch.class.getName());
45 protected static final String QUERY_PARAM = "query";
46 protected static final String COLLECTION_PARAM = "collection";
47
48 // the services on offer - these proxy the actual collection ones
49 protected static final String TEXT_QUERY_SERVICE = "TextQuery";
50 protected static final String ADV_QUERY_SERVICE = "AdvTextQuery";
51 protected static final String DOCUMENT_METADATA_RETRIEVE_SERVICE = "DocumentMetadataRetrieve";
52
53 protected String [] coll_ids_list = null;
54 protected String [] coll_ids_list_no_all = null;
55 protected String [] coll_names_list = null;
56
57
58 /** constructor */
59 public CrossCollectionSearch()
60 {
61 }
62
63 public boolean configure(Element info, Element extra_info)
64 {
65 // any parameters? colls to include??
66 logger.info("Configuring CrossCollectionSearch...");
67 // query service
68 Element ccs_service = this.doc.createElement(GSXML.SERVICE_ELEM);
69 ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
70 ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
71 this.short_service_info.appendChild(ccs_service);
72
73 // metadata service
74 Element dmr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
75 dmr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
76 dmr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
77 this.short_service_info.appendChild(dmr_service);
78
79 // get any format info
80 Element format_info = (Element)GSXML.getChildByTagName(info, GSXML.FORMAT_ELEM);
81 if (format_info != null) {
82 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(format_info, true));
83 } else {
84 // add in a default format statement
85 String format_string = "<format xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'><gsf:template match='documentNode'><td><a><xsl:attribute name='href'>?a=d&amp;c=<xsl:value-of select='@collection'/>&amp;d=<xsl:value-of select='@nodeID'/><xsl:if test=\"@nodeType='leaf'\">&amp;sib=1</xsl:if>&amp;dt=<xsl:value-of select='@docType'/>&amp;p.a=q&amp;&amp;p.s="+TEXT_QUERY_SERVICE+"&amp;p.c=";
86 if (this.cluster_name!=null) {
87 format_string += this.cluster_name;
88 }
89 format_string += "</xsl:attribute><gsf:icon/></a></td><td><gsf:metadata name='Title'/> (<xsl:value-of select='@collection'/>) </td></gsf:template></format>";
90 this.format_info_map.put(TEXT_QUERY_SERVICE, this.doc.importNode(this.converter.getDOM(format_string).getDocumentElement(), true));
91 }
92 return true;
93 }
94
95 protected Element getServiceDescription(String service, String lang, String subset)
96 {
97 if (service.equals(TEXT_QUERY_SERVICE)) {
98
99 Element ccs_service = this.doc.createElement(GSXML.SERVICE_ELEM);
100 ccs_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_QUERY);
101 ccs_service.setAttribute(GSXML.NAME_ATT, TEXT_QUERY_SERVICE);
102
103 // display info
104 if (subset==null || subset.equals(GSXML.DISPLAY_TEXT_ELEM+GSXML.LIST_MODIFIER)) {
105 ccs_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_NAME, getTextString(TEXT_QUERY_SERVICE+".name", lang)));
106 ccs_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_SUBMIT, getTextString(TEXT_QUERY_SERVICE+".submit", lang)));
107 ccs_service.appendChild(GSXML.createDisplayTextElement(this.doc, GSXML.DISPLAY_TEXT_DESCRIPTION, getTextString(TEXT_QUERY_SERVICE+".description", lang)));
108 }
109 // param info
110 if (subset==null || subset.equals(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER)) {
111 Element param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
112 // collection list
113 if (coll_ids_list ==null ) {
114 initCollectionList();
115 }
116 Element param = GSXML.createParameterDescription(this.doc, COLLECTION_PARAM, getTextString("param."+COLLECTION_PARAM, lang), GSXML.PARAM_TYPE_ENUM_MULTI, "all", coll_ids_list, coll_names_list);
117 param_list.appendChild(param);
118 // query param
119 param = GSXML.createParameterDescription(this.doc, QUERY_PARAM, getTextString("param."+QUERY_PARAM, lang), GSXML.PARAM_TYPE_STRING, null, null, null);
120 param_list.appendChild(param);
121 ccs_service.appendChild(param_list);
122 }
123
124 logger.debug("service description="+this.converter.getPrettyString(ccs_service));
125 return ccs_service;
126 }
127 // these ones are probably never called, but put them here just in case
128 Element service_elem = this.doc.createElement(GSXML.SERVICE_ELEM);
129 service_elem.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
130 service_elem.setAttribute(GSXML.NAME_ATT, service);
131 return service_elem;
132
133
134 }
135
136
137 protected Element processTextQuery(Element request) {
138 // Create a new (empty) result message
139 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
140 result.setAttribute(GSXML.FROM_ATT, TEXT_QUERY_SERVICE);
141 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
142
143 String lang = request.getAttribute(GSXML.LANG_ATT);
144 // Get the parameters of the request
145 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
146 if (param_list == null) {
147 logger.error("TextQuery request had no paramList.");
148 return result; // Return the empty result
149 }
150
151 // get the collection list
152 String [] colls_list = coll_ids_list_no_all;
153 Element coll_param = GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, COLLECTION_PARAM);
154 if (coll_param != null) {
155 String coll_list = GSXML.getValue(coll_param);
156 if (!coll_list.equals("all") && !coll_list.equals("")) {
157 colls_list = coll_list.split(",");
158 }
159 }
160
161 Element query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
162 // we are sending the same request to each collection - build up the to
163 // attribute for the request
164 StringBuffer to_att = new StringBuffer();
165 for (int i=0; i<colls_list.length; i++) {
166 if (i>0) {
167 to_att.append(",");
168 }
169 to_att.append(GSPath.appendLink(colls_list[i], "TextQuery"));
170
171 }
172 // send the query to all colls
173 Element query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to_att.toString(), lang, "");
174 query_message.appendChild(query_request);
175 // should we add params individually?
176 Element new_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
177 query_request.appendChild(new_param_list);
178 new_param_list.appendChild(this.doc.importNode(GSXML.getNamedElement(param_list, GSXML.PARAM_ELEM, GSXML.NAME_ATT, QUERY_PARAM), true));
179 Element query_result = (Element)this.router.process(query_message);
180
181 // gather up the data from each response
182 int numDocsMatched = 0;
183 int numDocsReturned = 0;
184
185 //term info??
186
187 NodeList metadata = query_result.getElementsByTagName(GSXML.METADATA_ELEM);
188 for (int j=0; j<metadata.getLength(); j++) {
189 Element meta = (Element)metadata.item(j);
190 if (meta.getAttribute(GSXML.NAME_ATT).equals("numDocsReturned")) {
191 numDocsReturned += Integer.parseInt(GSXML.getValue(meta));
192 } else if (meta.getAttribute(GSXML.NAME_ATT).equals("numDocsMatched")) {
193 numDocsMatched += Integer.parseInt(GSXML.getValue(meta));
194 }
195 }
196
197 Element metadata_list = this.doc.createElement(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
198 result.appendChild(metadata_list);
199 GSXML.addMetadata(this.doc, metadata_list, "numDocsReturned", ""+numDocsReturned);
200 //GSXML.addMetadata(this.doc, metadata_list, "numDocsMatched", ""+numDocsMatched);
201
202 Element doc_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
203 result.appendChild(doc_node_list);
204
205 NodeList responses = query_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
206
207 for (int k=0; k<responses.getLength(); k++) {
208 String coll_name = GSPath.removeLastLink(((Element)responses.item(k)).getAttribute(GSXML.FROM_ATT));
209 NodeList nodes = ((Element)responses.item(k)).getElementsByTagName(GSXML.DOC_NODE_ELEM);
210 if (nodes==null || nodes.getLength()==0) continue;
211 Element last_node = null;
212 Element this_node = null;
213 for (int n=0; n<nodes.getLength(); n++) {
214 this_node = (Element)nodes.item(n);
215 this_node.setAttribute("collection", coll_name);
216 if (k==0) {
217
218 doc_node_list.appendChild(this.doc.importNode(this_node, true));
219 } else {
220 if (last_node==null) {
221 last_node = (Element)GSXML.getChildByTagName(doc_node_list, GSXML.DOC_NODE_ELEM);
222 }
223 last_node = GSXML.insertIntoOrderedList(doc_node_list, GSXML.DOC_NODE_ELEM, last_node, this_node, "rank", true);
224 }
225
226 }
227 }
228 return result;
229 }
230
231// protected Element processAdvTextQuery(Element request)
232// {
233
234// }
235 protected boolean initCollectionList() {
236 String lang="en";
237 String uid = "";
238 // first, get the message router info
239 Element coll_list_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
240 Element coll_list_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, ""); // uid
241 coll_list_message.appendChild(coll_list_request);
242 logger.debug("coll list request = "+this.converter.getPrettyString(coll_list_request));
243 Element coll_list_response = (Element)this.router.process(coll_list_message);
244 if (coll_list_response==null) {
245 logger.error("couldn't query the message router!");
246 return false;
247 }
248 logger.debug("coll list response = "+this.converter.getPrettyString(coll_list_response));
249 // second, get some info from each collection. we want the coll name
250 // and whether its got a text query service
251
252 NodeList colls = coll_list_response.getElementsByTagName(GSXML.COLLECTION_ELEM);
253 // we will send all the requests in a single message
254 Element metadata_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
255 for (int i=0; i<colls.getLength(); i++) {
256 Element c = (Element)colls.item(i);
257 String name = c.getAttribute(GSXML.NAME_ATT);
258 Element metadata_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, name, lang, uid);
259 metadata_message.appendChild(metadata_request);
260 }
261 logger.debug("metadata request = "+this.converter.getPrettyString(metadata_message));
262 Element metadata_response = (Element)this.router.process(metadata_message);
263 logger.debug("metadata response = "+this.converter.getPrettyString(metadata_response));
264 NodeList coll_responses = metadata_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
265 ArrayList valid_colls = new ArrayList();
266 ArrayList valid_coll_names = new ArrayList();
267 for (int i=0; i<coll_responses.getLength(); i++) {
268 Element response = (Element)coll_responses.item(i);
269 Element coll = (Element)GSXML.getChildByTagName(response, GSXML.COLLECTION_ELEM);
270 Element service_list = (Element)GSXML.getChildByTagName(coll, GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER);
271 if (service_list==null) continue;
272 Element query_service = GSXML.getNamedElement(service_list, GSXML.SERVICE_ELEM, GSXML.NAME_ATT, TEXT_QUERY_SERVICE); // should be AbstractSearch.TEXT_QUERY_SERVICE
273 if (query_service == null) continue;
274 // use the name of the response in case we are talking to a remote collection, not the name of the collection.
275 String coll_id = response.getAttribute(GSXML.FROM_ATT);
276 String coll_name = coll_id+": "+GSXML.getDisplayText(coll, GSXML.DISPLAY_TEXT_NAME, "en", "en"); // just use english for now until we do some caching or something
277 valid_colls.add(coll_id);
278 valid_coll_names.add(coll_name);
279 }
280
281 if (valid_colls.size()==0) {
282 return false;
283 }
284 // ids no all has the list without 'all' option.
285 this.coll_ids_list_no_all = new String[1];
286 this.coll_ids_list_no_all = (String []) valid_colls.toArray(coll_ids_list_no_all);
287
288 valid_colls.add(0, "all");
289 valid_coll_names.add(0, getTextString("param."+COLLECTION_PARAM+".all", "en" ));
290 this.coll_ids_list = new String[1];
291 this.coll_names_list = new String[1];
292 this.coll_ids_list = (String []) valid_colls.toArray(coll_ids_list);
293 this.coll_names_list = (String []) valid_coll_names.toArray(coll_names_list);
294 return true;
295 }
296
297 protected Element processDocumentMetadataRetrieve(Element request) {
298 // Create a new (empty) result message
299 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
300 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_METADATA_RETRIEVE_SERVICE);
301 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
302
303 String lang = request.getAttribute(GSXML.LANG_ATT);
304 // Get the parameters of the request
305 Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
306 if (param_list == null) {
307 logger.error("DocumentMetadataRetrieve request had no paramList.");
308 return result; // Return the empty result
309 }
310
311 NodeList query_doc_list = request.getElementsByTagName(GSXML.DOC_NODE_ELEM);
312 if (query_doc_list.getLength()==0) {
313 logger.error("DocumentMetadataRetrieve request had no documentNodes.");
314 return result; // Return the empty result
315 }
316
317 // the resulting doc node list
318 Element result_node_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
319 result.appendChild(result_node_list);
320
321 // get all the metadata params
322 Element new_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
323 Element param = GSXML.createParameter(this.doc, "metadata", "Title");
324 new_param_list.appendChild(param);
325
326 // organise the nodes into collection lists
327 HashMap coll_map = new HashMap();
328
329 for (int i=0; i<query_doc_list.getLength(); i++) {
330 Element doc_node = (Element)query_doc_list.item(i);
331 String coll_name = doc_node.getAttribute("collection");
332 Element coll_items = (Element)coll_map.get(coll_name);
333 if (coll_items==null) {
334 coll_items=this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
335 coll_map.put(coll_name, coll_items);
336 }
337 coll_items.appendChild(this.doc.importNode(doc_node, true));
338 }
339
340 // create teh individual requests
341 Element meta_request_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
342 Set mapping_set = coll_map.entrySet();
343 Iterator iter = mapping_set.iterator();
344
345 while (iter.hasNext()) {
346 Map.Entry e = (Map.Entry)iter.next();
347 String cname = (String)e.getKey();
348 Element doc_nodes = (Element)e.getValue();
349 Element meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, GSPath.appendLink(cname, DOCUMENT_METADATA_RETRIEVE_SERVICE), lang, "");
350 meta_request.appendChild(doc_nodes);
351 meta_request.appendChild(new_param_list.cloneNode(true));
352 meta_request_message.appendChild(meta_request);
353
354 }
355
356 Node meta_result_node = this.router.process(meta_request_message);
357 Element meta_result = this.converter.nodeToElement(meta_result_node);
358
359 // now need to put the doc nodes back in the right order
360 // go through the original list again. keep an element pointer to
361 // the next element in each collections list
362 NodeList meta_responses = meta_result.getElementsByTagName(GSXML.RESPONSE_ELEM);
363 for (int i=0; i<meta_responses.getLength(); i++) {
364 String collname = GSPath.removeLastLink(((Element)meta_responses.item(i)).getAttribute(GSXML.FROM_ATT));
365 Element first_elem = (Element)GSXML.getNodeByPath(meta_responses.item(i), "documentNodeList/documentNode");
366 coll_map.put(collname, first_elem);
367 }
368
369 for (int i=0; i<query_doc_list.getLength(); i++) {
370 Element doc_node = (Element)query_doc_list.item(i);
371 Element new_node = (Element)this.doc.importNode(doc_node, false);
372 result_node_list.appendChild(new_node);
373 String coll_name = doc_node.getAttribute("collection");
374
375 Element meta_elem = (Element)coll_map.get(coll_name);
376 GSXML.mergeMetadataLists(new_node, meta_elem);
377 coll_map.put(coll_name, meta_elem.getNextSibling());
378 }
379 return result;
380 }
381}
382
Note: See TracBrowser for help on using the repository browser.