source: greenstone3/trunk/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 16688

Last change on this file since 16688 was 16688, checked in by davidb, 16 years ago

Changed 'Element process(Element)' in ModuleInterface to 'Node process(Node)'. After some deliberation it was decided this is a more useful (generic) layer of the DOM to pass information around in. Helps with the DocType problem when producing XSL Transformed pages, for example. When this was an Element, it would lose track of its DocType. Supporting method provided in XMLConverter 'Element nodeToElement(Node)' which checks a node's docType and casts to Element if appropriate, or if a Document, typecasts to that and then extracts the top-level Element. With this fundamental change in ModuleInterface, around 20 files needed to be updated (Actions, Services, etc) that build on top of 'process()' to reflect this change, and use nodeToElement where necessary.

  • Property svn:keywords set to Author Date Id Revision
File size: 36.5 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router
41 */
42public class DocumentAction extends Action {
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /** if this is set to true, when a document is displayed, any annotation
52 * type services (enrich) will be offered to the user as well */
53 protected boolean provide_annotations = false;
54
55 protected boolean highlight_query_terms = false;
56
57 public boolean configure() {
58 super.configure();
59 String highlight = (String)config_params.get("highlightQueryTerms");
60 if (highlight != null && highlight.equals("true")) {
61 highlight_query_terms = true;
62 }
63 String annotate = (String)config_params.get("displayAnnotationService");
64 if (annotate != null && annotate.equals("true")) {
65 provide_annotations = true;
66 }
67 return true;
68 }
69 public Node process (Node message_node)
70 {
71 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73 Element message = this.converter.nodeToElement(message_node);
74
75 // the response
76 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78 result.appendChild(page_response);
79
80 // get the request - assume only one
81 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83 HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85 // just in case there are some that need to get passed to the services
86 HashMap service_params = (HashMap)params.get("s0");
87
88 String has_rl = null;
89 String has_href = null;
90 has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
91 has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
92 String collection = (String) params.get(GSParams.COLLECTION);
93 String lang = request.getAttribute(GSXML.LANG_ATT);
94 String uid = request.getAttribute(GSXML.USER_ID_ATT);
95 String document_name = (String) params.get(GSParams.DOCUMENT);
96 if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals(""))) {
97 logger.error("no document specified!");
98 return result;
99 }
100 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
101 if (document_type == null) {
102 document_type = "simple";
103 }
104 //whether to retrieve siblings or not
105 boolean get_siblings = false;
106 String sibs = (String) params.get(SIBLING_ARG);
107 if (sibs != null && sibs.equals("1")) {
108 get_siblings = true;
109 }
110
111 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
112 if (sibling_num != null && !sibling_num.equals("")) {
113 // we have to modify the doc name
114 document_name = document_name+"."+sibling_num+".ss";
115 }
116
117 boolean expand_document = false;
118 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
119 if (ed_arg != null && ed_arg.equals("1")) {
120 expand_document = true;
121 }
122
123
124 boolean expand_contents = false;
125 if (expand_document) { // we always expand the contents with the text
126 expand_contents = true;
127 } else {
128 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
129 if (ec_arg != null && ec_arg.equals("1")) {
130 expand_contents = true;
131 }
132 }
133 // get the additional data needed for the page
134 getBackgroundData(page_response, collection, lang, uid);
135 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
136
137 // the_document is where all the doc info - structure and metadata etc
138 // is added into, to be returned in the page
139 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
140 page_response.appendChild(the_document);
141
142 // set the doctype from the cgi arg as an attribute
143 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
144
145 // create a basic doc list containing the current node
146 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
147 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
148 basic_doc_list.appendChild(current_doc);
149 if (document_name.length()!=0){
150 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
151 }else if (has_href.length()!=0){
152 current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
153 current_doc.setAttribute("externalURL", has_rl);
154 }
155
156 // Create a parameter list to specify the required structure information
157 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
158
159 if (service_params != null) {
160 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
161 }
162
163 Element ds_param = null;
164 boolean get_structure = false;
165 boolean get_structure_info = false;
166 if (document_type.equals("paged")) {
167 get_structure_info = true;
168 // get teh info needed for paged naviagtion
169 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
170 ds_param_list.appendChild(ds_param);
171 ds_param.setAttribute(GSXML.NAME_ATT, "info");
172 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
173 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
174 ds_param_list.appendChild(ds_param);
175 ds_param.setAttribute(GSXML.NAME_ATT, "info");
176 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
177 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
178 ds_param_list.appendChild(ds_param);
179 ds_param.setAttribute(GSXML.NAME_ATT, "info");
180 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
181
182 } else if (document_type.equals("hierarchy")){
183 get_structure = true;
184 if (expand_contents) {
185 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
186 ds_param_list.appendChild(ds_param);
187 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
188 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
189 } else {
190 // get the info needed for table of contents
191 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
192 ds_param_list.appendChild(ds_param);
193 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
194 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
195 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
196 ds_param_list.appendChild(ds_param);
197 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
198 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
199 if (get_siblings) {
200 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201 ds_param_list.appendChild(ds_param);
202 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
204 }
205 }
206 } else {
207 // we dont need any structure
208 }
209
210 boolean has_dummy = false;
211 if (get_structure || get_structure_info) {
212
213 // Build a request to obtain the document structure
214 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
215 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
216 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
217 ds_message.appendChild(ds_request);
218 ds_request.appendChild(ds_param_list);
219
220 // create a doc_node_list and put in the doc_node that we are interested in
221 ds_request.appendChild(basic_doc_list);
222
223 // Process the document structure retrieve message
224 Element ds_response_message = (Element) this.mr.process(ds_message);
225 if (processErrorElements(ds_response_message, page_response)) {
226 return result;
227 }
228
229 // get the info and print out
230 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
231 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
232 path = GSPath.appendLink(path, "nodeStructureInfo");
233 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
234 // get the doc_node bit
235 if (ds_response_struct_info != null) {
236 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
237 }
238 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
239 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
240 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
241 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
242
243 if (ds_response_structure != null) {
244 // add the contents of the structure bit into the_document
245 NodeList structs = ds_response_structure.getChildNodes();
246 for (int i=0; i<structs.getLength();i++) {
247 the_document.appendChild(this.doc.importNode(structs.item(i), true));
248 }
249 } else {
250 // no structure nodes, so put in a dummy doc node
251 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
252 if (document_name.length()!=0){
253 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
254 }else if (has_href.length()!=0){
255 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
256 doc_node.setAttribute("externalURL", has_rl);
257 }
258 the_document.appendChild(doc_node);
259 has_dummy = true;
260 }
261 } else { // a simple type - we dont have a dummy node for simple
262 // should think about this more
263 // no structure request, so just put in a dummy doc node
264 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
265 if (document_name.length()!=0){
266 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
267 }else if (has_href.length()!=0){
268 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
269 doc_node.setAttribute("externalURL", has_rl);
270 }
271 the_document.appendChild(doc_node);
272 has_dummy = true;
273 }
274
275 // Build a request to obtain some document metadata
276 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
277 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
278 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
279 dm_message.appendChild(dm_request);
280 // Create a parameter list to specify the required metadata information
281
282 HashSet meta_names = new HashSet();
283 meta_names.add("Title"); // the default
284 if (format_elem != null) {
285 extractMetadataNames(format_elem, meta_names);
286 }
287
288 Element dm_param_list = createMetadataParamList(meta_names);
289 if (service_params != null) {
290 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
291 }
292
293 dm_request.appendChild(dm_param_list);
294
295
296 // create the doc node list for the metadata request
297 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
298 dm_request.appendChild(dm_doc_list);
299
300 // Add each node from the structure response into the metadata request
301 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
302 for (int i = 0; i < doc_nodes.getLength(); i++) {
303 Element doc_node = (Element) doc_nodes.item(i);
304 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
305
306 // Add the documentNode to the list
307 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
308 dm_doc_list.appendChild(dm_doc_node);
309 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
310 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
311 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
312 }
313
314 // we also want a metadata request to the top level document to get
315 // assocfilepath - this could be cached too
316 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
317 dm_message.appendChild(doc_meta_request);
318 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
319 if (service_params != null) {
320 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
321 }
322
323 doc_meta_request.appendChild(doc_meta_param_list);
324 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
325 doc_meta_param_list.appendChild(doc_param);
326 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
327 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
328
329 // create the doc node list for the metadata request
330 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
331 doc_meta_request.appendChild(doc_list);
332
333 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
334 // the node we want is the root document node
335 if (document_name.length()!=0){
336 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
337 }else if (has_href.length()!=0){
338 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
339 doc_node.setAttribute("externalURL", has_rl);
340 }
341 doc_list.appendChild(doc_node);
342 Element dm_response_message = (Element) this.mr.process(dm_message);
343 if (processErrorElements(dm_response_message, page_response)) {
344 return result;
345 }
346
347 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
348 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
349
350 // Merge the metadata with the structure information
351 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
352 for (int i = 0; i < doc_nodes.getLength(); i++) {
353 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
354 }
355 // get teh top level doc metadata out
356 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
357 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
358 if (doc_meta_list != null) {
359 the_document.appendChild(this.doc.importNode(doc_meta_list, true));
360 }
361 // Build a request to obtain some document content
362 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
363 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
364 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
365 dc_message.appendChild(dc_request);
366
367
368 // Create a parameter list to specify the request parameters - empty for now
369 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
370 if (service_params != null) {
371 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
372 }
373
374 dc_request.appendChild(dc_param_list);
375
376 // get the content
377 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
378 if (expand_document) {
379 dc_request.appendChild(dm_doc_list);
380 } else {
381 dc_request.appendChild(basic_doc_list);
382 }
383 logger.debug("request = "+converter.getString(dc_message));
384 Element dc_response_message = (Element) this.mr.process(dc_message);
385 if (processErrorElements(dc_response_message, page_response)) {
386 return result;
387 }
388
389 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
390
391 if (expand_document) {
392 // Merge the content with the structure information
393 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
394 for (int i = 0; i < doc_nodes.getLength(); i++) {
395 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
396 if (content != null) {
397 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
398 }
399 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
400 }
401 } else {
402 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
403 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
404 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
405 Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
406
407 if (dc_response_doc_content == null) {
408 // no content to add
409 if (dc_response_doc_external !=null){
410 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
411
412 the_document.setAttribute("selectedNode", modified_doc_id);
413 the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
414 }
415 return result;
416 }
417 if (highlight_query_terms) {
418 dc_response_doc.removeChild(dc_response_doc_content);
419
420 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
421 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
422 }
423
424
425 if (provide_annotations) {
426 String service_selected = (String)params.get(ENRICH_DOC_ARG);
427 if (service_selected != null && service_selected.equals("1")) {
428 // now we can modifiy the response doc if needed
429 String enrich_service = (String)params.get(GSParams.SERVICE);
430 // send a message to the service
431 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
432 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
433 enrich_message.appendChild(enrich_request);
434 // check for parameters
435 HashMap e_service_params = (HashMap)params.get("s1");
436 if (e_service_params != null) {
437 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
438 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
439 enrich_request.appendChild(enrich_pl);
440 }
441 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
442 enrich_request.appendChild(e_doc_list);
443 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
444
445 Node enrich_response = this.mr.process(enrich_message);
446
447 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
448 path = GSPath.createPath(links);
449 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
450
451 }
452 } // if provide_annotations
453
454
455 // use the returned id rather than the sent one cos there may have
456 // been modifiers such as .pr that are removed.
457 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
458 the_document.setAttribute("selectedNode", modified_doc_id);
459 if (has_dummy) {
460 // change the id if necessary and add the content
461 Element dummy_node = (Element)doc_nodes.item(0);
462
463 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
464 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
465 // hack for simple type
466 if (document_type.equals("simple")) {
467 // we dont want the internal docNode, just want the content and metadata in the document
468 // rethink this!!
469 the_document.removeChild(dummy_node);
470
471 NodeList dummy_children = dummy_node.getChildNodes();
472 //for (int i=0; i<dummy_children.getLength(); i++) {
473 for (int i=dummy_children.getLength()-1; i>=0; i--) {
474 the_document.appendChild(dummy_children.item(i));
475
476 }
477 }
478 } else {
479 // Merge the document content with the metadata and structure information
480 for (int i = 0; i < doc_nodes.getLength(); i++) {
481 Node dn = doc_nodes.item(i);
482 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
483 if (dn_id.equals(modified_doc_id)) {
484 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
485 break;
486 }
487 }
488 }
489 }
490 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
491 return result;
492 }
493
494 /** tell the param class what its arguments are
495 * if an action has its own arguments, this should add them to the params
496 * object - particularly important for args that should not be saved */
497 public boolean getActionParameters(GSParams params) {
498 params.addParameter(GOTO_PAGE_ARG, false);
499 params.addParameter(ENRICH_DOC_ARG, false);
500 return true;
501 }
502
503
504 /** this method gets the collection description, the format info, the
505 * list of enrich services, etc - stuff that is needed for the page,
506 * but is the same whatever the query is - should be cached */
507 protected boolean getBackgroundData(Element page_response,
508 String collection, String lang,
509 String uid) {
510
511 // create a message to process - contains requests for the collection
512 // description, the format element, the enrich services on offer
513 // these could all be cached
514 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
515 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
516 // the format request - ignore for now, where does this request go to??
517 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
518 info_message.appendChild(format_request);
519
520 // the enrich_services request - only do this if provide_annotations is true
521
522 if (provide_annotations) {
523 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
524 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
525 info_message.appendChild(enrich_services_request);
526 }
527
528 Element info_response = (Element)this.mr.process(info_message);
529
530 // the collection is the first response
531 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
532 Element format_resp = (Element) responses.item(0);
533
534 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
535 if (format_elem != null) {
536 logger.debug("doc action found a format statement");
537 // set teh format type
538 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
539 page_response.appendChild(this.doc.importNode(format_elem, true));
540 }
541
542 if (provide_annotations) {
543 Element services_resp = (Element)responses.item(1);
544
545 // a new message for the mr
546 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
547
548 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
549 boolean service_found = false;
550 for (int j=0; j<e_services.getLength(); j++) {
551 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
552 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
553 enrich_message.appendChild(s);
554 service_found = true;
555 }
556 }
557 if (service_found) {
558 Element enrich_response = (Element)this.mr.process(enrich_message);
559
560 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
561 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
562 for (int i=0; i<e_responses.getLength(); i++) {
563 Element e_resp = (Element)e_responses.item(i);
564 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
565 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
566 service_list.appendChild(e_service);
567 }
568 page_response.appendChild(service_list);
569 }
570 } // if provide_annotations
571 return true;
572
573 }
574
575 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
576 */
577 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
578
579 // do the query again to get term info
580 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
581 HashMap params = GSXML.extractParams(cgi_param_list, false);
582
583 HashMap previous_params = (HashMap)params.get("p");
584 if (previous_params == null) {
585 return dc_response_doc_content;
586 }
587 String service_name = (String)previous_params.get(GSParams.SERVICE);
588 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
589 logger.error("invalid service, not doing highlighting");
590 return dc_response_doc_content;
591 }
592 String collection = (String)params.get(GSParams.COLLECTION);
593 String lang = request.getAttribute(GSXML.LANG_ATT);
594 String uid = request.getAttribute(GSXML.USER_ID_ATT);
595 String to = GSPath.appendLink(collection, service_name);
596
597 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
598 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
599 mr_query_message.appendChild(mr_query_request);
600
601 // paramList
602 HashMap service_params = (HashMap)params.get("s1");
603
604 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
605 GSXML.addParametersToList(this.doc, query_param_list, service_params);
606 mr_query_request.appendChild(query_param_list);
607
608 // do the query
609 Element mr_query_response = (Element)this.mr.process(mr_query_message);
610
611 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
612 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
613 if (query_term_list_element == null) {
614 // no term info
615 logger.error("No query term information.\n");
616 return dc_response_doc_content;
617 }
618
619 String content = GSXML.getNodeText(dc_response_doc_content);
620
621 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
622 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
623
624 HashSet query_term_variants = new HashSet();
625 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
626 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
627 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
628 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
629 for (int j = 0; j < equivalent_terms.length; j++) {
630 query_term_variants.add(equivalent_terms[j]);
631 }
632 }
633
634 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
635
636 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
637 String performed_query = GSXML.getNodeText(query_element) + " ";
638
639 ArrayList phrase_query_p_term_variants_list = new ArrayList();
640 int term_start = 0;
641 boolean in_term = false;
642 boolean in_phrase = false;
643 for (int i = 0; i < performed_query.length(); i++) {
644 char character = performed_query.charAt(i);
645 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
646
647 // Has a query term just started?
648 if (in_term == false && is_character_letter_or_digit == true) {
649 in_term = true;
650 term_start = i;
651 }
652
653 // Or has a term just finished?
654 else if (in_term == true && is_character_letter_or_digit == false) {
655 in_term = false;
656 String term = performed_query.substring(term_start, i);
657
658 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
659 if (term_element != null) {
660
661 HashSet phrase_query_p_term_x_variants = new HashSet();
662
663 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
664 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
665 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
666 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
667 for (int k = 0; k < term_equivalent_terms.length; k++) {
668 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
669 }
670 }
671 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
672
673 if (in_phrase == false) {
674 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
675 phrase_query_p_term_variants_list = new ArrayList();
676 }
677 }
678 }
679 // Watch for phrases (surrounded by quotes)
680 if (character == '\"') {
681 // Has a phrase just started?
682 if (in_phrase == false) {
683 in_phrase = true;
684 }
685 // Or has a phrase just finished?
686 else if (in_phrase == true) {
687 in_phrase = false;
688 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
689 }
690
691 phrase_query_p_term_variants_list = new ArrayList();
692 }
693 }
694
695 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
696 }
697
698
699 /**
700 * Highlights query terms in a piece of text.
701 */
702 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
703 {
704 // Convert the content string to an array of characters for speed
705 char[] content_characters = new char[content.length()];
706 content.getChars(0, content.length(), content_characters, 0);
707
708 // Now skim through the content, identifying word matches
709 ArrayList word_matches = new ArrayList();
710 int word_start = 0;
711 boolean in_word = false;
712 boolean preceding_word_matched = false;
713 for (int i = 0; i < content_characters.length; i++) {
714 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
715
716 // Has a word just started?
717 if (in_word == false && is_character_letter_or_digit == true) {
718 in_word = true;
719 word_start = i;
720 }
721
722 // Or has a word just finished?
723 else if (in_word == true && is_character_letter_or_digit == false) {
724 in_word = false;
725
726 // Check if the word matches any of the query term equivalents
727 String word = new String(content_characters, word_start, (i - word_start));
728 if (query_term_variants.contains(word)) {
729 // We have found a matching word, so remember its location
730 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
731 preceding_word_matched = true;
732 }
733 else {
734 preceding_word_matched = false;
735 }
736 }
737 }
738
739 // Don't forget the last word...
740 if (in_word == true) {
741 // Check if the word matches any of the query term equivalents
742 String word = new String(content_characters, word_start, (content_characters.length - word_start));
743 if (query_term_variants.contains(word)) {
744 // We have found a matching word, so remember its location
745 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
746 }
747 }
748
749 ArrayList highlight_start_positions = new ArrayList();
750 ArrayList highlight_end_positions = new ArrayList();
751
752 // Deal with phrases now
753 ArrayList partial_phrase_matches = new ArrayList();
754 for (int i = 0; i < word_matches.size(); i++) {
755 WordMatch word_match = (WordMatch) word_matches.get(i);
756
757 // See if any partial phrase matches are extended by this word
758 if (word_match.preceding_word_matched) {
759 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
760 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
761 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
762 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
763 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
764 partial_phrase_match.num_words_matched++;
765
766 // Has a complete phrase match occurred?
767 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
768 // Check for overlaps by looking at the previous highlight range
769 if (!highlight_end_positions.isEmpty()) {
770 int last_highlight_index = highlight_end_positions.size() - 1;
771 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
772 if (last_highlight_end > partial_phrase_match.start_position) {
773 // There is an overlap, so remove the previous phrase match
774 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
775 highlight_end_positions.remove(last_highlight_index);
776 partial_phrase_match.start_position = last_highlight_start;
777 }
778 }
779
780 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
781 highlight_end_positions.add(new Integer(word_match.end_position));
782 }
783 // No, but add the partial match back into the list for next time
784 else {
785 partial_phrase_matches.add(partial_phrase_match);
786 }
787 }
788 }
789 }
790 else {
791 partial_phrase_matches.clear();
792 }
793
794 // See if this word is at the start of any of the phrases
795 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
796 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
797 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
798 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
799 // If this phrase is just one word long, we have a complete match
800 if (phrase_query_p_term_variants_list.size() == 1) {
801 highlight_start_positions.add(new Integer(word_match.start_position));
802 highlight_end_positions.add(new Integer(word_match.end_position));
803 }
804 // Otherwise we have the start of a potential phrase match
805 else {
806 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
807 }
808 }
809 }
810 }
811
812 // Now add the annotation tags into the document at the correct points
813 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
814
815 int last_wrote = 0;
816 for (int i = 0; i < highlight_start_positions.size(); i++) {
817 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
818 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
819
820 // Print anything before the highlight range
821 if (last_wrote < highlight_start) {
822 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
823 // System.err.print(preceding_text);
824 content_element.appendChild(this.doc.createTextNode(preceding_text));
825 }
826
827 // Print the highlight text, annotated
828 if (highlight_end > last_wrote) {
829 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
830 // System.err.print("|" + highlight_text + "|");
831 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
832 annotation_element.setAttribute("type", "query_term");
833 content_element.appendChild(annotation_element);
834 last_wrote = highlight_end;
835 }
836 }
837
838 // Finish off any unwritten text
839 if (last_wrote < content_characters.length) {
840 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
841 // System.err.print(remaining_text);
842 content_element.appendChild(this.doc.createTextNode(remaining_text));
843 }
844
845 return content_element;
846 }
847
848
849 static private class WordMatch
850 {
851 public String word;
852 public int start_position;
853 public int end_position;
854 public boolean preceding_word_matched;
855
856 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
857 {
858 this.word = word;
859 this.start_position = start_position;
860 this.end_position = end_position;
861 this.preceding_word_matched = preceding_word_matched;
862 }
863 }
864
865
866 static private class PartialPhraseMatch
867 {
868 public int start_position;
869 public int query_phrase_number;
870 public int num_words_matched;
871
872 public PartialPhraseMatch(int start_position, int query_phrase_number)
873 {
874 this.start_position = start_position;
875 this.query_phrase_number = query_phrase_number;
876 this.num_words_matched = 1;
877 }
878 }
879}
Note: See TracBrowser for help on using the repository browser.