source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 24993

Last change on this file since 24993 was 24993, checked in by sjm84, 12 years ago

Adding UserContext to replace the use of lang and uid

  • Property svn:keywords set to Author Date Id Revision
File size: 39.4 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router */
41public class DocumentAction extends Action
42{
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /**
52 * if this is set to true, when a document is displayed, any annotation type
53 * services (enrich) will be offered to the user as well
54 */
55 protected boolean provide_annotations = false;
56
57 protected boolean highlight_query_terms = false;
58
59 public boolean configure()
60 {
61 super.configure();
62 String highlight = (String) config_params.get("highlightQueryTerms");
63 if (highlight != null && highlight.equals("true"))
64 {
65 highlight_query_terms = true;
66 }
67 String annotate = (String) config_params.get("displayAnnotationService");
68 if (annotate != null && annotate.equals("true"))
69 {
70 provide_annotations = true;
71 }
72 return true;
73 }
74
75 public Node process(Node message_node)
76 {
77 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
78
79 Element message = this.converter.nodeToElement(message_node);
80
81 // the response
82 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
83 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
84 result.appendChild(page_response);
85
86 // get the request - assume only one
87 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
88 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
89 HashMap params = GSXML.extractParams(cgi_paramList, false);
90
91 // just in case there are some that need to get passed to the services
92 HashMap service_params = (HashMap) params.get("s0");
93
94 String has_rl = null;
95 String has_href = null;
96 has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
97 has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
98 String collection = (String) params.get(GSParams.COLLECTION);
99 UserContext userContext = new UserContext(request);
100 String document_name = (String) params.get(GSParams.DOCUMENT);
101 if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals("")))
102 {
103 logger.error("no document specified!");
104 return result;
105 }
106 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
107 if (document_type == null)
108 {
109 document_type = "simple";
110 }
111 //whether to retrieve siblings or not
112 boolean get_siblings = false;
113 String sibs = (String) params.get(SIBLING_ARG);
114 if (sibs != null && sibs.equals("1"))
115 {
116 get_siblings = true;
117 }
118
119 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
120 if (sibling_num != null && !sibling_num.equals(""))
121 {
122 // we have to modify the doc name
123 document_name = document_name + "." + sibling_num + ".ss";
124 }
125
126 boolean expand_document = false;
127 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
128 if (ed_arg != null && ed_arg.equals("1"))
129 {
130 expand_document = true;
131 }
132
133 boolean expand_contents = false;
134 if (expand_document)
135 { // we always expand the contents with the text
136 expand_contents = true;
137 }
138 else
139 {
140 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
141 if (ec_arg != null && ec_arg.equals("1"))
142 {
143 expand_contents = true;
144 }
145 }
146
147 //append site metadata
148 addSiteMetadata(page_response, userContext);
149
150 // get the additional data needed for the page
151 getBackgroundData(page_response, collection, userContext);
152 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
153
154 // the_document is where all the doc info - structure and metadata etc
155 // is added into, to be returned in the page
156 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
157 page_response.appendChild(the_document);
158
159 // set the doctype from the cgi arg as an attribute
160 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
161
162 // create a basic doc list containing the current node
163 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
164 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
165 basic_doc_list.appendChild(current_doc);
166 if (document_name.length() != 0)
167 {
168 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
169 }
170 else if (has_href.length() != 0)
171 {
172 current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
173 current_doc.setAttribute("externalURL", has_rl);
174 }
175
176 // Create a parameter list to specify the required structure information
177 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
178
179 if (service_params != null)
180 {
181 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
182 }
183
184 Element ds_param = null;
185 boolean get_structure = false;
186 boolean get_structure_info = false;
187 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
188 {
189 get_structure_info = true;
190
191 if (expand_contents)
192 {
193 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
194 ds_param_list.appendChild(ds_param);
195 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
196 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
197 }
198
199 // get teh info needed for paged naviagtion
200 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201 ds_param_list.appendChild(ds_param);
202 ds_param.setAttribute(GSXML.NAME_ATT, "info");
203 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
204 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
205 ds_param_list.appendChild(ds_param);
206 ds_param.setAttribute(GSXML.NAME_ATT, "info");
207 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
208 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
209 ds_param_list.appendChild(ds_param);
210 ds_param.setAttribute(GSXML.NAME_ATT, "info");
211 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
212
213 if (get_siblings)
214 {
215 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
216 ds_param_list.appendChild(ds_param);
217 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
218 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
219 }
220
221 }
222 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
223 {
224 get_structure = true;
225 if (expand_contents)
226 {
227 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
228 ds_param_list.appendChild(ds_param);
229 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
230 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
231 }
232 else
233 {
234 // get the info needed for table of contents
235 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
236 ds_param_list.appendChild(ds_param);
237 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
238 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
239 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
240 ds_param_list.appendChild(ds_param);
241 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
242 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
243 if (get_siblings)
244 {
245 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
246 ds_param_list.appendChild(ds_param);
247 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
248 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
249 }
250 }
251 }
252 else
253 {
254 // we dont need any structure
255 }
256
257 boolean has_dummy = false;
258 if (get_structure || get_structure_info)
259 {
260
261 // Build a request to obtain the document structure
262 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
263 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
264 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
265 ds_message.appendChild(ds_request);
266 ds_request.appendChild(ds_param_list);
267
268 // create a doc_node_list and put in the doc_node that we are interested in
269 ds_request.appendChild(basic_doc_list);
270
271 // Process the document structure retrieve message
272 Element ds_response_message = (Element) this.mr.process(ds_message);
273 if (processErrorElements(ds_response_message, page_response))
274 {
275 return result;
276 }
277
278 // get the info and print out
279 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
280 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
281 path = GSPath.appendLink(path, "nodeStructureInfo");
282 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
283 // get the doc_node bit
284 if (ds_response_struct_info != null)
285 {
286 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
287 }
288 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
289 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
290 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
291 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
292
293 if (ds_response_structure != null)
294 {
295 // add the contents of the structure bit into the_document
296 NodeList structs = ds_response_structure.getChildNodes();
297 for (int i = 0; i < structs.getLength(); i++)
298 {
299 the_document.appendChild(this.doc.importNode(structs.item(i), true));
300 }
301 }
302 else
303 {
304 // no structure nodes, so put in a dummy doc node
305 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
306 if (document_name.length() != 0)
307 {
308 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
309 }
310 else if (has_href.length() != 0)
311 {
312 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
313 doc_node.setAttribute("externalURL", has_rl);
314 }
315 the_document.appendChild(doc_node);
316 has_dummy = true;
317 }
318 }
319 else
320 { // a simple type - we dont have a dummy node for simple
321 // should think about this more
322 // no structure request, so just put in a dummy doc node
323 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
324 if (document_name.length() != 0)
325 {
326 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
327 }
328 else if (has_href.length() != 0)
329 {
330 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
331 doc_node.setAttribute("externalURL", has_rl);
332 }
333 the_document.appendChild(doc_node);
334 has_dummy = true;
335 }
336
337 // Build a request to obtain some document metadata
338 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
339 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
340 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
341 dm_message.appendChild(dm_request);
342 // Create a parameter list to specify the required metadata information
343
344 HashSet meta_names = new HashSet();
345 meta_names.add("Title"); // the default
346 if (format_elem != null)
347 {
348 getRequiredMetadataNames(format_elem, meta_names);
349 }
350
351 Element dm_param_list = createMetadataParamList(meta_names);
352 if (service_params != null)
353 {
354 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
355 }
356
357 dm_request.appendChild(dm_param_list);
358
359 // create the doc node list for the metadata request
360 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
361 dm_request.appendChild(dm_doc_list);
362
363 // Add each node from the structure response into the metadata request
364 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
365 for (int i = 0; i < doc_nodes.getLength(); i++)
366 {
367 Element doc_node = (Element) doc_nodes.item(i);
368 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
369
370 // Add the documentNode to the list
371 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
372 dm_doc_list.appendChild(dm_doc_node);
373 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
374 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
375 }
376
377 // we also want a metadata request to the top level document to get
378 // assocfilepath - this could be cached too
379 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
380 dm_message.appendChild(doc_meta_request);
381 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
382 if (service_params != null)
383 {
384 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
385 }
386
387 doc_meta_request.appendChild(doc_meta_param_list);
388 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
389 doc_meta_param_list.appendChild(doc_param);
390 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
391 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
392
393 // create the doc node list for the metadata request
394 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
395 doc_meta_request.appendChild(doc_list);
396
397 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
398 // the node we want is the root document node
399 if (document_name.length() != 0)
400 {
401 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name + ".rt");
402 }
403 else if (has_href.length() != 0)
404 {
405 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href + ".rt");
406 doc_node.setAttribute("externalURL", has_rl);
407 }
408 doc_list.appendChild(doc_node);
409
410 Element dm_response_message = (Element) this.mr.process(dm_message);
411 if (processErrorElements(dm_response_message, page_response))
412 {
413 return result;
414 }
415
416 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
417 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
418
419 // Merge the metadata with the structure information
420 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
421 for (int i = 0; i < doc_nodes.getLength(); i++)
422 {
423 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
424 }
425 // get the top level doc metadata out
426 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
427 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
428 GSXML.mergeMetadataLists(the_document, top_doc_node);
429
430 // Build a request to obtain some document content
431 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
432 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
433 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
434 dc_message.appendChild(dc_request);
435
436 // Create a parameter list to specify the request parameters - empty for now
437 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
438 if (service_params != null)
439 {
440 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
441 }
442
443 dc_request.appendChild(dc_param_list);
444
445 // get the content
446 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
447 if (expand_document)
448 {
449 dc_request.appendChild(dm_doc_list);
450 }
451 else
452 {
453 dc_request.appendChild(basic_doc_list);
454 }
455 logger.debug("request = " + converter.getString(dc_message));
456 Element dc_response_message = (Element) this.mr.process(dc_message);
457 if (processErrorElements(dc_response_message, page_response))
458 {
459 return result;
460 }
461
462 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
463
464 if (expand_document)
465 {
466 // Merge the content with the structure information
467 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
468 for (int i = 0; i < doc_nodes.getLength(); i++)
469 {
470 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
471 if (content != null)
472 {
473 if (highlight_query_terms)
474 {
475 content = highlightQueryTerms(request, (Element) content);
476 }
477 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
478 }
479 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
480 }
481 }
482 else
483 {
484 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
485 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
486 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
487 Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
488
489 if (dc_response_doc_content == null)
490 {
491 // no content to add
492 if (dc_response_doc_external != null)
493 {
494 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
495
496 the_document.setAttribute("selectedNode", modified_doc_id);
497 the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
498 }
499 return result;
500 }
501 if (highlight_query_terms)
502 {
503 dc_response_doc.removeChild(dc_response_doc_content);
504
505 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
506 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
507 }
508
509 if (provide_annotations)
510 {
511 String service_selected = (String) params.get(ENRICH_DOC_ARG);
512 if (service_selected != null && service_selected.equals("1"))
513 {
514 // now we can modifiy the response doc if needed
515 String enrich_service = (String) params.get(GSParams.SERVICE);
516 // send a message to the service
517 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
518 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
519 enrich_message.appendChild(enrich_request);
520 // check for parameters
521 HashMap e_service_params = (HashMap) params.get("s1");
522 if (e_service_params != null)
523 {
524 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
525 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
526 enrich_request.appendChild(enrich_pl);
527 }
528 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
529 enrich_request.appendChild(e_doc_list);
530 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
531
532 Node enrich_response = this.mr.process(enrich_message);
533
534 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
535 path = GSPath.createPath(links);
536 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
537
538 }
539 } // if provide_annotations
540
541 // use the returned id rather than the sent one cos there may have
542 // been modifiers such as .pr that are removed.
543 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
544 the_document.setAttribute("selectedNode", modified_doc_id);
545 if (has_dummy)
546 {
547 // change the id if necessary and add the content
548 Element dummy_node = (Element) doc_nodes.item(0);
549
550 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
551 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
552 // hack for simple type
553 if (document_type.equals("simple"))
554 {
555 // we dont want the internal docNode, just want the content and metadata in the document
556 // rethink this!!
557 the_document.removeChild(dummy_node);
558
559 NodeList dummy_children = dummy_node.getChildNodes();
560 //for (int i=0; i<dummy_children.getLength(); i++) {
561 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
562 {
563 // special case as we don't want more than one metadata list
564 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
565 {
566 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
567 }
568 else
569 {
570 the_document.appendChild(dummy_children.item(i));
571 }
572 }
573 }
574 }
575 else
576 {
577 // Merge the document content with the metadata and structure information
578 for (int i = 0; i < doc_nodes.getLength(); i++)
579 {
580 Node dn = doc_nodes.item(i);
581 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
582 if (dn_id.equals(modified_doc_id))
583 {
584 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
585 break;
586 }
587 }
588 }
589 }
590 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
591 return result;
592 }
593
/**
 * Tells the params class what this action's arguments are. If an action has
 * its own arguments, this method should add them to the params object; this
 * is particularly important for arguments that should not be saved.
 */
599 public boolean getActionParameters(GSParams params)
600 {
601 params.addParameter(GOTO_PAGE_ARG, false);
602 params.addParameter(ENRICH_DOC_ARG, false);
603 return true;
604 }
605
/**
 * Gets the collection description, the format info, the list of enrich
 * services, etc. - the data that is needed for the page but is the same
 * whatever the query is. This should be cached.
 */
611 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
612 {
613
614 // create a message to process - contains requests for the collection
615 // description, the format element, the enrich services on offer
616 // these could all be cached
617 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
618 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
619 // the format request - ignore for now, where does this request go to??
620 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
621 info_message.appendChild(format_request);
622
623 // the enrich_services request - only do this if provide_annotations is true
624
625 if (provide_annotations)
626 {
627 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
628 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
629 info_message.appendChild(enrich_services_request);
630 }
631
632 Element info_response = (Element) this.mr.process(info_message);
633
634 // the collection is the first response
635 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
636 Element format_resp = (Element) responses.item(0);
637
638 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
639 if (format_elem != null)
640 {
641 logger.debug("doc action found a format statement");
642 // set teh format type
643 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
644 page_response.appendChild(this.doc.importNode(format_elem, true));
645 }
646
647 if (provide_annotations)
648 {
649 Element services_resp = (Element) responses.item(1);
650
651 // a new message for the mr
652 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
653
654 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
655 boolean service_found = false;
656 for (int j = 0; j < e_services.getLength(); j++)
657 {
658 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
659 {
660 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
661 enrich_message.appendChild(s);
662 service_found = true;
663 }
664 }
665 if (service_found)
666 {
667 Element enrich_response = (Element) this.mr.process(enrich_message);
668
669 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
670 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
671 for (int i = 0; i < e_responses.getLength(); i++)
672 {
673 Element e_resp = (Element) e_responses.item(i);
674 Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
675 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
676 service_list.appendChild(e_service);
677 }
678 page_response.appendChild(service_list);
679 }
680 } // if provide_annotations
681 return true;
682
683 }
684
685 /**
686 * this involves a bit of a hack to get the equivalent query terms - has to
687 * requery the query service - uses the last selected service name. (if it
688 * ends in query). should this action do the query or should it send a
689 * message to the query action? but that will involve lots of extra stuff.
690 * also doesn't handle phrases properly - just highlights all the terms
691 * found in the text.
692 */
693 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
694 {
695
696 // do the query again to get term info
697 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
698 HashMap params = GSXML.extractParams(cgi_param_list, false);
699
700 HashMap previous_params = (HashMap) params.get("p");
701 if (previous_params == null)
702 {
703 return dc_response_doc_content;
704 }
705 String service_name = (String) previous_params.get(GSParams.SERVICE);
706 if (service_name == null || !service_name.endsWith("Query"))
707 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
708 logger.debug("invalid service, not doing highlighting");
709 return dc_response_doc_content;
710 }
711 String collection = (String) params.get(GSParams.COLLECTION);
712 UserContext userContext = new UserContext(request);
713 String to = GSPath.appendLink(collection, service_name);
714
715 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
716 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
717 mr_query_message.appendChild(mr_query_request);
718
719 // paramList
720 HashMap service_params = (HashMap) params.get("s1");
721
722 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
723 GSXML.addParametersToList(this.doc, query_param_list, service_params);
724 mr_query_request.appendChild(query_param_list);
725
726 // do the query
727 Element mr_query_response = (Element) this.mr.process(mr_query_message);
728
729 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
730 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
731 if (query_term_list_element == null)
732 {
733 // no term info
734 logger.error("No query term information.\n");
735 return dc_response_doc_content;
736 }
737
738 String content = GSXML.getNodeText(dc_response_doc_content);
739
740 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
741 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
742
743 HashSet query_term_variants = new HashSet();
744 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
745 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
746 {
747 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
748 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
749 {
750 for (int i = 0; i < terms_nodelist.getLength(); i++)
751 {
752 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
753 String termValueU = null;
754 String termValueL = null;
755
756 if (termValue.length() > 1)
757 {
758 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
759 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
760 }
761 else
762 {
763 termValueU = termValue.substring(0, 1).toUpperCase();
764 termValueL = termValue.substring(0, 1).toLowerCase();
765 }
766
767 query_term_variants.add(termValueU);
768 query_term_variants.add(termValueL);
769 }
770 }
771 }
772 else
773 {
774 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
775 {
776 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
777 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
778 for (int j = 0; j < equivalent_terms.length; j++)
779 {
780 query_term_variants.add(equivalent_terms[j]);
781 }
782 }
783 }
784
785 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
786
787 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
788 String performed_query = GSXML.getNodeText(query_element) + " ";
789
790 ArrayList phrase_query_p_term_variants_list = new ArrayList();
791 int term_start = 0;
792 boolean in_term = false;
793 boolean in_phrase = false;
794 for (int i = 0; i < performed_query.length(); i++)
795 {
796 char character = performed_query.charAt(i);
797 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
798
799 // Has a query term just started?
800 if (in_term == false && is_character_letter_or_digit == true)
801 {
802 in_term = true;
803 term_start = i;
804 }
805
806 // Or has a term just finished?
807 else if (in_term == true && is_character_letter_or_digit == false)
808 {
809 in_term = false;
810 String term = performed_query.substring(term_start, i);
811
812 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
813 if (term_element != null)
814 {
815
816 HashSet phrase_query_p_term_x_variants = new HashSet();
817
818 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
819 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
820 {
821 String termValueU = null;
822 String termValueL = null;
823
824 if (term.length() > 1)
825 {
826 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
827 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
828 }
829 else
830 {
831 termValueU = term.substring(0, 1).toUpperCase();
832 termValueL = term.substring(0, 1).toLowerCase();
833 }
834
835 phrase_query_p_term_x_variants.add(termValueU);
836 phrase_query_p_term_x_variants.add(termValueL);
837 }
838 else
839 {
840 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
841 {
842 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
843 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
844 for (int k = 0; k < term_equivalent_terms.length; k++)
845 {
846 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
847 }
848 }
849 }
850 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
851
852 if (in_phrase == false)
853 {
854 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
855 phrase_query_p_term_variants_list = new ArrayList();
856 }
857 }
858 }
859 // Watch for phrases (surrounded by quotes)
860 if (character == '\"')
861 {
862 // Has a phrase just started?
863 if (in_phrase == false)
864 {
865 in_phrase = true;
866 }
867 // Or has a phrase just finished?
868 else if (in_phrase == true)
869 {
870 in_phrase = false;
871 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
872 }
873
874 phrase_query_p_term_variants_list = new ArrayList();
875 }
876 }
877
878 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
879 }
880
881 /**
882 * Highlights query terms in a piece of text.
883 */
884 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
885 {
886 // Convert the content string to an array of characters for speed
887 char[] content_characters = new char[content.length()];
888 content.getChars(0, content.length(), content_characters, 0);
889
890 // Now skim through the content, identifying word matches
891 ArrayList word_matches = new ArrayList();
892 int word_start = 0;
893 boolean in_word = false;
894 boolean preceding_word_matched = false;
895 boolean inTag = false;
896 for (int i = 0; i < content_characters.length; i++)
897 {
898 //We don't want to find words inside HTML tags
899 if (content_characters[i] == '<')
900 {
901 inTag = true;
902 continue;
903 }
904 else if (inTag && content_characters[i] == '>')
905 {
906 inTag = false;
907 }
908 else if (inTag)
909 {
910 continue;
911 }
912
913 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
914
915 // Has a word just started?
916 if (in_word == false && is_character_letter_or_digit == true)
917 {
918 in_word = true;
919 word_start = i;
920 }
921
922 // Or has a word just finished?
923 else if (in_word == true && is_character_letter_or_digit == false)
924 {
925 in_word = false;
926
927 // Check if the word matches any of the query term equivalents
928 String word = new String(content_characters, word_start, (i - word_start));
929 if (query_term_variants.contains(word))
930 {
931 // We have found a matching word, so remember its location
932 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
933 preceding_word_matched = true;
934 }
935 else
936 {
937 preceding_word_matched = false;
938 }
939 }
940 }
941
942 // Don't forget the last word...
943 if (in_word == true)
944 {
945 // Check if the word matches any of the query term equivalents
946 String word = new String(content_characters, word_start, (content_characters.length - word_start));
947 if (query_term_variants.contains(word))
948 {
949 // We have found a matching word, so remember its location
950 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
951 }
952 }
953
954 ArrayList highlight_start_positions = new ArrayList();
955 ArrayList highlight_end_positions = new ArrayList();
956
957 // Deal with phrases now
958 ArrayList partial_phrase_matches = new ArrayList();
959 for (int i = 0; i < word_matches.size(); i++)
960 {
961 WordMatch word_match = (WordMatch) word_matches.get(i);
962
963 // See if any partial phrase matches are extended by this word
964 if (word_match.preceding_word_matched)
965 {
966 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
967 {
968 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
969 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
970 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
971 if (phrase_query_p_term_x_variants.contains(word_match.word))
972 {
973 partial_phrase_match.num_words_matched++;
974
975 // Has a complete phrase match occurred?
976 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
977 {
978 // Check for overlaps by looking at the previous highlight range
979 if (!highlight_end_positions.isEmpty())
980 {
981 int last_highlight_index = highlight_end_positions.size() - 1;
982 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
983 if (last_highlight_end > partial_phrase_match.start_position)
984 {
985 // There is an overlap, so remove the previous phrase match
986 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
987 highlight_end_positions.remove(last_highlight_index);
988 partial_phrase_match.start_position = last_highlight_start;
989 }
990 }
991
992 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
993 highlight_end_positions.add(new Integer(word_match.end_position));
994 }
995 // No, but add the partial match back into the list for next time
996 else
997 {
998 partial_phrase_matches.add(partial_phrase_match);
999 }
1000 }
1001 }
1002 }
1003 else
1004 {
1005 partial_phrase_matches.clear();
1006 }
1007
1008 // See if this word is at the start of any of the phrases
1009 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1010 {
1011 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
1012 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1013 if (phrase_query_p_term_1_variants.contains(word_match.word))
1014 {
1015 // If this phrase is just one word long, we have a complete match
1016 if (phrase_query_p_term_variants_list.size() == 1)
1017 {
1018 highlight_start_positions.add(new Integer(word_match.start_position));
1019 highlight_end_positions.add(new Integer(word_match.end_position));
1020 }
1021 // Otherwise we have the start of a potential phrase match
1022 else
1023 {
1024 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1025 }
1026 }
1027 }
1028 }
1029
1030 // Now add the annotation tags into the document at the correct points
1031 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1032
1033 int last_wrote = 0;
1034 for (int i = 0; i < highlight_start_positions.size(); i++)
1035 {
1036 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
1037 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
1038
1039 // Print anything before the highlight range
1040 if (last_wrote < highlight_start)
1041 {
1042 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1043 content_element.appendChild(this.doc.createTextNode(preceding_text));
1044 }
1045
1046 // Print the highlight text, annotated
1047 if (highlight_end > last_wrote)
1048 {
1049 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1050 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1051 annotation_element.setAttribute("type", "query_term");
1052 content_element.appendChild(annotation_element);
1053 last_wrote = highlight_end;
1054 }
1055 }
1056
1057 // Finish off any unwritten text
1058 if (last_wrote < content_characters.length)
1059 {
1060 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1061 content_element.appendChild(this.doc.createTextNode(remaining_text));
1062 }
1063
1064 return content_element;
1065 }
1066
1067 static private class WordMatch
1068 {
1069 public String word;
1070 public int start_position;
1071 public int end_position;
1072 public boolean preceding_word_matched;
1073
1074 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1075 {
1076 this.word = word;
1077 this.start_position = start_position;
1078 this.end_position = end_position;
1079 this.preceding_word_matched = preceding_word_matched;
1080 }
1081 }
1082
1083 static private class PartialPhraseMatch
1084 {
1085 public int start_position;
1086 public int query_phrase_number;
1087 public int num_words_matched;
1088
1089 public PartialPhraseMatch(int start_position, int query_phrase_number)
1090 {
1091 this.start_position = start_position;
1092 this.query_phrase_number = query_phrase_number;
1093 this.num_words_matched = 1;
1094 }
1095 }
1096}
Note: See TracBrowser for help on using the repository browser.