source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25642

Last change on this file since 25642 was 25642, checked in by sjm84, 12 years ago

A few minor changes to DocumentAction

  • Property svn:keywords set to Author Date Id Revision
File size: 40.3 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37import java.io.Serializable;
38
39import org.apache.log4j.*;
40
41/** Action class for retrieving Documents via the message router */
42public class DocumentAction extends Action
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47 // this is used to specify that the sibling nodes of a selected one should be obtained
48 public static final String SIBLING_ARG = "sib";
49 public static final String GOTO_PAGE_ARG = "gp";
50 public static final String ENRICH_DOC_ARG = "end";
51 public static final String EXPAND_DOCUMENT_ARG = "ed";
52 public static final String EXPAND_CONTENTS_ARG = "ec";
53 public static final String REALISTIC_BOOK_ARG = "book";
54
55 /**
56 * if this is set to true, when a document is displayed, any annotation type
57 * services (enrich) will be offered to the user as well
58 */
59 protected boolean provide_annotations = false;
60
61 protected boolean highlight_query_terms = false;
62
63 public boolean configure()
64 {
65 super.configure();
66 String highlight = (String) config_params.get("highlightQueryTerms");
67 if (highlight != null && highlight.equals("true"))
68 {
69 highlight_query_terms = true;
70 }
71 String annotate = (String) config_params.get("displayAnnotationService");
72 if (annotate != null && annotate.equals("true"))
73 {
74 provide_annotations = true;
75 }
76 return true;
77 }
78
79 public Node process(Node message_node)
80 {
81 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83 Element message = this.converter.nodeToElement(message_node);
84
85 // the response
86 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
87 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
88 result.appendChild(page_response);
89
90 // get the request - assume only one
91 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
92 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
93 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
94
95 // just in case there are some that need to get passed to the services
96 HashMap service_params = (HashMap) params.get("s0");
97
98 String collection = (String) params.get(GSParams.COLLECTION);
99 String document_id = (String) params.get(GSParams.DOCUMENT);
100 if (document_id != null && document_id.equals(""))
101 {
102 document_id = null;
103 }
104 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
105 if (href != null && href.equals(""))
106 {
107 href = null;
108 }
109 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
110 if (document_id == null && href == null)
111 {
112 logger.error("no document specified!");
113 return result;
114 }
115 if (rl != null && rl.equals("0"))
116 {
117 // this is a true external link, we should have been directed to a different page or action
118 logger.error("rl value was 0, shouldn't get here");
119 return result;
120 }
121 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
122 if (document_type == null || document_type.equals(""))
123 {
124 document_type = "hierarchy";
125 }
126 //whether to retrieve siblings or not
127 boolean get_siblings = false;
128 String sibs = (String) params.get(SIBLING_ARG);
129 if (sibs != null && sibs.equals("1"))
130 {
131 get_siblings = true;
132 }
133
134 String doc_id_modifier = "";
135 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
136 if (sibling_num != null && !sibling_num.equals(""))
137 {
138 // we have to modify the doc name
139 doc_id_modifier = "." + sibling_num + ".ss";
140 }
141
142 boolean expand_document = false;
143 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
144 if (ed_arg != null && ed_arg.equals("1"))
145 {
146 expand_document = true;
147 }
148
149 boolean expand_contents = false;
150 if (expand_document)
151 { // we always expand the contents with the text
152 expand_contents = true;
153 }
154 else
155 {
156 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
157 if (ec_arg != null && ec_arg.equals("1"))
158 {
159 expand_contents = true;
160 }
161 }
162
163 UserContext userContext = new UserContext(request);
164
165 //append site metadata
166 addSiteMetadata(page_response, userContext);
167 addInterfaceOptions(page_response);
168
169 // get the additional data needed for the page
170 getBackgroundData(page_response, collection, userContext);
171 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
172
173 // the_document is where all the doc info - structure and metadata etc
174 // is added into, to be returned in the page
175 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
176 page_response.appendChild(the_document);
177
178 // set the doctype from the cgi arg as an attribute
179 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
180
181 // create a basic doc list containing the current node
182 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
183 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
184 basic_doc_list.appendChild(current_doc);
185 if (document_id != null)
186 {
187 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
188 }
189 else
190 {
191 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
192 // do we need this??
193 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
194 }
195
196 // Create a parameter list to specify the required structure information
197 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
198
199 if (service_params != null)
200 {
201 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
202 }
203
204 Element ds_param = null;
205 boolean get_structure = false;
206 boolean get_structure_info = false;
207 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
208 {
209 get_structure_info = true;
210
211 if (expand_contents)
212 {
213 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
214 ds_param_list.appendChild(ds_param);
215 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
216 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
217 }
218
219 // get the info needed for paged naviagtion
220 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
221 ds_param_list.appendChild(ds_param);
222 ds_param.setAttribute(GSXML.NAME_ATT, "info");
223 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
224 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
225 ds_param_list.appendChild(ds_param);
226 ds_param.setAttribute(GSXML.NAME_ATT, "info");
227 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
228 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
229 ds_param_list.appendChild(ds_param);
230 ds_param.setAttribute(GSXML.NAME_ATT, "info");
231 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
232
233 if (get_siblings)
234 {
235 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
236 ds_param_list.appendChild(ds_param);
237 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
238 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
239 }
240
241 }
242 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
243 {
244 get_structure = true;
245 if (expand_contents)
246 {
247 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
248 ds_param_list.appendChild(ds_param);
249 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
250 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
251 }
252 else
253 {
254 // get the info needed for table of contents
255 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
256 ds_param_list.appendChild(ds_param);
257 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
258 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
259 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
260 ds_param_list.appendChild(ds_param);
261 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
262 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
263 if (get_siblings)
264 {
265 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
266 ds_param_list.appendChild(ds_param);
267 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
268 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
269 }
270 }
271 }
272 else
273 {
274 // we dont need any structure
275 }
276
277 boolean has_dummy = false;
278 if (get_structure || get_structure_info)
279 {
280
281 // Build a request to obtain the document structure
282 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
283 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
284 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
285 ds_message.appendChild(ds_request);
286 ds_request.appendChild(ds_param_list);
287
288 // create a doc_node_list and put in the doc_node that we are interested in
289 ds_request.appendChild(basic_doc_list);
290
291 // Process the document structure retrieve message
292 Element ds_response_message = (Element) this.mr.process(ds_message);
293 if (processErrorElements(ds_response_message, page_response))
294 {
295 return result;
296 }
297
298 // get the info and print out
299 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
300 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
301 path = GSPath.appendLink(path, "nodeStructureInfo");
302 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
303 // get the doc_node bit
304 if (ds_response_struct_info != null)
305 {
306 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
307 }
308 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
309 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
310 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
311 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
312
313 if (ds_response_structure != null)
314 {
315 // add the contents of the structure bit into the_document
316 NodeList structs = ds_response_structure.getChildNodes();
317 for (int i = 0; i < structs.getLength(); i++)
318 {
319 the_document.appendChild(this.doc.importNode(structs.item(i), true));
320 }
321 }
322 else
323 {
324 // no structure nodes, so put in a dummy doc node
325 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
326 if (document_id != null)
327 {
328 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
329 }
330 else
331 {
332 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
333
334 }
335 the_document.appendChild(doc_node);
336 has_dummy = true;
337 }
338 }
339 else
340 { // a simple type - we dont have a dummy node for simple
341 // should think about this more
342 // no structure request, so just put in a dummy doc node
343 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
344 if (document_id != null)
345 {
346 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
347 }
348 else
349 {
350 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
351 }
352 the_document.appendChild(doc_node);
353 has_dummy = true;
354 }
355
356 // Build a request to obtain some document metadata
357 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
358 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
359 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
360 dm_message.appendChild(dm_request);
361 // Create a parameter list to specify the required metadata information
362
363 HashSet<String> meta_names = new HashSet<String>();
364 meta_names.add("Title"); // the default
365 if (format_elem != null)
366 {
367 getRequiredMetadataNames(format_elem, meta_names);
368 }
369
370 Element dm_param_list = createMetadataParamList(meta_names);
371 if (service_params != null)
372 {
373 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
374 }
375
376 dm_request.appendChild(dm_param_list);
377
378 // create the doc node list for the metadata request
379 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
380 dm_request.appendChild(dm_doc_list);
381
382 // Add each node from the structure response into the metadata request
383 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
384 for (int i = 0; i < doc_nodes.getLength(); i++)
385 {
386 Element doc_node = (Element) doc_nodes.item(i);
387 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
388
389 // Add the documentNode to the list
390 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
391 dm_doc_list.appendChild(dm_doc_node);
392 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
393 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
394 }
395
396 // we also want a metadata request to the top level document to get
397 // assocfilepath - this could be cached too
398 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
399 dm_message.appendChild(doc_meta_request);
400 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
401 if (service_params != null)
402 {
403 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
404 }
405
406 doc_meta_request.appendChild(doc_meta_param_list);
407 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
408 doc_meta_param_list.appendChild(doc_param);
409 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
410 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
411
412 // create the doc node list for the metadata request
413 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
414 doc_meta_request.appendChild(doc_list);
415
416 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
417 // the node we want is the root document node
418 if (document_id != null)
419 {
420 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
421 }
422 else
423 {
424 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
425 // can we assume that href is always a top level doc??
426 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
427 //doc_node.setAttribute("externalURL", has_rl);
428 }
429 doc_list.appendChild(doc_node);
430
431 Element dm_response_message = (Element) this.mr.process(dm_message);
432 if (processErrorElements(dm_response_message, page_response))
433 {
434 return result;
435 }
436
437 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
438 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
439
440 // Merge the metadata with the structure information
441 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
442 for (int i = 0; i < doc_nodes.getLength(); i++)
443 {
444 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
445 }
446 // get the top level doc metadata out
447 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
448 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
449 GSXML.mergeMetadataLists(the_document, top_doc_node);
450
451 // Build a request to obtain some document content
452 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
453 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
454 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
455 dc_message.appendChild(dc_request);
456
457 // Create a parameter list to specify the request parameters - empty for now
458 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
459 if (service_params != null)
460 {
461 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
462 }
463
464 dc_request.appendChild(dc_param_list);
465
466 // get the content
467 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
468 if (expand_document)
469 {
470 dc_request.appendChild(dm_doc_list);
471 }
472 else
473 {
474 dc_request.appendChild(basic_doc_list);
475 }
476 logger.debug("request = " + XMLConverter.getString(dc_message));
477 Element dc_response_message = (Element) this.mr.process(dc_message);
478 if (processErrorElements(dc_response_message, page_response))
479 {
480 return result;
481 }
482
483 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
484
485 if (expand_document)
486 {
487 // Merge the content with the structure information
488 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
489 for (int i = 0; i < doc_nodes.getLength(); i++)
490 {
491 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
492 if (content != null)
493 {
494 if (highlight_query_terms)
495 {
496 content = highlightQueryTerms(request, (Element) content);
497 }
498 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
499 }
500 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
501 }
502 }
503 else
504 {
505 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
506 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
507 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
508 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
509
510 if (dc_response_doc_content == null)
511 {
512 // no content to add
513 if (dc_response_doc.getAttribute("external").equals("true"))
514 {
515
516 //if (dc_response_doc_external != null)
517 //{
518 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
519
520 the_document.setAttribute("selectedNode", href_id);
521 the_document.setAttribute("external", href_id);
522 }
523 return result;
524 }
525 if (highlight_query_terms)
526 {
527 dc_response_doc.removeChild(dc_response_doc_content);
528
529 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
530 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
531 }
532
533 if (provide_annotations)
534 {
535 String service_selected = (String) params.get(ENRICH_DOC_ARG);
536 if (service_selected != null && service_selected.equals("1"))
537 {
538 // now we can modifiy the response doc if needed
539 String enrich_service = (String) params.get(GSParams.SERVICE);
540 // send a message to the service
541 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
542 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
543 enrich_message.appendChild(enrich_request);
544 // check for parameters
545 HashMap e_service_params = (HashMap) params.get("s1");
546 if (e_service_params != null)
547 {
548 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
549 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
550 enrich_request.appendChild(enrich_pl);
551 }
552 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
553 enrich_request.appendChild(e_doc_list);
554 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
555
556 Node enrich_response = this.mr.process(enrich_message);
557
558 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
559 path = GSPath.createPath(links);
560 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
561
562 }
563 } // if provide_annotations
564
565 // use the returned id rather than the sent one cos there may have
566 // been modifiers such as .pr that are removed.
567 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
568 the_document.setAttribute("selectedNode", modified_doc_id);
569 if (has_dummy)
570 {
571 // change the id if necessary and add the content
572 Element dummy_node = (Element) doc_nodes.item(0);
573
574 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
575 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
576 // hack for simple type
577 if (document_type.equals("simple"))
578 {
579 // we dont want the internal docNode, just want the content and metadata in the document
580 // rethink this!!
581 the_document.removeChild(dummy_node);
582
583 NodeList dummy_children = dummy_node.getChildNodes();
584 //for (int i=0; i<dummy_children.getLength(); i++) {
585 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
586 {
587 // special case as we don't want more than one metadata list
588 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
589 {
590 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
591 }
592 else
593 {
594 the_document.appendChild(dummy_children.item(i));
595 }
596 }
597 }
598 }
599 else
600 {
601 // Merge the document content with the metadata and structure information
602 for (int i = 0; i < doc_nodes.getLength(); i++)
603 {
604 Node dn = doc_nodes.item(i);
605 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
606 if (dn_id.equals(modified_doc_id))
607 {
608 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
609 break;
610 }
611 }
612 }
613 }
614 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
615 return result;
616 }
617
618 /**
619 * tell the param class what its arguments are if an action has its own
620 * arguments, this should add them to the params object - particularly
621 * important for args that should not be saved
622 */
623 public boolean addActionParameters(GSParams params)
624 {
625 params.addParameter(GOTO_PAGE_ARG, false);
626 params.addParameter(ENRICH_DOC_ARG, false);
627 params.addParameter(EXPAND_DOCUMENT_ARG, false);
628 params.addParameter(EXPAND_CONTENTS_ARG, false);
629 params.addParameter(REALISTIC_BOOK_ARG, false);
630
631 return true;
632 }
633
634 /**
635 * this method gets the collection description, the format info, the list of
636 * enrich services, etc - stuff that is needed for the page, but is the same
637 * whatever the query is - should be cached
638 */
639 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
640 {
641
642 // create a message to process - contains requests for the collection
643 // description, the format element, the enrich services on offer
644 // these could all be cached
645 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
646 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
647 // the format request - ignore for now, where does this request go to??
648 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
649 info_message.appendChild(format_request);
650
651 // the enrich_services request - only do this if provide_annotations is true
652
653 if (provide_annotations)
654 {
655 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
656 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
657 info_message.appendChild(enrich_services_request);
658 }
659
660 Element info_response = (Element) this.mr.process(info_message);
661
662 // the collection is the first response
663 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
664 Element format_resp = (Element) responses.item(0);
665
666 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
667 if (format_elem != null)
668 {
669 logger.debug("doc action found a format statement");
670 // set teh format type
671 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
672 page_response.appendChild(this.doc.importNode(format_elem, true));
673 }
674
675 if (provide_annotations)
676 {
677 Element services_resp = (Element) responses.item(1);
678
679 // a new message for the mr
680 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
681
682 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
683 boolean service_found = false;
684 for (int j = 0; j < e_services.getLength(); j++)
685 {
686 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
687 {
688 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
689 enrich_message.appendChild(s);
690 service_found = true;
691 }
692 }
693 if (service_found)
694 {
695 Element enrich_response = (Element) this.mr.process(enrich_message);
696
697 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
698 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
699 for (int i = 0; i < e_responses.getLength(); i++)
700 {
701 Element e_resp = (Element) e_responses.item(i);
702 Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
703 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
704 service_list.appendChild(e_service);
705 }
706 page_response.appendChild(service_list);
707 }
708 } // if provide_annotations
709 return true;
710
711 }
712
713 /**
714 * this involves a bit of a hack to get the equivalent query terms - has to
715 * requery the query service - uses the last selected service name. (if it
716 * ends in query). should this action do the query or should it send a
717 * message to the query action? but that will involve lots of extra stuff.
718 * also doesn't handle phrases properly - just highlights all the terms
719 * found in the text.
720 */
721 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
722 {
723 // do the query again to get term info
724 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
725 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
726
727 HashMap previous_params = (HashMap) params.get("p");
728 if (previous_params == null)
729 {
730 return dc_response_doc_content;
731 }
732 String service_name = (String) previous_params.get(GSParams.SERVICE);
733 if (service_name == null || !service_name.endsWith("Query"))
734 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
735 logger.debug("invalid service, not doing highlighting");
736 return dc_response_doc_content;
737 }
738 String collection = (String) params.get(GSParams.COLLECTION);
739 UserContext userContext = new UserContext(request);
740 String to = GSPath.appendLink(collection, service_name);
741
742 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
743 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
744 mr_query_message.appendChild(mr_query_request);
745
746 // paramList
747 HashMap service_params = (HashMap) params.get("s1");
748
749 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
750 GSXML.addParametersToList(this.doc, query_param_list, service_params);
751 mr_query_request.appendChild(query_param_list);
752
753 // do the query
754 Element mr_query_response = (Element) this.mr.process(mr_query_message);
755
756 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
757 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
758 if (query_term_list_element == null)
759 {
760 // no term info
761 logger.error("No query term information.\n");
762 return dc_response_doc_content;
763 }
764
765 String content = GSXML.getNodeText(dc_response_doc_content);
766
767 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
768 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
769
770 HashSet<String> query_term_variants = new HashSet<String>();
771 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
772 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
773 {
774 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
775 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
776 {
777 for (int i = 0; i < terms_nodelist.getLength(); i++)
778 {
779 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
780 String termValueU = null;
781 String termValueL = null;
782
783 if (termValue.length() > 1)
784 {
785 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
786 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
787 }
788 else
789 {
790 termValueU = termValue.substring(0, 1).toUpperCase();
791 termValueL = termValue.substring(0, 1).toLowerCase();
792 }
793
794 query_term_variants.add(termValueU);
795 query_term_variants.add(termValueL);
796 }
797 }
798 }
799 else
800 {
801 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
802 {
803 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
804 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
805 for (int j = 0; j < equivalent_terms.length; j++)
806 {
807 query_term_variants.add(equivalent_terms[j]);
808 }
809 }
810 }
811
812 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
813
814 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
815 String performed_query = GSXML.getNodeText(query_element) + " ";
816
817 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
818 int term_start = 0;
819 boolean in_term = false;
820 boolean in_phrase = false;
821 for (int i = 0; i < performed_query.length(); i++)
822 {
823 char character = performed_query.charAt(i);
824 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
825
826 // Has a query term just started?
827 if (in_term == false && is_character_letter_or_digit == true)
828 {
829 in_term = true;
830 term_start = i;
831 }
832
833 // Or has a term just finished?
834 else if (in_term == true && is_character_letter_or_digit == false)
835 {
836 in_term = false;
837 String term = performed_query.substring(term_start, i);
838
839 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
840 if (term_element != null)
841 {
842
843 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
844
845 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
846 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
847 {
848 String termValueU = null;
849 String termValueL = null;
850
851 if (term.length() > 1)
852 {
853 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
854 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
855 }
856 else
857 {
858 termValueU = term.substring(0, 1).toUpperCase();
859 termValueL = term.substring(0, 1).toLowerCase();
860 }
861
862 phrase_query_p_term_x_variants.add(termValueU);
863 phrase_query_p_term_x_variants.add(termValueL);
864 }
865 else
866 {
867 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
868 {
869 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
870 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
871 for (int k = 0; k < term_equivalent_terms.length; k++)
872 {
873 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
874 }
875 }
876 }
877 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
878
879 if (in_phrase == false)
880 {
881 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
882 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
883 }
884 }
885 }
886 // Watch for phrases (surrounded by quotes)
887 if (character == '\"')
888 {
889 // Has a phrase just started?
890 if (in_phrase == false)
891 {
892 in_phrase = true;
893 }
894 // Or has a phrase just finished?
895 else if (in_phrase == true)
896 {
897 in_phrase = false;
898 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
899 }
900
901 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
902 }
903 }
904
905 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
906 }
907
908 /**
909 * Highlights query terms in a piece of text.
910 */
911 private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
912 {
913 // Convert the content string to an array of characters for speed
914 char[] content_characters = new char[content.length()];
915 content.getChars(0, content.length(), content_characters, 0);
916
917 // Now skim through the content, identifying word matches
918 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
919 int word_start = 0;
920 boolean in_word = false;
921 boolean preceding_word_matched = false;
922 boolean inTag = false;
923 for (int i = 0; i < content_characters.length; i++)
924 {
925 //We don't want to find words inside HTML tags
926 if (content_characters[i] == '<')
927 {
928 inTag = true;
929 continue;
930 }
931 else if (inTag && content_characters[i] == '>')
932 {
933 inTag = false;
934 }
935 else if (inTag)
936 {
937 continue;
938 }
939
940 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
941
942 // Has a word just started?
943 if (in_word == false && is_character_letter_or_digit == true)
944 {
945 in_word = true;
946 word_start = i;
947 }
948
949 // Or has a word just finished?
950 else if (in_word == true && is_character_letter_or_digit == false)
951 {
952 in_word = false;
953
954 // Check if the word matches any of the query term equivalents
955 String word = new String(content_characters, word_start, (i - word_start));
956 if (query_term_variants.contains(word))
957 {
958 // We have found a matching word, so remember its location
959 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
960 preceding_word_matched = true;
961 }
962 else
963 {
964 preceding_word_matched = false;
965 }
966 }
967 }
968
969 // Don't forget the last word...
970 if (in_word == true)
971 {
972 // Check if the word matches any of the query term equivalents
973 String word = new String(content_characters, word_start, (content_characters.length - word_start));
974 if (query_term_variants.contains(word))
975 {
976 // We have found a matching word, so remember its location
977 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
978 }
979 }
980
981 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
982 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
983
984 // Deal with phrases now
985 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
986 for (int i = 0; i < word_matches.size(); i++)
987 {
988 WordMatch word_match = word_matches.get(i);
989
990 // See if any partial phrase matches are extended by this word
991 if (word_match.preceding_word_matched)
992 {
993 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
994 {
995 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
996 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
997 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
998 if (phrase_query_p_term_x_variants.contains(word_match.word))
999 {
1000 partial_phrase_match.num_words_matched++;
1001
1002 // Has a complete phrase match occurred?
1003 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1004 {
1005 // Check for overlaps by looking at the previous highlight range
1006 if (!highlight_end_positions.isEmpty())
1007 {
1008 int last_highlight_index = highlight_end_positions.size() - 1;
1009 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1010 if (last_highlight_end > partial_phrase_match.start_position)
1011 {
1012 // There is an overlap, so remove the previous phrase match
1013 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1014 highlight_end_positions.remove(last_highlight_index);
1015 partial_phrase_match.start_position = last_highlight_start;
1016 }
1017 }
1018
1019 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1020 highlight_end_positions.add(new Integer(word_match.end_position));
1021 }
1022 // No, but add the partial match back into the list for next time
1023 else
1024 {
1025 partial_phrase_matches.add(partial_phrase_match);
1026 }
1027 }
1028 }
1029 }
1030 else
1031 {
1032 partial_phrase_matches.clear();
1033 }
1034
1035 // See if this word is at the start of any of the phrases
1036 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1037 {
1038 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1039 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1040 if (phrase_query_p_term_1_variants.contains(word_match.word))
1041 {
1042 // If this phrase is just one word long, we have a complete match
1043 if (phrase_query_p_term_variants_list.size() == 1)
1044 {
1045 highlight_start_positions.add(new Integer(word_match.start_position));
1046 highlight_end_positions.add(new Integer(word_match.end_position));
1047 }
1048 // Otherwise we have the start of a potential phrase match
1049 else
1050 {
1051 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1052 }
1053 }
1054 }
1055 }
1056
1057 // Now add the annotation tags into the document at the correct points
1058 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1059
1060 int last_wrote = 0;
1061 for (int i = 0; i < highlight_start_positions.size(); i++)
1062 {
1063 int highlight_start = highlight_start_positions.get(i).intValue();
1064 int highlight_end = highlight_end_positions.get(i).intValue();
1065
1066 // Print anything before the highlight range
1067 if (last_wrote < highlight_start)
1068 {
1069 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1070 content_element.appendChild(this.doc.createTextNode(preceding_text));
1071 }
1072
1073 // Print the highlight text, annotated
1074 if (highlight_end > last_wrote)
1075 {
1076 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1077 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1078 annotation_element.setAttribute("type", "query_term");
1079 content_element.appendChild(annotation_element);
1080 last_wrote = highlight_end;
1081 }
1082 }
1083
1084 // Finish off any unwritten text
1085 if (last_wrote < content_characters.length)
1086 {
1087 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1088 content_element.appendChild(this.doc.createTextNode(remaining_text));
1089 }
1090
1091 return content_element;
1092 }
1093
1094 static private class WordMatch
1095 {
1096 public String word;
1097 public int start_position;
1098 public int end_position;
1099 public boolean preceding_word_matched;
1100
1101 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1102 {
1103 this.word = word;
1104 this.start_position = start_position;
1105 this.end_position = end_position;
1106 this.preceding_word_matched = preceding_word_matched;
1107 }
1108 }
1109
1110 static private class PartialPhraseMatch
1111 {
1112 public int start_position;
1113 public int query_phrase_number;
1114 public int num_words_matched;
1115
1116 public PartialPhraseMatch(int start_position, int query_phrase_number)
1117 {
1118 this.start_position = start_position;
1119 this.query_phrase_number = query_phrase_number;
1120 this.num_words_matched = 1;
1121 }
1122 }
1123}
Note: See TracBrowser for help on using the repository browser.