source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25128

Last change on this file since 25128 was 25128, checked in by sjm84, 12 years ago

Second round of changes adding in the login ability, also interface options are now returned whenever site metadata is returned

  • Property svn:keywords set to Author Date Id Revision
File size: 39.5 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router */
41public class DocumentAction extends Action
42{
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /**
52 * if this is set to true, when a document is displayed, any annotation type
53 * services (enrich) will be offered to the user as well
54 */
55 protected boolean provide_annotations = false;
56
57 protected boolean highlight_query_terms = false;
58
59 public boolean configure()
60 {
61 super.configure();
62 String highlight = (String) config_params.get("highlightQueryTerms");
63 if (highlight != null && highlight.equals("true"))
64 {
65 highlight_query_terms = true;
66 }
67 String annotate = (String) config_params.get("displayAnnotationService");
68 if (annotate != null && annotate.equals("true"))
69 {
70 provide_annotations = true;
71 }
72 return true;
73 }
74
75 public Node process(Node message_node)
76 {
77 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
78
79 Element message = this.converter.nodeToElement(message_node);
80
81 // the response
82 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
83 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
84 result.appendChild(page_response);
85
86 // get the request - assume only one
87 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
88 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
89 HashMap params = GSXML.extractParams(cgi_paramList, false);
90
91 // just in case there are some that need to get passed to the services
92 HashMap service_params = (HashMap) params.get("s0");
93
94 String has_rl = null;
95 String has_href = null;
96 has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
97 has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
98 String collection = (String) params.get(GSParams.COLLECTION);
99 UserContext userContext = new UserContext(request);
100 String document_name = (String) params.get(GSParams.DOCUMENT);
101 if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals("")))
102 {
103 logger.error("no document specified!");
104 return result;
105 }
106 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
107 if (document_type == null)
108 {
109 document_type = "simple";
110 }
111 //whether to retrieve siblings or not
112 boolean get_siblings = false;
113 String sibs = (String) params.get(SIBLING_ARG);
114 if (sibs != null && sibs.equals("1"))
115 {
116 get_siblings = true;
117 }
118
119 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
120 if (sibling_num != null && !sibling_num.equals(""))
121 {
122 // we have to modify the doc name
123 document_name = document_name + "." + sibling_num + ".ss";
124 }
125
126 boolean expand_document = false;
127 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
128 if (ed_arg != null && ed_arg.equals("1"))
129 {
130 expand_document = true;
131 }
132
133 boolean expand_contents = false;
134 if (expand_document)
135 { // we always expand the contents with the text
136 expand_contents = true;
137 }
138 else
139 {
140 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
141 if (ec_arg != null && ec_arg.equals("1"))
142 {
143 expand_contents = true;
144 }
145 }
146
147 //append site metadata
148 addSiteMetadata(page_response, userContext);
149 addInterfaceOptions(page_response);
150
151 // get the additional data needed for the page
152 getBackgroundData(page_response, collection, userContext);
153 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
154
155 // the_document is where all the doc info - structure and metadata etc
156 // is added into, to be returned in the page
157 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
158 page_response.appendChild(the_document);
159
160 // set the doctype from the cgi arg as an attribute
161 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
162
163 // create a basic doc list containing the current node
164 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
165 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
166 basic_doc_list.appendChild(current_doc);
167 if (document_name.length() != 0)
168 {
169 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
170 }
171 else if (has_href.length() != 0)
172 {
173 current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
174 current_doc.setAttribute("externalURL", has_rl);
175 }
176
177 // Create a parameter list to specify the required structure information
178 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
179
180 if (service_params != null)
181 {
182 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
183 }
184
185 Element ds_param = null;
186 boolean get_structure = false;
187 boolean get_structure_info = false;
188 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
189 {
190 get_structure_info = true;
191
192 if (expand_contents)
193 {
194 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
195 ds_param_list.appendChild(ds_param);
196 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
197 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
198 }
199
200 // get teh info needed for paged naviagtion
201 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
202 ds_param_list.appendChild(ds_param);
203 ds_param.setAttribute(GSXML.NAME_ATT, "info");
204 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
205 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206 ds_param_list.appendChild(ds_param);
207 ds_param.setAttribute(GSXML.NAME_ATT, "info");
208 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
209 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
210 ds_param_list.appendChild(ds_param);
211 ds_param.setAttribute(GSXML.NAME_ATT, "info");
212 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
213
214 if (get_siblings)
215 {
216 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
217 ds_param_list.appendChild(ds_param);
218 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
219 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
220 }
221
222 }
223 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
224 {
225 get_structure = true;
226 if (expand_contents)
227 {
228 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
229 ds_param_list.appendChild(ds_param);
230 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
231 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
232 }
233 else
234 {
235 // get the info needed for table of contents
236 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
237 ds_param_list.appendChild(ds_param);
238 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
239 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
240 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
241 ds_param_list.appendChild(ds_param);
242 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
243 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
244 if (get_siblings)
245 {
246 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
247 ds_param_list.appendChild(ds_param);
248 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
249 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
250 }
251 }
252 }
253 else
254 {
255 // we dont need any structure
256 }
257
258 boolean has_dummy = false;
259 if (get_structure || get_structure_info)
260 {
261
262 // Build a request to obtain the document structure
263 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
264 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
265 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
266 ds_message.appendChild(ds_request);
267 ds_request.appendChild(ds_param_list);
268
269 // create a doc_node_list and put in the doc_node that we are interested in
270 ds_request.appendChild(basic_doc_list);
271
272 // Process the document structure retrieve message
273 Element ds_response_message = (Element) this.mr.process(ds_message);
274 if (processErrorElements(ds_response_message, page_response))
275 {
276 return result;
277 }
278
279 // get the info and print out
280 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
281 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
282 path = GSPath.appendLink(path, "nodeStructureInfo");
283 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
284 // get the doc_node bit
285 if (ds_response_struct_info != null)
286 {
287 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
288 }
289 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
290 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
291 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
292 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
293
294 if (ds_response_structure != null)
295 {
296 // add the contents of the structure bit into the_document
297 NodeList structs = ds_response_structure.getChildNodes();
298 for (int i = 0; i < structs.getLength(); i++)
299 {
300 the_document.appendChild(this.doc.importNode(structs.item(i), true));
301 }
302 }
303 else
304 {
305 // no structure nodes, so put in a dummy doc node
306 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
307 if (document_name.length() != 0)
308 {
309 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
310 }
311 else if (has_href.length() != 0)
312 {
313 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
314 doc_node.setAttribute("externalURL", has_rl);
315 }
316 the_document.appendChild(doc_node);
317 has_dummy = true;
318 }
319 }
320 else
321 { // a simple type - we dont have a dummy node for simple
322 // should think about this more
323 // no structure request, so just put in a dummy doc node
324 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
325 if (document_name.length() != 0)
326 {
327 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
328 }
329 else if (has_href.length() != 0)
330 {
331 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
332 doc_node.setAttribute("externalURL", has_rl);
333 }
334 the_document.appendChild(doc_node);
335 has_dummy = true;
336 }
337
338 // Build a request to obtain some document metadata
339 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
340 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
341 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
342 dm_message.appendChild(dm_request);
343 // Create a parameter list to specify the required metadata information
344
345 HashSet meta_names = new HashSet();
346 meta_names.add("Title"); // the default
347 if (format_elem != null)
348 {
349 getRequiredMetadataNames(format_elem, meta_names);
350 }
351
352 Element dm_param_list = createMetadataParamList(meta_names);
353 if (service_params != null)
354 {
355 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
356 }
357
358 dm_request.appendChild(dm_param_list);
359
360 // create the doc node list for the metadata request
361 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
362 dm_request.appendChild(dm_doc_list);
363
364 // Add each node from the structure response into the metadata request
365 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
366 for (int i = 0; i < doc_nodes.getLength(); i++)
367 {
368 Element doc_node = (Element) doc_nodes.item(i);
369 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
370
371 // Add the documentNode to the list
372 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
373 dm_doc_list.appendChild(dm_doc_node);
374 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
375 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
376 }
377
378 // we also want a metadata request to the top level document to get
379 // assocfilepath - this could be cached too
380 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
381 dm_message.appendChild(doc_meta_request);
382 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
383 if (service_params != null)
384 {
385 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
386 }
387
388 doc_meta_request.appendChild(doc_meta_param_list);
389 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
390 doc_meta_param_list.appendChild(doc_param);
391 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
392 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
393
394 // create the doc node list for the metadata request
395 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
396 doc_meta_request.appendChild(doc_list);
397
398 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
399 // the node we want is the root document node
400 if (document_name.length() != 0)
401 {
402 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name + ".rt");
403 }
404 else if (has_href.length() != 0)
405 {
406 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href + ".rt");
407 doc_node.setAttribute("externalURL", has_rl);
408 }
409 doc_list.appendChild(doc_node);
410
411 Element dm_response_message = (Element) this.mr.process(dm_message);
412 if (processErrorElements(dm_response_message, page_response))
413 {
414 return result;
415 }
416
417 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
418 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
419
420 // Merge the metadata with the structure information
421 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
422 for (int i = 0; i < doc_nodes.getLength(); i++)
423 {
424 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
425 }
426 // get the top level doc metadata out
427 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
428 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
429 GSXML.mergeMetadataLists(the_document, top_doc_node);
430
431 // Build a request to obtain some document content
432 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
433 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
434 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
435 dc_message.appendChild(dc_request);
436
437 // Create a parameter list to specify the request parameters - empty for now
438 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
439 if (service_params != null)
440 {
441 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
442 }
443
444 dc_request.appendChild(dc_param_list);
445
446 // get the content
447 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
448 if (expand_document)
449 {
450 dc_request.appendChild(dm_doc_list);
451 }
452 else
453 {
454 dc_request.appendChild(basic_doc_list);
455 }
456 logger.debug("request = " + converter.getString(dc_message));
457 Element dc_response_message = (Element) this.mr.process(dc_message);
458 if (processErrorElements(dc_response_message, page_response))
459 {
460 return result;
461 }
462
463 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
464
465 if (expand_document)
466 {
467 // Merge the content with the structure information
468 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
469 for (int i = 0; i < doc_nodes.getLength(); i++)
470 {
471 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
472 if (content != null)
473 {
474 if (highlight_query_terms)
475 {
476 content = highlightQueryTerms(request, (Element) content);
477 }
478 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
479 }
480 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
481 }
482 }
483 else
484 {
485 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
486 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
487 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
488 Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
489
490 if (dc_response_doc_content == null)
491 {
492 // no content to add
493 if (dc_response_doc_external != null)
494 {
495 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
496
497 the_document.setAttribute("selectedNode", modified_doc_id);
498 the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
499 }
500 return result;
501 }
502 if (highlight_query_terms)
503 {
504 dc_response_doc.removeChild(dc_response_doc_content);
505
506 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
507 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
508 }
509
510 if (provide_annotations)
511 {
512 String service_selected = (String) params.get(ENRICH_DOC_ARG);
513 if (service_selected != null && service_selected.equals("1"))
514 {
515 // now we can modifiy the response doc if needed
516 String enrich_service = (String) params.get(GSParams.SERVICE);
517 // send a message to the service
518 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
519 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
520 enrich_message.appendChild(enrich_request);
521 // check for parameters
522 HashMap e_service_params = (HashMap) params.get("s1");
523 if (e_service_params != null)
524 {
525 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
526 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
527 enrich_request.appendChild(enrich_pl);
528 }
529 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
530 enrich_request.appendChild(e_doc_list);
531 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
532
533 Node enrich_response = this.mr.process(enrich_message);
534
535 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
536 path = GSPath.createPath(links);
537 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
538
539 }
540 } // if provide_annotations
541
542 // use the returned id rather than the sent one cos there may have
543 // been modifiers such as .pr that are removed.
544 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
545 the_document.setAttribute("selectedNode", modified_doc_id);
546 if (has_dummy)
547 {
548 // change the id if necessary and add the content
549 Element dummy_node = (Element) doc_nodes.item(0);
550
551 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
552 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
553 // hack for simple type
554 if (document_type.equals("simple"))
555 {
556 // we dont want the internal docNode, just want the content and metadata in the document
557 // rethink this!!
558 the_document.removeChild(dummy_node);
559
560 NodeList dummy_children = dummy_node.getChildNodes();
561 //for (int i=0; i<dummy_children.getLength(); i++) {
562 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
563 {
564 // special case as we don't want more than one metadata list
565 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
566 {
567 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
568 }
569 else
570 {
571 the_document.appendChild(dummy_children.item(i));
572 }
573 }
574 }
575 }
576 else
577 {
578 // Merge the document content with the metadata and structure information
579 for (int i = 0; i < doc_nodes.getLength(); i++)
580 {
581 Node dn = doc_nodes.item(i);
582 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
583 if (dn_id.equals(modified_doc_id))
584 {
585 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
586 break;
587 }
588 }
589 }
590 }
591 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
592 return result;
593 }
594
595 /**
596 * tell the param class what its arguments are if an action has its own
597 * arguments, this should add them to the params object - particularly
598 * important for args that should not be saved
599 */
600 public boolean getActionParameters(GSParams params)
601 {
602 params.addParameter(GOTO_PAGE_ARG, false);
603 params.addParameter(ENRICH_DOC_ARG, false);
604 return true;
605 }
606
607 /**
608 * this method gets the collection description, the format info, the list of
609 * enrich services, etc - stuff that is needed for the page, but is the same
610 * whatever the query is - should be cached
611 */
612 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
613 {
614
615 // create a message to process - contains requests for the collection
616 // description, the format element, the enrich services on offer
617 // these could all be cached
618 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
619 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
620 // the format request - ignore for now, where does this request go to??
621 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
622 info_message.appendChild(format_request);
623
624 // the enrich_services request - only do this if provide_annotations is true
625
626 if (provide_annotations)
627 {
628 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
629 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
630 info_message.appendChild(enrich_services_request);
631 }
632
633 Element info_response = (Element) this.mr.process(info_message);
634
635 // the collection is the first response
636 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
637 Element format_resp = (Element) responses.item(0);
638
639 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
640 if (format_elem != null)
641 {
642 logger.debug("doc action found a format statement");
643 // set teh format type
644 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
645 page_response.appendChild(this.doc.importNode(format_elem, true));
646 }
647
648 if (provide_annotations)
649 {
650 Element services_resp = (Element) responses.item(1);
651
652 // a new message for the mr
653 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
654
655 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
656 boolean service_found = false;
657 for (int j = 0; j < e_services.getLength(); j++)
658 {
659 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
660 {
661 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
662 enrich_message.appendChild(s);
663 service_found = true;
664 }
665 }
666 if (service_found)
667 {
668 Element enrich_response = (Element) this.mr.process(enrich_message);
669
670 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
671 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
672 for (int i = 0; i < e_responses.getLength(); i++)
673 {
674 Element e_resp = (Element) e_responses.item(i);
675 Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
676 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
677 service_list.appendChild(e_service);
678 }
679 page_response.appendChild(service_list);
680 }
681 } // if provide_annotations
682 return true;
683
684 }
685
686 /**
687 * this involves a bit of a hack to get the equivalent query terms - has to
688 * requery the query service - uses the last selected service name. (if it
689 * ends in query). should this action do the query or should it send a
690 * message to the query action? but that will involve lots of extra stuff.
691 * also doesn't handle phrases properly - just highlights all the terms
692 * found in the text.
693 */
694 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
695 {
696
697 // do the query again to get term info
698 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
699 HashMap params = GSXML.extractParams(cgi_param_list, false);
700
701 HashMap previous_params = (HashMap) params.get("p");
702 if (previous_params == null)
703 {
704 return dc_response_doc_content;
705 }
706 String service_name = (String) previous_params.get(GSParams.SERVICE);
707 if (service_name == null || !service_name.endsWith("Query"))
708 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
709 logger.debug("invalid service, not doing highlighting");
710 return dc_response_doc_content;
711 }
712 String collection = (String) params.get(GSParams.COLLECTION);
713 UserContext userContext = new UserContext(request);
714 String to = GSPath.appendLink(collection, service_name);
715
716 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
717 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
718 mr_query_message.appendChild(mr_query_request);
719
720 // paramList
721 HashMap service_params = (HashMap) params.get("s1");
722
723 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
724 GSXML.addParametersToList(this.doc, query_param_list, service_params);
725 mr_query_request.appendChild(query_param_list);
726
727 // do the query
728 Element mr_query_response = (Element) this.mr.process(mr_query_message);
729
730 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
731 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
732 if (query_term_list_element == null)
733 {
734 // no term info
735 logger.error("No query term information.\n");
736 return dc_response_doc_content;
737 }
738
739 String content = GSXML.getNodeText(dc_response_doc_content);
740
741 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
742 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
743
744 HashSet query_term_variants = new HashSet();
745 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
746 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
747 {
748 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
749 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
750 {
751 for (int i = 0; i < terms_nodelist.getLength(); i++)
752 {
753 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
754 String termValueU = null;
755 String termValueL = null;
756
757 if (termValue.length() > 1)
758 {
759 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
760 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
761 }
762 else
763 {
764 termValueU = termValue.substring(0, 1).toUpperCase();
765 termValueL = termValue.substring(0, 1).toLowerCase();
766 }
767
768 query_term_variants.add(termValueU);
769 query_term_variants.add(termValueL);
770 }
771 }
772 }
773 else
774 {
775 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
776 {
777 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
778 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
779 for (int j = 0; j < equivalent_terms.length; j++)
780 {
781 query_term_variants.add(equivalent_terms[j]);
782 }
783 }
784 }
785
786 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
787
788 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
789 String performed_query = GSXML.getNodeText(query_element) + " ";
790
791 ArrayList phrase_query_p_term_variants_list = new ArrayList();
792 int term_start = 0;
793 boolean in_term = false;
794 boolean in_phrase = false;
795 for (int i = 0; i < performed_query.length(); i++)
796 {
797 char character = performed_query.charAt(i);
798 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
799
800 // Has a query term just started?
801 if (in_term == false && is_character_letter_or_digit == true)
802 {
803 in_term = true;
804 term_start = i;
805 }
806
807 // Or has a term just finished?
808 else if (in_term == true && is_character_letter_or_digit == false)
809 {
810 in_term = false;
811 String term = performed_query.substring(term_start, i);
812
813 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
814 if (term_element != null)
815 {
816
817 HashSet phrase_query_p_term_x_variants = new HashSet();
818
819 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
820 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
821 {
822 String termValueU = null;
823 String termValueL = null;
824
825 if (term.length() > 1)
826 {
827 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
828 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
829 }
830 else
831 {
832 termValueU = term.substring(0, 1).toUpperCase();
833 termValueL = term.substring(0, 1).toLowerCase();
834 }
835
836 phrase_query_p_term_x_variants.add(termValueU);
837 phrase_query_p_term_x_variants.add(termValueL);
838 }
839 else
840 {
841 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
842 {
843 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
844 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
845 for (int k = 0; k < term_equivalent_terms.length; k++)
846 {
847 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
848 }
849 }
850 }
851 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
852
853 if (in_phrase == false)
854 {
855 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
856 phrase_query_p_term_variants_list = new ArrayList();
857 }
858 }
859 }
860 // Watch for phrases (surrounded by quotes)
861 if (character == '\"')
862 {
863 // Has a phrase just started?
864 if (in_phrase == false)
865 {
866 in_phrase = true;
867 }
868 // Or has a phrase just finished?
869 else if (in_phrase == true)
870 {
871 in_phrase = false;
872 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
873 }
874
875 phrase_query_p_term_variants_list = new ArrayList();
876 }
877 }
878
879 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
880 }
881
882 /**
883 * Highlights query terms in a piece of text.
884 */
885 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
886 {
887 // Convert the content string to an array of characters for speed
888 char[] content_characters = new char[content.length()];
889 content.getChars(0, content.length(), content_characters, 0);
890
891 // Now skim through the content, identifying word matches
892 ArrayList word_matches = new ArrayList();
893 int word_start = 0;
894 boolean in_word = false;
895 boolean preceding_word_matched = false;
896 boolean inTag = false;
897 for (int i = 0; i < content_characters.length; i++)
898 {
899 //We don't want to find words inside HTML tags
900 if (content_characters[i] == '<')
901 {
902 inTag = true;
903 continue;
904 }
905 else if (inTag && content_characters[i] == '>')
906 {
907 inTag = false;
908 }
909 else if (inTag)
910 {
911 continue;
912 }
913
914 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
915
916 // Has a word just started?
917 if (in_word == false && is_character_letter_or_digit == true)
918 {
919 in_word = true;
920 word_start = i;
921 }
922
923 // Or has a word just finished?
924 else if (in_word == true && is_character_letter_or_digit == false)
925 {
926 in_word = false;
927
928 // Check if the word matches any of the query term equivalents
929 String word = new String(content_characters, word_start, (i - word_start));
930 if (query_term_variants.contains(word))
931 {
932 // We have found a matching word, so remember its location
933 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
934 preceding_word_matched = true;
935 }
936 else
937 {
938 preceding_word_matched = false;
939 }
940 }
941 }
942
943 // Don't forget the last word...
944 if (in_word == true)
945 {
946 // Check if the word matches any of the query term equivalents
947 String word = new String(content_characters, word_start, (content_characters.length - word_start));
948 if (query_term_variants.contains(word))
949 {
950 // We have found a matching word, so remember its location
951 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
952 }
953 }
954
955 ArrayList highlight_start_positions = new ArrayList();
956 ArrayList highlight_end_positions = new ArrayList();
957
958 // Deal with phrases now
959 ArrayList partial_phrase_matches = new ArrayList();
960 for (int i = 0; i < word_matches.size(); i++)
961 {
962 WordMatch word_match = (WordMatch) word_matches.get(i);
963
964 // See if any partial phrase matches are extended by this word
965 if (word_match.preceding_word_matched)
966 {
967 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
968 {
969 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
970 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
971 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
972 if (phrase_query_p_term_x_variants.contains(word_match.word))
973 {
974 partial_phrase_match.num_words_matched++;
975
976 // Has a complete phrase match occurred?
977 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
978 {
979 // Check for overlaps by looking at the previous highlight range
980 if (!highlight_end_positions.isEmpty())
981 {
982 int last_highlight_index = highlight_end_positions.size() - 1;
983 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
984 if (last_highlight_end > partial_phrase_match.start_position)
985 {
986 // There is an overlap, so remove the previous phrase match
987 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
988 highlight_end_positions.remove(last_highlight_index);
989 partial_phrase_match.start_position = last_highlight_start;
990 }
991 }
992
993 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
994 highlight_end_positions.add(new Integer(word_match.end_position));
995 }
996 // No, but add the partial match back into the list for next time
997 else
998 {
999 partial_phrase_matches.add(partial_phrase_match);
1000 }
1001 }
1002 }
1003 }
1004 else
1005 {
1006 partial_phrase_matches.clear();
1007 }
1008
1009 // See if this word is at the start of any of the phrases
1010 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1011 {
1012 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
1013 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1014 if (phrase_query_p_term_1_variants.contains(word_match.word))
1015 {
1016 // If this phrase is just one word long, we have a complete match
1017 if (phrase_query_p_term_variants_list.size() == 1)
1018 {
1019 highlight_start_positions.add(new Integer(word_match.start_position));
1020 highlight_end_positions.add(new Integer(word_match.end_position));
1021 }
1022 // Otherwise we have the start of a potential phrase match
1023 else
1024 {
1025 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1026 }
1027 }
1028 }
1029 }
1030
1031 // Now add the annotation tags into the document at the correct points
1032 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1033
1034 int last_wrote = 0;
1035 for (int i = 0; i < highlight_start_positions.size(); i++)
1036 {
1037 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
1038 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
1039
1040 // Print anything before the highlight range
1041 if (last_wrote < highlight_start)
1042 {
1043 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1044 content_element.appendChild(this.doc.createTextNode(preceding_text));
1045 }
1046
1047 // Print the highlight text, annotated
1048 if (highlight_end > last_wrote)
1049 {
1050 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1051 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1052 annotation_element.setAttribute("type", "query_term");
1053 content_element.appendChild(annotation_element);
1054 last_wrote = highlight_end;
1055 }
1056 }
1057
1058 // Finish off any unwritten text
1059 if (last_wrote < content_characters.length)
1060 {
1061 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1062 content_element.appendChild(this.doc.createTextNode(remaining_text));
1063 }
1064
1065 return content_element;
1066 }
1067
1068 static private class WordMatch
1069 {
1070 public String word;
1071 public int start_position;
1072 public int end_position;
1073 public boolean preceding_word_matched;
1074
1075 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1076 {
1077 this.word = word;
1078 this.start_position = start_position;
1079 this.end_position = end_position;
1080 this.preceding_word_matched = preceding_word_matched;
1081 }
1082 }
1083
1084 static private class PartialPhraseMatch
1085 {
1086 public int start_position;
1087 public int query_phrase_number;
1088 public int num_words_matched;
1089
1090 public PartialPhraseMatch(int start_position, int query_phrase_number)
1091 {
1092 this.start_position = start_position;
1093 this.query_phrase_number = query_phrase_number;
1094 this.num_words_matched = 1;
1095 }
1096 }
1097}
Note: See TracBrowser for help on using the repository browser.