source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 26044

Last change on this file since 26044 was 26044, checked in by kjdon, 12 years ago

removed a couple of debug print statements

  • Property svn:keywords set to Author Date Id Revision
File size: 42.7 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37import java.io.Serializable;
38
39import org.apache.log4j.*;
40
41/** Action class for retrieving Documents via the message router */
42public class DocumentAction extends Action
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47 // this is used to specify that the sibling nodes of a selected one should be obtained
48 public static final String SIBLING_ARG = "sib";
49 public static final String GOTO_PAGE_ARG = "gp";
50 public static final String ENRICH_DOC_ARG = "end";
51 public static final String EXPAND_DOCUMENT_ARG = "ed";
52 public static final String EXPAND_CONTENTS_ARG = "ec";
53 public static final String REALISTIC_BOOK_ARG = "book";
54
55 /**
56 * if this is set to true, when a document is displayed, any annotation type
57 * services (enrich) will be offered to the user as well
58 */
59 protected boolean provide_annotations = false;
60
61 protected boolean highlight_query_terms = false;
62
63 public boolean configure()
64 {
65 super.configure();
66 String highlight = (String) config_params.get("highlightQueryTerms");
67 if (highlight != null && highlight.equals("true"))
68 {
69 highlight_query_terms = true;
70 }
71 String annotate = (String) config_params.get("displayAnnotationService");
72 if (annotate != null && annotate.equals("true"))
73 {
74 provide_annotations = true;
75 }
76 return true;
77 }
78
79 public Node process(Node message_node)
80 {
81 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83 Element message = this.converter.nodeToElement(message_node);
84
85 // the response
86 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
87 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
88 result.appendChild(page_response);
89
90 // get the request - assume only one
91 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
92 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
93 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
94
95 // just in case there are some that need to get passed to the services
96 HashMap service_params = (HashMap) params.get("s0");
97
98 String collection = (String) params.get(GSParams.COLLECTION);
99 String document_id = (String) params.get(GSParams.DOCUMENT);
100 if (document_id != null && document_id.equals(""))
101 {
102 document_id = null;
103 }
104 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
105 if (href != null && href.equals(""))
106 {
107 href = null;
108 }
109 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
110 if (document_id == null && href == null)
111 {
112 logger.error("no document specified!");
113 return result;
114 }
115 if (rl != null && rl.equals("0"))
116 {
117 // this is a true external link, we should have been directed to a different page or action
118 logger.error("rl value was 0, shouldn't get here");
119 return result;
120 }
121 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
122 if (document_type != null && document_type.equals(""))
123 {
124 //document_type = "hierarchy";
125 document_type = null; // we'll get it later if not already specified
126 }
127 //whether to retrieve siblings or not
128 boolean get_siblings = false;
129 String sibs = (String) params.get(SIBLING_ARG);
130 if (sibs != null && sibs.equals("1"))
131 {
132 get_siblings = true;
133 }
134
135 String doc_id_modifier = "";
136 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
137 if (sibling_num != null && !sibling_num.equals(""))
138 {
139 // we have to modify the doc name
140 doc_id_modifier = "." + sibling_num + ".ss";
141 }
142
143 boolean expand_document = false;
144 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
145 if (ed_arg != null && ed_arg.equals("1"))
146 {
147 expand_document = true;
148 }
149
150 boolean expand_contents = false;
151 if (expand_document)
152 { // we always expand the contents with the text
153 expand_contents = true;
154 }
155 else
156 {
157 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
158 if (ec_arg != null && ec_arg.equals("1"))
159 {
160 expand_contents = true;
161 }
162 }
163
164 UserContext userContext = new UserContext(request);
165
166 //append site metadata
167 addSiteMetadata(page_response, userContext);
168 addInterfaceOptions(page_response);
169
170 // get the additional data needed for the page
171 getBackgroundData(page_response, collection, userContext);
172 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
173
174 // the_document is where all the doc info - structure and metadata etc
175 // is added into, to be returned in the page
176 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
177 page_response.appendChild(the_document);
178
179 // create a basic doc list containing the current node
180 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
181 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
182 basic_doc_list.appendChild(current_doc);
183 if (document_id != null)
184 {
185 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
186 }
187 else
188 {
189 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
190 // do we need this??
191 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
192 }
193
194 if (document_type == null)
195 {
196 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
197 }
198 if (document_type != null)
199 {
200 // set the doctype from the cgi arg or from the server as an attribute
201 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
202 }
203 else
204 {
205 logger.error("doctype is null!!!***********");
206 }
207
208 // Create a parameter list to specify the required structure information
209 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
210
211 if (service_params != null)
212 {
213 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
214 }
215
216 Element ds_param = null;
217 boolean get_structure = false;
218 boolean get_structure_info = false;
219 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
220 {
221 get_structure_info = true;
222
223 if (expand_contents)
224 {
225 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
226 ds_param_list.appendChild(ds_param);
227 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
228 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
229 }
230
231 // get the info needed for paged naviagtion
232 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
233 ds_param_list.appendChild(ds_param);
234 ds_param.setAttribute(GSXML.NAME_ATT, "info");
235 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
236 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
237 ds_param_list.appendChild(ds_param);
238 ds_param.setAttribute(GSXML.NAME_ATT, "info");
239 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
240 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
241 ds_param_list.appendChild(ds_param);
242 ds_param.setAttribute(GSXML.NAME_ATT, "info");
243 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
244
245 if (get_siblings)
246 {
247 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
248 ds_param_list.appendChild(ds_param);
249 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
250 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
251 }
252
253 }
254 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) ||document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY) )
255 {
256 get_structure = true;
257 if (expand_contents)
258 {
259 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
260 ds_param_list.appendChild(ds_param);
261 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
262 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
263 }
264 else
265 {
266 // get the info needed for table of contents
267 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
268 ds_param_list.appendChild(ds_param);
269 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
270 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
271 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
272 ds_param_list.appendChild(ds_param);
273 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
274 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
275 if (get_siblings)
276 {
277 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
278 ds_param_list.appendChild(ds_param);
279 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
280 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
281 }
282 }
283 }
284 else
285 {
286 // we dont need any structure
287 }
288
289 boolean has_dummy = false;
290 if (get_structure || get_structure_info)
291 {
292
293 // Build a request to obtain the document structure
294 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
295 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
296 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
297 ds_message.appendChild(ds_request);
298 ds_request.appendChild(ds_param_list);
299
300 // add the node list we created earlier
301 ds_request.appendChild(basic_doc_list);
302
303 // Process the document structure retrieve message
304 Element ds_response_message = (Element) this.mr.process(ds_message);
305 if (processErrorElements(ds_response_message, page_response))
306 {
307 return result;
308 }
309
310 // get the info and print out
311 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
312 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
313 path = GSPath.appendLink(path, "nodeStructureInfo");
314 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
315 // get the doc_node bit
316 if (ds_response_struct_info != null)
317 {
318 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
319 }
320 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
321 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
322 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
323 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
324
325 if (ds_response_structure != null)
326 {
327 // add the contents of the structure bit into the_document
328 NodeList structs = ds_response_structure.getChildNodes();
329 for (int i = 0; i < structs.getLength(); i++)
330 {
331 the_document.appendChild(this.doc.importNode(structs.item(i), true));
332 }
333 }
334 else
335 {
336 // no structure nodes, so put in a dummy doc node
337 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
338 if (document_id != null)
339 {
340 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
341 }
342 else
343 {
344 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
345
346 }
347 the_document.appendChild(doc_node);
348 has_dummy = true;
349 }
350 }
351 else
352 { // a simple type - we dont have a dummy node for simple
353 // should think about this more
354 // no structure request, so just put in a dummy doc node
355 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
356 if (document_id != null)
357 {
358 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
359 }
360 else
361 {
362 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
363 }
364 the_document.appendChild(doc_node);
365 has_dummy = true;
366 }
367
368 // Build a request to obtain some document metadata
369 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
370 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
371 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
372 dm_message.appendChild(dm_request);
373 // Create a parameter list to specify the required metadata information
374
375 HashSet<String> meta_names = new HashSet<String>();
376 meta_names.add("Title"); // the default
377 if (format_elem != null)
378 {
379 getRequiredMetadataNames(format_elem, meta_names);
380 }
381
382 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
383 if(extraMetaListElem != null)
384 {
385 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
386 for(int i = 0; i < extraMetaList.getLength(); i++)
387 {
388 meta_names.add(((Element)extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
389 }
390 }
391
392 Element dm_param_list = createMetadataParamList(meta_names);
393 if (service_params != null)
394 {
395 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
396 }
397
398 dm_request.appendChild(dm_param_list);
399
400 // create the doc node list for the metadata request
401 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
402 dm_request.appendChild(dm_doc_list);
403
404 // Add each node from the structure response into the metadata request
405 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
406 for (int i = 0; i < doc_nodes.getLength(); i++)
407 {
408 Element doc_node = (Element) doc_nodes.item(i);
409 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
410
411 // Add the documentNode to the list
412 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
413 dm_doc_list.appendChild(dm_doc_node);
414 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
415 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
416 }
417
418 // we also want a metadata request to the top level document to get
419 // assocfilepath - this could be cached too
420 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
421 dm_message.appendChild(doc_meta_request);
422 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
423 if (service_params != null)
424 {
425 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
426 }
427
428 doc_meta_request.appendChild(doc_meta_param_list);
429 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
430 doc_meta_param_list.appendChild(doc_param);
431 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
432 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
433
434 // create the doc node list for the metadata request
435 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
436 doc_meta_request.appendChild(doc_list);
437
438 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
439 // the node we want is the root document node
440 if (document_id != null)
441 {
442 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
443 }
444 else
445 {
446 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
447 // can we assume that href is always a top level doc??
448 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
449 //doc_node.setAttribute("externalURL", has_rl);
450 }
451 doc_list.appendChild(doc_node);
452
453 Element dm_response_message = (Element) this.mr.process(dm_message);
454 if (processErrorElements(dm_response_message, page_response))
455 {
456 return result;
457 }
458
459 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
460 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
461
462 // Merge the metadata with the structure information
463 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
464 for (int i = 0; i < doc_nodes.getLength(); i++)
465 {
466 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
467 }
468 // get the top level doc metadata out
469 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
470 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
471 GSXML.mergeMetadataLists(the_document, top_doc_node);
472
473 // Build a request to obtain some document content
474 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
475 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
476 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
477 dc_message.appendChild(dc_request);
478
479 // Create a parameter list to specify the request parameters - empty for now
480 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
481 if (service_params != null)
482 {
483 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
484 }
485
486 dc_request.appendChild(dc_param_list);
487
488 // get the content
489 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
490 if (expand_document)
491 {
492 dc_request.appendChild(dm_doc_list);
493 }
494 else
495 {
496 dc_request.appendChild(basic_doc_list);
497 }
498 logger.debug("request = " + XMLConverter.getString(dc_message));
499 Element dc_response_message = (Element) this.mr.process(dc_message);
500 if (processErrorElements(dc_response_message, page_response))
501 {
502 return result;
503 }
504
505 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
506
507 if (expand_document)
508 {
509 // Merge the content with the structure information
510 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
511 for (int i = 0; i < doc_nodes.getLength(); i++)
512 {
513 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
514 if (content != null)
515 {
516 if (highlight_query_terms)
517 {
518 content = highlightQueryTerms(request, (Element) content);
519 }
520 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
521 }
522 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
523 }
524 }
525 else
526 {
527 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
528 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
529 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
530 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
531
532 if (dc_response_doc_content == null)
533 {
534 // no content to add
535 if (dc_response_doc.getAttribute("external").equals("true"))
536 {
537
538 //if (dc_response_doc_external != null)
539 //{
540 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
541
542 the_document.setAttribute("selectedNode", href_id);
543 the_document.setAttribute("external", href_id);
544 }
545 return result;
546 }
547 if (highlight_query_terms)
548 {
549 dc_response_doc.removeChild(dc_response_doc_content);
550
551 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
552 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
553 }
554
555 if (provide_annotations)
556 {
557 String service_selected = (String) params.get(ENRICH_DOC_ARG);
558 if (service_selected != null && service_selected.equals("1"))
559 {
560 // now we can modifiy the response doc if needed
561 String enrich_service = (String) params.get(GSParams.SERVICE);
562 // send a message to the service
563 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
564 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
565 enrich_message.appendChild(enrich_request);
566 // check for parameters
567 HashMap e_service_params = (HashMap) params.get("s1");
568 if (e_service_params != null)
569 {
570 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
571 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
572 enrich_request.appendChild(enrich_pl);
573 }
574 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
575 enrich_request.appendChild(e_doc_list);
576 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
577
578 Node enrich_response = this.mr.process(enrich_message);
579
580 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
581 path = GSPath.createPath(links);
582 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
583
584 }
585 } // if provide_annotations
586
587 // use the returned id rather than the sent one cos there may have
588 // been modifiers such as .pr that are removed.
589 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
590 the_document.setAttribute("selectedNode", modified_doc_id);
591 if (has_dummy)
592 {
593 // change the id if necessary and add the content
594 Element dummy_node = (Element) doc_nodes.item(0);
595
596 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
597 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
598 // hack for simple type
599 if (document_type.equals("simple"))
600 {
601 // we dont want the internal docNode, just want the content and metadata in the document
602 // rethink this!!
603 the_document.removeChild(dummy_node);
604
605 NodeList dummy_children = dummy_node.getChildNodes();
606 //for (int i=0; i<dummy_children.getLength(); i++) {
607 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
608 {
609 // special case as we don't want more than one metadata list
610 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
611 {
612 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
613 }
614 else
615 {
616 the_document.appendChild(dummy_children.item(i));
617 }
618 }
619 }
620 }
621 else
622 {
623 // Merge the document content with the metadata and structure information
624 for (int i = 0; i < doc_nodes.getLength(); i++)
625 {
626 Node dn = doc_nodes.item(i);
627 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
628 if (dn_id.equals(modified_doc_id))
629 {
630 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
631 break;
632 }
633 }
634 }
635 }
636 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
637 return result;
638 }
639
640 /**
641 * tell the param class what its arguments are if an action has its own
642 * arguments, this should add them to the params object - particularly
643 * important for args that should not be saved
644 */
645 public boolean addActionParameters(GSParams params)
646 {
647 params.addParameter(GOTO_PAGE_ARG, false);
648 params.addParameter(ENRICH_DOC_ARG, false);
649 params.addParameter(EXPAND_DOCUMENT_ARG, false);
650 params.addParameter(EXPAND_CONTENTS_ARG, false);
651 params.addParameter(REALISTIC_BOOK_ARG, false);
652
653 return true;
654 }
655
656 /**
657 * this method gets the collection description, the format info, the list of
658 * enrich services, etc - stuff that is needed for the page, but is the same
659 * whatever the query is - should be cached
660 */
661 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
662 {
663
664 // create a message to process - contains requests for the collection
665 // description, the format element, the enrich services on offer
666 // these could all be cached
667 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
668 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
669 // the format request - ignore for now, where does this request go to??
670 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
671 info_message.appendChild(format_request);
672
673 // the enrich_services request - only do this if provide_annotations is true
674
675 if (provide_annotations)
676 {
677 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
678 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
679 info_message.appendChild(enrich_services_request);
680 }
681
682 Element info_response = (Element) this.mr.process(info_message);
683
684 // the collection is the first response
685 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
686 Element format_resp = (Element) responses.item(0);
687
688 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
689 if (format_elem != null)
690 {
691 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
692 if(global_format_elem != null)
693 {
694 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
695 }
696
697 // set the format type
698 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
699 page_response.appendChild(this.doc.importNode(format_elem, true));
700 }
701
702 if (provide_annotations)
703 {
704 Element services_resp = (Element) responses.item(1);
705
706 // a new message for the mr
707 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
708 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
709 boolean service_found = false;
710 for (int j = 0; j < e_services.getLength(); j++)
711 {
712 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
713 {
714 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
715 enrich_message.appendChild(s);
716 service_found = true;
717 }
718 }
719 if (service_found)
720 {
721 Element enrich_response = (Element) this.mr.process(enrich_message);
722
723 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
724 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
725 for (int i = 0; i < e_responses.getLength(); i++)
726 {
727 Element e_resp = (Element) e_responses.item(i);
728 Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
729 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
730 service_list.appendChild(e_service);
731 }
732 page_response.appendChild(service_list);
733 }
734 } // if provide_annotations
735 return true;
736
737 }
738
739 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
740 {
741 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
742 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
743 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
744 ds_message.appendChild(ds_request);
745
746 // Create a parameter list to specify the required structure information
747 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
748 Element ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
749 ds_param_list.appendChild(ds_param);
750 ds_param.setAttribute(GSXML.NAME_ATT, "info");
751 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
752
753 ds_request.appendChild(ds_param_list);
754
755 // add the node list we created earlier
756 ds_request.appendChild(basic_doc_list);
757
758 // Process the document structure retrieve message
759 Element ds_response_message = (Element) this.mr.process(ds_message);
760 if (processErrorElements(ds_response_message, page_response))
761 {
762 return null;
763 }
764
765 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
766 String path = GSPath.createPath(links);
767 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
768 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
769 if (doctype_elem != null)
770 {
771 String doc_type = doctype_elem.getAttribute("value");
772 return doc_type;
773 }
774 return null;
775 }
776
777 /**
778 * this involves a bit of a hack to get the equivalent query terms - has to
779 * requery the query service - uses the last selected service name. (if it
780 * ends in query). should this action do the query or should it send a
781 * message to the query action? but that will involve lots of extra stuff.
782 * also doesn't handle phrases properly - just highlights all the terms
783 * found in the text.
784 */
785 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
786 {
787 // do the query again to get term info
788 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
789 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
790
791 HashMap previous_params = (HashMap) params.get("p");
792 if (previous_params == null)
793 {
794 return dc_response_doc_content;
795 }
796 String service_name = (String) previous_params.get(GSParams.SERVICE);
797 if (service_name == null || !service_name.endsWith("Query"))
798 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
799 logger.debug("invalid service, not doing highlighting");
800 return dc_response_doc_content;
801 }
802 String collection = (String) params.get(GSParams.COLLECTION);
803 UserContext userContext = new UserContext(request);
804 String to = GSPath.appendLink(collection, service_name);
805
806 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
807 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
808 mr_query_message.appendChild(mr_query_request);
809
810 // paramList
811 HashMap service_params = (HashMap) params.get("s1");
812
813 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
814 GSXML.addParametersToList(this.doc, query_param_list, service_params);
815 mr_query_request.appendChild(query_param_list);
816
817 // do the query
818 Element mr_query_response = (Element) this.mr.process(mr_query_message);
819
820 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
821 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
822 if (query_term_list_element == null)
823 {
824 // no term info
825 logger.error("No query term information.\n");
826 return dc_response_doc_content;
827 }
828
829 String content = GSXML.getNodeText(dc_response_doc_content);
830
831 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
832 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
833
834 HashSet<String> query_term_variants = new HashSet<String>();
835 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
836 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
837 {
838 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
839 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
840 {
841 for (int i = 0; i < terms_nodelist.getLength(); i++)
842 {
843 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
844 String termValueU = null;
845 String termValueL = null;
846
847 if (termValue.length() > 1)
848 {
849 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
850 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
851 }
852 else
853 {
854 termValueU = termValue.substring(0, 1).toUpperCase();
855 termValueL = termValue.substring(0, 1).toLowerCase();
856 }
857
858 query_term_variants.add(termValueU);
859 query_term_variants.add(termValueL);
860 }
861 }
862 }
863 else
864 {
865 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
866 {
867 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
868 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
869 for (int j = 0; j < equivalent_terms.length; j++)
870 {
871 query_term_variants.add(equivalent_terms[j]);
872 }
873 }
874 }
875
876 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
877
878 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
879 String performed_query = GSXML.getNodeText(query_element) + " ";
880
881 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
882 int term_start = 0;
883 boolean in_term = false;
884 boolean in_phrase = false;
885 for (int i = 0; i < performed_query.length(); i++)
886 {
887 char character = performed_query.charAt(i);
888 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
889
890 // Has a query term just started?
891 if (in_term == false && is_character_letter_or_digit == true)
892 {
893 in_term = true;
894 term_start = i;
895 }
896
897 // Or has a term just finished?
898 else if (in_term == true && is_character_letter_or_digit == false)
899 {
900 in_term = false;
901 String term = performed_query.substring(term_start, i);
902
903 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
904 if (term_element != null)
905 {
906
907 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
908
909 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
910 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
911 {
912 String termValueU = null;
913 String termValueL = null;
914
915 if (term.length() > 1)
916 {
917 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
918 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
919 }
920 else
921 {
922 termValueU = term.substring(0, 1).toUpperCase();
923 termValueL = term.substring(0, 1).toLowerCase();
924 }
925
926 phrase_query_p_term_x_variants.add(termValueU);
927 phrase_query_p_term_x_variants.add(termValueL);
928 }
929 else
930 {
931 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
932 {
933 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
934 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
935 for (int k = 0; k < term_equivalent_terms.length; k++)
936 {
937 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
938 }
939 }
940 }
941 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
942
943 if (in_phrase == false)
944 {
945 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
946 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
947 }
948 }
949 }
950 // Watch for phrases (surrounded by quotes)
951 if (character == '\"')
952 {
953 // Has a phrase just started?
954 if (in_phrase == false)
955 {
956 in_phrase = true;
957 }
958 // Or has a phrase just finished?
959 else if (in_phrase == true)
960 {
961 in_phrase = false;
962 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
963 }
964
965 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
966 }
967 }
968
969 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
970 }
971
972 /**
973 * Highlights query terms in a piece of text.
974 */
975 private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
976 {
977 // Convert the content string to an array of characters for speed
978 char[] content_characters = new char[content.length()];
979 content.getChars(0, content.length(), content_characters, 0);
980
981 // Now skim through the content, identifying word matches
982 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
983 int word_start = 0;
984 boolean in_word = false;
985 boolean preceding_word_matched = false;
986 boolean inTag = false;
987 for (int i = 0; i < content_characters.length; i++)
988 {
989 //We don't want to find words inside HTML tags
990 if (content_characters[i] == '<')
991 {
992 inTag = true;
993 continue;
994 }
995 else if (inTag && content_characters[i] == '>')
996 {
997 inTag = false;
998 }
999 else if (inTag)
1000 {
1001 continue;
1002 }
1003
1004 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1005
1006 // Has a word just started?
1007 if (in_word == false && is_character_letter_or_digit == true)
1008 {
1009 in_word = true;
1010 word_start = i;
1011 }
1012
1013 // Or has a word just finished?
1014 else if (in_word == true && is_character_letter_or_digit == false)
1015 {
1016 in_word = false;
1017
1018 // Check if the word matches any of the query term equivalents
1019 String word = new String(content_characters, word_start, (i - word_start));
1020 if (query_term_variants.contains(word))
1021 {
1022 // We have found a matching word, so remember its location
1023 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1024 preceding_word_matched = true;
1025 }
1026 else
1027 {
1028 preceding_word_matched = false;
1029 }
1030 }
1031 }
1032
1033 // Don't forget the last word...
1034 if (in_word == true)
1035 {
1036 // Check if the word matches any of the query term equivalents
1037 String word = new String(content_characters, word_start, (content_characters.length - word_start));
1038 if (query_term_variants.contains(word))
1039 {
1040 // We have found a matching word, so remember its location
1041 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1042 }
1043 }
1044
1045 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1046 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1047
1048 // Deal with phrases now
1049 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1050 for (int i = 0; i < word_matches.size(); i++)
1051 {
1052 WordMatch word_match = word_matches.get(i);
1053
1054 // See if any partial phrase matches are extended by this word
1055 if (word_match.preceding_word_matched)
1056 {
1057 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1058 {
1059 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1060 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1061 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1062 if (phrase_query_p_term_x_variants.contains(word_match.word))
1063 {
1064 partial_phrase_match.num_words_matched++;
1065
1066 // Has a complete phrase match occurred?
1067 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1068 {
1069 // Check for overlaps by looking at the previous highlight range
1070 if (!highlight_end_positions.isEmpty())
1071 {
1072 int last_highlight_index = highlight_end_positions.size() - 1;
1073 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1074 if (last_highlight_end > partial_phrase_match.start_position)
1075 {
1076 // There is an overlap, so remove the previous phrase match
1077 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1078 highlight_end_positions.remove(last_highlight_index);
1079 partial_phrase_match.start_position = last_highlight_start;
1080 }
1081 }
1082
1083 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1084 highlight_end_positions.add(new Integer(word_match.end_position));
1085 }
1086 // No, but add the partial match back into the list for next time
1087 else
1088 {
1089 partial_phrase_matches.add(partial_phrase_match);
1090 }
1091 }
1092 }
1093 }
1094 else
1095 {
1096 partial_phrase_matches.clear();
1097 }
1098
1099 // See if this word is at the start of any of the phrases
1100 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1101 {
1102 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1103 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1104 if (phrase_query_p_term_1_variants.contains(word_match.word))
1105 {
1106 // If this phrase is just one word long, we have a complete match
1107 if (phrase_query_p_term_variants_list.size() == 1)
1108 {
1109 highlight_start_positions.add(new Integer(word_match.start_position));
1110 highlight_end_positions.add(new Integer(word_match.end_position));
1111 }
1112 // Otherwise we have the start of a potential phrase match
1113 else
1114 {
1115 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1116 }
1117 }
1118 }
1119 }
1120
1121 // Now add the annotation tags into the document at the correct points
1122 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1123
1124 int last_wrote = 0;
1125 for (int i = 0; i < highlight_start_positions.size(); i++)
1126 {
1127 int highlight_start = highlight_start_positions.get(i).intValue();
1128 int highlight_end = highlight_end_positions.get(i).intValue();
1129
1130 // Print anything before the highlight range
1131 if (last_wrote < highlight_start)
1132 {
1133 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1134 content_element.appendChild(this.doc.createTextNode(preceding_text));
1135 }
1136
1137 // Print the highlight text, annotated
1138 if (highlight_end > last_wrote)
1139 {
1140 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1141 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1142 annotation_element.setAttribute("type", "query_term");
1143 content_element.appendChild(annotation_element);
1144 last_wrote = highlight_end;
1145 }
1146 }
1147
1148 // Finish off any unwritten text
1149 if (last_wrote < content_characters.length)
1150 {
1151 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1152 content_element.appendChild(this.doc.createTextNode(remaining_text));
1153 }
1154
1155 return content_element;
1156 }
1157
1158 static private class WordMatch
1159 {
1160 public String word;
1161 public int start_position;
1162 public int end_position;
1163 public boolean preceding_word_matched;
1164
1165 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1166 {
1167 this.word = word;
1168 this.start_position = start_position;
1169 this.end_position = end_position;
1170 this.preceding_word_matched = preceding_word_matched;
1171 }
1172 }
1173
1174 static private class PartialPhraseMatch
1175 {
1176 public int start_position;
1177 public int query_phrase_number;
1178 public int num_words_matched;
1179
1180 public PartialPhraseMatch(int start_position, int query_phrase_number)
1181 {
1182 this.start_position = start_position;
1183 this.query_phrase_number = query_phrase_number;
1184 this.num_words_matched = 1;
1185 }
1186 }
1187}
Note: See TracBrowser for help on using the repository browser.