source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 29521

Last change on this file since 29521 was 29521, checked in by kjdon, 9 years ago

Get the format element and look for parameter defaults in it. If we have a simple doc and also have a dummy doc node (which can happen if ed=1 has been set as a default for the collection), then remove the dummy node.

  • Property svn:keywords set to Author Date Id Revision
File size: 44.4 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37import java.io.Serializable;
38
39import org.apache.log4j.*;
40
41/** Action class for retrieving Documents via the message router */
42public class DocumentAction extends Action
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47 // this is used to specify that the sibling nodes of a selected one should be obtained
48 public static final String SIBLING_ARG = "sib";
49 public static final String GOTO_PAGE_ARG = "gp";
50 public static final String ENRICH_DOC_ARG = "end";
51 public static final String EXPAND_DOCUMENT_ARG = "ed";
52 public static final String EXPAND_CONTENTS_ARG = "ec";
53 public static final String REALISTIC_BOOK_ARG = "book";
54
55 /**
56 * if this is set to true, when a document is displayed, any annotation type
57 * services (enrich) will be offered to the user as well
58 */
59 protected boolean provide_annotations = false;
60
61 protected boolean highlight_query_terms = false;
62
63 public boolean configure()
64 {
65 super.configure();
66 String highlight = (String) config_params.get("highlightQueryTerms");
67 if (highlight != null && highlight.equals("true"))
68 {
69 highlight_query_terms = true;
70 }
71 String annotate = (String) config_params.get("displayAnnotationService");
72 if (annotate != null && annotate.equals("true"))
73 {
74 provide_annotations = true;
75 }
76 return true;
77 }
78
79 public Node process(Node message_node)
80 {
81 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83 Element message = GSXML.nodeToElement(message_node);
84 Document doc = message.getOwnerDocument();
85
86 // the response
87 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
88 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
89 result.appendChild(page_response);
90
91 // get the request - assume only one
92 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
93 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
94 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
95
96 // just in case there are some that need to get passed to the services
97 HashMap service_params = (HashMap) params.get("s0");
98
99 String collection = (String) params.get(GSParams.COLLECTION);
100 String document_id = (String) params.get(GSParams.DOCUMENT);
101 if (document_id != null && document_id.equals(""))
102 {
103 document_id = null;
104 }
105 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
106 if (href != null && href.equals(""))
107 {
108 href = null;
109 }
110 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
111 if (document_id == null && href == null)
112 {
113 logger.error("no document specified!");
114 return result;
115 }
116 if (rl != null && rl.equals("0"))
117 {
118 // this is a true external link, we should have been directed to a different page or action
119 logger.error("rl value was 0, shouldn't get here");
120 return result;
121 }
122
123 UserContext userContext = new UserContext(request);
124
125 //append site metadata
126 addSiteMetadata(page_response, userContext);
127 addInterfaceOptions(page_response);
128
129 // get the additional data needed for the page
130 getBackgroundData(page_response, collection, userContext);
131 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
132
133 if (format_elem != null) {
134 // lets look for param defaults set in config file
135 NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
136 for (int i=0; i<param_defaults.getLength(); i++) {
137 Element p = (Element)param_defaults.item(i);
138 String name = p.getAttribute(GSXML.NAME_ATT);
139 if (params.get(name) ==null) {
140 // wasn't set from interface
141 String value = p.getAttribute(GSXML.VALUE_ATT);
142 params.put(name, value );
143 // also add into request param xml so that xslt knows it too
144 GSXML.addParameterToList(cgi_paramList, name, value);
145 }
146 }
147 }
148 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
149 if (document_type != null && document_type.equals(""))
150 {
151 //document_type = "hierarchy";
152 document_type = null; // we'll get it later if not already specified
153 }
154 //whether to retrieve siblings or not
155 boolean get_siblings = false;
156 String sibs = (String) params.get(SIBLING_ARG);
157 if (sibs != null && sibs.equals("1"))
158 {
159 get_siblings = true;
160 }
161
162 String doc_id_modifier = "";
163 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
164 if (sibling_num != null && !sibling_num.equals(""))
165 {
166 // we have to modify the doc name
167 doc_id_modifier = "." + sibling_num + ".ss";
168 }
169
170 boolean expand_document = false;
171 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
172 if (ed_arg != null && ed_arg.equals("1"))
173 {
174 expand_document = true;
175 }
176
177 boolean expand_contents = false;
178 if (expand_document)
179 { // we always expand the contents with the text
180 expand_contents = true;
181 }
182 else
183 {
184 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
185 if (ec_arg != null && ec_arg.equals("1"))
186 {
187 expand_contents = true;
188 }
189 }
190
191 // UserContext userContext = new UserContext(request);
192
193 // //append site metadata
194 // addSiteMetadata(page_response, userContext);
195 // addInterfaceOptions(page_response);
196
197 // // get the additional data needed for the page
198 // getBackgroundData(page_response, collection, userContext);
199 // Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
200
201 // the_document is where all the doc info - structure and metadata etc
202 // is added into, to be returned in the page
203 Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
204 page_response.appendChild(the_document);
205
206 // create a basic doc list containing the current node
207 Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
208 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
209 basic_doc_list.appendChild(current_doc);
210 if (document_id != null)
211 {
212 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
213 }
214 else
215 {
216 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
217 // do we need this??
218 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
219 }
220
221 if (document_type == null)
222 {
223 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
224 }
225 if (document_type == null)
226 {
227 logger.error("doctype is null!!!***********");
228 document_type = GSXML.DOC_TYPE_SIMPLE;
229 }
230
231 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
232
233
234 // Create a parameter list to specify the required structure information
235 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
236
237 if (service_params != null)
238 {
239 GSXML.addParametersToList(ds_param_list, service_params);
240 }
241
242 Element ds_param = null;
243 boolean get_structure = false;
244 boolean get_structure_info = false;
245 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
246 {
247 get_structure_info = true;
248
249 if (expand_contents)
250 {
251 ds_param = doc.createElement(GSXML.PARAM_ELEM);
252 ds_param_list.appendChild(ds_param);
253 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
254 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
255 }
256
257 // get the info needed for paged naviagtion
258 ds_param = doc.createElement(GSXML.PARAM_ELEM);
259 ds_param_list.appendChild(ds_param);
260 ds_param.setAttribute(GSXML.NAME_ATT, "info");
261 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
262 ds_param = doc.createElement(GSXML.PARAM_ELEM);
263 ds_param_list.appendChild(ds_param);
264 ds_param.setAttribute(GSXML.NAME_ATT, "info");
265 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
266 ds_param = doc.createElement(GSXML.PARAM_ELEM);
267 ds_param_list.appendChild(ds_param);
268 ds_param.setAttribute(GSXML.NAME_ATT, "info");
269 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
270
271 if (get_siblings)
272 {
273 ds_param = doc.createElement(GSXML.PARAM_ELEM);
274 ds_param_list.appendChild(ds_param);
275 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
276 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
277 }
278
279 }
280 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) || document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
281 {
282 get_structure = true;
283 if (expand_contents)
284 {
285 ds_param = doc.createElement(GSXML.PARAM_ELEM);
286 ds_param_list.appendChild(ds_param);
287 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
288 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
289 }
290 else
291 {
292 // get the info needed for table of contents
293 ds_param = doc.createElement(GSXML.PARAM_ELEM);
294 ds_param_list.appendChild(ds_param);
295 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
296 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
297 ds_param = doc.createElement(GSXML.PARAM_ELEM);
298 ds_param_list.appendChild(ds_param);
299 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
300 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
301 if (get_siblings)
302 {
303 ds_param = doc.createElement(GSXML.PARAM_ELEM);
304 ds_param_list.appendChild(ds_param);
305 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
306 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
307 }
308 }
309 }
310 else
311 {
312 // we dont need any structure
313 }
314
315 boolean has_dummy = false;
316 if (get_structure || get_structure_info)
317 {
318
319 // Build a request to obtain the document structure
320 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
321 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
322 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
323 ds_message.appendChild(ds_request);
324 ds_request.appendChild(ds_param_list);
325
326 // add the node list we created earlier
327 ds_request.appendChild(basic_doc_list);
328
329 // Process the document structure retrieve message
330 Element ds_response_message = (Element) this.mr.process(ds_message);
331 if (processErrorElements(ds_response_message, page_response))
332 {
333 return result;
334 }
335
336 // get the info and print out
337 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
338 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
339 path = GSPath.appendLink(path, "nodeStructureInfo");
340 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
341 // get the doc_node bit
342 if (ds_response_struct_info != null)
343 {
344 the_document.appendChild(doc.importNode(ds_response_struct_info, true));
345 }
346 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
347 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
348 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
349 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
350
351 if (ds_response_structure != null)
352 {
353 // add the contents of the structure bit into the_document
354 NodeList structs = ds_response_structure.getChildNodes();
355 for (int i = 0; i < structs.getLength(); i++)
356 {
357 the_document.appendChild(doc.importNode(structs.item(i), true));
358 }
359 }
360 else
361 {
362 // no structure nodes, so put in a dummy doc node
363 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
364 if (document_id != null)
365 {
366 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
367 }
368 else
369 {
370 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
371
372 }
373 the_document.appendChild(doc_node);
374 has_dummy = true;
375 }
376 }
377 else
378 { // a simple type - we dont have a dummy node for simple
379 // should think about this more
380 // no structure request, so just put in a dummy doc node
381 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
382 if (document_id != null)
383 {
384 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
385 }
386 else
387 {
388 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
389 }
390 the_document.appendChild(doc_node);
391 has_dummy = true;
392 }
393
394 // Build a request to obtain some document metadata
395 Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
396 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
397 Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
398 dm_message.appendChild(dm_request);
399 // Create a parameter list to specify the required metadata information
400
401 HashSet<String> meta_names = new HashSet<String>();
402 meta_names.add("Title"); // the default
403 if (format_elem != null)
404 {
405 getRequiredMetadataNames(format_elem, meta_names);
406 }
407
408 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
409 if (extraMetaListElem != null)
410 {
411 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
412 for (int i = 0; i < extraMetaList.getLength(); i++)
413 {
414 meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
415 }
416 }
417
418 Element dm_param_list = createMetadataParamList(doc,meta_names);
419 if (service_params != null)
420 {
421 GSXML.addParametersToList(dm_param_list, service_params);
422 }
423
424 dm_request.appendChild(dm_param_list);
425
426 // create the doc node list for the metadata request
427 Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
428 dm_request.appendChild(dm_doc_list);
429
430 // Add each node from the structure response into the metadata request
431 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
432 for (int i = 0; i < doc_nodes.getLength(); i++)
433 {
434 Element doc_node = (Element) doc_nodes.item(i);
435 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
436
437 // Add the documentNode to the list
438 Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
439 dm_doc_list.appendChild(dm_doc_node);
440 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
441 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
442 }
443
444 // we also want a metadata request to the top level document to get
445 // assocfilepath - this could be cached too
446 Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
447 dm_message.appendChild(doc_meta_request);
448 Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
449 if (service_params != null)
450 {
451 GSXML.addParametersToList(doc_meta_param_list, service_params);
452 }
453
454 doc_meta_request.appendChild(doc_meta_param_list);
455 Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
456 doc_meta_param_list.appendChild(doc_param);
457 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
458 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
459
460 // create the doc node list for the metadata request
461 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
462 doc_meta_request.appendChild(doc_list);
463
464 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
465 // the node we want is the root document node
466 if (document_id != null)
467 {
468 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
469 }
470 else
471 {
472 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
473 // can we assume that href is always a top level doc??
474 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
475 //doc_node.setAttribute("externalURL", has_rl);
476 }
477 doc_list.appendChild(doc_node);
478
479 Element dm_response_message = (Element) this.mr.process(dm_message);
480 if (processErrorElements(dm_response_message, page_response))
481 {
482 return result;
483 }
484
485 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
486 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
487
488 // Merge the metadata with the structure information
489 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
490 for (int i = 0; i < doc_nodes.getLength(); i++)
491 {
492 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
493 }
494 // get the top level doc metadata out
495 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
496 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
497 GSXML.mergeMetadataLists(the_document, top_doc_node);
498
499 // Build a request to obtain some document content
500 Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
501 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
502 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
503 dc_message.appendChild(dc_request);
504
505 // Create a parameter list to specify the request parameters - empty for now
506 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
507 if (service_params != null)
508 {
509 GSXML.addParametersToList(dc_param_list, service_params);
510 }
511
512 dc_request.appendChild(dc_param_list);
513
514 // get the content
515 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
516 if (expand_document)
517 {
518 dc_request.appendChild(dm_doc_list);
519 }
520 else
521 {
522 dc_request.appendChild(basic_doc_list);
523 }
524 logger.debug("request = " + XMLConverter.getString(dc_message));
525 Element dc_response_message = (Element) this.mr.process(dc_message);
526 if (processErrorElements(dc_response_message, page_response))
527 {
528 return result;
529 }
530
531 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
532
533 if (expand_document)
534 {
535 // Merge the content with the structure information
536 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
537 for (int i = 0; i < doc_nodes.getLength(); i++)
538 {
539 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
540 if (content != null)
541 {
542 if (highlight_query_terms)
543 {
544 content = highlightQueryTerms(request, (Element) content);
545 }
546 doc_nodes.item(i).appendChild(doc.importNode(content, true));
547 }
548 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
549 }
550 if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
551 Element dummy_node = (Element) doc_nodes.item(0);
552 the_document.removeChild(dummy_node);
553 the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
554 NodeList dummy_children = dummy_node.getChildNodes();
555 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
556 {
557 // special case as we don't want more than one metadata list
558 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
559 {
560 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
561 }
562 else
563 {
564 the_document.appendChild(dummy_children.item(i));
565 }
566 }
567 }
568 }
569 else
570 {
571 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
572 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
573 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
574 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
575
576 if (dc_response_doc_content == null)
577 {
578 // no content to add
579 if (dc_response_doc.getAttribute("external").equals("true"))
580 {
581
582 //if (dc_response_doc_external != null)
583 //{
584 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
585
586 the_document.setAttribute("selectedNode", href_id);
587 the_document.setAttribute("external", href_id);
588 }
589 return result;
590 }
591 if (highlight_query_terms)
592 {
593 dc_response_doc.removeChild(dc_response_doc_content);
594
595 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
596 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
597 }
598
599 if (provide_annotations)
600 {
601 String service_selected = (String) params.get(ENRICH_DOC_ARG);
602 if (service_selected != null && service_selected.equals("1"))
603 {
604 // now we can modifiy the response doc if needed
605 String enrich_service = (String) params.get(GSParams.SERVICE);
606 // send a message to the service
607 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
608 Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
609 enrich_message.appendChild(enrich_request);
610 // check for parameters
611 HashMap e_service_params = (HashMap) params.get("s1");
612 if (e_service_params != null)
613 {
614 Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
615 GSXML.addParametersToList(enrich_pl, e_service_params);
616 enrich_request.appendChild(enrich_pl);
617 }
618 Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
619 enrich_request.appendChild(e_doc_list);
620 e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
621
622 Node enrich_response = this.mr.process(enrich_message);
623
624 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
625 path = GSPath.createPath(links);
626 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
627
628 }
629 } // if provide_annotations
630
631 // use the returned id rather than the sent one cos there may have
632 // been modifiers such as .pr that are removed.
633 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
634 the_document.setAttribute("selectedNode", modified_doc_id);
635 if (has_dummy)
636 {
637 // change the id if necessary and add the content
638 Element dummy_node = (Element) doc_nodes.item(0);
639
640 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
641 dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
642 // hack for simple type
643 if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
644 {
645 // we dont want the internal docNode, just want the content and metadata in the document
646 // rethink this!!
647 the_document.removeChild(dummy_node);
648
649 NodeList dummy_children = dummy_node.getChildNodes();
650 //for (int i=0; i<dummy_children.getLength(); i++) {
651 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
652 {
653 // special case as we don't want more than one metadata list
654 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
655 {
656 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
657 }
658 else
659 {
660 the_document.appendChild(dummy_children.item(i));
661 }
662 }
663 }
664
665 the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
666 }
667 else
668 {
669 // Merge the document content with the metadata and structure information
670 for (int i = 0; i < doc_nodes.getLength(); i++)
671 {
672 Node dn = doc_nodes.item(i);
673 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
674 if (dn_id.equals(modified_doc_id))
675 {
676 dn.appendChild(doc.importNode(dc_response_doc_content, true));
677 break;
678 }
679 }
680 }
681 }
682 //logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
683 return result;
684 }
685
686 /**
687 * tell the param class what its arguments are if an action has its own
688 * arguments, this should add them to the params object - particularly
689 * important for args that should not be saved
690 */
691 public boolean addActionParameters(GSParams params)
692 {
693 params.addParameter(GOTO_PAGE_ARG, false);
694 params.addParameter(ENRICH_DOC_ARG, false);
695 params.addParameter(EXPAND_DOCUMENT_ARG, false);
696 params.addParameter(EXPAND_CONTENTS_ARG, false);
697 params.addParameter(REALISTIC_BOOK_ARG, false);
698
699 return true;
700 }
701
702 /**
703 * this method gets the collection description, the format info, the list of
704 * enrich services, etc - stuff that is needed for the page, but is the same
705 * whatever the query is - should be cached
706 */
707 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
708 {
709 Document doc = page_response.getOwnerDocument();
710
711 // create a message to process - contains requests for the collection
712 // description, the format element, the enrich services on offer
713 // these could all be cached
714 Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
715 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
716 // the format request - ignore for now, where does this request go to??
717 Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
718 info_message.appendChild(format_request);
719
720 // the enrich_services request - only do this if provide_annotations is true
721
722 if (provide_annotations)
723 {
724 Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
725 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
726 info_message.appendChild(enrich_services_request);
727 }
728
729 Element info_response = (Element) this.mr.process(info_message);
730
731 // the collection is the first response
732 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
733 Element format_resp = (Element) responses.item(0);
734
735 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
736 if (format_elem != null)
737 {
738 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
739 if (global_format_elem != null)
740 {
741 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
742 }
743
744 // set the format type
745 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
746 page_response.appendChild(doc.importNode(format_elem, true));
747 }
748
749 if (provide_annotations)
750 {
751 Element services_resp = (Element) responses.item(1);
752
753 // a new message for the mr
754 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
755 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
756 boolean service_found = false;
757 for (int j = 0; j < e_services.getLength(); j++)
758 {
759 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
760 {
761 Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
762 enrich_message.appendChild(s);
763 service_found = true;
764 }
765 }
766 if (service_found)
767 {
768 Element enrich_response = (Element) this.mr.process(enrich_message);
769
770 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
771 Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
772 for (int i = 0; i < e_responses.getLength(); i++)
773 {
774 Element e_resp = (Element) e_responses.item(i);
775 Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
776 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
777 service_list.appendChild(e_service);
778 }
779 page_response.appendChild(service_list);
780 }
781 } // if provide_annotations
782 return true;
783
784 }
785
786 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
787 {
788 Document doc = basic_doc_list.getOwnerDocument();
789
790 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
791 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
792 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
793 ds_message.appendChild(ds_request);
794
795 // Create a parameter list to specify the required structure information
796 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
797 Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
798 ds_param_list.appendChild(ds_param);
799 ds_param.setAttribute(GSXML.NAME_ATT, "info");
800 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
801
802 ds_request.appendChild(ds_param_list);
803
804 // add the node list we created earlier
805 ds_request.appendChild(basic_doc_list);
806
807 // Process the document structure retrieve message
808 Element ds_response_message = (Element) this.mr.process(ds_message);
809 if (processErrorElements(ds_response_message, page_response))
810 {
811 return null;
812 }
813
814 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
815 String path = GSPath.createPath(links);
816 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
817 if (info_elem == null) {
818 return null;
819 }
820 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
821 if (doctype_elem != null)
822 {
823 String doc_type = doctype_elem.getAttribute("value");
824 return doc_type;
825 }
826 return null;
827 }
828
829 /**
830 * this involves a bit of a hack to get the equivalent query terms - has to
831 * requery the query service - uses the last selected service name. (if it
832 * ends in query). should this action do the query or should it send a
833 * message to the query action? but that will involve lots of extra stuff.
834 * also doesn't handle phrases properly - just highlights all the terms
835 * found in the text.
836 */
837 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
838 {
839 Document doc = request.getOwnerDocument();
840
841 // do the query again to get term info
842 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
843 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
844
845 HashMap previous_params = (HashMap) params.get("p");
846 if (previous_params == null)
847 {
848 return dc_response_doc_content;
849 }
850 String service_name = (String) previous_params.get(GSParams.SERVICE);
851 if (service_name == null || !service_name.endsWith("Query"))
852 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
853 logger.debug("invalid service, not doing highlighting");
854 return dc_response_doc_content;
855 }
856 String collection = (String) params.get(GSParams.COLLECTION);
857 UserContext userContext = new UserContext(request);
858 String to = GSPath.appendLink(collection, service_name);
859
860 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
861 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
862 mr_query_message.appendChild(mr_query_request);
863
864 // paramList
865 HashMap service_params = (HashMap) params.get("s1");
866
867 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
868 GSXML.addParametersToList(query_param_list, service_params);
869 mr_query_request.appendChild(query_param_list);
870
871 // do the query
872 Element mr_query_response = (Element) this.mr.process(mr_query_message);
873
874 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
875 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
876 if (query_term_list_element == null)
877 {
878 // no term info
879 logger.error("No query term information.\n");
880 return dc_response_doc_content;
881 }
882
883 String content = GSXML.getNodeText(dc_response_doc_content);
884
885 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
886 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
887
888 HashSet<String> query_term_variants = new HashSet<String>();
889 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
890 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
891 {
892 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
893 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
894 {
895 for (int i = 0; i < terms_nodelist.getLength(); i++)
896 {
897 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
898 String termValueU = null;
899 String termValueL = null;
900
901 if (termValue.length() > 1)
902 {
903 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
904 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
905 }
906 else
907 {
908 termValueU = termValue.substring(0, 1).toUpperCase();
909 termValueL = termValue.substring(0, 1).toLowerCase();
910 }
911
912 query_term_variants.add(termValueU);
913 query_term_variants.add(termValueL);
914 }
915 }
916 }
917 else
918 {
919 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
920 {
921 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
922 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
923 for (int j = 0; j < equivalent_terms.length; j++)
924 {
925 query_term_variants.add(equivalent_terms[j]);
926 }
927 }
928 }
929
930 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
931
932 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
933 String performed_query = GSXML.getNodeText(query_element) + " ";
934
935 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
936 int term_start = 0;
937 boolean in_term = false;
938 boolean in_phrase = false;
939 for (int i = 0; i < performed_query.length(); i++)
940 {
941 char character = performed_query.charAt(i);
942 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
943
944 // Has a query term just started?
945 if (in_term == false && is_character_letter_or_digit == true)
946 {
947 in_term = true;
948 term_start = i;
949 }
950
951 // Or has a term just finished?
952 else if (in_term == true && is_character_letter_or_digit == false)
953 {
954 in_term = false;
955 String term = performed_query.substring(term_start, i);
956
957 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
958 if (term_element != null)
959 {
960
961 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
962
963 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
964 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
965 {
966 String termValueU = null;
967 String termValueL = null;
968
969 if (term.length() > 1)
970 {
971 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
972 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
973 }
974 else
975 {
976 termValueU = term.substring(0, 1).toUpperCase();
977 termValueL = term.substring(0, 1).toLowerCase();
978 }
979
980 phrase_query_p_term_x_variants.add(termValueU);
981 phrase_query_p_term_x_variants.add(termValueL);
982 }
983 else
984 {
985 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
986 {
987 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
988 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
989 for (int k = 0; k < term_equivalent_terms.length; k++)
990 {
991 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
992 }
993 }
994 }
995 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
996
997 if (in_phrase == false)
998 {
999 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1000 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1001 }
1002 }
1003 }
1004 // Watch for phrases (surrounded by quotes)
1005 if (character == '\"')
1006 {
1007 // Has a phrase just started?
1008 if (in_phrase == false)
1009 {
1010 in_phrase = true;
1011 }
1012 // Or has a phrase just finished?
1013 else if (in_phrase == true)
1014 {
1015 in_phrase = false;
1016 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1017 }
1018
1019 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1020 }
1021 }
1022
1023 return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
1024 }
1025
1026 /**
1027 * Highlights query terms in a piece of text.
1028 */
1029 private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
1030 {
1031 // Convert the content string to an array of characters for speed
1032 char[] content_characters = new char[content.length()];
1033 content.getChars(0, content.length(), content_characters, 0);
1034
1035 // Now skim through the content, identifying word matches
1036 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
1037 int word_start = 0;
1038 boolean in_word = false;
1039 boolean preceding_word_matched = false;
1040 boolean inTag = false;
1041 for (int i = 0; i < content_characters.length; i++)
1042 {
1043 //We don't want to find words inside HTML tags
1044 if (content_characters[i] == '<')
1045 {
1046 inTag = true;
1047 continue;
1048 }
1049 else if (inTag && content_characters[i] == '>')
1050 {
1051 inTag = false;
1052 }
1053 else if (inTag)
1054 {
1055 continue;
1056 }
1057
1058 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1059
1060 // Has a word just started?
1061 if (in_word == false && is_character_letter_or_digit == true)
1062 {
1063 in_word = true;
1064 word_start = i;
1065 }
1066
1067 // Or has a word just finished?
1068 else if (in_word == true && is_character_letter_or_digit == false)
1069 {
1070 in_word = false;
1071
1072 // Check if the word matches any of the query term equivalents
1073 String word = new String(content_characters, word_start, (i - word_start));
1074 if (query_term_variants.contains(word))
1075 {
1076 // We have found a matching word, so remember its location
1077 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1078 preceding_word_matched = true;
1079 }
1080 else
1081 {
1082 preceding_word_matched = false;
1083 }
1084 }
1085 }
1086
1087 // Don't forget the last word...
1088 if (in_word == true)
1089 {
1090 // Check if the word matches any of the query term equivalents
1091 String word = new String(content_characters, word_start, (content_characters.length - word_start));
1092 if (query_term_variants.contains(word))
1093 {
1094 // We have found a matching word, so remember its location
1095 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1096 }
1097 }
1098
1099 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1100 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1101
1102 // Deal with phrases now
1103 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1104 for (int i = 0; i < word_matches.size(); i++)
1105 {
1106 WordMatch word_match = word_matches.get(i);
1107
1108 // See if any partial phrase matches are extended by this word
1109 if (word_match.preceding_word_matched)
1110 {
1111 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1112 {
1113 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1114 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1115 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1116 if (phrase_query_p_term_x_variants.contains(word_match.word))
1117 {
1118 partial_phrase_match.num_words_matched++;
1119
1120 // Has a complete phrase match occurred?
1121 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1122 {
1123 // Check for overlaps by looking at the previous highlight range
1124 if (!highlight_end_positions.isEmpty())
1125 {
1126 int last_highlight_index = highlight_end_positions.size() - 1;
1127 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1128 if (last_highlight_end > partial_phrase_match.start_position)
1129 {
1130 // There is an overlap, so remove the previous phrase match
1131 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1132 highlight_end_positions.remove(last_highlight_index);
1133 partial_phrase_match.start_position = last_highlight_start;
1134 }
1135 }
1136
1137 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1138 highlight_end_positions.add(new Integer(word_match.end_position));
1139 }
1140 // No, but add the partial match back into the list for next time
1141 else
1142 {
1143 partial_phrase_matches.add(partial_phrase_match);
1144 }
1145 }
1146 }
1147 }
1148 else
1149 {
1150 partial_phrase_matches.clear();
1151 }
1152
1153 // See if this word is at the start of any of the phrases
1154 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1155 {
1156 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1157 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1158 if (phrase_query_p_term_1_variants.contains(word_match.word))
1159 {
1160 // If this phrase is just one word long, we have a complete match
1161 if (phrase_query_p_term_variants_list.size() == 1)
1162 {
1163 highlight_start_positions.add(new Integer(word_match.start_position));
1164 highlight_end_positions.add(new Integer(word_match.end_position));
1165 }
1166 // Otherwise we have the start of a potential phrase match
1167 else
1168 {
1169 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1170 }
1171 }
1172 }
1173 }
1174
1175 // Now add the annotation tags into the document at the correct points
1176 Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1177
1178 int last_wrote = 0;
1179 for (int i = 0; i < highlight_start_positions.size(); i++)
1180 {
1181 int highlight_start = highlight_start_positions.get(i).intValue();
1182 int highlight_end = highlight_end_positions.get(i).intValue();
1183
1184 // Print anything before the highlight range
1185 if (last_wrote < highlight_start)
1186 {
1187 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1188 content_element.appendChild(doc.createTextNode(preceding_text));
1189 }
1190
1191 // Print the highlight text, annotated
1192 if (highlight_end > last_wrote)
1193 {
1194 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1195 Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
1196 annotation_element.setAttribute("type", "query_term");
1197 content_element.appendChild(annotation_element);
1198 last_wrote = highlight_end;
1199 }
1200 }
1201
1202 // Finish off any unwritten text
1203 if (last_wrote < content_characters.length)
1204 {
1205 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1206 content_element.appendChild(doc.createTextNode(remaining_text));
1207 }
1208
1209 return content_element;
1210 }
1211
1212 static private class WordMatch
1213 {
1214 public String word;
1215 public int start_position;
1216 public int end_position;
1217 public boolean preceding_word_matched;
1218
1219 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1220 {
1221 this.word = word;
1222 this.start_position = start_position;
1223 this.end_position = end_position;
1224 this.preceding_word_matched = preceding_word_matched;
1225 }
1226 }
1227
1228 static private class PartialPhraseMatch
1229 {
1230 public int start_position;
1231 public int query_phrase_number;
1232 public int num_words_matched;
1233
1234 public PartialPhraseMatch(int start_position, int query_phrase_number)
1235 {
1236 this.start_position = start_position;
1237 this.query_phrase_number = query_phrase_number;
1238 this.num_words_matched = 1;
1239 }
1240 }
1241}
Note: See TracBrowser for help on using the repository browser.