source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 29922

Last change on this file since 29922 was 29922, checked in by Georgiy Litvinov, 9 years ago

No metadata while following internal link bugfix

  • Property svn:keywords set to Author Date Id Revision
File size: 44.5 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37import java.io.Serializable;
38
39import org.apache.log4j.*;
40
41/** Action class for retrieving Documents via the message router */
42public class DocumentAction extends Action
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47 // this is used to specify that the sibling nodes of a selected one should be obtained
48 public static final String SIBLING_ARG = "sib";
49 public static final String GOTO_PAGE_ARG = "gp";
50 public static final String ENRICH_DOC_ARG = "end";
51 public static final String EXPAND_DOCUMENT_ARG = "ed";
52 public static final String EXPAND_CONTENTS_ARG = "ec";
53 public static final String REALISTIC_BOOK_ARG = "book";
54
55 /**
56 * if this is set to true, when a document is displayed, any annotation type
57 * services (enrich) will be offered to the user as well
58 */
59 protected boolean provide_annotations = false;
60
61 protected boolean highlight_query_terms = false;
62
63 public boolean configure()
64 {
65 super.configure();
66 String highlight = (String) config_params.get("highlightQueryTerms");
67 if (highlight != null && highlight.equals("true"))
68 {
69 highlight_query_terms = true;
70 }
71 String annotate = (String) config_params.get("displayAnnotationService");
72 if (annotate != null && annotate.equals("true"))
73 {
74 provide_annotations = true;
75 }
76 return true;
77 }
78
79 public Node process(Node message_node)
80 {
81 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83 Element message = GSXML.nodeToElement(message_node);
84 Document doc = message.getOwnerDocument();
85
86 // the response
87 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
88 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
89 result.appendChild(page_response);
90
91 // get the request - assume only one
92 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
93 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
94 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
95
96 // just in case there are some that need to get passed to the services
97 HashMap service_params = (HashMap) params.get("s0");
98
99 String collection = (String) params.get(GSParams.COLLECTION);
100 String document_id = (String) params.get(GSParams.DOCUMENT);
101 if (document_id != null && document_id.equals(""))
102 {
103 document_id = null;
104 }
105 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
106 if (href != null && href.equals(""))
107 {
108 href = null;
109 }
110 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
111 if (document_id == null && href == null)
112 {
113 logger.error("no document specified!");
114 return result;
115 }
116 if (rl != null && rl.equals("0"))
117 {
118 // this is a true external link, we should have been directed to a different page or action
119 logger.error("rl value was 0, shouldn't get here");
120 return result;
121 }
122
123 UserContext userContext = new UserContext(request);
124
125 //append site metadata
126 addSiteMetadata(page_response, userContext);
127 addInterfaceOptions(page_response);
128
129 // get the additional data needed for the page
130 getBackgroundData(page_response, collection, userContext);
131 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
132
133 if (format_elem != null) {
134 // lets look for param defaults set in config file
135 NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
136 for (int i=0; i<param_defaults.getLength(); i++) {
137 Element p = (Element)param_defaults.item(i);
138 String name = p.getAttribute(GSXML.NAME_ATT);
139 if (params.get(name) ==null) {
140 // wasn't set from interface
141 String value = p.getAttribute(GSXML.VALUE_ATT);
142 params.put(name, value );
143 // also add into request param xml so that xslt knows it too
144 GSXML.addParameterToList(cgi_paramList, name, value);
145 }
146 }
147 }
148 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
149 if (document_type != null && document_type.equals(""))
150 {
151 //document_type = "hierarchy";
152 document_type = null; // we'll get it later if not already specified
153 }
154 //whether to retrieve siblings or not
155 boolean get_siblings = false;
156 String sibs = (String) params.get(SIBLING_ARG);
157 if (sibs != null && sibs.equals("1"))
158 {
159 get_siblings = true;
160 }
161
162 String doc_id_modifier = "";
163 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
164 if (sibling_num != null && !sibling_num.equals(""))
165 {
166 // we have to modify the doc name
167 doc_id_modifier = "." + sibling_num + ".ss";
168 }
169
170 boolean expand_document = false;
171 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
172 if (ed_arg != null && ed_arg.equals("1"))
173 {
174 expand_document = true;
175 }
176
177 boolean expand_contents = false;
178 if (expand_document)
179 { // we always expand the contents with the text
180 expand_contents = true;
181 }
182 else
183 {
184 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
185 if (ec_arg != null && ec_arg.equals("1"))
186 {
187 expand_contents = true;
188 }
189 }
190
191 // UserContext userContext = new UserContext(request);
192
193 // //append site metadata
194 // addSiteMetadata(page_response, userContext);
195 // addInterfaceOptions(page_response);
196
197 // // get the additional data needed for the page
198 // getBackgroundData(page_response, collection, userContext);
199 // Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
200
201 // the_document is where all the doc info - structure and metadata etc
202 // is added into, to be returned in the page
203 Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
204 page_response.appendChild(the_document);
205
206 // create a basic doc list containing the current node
207 Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
208 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
209 basic_doc_list.appendChild(current_doc);
210 if (document_id != null)
211 {
212 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
213 }
214 else
215 {
216 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
217 // do we need this??
218 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
219 }
220
221 if (document_type == null)
222 {
223 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
224 }
225 if (document_type == null)
226 {
227 logger.error("doctype is null!!!***********");
228 document_type = GSXML.DOC_TYPE_SIMPLE;
229 }
230
231 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
232
233
234 // Create a parameter list to specify the required structure information
235 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
236
237 if (service_params != null)
238 {
239 GSXML.addParametersToList(ds_param_list, service_params);
240 }
241
242 Element ds_param = null;
243 boolean get_structure = false;
244 boolean get_structure_info = false;
245 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
246 {
247 get_structure_info = true;
248
249 if (expand_contents)
250 {
251 ds_param = doc.createElement(GSXML.PARAM_ELEM);
252 ds_param_list.appendChild(ds_param);
253 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
254 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
255 }
256
257 // get the info needed for paged naviagtion
258 ds_param = doc.createElement(GSXML.PARAM_ELEM);
259 ds_param_list.appendChild(ds_param);
260 ds_param.setAttribute(GSXML.NAME_ATT, "info");
261 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
262 ds_param = doc.createElement(GSXML.PARAM_ELEM);
263 ds_param_list.appendChild(ds_param);
264 ds_param.setAttribute(GSXML.NAME_ATT, "info");
265 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
266 ds_param = doc.createElement(GSXML.PARAM_ELEM);
267 ds_param_list.appendChild(ds_param);
268 ds_param.setAttribute(GSXML.NAME_ATT, "info");
269 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
270
271 if (get_siblings)
272 {
273 ds_param = doc.createElement(GSXML.PARAM_ELEM);
274 ds_param_list.appendChild(ds_param);
275 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
276 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
277 }
278
279 }
280 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) || document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
281 {
282 get_structure = true;
283 if (expand_contents)
284 {
285 ds_param = doc.createElement(GSXML.PARAM_ELEM);
286 ds_param_list.appendChild(ds_param);
287 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
288 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
289 }
290 else
291 {
292 // get the info needed for table of contents
293 ds_param = doc.createElement(GSXML.PARAM_ELEM);
294 ds_param_list.appendChild(ds_param);
295 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
296 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
297 ds_param = doc.createElement(GSXML.PARAM_ELEM);
298 ds_param_list.appendChild(ds_param);
299 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
300 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
301 if (get_siblings)
302 {
303 ds_param = doc.createElement(GSXML.PARAM_ELEM);
304 ds_param_list.appendChild(ds_param);
305 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
306 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
307 }
308 }
309 }
310 else
311 {
312 // we dont need any structure
313 }
314
315 boolean has_dummy = false;
316 if (get_structure || get_structure_info)
317 {
318
319 // Build a request to obtain the document structure
320 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
321 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
322 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
323 ds_message.appendChild(ds_request);
324 ds_request.appendChild(ds_param_list);
325
326 // add the node list we created earlier
327 ds_request.appendChild(basic_doc_list);
328
329 // Process the document structure retrieve message
330 Element ds_response_message = (Element) this.mr.process(ds_message);
331 if (processErrorElements(ds_response_message, page_response))
332 {
333 return result;
334 }
335
336 // get the info and print out
337 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
338 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
339 path = GSPath.appendLink(path, "nodeStructureInfo");
340 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
341 // get the doc_node bit
342 if (ds_response_struct_info != null)
343 {
344 the_document.appendChild(doc.importNode(ds_response_struct_info, true));
345 }
346 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
347 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
348 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
349 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
350
351 if (ds_response_structure != null)
352 {
353 // add the contents of the structure bit into the_document
354 NodeList structs = ds_response_structure.getChildNodes();
355 for (int i = 0; i < structs.getLength(); i++)
356 {
357 the_document.appendChild(doc.importNode(structs.item(i), true));
358 }
359 }
360 else
361 {
362 // no structure nodes, so put in a dummy doc node
363 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
364 if (document_id != null)
365 {
366 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
367 }
368 else
369 {
370 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
371
372 }
373 the_document.appendChild(doc_node);
374 has_dummy = true;
375 }
376 }
377 else
378 { // a simple type - we dont have a dummy node for simple
379 // should think about this more
380 // no structure request, so just put in a dummy doc node
381 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
382 if (document_id != null)
383 {
384 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
385 }
386 else
387 {
388 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
389 }
390 the_document.appendChild(doc_node);
391 has_dummy = true;
392 }
393
394 // Build a request to obtain some document metadata
395 Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
396 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
397 Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
398 dm_message.appendChild(dm_request);
399 // Create a parameter list to specify the required metadata information
400
401 HashSet<String> meta_names = new HashSet<String>();
402 meta_names.add("Title"); // the default
403 if (format_elem != null)
404 {
405 getRequiredMetadataNames(format_elem, meta_names);
406 }
407
408 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
409 if (extraMetaListElem != null)
410 {
411 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
412 for (int i = 0; i < extraMetaList.getLength(); i++)
413 {
414 meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
415 }
416 }
417
418 Element dm_param_list = createMetadataParamList(doc,meta_names);
419 if (service_params != null)
420 {
421 GSXML.addParametersToList(dm_param_list, service_params);
422 }
423
424 dm_request.appendChild(dm_param_list);
425
426 // create the doc node list for the metadata request
427 Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
428 dm_request.appendChild(dm_doc_list);
429
430 // Add each node from the structure response into the metadata request
431 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
432 for (int i = 0; i < doc_nodes.getLength(); i++)
433 {
434 Element doc_node = (Element) doc_nodes.item(i);
435 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
436
437 // Add the documentNode to the list
438 Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
439 dm_doc_list.appendChild(dm_doc_node);
440 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
441 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
442 if (document_id == null){
443 dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
444 }
445
446 }
447
448 // we also want a metadata request to the top level document to get
449 // assocfilepath - this could be cached too
450 Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
451 dm_message.appendChild(doc_meta_request);
452 Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
453 if (service_params != null)
454 {
455 GSXML.addParametersToList(doc_meta_param_list, service_params);
456 }
457
458 doc_meta_request.appendChild(doc_meta_param_list);
459 Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
460 doc_meta_param_list.appendChild(doc_param);
461 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
462 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
463
464 // create the doc node list for the metadata request
465 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
466 doc_meta_request.appendChild(doc_list);
467
468 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
469 // the node we want is the root document node
470 if (document_id != null)
471 {
472 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
473 }
474 /*else
475 {
476 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
477 // can we assume that href is always a top level doc??
478 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
479 //doc_node.setAttribute("externalURL", has_rl);
480 }*/
481 doc_list.appendChild(doc_node);
482
483 Element dm_response_message = (Element) this.mr.process(dm_message);
484 if (processErrorElements(dm_response_message, page_response))
485 {
486 return result;
487 }
488
489 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
490 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
491
492 // Merge the metadata with the structure information
493 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
494 for (int i = 0; i < doc_nodes.getLength(); i++)
495 {
496 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
497 }
498 // get the top level doc metadata out
499 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
500 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
501 GSXML.mergeMetadataLists(the_document, top_doc_node);
502
503 // Build a request to obtain some document content
504 Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
505 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
506 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
507 dc_message.appendChild(dc_request);
508
509 // Create a parameter list to specify the request parameters - empty for now
510 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
511 if (service_params != null)
512 {
513 GSXML.addParametersToList(dc_param_list, service_params);
514 }
515
516 dc_request.appendChild(dc_param_list);
517
518 // get the content
519 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
520 if (expand_document)
521 {
522 dc_request.appendChild(dm_doc_list);
523 }
524 else
525 {
526 dc_request.appendChild(basic_doc_list);
527 }
528 logger.debug("request = " + XMLConverter.getString(dc_message));
529 Element dc_response_message = (Element) this.mr.process(dc_message);
530 if (processErrorElements(dc_response_message, page_response))
531 {
532 return result;
533 }
534
535 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
536
537 if (expand_document)
538 {
539 // Merge the content with the structure information
540 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
541 for (int i = 0; i < doc_nodes.getLength(); i++)
542 {
543 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
544 if (content != null)
545 {
546 if (highlight_query_terms)
547 {
548 content = highlightQueryTerms(request, (Element) content);
549 }
550 doc_nodes.item(i).appendChild(doc.importNode(content, true));
551 }
552 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
553 }
554 if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
555 Element dummy_node = (Element) doc_nodes.item(0);
556 the_document.removeChild(dummy_node);
557 the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
558 NodeList dummy_children = dummy_node.getChildNodes();
559 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
560 {
561 // special case as we don't want more than one metadata list
562 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
563 {
564 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
565 }
566 else
567 {
568 the_document.appendChild(dummy_children.item(i));
569 }
570 }
571 }
572 }
573 else
574 {
575 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
576 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
577 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
578 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
579
580 if (dc_response_doc_content == null)
581 {
582 // no content to add
583 if (dc_response_doc.getAttribute("external").equals("true"))
584 {
585
586 //if (dc_response_doc_external != null)
587 //{
588 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
589
590 the_document.setAttribute("selectedNode", href_id);
591 the_document.setAttribute("external", href_id);
592 }
593 return result;
594 }
595 if (highlight_query_terms)
596 {
597 dc_response_doc.removeChild(dc_response_doc_content);
598
599 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
600 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
601 }
602
603 if (provide_annotations)
604 {
605 String service_selected = (String) params.get(ENRICH_DOC_ARG);
606 if (service_selected != null && service_selected.equals("1"))
607 {
608 // now we can modifiy the response doc if needed
609 String enrich_service = (String) params.get(GSParams.SERVICE);
610 // send a message to the service
611 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
612 Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
613 enrich_message.appendChild(enrich_request);
614 // check for parameters
615 HashMap e_service_params = (HashMap) params.get("s1");
616 if (e_service_params != null)
617 {
618 Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
619 GSXML.addParametersToList(enrich_pl, e_service_params);
620 enrich_request.appendChild(enrich_pl);
621 }
622 Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
623 enrich_request.appendChild(e_doc_list);
624 e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
625
626 Node enrich_response = this.mr.process(enrich_message);
627
628 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
629 path = GSPath.createPath(links);
630 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
631
632 }
633 } // if provide_annotations
634
635 // use the returned id rather than the sent one cos there may have
636 // been modifiers such as .pr that are removed.
637 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
638 the_document.setAttribute("selectedNode", modified_doc_id);
639 if (has_dummy)
640 {
641 // change the id if necessary and add the content
642 Element dummy_node = (Element) doc_nodes.item(0);
643
644 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
645 dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
646 // hack for simple type
647 if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
648 {
649 // we dont want the internal docNode, just want the content and metadata in the document
650 // rethink this!!
651 the_document.removeChild(dummy_node);
652
653 NodeList dummy_children = dummy_node.getChildNodes();
654 //for (int i=0; i<dummy_children.getLength(); i++) {
655 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
656 {
657 // special case as we don't want more than one metadata list
658 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
659 {
660 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
661 }
662 else
663 {
664 the_document.appendChild(dummy_children.item(i));
665 }
666 }
667 }
668
669 the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
670 }
671 else
672 {
673 // Merge the document content with the metadata and structure information
674 for (int i = 0; i < doc_nodes.getLength(); i++)
675 {
676 Node dn = doc_nodes.item(i);
677 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
678 if (dn_id.equals(modified_doc_id))
679 {
680 dn.appendChild(doc.importNode(dc_response_doc_content, true));
681 break;
682 }
683 }
684 }
685 }
686 //logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
687 return result;
688 }
689
690 /**
691 * Tells the params object what this action's arguments are. If an action
692 * has its own arguments, it should add them to the params object here;
693 * this is particularly important for arguments that should not be saved.
694 */
695 public boolean addActionParameters(GSParams params)
696 {
697 params.addParameter(GOTO_PAGE_ARG, false);
698 params.addParameter(ENRICH_DOC_ARG, false);
699 params.addParameter(EXPAND_DOCUMENT_ARG, false);
700 params.addParameter(EXPAND_CONTENTS_ARG, false);
701 params.addParameter(REALISTIC_BOOK_ARG, false);
702
703 return true;
704 }
705
706 /**
707 * Gets the collection description, the format info, the list of enrich
708 * services, etc. - data that is needed for the page but is the same
709 * whatever the query is, so it should be cached.
710 */
711 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
712 {
713 Document doc = page_response.getOwnerDocument();
714
715 // create a message to process - contains requests for the collection
716 // description, the format element, the enrich services on offer
717 // these could all be cached
718 Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
719 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
720 // the format request - ignore for now, where does this request go to??
721 Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
722 info_message.appendChild(format_request);
723
724 // the enrich_services request - only do this if provide_annotations is true
725
726 if (provide_annotations)
727 {
728 Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
729 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
730 info_message.appendChild(enrich_services_request);
731 }
732
733 Element info_response = (Element) this.mr.process(info_message);
734
735 // the collection is the first response
736 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
737 Element format_resp = (Element) responses.item(0);
738
739 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
740 if (format_elem != null)
741 {
742 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
743 if (global_format_elem != null)
744 {
745 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
746 }
747
748 // set the format type
749 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
750 page_response.appendChild(doc.importNode(format_elem, true));
751 }
752
753 if (provide_annotations)
754 {
755 Element services_resp = (Element) responses.item(1);
756
757 // a new message for the mr
758 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
759 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
760 boolean service_found = false;
761 for (int j = 0; j < e_services.getLength(); j++)
762 {
763 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
764 {
765 Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
766 enrich_message.appendChild(s);
767 service_found = true;
768 }
769 }
770 if (service_found)
771 {
772 Element enrich_response = (Element) this.mr.process(enrich_message);
773
774 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
775 Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
776 for (int i = 0; i < e_responses.getLength(); i++)
777 {
778 Element e_resp = (Element) e_responses.item(i);
779 Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
780 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
781 service_list.appendChild(e_service);
782 }
783 page_response.appendChild(service_list);
784 }
785 } // if provide_annotations
786 return true;
787
788 }
789
790 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
791 {
792 Document doc = basic_doc_list.getOwnerDocument();
793
794 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
795 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
796 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
797 ds_message.appendChild(ds_request);
798
799 // Create a parameter list to specify the required structure information
800 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
801 Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
802 ds_param_list.appendChild(ds_param);
803 ds_param.setAttribute(GSXML.NAME_ATT, "info");
804 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
805
806 ds_request.appendChild(ds_param_list);
807
808 // add the node list we created earlier
809 ds_request.appendChild(basic_doc_list);
810
811 // Process the document structure retrieve message
812 Element ds_response_message = (Element) this.mr.process(ds_message);
813 if (processErrorElements(ds_response_message, page_response))
814 {
815 return null;
816 }
817
818 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
819 String path = GSPath.createPath(links);
820 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
821 if (info_elem == null) {
822 return null;
823 }
824 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
825 if (doctype_elem != null)
826 {
827 String doc_type = doctype_elem.getAttribute("value");
828 return doc_type;
829 }
830 return null;
831 }
832
833 /**
834 * this involves a bit of a hack to get the equivalent query terms - has to
835 * requery the query service - uses the last selected service name. (if it
836 * ends in query). should this action do the query or should it send a
837 * message to the query action? but that will involve lots of extra stuff.
838 * also doesn't handle phrases properly - just highlights all the terms
839 * found in the text.
840 */
841 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
842 {
843 Document doc = request.getOwnerDocument();
844
845 // do the query again to get term info
846 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
847 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
848
849 HashMap previous_params = (HashMap) params.get("p");
850 if (previous_params == null)
851 {
852 return dc_response_doc_content;
853 }
854 String service_name = (String) previous_params.get(GSParams.SERVICE);
855 if (service_name == null || !service_name.endsWith("Query"))
856 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
857 logger.debug("invalid service, not doing highlighting");
858 return dc_response_doc_content;
859 }
860 String collection = (String) params.get(GSParams.COLLECTION);
861 UserContext userContext = new UserContext(request);
862 String to = GSPath.appendLink(collection, service_name);
863
864 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
865 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
866 mr_query_message.appendChild(mr_query_request);
867
868 // paramList
869 HashMap service_params = (HashMap) params.get("s1");
870
871 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
872 GSXML.addParametersToList(query_param_list, service_params);
873 mr_query_request.appendChild(query_param_list);
874
875 // do the query
876 Element mr_query_response = (Element) this.mr.process(mr_query_message);
877
878 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
879 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
880 if (query_term_list_element == null)
881 {
882 // no term info
883 logger.error("No query term information.\n");
884 return dc_response_doc_content;
885 }
886
887 String content = GSXML.getNodeText(dc_response_doc_content);
888
889 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
890 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
891
892 HashSet<String> query_term_variants = new HashSet<String>();
893 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
894 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
895 {
896 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
897 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
898 {
899 for (int i = 0; i < terms_nodelist.getLength(); i++)
900 {
901 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
902 String termValueU = null;
903 String termValueL = null;
904
905 if (termValue.length() > 1)
906 {
907 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
908 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
909 }
910 else
911 {
912 termValueU = termValue.substring(0, 1).toUpperCase();
913 termValueL = termValue.substring(0, 1).toLowerCase();
914 }
915
916 query_term_variants.add(termValueU);
917 query_term_variants.add(termValueL);
918 }
919 }
920 }
921 else
922 {
923 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
924 {
925 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
926 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
927 for (int j = 0; j < equivalent_terms.length; j++)
928 {
929 query_term_variants.add(equivalent_terms[j]);
930 }
931 }
932 }
933
934 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
935
936 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
937 String performed_query = GSXML.getNodeText(query_element) + " ";
938
939 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
940 int term_start = 0;
941 boolean in_term = false;
942 boolean in_phrase = false;
943 for (int i = 0; i < performed_query.length(); i++)
944 {
945 char character = performed_query.charAt(i);
946 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
947
948 // Has a query term just started?
949 if (in_term == false && is_character_letter_or_digit == true)
950 {
951 in_term = true;
952 term_start = i;
953 }
954
955 // Or has a term just finished?
956 else if (in_term == true && is_character_letter_or_digit == false)
957 {
958 in_term = false;
959 String term = performed_query.substring(term_start, i);
960
961 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
962 if (term_element != null)
963 {
964
965 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
966
967 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
968 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
969 {
970 String termValueU = null;
971 String termValueL = null;
972
973 if (term.length() > 1)
974 {
975 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
976 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
977 }
978 else
979 {
980 termValueU = term.substring(0, 1).toUpperCase();
981 termValueL = term.substring(0, 1).toLowerCase();
982 }
983
984 phrase_query_p_term_x_variants.add(termValueU);
985 phrase_query_p_term_x_variants.add(termValueL);
986 }
987 else
988 {
989 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
990 {
991 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
992 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
993 for (int k = 0; k < term_equivalent_terms.length; k++)
994 {
995 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
996 }
997 }
998 }
999 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
1000
1001 if (in_phrase == false)
1002 {
1003 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1004 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1005 }
1006 }
1007 }
1008 // Watch for phrases (surrounded by quotes)
1009 if (character == '\"')
1010 {
1011 // Has a phrase just started?
1012 if (in_phrase == false)
1013 {
1014 in_phrase = true;
1015 }
1016 // Or has a phrase just finished?
1017 else if (in_phrase == true)
1018 {
1019 in_phrase = false;
1020 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1021 }
1022
1023 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1024 }
1025 }
1026
1027 return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
1028 }
1029
1030 /**
1031 * Highlights query terms in a piece of text.
1032 */
1033 private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
1034 {
1035 // Convert the content string to an array of characters for speed
1036 char[] content_characters = new char[content.length()];
1037 content.getChars(0, content.length(), content_characters, 0);
1038
1039 // Now skim through the content, identifying word matches
1040 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
1041 int word_start = 0;
1042 boolean in_word = false;
1043 boolean preceding_word_matched = false;
1044 boolean inTag = false;
1045 for (int i = 0; i < content_characters.length; i++)
1046 {
1047 //We don't want to find words inside HTML tags
1048 if (content_characters[i] == '<')
1049 {
1050 inTag = true;
1051 continue;
1052 }
1053 else if (inTag && content_characters[i] == '>')
1054 {
1055 inTag = false;
1056 }
1057 else if (inTag)
1058 {
1059 continue;
1060 }
1061
1062 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1063
1064 // Has a word just started?
1065 if (in_word == false && is_character_letter_or_digit == true)
1066 {
1067 in_word = true;
1068 word_start = i;
1069 }
1070
1071 // Or has a word just finished?
1072 else if (in_word == true && is_character_letter_or_digit == false)
1073 {
1074 in_word = false;
1075
1076 // Check if the word matches any of the query term equivalents
1077 String word = new String(content_characters, word_start, (i - word_start));
1078 if (query_term_variants.contains(word))
1079 {
1080 // We have found a matching word, so remember its location
1081 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1082 preceding_word_matched = true;
1083 }
1084 else
1085 {
1086 preceding_word_matched = false;
1087 }
1088 }
1089 }
1090
1091 // Don't forget the last word...
1092 if (in_word == true)
1093 {
1094 // Check if the word matches any of the query term equivalents
1095 String word = new String(content_characters, word_start, (content_characters.length - word_start));
1096 if (query_term_variants.contains(word))
1097 {
1098 // We have found a matching word, so remember its location
1099 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1100 }
1101 }
1102
1103 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1104 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1105
1106 // Deal with phrases now
1107 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1108 for (int i = 0; i < word_matches.size(); i++)
1109 {
1110 WordMatch word_match = word_matches.get(i);
1111
1112 // See if any partial phrase matches are extended by this word
1113 if (word_match.preceding_word_matched)
1114 {
1115 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1116 {
1117 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1118 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1119 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1120 if (phrase_query_p_term_x_variants.contains(word_match.word))
1121 {
1122 partial_phrase_match.num_words_matched++;
1123
1124 // Has a complete phrase match occurred?
1125 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1126 {
1127 // Check for overlaps by looking at the previous highlight range
1128 if (!highlight_end_positions.isEmpty())
1129 {
1130 int last_highlight_index = highlight_end_positions.size() - 1;
1131 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1132 if (last_highlight_end > partial_phrase_match.start_position)
1133 {
1134 // There is an overlap, so remove the previous phrase match
1135 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1136 highlight_end_positions.remove(last_highlight_index);
1137 partial_phrase_match.start_position = last_highlight_start;
1138 }
1139 }
1140
1141 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1142 highlight_end_positions.add(new Integer(word_match.end_position));
1143 }
1144 // No, but add the partial match back into the list for next time
1145 else
1146 {
1147 partial_phrase_matches.add(partial_phrase_match);
1148 }
1149 }
1150 }
1151 }
1152 else
1153 {
1154 partial_phrase_matches.clear();
1155 }
1156
1157 // See if this word is at the start of any of the phrases
1158 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1159 {
1160 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1161 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1162 if (phrase_query_p_term_1_variants.contains(word_match.word))
1163 {
1164 // If this phrase is just one word long, we have a complete match
1165 if (phrase_query_p_term_variants_list.size() == 1)
1166 {
1167 highlight_start_positions.add(new Integer(word_match.start_position));
1168 highlight_end_positions.add(new Integer(word_match.end_position));
1169 }
1170 // Otherwise we have the start of a potential phrase match
1171 else
1172 {
1173 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1174 }
1175 }
1176 }
1177 }
1178
1179 // Now add the annotation tags into the document at the correct points
1180 Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1181
1182 int last_wrote = 0;
1183 for (int i = 0; i < highlight_start_positions.size(); i++)
1184 {
1185 int highlight_start = highlight_start_positions.get(i).intValue();
1186 int highlight_end = highlight_end_positions.get(i).intValue();
1187
1188 // Print anything before the highlight range
1189 if (last_wrote < highlight_start)
1190 {
1191 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1192 content_element.appendChild(doc.createTextNode(preceding_text));
1193 }
1194
1195 // Print the highlight text, annotated
1196 if (highlight_end > last_wrote)
1197 {
1198 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1199 Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
1200 annotation_element.setAttribute("type", "query_term");
1201 content_element.appendChild(annotation_element);
1202 last_wrote = highlight_end;
1203 }
1204 }
1205
1206 // Finish off any unwritten text
1207 if (last_wrote < content_characters.length)
1208 {
1209 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1210 content_element.appendChild(doc.createTextNode(remaining_text));
1211 }
1212
1213 return content_element;
1214 }
1215
1216 static private class WordMatch
1217 {
1218 public String word;
1219 public int start_position;
1220 public int end_position;
1221 public boolean preceding_word_matched;
1222
1223 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1224 {
1225 this.word = word;
1226 this.start_position = start_position;
1227 this.end_position = end_position;
1228 this.preceding_word_matched = preceding_word_matched;
1229 }
1230 }
1231
1232 static private class PartialPhraseMatch
1233 {
1234 public int start_position;
1235 public int query_phrase_number;
1236 public int num_words_matched;
1237
1238 public PartialPhraseMatch(int start_position, int query_phrase_number)
1239 {
1240 this.start_position = start_position;
1241 this.query_phrase_number = query_phrase_number;
1242 this.num_words_matched = 1;
1243 }
1244 }
1245}
Note: See TracBrowser for help on using the repository browser.