source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25991

Last change on this file since 25991 was 25985, checked in by sjm84, 12 years ago

All of the actions that use format statements will now merge in the global format statement

  • Property svn:keywords set to Author Date Id Revision
File size: 42.4 KB
RevLine 
[3801]1/*
[24812]2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
[3645]19package org.greenstone.gsdl3.action;
20
[3801]21// Greenstone classes
[3645]22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
[3801]24
[3645]25// XML classes
[24812]26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
[4287]29import org.w3c.dom.Text;
[3801]30import org.w3c.dom.NodeList;
[3645]31
[3801]32// General Java classes
[8731]33import java.util.ArrayList;
[3645]34import java.util.HashMap;
[4287]35import java.util.HashSet;
[3645]36import java.io.File;
[25635]37import java.io.Serializable;
[3645]38
[13124]39import org.apache.log4j.*;
[3801]40
[24812]41/** Action class for retrieving Documents via the message router */
42public class DocumentAction extends Action
43{
[13124]44
[24116]45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
[13124]46
[24116]47 // this is used to specify that the sibling nodes of a selected one should be obtained
48 public static final String SIBLING_ARG = "sib";
49 public static final String GOTO_PAGE_ARG = "gp";
50 public static final String ENRICH_DOC_ARG = "end";
[25305]51 public static final String EXPAND_DOCUMENT_ARG = "ed";
52 public static final String EXPAND_CONTENTS_ARG = "ec";
53 public static final String REALISTIC_BOOK_ARG = "book";
[24812]54
55 /**
56 * if this is set to true, when a document is displayed, any annotation type
57 * services (enrich) will be offered to the user as well
58 */
59 protected boolean provide_annotations = false;
60
[24116]61 protected boolean highlight_query_terms = false;
[5694]62
[24812]63 public boolean configure()
64 {
[24116]65 super.configure();
[24812]66 String highlight = (String) config_params.get("highlightQueryTerms");
67 if (highlight != null && highlight.equals("true"))
68 {
[24116]69 highlight_query_terms = true;
70 }
[24812]71 String annotate = (String) config_params.get("displayAnnotationService");
72 if (annotate != null && annotate.equals("true"))
73 {
[24116]74 provide_annotations = true;
75 }
[25953]76 return true;
77 }
[24812]78
79 public Node process(Node message_node)
[24116]80 {
81 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
[24812]82
[24116]83 Element message = this.converter.nodeToElement(message_node);
[19984]84
[24116]85 // the response
86 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
87 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
88 result.appendChild(page_response);
[19984]89
[24116]90 // get the request - assume only one
[24812]91 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
92 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]93 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
[4023]94
[24116]95 // just in case there are some that need to get passed to the services
[24812]96 HashMap service_params = (HashMap) params.get("s0");
[4717]97
[24116]98 String collection = (String) params.get(GSParams.COLLECTION);
[25305]99 String document_id = (String) params.get(GSParams.DOCUMENT);
[25355]100 if (document_id != null && document_id.equals(""))
101 {
102 document_id = null;
[25305]103 }
104 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
[25355]105 if (href != null && href.equals(""))
106 {
107 href = null;
[25305]108 }
109 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
110 if (document_id == null && href == null)
[24812]111 {
[24116]112 logger.error("no document specified!");
113 return result;
114 }
[25355]115 if (rl != null && rl.equals("0"))
116 {
117 // this is a true external link, we should have been directed to a different page or action
118 logger.error("rl value was 0, shouldn't get here");
119 return result;
[25305]120 }
[24116]121 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
[25816]122 if (document_type != null && document_type.equals(""))
[24812]123 {
[25953]124 //document_type = "hierarchy";
125 document_type = null; // we'll get it later if not already specified
[24116]126 }
127 //whether to retrieve siblings or not
128 boolean get_siblings = false;
129 String sibs = (String) params.get(SIBLING_ARG);
[24812]130 if (sibs != null && sibs.equals("1"))
131 {
[24116]132 get_siblings = true;
133 }
[24812]134
[25305]135 String doc_id_modifier = "";
[24116]136 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
[24812]137 if (sibling_num != null && !sibling_num.equals(""))
138 {
[24116]139 // we have to modify the doc name
[25355]140 doc_id_modifier = "." + sibling_num + ".ss";
[24116]141 }
[24812]142
[24116]143 boolean expand_document = false;
[25305]144 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
[24812]145 if (ed_arg != null && ed_arg.equals("1"))
146 {
[24116]147 expand_document = true;
148 }
[14525]149
[24116]150 boolean expand_contents = false;
[24812]151 if (expand_document)
152 { // we always expand the contents with the text
[24116]153 expand_contents = true;
[24812]154 }
155 else
156 {
[25305]157 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
[24812]158 if (ec_arg != null && ec_arg.equals("1"))
159 {
[24116]160 expand_contents = true;
161 }
[5694]162 }
[25355]163
[25305]164 UserContext userContext = new UserContext(request);
[4257]165
[24116]166 //append site metadata
[24993]167 addSiteMetadata(page_response, userContext);
[25128]168 addInterfaceOptions(page_response);
[3801]169
[24116]170 // get the additional data needed for the page
[24993]171 getBackgroundData(page_response, collection, userContext);
[24812]172 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
173
[24116]174 // the_document is where all the doc info - structure and metadata etc
175 // is added into, to be returned in the page
176 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
177 page_response.appendChild(the_document);
[9874]178
[24116]179 // create a basic doc list containing the current node
[24812]180 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]181 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
182 basic_doc_list.appendChild(current_doc);
[25305]183 if (document_id != null)
[24812]184 {
[25355]185 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
[24812]186 }
[25355]187 else
[24812]188 {
[25305]189 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
190 // do we need this??
191 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
[14525]192 }
[3801]193
[25953]194 if (document_type == null)
195 {
196 logger.error("getting document type");
197 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
198 logger.error("new doc type = " + document_type);
[25816]199 }
[25953]200 if (document_type != null)
201 {
202 // set the doctype from the cgi arg or from the server as an attribute
203 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
[25816]204 }
[25953]205 else
206 {
207 logger.error("doctype is null!!!***********");
208 }
[25816]209
[24116]210 // Create a parameter list to specify the required structure information
[24812]211 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
212
213 if (service_params != null)
214 {
[24116]215 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
216 }
[3817]217
[24812]218 Element ds_param = null;
[24116]219 boolean get_structure = false;
220 boolean get_structure_info = false;
[24889]221 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
[24812]222 {
[24116]223 get_structure_info = true;
[24889]224
225 if (expand_contents)
226 {
227 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
228 ds_param_list.appendChild(ds_param);
229 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
230 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
231 }
232
[25305]233 // get the info needed for paged naviagtion
[24116]234 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
235 ds_param_list.appendChild(ds_param);
236 ds_param.setAttribute(GSXML.NAME_ATT, "info");
237 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
238 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
239 ds_param_list.appendChild(ds_param);
240 ds_param.setAttribute(GSXML.NAME_ATT, "info");
241 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
242 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
243 ds_param_list.appendChild(ds_param);
244 ds_param.setAttribute(GSXML.NAME_ATT, "info");
245 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
[24812]246
[24889]247 if (get_siblings)
248 {
249 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
250 ds_param_list.appendChild(ds_param);
251 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
252 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
253 }
254
[24812]255 }
[25968]256 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) ||document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY) )
[24812]257 {
[24116]258 get_structure = true;
[24812]259 if (expand_contents)
260 {
[24116]261 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
262 ds_param_list.appendChild(ds_param);
263 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
264 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
[24812]265 }
266 else
267 {
[24116]268 // get the info needed for table of contents
269 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
270 ds_param_list.appendChild(ds_param);
271 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
272 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
273 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
274 ds_param_list.appendChild(ds_param);
275 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
276 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
[24812]277 if (get_siblings)
278 {
[24116]279 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
280 ds_param_list.appendChild(ds_param);
281 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
282 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
283 }
284 }
[24812]285 }
286 else
287 {
[24116]288 // we dont need any structure
289 }
[3801]290
[24116]291 boolean has_dummy = false;
[24812]292 if (get_structure || get_structure_info)
293 {
[8676]294
[24116]295 // Build a request to obtain the document structure
296 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
297 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[24993]298 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]299 ds_message.appendChild(ds_request);
300 ds_request.appendChild(ds_param_list);
[24812]301
[25816]302 // add the node list we created earlier
[24116]303 ds_request.appendChild(basic_doc_list);
[24812]304
[24116]305 // Process the document structure retrieve message
306 Element ds_response_message = (Element) this.mr.process(ds_message);
[24812]307 if (processErrorElements(ds_response_message, page_response))
308 {
[24116]309 return result;
310 }
[4030]311
[24116]312 // get the info and print out
[24812]313 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]314 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
315 path = GSPath.appendLink(path, "nodeStructureInfo");
316 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
317 // get the doc_node bit
[24812]318 if (ds_response_struct_info != null)
319 {
320 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
[24116]321 }
[24812]322 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]323 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
324 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
325 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
[24812]326
327 if (ds_response_structure != null)
328 {
[24116]329 // add the contents of the structure bit into the_document
330 NodeList structs = ds_response_structure.getChildNodes();
[24812]331 for (int i = 0; i < structs.getLength(); i++)
332 {
[24116]333 the_document.appendChild(this.doc.importNode(structs.item(i), true));
334 }
[24812]335 }
336 else
337 {
[24116]338 // no structure nodes, so put in a dummy doc node
339 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]340 if (document_id != null)
[24812]341 {
[25305]342 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]343 }
[25355]344 else
[24812]345 {
[25305]346 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[25355]347
[24116]348 }
349 the_document.appendChild(doc_node);
350 has_dummy = true;
351 }
[24812]352 }
353 else
354 { // a simple type - we dont have a dummy node for simple
[24116]355 // should think about this more
356 // no structure request, so just put in a dummy doc node
357 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]358 if (document_id != null)
[24812]359 {
[25305]360 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]361 }
[25355]362 else
[24812]363 {
[25305]364 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[24116]365 }
366 the_document.appendChild(doc_node);
367 has_dummy = true;
368 }
[24812]369
[24116]370 // Build a request to obtain some document metadata
371 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
[24812]372 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
[24993]373 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]374 dm_message.appendChild(dm_request);
375 // Create a parameter list to specify the required metadata information
[24812]376
[25635]377 HashSet<String> meta_names = new HashSet<String>();
[24116]378 meta_names.add("Title"); // the default
[24812]379 if (format_elem != null)
380 {
[24889]381 getRequiredMetadataNames(format_elem, meta_names);
[24116]382 }
[24812]383
[24116]384 Element dm_param_list = createMetadataParamList(meta_names);
[24812]385 if (service_params != null)
386 {
[24116]387 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
388 }
[24812]389
[24116]390 dm_request.appendChild(dm_param_list);
[24812]391
[24116]392 // create the doc node list for the metadata request
[24812]393 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]394 dm_request.appendChild(dm_doc_list);
[4030]395
[24116]396 // Add each node from the structure response into the metadata request
397 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
[24812]398 for (int i = 0; i < doc_nodes.getLength(); i++)
399 {
[24116]400 Element doc_node = (Element) doc_nodes.item(i);
401 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
[3801]402
[24116]403 // Add the documentNode to the list
404 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
405 dm_doc_list.appendChild(dm_doc_node);
406 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
[24812]407 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
[24116]408 }
[3801]409
[24116]410 // we also want a metadata request to the top level document to get
411 // assocfilepath - this could be cached too
[24993]412 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]413 dm_message.appendChild(doc_meta_request);
[24812]414 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
415 if (service_params != null)
416 {
[24116]417 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
418 }
[3801]419
[24116]420 doc_meta_request.appendChild(doc_meta_param_list);
421 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
422 doc_meta_param_list.appendChild(doc_param);
423 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
424 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
[8676]425
[24116]426 // create the doc node list for the metadata request
[24812]427 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]428 doc_meta_request.appendChild(doc_list);
[3801]429
[24116]430 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
431 // the node we want is the root document node
[25355]432 if (document_id != null)
[24812]433 {
[25305]434 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
[24812]435 }
[25355]436 else
[24812]437 {
[25355]438 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
439 // can we assume that href is always a top level doc??
440 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
[25305]441 //doc_node.setAttribute("externalURL", has_rl);
[24116]442 }
443 doc_list.appendChild(doc_node);
[24889]444
[24116]445 Element dm_response_message = (Element) this.mr.process(dm_message);
[24812]446 if (processErrorElements(dm_response_message, page_response))
447 {
[24116]448 return result;
449 }
[9874]450
[24812]451 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]452 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
[3801]453
[24116]454 // Merge the metadata with the structure information
455 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
[24812]456 for (int i = 0; i < doc_nodes.getLength(); i++)
457 {
[24116]458 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
[8833]459 }
[24116]460 // get the top level doc metadata out
[24812]461 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
462 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
[24116]463 GSXML.mergeMetadataLists(the_document, top_doc_node);
[24812]464
[24116]465 // Build a request to obtain some document content
466 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
[24812]467 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
[24993]468 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]469 dc_message.appendChild(dc_request);
[5694]470
[24116]471 // Create a parameter list to specify the request parameters - empty for now
[24812]472 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
473 if (service_params != null)
474 {
[24116]475 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
476 }
[4858]477
[24116]478 dc_request.appendChild(dc_param_list);
479
480 // get the content
481 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
[24812]482 if (expand_document)
483 {
[24116]484 dc_request.appendChild(dm_doc_list);
[24812]485 }
486 else
487 {
[24116]488 dc_request.appendChild(basic_doc_list);
[4858]489 }
[25642]490 logger.debug("request = " + XMLConverter.getString(dc_message));
[24116]491 Element dc_response_message = (Element) this.mr.process(dc_message);
[24812]492 if (processErrorElements(dc_response_message, page_response))
493 {
[24116]494 return result;
[4827]495 }
[3987]496
[24116]497 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
[25953]498
[24812]499 if (expand_document)
500 {
[24116]501 // Merge the content with the structure information
502 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
[24812]503 for (int i = 0; i < doc_nodes.getLength(); i++)
504 {
505 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
506 if (content != null)
507 {
508 if (highlight_query_terms)
509 {
510 content = highlightQueryTerms(request, (Element) content);
[24116]511 }
512 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
513 }
514 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
515 }
[24812]516 }
517 else
518 {
[24116]519 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
520 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
521 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
[25305]522 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
[25953]523
[24812]524 if (dc_response_doc_content == null)
525 {
[24116]526 // no content to add
[25355]527 if (dc_response_doc.getAttribute("external").equals("true"))
528 {
529
530 //if (dc_response_doc_external != null)
531 //{
[25305]532 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
[24812]533
[25305]534 the_document.setAttribute("selectedNode", href_id);
535 the_document.setAttribute("external", href_id);
[25355]536 }
537 return result;
[24116]538 }
[24812]539 if (highlight_query_terms)
540 {
[24116]541 dc_response_doc.removeChild(dc_response_doc_content);
[24812]542
[24116]543 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
544 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
545 }
[24812]546
547 if (provide_annotations)
548 {
549 String service_selected = (String) params.get(ENRICH_DOC_ARG);
550 if (service_selected != null && service_selected.equals("1"))
551 {
[24116]552 // now we can modifiy the response doc if needed
[24812]553 String enrich_service = (String) params.get(GSParams.SERVICE);
[24116]554 // send a message to the service
555 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
[24993]556 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
[24116]557 enrich_message.appendChild(enrich_request);
558 // check for parameters
[24812]559 HashMap e_service_params = (HashMap) params.get("s1");
560 if (e_service_params != null)
561 {
562 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
563 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
[24116]564 enrich_request.appendChild(enrich_pl);
565 }
[24812]566 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]567 enrich_request.appendChild(e_doc_list);
568 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
[24812]569
[24116]570 Node enrich_response = this.mr.process(enrich_message);
[24812]571
572 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
[24116]573 path = GSPath.createPath(links);
[24812]574 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
575
576 }
[24116]577 } // if provide_annotations
[3987]578
[24116]579 // use the returned id rather than the sent one cos there may have
580 // been modifiers such as .pr that are removed.
581 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
582 the_document.setAttribute("selectedNode", modified_doc_id);
[24812]583 if (has_dummy)
584 {
[24116]585 // change the id if necessary and add the content
[24812]586 Element dummy_node = (Element) doc_nodes.item(0);
587
[24116]588 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
589 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
590 // hack for simple type
[24812]591 if (document_type.equals("simple"))
592 {
[24116]593 // we dont want the internal docNode, just want the content and metadata in the document
594 // rethink this!!
595 the_document.removeChild(dummy_node);
[4023]596
[24116]597 NodeList dummy_children = dummy_node.getChildNodes();
598 //for (int i=0; i<dummy_children.getLength(); i++) {
[24812]599 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
600 {
[24116]601 // special case as we don't want more than one metadata list
[24812]602 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
603 {
[24116]604 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
[24812]605 }
606 else
607 {
[24116]608 the_document.appendChild(dummy_children.item(i));
609 }
610 }
611 }
[24812]612 }
613 else
614 {
[24116]615 // Merge the document content with the metadata and structure information
[24812]616 for (int i = 0; i < doc_nodes.getLength(); i++)
617 {
[24116]618 Node dn = doc_nodes.item(i);
[24812]619 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
620 if (dn_id.equals(modified_doc_id))
621 {
[24116]622 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
623 break;
624 }
625 }
626 }
627 }
628 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
629 return result;
[3801]630 }
[24812]631
632 /**
633 * tell the param class what its arguments are if an action has its own
634 * arguments, this should add them to the params object - particularly
635 * important for args that should not be saved
636 */
[25305]637 public boolean addActionParameters(GSParams params)
[24812]638 {
[24116]639 params.addParameter(GOTO_PAGE_ARG, false);
640 params.addParameter(ENRICH_DOC_ARG, false);
[25305]641 params.addParameter(EXPAND_DOCUMENT_ARG, false);
642 params.addParameter(EXPAND_CONTENTS_ARG, false);
643 params.addParameter(REALISTIC_BOOK_ARG, false);
644
[24116]645 return true;
[4717]646 }
[4023]647
[24812]648 /**
649 * this method gets the collection description, the format info, the list of
650 * enrich services, etc - stuff that is needed for the page, but is the same
651 * whatever the query is - should be cached
652 */
[24993]653 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
[24812]654 {
[4023]655
[24116]656 // create a message to process - contains requests for the collection
657 // description, the format element, the enrich services on offer
658 // these could all be cached
659 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
660 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
661 // the format request - ignore for now, where does this request go to??
[24993]662 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
[24116]663 info_message.appendChild(format_request);
664
665 // the enrich_services request - only do this if provide_annotations is true
666
[24812]667 if (provide_annotations)
668 {
[24993]669 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
[24116]670 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
671 info_message.appendChild(enrich_services_request);
[4023]672 }
[24116]673
[24812]674 Element info_response = (Element) this.mr.process(info_message);
675
[24116]676 // the collection is the first response
677 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
678 Element format_resp = (Element) responses.item(0);
[24812]679
680 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
681 if (format_elem != null)
682 {
[25985]683 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
684 if(global_format_elem != null)
685 {
686 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
687 }
688
689 // set the format type
[24812]690 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
[24116]691 page_response.appendChild(this.doc.importNode(format_elem, true));
[4023]692 }
[4287]693
[24812]694 if (provide_annotations)
695 {
696 Element services_resp = (Element) responses.item(1);
[4287]697
[24116]698 // a new message for the mr
699 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
700 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
701 boolean service_found = false;
[24812]702 for (int j = 0; j < e_services.getLength(); j++)
703 {
704 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
705 {
[24993]706 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
[24116]707 enrich_message.appendChild(s);
708 service_found = true;
709 }
710 }
[24812]711 if (service_found)
712 {
713 Element enrich_response = (Element) this.mr.process(enrich_message);
714
[24116]715 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
716 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
[24812]717 for (int i = 0; i < e_responses.getLength(); i++)
718 {
719 Element e_resp = (Element) e_responses.item(i);
720 Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
[24116]721 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
722 service_list.appendChild(e_service);
723 }
724 page_response.appendChild(service_list);
725 }
726 } // if provide_annotations
727 return true;
[24812]728
[9874]729 }
[4287]730
[25953]731 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
732 {
733 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
734 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
735 Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
736 ds_message.appendChild(ds_request);
[25816]737
[25953]738 // Create a parameter list to specify the required structure information
739 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
740 Element ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
741 ds_param_list.appendChild(ds_param);
742 ds_param.setAttribute(GSXML.NAME_ATT, "info");
743 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
[25816]744
[25953]745 ds_request.appendChild(ds_param_list);
[25816]746
[25953]747 // add the node list we created earlier
748 ds_request.appendChild(basic_doc_list);
749
750 // Process the document structure retrieve message
751 Element ds_response_message = (Element) this.mr.process(ds_message);
752 if (processErrorElements(ds_response_message, page_response))
753 {
754 return null;
755 }
756
757 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
758 String path = GSPath.createPath(links);
759 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
760 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
761 if (doctype_elem != null)
762 {
763 String doc_type = doctype_elem.getAttribute("value");
764 return doc_type;
765 }
766 return null;
767 }
768
[24812]769 /**
770 * this involves a bit of a hack to get the equivalent query terms - has to
771 * requery the query service - uses the last selected service name. (if it
772 * ends in query). should this action do the query or should it send a
773 * message to the query action? but that will involve lots of extra stuff.
[24889]774 * also doesn't handle phrases properly - just highlights all the terms
775 * found in the text.
[24812]776 */
777 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
778 {
[24116]779 // do the query again to get term info
[24812]780 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]781 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
[24812]782
783 HashMap previous_params = (HashMap) params.get("p");
784 if (previous_params == null)
785 {
[24116]786 return dc_response_doc_content;
787 }
[24812]788 String service_name = (String) previous_params.get(GSParams.SERVICE);
789 if (service_name == null || !service_name.endsWith("Query"))
790 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
[24116]791 logger.debug("invalid service, not doing highlighting");
792 return dc_response_doc_content;
793 }
[24812]794 String collection = (String) params.get(GSParams.COLLECTION);
[24993]795 UserContext userContext = new UserContext(request);
[24116]796 String to = GSPath.appendLink(collection, service_name);
[24812]797
[24116]798 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
[24993]799 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]800 mr_query_message.appendChild(mr_query_request);
[24812]801
[24116]802 // paramList
[24812]803 HashMap service_params = (HashMap) params.get("s1");
804
[24116]805 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
806 GSXML.addParametersToList(this.doc, query_param_list, service_params);
807 mr_query_request.appendChild(query_param_list);
[8731]808
[24116]809 // do the query
[24812]810 Element mr_query_response = (Element) this.mr.process(mr_query_message);
811
812 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
[24116]813 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
[24812]814 if (query_term_list_element == null)
815 {
[24116]816 // no term info
817 logger.error("No query term information.\n");
818 return dc_response_doc_content;
819 }
[8731]820
[24116]821 String content = GSXML.getNodeText(dc_response_doc_content);
[4287]822
[24812]823 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[24116]824 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
[4717]825
[25635]826 HashSet<String> query_term_variants = new HashSet<String>();
[24116]827 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
[24812]828 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
[24116]829 {
830 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
[24812]831 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
[24116]832 {
[24812]833 for (int i = 0; i < terms_nodelist.getLength(); i++)
[24116]834 {
[24812]835 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
[24116]836 String termValueU = null;
837 String termValueL = null;
[24812]838
839 if (termValue.length() > 1)
[24116]840 {
841 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
842 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
843 }
844 else
845 {
846 termValueU = termValue.substring(0, 1).toUpperCase();
847 termValueL = termValue.substring(0, 1).toLowerCase();
848 }
[24812]849
[24116]850 query_term_variants.add(termValueU);
851 query_term_variants.add(termValueL);
852 }
853 }
854 }
855 else
856 {
[24812]857 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
858 {
[24116]859 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
860 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
[24812]861 for (int j = 0; j < equivalent_terms.length; j++)
862 {
[24116]863 query_term_variants.add(equivalent_terms[j]);
864 }
865 }
866 }
[4287]867
[25635]868 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
[4287]869
[24116]870 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
871 String performed_query = GSXML.getNodeText(query_element) + " ";
[8731]872
[25635]873 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]874 int term_start = 0;
875 boolean in_term = false;
876 boolean in_phrase = false;
[24812]877 for (int i = 0; i < performed_query.length(); i++)
878 {
[24116]879 char character = performed_query.charAt(i);
880 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
881
882 // Has a query term just started?
[24812]883 if (in_term == false && is_character_letter_or_digit == true)
884 {
[24116]885 in_term = true;
886 term_start = i;
887 }
888
889 // Or has a term just finished?
[24812]890 else if (in_term == true && is_character_letter_or_digit == false)
891 {
[24116]892 in_term = false;
893 String term = performed_query.substring(term_start, i);
[24812]894
[24116]895 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
[24812]896 if (term_element != null)
897 {
898
[25635]899 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
[24812]900
[24116]901 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
[24812]902 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
[24116]903 {
904 String termValueU = null;
905 String termValueL = null;
[24812]906
907 if (term.length() > 1)
[24116]908 {
909 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
910 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
911 }
912 else
913 {
914 termValueU = term.substring(0, 1).toUpperCase();
915 termValueL = term.substring(0, 1).toLowerCase();
916 }
[24812]917
[24116]918 phrase_query_p_term_x_variants.add(termValueU);
919 phrase_query_p_term_x_variants.add(termValueL);
920 }
921 else
922 {
[24812]923 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
924 {
[24116]925 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
926 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
[24812]927 for (int k = 0; k < term_equivalent_terms.length; k++)
928 {
[24116]929 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
930 }
931 }
932 }
933 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
[24812]934
935 if (in_phrase == false)
936 {
[24116]937 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
[25635]938 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]939 }
940 }
[9007]941 }
[24116]942 // Watch for phrases (surrounded by quotes)
[24812]943 if (character == '\"')
944 {
[24116]945 // Has a phrase just started?
[24812]946 if (in_phrase == false)
947 {
[24116]948 in_phrase = true;
949 }
950 // Or has a phrase just finished?
[24812]951 else if (in_phrase == true)
952 {
[24116]953 in_phrase = false;
954 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
955 }
956
[25635]957 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]958 }
[4287]959 }
[8731]960
[24116]961 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
[8731]962 }
963
[24116]964 /**
[24812]965 * Highlights query terms in a piece of text.
966 */
[25635]967 private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
[24116]968 {
969 // Convert the content string to an array of characters for speed
970 char[] content_characters = new char[content.length()];
971 content.getChars(0, content.length(), content_characters, 0);
[8731]972
[24116]973 // Now skim through the content, identifying word matches
[25635]974 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
[24116]975 int word_start = 0;
976 boolean in_word = false;
977 boolean preceding_word_matched = false;
[24813]978 boolean inTag = false;
[24812]979 for (int i = 0; i < content_characters.length; i++)
980 {
[24813]981 //We don't want to find words inside HTML tags
[24993]982 if (content_characters[i] == '<')
[24813]983 {
984 inTag = true;
985 continue;
986 }
987 else if (inTag && content_characters[i] == '>')
988 {
989 inTag = false;
990 }
991 else if (inTag)
992 {
993 continue;
994 }
[24993]995
[24116]996 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
[24993]997
[24116]998 // Has a word just started?
[24812]999 if (in_word == false && is_character_letter_or_digit == true)
1000 {
[24116]1001 in_word = true;
1002 word_start = i;
1003 }
[8731]1004
[24116]1005 // Or has a word just finished?
[24812]1006 else if (in_word == true && is_character_letter_or_digit == false)
1007 {
[24116]1008 in_word = false;
[8731]1009
[24116]1010 // Check if the word matches any of the query term equivalents
1011 String word = new String(content_characters, word_start, (i - word_start));
[24812]1012 if (query_term_variants.contains(word))
1013 {
[24116]1014 // We have found a matching word, so remember its location
1015 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1016 preceding_word_matched = true;
1017 }
[24812]1018 else
1019 {
[24116]1020 preceding_word_matched = false;
1021 }
1022 }
1023 }
[8731]1024
[24116]1025 // Don't forget the last word...
[24812]1026 if (in_word == true)
1027 {
[24116]1028 // Check if the word matches any of the query term equivalents
1029 String word = new String(content_characters, word_start, (content_characters.length - word_start));
[24812]1030 if (query_term_variants.contains(word))
1031 {
[24116]1032 // We have found a matching word, so remember its location
1033 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1034 }
[8731]1035 }
1036
[25635]1037 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1038 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
[8731]1039
[24116]1040 // Deal with phrases now
[25635]1041 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
[24812]1042 for (int i = 0; i < word_matches.size(); i++)
1043 {
[25635]1044 WordMatch word_match = word_matches.get(i);
[8731]1045
[24116]1046 // See if any partial phrase matches are extended by this word
[24812]1047 if (word_match.preceding_word_matched)
1048 {
1049 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1050 {
[25635]1051 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1052 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
[24116]1053 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
[24812]1054 if (phrase_query_p_term_x_variants.contains(word_match.word))
1055 {
[24116]1056 partial_phrase_match.num_words_matched++;
[8731]1057
[24116]1058 // Has a complete phrase match occurred?
[24812]1059 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1060 {
[24116]1061 // Check for overlaps by looking at the previous highlight range
[24812]1062 if (!highlight_end_positions.isEmpty())
1063 {
[24116]1064 int last_highlight_index = highlight_end_positions.size() - 1;
[25635]1065 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
[24812]1066 if (last_highlight_end > partial_phrase_match.start_position)
1067 {
[24116]1068 // There is an overlap, so remove the previous phrase match
[25635]1069 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
[24116]1070 highlight_end_positions.remove(last_highlight_index);
1071 partial_phrase_match.start_position = last_highlight_start;
1072 }
1073 }
[8731]1074
[24116]1075 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1076 highlight_end_positions.add(new Integer(word_match.end_position));
1077 }
1078 // No, but add the partial match back into the list for next time
[24812]1079 else
1080 {
[24116]1081 partial_phrase_matches.add(partial_phrase_match);
1082 }
1083 }
1084 }
1085 }
[24812]1086 else
1087 {
[24116]1088 partial_phrase_matches.clear();
1089 }
[8731]1090
[24116]1091 // See if this word is at the start of any of the phrases
[24812]1092 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1093 {
[25635]1094 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
[24116]1095 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
[24812]1096 if (phrase_query_p_term_1_variants.contains(word_match.word))
1097 {
[24116]1098 // If this phrase is just one word long, we have a complete match
[24812]1099 if (phrase_query_p_term_variants_list.size() == 1)
1100 {
[24116]1101 highlight_start_positions.add(new Integer(word_match.start_position));
1102 highlight_end_positions.add(new Integer(word_match.end_position));
1103 }
1104 // Otherwise we have the start of a potential phrase match
[24812]1105 else
1106 {
[24116]1107 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1108 }
1109 }
1110 }
[4287]1111 }
[4717]1112
[24116]1113 // Now add the annotation tags into the document at the correct points
1114 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
[8731]1115
[24116]1116 int last_wrote = 0;
[24812]1117 for (int i = 0; i < highlight_start_positions.size(); i++)
1118 {
[25635]1119 int highlight_start = highlight_start_positions.get(i).intValue();
1120 int highlight_end = highlight_end_positions.get(i).intValue();
[8731]1121
[24116]1122 // Print anything before the highlight range
[24812]1123 if (last_wrote < highlight_start)
1124 {
[24116]1125 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1126 content_element.appendChild(this.doc.createTextNode(preceding_text));
1127 }
[8731]1128
[24116]1129 // Print the highlight text, annotated
[24812]1130 if (highlight_end > last_wrote)
1131 {
[24116]1132 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1133 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1134 annotation_element.setAttribute("type", "query_term");
1135 content_element.appendChild(annotation_element);
1136 last_wrote = highlight_end;
1137 }
1138 }
[8731]1139
[24116]1140 // Finish off any unwritten text
[24812]1141 if (last_wrote < content_characters.length)
1142 {
[24116]1143 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1144 content_element.appendChild(this.doc.createTextNode(remaining_text));
1145 }
1146
1147 return content_element;
[8731]1148 }
1149
[24116]1150 static private class WordMatch
1151 {
1152 public String word;
1153 public int start_position;
1154 public int end_position;
1155 public boolean preceding_word_matched;
[8731]1156
[24116]1157 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1158 {
1159 this.word = word;
1160 this.start_position = start_position;
1161 this.end_position = end_position;
1162 this.preceding_word_matched = preceding_word_matched;
1163 }
[8731]1164 }
1165
[24116]1166 static private class PartialPhraseMatch
1167 {
1168 public int start_position;
1169 public int query_phrase_number;
1170 public int num_words_matched;
[8731]1171
[24116]1172 public PartialPhraseMatch(int start_position, int query_phrase_number)
1173 {
1174 this.start_position = start_position;
1175 this.query_phrase_number = query_phrase_number;
1176 this.num_words_matched = 1;
1177 }
[8731]1178 }
[3645]1179}
Note: See TracBrowser for help on using the repository browser.