source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 30554

Last change on this file since 30554 was 30554, checked in by kjdon, 8 years ago

undoing last commit which was made by mistake

  • Property svn:keywords set to Author Date Id Revision
File size: 46.0 KB
RevLine 
[3801]1/*
[24812]2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
[3645]19package org.greenstone.gsdl3.action;
20
[3801]21// Greenstone classes
[3645]22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
[3801]24
[3645]25// XML classes
[24812]26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
[4287]29import org.w3c.dom.Text;
[3801]30import org.w3c.dom.NodeList;
[3645]31
[3801]32// General Java classes
[8731]33import java.util.ArrayList;
[3645]34import java.util.HashMap;
[4287]35import java.util.HashSet;
[3645]36import java.io.File;
[25635]37import java.io.Serializable;
[3645]38
[13124]39import org.apache.log4j.*;
[3801]40
[24812]41/** Action class for retrieving Documents via the message router */
42public class DocumentAction extends Action
43{
[13124]44
[24116]45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
[13124]46
[24116]47 // this is used to specify that the sibling nodes of a selected one should be obtained
48 public static final String SIBLING_ARG = "sib";
49 public static final String GOTO_PAGE_ARG = "gp";
50 public static final String ENRICH_DOC_ARG = "end";
[25305]51 public static final String EXPAND_DOCUMENT_ARG = "ed";
52 public static final String EXPAND_CONTENTS_ARG = "ec";
53 public static final String REALISTIC_BOOK_ARG = "book";
[24812]54
55 /**
56 * if this is set to true, when a document is displayed, any annotation type
57 * services (enrich) will be offered to the user as well
58 */
59 protected boolean provide_annotations = false;
60
[24116]61 protected boolean highlight_query_terms = false;
[5694]62
[24812]63 public boolean configure()
64 {
[24116]65 super.configure();
[24812]66 String highlight = (String) config_params.get("highlightQueryTerms");
67 if (highlight != null && highlight.equals("true"))
68 {
[24116]69 highlight_query_terms = true;
70 }
[24812]71 String annotate = (String) config_params.get("displayAnnotationService");
72 if (annotate != null && annotate.equals("true"))
73 {
[24116]74 provide_annotations = true;
75 }
[25953]76 return true;
77 }
[24812]78
79 public Node process(Node message_node)
[24116]80 {
81 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
[24812]82
[28964]83 Element message = GSXML.nodeToElement(message_node);
[28382]84 Document doc = message.getOwnerDocument();
85
[24116]86 // the response
[28382]87 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
88 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
[24116]89 result.appendChild(page_response);
[19984]90
[24116]91 // get the request - assume only one
[24812]92 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
93 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]94 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
[4023]95
[24116]96 // just in case there are some that need to get passed to the services
[24812]97 HashMap service_params = (HashMap) params.get("s0");
[4717]98
[24116]99 String collection = (String) params.get(GSParams.COLLECTION);
[25305]100 String document_id = (String) params.get(GSParams.DOCUMENT);
[25355]101 if (document_id != null && document_id.equals(""))
102 {
103 document_id = null;
[25305]104 }
105 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
[25355]106 if (href != null && href.equals(""))
107 {
108 href = null;
[25305]109 }
110 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
111 if (document_id == null && href == null)
[24812]112 {
[24116]113 logger.error("no document specified!");
114 return result;
115 }
[25355]116 if (rl != null && rl.equals("0"))
117 {
118 // this is a true external link, we should have been directed to a different page or action
119 logger.error("rl value was 0, shouldn't get here");
120 return result;
[25305]121 }
[29521]122
123 UserContext userContext = new UserContext(request);
124
125 //append site metadata
126 addSiteMetadata(page_response, userContext);
127 addInterfaceOptions(page_response);
128
129 // get the additional data needed for the page
130 getBackgroundData(page_response, collection, userContext);
131 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
132
133 if (format_elem != null) {
134 // lets look for param defaults set in config file
135 NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
136 for (int i=0; i<param_defaults.getLength(); i++) {
137 Element p = (Element)param_defaults.item(i);
138 String name = p.getAttribute(GSXML.NAME_ATT);
139 if (params.get(name) ==null) {
140 // wasn't set from interface
141 String value = p.getAttribute(GSXML.VALUE_ATT);
142 params.put(name, value );
143 // also add into request param xml so that xslt knows it too
144 GSXML.addParameterToList(cgi_paramList, name, value);
145 }
146 }
147 }
[24116]148 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
[25816]149 if (document_type != null && document_type.equals(""))
[24812]150 {
[25953]151 //document_type = "hierarchy";
152 document_type = null; // we'll get it later if not already specified
[24116]153 }
154 //whether to retrieve siblings or not
155 boolean get_siblings = false;
156 String sibs = (String) params.get(SIBLING_ARG);
[24812]157 if (sibs != null && sibs.equals("1"))
158 {
[24116]159 get_siblings = true;
160 }
[24812]161
[25305]162 String doc_id_modifier = "";
[24116]163 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
[24812]164 if (sibling_num != null && !sibling_num.equals(""))
165 {
[24116]166 // we have to modify the doc name
[25355]167 doc_id_modifier = "." + sibling_num + ".ss";
[24116]168 }
[24812]169
[24116]170 boolean expand_document = false;
[25305]171 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
[24812]172 if (ed_arg != null && ed_arg.equals("1"))
173 {
[24116]174 expand_document = true;
175 }
[14525]176
[24116]177 boolean expand_contents = false;
[24812]178 if (expand_document)
179 { // we always expand the contents with the text
[24116]180 expand_contents = true;
[24812]181 }
182 else
183 {
[25305]184 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
[24812]185 if (ec_arg != null && ec_arg.equals("1"))
186 {
[24116]187 expand_contents = true;
188 }
[5694]189 }
[25355]190
[29521]191 // UserContext userContext = new UserContext(request);
[4257]192
[29521]193 // //append site metadata
194 // addSiteMetadata(page_response, userContext);
195 // addInterfaceOptions(page_response);
[3801]196
[29521]197 // // get the additional data needed for the page
198 // getBackgroundData(page_response, collection, userContext);
199 // Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
[24812]200
[24116]201 // the_document is where all the doc info - structure and metadata etc
202 // is added into, to be returned in the page
[28382]203 Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
[24116]204 page_response.appendChild(the_document);
[9874]205
[24116]206 // create a basic doc list containing the current node
[28382]207 Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
208 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]209 basic_doc_list.appendChild(current_doc);
[25305]210 if (document_id != null)
[24812]211 {
[25355]212 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
[24812]213 }
[25355]214 else
[24812]215 {
[25305]216 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
217 // do we need this??
218 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
[14525]219 }
[3801]220
[25953]221 if (document_type == null)
222 {
223 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[25816]224 }
[29439]225 if (document_type == null)
[25953]226 {
[29439]227 logger.error("doctype is null!!!***********");
228 document_type = GSXML.DOC_TYPE_SIMPLE;
[25816]229 }
[29439]230
231 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
232
[25816]233
[24116]234 // Create a parameter list to specify the required structure information
[28382]235 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]236
237 if (service_params != null)
238 {
[28964]239 GSXML.addParametersToList(ds_param_list, service_params);
[24116]240 }
[3817]241
[24812]242 Element ds_param = null;
[24116]243 boolean get_structure = false;
244 boolean get_structure_info = false;
[24889]245 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
[24812]246 {
[24116]247 get_structure_info = true;
[24889]248
249 if (expand_contents)
250 {
[28382]251 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]252 ds_param_list.appendChild(ds_param);
253 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
254 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
255 }
256
[25305]257 // get the info needed for paged naviagtion
[28382]258 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]259 ds_param_list.appendChild(ds_param);
260 ds_param.setAttribute(GSXML.NAME_ATT, "info");
261 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
[28382]262 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]263 ds_param_list.appendChild(ds_param);
264 ds_param.setAttribute(GSXML.NAME_ATT, "info");
265 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
[28382]266 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]267 ds_param_list.appendChild(ds_param);
268 ds_param.setAttribute(GSXML.NAME_ATT, "info");
269 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
[24812]270
[24889]271 if (get_siblings)
272 {
[28382]273 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]274 ds_param_list.appendChild(ds_param);
275 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
276 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
277 }
278
[24812]279 }
[28258]280 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) || document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
[24812]281 {
[24116]282 get_structure = true;
[24812]283 if (expand_contents)
284 {
[28382]285 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]286 ds_param_list.appendChild(ds_param);
287 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
288 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
[24812]289 }
290 else
291 {
[24116]292 // get the info needed for table of contents
[28382]293 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]294 ds_param_list.appendChild(ds_param);
295 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
296 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
[28382]297 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]298 ds_param_list.appendChild(ds_param);
299 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
300 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
[24812]301 if (get_siblings)
302 {
[28382]303 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]304 ds_param_list.appendChild(ds_param);
305 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
306 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
307 }
308 }
[24812]309 }
310 else
311 {
[24116]312 // we dont need any structure
313 }
[3801]314
[24116]315 boolean has_dummy = false;
[24812]316 if (get_structure || get_structure_info)
317 {
[8676]318
[24116]319 // Build a request to obtain the document structure
[28382]320 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]321 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]322 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]323 ds_message.appendChild(ds_request);
324 ds_request.appendChild(ds_param_list);
[24812]325
[25816]326 // add the node list we created earlier
[24116]327 ds_request.appendChild(basic_doc_list);
[24812]328
[24116]329 // Process the document structure retrieve message
330 Element ds_response_message = (Element) this.mr.process(ds_message);
[24812]331 if (processErrorElements(ds_response_message, page_response))
332 {
[24116]333 return result;
334 }
[4030]335
[24116]336 // get the info and print out
[24812]337 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]338 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
339 path = GSPath.appendLink(path, "nodeStructureInfo");
340 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
341 // get the doc_node bit
[24812]342 if (ds_response_struct_info != null)
343 {
[28382]344 the_document.appendChild(doc.importNode(ds_response_struct_info, true));
[24116]345 }
[24812]346 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]347 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
348 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
349 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
[24812]350
351 if (ds_response_structure != null)
352 {
[24116]353 // add the contents of the structure bit into the_document
354 NodeList structs = ds_response_structure.getChildNodes();
[24812]355 for (int i = 0; i < structs.getLength(); i++)
356 {
[28382]357 the_document.appendChild(doc.importNode(structs.item(i), true));
[24116]358 }
[24812]359 }
360 else
361 {
[24116]362 // no structure nodes, so put in a dummy doc node
[28382]363 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]364 if (document_id != null)
[24812]365 {
[25305]366 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]367 }
[25355]368 else
[24812]369 {
[25305]370 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[25355]371
[24116]372 }
373 the_document.appendChild(doc_node);
374 has_dummy = true;
375 }
[24812]376 }
377 else
378 { // a simple type - we dont have a dummy node for simple
[24116]379 // should think about this more
380 // no structure request, so just put in a dummy doc node
[28382]381 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]382 if (document_id != null)
[24812]383 {
[25305]384 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]385 }
[25355]386 else
[24812]387 {
[25305]388 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[24116]389 }
390 the_document.appendChild(doc_node);
391 has_dummy = true;
392 }
[24812]393
[24116]394 // Build a request to obtain some document metadata
[28382]395 Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]396 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
[28382]397 Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]398 dm_message.appendChild(dm_request);
399 // Create a parameter list to specify the required metadata information
[24812]400
[25635]401 HashSet<String> meta_names = new HashSet<String>();
[24116]402 meta_names.add("Title"); // the default
[24812]403 if (format_elem != null)
404 {
[24889]405 getRequiredMetadataNames(format_elem, meta_names);
[24116]406 }
[28258]407
[26026]408 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
[28258]409 if (extraMetaListElem != null)
[26026]410 {
411 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
[28258]412 for (int i = 0; i < extraMetaList.getLength(); i++)
[26026]413 {
[28258]414 meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
[26026]415 }
416 }
[24812]417
[28382]418 Element dm_param_list = createMetadataParamList(doc,meta_names);
[24812]419 if (service_params != null)
420 {
[28964]421 GSXML.addParametersToList(dm_param_list, service_params);
[24116]422 }
[24812]423
[24116]424 dm_request.appendChild(dm_param_list);
[24812]425
[24116]426 // create the doc node list for the metadata request
[28382]427 Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]428 dm_request.appendChild(dm_doc_list);
[4030]429
[24116]430 // Add each node from the structure response into the metadata request
431 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
[24812]432 for (int i = 0; i < doc_nodes.getLength(); i++)
433 {
[24116]434 Element doc_node = (Element) doc_nodes.item(i);
435 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
[3801]436
[24116]437 // Add the documentNode to the list
[28382]438 Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]439 dm_doc_list.appendChild(dm_doc_node);
440 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
[24812]441 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
[29922]442 if (document_id == null){
443 dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
444 }
445
[24116]446 }
[3801]447
[24116]448 // we also want a metadata request to the top level document to get
449 // assocfilepath - this could be cached too
[28382]450 Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]451 dm_message.appendChild(doc_meta_request);
[28382]452 Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]453 if (service_params != null)
454 {
[28964]455 GSXML.addParametersToList(doc_meta_param_list, service_params);
[24116]456 }
[3801]457
[24116]458 doc_meta_request.appendChild(doc_meta_param_list);
[28382]459 Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]460 doc_meta_param_list.appendChild(doc_param);
461 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
462 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
[8676]463
[24116]464 // create the doc node list for the metadata request
[28382]465 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]466 doc_meta_request.appendChild(doc_list);
[3801]467
[28382]468 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]469 // the node we want is the root document node
[25355]470 if (document_id != null)
[24812]471 {
[25305]472 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
[24812]473 }
[29922]474 /*else
[24812]475 {
[25355]476 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
477 // can we assume that href is always a top level doc??
478 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
[25305]479 //doc_node.setAttribute("externalURL", has_rl);
[29922]480 }*/
[24116]481 doc_list.appendChild(doc_node);
[24889]482
[24116]483 Element dm_response_message = (Element) this.mr.process(dm_message);
[24812]484 if (processErrorElements(dm_response_message, page_response))
485 {
[24116]486 return result;
487 }
[9874]488
[24812]489 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]490 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
[3801]491
[24116]492 // Merge the metadata with the structure information
493 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
[24812]494 for (int i = 0; i < doc_nodes.getLength(); i++)
495 {
[24116]496 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
[8833]497 }
[24116]498 // get the top level doc metadata out
[24812]499 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
500 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
[24116]501 GSXML.mergeMetadataLists(the_document, top_doc_node);
[24812]502
[24116]503 // Build a request to obtain some document content
[28382]504 Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]505 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
[28382]506 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]507 dc_message.appendChild(dc_request);
[5694]508
[24116]509 // Create a parameter list to specify the request parameters - empty for now
[28382]510 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]511 if (service_params != null)
512 {
[28964]513 GSXML.addParametersToList(dc_param_list, service_params);
[24116]514 }
[4858]515
[24116]516 dc_request.appendChild(dc_param_list);
517
518 // get the content
519 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
[24812]520 if (expand_document)
521 {
[24116]522 dc_request.appendChild(dm_doc_list);
[24812]523 }
524 else
525 {
[24116]526 dc_request.appendChild(basic_doc_list);
[4858]527 }
[25642]528 logger.debug("request = " + XMLConverter.getString(dc_message));
[24116]529 Element dc_response_message = (Element) this.mr.process(dc_message);
[24812]530 if (processErrorElements(dc_response_message, page_response))
531 {
[24116]532 return result;
[4827]533 }
[3987]534
[24116]535 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
[25953]536
[24812]537 if (expand_document)
538 {
[24116]539 // Merge the content with the structure information
540 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
[24812]541 for (int i = 0; i < doc_nodes.getLength(); i++)
542 {
543 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
544 if (content != null)
545 {
546 if (highlight_query_terms)
547 {
[30554]548 content = highlightQueryTerms(request, (Element) content);
[24116]549 }
[28382]550 doc_nodes.item(i).appendChild(doc.importNode(content, true));
[24116]551 }
552 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
553 }
[29521]554 if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
555 Element dummy_node = (Element) doc_nodes.item(0);
556 the_document.removeChild(dummy_node);
557 the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
558 NodeList dummy_children = dummy_node.getChildNodes();
559 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
560 {
561 // special case as we don't want more than one metadata list
562 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
563 {
564 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
565 }
566 else
567 {
568 the_document.appendChild(dummy_children.item(i));
569 }
570 }
571 }
[24812]572 }
573 else
574 {
[24116]575 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
576 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
577 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
[25305]578 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
[25953]579
[24812]580 if (dc_response_doc_content == null)
581 {
[24116]582 // no content to add
[25355]583 if (dc_response_doc.getAttribute("external").equals("true"))
584 {
585
586 //if (dc_response_doc_external != null)
587 //{
[25305]588 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
[24812]589
[25305]590 the_document.setAttribute("selectedNode", href_id);
591 the_document.setAttribute("external", href_id);
[25355]592 }
593 return result;
[24116]594 }
[24812]595 if (highlight_query_terms)
596 {
[24116]597 dc_response_doc.removeChild(dc_response_doc_content);
[24812]598
[30554]599 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
[24116]600 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
601 }
[24812]602
603 if (provide_annotations)
604 {
605 String service_selected = (String) params.get(ENRICH_DOC_ARG);
606 if (service_selected != null && service_selected.equals("1"))
607 {
[24116]608 // now we can modifiy the response doc if needed
[24812]609 String enrich_service = (String) params.get(GSParams.SERVICE);
[24116]610 // send a message to the service
[28382]611 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
612 Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
[24116]613 enrich_message.appendChild(enrich_request);
614 // check for parameters
[24812]615 HashMap e_service_params = (HashMap) params.get("s1");
616 if (e_service_params != null)
617 {
[28382]618 Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]619 GSXML.addParametersToList(enrich_pl, e_service_params);
[24116]620 enrich_request.appendChild(enrich_pl);
621 }
[28382]622 Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]623 enrich_request.appendChild(e_doc_list);
[28382]624 e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
[24812]625
[24116]626 Node enrich_response = this.mr.process(enrich_message);
[24812]627
628 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
[24116]629 path = GSPath.createPath(links);
[24812]630 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
631
632 }
[24116]633 } // if provide_annotations
[3987]634
[24116]635 // use the returned id rather than the sent one cos there may have
636 // been modifiers such as .pr that are removed.
637 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
638 the_document.setAttribute("selectedNode", modified_doc_id);
[24812]639 if (has_dummy)
640 {
[24116]641 // change the id if necessary and add the content
[24812]642 Element dummy_node = (Element) doc_nodes.item(0);
643
[24116]644 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[28382]645 dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]646 // hack for simple type
[26140]647 if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
[24812]648 {
[24116]649 // we dont want the internal docNode, just want the content and metadata in the document
650 // rethink this!!
651 the_document.removeChild(dummy_node);
[4023]652
[24116]653 NodeList dummy_children = dummy_node.getChildNodes();
654 //for (int i=0; i<dummy_children.getLength(); i++) {
[24812]655 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
656 {
[24116]657 // special case as we don't want more than one metadata list
[24812]658 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
659 {
[24116]660 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
[24812]661 }
662 else
663 {
[24116]664 the_document.appendChild(dummy_children.item(i));
665 }
666 }
667 }
[28258]668
[26140]669 the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[24812]670 }
671 else
672 {
[24116]673 // Merge the document content with the metadata and structure information
[24812]674 for (int i = 0; i < doc_nodes.getLength(); i++)
675 {
[24116]676 Node dn = doc_nodes.item(i);
[24812]677 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
678 if (dn_id.equals(modified_doc_id))
679 {
[28382]680 dn.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]681 break;
682 }
683 }
684 }
685 }
[29307]686 //logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
[24116]687 return result;
[3801]688 }
[24812]689
690 /**
691 * tell the param class what its arguments are if an action has its own
692 * arguments, this should add them to the params object - particularly
693 * important for args that should not be saved
694 */
[25305]695 public boolean addActionParameters(GSParams params)
[24812]696 {
[24116]697 params.addParameter(GOTO_PAGE_ARG, false);
698 params.addParameter(ENRICH_DOC_ARG, false);
[25305]699 params.addParameter(EXPAND_DOCUMENT_ARG, false);
700 params.addParameter(EXPAND_CONTENTS_ARG, false);
701 params.addParameter(REALISTIC_BOOK_ARG, false);
702
[24116]703 return true;
[4717]704 }
[4023]705
[24812]706 /**
707 * this method gets the collection description, the format info, the list of
708 * enrich services, etc - stuff that is needed for the page, but is the same
709 * whatever the query is - should be cached
710 */
[24993]711 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
[24812]712 {
[28382]713 Document doc = page_response.getOwnerDocument();
714
[24116]715 // create a message to process - contains requests for the collection
716 // description, the format element, the enrich services on offer
717 // these could all be cached
[28382]718 Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]719 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
720 // the format request - ignore for now, where does this request go to??
[28382]721 Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
[24116]722 info_message.appendChild(format_request);
723
724 // the enrich_services request - only do this if provide_annotations is true
725
[24812]726 if (provide_annotations)
727 {
[28382]728 Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
[24116]729 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
730 info_message.appendChild(enrich_services_request);
[4023]731 }
[24116]732
[24812]733 Element info_response = (Element) this.mr.process(info_message);
734
[24116]735 // the collection is the first response
736 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
737 Element format_resp = (Element) responses.item(0);
[24812]738
739 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
740 if (format_elem != null)
741 {
[25985]742 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
[28258]743 if (global_format_elem != null)
[25985]744 {
745 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
746 }
747
748 // set the format type
[24812]749 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
[28382]750 page_response.appendChild(doc.importNode(format_elem, true));
[4023]751 }
[4287]752
[24812]753 if (provide_annotations)
754 {
755 Element services_resp = (Element) responses.item(1);
[4287]756
[24116]757 // a new message for the mr
[28382]758 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]759 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
760 boolean service_found = false;
[24812]761 for (int j = 0; j < e_services.getLength(); j++)
762 {
763 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
764 {
[28382]765 Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
[24116]766 enrich_message.appendChild(s);
767 service_found = true;
768 }
769 }
[24812]770 if (service_found)
771 {
772 Element enrich_response = (Element) this.mr.process(enrich_message);
773
[24116]774 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
[28382]775 Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
[24812]776 for (int i = 0; i < e_responses.getLength(); i++)
777 {
778 Element e_resp = (Element) e_responses.item(i);
[28382]779 Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
[24116]780 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
781 service_list.appendChild(e_service);
782 }
783 page_response.appendChild(service_list);
784 }
785 } // if provide_annotations
786 return true;
[24812]787
[9874]788 }
[4287]789
[25953]790 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
791 {
[28382]792 Document doc = basic_doc_list.getOwnerDocument();
793
794 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[25953]795 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]796 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[25953]797 ds_message.appendChild(ds_request);
[25816]798
[25953]799 // Create a parameter list to specify the required structure information
[28382]800 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
801 Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
[25953]802 ds_param_list.appendChild(ds_param);
803 ds_param.setAttribute(GSXML.NAME_ATT, "info");
804 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
[25816]805
[25953]806 ds_request.appendChild(ds_param_list);
[25816]807
[25953]808 // add the node list we created earlier
809 ds_request.appendChild(basic_doc_list);
810
811 // Process the document structure retrieve message
812 Element ds_response_message = (Element) this.mr.process(ds_message);
813 if (processErrorElements(ds_response_message, page_response))
814 {
815 return null;
816 }
817
818 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
819 String path = GSPath.createPath(links);
820 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
[29439]821 if (info_elem == null) {
822 return null;
823 }
[25953]824 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
825 if (doctype_elem != null)
826 {
827 String doc_type = doctype_elem.getAttribute("value");
828 return doc_type;
829 }
830 return null;
831 }
832
[24812]833 /**
834 * this involves a bit of a hack to get the equivalent query terms - has to
835 * requery the query service - uses the last selected service name. (if it
836 * ends in query). should this action do the query or should it send a
837 * message to the query action? but that will involve lots of extra stuff.
[24889]838 * also doesn't handle phrases properly - just highlights all the terms
839 * found in the text.
[24812]840 */
[30554]841 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
[24812]842 {
[28382]843 Document doc = request.getOwnerDocument();
844
[24116]845 // do the query again to get term info
[24812]846 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]847 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
[24812]848
849 HashMap previous_params = (HashMap) params.get("p");
850 if (previous_params == null)
851 {
[24116]852 return dc_response_doc_content;
853 }
[24812]854 String service_name = (String) previous_params.get(GSParams.SERVICE);
855 if (service_name == null || !service_name.endsWith("Query"))
856 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
[24116]857 logger.debug("invalid service, not doing highlighting");
858 return dc_response_doc_content;
859 }
[24812]860 String collection = (String) params.get(GSParams.COLLECTION);
[24993]861 UserContext userContext = new UserContext(request);
[24116]862 String to = GSPath.appendLink(collection, service_name);
[24812]863
[28382]864 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
865 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]866 mr_query_message.appendChild(mr_query_request);
[24812]867
[24116]868 // paramList
[24812]869 HashMap service_params = (HashMap) params.get("s1");
870
[28382]871 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]872 GSXML.addParametersToList(query_param_list, service_params);
[30049]873 GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
[24116]874 mr_query_request.appendChild(query_param_list);
[8731]875
[24116]876 // do the query
[24812]877 Element mr_query_response = (Element) this.mr.process(mr_query_message);
[30049]878
879 String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
880 Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
881 if (highlighted_Node != null)
882 {
[30056]883 // Build a request to process highlighted text
884
885 Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
886 to = GSPath.appendLink(collection, "DocumentContentRetrieve");
887 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
888 hl_message.appendChild(dc_request);
889
890 // Create a parameter list to specify the request parameters - empty for now
891 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
892 dc_request.appendChild(dc_param_list);
893
894 // get the content
895 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
896 dc_request.appendChild(doc_list);
897 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
898 doc_list.appendChild(current_doc);
899 current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
900 //Append highlighted content to request for processing
901 dc_request.appendChild(doc.importNode(highlighted_Node, true));
902
903 Element hl_response_message = (Element) this.mr.process(hl_message);
904 //Get results
905 NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
906 Element content = (Element) contentList.item(0);
907 return content;
[30049]908 }
[24812]909
910 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
[24116]911 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
[24812]912 if (query_term_list_element == null)
913 {
[24116]914 // no term info
915 logger.error("No query term information.\n");
916 return dc_response_doc_content;
917 }
[8731]918
[24116]919 String content = GSXML.getNodeText(dc_response_doc_content);
[4287]920
[24812]921 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[24116]922 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
[4717]923
[25635]924 HashSet<String> query_term_variants = new HashSet<String>();
[24116]925 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
[24812]926 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
[24116]927 {
928 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
[24812]929 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
[24116]930 {
[24812]931 for (int i = 0; i < terms_nodelist.getLength(); i++)
[24116]932 {
[24812]933 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
[24116]934 String termValueU = null;
935 String termValueL = null;
[24812]936
937 if (termValue.length() > 1)
[24116]938 {
939 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
940 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
941 }
942 else
943 {
944 termValueU = termValue.substring(0, 1).toUpperCase();
945 termValueL = termValue.substring(0, 1).toLowerCase();
946 }
[24812]947
[24116]948 query_term_variants.add(termValueU);
949 query_term_variants.add(termValueL);
950 }
951 }
952 }
953 else
954 {
[24812]955 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
956 {
[24116]957 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
958 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
[24812]959 for (int j = 0; j < equivalent_terms.length; j++)
960 {
[24116]961 query_term_variants.add(equivalent_terms[j]);
962 }
963 }
964 }
[4287]965
[25635]966 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
[4287]967
[24116]968 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
969 String performed_query = GSXML.getNodeText(query_element) + " ";
[8731]970
[25635]971 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]972 int term_start = 0;
973 boolean in_term = false;
974 boolean in_phrase = false;
[24812]975 for (int i = 0; i < performed_query.length(); i++)
976 {
[24116]977 char character = performed_query.charAt(i);
978 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
979
980 // Has a query term just started?
[24812]981 if (in_term == false && is_character_letter_or_digit == true)
982 {
[24116]983 in_term = true;
984 term_start = i;
985 }
986
987 // Or has a term just finished?
[24812]988 else if (in_term == true && is_character_letter_or_digit == false)
989 {
[24116]990 in_term = false;
991 String term = performed_query.substring(term_start, i);
[24812]992
[24116]993 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
[24812]994 if (term_element != null)
995 {
996
[25635]997 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
[24812]998
[24116]999 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
[24812]1000 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
[24116]1001 {
1002 String termValueU = null;
1003 String termValueL = null;
[24812]1004
1005 if (term.length() > 1)
[24116]1006 {
1007 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
1008 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
1009 }
1010 else
1011 {
1012 termValueU = term.substring(0, 1).toUpperCase();
1013 termValueL = term.substring(0, 1).toLowerCase();
1014 }
[24812]1015
[24116]1016 phrase_query_p_term_x_variants.add(termValueU);
1017 phrase_query_p_term_x_variants.add(termValueL);
1018 }
1019 else
1020 {
[24812]1021 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
1022 {
[24116]1023 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
1024 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
[24812]1025 for (int k = 0; k < term_equivalent_terms.length; k++)
1026 {
[24116]1027 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
1028 }
1029 }
1030 }
1031 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
[24812]1032
1033 if (in_phrase == false)
1034 {
[24116]1035 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
[25635]1036 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1037 }
1038 }
[9007]1039 }
[24116]1040 // Watch for phrases (surrounded by quotes)
[24812]1041 if (character == '\"')
1042 {
[24116]1043 // Has a phrase just started?
[24812]1044 if (in_phrase == false)
1045 {
[24116]1046 in_phrase = true;
1047 }
1048 // Or has a phrase just finished?
[24812]1049 else if (in_phrase == true)
1050 {
[24116]1051 in_phrase = false;
1052 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1053 }
1054
[25635]1055 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1056 }
[4287]1057 }
[8731]1058
[28382]1059 return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
[8731]1060 }
1061
[24116]1062 /**
[24812]1063 * Highlights query terms in a piece of text.
1064 */
[28382]1065 private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
[24116]1066 {
1067 // Convert the content string to an array of characters for speed
1068 char[] content_characters = new char[content.length()];
1069 content.getChars(0, content.length(), content_characters, 0);
[8731]1070
[24116]1071 // Now skim through the content, identifying word matches
[25635]1072 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
[24116]1073 int word_start = 0;
1074 boolean in_word = false;
1075 boolean preceding_word_matched = false;
[24813]1076 boolean inTag = false;
[24812]1077 for (int i = 0; i < content_characters.length; i++)
1078 {
[24813]1079 //We don't want to find words inside HTML tags
[24993]1080 if (content_characters[i] == '<')
[24813]1081 {
1082 inTag = true;
1083 continue;
1084 }
1085 else if (inTag && content_characters[i] == '>')
1086 {
1087 inTag = false;
1088 }
1089 else if (inTag)
1090 {
1091 continue;
1092 }
[24993]1093
[24116]1094 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
[24993]1095
[24116]1096 // Has a word just started?
[24812]1097 if (in_word == false && is_character_letter_or_digit == true)
1098 {
[24116]1099 in_word = true;
1100 word_start = i;
1101 }
[8731]1102
[24116]1103 // Or has a word just finished?
[24812]1104 else if (in_word == true && is_character_letter_or_digit == false)
1105 {
[24116]1106 in_word = false;
[8731]1107
[24116]1108 // Check if the word matches any of the query term equivalents
1109 String word = new String(content_characters, word_start, (i - word_start));
[24812]1110 if (query_term_variants.contains(word))
1111 {
[24116]1112 // We have found a matching word, so remember its location
1113 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1114 preceding_word_matched = true;
1115 }
[24812]1116 else
1117 {
[24116]1118 preceding_word_matched = false;
1119 }
1120 }
1121 }
[8731]1122
[24116]1123 // Don't forget the last word...
[24812]1124 if (in_word == true)
1125 {
[24116]1126 // Check if the word matches any of the query term equivalents
1127 String word = new String(content_characters, word_start, (content_characters.length - word_start));
[24812]1128 if (query_term_variants.contains(word))
1129 {
[24116]1130 // We have found a matching word, so remember its location
1131 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1132 }
[8731]1133 }
1134
[25635]1135 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1136 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
[8731]1137
[24116]1138 // Deal with phrases now
[25635]1139 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
[24812]1140 for (int i = 0; i < word_matches.size(); i++)
1141 {
[25635]1142 WordMatch word_match = word_matches.get(i);
[8731]1143
[24116]1144 // See if any partial phrase matches are extended by this word
[24812]1145 if (word_match.preceding_word_matched)
1146 {
1147 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1148 {
[25635]1149 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1150 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
[24116]1151 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
[24812]1152 if (phrase_query_p_term_x_variants.contains(word_match.word))
1153 {
[24116]1154 partial_phrase_match.num_words_matched++;
[8731]1155
[24116]1156 // Has a complete phrase match occurred?
[24812]1157 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1158 {
[24116]1159 // Check for overlaps by looking at the previous highlight range
[24812]1160 if (!highlight_end_positions.isEmpty())
1161 {
[24116]1162 int last_highlight_index = highlight_end_positions.size() - 1;
[25635]1163 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
[24812]1164 if (last_highlight_end > partial_phrase_match.start_position)
1165 {
[24116]1166 // There is an overlap, so remove the previous phrase match
[25635]1167 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
[24116]1168 highlight_end_positions.remove(last_highlight_index);
1169 partial_phrase_match.start_position = last_highlight_start;
1170 }
1171 }
[8731]1172
[24116]1173 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1174 highlight_end_positions.add(new Integer(word_match.end_position));
1175 }
1176 // No, but add the partial match back into the list for next time
[24812]1177 else
1178 {
[24116]1179 partial_phrase_matches.add(partial_phrase_match);
1180 }
1181 }
1182 }
1183 }
[24812]1184 else
1185 {
[24116]1186 partial_phrase_matches.clear();
1187 }
[8731]1188
[24116]1189 // See if this word is at the start of any of the phrases
[24812]1190 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1191 {
[25635]1192 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
[24116]1193 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
[24812]1194 if (phrase_query_p_term_1_variants.contains(word_match.word))
1195 {
[24116]1196 // If this phrase is just one word long, we have a complete match
[24812]1197 if (phrase_query_p_term_variants_list.size() == 1)
1198 {
[24116]1199 highlight_start_positions.add(new Integer(word_match.start_position));
1200 highlight_end_positions.add(new Integer(word_match.end_position));
1201 }
1202 // Otherwise we have the start of a potential phrase match
[24812]1203 else
1204 {
[24116]1205 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1206 }
1207 }
1208 }
[4287]1209 }
[4717]1210
[24116]1211 // Now add the annotation tags into the document at the correct points
[28382]1212 Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
[8731]1213
[24116]1214 int last_wrote = 0;
[24812]1215 for (int i = 0; i < highlight_start_positions.size(); i++)
1216 {
[25635]1217 int highlight_start = highlight_start_positions.get(i).intValue();
1218 int highlight_end = highlight_end_positions.get(i).intValue();
[8731]1219
[24116]1220 // Print anything before the highlight range
[24812]1221 if (last_wrote < highlight_start)
1222 {
[24116]1223 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
[28382]1224 content_element.appendChild(doc.createTextNode(preceding_text));
[24116]1225 }
[8731]1226
[24116]1227 // Print the highlight text, annotated
[24812]1228 if (highlight_end > last_wrote)
1229 {
[24116]1230 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
[28382]1231 Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
[24116]1232 annotation_element.setAttribute("type", "query_term");
1233 content_element.appendChild(annotation_element);
1234 last_wrote = highlight_end;
1235 }
1236 }
[8731]1237
[24116]1238 // Finish off any unwritten text
[24812]1239 if (last_wrote < content_characters.length)
1240 {
[24116]1241 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
[28382]1242 content_element.appendChild(doc.createTextNode(remaining_text));
[24116]1243 }
1244 return content_element;
[8731]1245 }
1246
[24116]1247 static private class WordMatch
1248 {
1249 public String word;
1250 public int start_position;
1251 public int end_position;
1252 public boolean preceding_word_matched;
[8731]1253
[24116]1254 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1255 {
1256 this.word = word;
1257 this.start_position = start_position;
1258 this.end_position = end_position;
1259 this.preceding_word_matched = preceding_word_matched;
1260 }
[8731]1261 }
1262
[24116]1263 static private class PartialPhraseMatch
1264 {
1265 public int start_position;
1266 public int query_phrase_number;
1267 public int num_words_matched;
[8731]1268
[24116]1269 public PartialPhraseMatch(int start_position, int query_phrase_number)
1270 {
1271 this.start_position = start_position;
1272 this.query_phrase_number = query_phrase_number;
1273 this.num_words_matched = 1;
1274 }
[8731]1275 }
[3645]1276}
Note: See TracBrowser for help on using the repository browser.