source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 32069

Last change on this file since 32069 was 32069, checked in by kjdon, 6 years ago

forgot to add the import GlobalProperties line

  • Property svn:keywords set to Author Date Id Revision
File size: 49.7 KB
RevLine 
[3801]1/*
[24812]2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
[3645]19package org.greenstone.gsdl3.action;
20
[3801]21// Greenstone classes
[3645]22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
[32069]24import org.greenstone.util.GlobalProperties;
[3801]25
[3645]26// XML classes
[24812]27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.Node;
[4287]30import org.w3c.dom.Text;
[3801]31import org.w3c.dom.NodeList;
[3645]32
[3801]33// General Java classes
[8731]34import java.util.ArrayList;
[3645]35import java.util.HashMap;
[4287]36import java.util.HashSet;
[3645]37import java.io.File;
[25635]38import java.io.Serializable;
[3645]39
[13124]40import org.apache.log4j.*;
[3801]41
[24812]42/** Action class for retrieving Documents via the message router */
43public class DocumentAction extends Action
44{
[13124]45
[24116]46 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
[13124]47
[24116]48 // this is used to specify that the sibling nodes of a selected one should be obtained
49 public static final String SIBLING_ARG = "sib";
50 public static final String GOTO_PAGE_ARG = "gp";
51 public static final String ENRICH_DOC_ARG = "end";
[25305]52 public static final String EXPAND_DOCUMENT_ARG = "ed";
53 public static final String EXPAND_CONTENTS_ARG = "ec";
54 public static final String REALISTIC_BOOK_ARG = "book";
[32068]55 public static final String NO_TEXT_ARG = "noText";
56 public static final String DOC_EDIT_ARG = "docEdit";
57
[24812]58 /**
59 * if this is set to true, when a document is displayed, any annotation type
60 * services (enrich) will be offered to the user as well
61 */
62 protected boolean provide_annotations = false;
63
[24116]64 protected boolean highlight_query_terms = false;
[5694]65
[24812]66 public boolean configure()
67 {
[24116]68 super.configure();
[24812]69 String highlight = (String) config_params.get("highlightQueryTerms");
70 if (highlight != null && highlight.equals("true"))
71 {
[24116]72 highlight_query_terms = true;
73 }
[24812]74 String annotate = (String) config_params.get("displayAnnotationService");
75 if (annotate != null && annotate.equals("true"))
76 {
[24116]77 provide_annotations = true;
78 }
[25953]79 return true;
80 }
[24812]81
82 public Node process(Node message_node)
[24116]83 {
84 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
[24812]85
[28964]86 Element message = GSXML.nodeToElement(message_node);
[32068]87 Document doc = XMLConverter.newDOM(); //message.getOwnerDocument();
[28382]88
[24116]89 // the response
[28382]90 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
91 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
[24116]92 result.appendChild(page_response);
[19984]93
[24116]94 // get the request - assume only one
[24812]95 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
96 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]97 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
[4023]98
[24116]99 // just in case there are some that need to get passed to the services
[24812]100 HashMap service_params = (HashMap) params.get("s0");
[4717]101
[24116]102 String collection = (String) params.get(GSParams.COLLECTION);
[25305]103 String document_id = (String) params.get(GSParams.DOCUMENT);
[25355]104 if (document_id != null && document_id.equals(""))
105 {
106 document_id = null;
[25305]107 }
108 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
[25355]109 if (href != null && href.equals(""))
110 {
111 href = null;
[25305]112 }
113 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
114 if (document_id == null && href == null)
[24812]115 {
[24116]116 logger.error("no document specified!");
117 return result;
118 }
[25355]119 if (rl != null && rl.equals("0"))
120 {
121 // this is a true external link, we should have been directed to a different page or action
122 logger.error("rl value was 0, shouldn't get here");
123 return result;
[25305]124 }
[29521]125
126 UserContext userContext = new UserContext(request);
127
128 //append site metadata
129 addSiteMetadata(page_response, userContext);
130 addInterfaceOptions(page_response);
131
132 // get the additional data needed for the page
133 getBackgroundData(page_response, collection, userContext);
134 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
135
136 if (format_elem != null) {
137 // lets look for param defaults set in config file
138 NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
139 for (int i=0; i<param_defaults.getLength(); i++) {
140 Element p = (Element)param_defaults.item(i);
141 String name = p.getAttribute(GSXML.NAME_ATT);
142 if (params.get(name) ==null) {
143 // wasn't set from interface
144 String value = p.getAttribute(GSXML.VALUE_ATT);
145 params.put(name, value );
146 // also add into request param xml so that xslt knows it too
147 GSXML.addParameterToList(cgi_paramList, name, value);
148 }
149 }
150 }
[32068]151
152
153 boolean editing_document = false;
154 String doc_edit = (String) params.get(DOC_EDIT_ARG);
155 if (doc_edit != null && doc_edit.equals("1")) {
156 editing_document = true;
157 }
158
159 // are we editing mode? just get the archive document, convert to our internal doc format, and return it
160 if (editing_document) {
161
162 // call get archive doc
163 Element dx_message = doc.createElement(GSXML.MESSAGE_ELEM);
164 String to = "DocXMLGetSection";
165 Element dx_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
166 dx_message.appendChild(dx_request);
167 Element dx_section = doc.createElement(GSXML.DOCXML_SECTION_ELEM);
168 dx_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
169 dx_section.setAttribute(GSXML.COLLECTION_ATT, collection);
170 dx_request.appendChild(dx_section);
171
172 Element dx_response_message = (Element) this.mr.process(dx_message);
173 if (processErrorElements(dx_response_message, page_response))
174 {
175 return result;
176 }
177
178 // get the section out
179 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOCXML_SECTION_ELEM);
180 Element section = (Element) GSXML.getNodeByPath(dx_response_message, path);
181 if (section == null) {
182 logger.error("no archive doc returned for "+document_id);
183 return result;
184 }
185 // convert the archive format into the internal format that the page response requires
186
187 Element doc_elem = doc.createElement(GSXML.DOCUMENT_ELEM);
188 page_response.appendChild(doc_elem);
189 section.setAttribute(GSXML.NODE_ID_ATT, document_id);
190
191 Element transformed_section = transformArchiveToDocument(section);
192 doc_elem.appendChild(doc.importNode(transformed_section, true));
193 logger.error("dx result = "+XMLConverter.getPrettyString(result));
194 return result;
195 }
196
[24116]197 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
[25816]198 if (document_type != null && document_type.equals(""))
[24812]199 {
[25953]200 //document_type = "hierarchy";
201 document_type = null; // we'll get it later if not already specified
[24116]202 }
203 //whether to retrieve siblings or not
204 boolean get_siblings = false;
205 String sibs = (String) params.get(SIBLING_ARG);
[24812]206 if (sibs != null && sibs.equals("1"))
207 {
[24116]208 get_siblings = true;
209 }
[24812]210
[25305]211 String doc_id_modifier = "";
[24116]212 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
[24812]213 if (sibling_num != null && !sibling_num.equals(""))
214 {
[24116]215 // we have to modify the doc name
[25355]216 doc_id_modifier = "." + sibling_num + ".ss";
[24116]217 }
[24812]218
[24116]219 boolean expand_document = false;
[25305]220 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
[24812]221 if (ed_arg != null && ed_arg.equals("1"))
222 {
[24116]223 expand_document = true;
224 }
[14525]225
[24116]226 boolean expand_contents = false;
[24812]227 if (expand_document)
228 { // we always expand the contents with the text
[24116]229 expand_contents = true;
[24812]230 }
231 else
232 {
[25305]233 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
[24812]234 if (ec_arg != null && ec_arg.equals("1"))
235 {
[24116]236 expand_contents = true;
237 }
[5694]238 }
[25355]239
[32068]240 // do we want text content? Not if no_text=1.
241 // expand_document overrides this. - should it??
242 boolean get_text = true;
243 String nt_arg = (String) params.get(NO_TEXT_ARG);
244
245 if (!expand_document && nt_arg!=null && nt_arg.equals("1")) {
246 logger.error("SETTING GET TEXT TO FALSE");
247 get_text = false;
248 } else {
249 logger.error("GET TEXT REMAINS TRUE");
250 }
[4257]251
[24116]252 // the_document is where all the doc info - structure and metadata etc
253 // is added into, to be returned in the page
[28382]254 Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
[24116]255 page_response.appendChild(the_document);
[9874]256
[24116]257 // create a basic doc list containing the current node
[28382]258 Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
259 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]260 basic_doc_list.appendChild(current_doc);
[25305]261 if (document_id != null)
[24812]262 {
[25355]263 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
[24812]264 }
[25355]265 else
[24812]266 {
[25305]267 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
268 // do we need this??
269 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
[14525]270 }
[3801]271
[25953]272 if (document_type == null)
273 {
274 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[25816]275 }
[29439]276 if (document_type == null)
[25953]277 {
[31249]278 logger.debug("doctype is null, setting to simple");
[29439]279 document_type = GSXML.DOC_TYPE_SIMPLE;
[25816]280 }
[29439]281
282 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
283
[25816]284
[24116]285 // Create a parameter list to specify the required structure information
[28382]286 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]287
288 if (service_params != null)
289 {
[28964]290 GSXML.addParametersToList(ds_param_list, service_params);
[24116]291 }
[3817]292
[24812]293 Element ds_param = null;
[24116]294 boolean get_structure = false;
295 boolean get_structure_info = false;
[24889]296 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
[24812]297 {
[24116]298 get_structure_info = true;
[24889]299
300 if (expand_contents)
301 {
[28382]302 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]303 ds_param_list.appendChild(ds_param);
304 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
305 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
306 }
307
[25305]308 // get the info needed for paged naviagtion
[28382]309 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]310 ds_param_list.appendChild(ds_param);
311 ds_param.setAttribute(GSXML.NAME_ATT, "info");
312 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
[28382]313 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]314 ds_param_list.appendChild(ds_param);
315 ds_param.setAttribute(GSXML.NAME_ATT, "info");
316 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
[28382]317 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]318 ds_param_list.appendChild(ds_param);
319 ds_param.setAttribute(GSXML.NAME_ATT, "info");
320 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
[24812]321
[24889]322 if (get_siblings)
323 {
[28382]324 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]325 ds_param_list.appendChild(ds_param);
326 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
327 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
328 }
329
[24812]330 }
[28258]331 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) || document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
[24812]332 {
[24116]333 get_structure = true;
[24812]334 if (expand_contents)
335 {
[28382]336 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]337 ds_param_list.appendChild(ds_param);
338 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
339 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
[24812]340 }
341 else
342 {
[24116]343 // get the info needed for table of contents
[28382]344 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]345 ds_param_list.appendChild(ds_param);
346 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
347 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
[28382]348 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]349 ds_param_list.appendChild(ds_param);
350 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
351 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
[24812]352 if (get_siblings)
353 {
[28382]354 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]355 ds_param_list.appendChild(ds_param);
356 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
357 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
358 }
359 }
[24812]360 }
361 else
362 {
[31249]363 // we dont need any structure
[24116]364 }
[3801]365
[24116]366 boolean has_dummy = false;
[24812]367 if (get_structure || get_structure_info)
368 {
[8676]369
[24116]370 // Build a request to obtain the document structure
[28382]371 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]372 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]373 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]374 ds_message.appendChild(ds_request);
375 ds_request.appendChild(ds_param_list);
[24812]376
[25816]377 // add the node list we created earlier
[24116]378 ds_request.appendChild(basic_doc_list);
[24812]379
[24116]380 // Process the document structure retrieve message
381 Element ds_response_message = (Element) this.mr.process(ds_message);
[24812]382 if (processErrorElements(ds_response_message, page_response))
383 {
[24116]384 return result;
385 }
[4030]386
[24116]387 // get the info and print out
[24812]388 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]389 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
390 path = GSPath.appendLink(path, "nodeStructureInfo");
391 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
392 // get the doc_node bit
[24812]393 if (ds_response_struct_info != null)
394 {
[28382]395 the_document.appendChild(doc.importNode(ds_response_struct_info, true));
[24116]396 }
[24812]397 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]398 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
399 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
400 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
[24812]401
402 if (ds_response_structure != null)
403 {
[24116]404 // add the contents of the structure bit into the_document
405 NodeList structs = ds_response_structure.getChildNodes();
[24812]406 for (int i = 0; i < structs.getLength(); i++)
407 {
[28382]408 the_document.appendChild(doc.importNode(structs.item(i), true));
[24116]409 }
[24812]410 }
411 else
412 {
[24116]413 // no structure nodes, so put in a dummy doc node
[28382]414 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]415 if (document_id != null)
[24812]416 {
[25305]417 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]418 }
[25355]419 else
[24812]420 {
[25305]421 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[25355]422
[24116]423 }
424 the_document.appendChild(doc_node);
425 has_dummy = true;
426 }
[24812]427 }
428 else
429 { // a simple type - we dont have a dummy node for simple
[24116]430 // should think about this more
431 // no structure request, so just put in a dummy doc node
[28382]432 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]433 if (document_id != null)
[24812]434 {
[25305]435 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]436 }
[25355]437 else
[24812]438 {
[25305]439 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[24116]440 }
441 the_document.appendChild(doc_node);
442 has_dummy = true;
443 }
[24812]444
[24116]445 // Build a request to obtain some document metadata
[28382]446 Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]447 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
[28382]448 Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]449 dm_message.appendChild(dm_request);
450 // Create a parameter list to specify the required metadata information
[24812]451
[25635]452 HashSet<String> meta_names = new HashSet<String>();
[24116]453 meta_names.add("Title"); // the default
[24812]454 if (format_elem != null)
455 {
[24889]456 getRequiredMetadataNames(format_elem, meta_names);
[24116]457 }
[28258]458
[26026]459 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
[28258]460 if (extraMetaListElem != null)
[26026]461 {
462 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
[28258]463 for (int i = 0; i < extraMetaList.getLength(); i++)
[26026]464 {
[28258]465 meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
[26026]466 }
467 }
[24812]468
[28382]469 Element dm_param_list = createMetadataParamList(doc,meta_names);
[24812]470 if (service_params != null)
471 {
[28964]472 GSXML.addParametersToList(dm_param_list, service_params);
[24116]473 }
[24812]474
[24116]475 dm_request.appendChild(dm_param_list);
[24812]476
[24116]477 // create the doc node list for the metadata request
[28382]478 Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]479 dm_request.appendChild(dm_doc_list);
[4030]480
[24116]481 // Add each node from the structure response into the metadata request
482 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
[24812]483 for (int i = 0; i < doc_nodes.getLength(); i++)
484 {
[24116]485 Element doc_node = (Element) doc_nodes.item(i);
486 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
[3801]487
[24116]488 // Add the documentNode to the list
[28382]489 Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]490 dm_doc_list.appendChild(dm_doc_node);
491 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
[24812]492 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
[29922]493 if (document_id == null){
494 dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
495 }
496
[24116]497 }
[3801]498
[24116]499 // we also want a metadata request to the top level document to get
500 // assocfilepath - this could be cached too
[28382]501 Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]502 dm_message.appendChild(doc_meta_request);
[28382]503 Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]504 if (service_params != null)
505 {
[28964]506 GSXML.addParametersToList(doc_meta_param_list, service_params);
[24116]507 }
[3801]508
[24116]509 doc_meta_request.appendChild(doc_meta_param_list);
[28382]510 Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]511 doc_meta_param_list.appendChild(doc_param);
512 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
513 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
[8676]514
[24116]515 // create the doc node list for the metadata request
[28382]516 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]517 doc_meta_request.appendChild(doc_list);
[3801]518
[28382]519 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]520 // the node we want is the root document node
[25355]521 if (document_id != null)
[24812]522 {
[25305]523 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
[24812]524 }
[29922]525 /*else
[24812]526 {
[25355]527 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
528 // can we assume that href is always a top level doc??
529 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
[25305]530 //doc_node.setAttribute("externalURL", has_rl);
[29922]531 }*/
[24116]532 doc_list.appendChild(doc_node);
[24889]533
[24116]534 Element dm_response_message = (Element) this.mr.process(dm_message);
[24812]535 if (processErrorElements(dm_response_message, page_response))
536 {
[24116]537 return result;
538 }
[9874]539
[24812]540 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]541 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
[3801]542
[24116]543 // Merge the metadata with the structure information
544 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
[24812]545 for (int i = 0; i < doc_nodes.getLength(); i++)
546 {
[24116]547 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
[8833]548 }
[24116]549 // get the top level doc metadata out
[24812]550 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
551 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
[24116]552 GSXML.mergeMetadataLists(the_document, top_doc_node);
[24812]553
[32068]554 // do we want doc text content? If not, we are done.
555 if (!get_text) {
556 // don't get text
557 return result;
558 }
559
[24116]560 // Build a request to obtain some document content
[28382]561 Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]562 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
[28382]563 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]564 dc_message.appendChild(dc_request);
[5694]565
[24116]566 // Create a parameter list to specify the request parameters - empty for now
[28382]567 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]568 if (service_params != null)
569 {
[28964]570 GSXML.addParametersToList(dc_param_list, service_params);
[24116]571 }
[4858]572
[24116]573 dc_request.appendChild(dc_param_list);
574
575 // get the content
576 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
[24812]577 if (expand_document)
578 {
[24116]579 dc_request.appendChild(dm_doc_list);
[24812]580 }
581 else
582 {
[24116]583 dc_request.appendChild(basic_doc_list);
[4858]584 }
[24116]585 Element dc_response_message = (Element) this.mr.process(dc_message);
[24812]586 if (processErrorElements(dc_response_message, page_response))
587 {
[24116]588 return result;
[4827]589 }
[24116]590 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
[25953]591
[24812]592 if (expand_document)
593 {
[24116]594 // Merge the content with the structure information
595 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
[24812]596 for (int i = 0; i < doc_nodes.getLength(); i++)
597 {
[31249]598 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), GSXML.NODE_CONTENT_ELEM);
[24812]599 if (content != null)
600 {
601 if (highlight_query_terms)
602 {
[31249]603 String node_id = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
604 content = highlightQueryTerms(request, node_id, (Element) content);
[24116]605 }
[31249]606
[28382]607 doc_nodes.item(i).appendChild(doc.importNode(content, true));
[24116]608 }
609 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
610 }
[29521]611 if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
612 Element dummy_node = (Element) doc_nodes.item(0);
613 the_document.removeChild(dummy_node);
614 the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
615 NodeList dummy_children = dummy_node.getChildNodes();
616 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
617 {
618 // special case as we don't want more than one metadata list
619 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
620 {
621 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
622 }
623 else
624 {
625 the_document.appendChild(dummy_children.item(i));
626 }
627 }
628 }
[24812]629 }
630 else
631 {
[24116]632 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
633 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
634 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
[25305]635 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
[25953]636
[24812]637 if (dc_response_doc_content == null)
638 {
[24116]639 // no content to add
[25355]640 if (dc_response_doc.getAttribute("external").equals("true"))
641 {
642
643 //if (dc_response_doc_external != null)
644 //{
[25305]645 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
[24812]646
[25305]647 the_document.setAttribute("selectedNode", href_id);
648 the_document.setAttribute("external", href_id);
[25355]649 }
650 return result;
[24116]651 }
[24812]652 if (highlight_query_terms)
653 {
[24116]654 dc_response_doc.removeChild(dc_response_doc_content);
[24812]655
[31249]656 dc_response_doc_content = highlightQueryTerms(request, null, dc_response_doc_content);
[24116]657 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
658 }
[24812]659
660 if (provide_annotations)
661 {
662 String service_selected = (String) params.get(ENRICH_DOC_ARG);
663 if (service_selected != null && service_selected.equals("1"))
664 {
[24116]665 // now we can modifiy the response doc if needed
[24812]666 String enrich_service = (String) params.get(GSParams.SERVICE);
[24116]667 // send a message to the service
[28382]668 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
669 Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
[24116]670 enrich_message.appendChild(enrich_request);
671 // check for parameters
[24812]672 HashMap e_service_params = (HashMap) params.get("s1");
673 if (e_service_params != null)
674 {
[28382]675 Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]676 GSXML.addParametersToList(enrich_pl, e_service_params);
[24116]677 enrich_request.appendChild(enrich_pl);
678 }
[28382]679 Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]680 enrich_request.appendChild(e_doc_list);
[28382]681 e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
[24812]682
[24116]683 Node enrich_response = this.mr.process(enrich_message);
[24812]684
685 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
[24116]686 path = GSPath.createPath(links);
[24812]687 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
688
689 }
[24116]690 } // if provide_annotations
[3987]691
[24116]692 // use the returned id rather than the sent one cos there may have
693 // been modifiers such as .pr that are removed.
694 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
695 the_document.setAttribute("selectedNode", modified_doc_id);
[24812]696 if (has_dummy)
697 {
[24116]698 // change the id if necessary and add the content
[24812]699 Element dummy_node = (Element) doc_nodes.item(0);
700
[24116]701 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[28382]702 dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]703 // hack for simple type
[26140]704 if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
[24812]705 {
[24116]706 // we dont want the internal docNode, just want the content and metadata in the document
707 // rethink this!!
708 the_document.removeChild(dummy_node);
[4023]709
[24116]710 NodeList dummy_children = dummy_node.getChildNodes();
711 //for (int i=0; i<dummy_children.getLength(); i++) {
[24812]712 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
713 {
[24116]714 // special case as we don't want more than one metadata list
[24812]715 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
716 {
[24116]717 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
[24812]718 }
719 else
720 {
[24116]721 the_document.appendChild(dummy_children.item(i));
722 }
723 }
724 }
[28258]725
[26140]726 the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[24812]727 }
728 else
729 {
[24116]730 // Merge the document content with the metadata and structure information
[24812]731 for (int i = 0; i < doc_nodes.getLength(); i++)
732 {
[24116]733 Node dn = doc_nodes.item(i);
[24812]734 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
735 if (dn_id.equals(modified_doc_id))
736 {
[28382]737 dn.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]738 break;
739 }
740 }
741 }
742 }
[29307]743 //logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
[24116]744 return result;
[3801]745 }
[24812]746
747 /**
748 * tell the param class what its arguments are if an action has its own
749 * arguments, this should add them to the params object - particularly
750 * important for args that should not be saved
751 */
[25305]752 public boolean addActionParameters(GSParams params)
[24812]753 {
[24116]754 params.addParameter(GOTO_PAGE_ARG, false);
755 params.addParameter(ENRICH_DOC_ARG, false);
[25305]756 params.addParameter(EXPAND_DOCUMENT_ARG, false);
757 params.addParameter(EXPAND_CONTENTS_ARG, false);
758 params.addParameter(REALISTIC_BOOK_ARG, false);
759
[24116]760 return true;
[4717]761 }
[4023]762
[24812]763 /**
764 * this method gets the collection description, the format info, the list of
765 * enrich services, etc - stuff that is needed for the page, but is the same
766 * whatever the query is - should be cached
767 */
[24993]768 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
[24812]769 {
[28382]770 Document doc = page_response.getOwnerDocument();
771
[24116]772 // create a message to process - contains requests for the collection
773 // description, the format element, the enrich services on offer
774 // these could all be cached
[28382]775 Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]776 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
777 // the format request - ignore for now, where does this request go to??
[28382]778 Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
[24116]779 info_message.appendChild(format_request);
780
781 // the enrich_services request - only do this if provide_annotations is true
782
[24812]783 if (provide_annotations)
784 {
[28382]785 Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
[24116]786 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
787 info_message.appendChild(enrich_services_request);
[4023]788 }
[24116]789
[24812]790 Element info_response = (Element) this.mr.process(info_message);
791
[24116]792 // the collection is the first response
793 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
794 Element format_resp = (Element) responses.item(0);
[24812]795
796 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
797 if (format_elem != null)
798 {
[25985]799 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
[28258]800 if (global_format_elem != null)
[25985]801 {
802 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
803 }
804
805 // set the format type
[24812]806 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
[28382]807 page_response.appendChild(doc.importNode(format_elem, true));
[4023]808 }
[4287]809
[24812]810 if (provide_annotations)
811 {
812 Element services_resp = (Element) responses.item(1);
[4287]813
[24116]814 // a new message for the mr
[28382]815 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]816 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
817 boolean service_found = false;
[24812]818 for (int j = 0; j < e_services.getLength(); j++)
819 {
820 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
821 {
[28382]822 Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
[24116]823 enrich_message.appendChild(s);
824 service_found = true;
825 }
826 }
[24812]827 if (service_found)
828 {
829 Element enrich_response = (Element) this.mr.process(enrich_message);
830
[24116]831 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
[28382]832 Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
[24812]833 for (int i = 0; i < e_responses.getLength(); i++)
834 {
835 Element e_resp = (Element) e_responses.item(i);
[28382]836 Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
[24116]837 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
838 service_list.appendChild(e_service);
839 }
840 page_response.appendChild(service_list);
841 }
842 } // if provide_annotations
843 return true;
[24812]844
[9874]845 }
[4287]846
[25953]847 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
848 {
[28382]849 Document doc = basic_doc_list.getOwnerDocument();
850
851 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[25953]852 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]853 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[25953]854 ds_message.appendChild(ds_request);
[25816]855
[25953]856 // Create a parameter list to specify the required structure information
[28382]857 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
858 Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
[25953]859 ds_param_list.appendChild(ds_param);
860 ds_param.setAttribute(GSXML.NAME_ATT, "info");
861 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
[25816]862
[25953]863 ds_request.appendChild(ds_param_list);
[25816]864
[25953]865 // add the node list we created earlier
866 ds_request.appendChild(basic_doc_list);
867
868 // Process the document structure retrieve message
869 Element ds_response_message = (Element) this.mr.process(ds_message);
870 if (processErrorElements(ds_response_message, page_response))
871 {
872 return null;
873 }
874
875 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
876 String path = GSPath.createPath(links);
877 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
[29439]878 if (info_elem == null) {
879 return null;
880 }
[25953]881 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
882 if (doctype_elem != null)
883 {
884 String doc_type = doctype_elem.getAttribute("value");
885 return doc_type;
886 }
887 return null;
888 }
889
[32068]890 /** run the XSLT transform which converts from doc.xml format to our internal document format */
891 protected Element transformArchiveToDocument(Element section) {
892
893 String stylesheet_file = GSFile.stylesheetFile(GlobalProperties.getGSDL3Home(), (String) this.config_params.get(GSConstants.SITE_NAME), "", (String) this.config_params.get(GSConstants.INTERFACE_NAME), null, "archive2document.xsl");
894 Document stylesheet_doc = XMLConverter.getDOM(new File(stylesheet_file));
895 if (stylesheet_doc == null) {
896 logger.error("Couldn't load in stylesheet "+stylesheet_file);
897 return section;
898 }
899
900 Document section_doc = XMLConverter.newDOM();
901 section_doc.appendChild(section_doc.importNode(section, true));
902 Node result = this.transformer.transform(stylesheet_doc, section_doc);
903 logger.error("transform result = "+XMLConverter.getPrettyString(result));
904
905 Element new_element;
906 if (result.getNodeType() == Node.DOCUMENT_NODE)
907 {
908 new_element = ((Document) result).getDocumentElement();
909 }
910 else
911 {
912 new_element = (Element) result;
913 }
914
915
916 return new_element;
917
918 }
919
920
[24812]921 /**
922 * this involves a bit of a hack to get the equivalent query terms - has to
923 * requery the query service - uses the last selected service name. (if it
924 * ends in query). should this action do the query or should it send a
925 * message to the query action? but that will involve lots of extra stuff.
[24889]926 * also doesn't handle phrases properly - just highlights all the terms
927 * found in the text.
[24812]928 */
[31249]929 protected Element highlightQueryTerms(Element request, String current_node_id, Element dc_response_doc_content)
[24812]930 {
[28382]931 Document doc = request.getOwnerDocument();
932
[24116]933 // do the query again to get term info
[24812]934 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]935 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
[24812]936
937 HashMap previous_params = (HashMap) params.get("p");
938 if (previous_params == null)
939 {
[24116]940 return dc_response_doc_content;
941 }
[24812]942 String service_name = (String) previous_params.get(GSParams.SERVICE);
943 if (service_name == null || !service_name.endsWith("Query"))
944 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
[24116]945 logger.debug("invalid service, not doing highlighting");
946 return dc_response_doc_content;
947 }
[24812]948 String collection = (String) params.get(GSParams.COLLECTION);
[24993]949 UserContext userContext = new UserContext(request);
[24116]950 String to = GSPath.appendLink(collection, service_name);
[24812]951
[28382]952 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
953 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]954 mr_query_message.appendChild(mr_query_request);
[24812]955
[24116]956 // paramList
[24812]957 HashMap service_params = (HashMap) params.get("s1");
958
[28382]959 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]960 GSXML.addParametersToList(query_param_list, service_params);
[31249]961 if (current_node_id != null) {
962 GSXML.addParameterToList(query_param_list, "hldocOID", current_node_id);
963 } else {
964 GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
965 }
[24116]966 mr_query_request.appendChild(query_param_list);
967 // do the query
[24812]968 Element mr_query_response = (Element) this.mr.process(mr_query_message);
[30049]969 String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
970 Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
[31249]971 // For SOLR, the above query may come back with a nodeContent element, which is the hldocOID section content, with search terms marked up. We send it back to the documnetContentRetrieve service so that resolveTextMacros can be applied, and it can be properly encased in documentNode etc elements
[30049]972 if (highlighted_Node != null)
973 {
[30056]974 // Build a request to process highlighted text
975
976 Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
977 to = GSPath.appendLink(collection, "DocumentContentRetrieve");
978 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
979 hl_message.appendChild(dc_request);
980
981 // Create a parameter list to specify the request parameters - empty for now
982 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
983 dc_request.appendChild(dc_param_list);
984
985 // get the content
986 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
987 dc_request.appendChild(doc_list);
988 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
989 doc_list.appendChild(current_doc);
990 current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
991 //Append highlighted content to request for processing
992 dc_request.appendChild(doc.importNode(highlighted_Node, true));
993 Element hl_response_message = (Element) this.mr.process(hl_message);
[31249]994
[30056]995 //Get results
996 NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
997 Element content = (Element) contentList.item(0);
998 return content;
[30049]999 }
[24812]1000 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
[24116]1001 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
[24812]1002 if (query_term_list_element == null)
1003 {
[24116]1004 // no term info
1005 logger.error("No query term information.\n");
1006 return dc_response_doc_content;
1007 }
[8731]1008
[24116]1009 String content = GSXML.getNodeText(dc_response_doc_content);
[4287]1010
[24812]1011 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[24116]1012 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
[4717]1013
[25635]1014 HashSet<String> query_term_variants = new HashSet<String>();
[24116]1015 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
[24812]1016 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
[24116]1017 {
1018 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
[24812]1019 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
[24116]1020 {
[24812]1021 for (int i = 0; i < terms_nodelist.getLength(); i++)
[24116]1022 {
[24812]1023 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
[24116]1024 String termValueU = null;
1025 String termValueL = null;
[24812]1026
1027 if (termValue.length() > 1)
[24116]1028 {
1029 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
1030 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
1031 }
1032 else
1033 {
1034 termValueU = termValue.substring(0, 1).toUpperCase();
1035 termValueL = termValue.substring(0, 1).toLowerCase();
1036 }
[24812]1037
[24116]1038 query_term_variants.add(termValueU);
1039 query_term_variants.add(termValueL);
1040 }
1041 }
1042 }
1043 else
1044 {
[24812]1045 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
1046 {
[24116]1047 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
1048 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
[24812]1049 for (int j = 0; j < equivalent_terms.length; j++)
1050 {
[24116]1051 query_term_variants.add(equivalent_terms[j]);
1052 }
1053 }
1054 }
[4287]1055
[25635]1056 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
[4287]1057
[24116]1058 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
1059 String performed_query = GSXML.getNodeText(query_element) + " ";
[8731]1060
[25635]1061 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1062 int term_start = 0;
1063 boolean in_term = false;
1064 boolean in_phrase = false;
[24812]1065 for (int i = 0; i < performed_query.length(); i++)
1066 {
[24116]1067 char character = performed_query.charAt(i);
1068 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
1069
1070 // Has a query term just started?
[24812]1071 if (in_term == false && is_character_letter_or_digit == true)
1072 {
[24116]1073 in_term = true;
1074 term_start = i;
1075 }
1076
1077 // Or has a term just finished?
[24812]1078 else if (in_term == true && is_character_letter_or_digit == false)
1079 {
[24116]1080 in_term = false;
1081 String term = performed_query.substring(term_start, i);
[24812]1082
[24116]1083 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
[24812]1084 if (term_element != null)
1085 {
1086
[25635]1087 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
[24812]1088
[24116]1089 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
[24812]1090 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
[24116]1091 {
1092 String termValueU = null;
1093 String termValueL = null;
[24812]1094
1095 if (term.length() > 1)
[24116]1096 {
1097 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
1098 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
1099 }
1100 else
1101 {
1102 termValueU = term.substring(0, 1).toUpperCase();
1103 termValueL = term.substring(0, 1).toLowerCase();
1104 }
[24812]1105
[24116]1106 phrase_query_p_term_x_variants.add(termValueU);
1107 phrase_query_p_term_x_variants.add(termValueL);
1108 }
1109 else
1110 {
[24812]1111 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
1112 {
[24116]1113 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
1114 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
[24812]1115 for (int k = 0; k < term_equivalent_terms.length; k++)
1116 {
[24116]1117 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
1118 }
1119 }
1120 }
1121 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
[24812]1122
1123 if (in_phrase == false)
1124 {
[24116]1125 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
[25635]1126 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1127 }
1128 }
[9007]1129 }
[24116]1130 // Watch for phrases (surrounded by quotes)
[24812]1131 if (character == '\"')
1132 {
[24116]1133 // Has a phrase just started?
[24812]1134 if (in_phrase == false)
1135 {
[24116]1136 in_phrase = true;
1137 }
1138 // Or has a phrase just finished?
[24812]1139 else if (in_phrase == true)
1140 {
[24116]1141 in_phrase = false;
1142 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1143 }
1144
[25635]1145 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1146 }
[4287]1147 }
[8731]1148
[28382]1149 return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
[8731]1150 }
1151
[24116]1152 /**
[24812]1153 * Highlights query terms in a piece of text.
1154 */
[28382]1155 private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
[24116]1156 {
1157 // Convert the content string to an array of characters for speed
1158 char[] content_characters = new char[content.length()];
1159 content.getChars(0, content.length(), content_characters, 0);
[8731]1160
[24116]1161 // Now skim through the content, identifying word matches
[25635]1162 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
[24116]1163 int word_start = 0;
1164 boolean in_word = false;
1165 boolean preceding_word_matched = false;
[24813]1166 boolean inTag = false;
[24812]1167 for (int i = 0; i < content_characters.length; i++)
1168 {
[24813]1169 //We don't want to find words inside HTML tags
[24993]1170 if (content_characters[i] == '<')
[24813]1171 {
1172 inTag = true;
1173 continue;
1174 }
1175 else if (inTag && content_characters[i] == '>')
1176 {
1177 inTag = false;
1178 }
1179 else if (inTag)
1180 {
1181 continue;
1182 }
[24993]1183
[24116]1184 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
[24993]1185
[24116]1186 // Has a word just started?
[24812]1187 if (in_word == false && is_character_letter_or_digit == true)
1188 {
[24116]1189 in_word = true;
1190 word_start = i;
1191 }
[8731]1192
[24116]1193 // Or has a word just finished?
[24812]1194 else if (in_word == true && is_character_letter_or_digit == false)
1195 {
[24116]1196 in_word = false;
[8731]1197
[24116]1198 // Check if the word matches any of the query term equivalents
1199 String word = new String(content_characters, word_start, (i - word_start));
[24812]1200 if (query_term_variants.contains(word))
1201 {
[24116]1202 // We have found a matching word, so remember its location
1203 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1204 preceding_word_matched = true;
1205 }
[24812]1206 else
1207 {
[24116]1208 preceding_word_matched = false;
1209 }
1210 }
1211 }
[8731]1212
[24116]1213 // Don't forget the last word...
[24812]1214 if (in_word == true)
1215 {
[24116]1216 // Check if the word matches any of the query term equivalents
1217 String word = new String(content_characters, word_start, (content_characters.length - word_start));
[24812]1218 if (query_term_variants.contains(word))
1219 {
[24116]1220 // We have found a matching word, so remember its location
1221 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1222 }
[8731]1223 }
1224
[25635]1225 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1226 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
[8731]1227
[24116]1228 // Deal with phrases now
[25635]1229 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
[24812]1230 for (int i = 0; i < word_matches.size(); i++)
1231 {
[25635]1232 WordMatch word_match = word_matches.get(i);
[8731]1233
[24116]1234 // See if any partial phrase matches are extended by this word
[24812]1235 if (word_match.preceding_word_matched)
1236 {
1237 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1238 {
[25635]1239 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1240 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
[24116]1241 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
[24812]1242 if (phrase_query_p_term_x_variants.contains(word_match.word))
1243 {
[24116]1244 partial_phrase_match.num_words_matched++;
[8731]1245
[24116]1246 // Has a complete phrase match occurred?
[24812]1247 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1248 {
[24116]1249 // Check for overlaps by looking at the previous highlight range
[24812]1250 if (!highlight_end_positions.isEmpty())
1251 {
[24116]1252 int last_highlight_index = highlight_end_positions.size() - 1;
[25635]1253 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
[24812]1254 if (last_highlight_end > partial_phrase_match.start_position)
1255 {
[24116]1256 // There is an overlap, so remove the previous phrase match
[25635]1257 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
[24116]1258 highlight_end_positions.remove(last_highlight_index);
1259 partial_phrase_match.start_position = last_highlight_start;
1260 }
1261 }
[8731]1262
[24116]1263 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1264 highlight_end_positions.add(new Integer(word_match.end_position));
1265 }
1266 // No, but add the partial match back into the list for next time
[24812]1267 else
1268 {
[24116]1269 partial_phrase_matches.add(partial_phrase_match);
1270 }
1271 }
1272 }
1273 }
[24812]1274 else
1275 {
[24116]1276 partial_phrase_matches.clear();
1277 }
[8731]1278
[24116]1279 // See if this word is at the start of any of the phrases
[24812]1280 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1281 {
[25635]1282 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
[31686]1283 if (phrase_query_p_term_variants_list.size()>0) {
[24116]1284 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
[24812]1285 if (phrase_query_p_term_1_variants.contains(word_match.word))
1286 {
[24116]1287 // If this phrase is just one word long, we have a complete match
[24812]1288 if (phrase_query_p_term_variants_list.size() == 1)
1289 {
[24116]1290 highlight_start_positions.add(new Integer(word_match.start_position));
1291 highlight_end_positions.add(new Integer(word_match.end_position));
1292 }
1293 // Otherwise we have the start of a potential phrase match
[24812]1294 else
1295 {
[24116]1296 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1297 }
1298 }
[31686]1299 }
[24116]1300 }
[4287]1301 }
[4717]1302
[24116]1303 // Now add the annotation tags into the document at the correct points
[28382]1304 Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
[8731]1305
[24116]1306 int last_wrote = 0;
[24812]1307 for (int i = 0; i < highlight_start_positions.size(); i++)
1308 {
[25635]1309 int highlight_start = highlight_start_positions.get(i).intValue();
1310 int highlight_end = highlight_end_positions.get(i).intValue();
[8731]1311
[24116]1312 // Print anything before the highlight range
[24812]1313 if (last_wrote < highlight_start)
1314 {
[24116]1315 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
[28382]1316 content_element.appendChild(doc.createTextNode(preceding_text));
[24116]1317 }
[8731]1318
[24116]1319 // Print the highlight text, annotated
[24812]1320 if (highlight_end > last_wrote)
1321 {
[24116]1322 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
[28382]1323 Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
[24116]1324 annotation_element.setAttribute("type", "query_term");
1325 content_element.appendChild(annotation_element);
1326 last_wrote = highlight_end;
1327 }
1328 }
[8731]1329
[24116]1330 // Finish off any unwritten text
[24812]1331 if (last_wrote < content_characters.length)
1332 {
[24116]1333 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
[28382]1334 content_element.appendChild(doc.createTextNode(remaining_text));
[24116]1335 }
1336 return content_element;
[8731]1337 }
1338
[24116]1339 static private class WordMatch
1340 {
1341 public String word;
1342 public int start_position;
1343 public int end_position;
1344 public boolean preceding_word_matched;
[8731]1345
[24116]1346 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1347 {
1348 this.word = word;
1349 this.start_position = start_position;
1350 this.end_position = end_position;
1351 this.preceding_word_matched = preceding_word_matched;
1352 }
[8731]1353 }
1354
[24116]1355 static private class PartialPhraseMatch
1356 {
1357 public int start_position;
1358 public int query_phrase_number;
1359 public int num_words_matched;
[8731]1360
[24116]1361 public PartialPhraseMatch(int start_position, int query_phrase_number)
1362 {
1363 this.start_position = start_position;
1364 this.query_phrase_number = query_phrase_number;
1365 this.num_words_matched = 1;
1366 }
[8731]1367 }
[3645]1368}
Note: See TracBrowser for help on using the repository browser.