source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 32111

Last change on this file since 32111 was 32111, checked in by kjdon, 6 years ago

pass in base interfaces array to the call to find archive2document.xsl. If you have a custom interface it will probably live in hte default one. Then check to make sure the file was there before trying to use it.

  • Property svn:keywords set to Author Date Id Revision
File size: 53.3 KB
RevLine 
[3801]1/*
[24812]2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
[3645]19package org.greenstone.gsdl3.action;
20
[3801]21// Greenstone classes
[3645]22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
[32069]24import org.greenstone.util.GlobalProperties;
[3801]25
[3645]26// XML classes
[24812]27import org.w3c.dom.Document;
28import org.w3c.dom.Element;
29import org.w3c.dom.Node;
[4287]30import org.w3c.dom.Text;
[3801]31import org.w3c.dom.NodeList;
[3645]32
[3801]33// General Java classes
[8731]34import java.util.ArrayList;
[3645]35import java.util.HashMap;
[4287]36import java.util.HashSet;
[3645]37import java.io.File;
[25635]38import java.io.Serializable;
[3645]39
[13124]40import org.apache.log4j.*;
[3801]41
[24812]42/** Action class for retrieving Documents via the message router */
43public class DocumentAction extends Action
44{
[13124]45
[24116]46 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
[13124]47
[24116]48 // this is used to specify that the sibling nodes of a selected one should be obtained
49 public static final String SIBLING_ARG = "sib";
50 public static final String GOTO_PAGE_ARG = "gp";
51 public static final String ENRICH_DOC_ARG = "end";
[25305]52 public static final String EXPAND_DOCUMENT_ARG = "ed";
53 public static final String EXPAND_CONTENTS_ARG = "ec";
54 public static final String REALISTIC_BOOK_ARG = "book";
[32068]55 public static final String NO_TEXT_ARG = "noText";
56 public static final String DOC_EDIT_ARG = "docEdit";
57
[24812]58 /**
59 * if this is set to true, when a document is displayed, any annotation type
60 * services (enrich) will be offered to the user as well
61 */
62 protected boolean provide_annotations = false;
63
[24116]64 protected boolean highlight_query_terms = false;
[5694]65
[24812]66 public boolean configure()
67 {
[24116]68 super.configure();
[24812]69 String highlight = (String) config_params.get("highlightQueryTerms");
70 if (highlight != null && highlight.equals("true"))
71 {
[24116]72 highlight_query_terms = true;
73 }
[24812]74 String annotate = (String) config_params.get("displayAnnotationService");
75 if (annotate != null && annotate.equals("true"))
76 {
[24116]77 provide_annotations = true;
78 }
[25953]79 return true;
80 }
[24812]81
82 public Node process(Node message_node)
[24116]83 {
84 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
[24812]85
[28964]86 Element message = GSXML.nodeToElement(message_node);
[32068]87 Document doc = XMLConverter.newDOM(); //message.getOwnerDocument();
[28382]88
[24116]89 // the response
[28382]90 Element result = doc.createElement(GSXML.MESSAGE_ELEM);
91 Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
[24116]92 result.appendChild(page_response);
[19984]93
[24116]94 // get the request - assume only one
[24812]95 Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
96 Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]97 HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
[4023]98
[24116]99 // just in case there are some that need to get passed to the services
[24812]100 HashMap service_params = (HashMap) params.get("s0");
[4717]101
[24116]102 String collection = (String) params.get(GSParams.COLLECTION);
[25305]103 String document_id = (String) params.get(GSParams.DOCUMENT);
[25355]104 if (document_id != null && document_id.equals(""))
105 {
106 document_id = null;
[25305]107 }
108 String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
[25355]109 if (href != null && href.equals(""))
110 {
111 href = null;
[25305]112 }
113 String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
114 if (document_id == null && href == null)
[24812]115 {
[24116]116 logger.error("no document specified!");
117 return result;
118 }
[25355]119 if (rl != null && rl.equals("0"))
120 {
121 // this is a true external link, we should have been directed to a different page or action
122 logger.error("rl value was 0, shouldn't get here");
123 return result;
[25305]124 }
[29521]125
126 UserContext userContext = new UserContext(request);
127
128 //append site metadata
129 addSiteMetadata(page_response, userContext);
130 addInterfaceOptions(page_response);
131
132 // get the additional data needed for the page
133 getBackgroundData(page_response, collection, userContext);
134 Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
135
136 if (format_elem != null) {
137 // lets look for param defaults set in config file
138 NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
139 for (int i=0; i<param_defaults.getLength(); i++) {
140 Element p = (Element)param_defaults.item(i);
141 String name = p.getAttribute(GSXML.NAME_ATT);
142 if (params.get(name) ==null) {
143 // wasn't set from interface
144 String value = p.getAttribute(GSXML.VALUE_ATT);
145 params.put(name, value );
146 // also add into request param xml so that xslt knows it too
147 GSXML.addParameterToList(cgi_paramList, name, value);
148 }
149 }
150 }
[32068]151
[32070]152 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
153 if (document_type != null && document_type.equals(""))
154 {
155 //document_type = "hierarchy";
156 document_type = null; // we'll get it later if not already specified
157 }
158 // what if it is null here?? Anu to check...
[32068]159
[32071]160
[32068]161 boolean editing_document = false;
162 String doc_edit = (String) params.get(DOC_EDIT_ARG);
163 if (doc_edit != null && doc_edit.equals("1")) {
164 editing_document = true;
165 }
166
167 // are we editing mode? just get the archive document, convert to our internal doc format, and return it
168 if (editing_document) {
169
170 // call get archive doc
171 Element dx_message = doc.createElement(GSXML.MESSAGE_ELEM);
172 String to = "DocXMLGetSection";
173 Element dx_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
174 dx_message.appendChild(dx_request);
175 Element dx_section = doc.createElement(GSXML.DOCXML_SECTION_ELEM);
176 dx_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
177 dx_section.setAttribute(GSXML.COLLECTION_ATT, collection);
178 dx_request.appendChild(dx_section);
179
180 Element dx_response_message = (Element) this.mr.process(dx_message);
181 if (processErrorElements(dx_response_message, page_response))
182 {
183 return result;
184 }
185
186 // get the section out
187 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOCXML_SECTION_ELEM);
188 Element section = (Element) GSXML.getNodeByPath(dx_response_message, path);
189 if (section == null) {
190 logger.error("no archive doc returned for "+document_id);
191 return result;
192 }
193 // convert the archive format into the internal format that the page response requires
194
[32071]195 // work out doctype
[32075]196 // NOTE: this will be coming from collection database in index
197 // the archive file doesn't store this. So we have to assume
198 // that the doc type will not be changing with any
199 // modifications happening to archives.
200
201 // if doc type is null, then we need to work it out.
[32071]202 // create a basic doc list containing the current node
[32075]203
[32071]204 if (document_type == null) {
[32075]205 Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
206 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
207 basic_doc_list.appendChild(current_doc);
208 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id);
209 basic_doc_list.appendChild(current_doc);
210 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[32071]211 }
[32075]212
[32071]213 if (document_type == null) {
214 logger.debug("@@@ doctype is null, setting to simple");
215 document_type = GSXML.DOC_TYPE_SIMPLE;
216 }
[32075]217
218 Element doc_elem = doc.createElement(GSXML.DOCUMENT_ELEM);
[32070]219 doc_elem.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
[32068]220 page_response.appendChild(doc_elem);
221
222 Element transformed_section = transformArchiveToDocument(section);
[32075]223 if (document_type == GSXML.DOC_TYPE_SIMPLE) {
224 // simple doc, only returning a single document node, which is the top level section.
225 doc_elem.setAttribute(GSXML.NODE_ID_ATT, document_id);
226 GSXML.mergeElements(doc_elem, transformed_section);
227 return result;
228 }
229
230 // multi sectioned document.
231 transformed_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
[32071]232 // In docEdit mode, we obtain the text from archives, from doc.xml
233 // Now the transformation has replaced <Section> with <documentNode>
234 // Need to add nodeID, nodeType and docType attributes to each docNode
235 // as doc.xml doesn't store that.
236 insertDocNodeAttributes(transformed_section, document_type, null);
[32068]237 doc_elem.appendChild(doc.importNode(transformed_section, true));
[32071]238 logger.debug("dx result = "+XMLConverter.getPrettyString(result));
239
[32068]240 return result;
241 }
[32071]242
[24116]243 //whether to retrieve siblings or not
244 boolean get_siblings = false;
245 String sibs = (String) params.get(SIBLING_ARG);
[24812]246 if (sibs != null && sibs.equals("1"))
247 {
[24116]248 get_siblings = true;
249 }
[24812]250
[25305]251 String doc_id_modifier = "";
[24116]252 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
[24812]253 if (sibling_num != null && !sibling_num.equals(""))
254 {
[24116]255 // we have to modify the doc name
[25355]256 doc_id_modifier = "." + sibling_num + ".ss";
[24116]257 }
[24812]258
[24116]259 boolean expand_document = false;
[25305]260 String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
[24812]261 if (ed_arg != null && ed_arg.equals("1"))
262 {
[24116]263 expand_document = true;
264 }
[14525]265
[24116]266 boolean expand_contents = false;
[24812]267 if (expand_document)
268 { // we always expand the contents with the text
[24116]269 expand_contents = true;
[24812]270 }
271 else
272 {
[25305]273 String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
[24812]274 if (ec_arg != null && ec_arg.equals("1"))
275 {
[24116]276 expand_contents = true;
277 }
[5694]278 }
[25355]279
[32068]280 // do we want text content? Not if no_text=1.
281 // expand_document overrides this. - should it??
282 boolean get_text = true;
283 String nt_arg = (String) params.get(NO_TEXT_ARG);
284
285 if (!expand_document && nt_arg!=null && nt_arg.equals("1")) {
[32071]286 logger.debug("SETTING GET TEXT TO FALSE");
[32068]287 get_text = false;
288 } else {
[32071]289 logger.debug("GET TEXT REMAINS TRUE");
[32068]290 }
[4257]291
[24116]292 // the_document is where all the doc info - structure and metadata etc
293 // is added into, to be returned in the page
[28382]294 Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
[24116]295 page_response.appendChild(the_document);
[9874]296
[24116]297 // create a basic doc list containing the current node
[28382]298 Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
299 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]300 basic_doc_list.appendChild(current_doc);
[25305]301 if (document_id != null)
[24812]302 {
[25355]303 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
[24812]304 }
[25355]305 else
[24812]306 {
[25305]307 current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
308 // do we need this??
309 current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
[14525]310 }
[32071]311
[25953]312 if (document_type == null)
313 {
314 document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[25816]315 }
[29439]316 if (document_type == null)
[25953]317 {
[32071]318 logger.debug("##### doctype is null, setting to simple");
[29439]319 document_type = GSXML.DOC_TYPE_SIMPLE;
[25816]320 }
[29439]321
322 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
323
[24116]324 // Create a parameter list to specify the required structure information
[28382]325 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]326
327 if (service_params != null)
328 {
[28964]329 GSXML.addParametersToList(ds_param_list, service_params);
[24116]330 }
[3817]331
[24812]332 Element ds_param = null;
[24116]333 boolean get_structure = false;
334 boolean get_structure_info = false;
[24889]335 if (document_type.equals(GSXML.DOC_TYPE_PAGED))
[24812]336 {
[24116]337 get_structure_info = true;
[24889]338
339 if (expand_contents)
340 {
[28382]341 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]342 ds_param_list.appendChild(ds_param);
343 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
344 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
345 }
346
[25305]347 // get the info needed for paged naviagtion
[28382]348 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]349 ds_param_list.appendChild(ds_param);
350 ds_param.setAttribute(GSXML.NAME_ATT, "info");
351 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
[28382]352 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]353 ds_param_list.appendChild(ds_param);
354 ds_param.setAttribute(GSXML.NAME_ATT, "info");
355 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
[28382]356 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]357 ds_param_list.appendChild(ds_param);
358 ds_param.setAttribute(GSXML.NAME_ATT, "info");
359 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
[24812]360
[24889]361 if (get_siblings)
362 {
[28382]363 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]364 ds_param_list.appendChild(ds_param);
365 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
366 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
367 }
368
[24812]369 }
[28258]370 else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) || document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
[24812]371 {
[24116]372 get_structure = true;
[24812]373 if (expand_contents)
374 {
[28382]375 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]376 ds_param_list.appendChild(ds_param);
377 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
378 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
[24812]379 }
380 else
381 {
[24116]382 // get the info needed for table of contents
[28382]383 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]384 ds_param_list.appendChild(ds_param);
385 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
386 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
[28382]387 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]388 ds_param_list.appendChild(ds_param);
389 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
390 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
[24812]391 if (get_siblings)
392 {
[28382]393 ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]394 ds_param_list.appendChild(ds_param);
395 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
396 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
397 }
398 }
[24812]399 }
400 else
401 {
[31249]402 // we dont need any structure
[24116]403 }
[3801]404
[24116]405 boolean has_dummy = false;
[24812]406 if (get_structure || get_structure_info)
407 {
[8676]408
[24116]409 // Build a request to obtain the document structure
[28382]410 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]411 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]412 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]413 ds_message.appendChild(ds_request);
414 ds_request.appendChild(ds_param_list);
[24812]415
[25816]416 // add the node list we created earlier
[24116]417 ds_request.appendChild(basic_doc_list);
[24812]418
[24116]419 // Process the document structure retrieve message
420 Element ds_response_message = (Element) this.mr.process(ds_message);
[24812]421 if (processErrorElements(ds_response_message, page_response))
422 {
[24116]423 return result;
424 }
[4030]425
[24116]426 // get the info and print out
[24812]427 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]428 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
429 path = GSPath.appendLink(path, "nodeStructureInfo");
430 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
431 // get the doc_node bit
[24812]432 if (ds_response_struct_info != null)
433 {
[28382]434 the_document.appendChild(doc.importNode(ds_response_struct_info, true));
[24116]435 }
[24812]436 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]437 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
438 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
439 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
[24812]440
441 if (ds_response_structure != null)
442 {
[24116]443 // add the contents of the structure bit into the_document
444 NodeList structs = ds_response_structure.getChildNodes();
[24812]445 for (int i = 0; i < structs.getLength(); i++)
446 {
[28382]447 the_document.appendChild(doc.importNode(structs.item(i), true));
[24116]448 }
[24812]449 }
450 else
451 {
[24116]452 // no structure nodes, so put in a dummy doc node
[28382]453 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]454 if (document_id != null)
[24812]455 {
[25305]456 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]457 }
[25355]458 else
[24812]459 {
[25305]460 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[25355]461
[24116]462 }
463 the_document.appendChild(doc_node);
464 has_dummy = true;
465 }
[24812]466 }
467 else
468 { // a simple type - we dont have a dummy node for simple
[24116]469 // should think about this more
470 // no structure request, so just put in a dummy doc node
[28382]471 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]472 if (document_id != null)
[24812]473 {
[25305]474 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]475 }
[25355]476 else
[24812]477 {
[25305]478 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[24116]479 }
480 the_document.appendChild(doc_node);
481 has_dummy = true;
482 }
[24812]483
[24116]484 // Build a request to obtain some document metadata
[28382]485 Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]486 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
[28382]487 Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]488 dm_message.appendChild(dm_request);
489 // Create a parameter list to specify the required metadata information
[24812]490
[25635]491 HashSet<String> meta_names = new HashSet<String>();
[24116]492 meta_names.add("Title"); // the default
[24812]493 if (format_elem != null)
494 {
[24889]495 getRequiredMetadataNames(format_elem, meta_names);
[24116]496 }
[28258]497
[26026]498 Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
[28258]499 if (extraMetaListElem != null)
[26026]500 {
501 NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
[28258]502 for (int i = 0; i < extraMetaList.getLength(); i++)
[26026]503 {
[28258]504 meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
[26026]505 }
506 }
[24812]507
[28382]508 Element dm_param_list = createMetadataParamList(doc,meta_names);
[24812]509 if (service_params != null)
510 {
[28964]511 GSXML.addParametersToList(dm_param_list, service_params);
[24116]512 }
[24812]513
[24116]514 dm_request.appendChild(dm_param_list);
[24812]515
[24116]516 // create the doc node list for the metadata request
[28382]517 Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]518 dm_request.appendChild(dm_doc_list);
[4030]519
[24116]520 // Add each node from the structure response into the metadata request
521 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
[24812]522 for (int i = 0; i < doc_nodes.getLength(); i++)
523 {
[24116]524 Element doc_node = (Element) doc_nodes.item(i);
525 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
[3801]526
[24116]527 // Add the documentNode to the list
[28382]528 Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]529 dm_doc_list.appendChild(dm_doc_node);
530 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
[24812]531 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
[29922]532 if (document_id == null){
533 dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
534 }
535
[24116]536 }
[3801]537
[24116]538 // we also want a metadata request to the top level document to get
539 // assocfilepath - this could be cached too
[28382]540 Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]541 dm_message.appendChild(doc_meta_request);
[28382]542 Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]543 if (service_params != null)
544 {
[28964]545 GSXML.addParametersToList(doc_meta_param_list, service_params);
[24116]546 }
[3801]547
[24116]548 doc_meta_request.appendChild(doc_meta_param_list);
[28382]549 Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]550 doc_meta_param_list.appendChild(doc_param);
551 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
552 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
[8676]553
[24116]554 // create the doc node list for the metadata request
[28382]555 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]556 doc_meta_request.appendChild(doc_list);
[3801]557
[28382]558 Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]559 // the node we want is the root document node
[25355]560 if (document_id != null)
[24812]561 {
[25305]562 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
[24812]563 }
[29922]564 /*else
[24812]565 {
[25355]566 doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
567 // can we assume that href is always a top level doc??
568 //doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
[25305]569 //doc_node.setAttribute("externalURL", has_rl);
[29922]570 }*/
[24116]571 doc_list.appendChild(doc_node);
[24889]572
[24116]573 Element dm_response_message = (Element) this.mr.process(dm_message);
[24812]574 if (processErrorElements(dm_response_message, page_response))
575 {
[24116]576 return result;
577 }
[9874]578
[24812]579 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]580 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
[3801]581
[24116]582 // Merge the metadata with the structure information
583 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
[24812]584 for (int i = 0; i < doc_nodes.getLength(); i++)
585 {
[24116]586 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
[8833]587 }
[24116]588 // get the top level doc metadata out
[24812]589 Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
590 Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
[24116]591 GSXML.mergeMetadataLists(the_document, top_doc_node);
[24812]592
[32068]593 // do we want doc text content? If not, we are done.
594 if (!get_text) {
595 // don't get text
596 return result;
597 }
598
[24116]599 // Build a request to obtain some document content
[28382]600 Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]601 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
[28382]602 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]603 dc_message.appendChild(dc_request);
[5694]604
[24116]605 // Create a parameter list to specify the request parameters - empty for now
[28382]606 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]607 if (service_params != null)
608 {
[28964]609 GSXML.addParametersToList(dc_param_list, service_params);
[24116]610 }
[4858]611
[24116]612 dc_request.appendChild(dc_param_list);
613
614 // get the content
615 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
[24812]616 if (expand_document)
617 {
[24116]618 dc_request.appendChild(dm_doc_list);
[24812]619 }
620 else
621 {
[24116]622 dc_request.appendChild(basic_doc_list);
[4858]623 }
[24116]624 Element dc_response_message = (Element) this.mr.process(dc_message);
[24812]625 if (processErrorElements(dc_response_message, page_response))
626 {
[24116]627 return result;
[4827]628 }
[24116]629 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
[25953]630
[24812]631 if (expand_document)
632 {
[24116]633 // Merge the content with the structure information
634 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
[24812]635 for (int i = 0; i < doc_nodes.getLength(); i++)
636 {
[31249]637 Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), GSXML.NODE_CONTENT_ELEM);
[24812]638 if (content != null)
639 {
640 if (highlight_query_terms)
641 {
[31249]642 String node_id = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
643 content = highlightQueryTerms(request, node_id, (Element) content);
[24116]644 }
[31249]645
[28382]646 doc_nodes.item(i).appendChild(doc.importNode(content, true));
[24116]647 }
648 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
649 }
[29521]650 if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
651 Element dummy_node = (Element) doc_nodes.item(0);
652 the_document.removeChild(dummy_node);
653 the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
654 NodeList dummy_children = dummy_node.getChildNodes();
655 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
656 {
657 // special case as we don't want more than one metadata list
658 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
659 {
660 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
661 }
662 else
663 {
664 the_document.appendChild(dummy_children.item(i));
665 }
666 }
667 }
[24812]668 }
669 else
670 {
[24116]671 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
672 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
673 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
[25305]674 //Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
[25953]675
[24812]676 if (dc_response_doc_content == null)
677 {
[24116]678 // no content to add
[25355]679 if (dc_response_doc.getAttribute("external").equals("true"))
680 {
681
682 //if (dc_response_doc_external != null)
683 //{
[25305]684 String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
[24812]685
[25305]686 the_document.setAttribute("selectedNode", href_id);
687 the_document.setAttribute("external", href_id);
[25355]688 }
689 return result;
[24116]690 }
[24812]691 if (highlight_query_terms)
692 {
[24116]693 dc_response_doc.removeChild(dc_response_doc_content);
[24812]694
[31249]695 dc_response_doc_content = highlightQueryTerms(request, null, dc_response_doc_content);
[24116]696 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
697 }
[24812]698
699 if (provide_annotations)
700 {
701 String service_selected = (String) params.get(ENRICH_DOC_ARG);
702 if (service_selected != null && service_selected.equals("1"))
703 {
[24116]704 // now we can modifiy the response doc if needed
[24812]705 String enrich_service = (String) params.get(GSParams.SERVICE);
[24116]706 // send a message to the service
[28382]707 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
708 Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
[24116]709 enrich_message.appendChild(enrich_request);
710 // check for parameters
[24812]711 HashMap e_service_params = (HashMap) params.get("s1");
712 if (e_service_params != null)
713 {
[28382]714 Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]715 GSXML.addParametersToList(enrich_pl, e_service_params);
[24116]716 enrich_request.appendChild(enrich_pl);
717 }
[28382]718 Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]719 enrich_request.appendChild(e_doc_list);
[28382]720 e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
[24812]721
[24116]722 Node enrich_response = this.mr.process(enrich_message);
[24812]723
724 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
[24116]725 path = GSPath.createPath(links);
[24812]726 dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
727
728 }
[24116]729 } // if provide_annotations
[3987]730
[24116]731 // use the returned id rather than the sent one cos there may have
732 // been modifiers such as .pr that are removed.
733 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
734 the_document.setAttribute("selectedNode", modified_doc_id);
[24812]735 if (has_dummy)
736 {
[24116]737 // change the id if necessary and add the content
[24812]738 Element dummy_node = (Element) doc_nodes.item(0);
739
[24116]740 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[28382]741 dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]742 // hack for simple type
[26140]743 if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
[24812]744 {
[24116]745 // we dont want the internal docNode, just want the content and metadata in the document
746 // rethink this!!
747 the_document.removeChild(dummy_node);
[4023]748
[24116]749 NodeList dummy_children = dummy_node.getChildNodes();
750 //for (int i=0; i<dummy_children.getLength(); i++) {
[24812]751 for (int i = dummy_children.getLength() - 1; i >= 0; i--)
752 {
[24116]753 // special case as we don't want more than one metadata list
[24812]754 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
755 {
[24116]756 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
[24812]757 }
758 else
759 {
[24116]760 the_document.appendChild(dummy_children.item(i));
761 }
762 }
763 }
[28258]764
[26140]765 the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[24812]766 }
767 else
768 {
[24116]769 // Merge the document content with the metadata and structure information
[24812]770 for (int i = 0; i < doc_nodes.getLength(); i++)
771 {
[24116]772 Node dn = doc_nodes.item(i);
[24812]773 String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
774 if (dn_id.equals(modified_doc_id))
775 {
[28382]776 dn.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]777 break;
778 }
779 }
780 }
781 }
[29307]782 //logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
[24116]783 return result;
[3801]784 }
[24812]785
786 /**
787 * tell the param class what its arguments are if an action has its own
788 * arguments, this should add them to the params object - particularly
789 * important for args that should not be saved
790 */
[25305]791 public boolean addActionParameters(GSParams params)
[24812]792 {
[24116]793 params.addParameter(GOTO_PAGE_ARG, false);
794 params.addParameter(ENRICH_DOC_ARG, false);
[25305]795 params.addParameter(EXPAND_DOCUMENT_ARG, false);
796 params.addParameter(EXPAND_CONTENTS_ARG, false);
797 params.addParameter(REALISTIC_BOOK_ARG, false);
798
[24116]799 return true;
[4717]800 }
[4023]801
[24812]802 /**
803 * this method gets the collection description, the format info, the list of
804 * enrich services, etc - stuff that is needed for the page, but is the same
805 * whatever the query is - should be cached
806 */
[24993]807 protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
[24812]808 {
[28382]809 Document doc = page_response.getOwnerDocument();
810
[24116]811 // create a message to process - contains requests for the collection
812 // description, the format element, the enrich services on offer
813 // these could all be cached
[28382]814 Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]815 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
816 // the format request - ignore for now, where does this request go to??
[28382]817 Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
[24116]818 info_message.appendChild(format_request);
819
820 // the enrich_services request - only do this if provide_annotations is true
821
[24812]822 if (provide_annotations)
823 {
[28382]824 Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
[24116]825 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
826 info_message.appendChild(enrich_services_request);
[4023]827 }
[24116]828
[24812]829 Element info_response = (Element) this.mr.process(info_message);
830
[24116]831 // the collection is the first response
832 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
833 Element format_resp = (Element) responses.item(0);
[24812]834
835 Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
836 if (format_elem != null)
837 {
[25985]838 Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
[28258]839 if (global_format_elem != null)
[25985]840 {
841 GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
842 }
843
844 // set the format type
[24812]845 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
[28382]846 page_response.appendChild(doc.importNode(format_elem, true));
[4023]847 }
[4287]848
[24812]849 if (provide_annotations)
850 {
851 Element services_resp = (Element) responses.item(1);
[4287]852
[24116]853 // a new message for the mr
[28382]854 Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]855 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
856 boolean service_found = false;
[24812]857 for (int j = 0; j < e_services.getLength(); j++)
858 {
859 if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
860 {
[28382]861 Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
[24116]862 enrich_message.appendChild(s);
863 service_found = true;
864 }
865 }
[24812]866 if (service_found)
867 {
868 Element enrich_response = (Element) this.mr.process(enrich_message);
869
[24116]870 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
[28382]871 Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
[24812]872 for (int i = 0; i < e_responses.getLength(); i++)
873 {
874 Element e_resp = (Element) e_responses.item(i);
[28382]875 Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
[24116]876 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
877 service_list.appendChild(e_service);
878 }
879 page_response.appendChild(service_list);
880 }
881 } // if provide_annotations
882 return true;
[24812]883
[9874]884 }
[4287]885
[25953]886 protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
887 {
[28382]888 Document doc = basic_doc_list.getOwnerDocument();
889
890 Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[25953]891 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]892 Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[25953]893 ds_message.appendChild(ds_request);
[25816]894
[25953]895 // Create a parameter list to specify the required structure information
[28382]896 Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
897 Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
[25953]898 ds_param_list.appendChild(ds_param);
899 ds_param.setAttribute(GSXML.NAME_ATT, "info");
900 ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
[25816]901
[25953]902 ds_request.appendChild(ds_param_list);
[25816]903
[25953]904 // add the node list we created earlier
905 ds_request.appendChild(basic_doc_list);
906
907 // Process the document structure retrieve message
908 Element ds_response_message = (Element) this.mr.process(ds_message);
909 if (processErrorElements(ds_response_message, page_response))
910 {
911 return null;
912 }
913
914 String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
915 String path = GSPath.createPath(links);
916 Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
[29439]917 if (info_elem == null) {
918 return null;
919 }
[25953]920 Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
921 if (doctype_elem != null)
922 {
923 String doc_type = doctype_elem.getAttribute("value");
924 return doc_type;
925 }
926 return null;
927 }
928
[32071]929 // Recursive method to set the docType, nodeType and nodeID attributes of each docNode
930 // The docType remains constant as in parameter document_type
931 // The nodeID for the first (root) docNode is already set. For all children, the rootNode id
932 // is updated to be <parent-id>.<num-child>, where the first parent-id is rootNode id.
933 // The nodeType is root if rootNode, internal if there are children and leaf if no children
934 protected void insertDocNodeAttributes(Element docNode, String document_type, String id) {
935
936 boolean isRoot = false;
937 if(id == null) { // rootNode, get the root nodeID to work with recursively
938 id = docNode.getAttribute(GSXML.NODE_ID_ATT);
939 isRoot = true;
940 } else { // for all but the root node, need to still set the nodeID
941 docNode.setAttribute(GSXML.NODE_ID_ATT, id);
942 }
943
944 docNode.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
945
946 NodeList docNodes = GSXML.getChildrenByTagName(docNode, GSXML.DOC_NODE_ELEM);
947 if(docNodes.getLength() > 0) {
948 docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
949 for(int i = 0; i < docNodes.getLength(); i++) {
950 Element childDocNode = (Element)docNodes.item(i);
951
952 // work out the child docNode's nodeID based on current id
953 String nodeID = id + "." + (i+1);
954 insertDocNodeAttributes(childDocNode, document_type, nodeID); //recursion step
955 }
956 } else {
957 docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
958 }
959
960 // rootNode's nodeType is a special case: it's "root", not "leaf" or "internal"
961 if(isRoot) docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
962
963 }
964
[32068]965 /** run the XSLT transform which converts from doc.xml format to our internal document format */
966 protected Element transformArchiveToDocument(Element section) {
967
[32111]968 String stylesheet_filename = GSFile.stylesheetFile(GlobalProperties.getGSDL3Home(), (String) this.config_params.get(GSConstants.SITE_NAME), "", (String) this.config_params.get(GSConstants.INTERFACE_NAME), (ArrayList<String>) this.config_params.get(GSConstants.BASE_INTERFACES), "archive2document.xsl");
969 if (stylesheet_filename == null) {
970 logger.error("Couldn't find stylesheet archive2document.xsl");
971 return section;
972 }
973
974 Document stylesheet_doc = XMLConverter.getDOM(new File(stylesheet_filename));
[32068]975 if (stylesheet_doc == null) {
[32111]976 logger.error("Couldn't load in stylesheet "+stylesheet_filename);
[32068]977 return section;
978 }
979
980 Document section_doc = XMLConverter.newDOM();
981 section_doc.appendChild(section_doc.importNode(section, true));
982 Node result = this.transformer.transform(stylesheet_doc, section_doc);
[32071]983 logger.debug("transform result = "+XMLConverter.getPrettyString(result));
[32068]984
985 Element new_element;
[32071]986 if (result.getNodeType() == Node.DOCUMENT_NODE) {
[32068]987 new_element = ((Document) result).getDocumentElement();
[32071]988 } else {
[32068]989 new_element = (Element) result;
[32071]990 }
[32068]991
992
993 return new_element;
994
995 }
996
997
[24812]998 /**
999 * this involves a bit of a hack to get the equivalent query terms - has to
1000 * requery the query service - uses the last selected service name. (if it
1001 * ends in query). should this action do the query or should it send a
1002 * message to the query action? but that will involve lots of extra stuff.
[24889]1003 * also doesn't handle phrases properly - just highlights all the terms
1004 * found in the text.
[24812]1005 */
[31249]1006 protected Element highlightQueryTerms(Element request, String current_node_id, Element dc_response_doc_content)
[24812]1007 {
[28382]1008 Document doc = request.getOwnerDocument();
1009
[24116]1010 // do the query again to get term info
[24812]1011 Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]1012 HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
[24812]1013
1014 HashMap previous_params = (HashMap) params.get("p");
1015 if (previous_params == null)
1016 {
[24116]1017 return dc_response_doc_content;
1018 }
[24812]1019 String service_name = (String) previous_params.get(GSParams.SERVICE);
1020 if (service_name == null || !service_name.endsWith("Query"))
1021 { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
[24116]1022 logger.debug("invalid service, not doing highlighting");
1023 return dc_response_doc_content;
1024 }
[24812]1025 String collection = (String) params.get(GSParams.COLLECTION);
[24993]1026 UserContext userContext = new UserContext(request);
[24116]1027 String to = GSPath.appendLink(collection, service_name);
[24812]1028
[28382]1029 Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
1030 Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]1031 mr_query_message.appendChild(mr_query_request);
[24812]1032
[24116]1033 // paramList
[24812]1034 HashMap service_params = (HashMap) params.get("s1");
1035
[28382]1036 Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]1037 GSXML.addParametersToList(query_param_list, service_params);
[31249]1038 if (current_node_id != null) {
1039 GSXML.addParameterToList(query_param_list, "hldocOID", current_node_id);
1040 } else {
1041 GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
1042 }
[24116]1043 mr_query_request.appendChild(query_param_list);
1044 // do the query
[24812]1045 Element mr_query_response = (Element) this.mr.process(mr_query_message);
[30049]1046 String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
1047 Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
[31249]1048 // For SOLR, the above query may come back with a nodeContent element, which is the hldocOID section content, with search terms marked up. We send it back to the documnetContentRetrieve service so that resolveTextMacros can be applied, and it can be properly encased in documentNode etc elements
[30049]1049 if (highlighted_Node != null)
1050 {
[30056]1051 // Build a request to process highlighted text
1052
1053 Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
1054 to = GSPath.appendLink(collection, "DocumentContentRetrieve");
1055 Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
1056 hl_message.appendChild(dc_request);
1057
1058 // Create a parameter list to specify the request parameters - empty for now
1059 Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1060 dc_request.appendChild(dc_param_list);
1061
1062 // get the content
1063 Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
1064 dc_request.appendChild(doc_list);
1065 Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
1066 doc_list.appendChild(current_doc);
1067 current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
1068 //Append highlighted content to request for processing
1069 dc_request.appendChild(doc.importNode(highlighted_Node, true));
1070 Element hl_response_message = (Element) this.mr.process(hl_message);
[31249]1071
[30056]1072 //Get results
1073 NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
1074 Element content = (Element) contentList.item(0);
1075 return content;
[30049]1076 }
[24812]1077 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
[24116]1078 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
[24812]1079 if (query_term_list_element == null)
1080 {
[24116]1081 // no term info
1082 logger.error("No query term information.\n");
1083 return dc_response_doc_content;
1084 }
[8731]1085
[24116]1086 String content = GSXML.getNodeText(dc_response_doc_content);
[4287]1087
[24812]1088 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[24116]1089 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
[4717]1090
[25635]1091 HashSet<String> query_term_variants = new HashSet<String>();
[24116]1092 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
[24812]1093 if (equivalent_terms_nodelist == null || equivalent_terms_nodelist.getLength() == 0)
[24116]1094 {
1095 NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
[24812]1096 if (terms_nodelist != null && terms_nodelist.getLength() > 0)
[24116]1097 {
[24812]1098 for (int i = 0; i < terms_nodelist.getLength(); i++)
[24116]1099 {
[24812]1100 String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
[24116]1101 String termValueU = null;
1102 String termValueL = null;
[24812]1103
1104 if (termValue.length() > 1)
[24116]1105 {
1106 termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
1107 termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
1108 }
1109 else
1110 {
1111 termValueU = termValue.substring(0, 1).toUpperCase();
1112 termValueL = termValue.substring(0, 1).toLowerCase();
1113 }
[24812]1114
[24116]1115 query_term_variants.add(termValueU);
1116 query_term_variants.add(termValueL);
1117 }
1118 }
1119 }
1120 else
1121 {
[24812]1122 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
1123 {
[24116]1124 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
1125 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
[24812]1126 for (int j = 0; j < equivalent_terms.length; j++)
1127 {
[24116]1128 query_term_variants.add(equivalent_terms[j]);
1129 }
1130 }
1131 }
[4287]1132
[25635]1133 ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
[4287]1134
[24116]1135 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
1136 String performed_query = GSXML.getNodeText(query_element) + " ";
[8731]1137
[25635]1138 ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1139 int term_start = 0;
1140 boolean in_term = false;
1141 boolean in_phrase = false;
[24812]1142 for (int i = 0; i < performed_query.length(); i++)
1143 {
[24116]1144 char character = performed_query.charAt(i);
1145 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
1146
1147 // Has a query term just started?
[24812]1148 if (in_term == false && is_character_letter_or_digit == true)
1149 {
[24116]1150 in_term = true;
1151 term_start = i;
1152 }
1153
1154 // Or has a term just finished?
[24812]1155 else if (in_term == true && is_character_letter_or_digit == false)
1156 {
[24116]1157 in_term = false;
1158 String term = performed_query.substring(term_start, i);
[24812]1159
[24116]1160 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
[24812]1161 if (term_element != null)
1162 {
1163
[25635]1164 HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
[24812]1165
[24116]1166 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
[24812]1167 if (term_equivalent_terms_nodelist == null || term_equivalent_terms_nodelist.getLength() == 0)
[24116]1168 {
1169 String termValueU = null;
1170 String termValueL = null;
[24812]1171
1172 if (term.length() > 1)
[24116]1173 {
1174 termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
1175 termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
1176 }
1177 else
1178 {
1179 termValueU = term.substring(0, 1).toUpperCase();
1180 termValueL = term.substring(0, 1).toLowerCase();
1181 }
[24812]1182
[24116]1183 phrase_query_p_term_x_variants.add(termValueU);
1184 phrase_query_p_term_x_variants.add(termValueL);
1185 }
1186 else
1187 {
[24812]1188 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
1189 {
[24116]1190 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
1191 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
[24812]1192 for (int k = 0; k < term_equivalent_terms.length; k++)
1193 {
[24116]1194 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
1195 }
1196 }
1197 }
1198 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
[24812]1199
1200 if (in_phrase == false)
1201 {
[24116]1202 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
[25635]1203 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1204 }
1205 }
[9007]1206 }
[24116]1207 // Watch for phrases (surrounded by quotes)
[24812]1208 if (character == '\"')
1209 {
[24116]1210 // Has a phrase just started?
[24812]1211 if (in_phrase == false)
1212 {
[24116]1213 in_phrase = true;
1214 }
1215 // Or has a phrase just finished?
[24812]1216 else if (in_phrase == true)
1217 {
[24116]1218 in_phrase = false;
1219 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1220 }
1221
[25635]1222 phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]1223 }
[4287]1224 }
[8731]1225
[28382]1226 return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
[8731]1227 }
1228
[24116]1229 /**
[24812]1230 * Highlights query terms in a piece of text.
1231 */
[28382]1232 private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
[24116]1233 {
1234 // Convert the content string to an array of characters for speed
1235 char[] content_characters = new char[content.length()];
1236 content.getChars(0, content.length(), content_characters, 0);
[8731]1237
[24116]1238 // Now skim through the content, identifying word matches
[25635]1239 ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
[24116]1240 int word_start = 0;
1241 boolean in_word = false;
1242 boolean preceding_word_matched = false;
[24813]1243 boolean inTag = false;
[24812]1244 for (int i = 0; i < content_characters.length; i++)
1245 {
[24813]1246 //We don't want to find words inside HTML tags
[24993]1247 if (content_characters[i] == '<')
[24813]1248 {
1249 inTag = true;
1250 continue;
1251 }
1252 else if (inTag && content_characters[i] == '>')
1253 {
1254 inTag = false;
1255 }
1256 else if (inTag)
1257 {
1258 continue;
1259 }
[24993]1260
[24116]1261 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
[24993]1262
[24116]1263 // Has a word just started?
[24812]1264 if (in_word == false && is_character_letter_or_digit == true)
1265 {
[24116]1266 in_word = true;
1267 word_start = i;
1268 }
[8731]1269
[24116]1270 // Or has a word just finished?
[24812]1271 else if (in_word == true && is_character_letter_or_digit == false)
1272 {
[24116]1273 in_word = false;
[8731]1274
[24116]1275 // Check if the word matches any of the query term equivalents
1276 String word = new String(content_characters, word_start, (i - word_start));
[24812]1277 if (query_term_variants.contains(word))
1278 {
[24116]1279 // We have found a matching word, so remember its location
1280 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1281 preceding_word_matched = true;
1282 }
[24812]1283 else
1284 {
[24116]1285 preceding_word_matched = false;
1286 }
1287 }
1288 }
[8731]1289
[24116]1290 // Don't forget the last word...
[24812]1291 if (in_word == true)
1292 {
[24116]1293 // Check if the word matches any of the query term equivalents
1294 String word = new String(content_characters, word_start, (content_characters.length - word_start));
[24812]1295 if (query_term_variants.contains(word))
1296 {
[24116]1297 // We have found a matching word, so remember its location
1298 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1299 }
[8731]1300 }
1301
[25635]1302 ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1303 ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
[8731]1304
[24116]1305 // Deal with phrases now
[25635]1306 ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
[24812]1307 for (int i = 0; i < word_matches.size(); i++)
1308 {
[25635]1309 WordMatch word_match = word_matches.get(i);
[8731]1310
[24116]1311 // See if any partial phrase matches are extended by this word
[24812]1312 if (word_match.preceding_word_matched)
1313 {
1314 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1315 {
[25635]1316 PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1317 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
[24116]1318 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
[24812]1319 if (phrase_query_p_term_x_variants.contains(word_match.word))
1320 {
[24116]1321 partial_phrase_match.num_words_matched++;
[8731]1322
[24116]1323 // Has a complete phrase match occurred?
[24812]1324 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1325 {
[24116]1326 // Check for overlaps by looking at the previous highlight range
[24812]1327 if (!highlight_end_positions.isEmpty())
1328 {
[24116]1329 int last_highlight_index = highlight_end_positions.size() - 1;
[25635]1330 int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
[24812]1331 if (last_highlight_end > partial_phrase_match.start_position)
1332 {
[24116]1333 // There is an overlap, so remove the previous phrase match
[25635]1334 int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
[24116]1335 highlight_end_positions.remove(last_highlight_index);
1336 partial_phrase_match.start_position = last_highlight_start;
1337 }
1338 }
[8731]1339
[24116]1340 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1341 highlight_end_positions.add(new Integer(word_match.end_position));
1342 }
1343 // No, but add the partial match back into the list for next time
[24812]1344 else
1345 {
[24116]1346 partial_phrase_matches.add(partial_phrase_match);
1347 }
1348 }
1349 }
1350 }
[24812]1351 else
1352 {
[24116]1353 partial_phrase_matches.clear();
1354 }
[8731]1355
[24116]1356 // See if this word is at the start of any of the phrases
[24812]1357 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1358 {
[25635]1359 ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
[31686]1360 if (phrase_query_p_term_variants_list.size()>0) {
[24116]1361 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
[24812]1362 if (phrase_query_p_term_1_variants.contains(word_match.word))
1363 {
[24116]1364 // If this phrase is just one word long, we have a complete match
[24812]1365 if (phrase_query_p_term_variants_list.size() == 1)
1366 {
[24116]1367 highlight_start_positions.add(new Integer(word_match.start_position));
1368 highlight_end_positions.add(new Integer(word_match.end_position));
1369 }
1370 // Otherwise we have the start of a potential phrase match
[24812]1371 else
1372 {
[24116]1373 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1374 }
1375 }
[31686]1376 }
[24116]1377 }
[4287]1378 }
[4717]1379
[24116]1380 // Now add the annotation tags into the document at the correct points
[28382]1381 Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
[8731]1382
[24116]1383 int last_wrote = 0;
[24812]1384 for (int i = 0; i < highlight_start_positions.size(); i++)
1385 {
[25635]1386 int highlight_start = highlight_start_positions.get(i).intValue();
1387 int highlight_end = highlight_end_positions.get(i).intValue();
[8731]1388
[24116]1389 // Print anything before the highlight range
[24812]1390 if (last_wrote < highlight_start)
1391 {
[24116]1392 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
[28382]1393 content_element.appendChild(doc.createTextNode(preceding_text));
[24116]1394 }
[8731]1395
[24116]1396 // Print the highlight text, annotated
[24812]1397 if (highlight_end > last_wrote)
1398 {
[24116]1399 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
[28382]1400 Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
[24116]1401 annotation_element.setAttribute("type", "query_term");
1402 content_element.appendChild(annotation_element);
1403 last_wrote = highlight_end;
1404 }
1405 }
[8731]1406
[24116]1407 // Finish off any unwritten text
[24812]1408 if (last_wrote < content_characters.length)
1409 {
[24116]1410 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
[28382]1411 content_element.appendChild(doc.createTextNode(remaining_text));
[24116]1412 }
1413 return content_element;
[8731]1414 }
1415
[24116]1416 static private class WordMatch
1417 {
1418 public String word;
1419 public int start_position;
1420 public int end_position;
1421 public boolean preceding_word_matched;
[8731]1422
[24116]1423 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1424 {
1425 this.word = word;
1426 this.start_position = start_position;
1427 this.end_position = end_position;
1428 this.preceding_word_matched = preceding_word_matched;
1429 }
[8731]1430 }
1431
[24116]1432 static private class PartialPhraseMatch
1433 {
1434 public int start_position;
1435 public int query_phrase_number;
1436 public int num_words_matched;
[8731]1437
[24116]1438 public PartialPhraseMatch(int start_position, int query_phrase_number)
1439 {
1440 this.start_position = start_position;
1441 this.query_phrase_number = query_phrase_number;
1442 this.num_words_matched = 1;
1443 }
[8731]1444 }
[3645]1445}
Note: See TracBrowser for help on using the repository browser.