Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 32111

Last change on this file since 32111 was 32111, checked in by kjdon, 6 years ago
pass in base interfaces array to the call to find archive2document.xsl. If you have a custom interface it will probably live in the default one. Then check to make sure the file was there before trying to use it.
Property svn:keywords set to `Author Date Id Revision`
File size: 53.3 KB

Rev	Line
[3801]	1	/*
[24812]	2	* DocumentAction.java
	3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	18	*/
[3645]	19	package org.greenstone.gsdl3.action;
	20
[3801]	21	// Greenstone classes
[3645]	22	import org.greenstone.gsdl3.core.ModuleInterface;
	23	import org.greenstone.gsdl3.util.*;
[32069]	24	import org.greenstone.util.GlobalProperties;
[3801]	25
[3645]	26	// XML classes
[24812]	27	import org.w3c.dom.Document;
	28	import org.w3c.dom.Element;
	29	import org.w3c.dom.Node;
[4287]	30	import org.w3c.dom.Text;
[3801]	31	import org.w3c.dom.NodeList;
[3645]	32
[3801]	33	// General Java classes
[8731]	34	import java.util.ArrayList;
[3645]	35	import java.util.HashMap;
[4287]	36	import java.util.HashSet;
[3645]	37	import java.io.File;
[25635]	38	import java.io.Serializable;
[3645]	39
[13124]	40	import org.apache.log4j.*;
[3801]	41
[24812]	42	/** Action class for retrieving Documents via the message router */
	43	public class DocumentAction extends Action
	44	{
[13124]	45
[24116]	46	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName()); // class-wide log4j logger
[13124]	47
[24116]	48	// this is used to specify that the sibling nodes of a selected one should be obtained
	49	public static final String SIBLING_ARG = "sib"; // process() requests "siblings" structure when this is "1"
	50	public static final String GOTO_PAGE_ARG = "gp"; // a non-empty value N makes process() ask for node "<doc id>.N.ss"
	51	public static final String ENRICH_DOC_ARG = "end"; // "1" (with provide_annotations set) sends the content to the service named by GSParams.SERVICE
[25305]	52	public static final String EXPAND_DOCUMENT_ARG = "ed"; // "1" fetches content for every document node; also forces expand contents
	53	public static final String EXPAND_CONTENTS_ARG = "ec"; // "1" requests the "entire" structure rather than ancestors/children
	54	public static final String REALISTIC_BOOK_ARG = "book"; // NOTE(review): not read by process(); presumably used elsewhere - confirm
[32068]	55	public static final String NO_TEXT_ARG = "noText"; // "1" skips the content request, unless EXPAND_DOCUMENT_ARG is "1"
	56	public static final String DOC_EDIT_ARG = "docEdit"; // "1" switches process() to editing mode (document built from a DocXMLGetSection response)
	57
[24812]	58	/**
	59	* if this is set to true, when a document is displayed, any annotation type
	60	* services (enrich) will be offered to the user as well
	61	*/
	62	protected boolean provide_annotations = false; // enabled in configure() by displayAnnotationService=true
	63
[24116]	64	protected boolean highlight_query_terms = false; // enabled in configure() by highlightQueryTerms=true; process() then runs content through highlightQueryTerms()
[5694]	65
[24812]	66	public boolean configure()
	67	{
[24116]	68	super.configure();
[24812]	69	String highlight = (String) config_params.get("highlightQueryTerms");
	70	if (highlight != null && highlight.equals("true"))
	71	{
[24116]	72	highlight_query_terms = true;
	73	}
[24812]	74	String annotate = (String) config_params.get("displayAnnotationService");
	75	if (annotate != null && annotate.equals("true"))
	76	{
[24116]	77	provide_annotations = true;
	78	}
[25953]	79	return true;
	80	}
[24812]	81
	82	public Node process(Node message_node)
[24116]	83	{
	84	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
[24812]	85
[28964]	86	Element message = GSXML.nodeToElement(message_node);
[32068]	87	Document doc = XMLConverter.newDOM(); //message.getOwnerDocument();
[28382]	88
[24116]	89	// the response
[28382]	90	Element result = doc.createElement(GSXML.MESSAGE_ELEM);
	91	Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
[24116]	92	result.appendChild(page_response);
[19984]	93
[24116]	94	// get the request - assume only one
[24812]	95	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
	96	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]	97	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
[4023]	98
[24116]	99	// just in case there are some that need to get passed to the services
[24812]	100	HashMap service_params = (HashMap) params.get("s0");
[4717]	101
[24116]	102	String collection = (String) params.get(GSParams.COLLECTION);
[25305]	103	String document_id = (String) params.get(GSParams.DOCUMENT);
[25355]	104	if (document_id != null && document_id.equals(""))
	105	{
	106	document_id = null;
[25305]	107	}
	108	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
[25355]	109	if (href != null && href.equals(""))
	110	{
	111	href = null;
[25305]	112	}
	113	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
	114	if (document_id == null && href == null)
[24812]	115	{
[24116]	116	logger.error("no document specified!");
	117	return result;
	118	}
[25355]	119	if (rl != null && rl.equals("0"))
	120	{
	121	// this is a true external link, we should have been directed to a different page or action
	122	logger.error("rl value was 0, shouldn't get here");
	123	return result;
[25305]	124	}
[29521]	125
	126	UserContext userContext = new UserContext(request);
	127
	128	//append site metadata
	129	addSiteMetadata(page_response, userContext);
	130	addInterfaceOptions(page_response);
	131
	132	// get the additional data needed for the page
	133	getBackgroundData(page_response, collection, userContext);
	134	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
	135
	136	if (format_elem != null) {
	137	// lets look for param defaults set in config file
	138	NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
	139	for (int i=0; i<param_defaults.getLength(); i++) {
	140	Element p = (Element)param_defaults.item(i);
	141	String name = p.getAttribute(GSXML.NAME_ATT);
	142	if (params.get(name) ==null) {
	143	// wasn't set from interface
	144	String value = p.getAttribute(GSXML.VALUE_ATT);
	145	params.put(name, value );
	146	// also add into request param xml so that xslt knows it too
	147	GSXML.addParameterToList(cgi_paramList, name, value);
	148	}
	149	}
	150	}
[32068]	151
[32070]	152	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
	153	if (document_type != null && document_type.equals(""))
	154	{
	155	//document_type = "hierarchy";
	156	document_type = null; // we'll get it later if not already specified
	157	}
	158	// what if it is null here?? Anu to check...
[32068]	159
[32071]	160
[32068]	161	boolean editing_document = false;
	162	String doc_edit = (String) params.get(DOC_EDIT_ARG);
	163	if (doc_edit != null && doc_edit.equals("1")) {
	164	editing_document = true;
	165	}
	166
	167	// are we editing mode? just get the archive document, convert to our internal doc format, and return it
	168	if (editing_document) {
	169
	170	// call get archive doc
	171	Element dx_message = doc.createElement(GSXML.MESSAGE_ELEM);
	172	String to = "DocXMLGetSection";
	173	Element dx_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
	174	dx_message.appendChild(dx_request);
	175	Element dx_section = doc.createElement(GSXML.DOCXML_SECTION_ELEM);
	176	dx_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
	177	dx_section.setAttribute(GSXML.COLLECTION_ATT, collection);
	178	dx_request.appendChild(dx_section);
	179
	180	Element dx_response_message = (Element) this.mr.process(dx_message);
	181	if (processErrorElements(dx_response_message, page_response))
	182	{
	183	return result;
	184	}
	185
	186	// get the section out
	187	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOCXML_SECTION_ELEM);
	188	Element section = (Element) GSXML.getNodeByPath(dx_response_message, path);
	189	if (section == null) {
	190	logger.error("no archive doc returned for "+document_id);
	191	return result;
	192	}
	193	// convert the archive format into the internal format that the page response requires
	194
[32071]	195	// work out doctype
[32075]	196	// NOTE: this will be coming from collection database in index
	197	// the archive file doesn't store this. So we have to assume
	198	// that the doc type will not be changing with any
	199	// modifications happening to archives.
	200
	201	// if doc type is null, then we need to work it out.
[32071]	202	// create a basic doc list containing the current node
[32075]	203
[32071]	204	if (document_type == null) {
[32075]	205	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
	206	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
	207	basic_doc_list.appendChild(current_doc);
	208	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id);
	209	basic_doc_list.appendChild(current_doc);
	210	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[32071]	211	}
[32075]	212
[32071]	213	if (document_type == null) {
	214	logger.debug("@@@ doctype is null, setting to simple");
	215	document_type = GSXML.DOC_TYPE_SIMPLE;
	216	}
[32075]	217
	218	Element doc_elem = doc.createElement(GSXML.DOCUMENT_ELEM);
[32070]	219	doc_elem.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
[32068]	220	page_response.appendChild(doc_elem);
	221
	222	Element transformed_section = transformArchiveToDocument(section);
[32075]	223	if (document_type == GSXML.DOC_TYPE_SIMPLE) {
	224	// simple doc, only returning a single document node, which is the top level section.
	225	doc_elem.setAttribute(GSXML.NODE_ID_ATT, document_id);
	226	GSXML.mergeElements(doc_elem, transformed_section);
	227	return result;
	228	}
	229
	230	// multi sectioned document.
	231	transformed_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
[32071]	232	// In docEdit mode, we obtain the text from archives, from doc.xml
	233	// Now the transformation has replaced <Section> with <documentNode>
	234	// Need to add nodeID, nodeType and docType attributes to each docNode
	235	// as doc.xml doesn't store that.
	236	insertDocNodeAttributes(transformed_section, document_type, null);
[32068]	237	doc_elem.appendChild(doc.importNode(transformed_section, true));
[32071]	238	logger.debug("dx result = "+XMLConverter.getPrettyString(result));
	239
[32068]	240	return result;
	241	}
[32071]	242
[24116]	243	//whether to retrieve siblings or not
	244	boolean get_siblings = false;
	245	String sibs = (String) params.get(SIBLING_ARG);
[24812]	246	if (sibs != null && sibs.equals("1"))
	247	{
[24116]	248	get_siblings = true;
	249	}
[24812]	250
[25305]	251	String doc_id_modifier = "";
[24116]	252	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
[24812]	253	if (sibling_num != null && !sibling_num.equals(""))
	254	{
[24116]	255	// we have to modify the doc name
[25355]	256	doc_id_modifier = "." + sibling_num + ".ss";
[24116]	257	}
[24812]	258
[24116]	259	boolean expand_document = false;
[25305]	260	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
[24812]	261	if (ed_arg != null && ed_arg.equals("1"))
	262	{
[24116]	263	expand_document = true;
	264	}
[14525]	265
[24116]	266	boolean expand_contents = false;
[24812]	267	if (expand_document)
	268	{ // we always expand the contents with the text
[24116]	269	expand_contents = true;
[24812]	270	}
	271	else
	272	{
[25305]	273	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
[24812]	274	if (ec_arg != null && ec_arg.equals("1"))
	275	{
[24116]	276	expand_contents = true;
	277	}
[5694]	278	}
[25355]	279
[32068]	280	// do we want text content? Not if no_text=1.
	281	// expand_document overrides this. - should it??
	282	boolean get_text = true;
	283	String nt_arg = (String) params.get(NO_TEXT_ARG);
	284
	285	if (!expand_document && nt_arg!=null && nt_arg.equals("1")) {
[32071]	286	logger.debug("SETTING GET TEXT TO FALSE");
[32068]	287	get_text = false;
	288	} else {
[32071]	289	logger.debug("GET TEXT REMAINS TRUE");
[32068]	290	}
[4257]	291
[24116]	292	// the_document is where all the doc info - structure and metadata etc
	293	// is added into, to be returned in the page
[28382]	294	Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
[24116]	295	page_response.appendChild(the_document);
[9874]	296
[24116]	297	// create a basic doc list containing the current node
[28382]	298	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
	299	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]	300	basic_doc_list.appendChild(current_doc);
[25305]	301	if (document_id != null)
[24812]	302	{
[25355]	303	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
[24812]	304	}
[25355]	305	else
[24812]	306	{
[25305]	307	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
	308	// do we need this??
	309	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
[14525]	310	}
[32071]	311
[25953]	312	if (document_type == null)
	313	{
	314	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[25816]	315	}
[29439]	316	if (document_type == null)
[25953]	317	{
[32071]	318	logger.debug("##### doctype is null, setting to simple");
[29439]	319	document_type = GSXML.DOC_TYPE_SIMPLE;
[25816]	320	}
[29439]	321
	322	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
	323
[24116]	324	// Create a parameter list to specify the required structure information
[28382]	325	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]	326
	327	if (service_params != null)
	328	{
[28964]	329	GSXML.addParametersToList(ds_param_list, service_params);
[24116]	330	}
[3817]	331
[24812]	332	Element ds_param = null;
[24116]	333	boolean get_structure = false;
	334	boolean get_structure_info = false;
[24889]	335	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
[24812]	336	{
[24116]	337	get_structure_info = true;
[24889]	338
	339	if (expand_contents)
	340	{
[28382]	341	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]	342	ds_param_list.appendChild(ds_param);
	343	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	344	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
	345	}
	346
[25305]	347	// get the info needed for paged naviagtion
[28382]	348	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	349	ds_param_list.appendChild(ds_param);
	350	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	351	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
[28382]	352	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	353	ds_param_list.appendChild(ds_param);
	354	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	355	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
[28382]	356	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	357	ds_param_list.appendChild(ds_param);
	358	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	359	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
[24812]	360
[24889]	361	if (get_siblings)
	362	{
[28382]	363	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]	364	ds_param_list.appendChild(ds_param);
	365	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	366	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
	367	}
	368
[24812]	369	}
[28258]	370	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\| document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
[24812]	371	{
[24116]	372	get_structure = true;
[24812]	373	if (expand_contents)
	374	{
[28382]	375	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	376	ds_param_list.appendChild(ds_param);
	377	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	378	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
[24812]	379	}
	380	else
	381	{
[24116]	382	// get the info needed for table of contents
[28382]	383	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	384	ds_param_list.appendChild(ds_param);
	385	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	386	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
[28382]	387	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	388	ds_param_list.appendChild(ds_param);
	389	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	390	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
[24812]	391	if (get_siblings)
	392	{
[28382]	393	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	394	ds_param_list.appendChild(ds_param);
	395	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	396	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
	397	}
	398	}
[24812]	399	}
	400	else
	401	{
[31249]	402	// we dont need any structure
[24116]	403	}
[3801]	404
[24116]	405	boolean has_dummy = false;
[24812]	406	if (get_structure \|\| get_structure_info)
	407	{
[8676]	408
[24116]	409	// Build a request to obtain the document structure
[28382]	410	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]	411	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]	412	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	413	ds_message.appendChild(ds_request);
	414	ds_request.appendChild(ds_param_list);
[24812]	415
[25816]	416	// add the node list we created earlier
[24116]	417	ds_request.appendChild(basic_doc_list);
[24812]	418
[24116]	419	// Process the document structure retrieve message
	420	Element ds_response_message = (Element) this.mr.process(ds_message);
[24812]	421	if (processErrorElements(ds_response_message, page_response))
	422	{
[24116]	423	return result;
	424	}
[4030]	425
[24116]	426	// get the info and print out
[24812]	427	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	428	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
	429	path = GSPath.appendLink(path, "nodeStructureInfo");
	430	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
	431	// get the doc_node bit
[24812]	432	if (ds_response_struct_info != null)
	433	{
[28382]	434	the_document.appendChild(doc.importNode(ds_response_struct_info, true));
[24116]	435	}
[24812]	436	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	437	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
	438	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
	439	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
[24812]	440
	441	if (ds_response_structure != null)
	442	{
[24116]	443	// add the contents of the structure bit into the_document
	444	NodeList structs = ds_response_structure.getChildNodes();
[24812]	445	for (int i = 0; i < structs.getLength(); i++)
	446	{
[28382]	447	the_document.appendChild(doc.importNode(structs.item(i), true));
[24116]	448	}
[24812]	449	}
	450	else
	451	{
[24116]	452	// no structure nodes, so put in a dummy doc node
[28382]	453	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]	454	if (document_id != null)
[24812]	455	{
[25305]	456	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]	457	}
[25355]	458	else
[24812]	459	{
[25305]	460	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[25355]	461
[24116]	462	}
	463	the_document.appendChild(doc_node);
	464	has_dummy = true;
	465	}
[24812]	466	}
	467	else
	468	{ // a simple type - we dont have a dummy node for simple
[24116]	469	// should think about this more
	470	// no structure request, so just put in a dummy doc node
[28382]	471	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]	472	if (document_id != null)
[24812]	473	{
[25305]	474	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]	475	}
[25355]	476	else
[24812]	477	{
[25305]	478	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[24116]	479	}
	480	the_document.appendChild(doc_node);
	481	has_dummy = true;
	482	}
[24812]	483
[24116]	484	// Build a request to obtain some document metadata
[28382]	485	Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]	486	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
[28382]	487	Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	488	dm_message.appendChild(dm_request);
	489	// Create a parameter list to specify the required metadata information
[24812]	490
[25635]	491	HashSet<String> meta_names = new HashSet<String>();
[24116]	492	meta_names.add("Title"); // the default
[24812]	493	if (format_elem != null)
	494	{
[24889]	495	getRequiredMetadataNames(format_elem, meta_names);
[24116]	496	}
[28258]	497
[26026]	498	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
[28258]	499	if (extraMetaListElem != null)
[26026]	500	{
	501	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
[28258]	502	for (int i = 0; i < extraMetaList.getLength(); i++)
[26026]	503	{
[28258]	504	meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
[26026]	505	}
	506	}
[24812]	507
[28382]	508	Element dm_param_list = createMetadataParamList(doc,meta_names);
[24812]	509	if (service_params != null)
	510	{
[28964]	511	GSXML.addParametersToList(dm_param_list, service_params);
[24116]	512	}
[24812]	513
[24116]	514	dm_request.appendChild(dm_param_list);
[24812]	515
[24116]	516	// create the doc node list for the metadata request
[28382]	517	Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	518	dm_request.appendChild(dm_doc_list);
[4030]	519
[24116]	520	// Add each node from the structure response into the metadata request
	521	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
[24812]	522	for (int i = 0; i < doc_nodes.getLength(); i++)
	523	{
[24116]	524	Element doc_node = (Element) doc_nodes.item(i);
	525	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
[3801]	526
[24116]	527	// Add the documentNode to the list
[28382]	528	Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]	529	dm_doc_list.appendChild(dm_doc_node);
	530	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
[24812]	531	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
[29922]	532	if (document_id == null){
	533	dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
	534	}
	535
[24116]	536	}
[3801]	537
[24116]	538	// we also want a metadata request to the top level document to get
	539	// assocfilepath - this could be cached too
[28382]	540	Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	541	dm_message.appendChild(doc_meta_request);
[28382]	542	Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]	543	if (service_params != null)
	544	{
[28964]	545	GSXML.addParametersToList(doc_meta_param_list, service_params);
[24116]	546	}
[3801]	547
[24116]	548	doc_meta_request.appendChild(doc_meta_param_list);
[28382]	549	Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	550	doc_meta_param_list.appendChild(doc_param);
	551	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
	552	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
[8676]	553
[24116]	554	// create the doc node list for the metadata request
[28382]	555	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	556	doc_meta_request.appendChild(doc_list);
[3801]	557
[28382]	558	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]	559	// the node we want is the root document node
[25355]	560	if (document_id != null)
[24812]	561	{
[25305]	562	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
[24812]	563	}
[29922]	564	/*else
[24812]	565	{
[25355]	566	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
	567	// can we assume that href is always a top level doc??
	568	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
[25305]	569	//doc_node.setAttribute("externalURL", has_rl);
[29922]	570	}*/
[24116]	571	doc_list.appendChild(doc_node);
[24889]	572
[24116]	573	Element dm_response_message = (Element) this.mr.process(dm_message);
[24812]	574	if (processErrorElements(dm_response_message, page_response))
	575	{
[24116]	576	return result;
	577	}
[9874]	578
[24812]	579	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	580	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
[3801]	581
[24116]	582	// Merge the metadata with the structure information
	583	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
[24812]	584	for (int i = 0; i < doc_nodes.getLength(); i++)
	585	{
[24116]	586	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
[8833]	587	}
[24116]	588	// get the top level doc metadata out
[24812]	589	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
	590	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
[24116]	591	GSXML.mergeMetadataLists(the_document, top_doc_node);
[24812]	592
[32068]	593	// do we want doc text content? If not, we are done.
	594	if (!get_text) {
	595	// don't get text
	596	return result;
	597	}
	598
[24116]	599	// Build a request to obtain some document content
[28382]	600	Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]	601	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
[28382]	602	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	603	dc_message.appendChild(dc_request);
[5694]	604
[24116]	605	// Create a parameter list to specify the request parameters - empty for now
[28382]	606	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]	607	if (service_params != null)
	608	{
[28964]	609	GSXML.addParametersToList(dc_param_list, service_params);
[24116]	610	}
[4858]	611
[24116]	612	dc_request.appendChild(dc_param_list);
	613
	614	// get the content
	615	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
[24812]	616	if (expand_document)
	617	{
[24116]	618	dc_request.appendChild(dm_doc_list);
[24812]	619	}
	620	else
	621	{
[24116]	622	dc_request.appendChild(basic_doc_list);
[4858]	623	}
[24116]	624	Element dc_response_message = (Element) this.mr.process(dc_message);
[24812]	625	if (processErrorElements(dc_response_message, page_response))
	626	{
[24116]	627	return result;
[4827]	628	}
[24116]	629	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
[25953]	630
[24812]	631	if (expand_document)
	632	{
[24116]	633	// Merge the content with the structure information
	634	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
[24812]	635	for (int i = 0; i < doc_nodes.getLength(); i++)
	636	{
[31249]	637	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), GSXML.NODE_CONTENT_ELEM);
[24812]	638	if (content != null)
	639	{
	640	if (highlight_query_terms)
	641	{
[31249]	642	String node_id = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
	643	content = highlightQueryTerms(request, node_id, (Element) content);
[24116]	644	}
[31249]	645
[28382]	646	doc_nodes.item(i).appendChild(doc.importNode(content, true));
[24116]	647	}
	648	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
	649	}
[29521]	650	if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
	651	Element dummy_node = (Element) doc_nodes.item(0);
	652	the_document.removeChild(dummy_node);
	653	the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
	654	NodeList dummy_children = dummy_node.getChildNodes();
	655	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
	656	{
	657	// special case as we don't want more than one metadata list
	658	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
	659	{
	660	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
	661	}
	662	else
	663	{
	664	the_document.appendChild(dummy_children.item(i));
	665	}
	666	}
	667	}
[24812]	668	}
	669	else
	670	{
[24116]	671	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
	672	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
	673	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
[25305]	674	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
[25953]	675
[24812]	676	if (dc_response_doc_content == null)
	677	{
[24116]	678	// no content to add
[25355]	679	if (dc_response_doc.getAttribute("external").equals("true"))
	680	{
	681
	682	//if (dc_response_doc_external != null)
	683	//{
[25305]	684	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
[24812]	685
[25305]	686	the_document.setAttribute("selectedNode", href_id);
	687	the_document.setAttribute("external", href_id);
[25355]	688	}
	689	return result;
[24116]	690	}
[24812]	691	if (highlight_query_terms)
	692	{
[24116]	693	dc_response_doc.removeChild(dc_response_doc_content);
[24812]	694
[31249]	695	dc_response_doc_content = highlightQueryTerms(request, null, dc_response_doc_content);
[24116]	696	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
	697	}
[24812]	698
	699	if (provide_annotations)
	700	{
	701	String service_selected = (String) params.get(ENRICH_DOC_ARG);
	702	if (service_selected != null && service_selected.equals("1"))
	703	{
[24116]	704	// now we can modifiy the response doc if needed
[24812]	705	String enrich_service = (String) params.get(GSParams.SERVICE);
[24116]	706	// send a message to the service
[28382]	707	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
	708	Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
[24116]	709	enrich_message.appendChild(enrich_request);
	710	// check for parameters
[24812]	711	HashMap e_service_params = (HashMap) params.get("s1");
	712	if (e_service_params != null)
	713	{
[28382]	714	Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]	715	GSXML.addParametersToList(enrich_pl, e_service_params);
[24116]	716	enrich_request.appendChild(enrich_pl);
	717	}
[28382]	718	Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	719	enrich_request.appendChild(e_doc_list);
[28382]	720	e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
[24812]	721
[24116]	722	Node enrich_response = this.mr.process(enrich_message);
[24812]	723
	724	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
[24116]	725	path = GSPath.createPath(links);
[24812]	726	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
	727
	728	}
[24116]	729	} // if provide_annotations
[3987]	730
[24116]	731	// use the returned id rather than the sent one cos there may have
	732	// been modifiers such as .pr that are removed.
	733	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
	734	the_document.setAttribute("selectedNode", modified_doc_id);
[24812]	735	if (has_dummy)
	736	{
[24116]	737	// change the id if necessary and add the content
[24812]	738	Element dummy_node = (Element) doc_nodes.item(0);
	739
[24116]	740	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[28382]	741	dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]	742	// hack for simple type
[26140]	743	if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
[24812]	744	{
[24116]	745	// we dont want the internal docNode, just want the content and metadata in the document
	746	// rethink this!!
	747	the_document.removeChild(dummy_node);
[4023]	748
[24116]	749	NodeList dummy_children = dummy_node.getChildNodes();
	750	//for (int i=0; i<dummy_children.getLength(); i++) {
[24812]	751	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
	752	{
[24116]	753	// special case as we don't want more than one metadata list
[24812]	754	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
	755	{
[24116]	756	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
[24812]	757	}
	758	else
	759	{
[24116]	760	the_document.appendChild(dummy_children.item(i));
	761	}
	762	}
	763	}
[28258]	764
[26140]	765	the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[24812]	766	}
	767	else
	768	{
[24116]	769	// Merge the document content with the metadata and structure information
[24812]	770	for (int i = 0; i < doc_nodes.getLength(); i++)
	771	{
[24116]	772	Node dn = doc_nodes.item(i);
[24812]	773	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
	774	if (dn_id.equals(modified_doc_id))
	775	{
[28382]	776	dn.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]	777	break;
	778	}
	779	}
	780	}
	781	}
[29307]	782	//logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
[24116]	783	return result;
[3801]	784	}
[24812]	785
	786	/**
	787	* tell the param class what its arguments are if an action has its own
	788	* arguments, this should add them to the params object - particularly
	789	* important for args that should not be saved
	790	*/
[25305]	791	public boolean addActionParameters(GSParams params)
[24812]	792	{
[24116]	793	params.addParameter(GOTO_PAGE_ARG, false);
	794	params.addParameter(ENRICH_DOC_ARG, false);
[25305]	795	params.addParameter(EXPAND_DOCUMENT_ARG, false);
	796	params.addParameter(EXPAND_CONTENTS_ARG, false);
	797	params.addParameter(REALISTIC_BOOK_ARG, false);
	798
[24116]	799	return true;
[4717]	800	}
[4023]	801
[24812]	802	/**
	803	* this method gets the collection description, the format info, the list of
	804	* enrich services, etc - stuff that is needed for the page, but is the same
	805	* whatever the query is - should be cached
	806	*/
[24993]	807	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
[24812]	808	{
[28382]	809	Document doc = page_response.getOwnerDocument();
	810
[24116]	811	// create a message to process - contains requests for the collection
	812	// description, the format element, the enrich services on offer
	813	// these could all be cached
[28382]	814	Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]	815	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
	816	// the format request - ignore for now, where does this request go to??
[28382]	817	Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
[24116]	818	info_message.appendChild(format_request);
	819
	820	// the enrich_services request - only do this if provide_annotations is true
	821
[24812]	822	if (provide_annotations)
	823	{
[28382]	824	Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
[24116]	825	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
	826	info_message.appendChild(enrich_services_request);
[4023]	827	}
[24116]	828
[24812]	829	Element info_response = (Element) this.mr.process(info_message);
	830
[24116]	831	// the collection is the first response
	832	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
	833	Element format_resp = (Element) responses.item(0);
[24812]	834
	835	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
	836	if (format_elem != null)
	837	{
[25985]	838	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
[28258]	839	if (global_format_elem != null)
[25985]	840	{
	841	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
	842	}
	843
	844	// set the format type
[24812]	845	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
[28382]	846	page_response.appendChild(doc.importNode(format_elem, true));
[4023]	847	}
[4287]	848
[24812]	849	if (provide_annotations)
	850	{
	851	Element services_resp = (Element) responses.item(1);
[4287]	852
[24116]	853	// a new message for the mr
[28382]	854	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]	855	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
	856	boolean service_found = false;
[24812]	857	for (int j = 0; j < e_services.getLength(); j++)
	858	{
	859	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
	860	{
[28382]	861	Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
[24116]	862	enrich_message.appendChild(s);
	863	service_found = true;
	864	}
	865	}
[24812]	866	if (service_found)
	867	{
	868	Element enrich_response = (Element) this.mr.process(enrich_message);
	869
[24116]	870	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
[28382]	871	Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
[24812]	872	for (int i = 0; i < e_responses.getLength(); i++)
	873	{
	874	Element e_resp = (Element) e_responses.item(i);
[28382]	875	Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
[24116]	876	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
	877	service_list.appendChild(e_service);
	878	}
	879	page_response.appendChild(service_list);
	880	}
	881	} // if provide_annotations
	882	return true;
[24812]	883
[9874]	884	}
[4287]	885
[25953]	886	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
	887	{
[28382]	888	Document doc = basic_doc_list.getOwnerDocument();
	889
	890	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[25953]	891	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]	892	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[25953]	893	ds_message.appendChild(ds_request);
[25816]	894
[25953]	895	// Create a parameter list to specify the required structure information
[28382]	896	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
	897	Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
[25953]	898	ds_param_list.appendChild(ds_param);
	899	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	900	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
[25816]	901
[25953]	902	ds_request.appendChild(ds_param_list);
[25816]	903
[25953]	904	// add the node list we created earlier
	905	ds_request.appendChild(basic_doc_list);
	906
	907	// Process the document structure retrieve message
	908	Element ds_response_message = (Element) this.mr.process(ds_message);
	909	if (processErrorElements(ds_response_message, page_response))
	910	{
	911	return null;
	912	}
	913
	914	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
	915	String path = GSPath.createPath(links);
	916	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
[29439]	917	if (info_elem == null) {
	918	return null;
	919	}
[25953]	920	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
	921	if (doctype_elem != null)
	922	{
	923	String doc_type = doctype_elem.getAttribute("value");
	924	return doc_type;
	925	}
	926	return null;
	927	}
	928
[32071]	929	// Recursive method to set the docType, nodeType and nodeID attributes of each docNode
	930	// The docType remains constant as in parameter document_type
	931	// The nodeID for the first (root) docNode is already set. For all children, the rootNode id
	932	// is updated to be <parent-id>.<num-child>, where the first parent-id is rootNode id.
	933	// The nodeType is root if rootNode, internal if there are children and leaf if no children
	934	protected void insertDocNodeAttributes(Element docNode, String document_type, String id) {
	935
	936	boolean isRoot = false;
	937	if(id == null) { // rootNode, get the root nodeID to work with recursively
	938	id = docNode.getAttribute(GSXML.NODE_ID_ATT);
	939	isRoot = true;
	940	} else { // for all but the root node, need to still set the nodeID
	941	docNode.setAttribute(GSXML.NODE_ID_ATT, id);
	942	}
	943
	944	docNode.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
	945
	946	NodeList docNodes = GSXML.getChildrenByTagName(docNode, GSXML.DOC_NODE_ELEM);
	947	if(docNodes.getLength() > 0) {
	948	docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
	949	for(int i = 0; i < docNodes.getLength(); i++) {
	950	Element childDocNode = (Element)docNodes.item(i);
	951
	952	// work out the child docNode's nodeID based on current id
	953	String nodeID = id + "." + (i+1);
	954	insertDocNodeAttributes(childDocNode, document_type, nodeID); //recursion step
	955	}
	956	} else {
	957	docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
	958	}
	959
	960	// rootNode's nodeType is a special case: it's "root", not "leaf" or "internal"
	961	if(isRoot) docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
	962
	963	}
	964
[32068]	965	/** run the XSLT transform which converts from doc.xml format to our internal document format */
	966	protected Element transformArchiveToDocument(Element section) {
	967
[32111]	968	String stylesheet_filename = GSFile.stylesheetFile(GlobalProperties.getGSDL3Home(), (String) this.config_params.get(GSConstants.SITE_NAME), "", (String) this.config_params.get(GSConstants.INTERFACE_NAME), (ArrayList<String>) this.config_params.get(GSConstants.BASE_INTERFACES), "archive2document.xsl");
	969	if (stylesheet_filename == null) {
	970	logger.error("Couldn't find stylesheet archive2document.xsl");
	971	return section;
	972	}
	973
	974	Document stylesheet_doc = XMLConverter.getDOM(new File(stylesheet_filename));
[32068]	975	if (stylesheet_doc == null) {
[32111]	976	logger.error("Couldn't load in stylesheet "+stylesheet_filename);
[32068]	977	return section;
	978	}
	979
	980	Document section_doc = XMLConverter.newDOM();
	981	section_doc.appendChild(section_doc.importNode(section, true));
	982	Node result = this.transformer.transform(stylesheet_doc, section_doc);
[32071]	983	logger.debug("transform result = "+XMLConverter.getPrettyString(result));
[32068]	984
	985	Element new_element;
[32071]	986	if (result.getNodeType() == Node.DOCUMENT_NODE) {
[32068]	987	new_element = ((Document) result).getDocumentElement();
[32071]	988	} else {
[32068]	989	new_element = (Element) result;
[32071]	990	}
[32068]	991
	992
	993	return new_element;
	994
	995	}
	996
	997
[24812]	998	/**
	999	* this involves a bit of a hack to get the equivalent query terms - has to
	1000	* requery the query service - uses the last selected service name. (if it
	1001	* ends in query). should this action do the query or should it send a
	1002	* message to the query action? but that will involve lots of extra stuff.
[24889]	1003	* also doesn't handle phrases properly - just highlights all the terms
	1004	* found in the text.
[24812]	1005	*/
[31249]	1006	protected Element highlightQueryTerms(Element request, String current_node_id, Element dc_response_doc_content)
[24812]	1007	{
[28382]	1008	Document doc = request.getOwnerDocument();
	1009
[24116]	1010	// do the query again to get term info
[24812]	1011	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]	1012	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
[24812]	1013
	1014	HashMap previous_params = (HashMap) params.get("p");
	1015	if (previous_params == null)
	1016	{
[24116]	1017	return dc_response_doc_content;
	1018	}
[24812]	1019	String service_name = (String) previous_params.get(GSParams.SERVICE);
	1020	if (service_name == null \|\| !service_name.endsWith("Query"))
	1021	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
[24116]	1022	logger.debug("invalid service, not doing highlighting");
	1023	return dc_response_doc_content;
	1024	}
[24812]	1025	String collection = (String) params.get(GSParams.COLLECTION);
[24993]	1026	UserContext userContext = new UserContext(request);
[24116]	1027	String to = GSPath.appendLink(collection, service_name);
[24812]	1028
[28382]	1029	Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
	1030	Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	1031	mr_query_message.appendChild(mr_query_request);
[24812]	1032
[24116]	1033	// paramList
[24812]	1034	HashMap service_params = (HashMap) params.get("s1");
	1035
[28382]	1036	Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]	1037	GSXML.addParametersToList(query_param_list, service_params);
[31249]	1038	if (current_node_id != null) {
	1039	GSXML.addParameterToList(query_param_list, "hldocOID", current_node_id);
	1040	} else {
	1041	GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
	1042	}
[24116]	1043	mr_query_request.appendChild(query_param_list);
	1044	// do the query
[24812]	1045	Element mr_query_response = (Element) this.mr.process(mr_query_message);
[30049]	1046	String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
	1047	Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
[31249]	1048	// For SOLR, the above query may come back with a nodeContent element, which is the hldocOID section content, with search terms marked up. We send it back to the documnetContentRetrieve service so that resolveTextMacros can be applied, and it can be properly encased in documentNode etc elements
[30049]	1049	if (highlighted_Node != null)
	1050	{
[30056]	1051	// Build a request to process highlighted text
	1052
	1053	Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
	1054	to = GSPath.appendLink(collection, "DocumentContentRetrieve");
	1055	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
	1056	hl_message.appendChild(dc_request);
	1057
	1058	// Create a parameter list to specify the request parameters - empty for now
	1059	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
	1060	dc_request.appendChild(dc_param_list);
	1061
	1062	// get the content
	1063	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
	1064	dc_request.appendChild(doc_list);
	1065	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
	1066	doc_list.appendChild(current_doc);
	1067	current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
	1068	//Append highlighted content to request for processing
	1069	dc_request.appendChild(doc.importNode(highlighted_Node, true));
	1070	Element hl_response_message = (Element) this.mr.process(hl_message);
[31249]	1071
[30056]	1072	//Get results
	1073	NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
	1074	Element content = (Element) contentList.item(0);
	1075	return content;
[30049]	1076	}
[24812]	1077	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
[24116]	1078	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
[24812]	1079	if (query_term_list_element == null)
	1080	{
[24116]	1081	// no term info
	1082	logger.error("No query term information.\n");
	1083	return dc_response_doc_content;
	1084	}
[8731]	1085
[24116]	1086	String content = GSXML.getNodeText(dc_response_doc_content);
[4287]	1087
[24812]	1088	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[24116]	1089	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
[4717]	1090
[25635]	1091	HashSet<String> query_term_variants = new HashSet<String>();
[24116]	1092	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
[24812]	1093	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
[24116]	1094	{
	1095	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
[24812]	1096	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
[24116]	1097	{
[24812]	1098	for (int i = 0; i < terms_nodelist.getLength(); i++)
[24116]	1099	{
[24812]	1100	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
[24116]	1101	String termValueU = null;
	1102	String termValueL = null;
[24812]	1103
	1104	if (termValue.length() > 1)
[24116]	1105	{
	1106	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
	1107	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
	1108	}
	1109	else
	1110	{
	1111	termValueU = termValue.substring(0, 1).toUpperCase();
	1112	termValueL = termValue.substring(0, 1).toLowerCase();
	1113	}
[24812]	1114
[24116]	1115	query_term_variants.add(termValueU);
	1116	query_term_variants.add(termValueL);
	1117	}
	1118	}
	1119	}
	1120	else
	1121	{
[24812]	1122	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
	1123	{
[24116]	1124	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
	1125	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
[24812]	1126	for (int j = 0; j < equivalent_terms.length; j++)
	1127	{
[24116]	1128	query_term_variants.add(equivalent_terms[j]);
	1129	}
	1130	}
	1131	}
[4287]	1132
[25635]	1133	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
[4287]	1134
[24116]	1135	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
	1136	String performed_query = GSXML.getNodeText(query_element) + " ";
[8731]	1137
[25635]	1138	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]	1139	int term_start = 0;
	1140	boolean in_term = false;
	1141	boolean in_phrase = false;
[24812]	1142	for (int i = 0; i < performed_query.length(); i++)
	1143	{
[24116]	1144	char character = performed_query.charAt(i);
	1145	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
	1146
	1147	// Has a query term just started?
[24812]	1148	if (in_term == false && is_character_letter_or_digit == true)
	1149	{
[24116]	1150	in_term = true;
	1151	term_start = i;
	1152	}
	1153
	1154	// Or has a term just finished?
[24812]	1155	else if (in_term == true && is_character_letter_or_digit == false)
	1156	{
[24116]	1157	in_term = false;
	1158	String term = performed_query.substring(term_start, i);
[24812]	1159
[24116]	1160	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
[24812]	1161	if (term_element != null)
	1162	{
	1163
[25635]	1164	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
[24812]	1165
[24116]	1166	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
[24812]	1167	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
[24116]	1168	{
	1169	String termValueU = null;
	1170	String termValueL = null;
[24812]	1171
	1172	if (term.length() > 1)
[24116]	1173	{
	1174	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
	1175	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
	1176	}
	1177	else
	1178	{
	1179	termValueU = term.substring(0, 1).toUpperCase();
	1180	termValueL = term.substring(0, 1).toLowerCase();
	1181	}
[24812]	1182
[24116]	1183	phrase_query_p_term_x_variants.add(termValueU);
	1184	phrase_query_p_term_x_variants.add(termValueL);
	1185	}
	1186	else
	1187	{
[24812]	1188	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
	1189	{
[24116]	1190	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
	1191	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
[24812]	1192	for (int k = 0; k < term_equivalent_terms.length; k++)
	1193	{
[24116]	1194	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
	1195	}
	1196	}
	1197	}
	1198	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
[24812]	1199
	1200	if (in_phrase == false)
	1201	{
[24116]	1202	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
[25635]	1203	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]	1204	}
	1205	}
[9007]	1206	}
[24116]	1207	// Watch for phrases (surrounded by quotes)
[24812]	1208	if (character == '\"')
	1209	{
[24116]	1210	// Has a phrase just started?
[24812]	1211	if (in_phrase == false)
	1212	{
[24116]	1213	in_phrase = true;
	1214	}
	1215	// Or has a phrase just finished?
[24812]	1216	else if (in_phrase == true)
	1217	{
[24116]	1218	in_phrase = false;
	1219	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
	1220	}
	1221
[25635]	1222	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]	1223	}
[4287]	1224	}
[8731]	1225
[28382]	1226	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
[8731]	1227	}
	1228
[24116]	1229	/**
[24812]	1230	* Highlights query terms in a piece of text.
	1231	*/
[28382]	1232	private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
[24116]	1233	{
	1234	// Convert the content string to an array of characters for speed
	1235	char[] content_characters = new char[content.length()];
	1236	content.getChars(0, content.length(), content_characters, 0);
[8731]	1237
[24116]	1238	// Now skim through the content, identifying word matches
[25635]	1239	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
[24116]	1240	int word_start = 0;
	1241	boolean in_word = false;
	1242	boolean preceding_word_matched = false;
[24813]	1243	boolean inTag = false;
[24812]	1244	for (int i = 0; i < content_characters.length; i++)
	1245	{
[24813]	1246	//We don't want to find words inside HTML tags
[24993]	1247	if (content_characters[i] == '<')
[24813]	1248	{
	1249	inTag = true;
	1250	continue;
	1251	}
	1252	else if (inTag && content_characters[i] == '>')
	1253	{
	1254	inTag = false;
	1255	}
	1256	else if (inTag)
	1257	{
	1258	continue;
	1259	}
[24993]	1260
[24116]	1261	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
[24993]	1262
[24116]	1263	// Has a word just started?
[24812]	1264	if (in_word == false && is_character_letter_or_digit == true)
	1265	{
[24116]	1266	in_word = true;
	1267	word_start = i;
	1268	}
[8731]	1269
[24116]	1270	// Or has a word just finished?
[24812]	1271	else if (in_word == true && is_character_letter_or_digit == false)
	1272	{
[24116]	1273	in_word = false;
[8731]	1274
[24116]	1275	// Check if the word matches any of the query term equivalents
	1276	String word = new String(content_characters, word_start, (i - word_start));
[24812]	1277	if (query_term_variants.contains(word))
	1278	{
[24116]	1279	// We have found a matching word, so remember its location
	1280	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
	1281	preceding_word_matched = true;
	1282	}
[24812]	1283	else
	1284	{
[24116]	1285	preceding_word_matched = false;
	1286	}
	1287	}
	1288	}
[8731]	1289
[24116]	1290	// Don't forget the last word...
[24812]	1291	if (in_word == true)
	1292	{
[24116]	1293	// Check if the word matches any of the query term equivalents
	1294	String word = new String(content_characters, word_start, (content_characters.length - word_start));
[24812]	1295	if (query_term_variants.contains(word))
	1296	{
[24116]	1297	// We have found a matching word, so remember its location
	1298	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
	1299	}
[8731]	1300	}
	1301
[25635]	1302	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
	1303	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
[8731]	1304
[24116]	1305	// Deal with phrases now
[25635]	1306	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
[24812]	1307	for (int i = 0; i < word_matches.size(); i++)
	1308	{
[25635]	1309	WordMatch word_match = word_matches.get(i);
[8731]	1310
[24116]	1311	// See if any partial phrase matches are extended by this word
[24812]	1312	if (word_match.preceding_word_matched)
	1313	{
	1314	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
	1315	{
[25635]	1316	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
	1317	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
[24116]	1318	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
[24812]	1319	if (phrase_query_p_term_x_variants.contains(word_match.word))
	1320	{
[24116]	1321	partial_phrase_match.num_words_matched++;
[8731]	1322
[24116]	1323	// Has a complete phrase match occurred?
[24812]	1324	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
	1325	{
[24116]	1326	// Check for overlaps by looking at the previous highlight range
[24812]	1327	if (!highlight_end_positions.isEmpty())
	1328	{
[24116]	1329	int last_highlight_index = highlight_end_positions.size() - 1;
[25635]	1330	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
[24812]	1331	if (last_highlight_end > partial_phrase_match.start_position)
	1332	{
[24116]	1333	// There is an overlap, so remove the previous phrase match
[25635]	1334	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
[24116]	1335	highlight_end_positions.remove(last_highlight_index);
	1336	partial_phrase_match.start_position = last_highlight_start;
	1337	}
	1338	}
[8731]	1339
[24116]	1340	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
	1341	highlight_end_positions.add(new Integer(word_match.end_position));
	1342	}
	1343	// No, but add the partial match back into the list for next time
[24812]	1344	else
	1345	{
[24116]	1346	partial_phrase_matches.add(partial_phrase_match);
	1347	}
	1348	}
	1349	}
	1350	}
[24812]	1351	else
	1352	{
[24116]	1353	partial_phrase_matches.clear();
	1354	}
[8731]	1355
[24116]	1356	// See if this word is at the start of any of the phrases
[24812]	1357	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
	1358	{
[25635]	1359	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
[31686]	1360	if (phrase_query_p_term_variants_list.size()>0) {
[24116]	1361	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
[24812]	1362	if (phrase_query_p_term_1_variants.contains(word_match.word))
	1363	{
[24116]	1364	// If this phrase is just one word long, we have a complete match
[24812]	1365	if (phrase_query_p_term_variants_list.size() == 1)
	1366	{
[24116]	1367	highlight_start_positions.add(new Integer(word_match.start_position));
	1368	highlight_end_positions.add(new Integer(word_match.end_position));
	1369	}
	1370	// Otherwise we have the start of a potential phrase match
[24812]	1371	else
	1372	{
[24116]	1373	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
	1374	}
	1375	}
[31686]	1376	}
[24116]	1377	}
[4287]	1378	}
[4717]	1379
[24116]	1380	// Now add the annotation tags into the document at the correct points
[28382]	1381	Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
[8731]	1382
[24116]	1383	int last_wrote = 0;
[24812]	1384	for (int i = 0; i < highlight_start_positions.size(); i++)
	1385	{
[25635]	1386	int highlight_start = highlight_start_positions.get(i).intValue();
	1387	int highlight_end = highlight_end_positions.get(i).intValue();
[8731]	1388
[24116]	1389	// Print anything before the highlight range
[24812]	1390	if (last_wrote < highlight_start)
	1391	{
[24116]	1392	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
[28382]	1393	content_element.appendChild(doc.createTextNode(preceding_text));
[24116]	1394	}
[8731]	1395
[24116]	1396	// Print the highlight text, annotated
[24812]	1397	if (highlight_end > last_wrote)
	1398	{
[24116]	1399	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
[28382]	1400	Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
[24116]	1401	annotation_element.setAttribute("type", "query_term");
	1402	content_element.appendChild(annotation_element);
	1403	last_wrote = highlight_end;
	1404	}
	1405	}
[8731]	1406
[24116]	1407	// Finish off any unwritten text
[24812]	1408	if (last_wrote < content_characters.length)
	1409	{
[24116]	1410	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
[28382]	1411	content_element.appendChild(doc.createTextNode(remaining_text));
[24116]	1412	}
	1413	return content_element;
[8731]	1414	}
	1415
[24116]	1416	static private class WordMatch
	1417	{
	1418	public String word;
	1419	public int start_position;
	1420	public int end_position;
	1421	public boolean preceding_word_matched;
[8731]	1422
[24116]	1423	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
	1424	{
	1425	this.word = word;
	1426	this.start_position = start_position;
	1427	this.end_position = end_position;
	1428	this.preceding_word_matched = preceding_word_matched;
	1429	}
[8731]	1430	}
	1431
[24116]	1432	static private class PartialPhraseMatch
	1433	{
	1434	public int start_position;
	1435	public int query_phrase_number;
	1436	public int num_words_matched;
[8731]	1437
[24116]	1438	public PartialPhraseMatch(int start_position, int query_phrase_number)
	1439	{
	1440	this.start_position = start_position;
	1441	this.query_phrase_number = query_phrase_number;
	1442	this.num_words_matched = 1;
	1443	}
[8731]	1444	}
[3645]	1445	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: