Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 32069

Last change on this file since 32069 was 32069, checked in by kjdon, 6 years ago
forgot to add the import GlobalProperties line
Property svn:keywords set to `Author Date Id Revision`
File size: 49.7 KB

Rev	Line
[3801]	1	/*
[24812]	2	* DocumentAction.java
	3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
	4	*
	5	* This program is free software; you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation; either version 2 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program; if not, write to the Free Software
	17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	18	*/
[3645]	19	package org.greenstone.gsdl3.action;
	20
[3801]	21	// Greenstone classes
[3645]	22	import org.greenstone.gsdl3.core.ModuleInterface;
	23	import org.greenstone.gsdl3.util.*;
[32069]	24	import org.greenstone.util.GlobalProperties;
[3801]	25
[3645]	26	// XML classes
[24812]	27	import org.w3c.dom.Document;
	28	import org.w3c.dom.Element;
	29	import org.w3c.dom.Node;
[4287]	30	import org.w3c.dom.Text;
[3801]	31	import org.w3c.dom.NodeList;
[3645]	32
[3801]	33	// General Java classes
[8731]	34	import java.util.ArrayList;
[3645]	35	import java.util.HashMap;
[4287]	36	import java.util.HashSet;
[3645]	37	import java.io.File;
[25635]	38	import java.io.Serializable;
[3645]	39
[13124]	40	import org.apache.log4j.*;
[3801]	41
[24812]	42	/** Action class for retrieving Documents via the message router */
	43	public class DocumentAction extends Action
	44	{
[13124]	45
[24116]	46	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
[13124]	47
[24116]	48	// this is used to specify that the sibling nodes of a selected one should be obtained
	49	public static final String SIBLING_ARG = "sib";
	50	public static final String GOTO_PAGE_ARG = "gp";
	51	public static final String ENRICH_DOC_ARG = "end";
[25305]	52	public static final String EXPAND_DOCUMENT_ARG = "ed";
	53	public static final String EXPAND_CONTENTS_ARG = "ec";
	54	public static final String REALISTIC_BOOK_ARG = "book";
[32068]	55	public static final String NO_TEXT_ARG = "noText";
	56	public static final String DOC_EDIT_ARG = "docEdit";
	57
[24812]	58	/**
	59	* if this is set to true, when a document is displayed, any annotation type
	60	* services (enrich) will be offered to the user as well
	61	*/
	62	protected boolean provide_annotations = false;
	63
[24116]	64	protected boolean highlight_query_terms = false;
[5694]	65
[24812]	66	public boolean configure()
	67	{
[24116]	68	super.configure();
[24812]	69	String highlight = (String) config_params.get("highlightQueryTerms");
	70	if (highlight != null && highlight.equals("true"))
	71	{
[24116]	72	highlight_query_terms = true;
	73	}
[24812]	74	String annotate = (String) config_params.get("displayAnnotationService");
	75	if (annotate != null && annotate.equals("true"))
	76	{
[24116]	77	provide_annotations = true;
	78	}
[25953]	79	return true;
	80	}
[24812]	81
	82	public Node process(Node message_node)
[24116]	83	{
	84	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
[24812]	85
[28964]	86	Element message = GSXML.nodeToElement(message_node);
[32068]	87	Document doc = XMLConverter.newDOM(); //message.getOwnerDocument();
[28382]	88
[24116]	89	// the response
[28382]	90	Element result = doc.createElement(GSXML.MESSAGE_ELEM);
	91	Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
[24116]	92	result.appendChild(page_response);
[19984]	93
[24116]	94	// get the request - assume only one
[24812]	95	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
	96	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]	97	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
[4023]	98
[24116]	99	// just in case there are some that need to get passed to the services
[24812]	100	HashMap service_params = (HashMap) params.get("s0");
[4717]	101
[24116]	102	String collection = (String) params.get(GSParams.COLLECTION);
[25305]	103	String document_id = (String) params.get(GSParams.DOCUMENT);
[25355]	104	if (document_id != null && document_id.equals(""))
	105	{
	106	document_id = null;
[25305]	107	}
	108	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
[25355]	109	if (href != null && href.equals(""))
	110	{
	111	href = null;
[25305]	112	}
	113	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
	114	if (document_id == null && href == null)
[24812]	115	{
[24116]	116	logger.error("no document specified!");
	117	return result;
	118	}
[25355]	119	if (rl != null && rl.equals("0"))
	120	{
	121	// this is a true external link, we should have been directed to a different page or action
	122	logger.error("rl value was 0, shouldn't get here");
	123	return result;
[25305]	124	}
[29521]	125
	126	UserContext userContext = new UserContext(request);
	127
	128	//append site metadata
	129	addSiteMetadata(page_response, userContext);
	130	addInterfaceOptions(page_response);
	131
	132	// get the additional data needed for the page
	133	getBackgroundData(page_response, collection, userContext);
	134	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
	135
	136	if (format_elem != null) {
	137	// lets look for param defaults set in config file
	138	NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
	139	for (int i=0; i<param_defaults.getLength(); i++) {
	140	Element p = (Element)param_defaults.item(i);
	141	String name = p.getAttribute(GSXML.NAME_ATT);
	142	if (params.get(name) ==null) {
	143	// wasn't set from interface
	144	String value = p.getAttribute(GSXML.VALUE_ATT);
	145	params.put(name, value );
	146	// also add into request param xml so that xslt knows it too
	147	GSXML.addParameterToList(cgi_paramList, name, value);
	148	}
	149	}
	150	}
[32068]	151
	152
	153	boolean editing_document = false;
	154	String doc_edit = (String) params.get(DOC_EDIT_ARG);
	155	if (doc_edit != null && doc_edit.equals("1")) {
	156	editing_document = true;
	157	}
	158
	159	// are we editing mode? just get the archive document, convert to our internal doc format, and return it
	160	if (editing_document) {
	161
	162	// call get archive doc
	163	Element dx_message = doc.createElement(GSXML.MESSAGE_ELEM);
	164	String to = "DocXMLGetSection";
	165	Element dx_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
	166	dx_message.appendChild(dx_request);
	167	Element dx_section = doc.createElement(GSXML.DOCXML_SECTION_ELEM);
	168	dx_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
	169	dx_section.setAttribute(GSXML.COLLECTION_ATT, collection);
	170	dx_request.appendChild(dx_section);
	171
	172	Element dx_response_message = (Element) this.mr.process(dx_message);
	173	if (processErrorElements(dx_response_message, page_response))
	174	{
	175	return result;
	176	}
	177
	178	// get the section out
	179	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOCXML_SECTION_ELEM);
	180	Element section = (Element) GSXML.getNodeByPath(dx_response_message, path);
	181	if (section == null) {
	182	logger.error("no archive doc returned for "+document_id);
	183	return result;
	184	}
	185	// convert the archive format into the internal format that the page response requires
	186
	187	Element doc_elem = doc.createElement(GSXML.DOCUMENT_ELEM);
	188	page_response.appendChild(doc_elem);
	189	section.setAttribute(GSXML.NODE_ID_ATT, document_id);
	190
	191	Element transformed_section = transformArchiveToDocument(section);
	192	doc_elem.appendChild(doc.importNode(transformed_section, true));
	193	logger.error("dx result = "+XMLConverter.getPrettyString(result));
	194	return result;
	195	}
	196
[24116]	197	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
[25816]	198	if (document_type != null && document_type.equals(""))
[24812]	199	{
[25953]	200	//document_type = "hierarchy";
	201	document_type = null; // we'll get it later if not already specified
[24116]	202	}
	203	//whether to retrieve siblings or not
	204	boolean get_siblings = false;
	205	String sibs = (String) params.get(SIBLING_ARG);
[24812]	206	if (sibs != null && sibs.equals("1"))
	207	{
[24116]	208	get_siblings = true;
	209	}
[24812]	210
[25305]	211	String doc_id_modifier = "";
[24116]	212	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
[24812]	213	if (sibling_num != null && !sibling_num.equals(""))
	214	{
[24116]	215	// we have to modify the doc name
[25355]	216	doc_id_modifier = "." + sibling_num + ".ss";
[24116]	217	}
[24812]	218
[24116]	219	boolean expand_document = false;
[25305]	220	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
[24812]	221	if (ed_arg != null && ed_arg.equals("1"))
	222	{
[24116]	223	expand_document = true;
	224	}
[14525]	225
[24116]	226	boolean expand_contents = false;
[24812]	227	if (expand_document)
	228	{ // we always expand the contents with the text
[24116]	229	expand_contents = true;
[24812]	230	}
	231	else
	232	{
[25305]	233	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
[24812]	234	if (ec_arg != null && ec_arg.equals("1"))
	235	{
[24116]	236	expand_contents = true;
	237	}
[5694]	238	}
[25355]	239
[32068]	240	// do we want text content? Not if no_text=1.
	241	// expand_document overrides this. - should it??
	242	boolean get_text = true;
	243	String nt_arg = (String) params.get(NO_TEXT_ARG);
	244
	245	if (!expand_document && nt_arg!=null && nt_arg.equals("1")) {
	246	logger.error("SETTING GET TEXT TO FALSE");
	247	get_text = false;
	248	} else {
	249	logger.error("GET TEXT REMAINS TRUE");
	250	}
[4257]	251
[24116]	252	// the_document is where all the doc info - structure and metadata etc
	253	// is added into, to be returned in the page
[28382]	254	Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
[24116]	255	page_response.appendChild(the_document);
[9874]	256
[24116]	257	// create a basic doc list containing the current node
[28382]	258	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
	259	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]	260	basic_doc_list.appendChild(current_doc);
[25305]	261	if (document_id != null)
[24812]	262	{
[25355]	263	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
[24812]	264	}
[25355]	265	else
[24812]	266	{
[25305]	267	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
	268	// do we need this??
	269	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
[14525]	270	}
[3801]	271
[25953]	272	if (document_type == null)
	273	{
	274	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
[25816]	275	}
[29439]	276	if (document_type == null)
[25953]	277	{
[31249]	278	logger.debug("doctype is null, setting to simple");
[29439]	279	document_type = GSXML.DOC_TYPE_SIMPLE;
[25816]	280	}
[29439]	281
	282	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
	283
[25816]	284
[24116]	285	// Create a parameter list to specify the required structure information
[28382]	286	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]	287
	288	if (service_params != null)
	289	{
[28964]	290	GSXML.addParametersToList(ds_param_list, service_params);
[24116]	291	}
[3817]	292
[24812]	293	Element ds_param = null;
[24116]	294	boolean get_structure = false;
	295	boolean get_structure_info = false;
[24889]	296	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
[24812]	297	{
[24116]	298	get_structure_info = true;
[24889]	299
	300	if (expand_contents)
	301	{
[28382]	302	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]	303	ds_param_list.appendChild(ds_param);
	304	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	305	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
	306	}
	307
[25305]	308	// get the info needed for paged naviagtion
[28382]	309	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	310	ds_param_list.appendChild(ds_param);
	311	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	312	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
[28382]	313	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	314	ds_param_list.appendChild(ds_param);
	315	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	316	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
[28382]	317	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	318	ds_param_list.appendChild(ds_param);
	319	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	320	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
[24812]	321
[24889]	322	if (get_siblings)
	323	{
[28382]	324	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24889]	325	ds_param_list.appendChild(ds_param);
	326	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	327	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
	328	}
	329
[24812]	330	}
[28258]	331	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\| document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
[24812]	332	{
[24116]	333	get_structure = true;
[24812]	334	if (expand_contents)
	335	{
[28382]	336	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	337	ds_param_list.appendChild(ds_param);
	338	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	339	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
[24812]	340	}
	341	else
	342	{
[24116]	343	// get the info needed for table of contents
[28382]	344	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	345	ds_param_list.appendChild(ds_param);
	346	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	347	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
[28382]	348	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	349	ds_param_list.appendChild(ds_param);
	350	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	351	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
[24812]	352	if (get_siblings)
	353	{
[28382]	354	ds_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	355	ds_param_list.appendChild(ds_param);
	356	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
	357	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
	358	}
	359	}
[24812]	360	}
	361	else
	362	{
[31249]	363	// we dont need any structure
[24116]	364	}
[3801]	365
[24116]	366	boolean has_dummy = false;
[24812]	367	if (get_structure \|\| get_structure_info)
	368	{
[8676]	369
[24116]	370	// Build a request to obtain the document structure
[28382]	371	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]	372	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]	373	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	374	ds_message.appendChild(ds_request);
	375	ds_request.appendChild(ds_param_list);
[24812]	376
[25816]	377	// add the node list we created earlier
[24116]	378	ds_request.appendChild(basic_doc_list);
[24812]	379
[24116]	380	// Process the document structure retrieve message
	381	Element ds_response_message = (Element) this.mr.process(ds_message);
[24812]	382	if (processErrorElements(ds_response_message, page_response))
	383	{
[24116]	384	return result;
	385	}
[4030]	386
[24116]	387	// get the info and print out
[24812]	388	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	389	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
	390	path = GSPath.appendLink(path, "nodeStructureInfo");
	391	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
	392	// get the doc_node bit
[24812]	393	if (ds_response_struct_info != null)
	394	{
[28382]	395	the_document.appendChild(doc.importNode(ds_response_struct_info, true));
[24116]	396	}
[24812]	397	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	398	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
	399	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
	400	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
[24812]	401
	402	if (ds_response_structure != null)
	403	{
[24116]	404	// add the contents of the structure bit into the_document
	405	NodeList structs = ds_response_structure.getChildNodes();
[24812]	406	for (int i = 0; i < structs.getLength(); i++)
	407	{
[28382]	408	the_document.appendChild(doc.importNode(structs.item(i), true));
[24116]	409	}
[24812]	410	}
	411	else
	412	{
[24116]	413	// no structure nodes, so put in a dummy doc node
[28382]	414	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]	415	if (document_id != null)
[24812]	416	{
[25305]	417	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]	418	}
[25355]	419	else
[24812]	420	{
[25305]	421	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[25355]	422
[24116]	423	}
	424	the_document.appendChild(doc_node);
	425	has_dummy = true;
	426	}
[24812]	427	}
	428	else
	429	{ // a simple type - we dont have a dummy node for simple
[24116]	430	// should think about this more
	431	// no structure request, so just put in a dummy doc node
[28382]	432	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[25305]	433	if (document_id != null)
[24812]	434	{
[25305]	435	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
[24812]	436	}
[25355]	437	else
[24812]	438	{
[25305]	439	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
[24116]	440	}
	441	the_document.appendChild(doc_node);
	442	has_dummy = true;
	443	}
[24812]	444
[24116]	445	// Build a request to obtain some document metadata
[28382]	446	Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]	447	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
[28382]	448	Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	449	dm_message.appendChild(dm_request);
	450	// Create a parameter list to specify the required metadata information
[24812]	451
[25635]	452	HashSet<String> meta_names = new HashSet<String>();
[24116]	453	meta_names.add("Title"); // the default
[24812]	454	if (format_elem != null)
	455	{
[24889]	456	getRequiredMetadataNames(format_elem, meta_names);
[24116]	457	}
[28258]	458
[26026]	459	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
[28258]	460	if (extraMetaListElem != null)
[26026]	461	{
	462	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
[28258]	463	for (int i = 0; i < extraMetaList.getLength(); i++)
[26026]	464	{
[28258]	465	meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
[26026]	466	}
	467	}
[24812]	468
[28382]	469	Element dm_param_list = createMetadataParamList(doc,meta_names);
[24812]	470	if (service_params != null)
	471	{
[28964]	472	GSXML.addParametersToList(dm_param_list, service_params);
[24116]	473	}
[24812]	474
[24116]	475	dm_request.appendChild(dm_param_list);
[24812]	476
[24116]	477	// create the doc node list for the metadata request
[28382]	478	Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	479	dm_request.appendChild(dm_doc_list);
[4030]	480
[24116]	481	// Add each node from the structure response into the metadata request
	482	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
[24812]	483	for (int i = 0; i < doc_nodes.getLength(); i++)
	484	{
[24116]	485	Element doc_node = (Element) doc_nodes.item(i);
	486	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
[3801]	487
[24116]	488	// Add the documentNode to the list
[28382]	489	Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]	490	dm_doc_list.appendChild(dm_doc_node);
	491	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
[24812]	492	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
[29922]	493	if (document_id == null){
	494	dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
	495	}
	496
[24116]	497	}
[3801]	498
[24116]	499	// we also want a metadata request to the top level document to get
	500	// assocfilepath - this could be cached too
[28382]	501	Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	502	dm_message.appendChild(doc_meta_request);
[28382]	503	Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]	504	if (service_params != null)
	505	{
[28964]	506	GSXML.addParametersToList(doc_meta_param_list, service_params);
[24116]	507	}
[3801]	508
[24116]	509	doc_meta_request.appendChild(doc_meta_param_list);
[28382]	510	Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
[24116]	511	doc_meta_param_list.appendChild(doc_param);
	512	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
	513	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
[8676]	514
[24116]	515	// create the doc node list for the metadata request
[28382]	516	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	517	doc_meta_request.appendChild(doc_list);
[3801]	518
[28382]	519	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
[24116]	520	// the node we want is the root document node
[25355]	521	if (document_id != null)
[24812]	522	{
[25305]	523	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
[24812]	524	}
[29922]	525	/*else
[24812]	526	{
[25355]	527	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
	528	// can we assume that href is always a top level doc??
	529	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
[25305]	530	//doc_node.setAttribute("externalURL", has_rl);
[29922]	531	}*/
[24116]	532	doc_list.appendChild(doc_node);
[24889]	533
[24116]	534	Element dm_response_message = (Element) this.mr.process(dm_message);
[24812]	535	if (processErrorElements(dm_response_message, page_response))
	536	{
[24116]	537	return result;
	538	}
[9874]	539
[24812]	540	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	541	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
[3801]	542
[24116]	543	// Merge the metadata with the structure information
	544	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
[24812]	545	for (int i = 0; i < doc_nodes.getLength(); i++)
	546	{
[24116]	547	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
[8833]	548	}
[24116]	549	// get the top level doc metadata out
[24812]	550	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
	551	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
[24116]	552	GSXML.mergeMetadataLists(the_document, top_doc_node);
[24812]	553
[32068]	554	// do we want doc text content? If not, we are done.
	555	if (!get_text) {
	556	// don't get text
	557	return result;
	558	}
	559
[24116]	560	// Build a request to obtain some document content
[28382]	561	Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24812]	562	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
[28382]	563	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	564	dc_message.appendChild(dc_request);
[5694]	565
[24116]	566	// Create a parameter list to specify the request parameters - empty for now
[28382]	567	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[24812]	568	if (service_params != null)
	569	{
[28964]	570	GSXML.addParametersToList(dc_param_list, service_params);
[24116]	571	}
[4858]	572
[24116]	573	dc_request.appendChild(dc_param_list);
	574
	575	// get the content
	576	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
[24812]	577	if (expand_document)
	578	{
[24116]	579	dc_request.appendChild(dm_doc_list);
[24812]	580	}
	581	else
	582	{
[24116]	583	dc_request.appendChild(basic_doc_list);
[4858]	584	}
[24116]	585	Element dc_response_message = (Element) this.mr.process(dc_message);
[24812]	586	if (processErrorElements(dc_response_message, page_response))
	587	{
[24116]	588	return result;
[4827]	589	}
[24116]	590	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
[25953]	591
[24812]	592	if (expand_document)
	593	{
[24116]	594	// Merge the content with the structure information
	595	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
[24812]	596	for (int i = 0; i < doc_nodes.getLength(); i++)
	597	{
[31249]	598	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), GSXML.NODE_CONTENT_ELEM);
[24812]	599	if (content != null)
	600	{
	601	if (highlight_query_terms)
	602	{
[31249]	603	String node_id = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
	604	content = highlightQueryTerms(request, node_id, (Element) content);
[24116]	605	}
[31249]	606
[28382]	607	doc_nodes.item(i).appendChild(doc.importNode(content, true));
[24116]	608	}
	609	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
	610	}
[29521]	611	if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
	612	Element dummy_node = (Element) doc_nodes.item(0);
	613	the_document.removeChild(dummy_node);
	614	the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
	615	NodeList dummy_children = dummy_node.getChildNodes();
	616	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
	617	{
	618	// special case as we don't want more than one metadata list
	619	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
	620	{
	621	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
	622	}
	623	else
	624	{
	625	the_document.appendChild(dummy_children.item(i));
	626	}
	627	}
	628	}
[24812]	629	}
	630	else
	631	{
[24116]	632	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
	633	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
	634	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
[25305]	635	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
[25953]	636
[24812]	637	if (dc_response_doc_content == null)
	638	{
[24116]	639	// no content to add
[25355]	640	if (dc_response_doc.getAttribute("external").equals("true"))
	641	{
	642
	643	//if (dc_response_doc_external != null)
	644	//{
[25305]	645	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
[24812]	646
[25305]	647	the_document.setAttribute("selectedNode", href_id);
	648	the_document.setAttribute("external", href_id);
[25355]	649	}
	650	return result;
[24116]	651	}
[24812]	652	if (highlight_query_terms)
	653	{
[24116]	654	dc_response_doc.removeChild(dc_response_doc_content);
[24812]	655
[31249]	656	dc_response_doc_content = highlightQueryTerms(request, null, dc_response_doc_content);
[24116]	657	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
	658	}
[24812]	659
	660	if (provide_annotations)
	661	{
	662	String service_selected = (String) params.get(ENRICH_DOC_ARG);
	663	if (service_selected != null && service_selected.equals("1"))
	664	{
[24116]	665	// now we can modifiy the response doc if needed
[24812]	666	String enrich_service = (String) params.get(GSParams.SERVICE);
[24116]	667	// send a message to the service
[28382]	668	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
	669	Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
[24116]	670	enrich_message.appendChild(enrich_request);
	671	// check for parameters
[24812]	672	HashMap e_service_params = (HashMap) params.get("s1");
	673	if (e_service_params != null)
	674	{
[28382]	675	Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]	676	GSXML.addParametersToList(enrich_pl, e_service_params);
[24116]	677	enrich_request.appendChild(enrich_pl);
	678	}
[28382]	679	Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
[24116]	680	enrich_request.appendChild(e_doc_list);
[28382]	681	e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
[24812]	682
[24116]	683	Node enrich_response = this.mr.process(enrich_message);
[24812]	684
	685	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
[24116]	686	path = GSPath.createPath(links);
[24812]	687	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
	688
	689	}
[24116]	690	} // if provide_annotations
[3987]	691
[24116]	692	// use the returned id rather than the sent one cos there may have
	693	// been modifiers such as .pr that are removed.
	694	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
	695	the_document.setAttribute("selectedNode", modified_doc_id);
[24812]	696	if (has_dummy)
	697	{
[24116]	698	// change the id if necessary and add the content
[24812]	699	Element dummy_node = (Element) doc_nodes.item(0);
	700
[24116]	701	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[28382]	702	dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]	703	// hack for simple type
[26140]	704	if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
[24812]	705	{
[24116]	706	// we dont want the internal docNode, just want the content and metadata in the document
	707	// rethink this!!
	708	the_document.removeChild(dummy_node);
[4023]	709
[24116]	710	NodeList dummy_children = dummy_node.getChildNodes();
	711	//for (int i=0; i<dummy_children.getLength(); i++) {
[24812]	712	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
	713	{
[24116]	714	// special case as we don't want more than one metadata list
[24812]	715	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
	716	{
[24116]	717	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
[24812]	718	}
	719	else
	720	{
[24116]	721	the_document.appendChild(dummy_children.item(i));
	722	}
	723	}
	724	}
[28258]	725
[26140]	726	the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
[24812]	727	}
	728	else
	729	{
[24116]	730	// Merge the document content with the metadata and structure information
[24812]	731	for (int i = 0; i < doc_nodes.getLength(); i++)
	732	{
[24116]	733	Node dn = doc_nodes.item(i);
[24812]	734	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
	735	if (dn_id.equals(modified_doc_id))
	736	{
[28382]	737	dn.appendChild(doc.importNode(dc_response_doc_content, true));
[24116]	738	break;
	739	}
	740	}
	741	}
	742	}
[29307]	743	//logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
[24116]	744	return result;
[3801]	745	}
[24812]	746
	/**
	 * Tells the params class which arguments this action defines. An action
	 * that has its own arguments should add them to the params object here;
	 * this is particularly important for arguments that should not be saved.
	 */
[25305]	752	public boolean addActionParameters(GSParams params)
[24812]	753	{
[24116]	754	params.addParameter(GOTO_PAGE_ARG, false);
	755	params.addParameter(ENRICH_DOC_ARG, false);
[25305]	756	params.addParameter(EXPAND_DOCUMENT_ARG, false);
	757	params.addParameter(EXPAND_CONTENTS_ARG, false);
	758	params.addParameter(REALISTIC_BOOK_ARG, false);
	759
[24116]	760	return true;
[4717]	761	}
[4023]	762
[24812]	763	/**
	764	* this method gets the collection description, the format info, the list of
	765	* enrich services, etc - stuff that is needed for the page, but is the same
	766	* whatever the query is - should be cached
	767	*/
[24993]	768	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
[24812]	769	{
[28382]	770	Document doc = page_response.getOwnerDocument();
	771
[24116]	772	// create a message to process - contains requests for the collection
	773	// description, the format element, the enrich services on offer
	774	// these could all be cached
[28382]	775	Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]	776	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
	777	// the format request - ignore for now, where does this request go to??
[28382]	778	Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
[24116]	779	info_message.appendChild(format_request);
	780
	781	// the enrich_services request - only do this if provide_annotations is true
	782
[24812]	783	if (provide_annotations)
	784	{
[28382]	785	Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
[24116]	786	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
	787	info_message.appendChild(enrich_services_request);
[4023]	788	}
[24116]	789
[24812]	790	Element info_response = (Element) this.mr.process(info_message);
	791
[24116]	792	// the collection is the first response
	793	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
	794	Element format_resp = (Element) responses.item(0);
[24812]	795
	796	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
	797	if (format_elem != null)
	798	{
[25985]	799	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
[28258]	800	if (global_format_elem != null)
[25985]	801	{
	802	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
	803	}
	804
	805	// set the format type
[24812]	806	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
[28382]	807	page_response.appendChild(doc.importNode(format_elem, true));
[4023]	808	}
[4287]	809
[24812]	810	if (provide_annotations)
	811	{
	812	Element services_resp = (Element) responses.item(1);
[4287]	813
[24116]	814	// a new message for the mr
[28382]	815	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
[24116]	816	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
	817	boolean service_found = false;
[24812]	818	for (int j = 0; j < e_services.getLength(); j++)
	819	{
	820	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
	821	{
[28382]	822	Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
[24116]	823	enrich_message.appendChild(s);
	824	service_found = true;
	825	}
	826	}
[24812]	827	if (service_found)
	828	{
	829	Element enrich_response = (Element) this.mr.process(enrich_message);
	830
[24116]	831	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
[28382]	832	Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
[24812]	833	for (int i = 0; i < e_responses.getLength(); i++)
	834	{
	835	Element e_resp = (Element) e_responses.item(i);
[28382]	836	Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
[24116]	837	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
	838	service_list.appendChild(e_service);
	839	}
	840	page_response.appendChild(service_list);
	841	}
	842	} // if provide_annotations
	843	return true;
[24812]	844
[9874]	845	}
[4287]	846
[25953]	847	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
	848	{
[28382]	849	Document doc = basic_doc_list.getOwnerDocument();
	850
	851	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
[25953]	852	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
[28382]	853	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[25953]	854	ds_message.appendChild(ds_request);
[25816]	855
[25953]	856	// Create a parameter list to specify the required structure information
[28382]	857	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
	858	Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
[25953]	859	ds_param_list.appendChild(ds_param);
	860	ds_param.setAttribute(GSXML.NAME_ATT, "info");
	861	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
[25816]	862
[25953]	863	ds_request.appendChild(ds_param_list);
[25816]	864
[25953]	865	// add the node list we created earlier
	866	ds_request.appendChild(basic_doc_list);
	867
	868	// Process the document structure retrieve message
	869	Element ds_response_message = (Element) this.mr.process(ds_message);
	870	if (processErrorElements(ds_response_message, page_response))
	871	{
	872	return null;
	873	}
	874
	875	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
	876	String path = GSPath.createPath(links);
	877	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
[29439]	878	if (info_elem == null) {
	879	return null;
	880	}
[25953]	881	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
	882	if (doctype_elem != null)
	883	{
	884	String doc_type = doctype_elem.getAttribute("value");
	885	return doc_type;
	886	}
	887	return null;
	888	}
	889
[32068]	890	/** run the XSLT transform which converts from doc.xml format to our internal document format */
	891	protected Element transformArchiveToDocument(Element section) {
	892
	893	String stylesheet_file = GSFile.stylesheetFile(GlobalProperties.getGSDL3Home(), (String) this.config_params.get(GSConstants.SITE_NAME), "", (String) this.config_params.get(GSConstants.INTERFACE_NAME), null, "archive2document.xsl");
	894	Document stylesheet_doc = XMLConverter.getDOM(new File(stylesheet_file));
	895	if (stylesheet_doc == null) {
	896	logger.error("Couldn't load in stylesheet "+stylesheet_file);
	897	return section;
	898	}
	899
	900	Document section_doc = XMLConverter.newDOM();
	901	section_doc.appendChild(section_doc.importNode(section, true));
	902	Node result = this.transformer.transform(stylesheet_doc, section_doc);
	903	logger.error("transform result = "+XMLConverter.getPrettyString(result));
	904
	905	Element new_element;
	906	if (result.getNodeType() == Node.DOCUMENT_NODE)
	907	{
	908	new_element = ((Document) result).getDocumentElement();
	909	}
	910	else
	911	{
	912	new_element = (Element) result;
	913	}
	914
	915
	916	return new_element;
	917
	918	}
	919
	920
[24812]	921	/**
	922	* this involves a bit of a hack to get the equivalent query terms - has to
	923	* requery the query service - uses the last selected service name. (if it
	924	* ends in query). should this action do the query or should it send a
	925	* message to the query action? but that will involve lots of extra stuff.
[24889]	926	* also doesn't handle phrases properly - just highlights all the terms
	927	* found in the text.
[24812]	928	*/
[31249]	929	protected Element highlightQueryTerms(Element request, String current_node_id, Element dc_response_doc_content)
[24812]	930	{
[28382]	931	Document doc = request.getOwnerDocument();
	932
[24116]	933	// do the query again to get term info
[24812]	934	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[25635]	935	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
[24812]	936
	937	HashMap previous_params = (HashMap) params.get("p");
	938	if (previous_params == null)
	939	{
[24116]	940	return dc_response_doc_content;
	941	}
[24812]	942	String service_name = (String) previous_params.get(GSParams.SERVICE);
	943	if (service_name == null \|\| !service_name.endsWith("Query"))
	944	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
[24116]	945	logger.debug("invalid service, not doing highlighting");
	946	return dc_response_doc_content;
	947	}
[24812]	948	String collection = (String) params.get(GSParams.COLLECTION);
[24993]	949	UserContext userContext = new UserContext(request);
[24116]	950	String to = GSPath.appendLink(collection, service_name);
[24812]	951
[28382]	952	Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
	953	Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
[24116]	954	mr_query_message.appendChild(mr_query_request);
[24812]	955
[24116]	956	// paramList
[24812]	957	HashMap service_params = (HashMap) params.get("s1");
	958
[28382]	959	Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
[28964]	960	GSXML.addParametersToList(query_param_list, service_params);
[31249]	961	if (current_node_id != null) {
	962	GSXML.addParameterToList(query_param_list, "hldocOID", current_node_id);
	963	} else {
	964	GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
	965	}
[24116]	966	mr_query_request.appendChild(query_param_list);
	967	// do the query
[24812]	968	Element mr_query_response = (Element) this.mr.process(mr_query_message);
[30049]	969	String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
	970	Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
[31249]	971	// For SOLR, the above query may come back with a nodeContent element, which is the hldocOID section content, with search terms marked up. We send it back to the documnetContentRetrieve service so that resolveTextMacros can be applied, and it can be properly encased in documentNode etc elements
[30049]	972	if (highlighted_Node != null)
	973	{
[30056]	974	// Build a request to process highlighted text
	975
	976	Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
	977	to = GSPath.appendLink(collection, "DocumentContentRetrieve");
	978	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
	979	hl_message.appendChild(dc_request);
	980
	981	// Create a parameter list to specify the request parameters - empty for now
	982	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
	983	dc_request.appendChild(dc_param_list);
	984
	985	// get the content
	986	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
	987	dc_request.appendChild(doc_list);
	988	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
	989	doc_list.appendChild(current_doc);
	990	current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
	991	//Append highlighted content to request for processing
	992	dc_request.appendChild(doc.importNode(highlighted_Node, true));
	993	Element hl_response_message = (Element) this.mr.process(hl_message);
[31249]	994
[30056]	995	//Get results
	996	NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
	997	Element content = (Element) contentList.item(0);
	998	return content;
[30049]	999	}
[24812]	1000	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
[24116]	1001	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
[24812]	1002	if (query_term_list_element == null)
	1003	{
[24116]	1004	// no term info
	1005	logger.error("No query term information.\n");
	1006	return dc_response_doc_content;
	1007	}
[8731]	1008
[24116]	1009	String content = GSXML.getNodeText(dc_response_doc_content);
[4287]	1010
[24812]	1011	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
[24116]	1012	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
[4717]	1013
[25635]	1014	HashSet<String> query_term_variants = new HashSet<String>();
[24116]	1015	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
[24812]	1016	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
[24116]	1017	{
	1018	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
[24812]	1019	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
[24116]	1020	{
[24812]	1021	for (int i = 0; i < terms_nodelist.getLength(); i++)
[24116]	1022	{
[24812]	1023	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
[24116]	1024	String termValueU = null;
	1025	String termValueL = null;
[24812]	1026
	1027	if (termValue.length() > 1)
[24116]	1028	{
	1029	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
	1030	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
	1031	}
	1032	else
	1033	{
	1034	termValueU = termValue.substring(0, 1).toUpperCase();
	1035	termValueL = termValue.substring(0, 1).toLowerCase();
	1036	}
[24812]	1037
[24116]	1038	query_term_variants.add(termValueU);
	1039	query_term_variants.add(termValueL);
	1040	}
	1041	}
	1042	}
	1043	else
	1044	{
[24812]	1045	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
	1046	{
[24116]	1047	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
	1048	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
[24812]	1049	for (int j = 0; j < equivalent_terms.length; j++)
	1050	{
[24116]	1051	query_term_variants.add(equivalent_terms[j]);
	1052	}
	1053	}
	1054	}
[4287]	1055
[25635]	1056	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
[4287]	1057
[24116]	1058	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
	1059	String performed_query = GSXML.getNodeText(query_element) + " ";
[8731]	1060
[25635]	1061	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]	1062	int term_start = 0;
	1063	boolean in_term = false;
	1064	boolean in_phrase = false;
[24812]	1065	for (int i = 0; i < performed_query.length(); i++)
	1066	{
[24116]	1067	char character = performed_query.charAt(i);
	1068	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
	1069
	1070	// Has a query term just started?
[24812]	1071	if (in_term == false && is_character_letter_or_digit == true)
	1072	{
[24116]	1073	in_term = true;
	1074	term_start = i;
	1075	}
	1076
	1077	// Or has a term just finished?
[24812]	1078	else if (in_term == true && is_character_letter_or_digit == false)
	1079	{
[24116]	1080	in_term = false;
	1081	String term = performed_query.substring(term_start, i);
[24812]	1082
[24116]	1083	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
[24812]	1084	if (term_element != null)
	1085	{
	1086
[25635]	1087	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
[24812]	1088
[24116]	1089	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
[24812]	1090	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
[24116]	1091	{
	1092	String termValueU = null;
	1093	String termValueL = null;
[24812]	1094
	1095	if (term.length() > 1)
[24116]	1096	{
	1097	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
	1098	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
	1099	}
	1100	else
	1101	{
	1102	termValueU = term.substring(0, 1).toUpperCase();
	1103	termValueL = term.substring(0, 1).toLowerCase();
	1104	}
[24812]	1105
[24116]	1106	phrase_query_p_term_x_variants.add(termValueU);
	1107	phrase_query_p_term_x_variants.add(termValueL);
	1108	}
	1109	else
	1110	{
[24812]	1111	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
	1112	{
[24116]	1113	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
	1114	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
[24812]	1115	for (int k = 0; k < term_equivalent_terms.length; k++)
	1116	{
[24116]	1117	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
	1118	}
	1119	}
	1120	}
	1121	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
[24812]	1122
	1123	if (in_phrase == false)
	1124	{
[24116]	1125	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
[25635]	1126	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]	1127	}
	1128	}
[9007]	1129	}
[24116]	1130	// Watch for phrases (surrounded by quotes)
[24812]	1131	if (character == '\"')
	1132	{
[24116]	1133	// Has a phrase just started?
[24812]	1134	if (in_phrase == false)
	1135	{
[24116]	1136	in_phrase = true;
	1137	}
	1138	// Or has a phrase just finished?
[24812]	1139	else if (in_phrase == true)
	1140	{
[24116]	1141	in_phrase = false;
	1142	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
	1143	}
	1144
[25635]	1145	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
[24116]	1146	}
[4287]	1147	}
[8731]	1148
[28382]	1149	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
[8731]	1150	}
	1151
[24116]	1152	/**
[24812]	1153	* Highlights query terms in a piece of text.
	1154	*/
[28382]	1155	private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
[24116]	1156	{
	1157	// Convert the content string to an array of characters for speed
	1158	char[] content_characters = new char[content.length()];
	1159	content.getChars(0, content.length(), content_characters, 0);
[8731]	1160
[24116]	1161	// Now skim through the content, identifying word matches
[25635]	1162	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
[24116]	1163	int word_start = 0;
	1164	boolean in_word = false;
	1165	boolean preceding_word_matched = false;
[24813]	1166	boolean inTag = false;
[24812]	1167	for (int i = 0; i < content_characters.length; i++)
	1168	{
[24813]	1169	//We don't want to find words inside HTML tags
[24993]	1170	if (content_characters[i] == '<')
[24813]	1171	{
	1172	inTag = true;
	1173	continue;
	1174	}
	1175	else if (inTag && content_characters[i] == '>')
	1176	{
	1177	inTag = false;
	1178	}
	1179	else if (inTag)
	1180	{
	1181	continue;
	1182	}
[24993]	1183
[24116]	1184	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
[24993]	1185
[24116]	1186	// Has a word just started?
[24812]	1187	if (in_word == false && is_character_letter_or_digit == true)
	1188	{
[24116]	1189	in_word = true;
	1190	word_start = i;
	1191	}
[8731]	1192
[24116]	1193	// Or has a word just finished?
[24812]	1194	else if (in_word == true && is_character_letter_or_digit == false)
	1195	{
[24116]	1196	in_word = false;
[8731]	1197
[24116]	1198	// Check if the word matches any of the query term equivalents
	1199	String word = new String(content_characters, word_start, (i - word_start));
[24812]	1200	if (query_term_variants.contains(word))
	1201	{
[24116]	1202	// We have found a matching word, so remember its location
	1203	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
	1204	preceding_word_matched = true;
	1205	}
[24812]	1206	else
	1207	{
[24116]	1208	preceding_word_matched = false;
	1209	}
	1210	}
	1211	}
[8731]	1212
[24116]	1213	// Don't forget the last word...
[24812]	1214	if (in_word == true)
	1215	{
[24116]	1216	// Check if the word matches any of the query term equivalents
	1217	String word = new String(content_characters, word_start, (content_characters.length - word_start));
[24812]	1218	if (query_term_variants.contains(word))
	1219	{
[24116]	1220	// We have found a matching word, so remember its location
	1221	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
	1222	}
[8731]	1223	}
	1224
[25635]	1225	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
	1226	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
[8731]	1227
[24116]	1228	// Deal with phrases now
[25635]	1229	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
[24812]	1230	for (int i = 0; i < word_matches.size(); i++)
	1231	{
[25635]	1232	WordMatch word_match = word_matches.get(i);
[8731]	1233
[24116]	1234	// See if any partial phrase matches are extended by this word
[24812]	1235	if (word_match.preceding_word_matched)
	1236	{
	1237	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
	1238	{
[25635]	1239	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
	1240	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
[24116]	1241	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
[24812]	1242	if (phrase_query_p_term_x_variants.contains(word_match.word))
	1243	{
[24116]	1244	partial_phrase_match.num_words_matched++;
[8731]	1245
[24116]	1246	// Has a complete phrase match occurred?
[24812]	1247	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
	1248	{
[24116]	1249	// Check for overlaps by looking at the previous highlight range
[24812]	1250	if (!highlight_end_positions.isEmpty())
	1251	{
[24116]	1252	int last_highlight_index = highlight_end_positions.size() - 1;
[25635]	1253	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
[24812]	1254	if (last_highlight_end > partial_phrase_match.start_position)
	1255	{
[24116]	1256	// There is an overlap, so remove the previous phrase match
[25635]	1257	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
[24116]	1258	highlight_end_positions.remove(last_highlight_index);
	1259	partial_phrase_match.start_position = last_highlight_start;
	1260	}
	1261	}
[8731]	1262
[24116]	1263	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
	1264	highlight_end_positions.add(new Integer(word_match.end_position));
	1265	}
	1266	// No, but add the partial match back into the list for next time
[24812]	1267	else
	1268	{
[24116]	1269	partial_phrase_matches.add(partial_phrase_match);
	1270	}
	1271	}
	1272	}
	1273	}
[24812]	1274	else
	1275	{
[24116]	1276	partial_phrase_matches.clear();
	1277	}
[8731]	1278
[24116]	1279	// See if this word is at the start of any of the phrases
[24812]	1280	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
	1281	{
[25635]	1282	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
[31686]	1283	if (phrase_query_p_term_variants_list.size()>0) {
[24116]	1284	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
[24812]	1285	if (phrase_query_p_term_1_variants.contains(word_match.word))
	1286	{
[24116]	1287	// If this phrase is just one word long, we have a complete match
[24812]	1288	if (phrase_query_p_term_variants_list.size() == 1)
	1289	{
[24116]	1290	highlight_start_positions.add(new Integer(word_match.start_position));
	1291	highlight_end_positions.add(new Integer(word_match.end_position));
	1292	}
	1293	// Otherwise we have the start of a potential phrase match
[24812]	1294	else
	1295	{
[24116]	1296	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
	1297	}
	1298	}
[31686]	1299	}
[24116]	1300	}
[4287]	1301	}
[4717]	1302
[24116]	1303	// Now add the annotation tags into the document at the correct points
[28382]	1304	Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
[8731]	1305
[24116]	1306	int last_wrote = 0;
[24812]	1307	for (int i = 0; i < highlight_start_positions.size(); i++)
	1308	{
[25635]	1309	int highlight_start = highlight_start_positions.get(i).intValue();
	1310	int highlight_end = highlight_end_positions.get(i).intValue();
[8731]	1311
[24116]	1312	// Print anything before the highlight range
[24812]	1313	if (last_wrote < highlight_start)
	1314	{
[24116]	1315	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
[28382]	1316	content_element.appendChild(doc.createTextNode(preceding_text));
[24116]	1317	}
[8731]	1318
[24116]	1319	// Print the highlight text, annotated
[24812]	1320	if (highlight_end > last_wrote)
	1321	{
[24116]	1322	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
[28382]	1323	Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
[24116]	1324	annotation_element.setAttribute("type", "query_term");
	1325	content_element.appendChild(annotation_element);
	1326	last_wrote = highlight_end;
	1327	}
	1328	}
[8731]	1329
[24116]	1330	// Finish off any unwritten text
[24812]	1331	if (last_wrote < content_characters.length)
	1332	{
[24116]	1333	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
[28382]	1334	content_element.appendChild(doc.createTextNode(remaining_text));
[24116]	1335	}
	1336	return content_element;
[8731]	1337	}
	1338
[24116]	1339	static private class WordMatch
	1340	{
	1341	public String word;
	1342	public int start_position;
	1343	public int end_position;
	1344	public boolean preceding_word_matched;
[8731]	1345
[24116]	1346	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
	1347	{
	1348	this.word = word;
	1349	this.start_position = start_position;
	1350	this.end_position = end_position;
	1351	this.preceding_word_matched = preceding_word_matched;
	1352	}
[8731]	1353	}
	1354
[24116]	1355	static private class PartialPhraseMatch
	1356	{
	1357	public int start_position;
	1358	public int query_phrase_number;
	1359	public int num_words_matched;
[8731]	1360
[24116]	1361	public PartialPhraseMatch(int start_position, int query_phrase_number)
	1362	{
	1363	this.start_position = start_position;
	1364	this.query_phrase_number = query_phrase_number;
	1365	this.num_words_matched = 1;
	1366	}
[8731]	1367	}
[3645]	1368	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: