Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 32448

Last change on this file since 32448 was 32448, checked in by kjdon, 6 years ago
params class changed, now returns false by default for shouldsave. so don't need to add any that we don't want saving in the session. turned hard coded strings into static string variables
Property svn:keywords set to `Author Date Id Revision`
File size: 54.0 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24	import org.greenstone.util.GlobalProperties;
25
26	// XML classes
27	import org.w3c.dom.Document;
28	import org.w3c.dom.Element;
29	import org.w3c.dom.Node;
30	import org.w3c.dom.Text;
31	import org.w3c.dom.NodeList;
32
33	// General Java classes
34	import java.util.ArrayList;
35	import java.util.HashMap;
36	import java.util.HashSet;
37	import java.io.File;
38	import java.io.Serializable;
39
40	import org.apache.log4j.*;
41
42	/** Action class for retrieving Documents via the message router */
43	public class DocumentAction extends Action
44	{
45
46	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
47
48	// this is used to specify that the sibling nodes of a selected one should be obtained
49	public static final String SIBLING_ARG = "sib";
50	public static final String GOTO_PAGE_ARG = "gp";
51	public static final String ENRICH_DOC_ARG = "end";
52	public static final String EXPAND_DOCUMENT_ARG = "ed";
53	public static final String EXPAND_CONTENTS_ARG = "ec";
54	public static final String REALISTIC_BOOK_ARG = "book";
55	public static final String NO_TEXT_ARG = "noText";
56	public static final String DOC_EDIT_ARG = "docEdit";
57
58	/**
59	* if this is set to true, when a document is displayed, any annotation type
60	* services (enrich) will be offered to the user as well
61	*/
62	protected boolean provide_annotations = false;
63
64	protected boolean highlight_query_terms = false;
65
66	public boolean configure()
67	{
68	super.configure();
69	String highlight = (String) config_params.get("highlightQueryTerms");
70	if (highlight != null && highlight.equals("true"))
71	{
72	highlight_query_terms = true;
73	}
74	String annotate = (String) config_params.get("displayAnnotationService");
75	if (annotate != null && annotate.equals("true"))
76	{
77	provide_annotations = true;
78	}
79	return true;
80	}
81
82	public Node process(Node message_node)
83	{
84	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
85
86	Element message = GSXML.nodeToElement(message_node);
87	Document doc = XMLConverter.newDOM(); //message.getOwnerDocument();
88
89	// the response
90	Element result = doc.createElement(GSXML.MESSAGE_ELEM);
91	Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
92	result.appendChild(page_response);
93
94	// get the request - assume only one
95	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
96	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
97	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
98
99	// just in case there are some that need to get passed to the services
100	HashMap service_params = (HashMap) params.get("s0");
101
102	String collection = (String) params.get(GSParams.COLLECTION);
103	String document_id = (String) params.get(GSParams.DOCUMENT);
104	if (document_id != null && document_id.equals(""))
105	{
106	document_id = null;
107	}
108	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
109	if (href != null && href.equals(""))
110	{
111	href = null;
112	}
113	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
114	if (document_id == null && href == null)
115	{
116	logger.error("no document specified!");
117	return result;
118	}
119	if (rl != null && rl.equals("0"))
120	{
121	// this is a true external link, we should have been directed to a different page or action
122	logger.error("rl value was 0, shouldn't get here");
123	return result;
124	}
125
126	UserContext userContext = new UserContext(request);
127
128	//append site metadata
129	addSiteMetadata(page_response, userContext);
130	addInterfaceOptions(page_response);
131
132	// get the additional data needed for the page
133	getBackgroundData(page_response, collection, userContext);
134	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
135
136	if (format_elem != null) {
137	// lets look for param defaults set in config file
138	NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
139	for (int i=0; i<param_defaults.getLength(); i++) {
140	Element p = (Element)param_defaults.item(i);
141	String name = p.getAttribute(GSXML.NAME_ATT);
142	if (params.get(name) ==null) {
143	// wasn't set from interface
144	String value = p.getAttribute(GSXML.VALUE_ATT);
145	params.put(name, value );
146	// also add into request param xml so that xslt knows it too
147	GSXML.addParameterToList(cgi_paramList, name, value);
148	}
149	}
150	}
151
152	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
153	if (document_type != null && document_type.equals(""))
154	{
155	//document_type = "hierarchy";
156	document_type = null; // we'll get it later if not already specified
157	}
158	// what if it is null here?? Anu to check...
159
160
161	boolean editing_document = false;
162	String doc_edit = (String) params.get(DOC_EDIT_ARG);
163	if (doc_edit != null && doc_edit.equals("1")) {
164	editing_document = true;
165	}
166
167	// are we editing mode? just get the archive document, convert to our internal doc format, and return it
168	if (editing_document) {
169
170	// call get archive doc
171	Element dx_message = doc.createElement(GSXML.MESSAGE_ELEM);
172	String to = "DocXMLGetSection";
173	Element dx_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
174	dx_message.appendChild(dx_request);
175	Element dx_section = doc.createElement(GSXML.DOCXML_SECTION_ELEM);
176	dx_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
177	dx_section.setAttribute(GSXML.COLLECTION_ATT, collection);
178	dx_request.appendChild(dx_section);
179
180	Element dx_response_message = (Element) this.mr.process(dx_message);
181	if (processErrorElements(dx_response_message, page_response))
182	{
183	return result;
184	}
185
186	// get the section out
187	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOCXML_SECTION_ELEM);
188	Element section = (Element) GSXML.getNodeByPath(dx_response_message, path);
189	if (section == null) {
190	logger.error("no archive doc returned for "+document_id);
191	return result;
192	}
193	// convert the archive format into the internal format that the page response requires
194
195	// work out doctype
196	// NOTE: this will be coming from collection database in index
197	// the archive file doesn't store this. So we have to assume
198	// that the doc type will not be changing with any
199	// modifications happening to archives.
200
201	// if doc type is null, then we need to work it out.
202	// create a basic doc list containing the current node
203
204	if (document_type == null) {
205	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
206	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
207	basic_doc_list.appendChild(current_doc);
208	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id);
209	basic_doc_list.appendChild(current_doc);
210	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
211	}
212
213	if (document_type == null) {
214	logger.debug("@@@ doctype is null, setting to simple");
215	document_type = GSXML.DOC_TYPE_SIMPLE;
216	}
217
218	Element doc_elem = doc.createElement(GSXML.DOCUMENT_ELEM);
219	doc_elem.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
220	page_response.appendChild(doc_elem);
221
222	Element transformed_section = transformArchiveToDocument(section);
223	if (document_type == GSXML.DOC_TYPE_SIMPLE) {
224	// simple doc, only returning a single document node, which is the top level section.
225	doc_elem.setAttribute(GSXML.NODE_ID_ATT, document_id);
226	GSXML.mergeElements(doc_elem, transformed_section);
227	return result;
228	}
229
230	// multi sectioned document.
231	transformed_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
232	// In docEdit mode, we obtain the text from archives, from doc.xml
233	// Now the transformation has replaced <Section> with <documentNode>
234	// Need to add nodeID, nodeType and docType attributes to each docNode
235	// as doc.xml doesn't store that.
236	insertDocNodeAttributes(transformed_section, document_type, null);
237	doc_elem.appendChild(doc.importNode(transformed_section, true));
238	logger.debug("dx result = "+XMLConverter.getPrettyString(result));
239
240	return result;
241	}
242
243	//whether to retrieve siblings or not
244	boolean get_siblings = false;
245	String sibs = (String) params.get(SIBLING_ARG);
246	if (sibs != null && sibs.equals("1"))
247	{
248	get_siblings = true;
249	}
250
251	String doc_id_modifier = "";
252	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
253	if (sibling_num != null && !sibling_num.equals(""))
254	{
255	// we have to modify the doc name
256	doc_id_modifier = "." + sibling_num + ".ss";
257	}
258
259	boolean expand_document = false;
260	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
261	if (ed_arg != null && ed_arg.equals("1"))
262	{
263	expand_document = true;
264	}
265
266	boolean expand_contents = false;
267	if (expand_document)
268	{ // we always expand the contents with the text
269	expand_contents = true;
270	}
271	else
272	{
273	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
274	if (ec_arg != null && ec_arg.equals("1"))
275	{
276	expand_contents = true;
277	}
278	}
279
280	// do we want text content? Not if no_text=1.
281	// expand_document overrides this. - should it??
282	boolean get_text = true;
283	String nt_arg = (String) params.get(NO_TEXT_ARG);
284
285	if (!expand_document && nt_arg!=null && nt_arg.equals("1")) {
286	logger.debug("SETTING GET TEXT TO FALSE");
287	get_text = false;
288	} else {
289	logger.debug("GET TEXT REMAINS TRUE");
290	}
291
292	// the_document is where all the doc info - structure and metadata etc
293	// is added into, to be returned in the page
294	Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
295	page_response.appendChild(the_document);
296
297	// create a basic doc list containing the current node
298	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
299	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
300	basic_doc_list.appendChild(current_doc);
301	if (document_id != null)
302	{
303	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
304	}
305	else
306	{
307	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
308	// do we need this??
309	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
310	}
311
312	if (document_type == null)
313	{
314	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
315	}
316	if (document_type == null)
317	{
318	logger.debug("##### doctype is null, setting to simple");
319	document_type = GSXML.DOC_TYPE_SIMPLE;
320	}
321
322	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
323
324	// Create a parameter list to specify the required structure information
325	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
326
327	if (service_params != null)
328	{
329	GSXML.addParametersToList(ds_param_list, service_params);
330	}
331
332	Element ds_param = null;
333	boolean get_structure = false;
334	boolean get_structure_info = false;
335	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
336	{
337	get_structure_info = true;
338
339	if (expand_contents)
340	{
341	ds_param = doc.createElement(GSXML.PARAM_ELEM);
342	ds_param_list.appendChild(ds_param);
343	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
344	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
345	}
346
347	// get the info needed for paged naviagtion
348	ds_param = doc.createElement(GSXML.PARAM_ELEM);
349	ds_param_list.appendChild(ds_param);
350	ds_param.setAttribute(GSXML.NAME_ATT, "info");
351	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
352	ds_param = doc.createElement(GSXML.PARAM_ELEM);
353	ds_param_list.appendChild(ds_param);
354	ds_param.setAttribute(GSXML.NAME_ATT, "info");
355	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
356	ds_param = doc.createElement(GSXML.PARAM_ELEM);
357	ds_param_list.appendChild(ds_param);
358	ds_param.setAttribute(GSXML.NAME_ATT, "info");
359	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
360
361	if (get_siblings)
362	{
363	ds_param = doc.createElement(GSXML.PARAM_ELEM);
364	ds_param_list.appendChild(ds_param);
365	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
366	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
367	}
368
369	}
370	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\| document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
371	{
372	get_structure = true;
373	if (expand_contents)
374	{
375	ds_param = doc.createElement(GSXML.PARAM_ELEM);
376	ds_param_list.appendChild(ds_param);
377	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
378	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
379	}
380	else
381	{
382	// get the info needed for table of contents
383	ds_param = doc.createElement(GSXML.PARAM_ELEM);
384	ds_param_list.appendChild(ds_param);
385	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
386	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
387	ds_param = doc.createElement(GSXML.PARAM_ELEM);
388	ds_param_list.appendChild(ds_param);
389	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
390	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
391	if (get_siblings)
392	{
393	ds_param = doc.createElement(GSXML.PARAM_ELEM);
394	ds_param_list.appendChild(ds_param);
395	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
396	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
397	}
398	}
399	}
400	else
401	{
402	// we dont need any structure
403	}
404
405	boolean has_dummy = false;
406	if (get_structure \|\| get_structure_info)
407	{
408
409	// Build a request to obtain the document structure
410	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
411	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
412	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
413	ds_message.appendChild(ds_request);
414	ds_request.appendChild(ds_param_list);
415
416	// add the node list we created earlier
417	ds_request.appendChild(basic_doc_list);
418
419	// Process the document structure retrieve message
420	Element ds_response_message = (Element) this.mr.process(ds_message);
421	if (processErrorElements(ds_response_message, page_response))
422	{
423	return result;
424	}
425
426	// get the info and print out
427	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
428	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
429	path = GSPath.appendLink(path, "nodeStructureInfo");
430	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
431	// get the doc_node bit
432	if (ds_response_struct_info != null)
433	{
434	the_document.appendChild(doc.importNode(ds_response_struct_info, true));
435	}
436	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
437	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
438	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
439	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
440
441	if (ds_response_structure != null)
442	{
443	// add the contents of the structure bit into the_document
444	NodeList structs = ds_response_structure.getChildNodes();
445	for (int i = 0; i < structs.getLength(); i++)
446	{
447	the_document.appendChild(doc.importNode(structs.item(i), true));
448	}
449	}
450	else
451	{
452	// no structure nodes, so put in a dummy doc node
453	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
454	if (document_id != null)
455	{
456	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
457	}
458	else
459	{
460	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
461
462	}
463	the_document.appendChild(doc_node);
464	has_dummy = true;
465	}
466	}
467	else
468	{ // a simple type - we dont have a dummy node for simple
469	// should think about this more
470	// no structure request, so just put in a dummy doc node
471	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
472	if (document_id != null)
473	{
474	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
475	}
476	else
477	{
478	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
479	}
480	the_document.appendChild(doc_node);
481	has_dummy = true;
482	}
483
484	// Build a request to obtain some document metadata
485	Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
486	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
487	Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
488	dm_message.appendChild(dm_request);
489	// Create a parameter list to specify the required metadata information
490
491	HashSet<String> meta_names = new HashSet<String>();
492	meta_names.add("Title"); // the default
493	if (format_elem != null)
494	{
495	getRequiredMetadataNames(format_elem, meta_names);
496	}
497
498	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
499	if (extraMetaListElem != null)
500	{
501	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
502	for (int i = 0; i < extraMetaList.getLength(); i++)
503	{
504	meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
505	}
506	}
507
508	Element dm_param_list = createMetadataParamList(doc,meta_names);
509	if (service_params != null)
510	{
511	GSXML.addParametersToList(dm_param_list, service_params);
512	}
513
514	dm_request.appendChild(dm_param_list);
515
516	// create the doc node list for the metadata request
517	Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
518	dm_request.appendChild(dm_doc_list);
519
520	// Add each node from the structure response into the metadata request
521	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
522	for (int i = 0; i < doc_nodes.getLength(); i++)
523	{
524	Element doc_node = (Element) doc_nodes.item(i);
525	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
526
527	// Add the documentNode to the list
528	Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
529	if (needSectionContent(params)) {
530	if (doc_node_id.equals(document_id)) {
531	dm_doc_list.appendChild(dm_doc_node);
532	}
533	} else {
534	dm_doc_list.appendChild(dm_doc_node);
535	}
536	//dm_doc_list.appendChild(dm_doc_node);
537	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
538	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
539	if (document_id == null){
540	dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
541	}
542
543	}
544	// we also want a metadata request to the top level document to get
545	// assocfilepath - this could be cached too
546	Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
547	dm_message.appendChild(doc_meta_request);
548	Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
549	if (service_params != null)
550	{
551	GSXML.addParametersToList(doc_meta_param_list, service_params);
552	}
553
554	doc_meta_request.appendChild(doc_meta_param_list);
555	Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
556	doc_meta_param_list.appendChild(doc_param);
557	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
558	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
559
560	// create the doc node list for the metadata request
561	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
562	doc_meta_request.appendChild(doc_list);
563
564	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
565	// the node we want is the root document node
566	if (document_id != null)
567	{
568	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
569	}
570	/*else
571	{
572	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
573	// can we assume that href is always a top level doc??
574	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
575	//doc_node.setAttribute("externalURL", has_rl);
576	}*/
577	doc_list.appendChild(doc_node);
578
579	Element dm_response_message = (Element) this.mr.process(dm_message);
580	if (processErrorElements(dm_response_message, page_response))
581	{
582	return result;
583	}
584
585	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
586	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
587
588	// Merge the metadata with the structure information
589	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
590	for (int i = 0; i < doc_nodes.getLength(); i++)
591	{
592	Node dcNode;
593	String node_idd = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
594	if (node_idd.isEmpty()) {
595	String href_id_att = ((Element)doc_nodes.item(i)).getAttribute(GSXML.HREF_ID_ATT);
596	dcNode = GSXML.getNamedElement(dm_response_doc_list, "documentNode", GSXML.HREF_ID_ATT, href_id_att);
597	} else {
598	dcNode = GSXML.getNamedElement(dm_response_doc_list, "documentNode", GSXML.NODE_ID_ATT, node_idd);
599	}
600	GSXML.mergeMetadataLists(doc_nodes.item(i), dcNode);
601	}
602	// get the top level doc metadata out
603	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
604	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
605	GSXML.mergeMetadataLists(the_document, top_doc_node);
606
607	// do we want doc text content? If not, we are done.
608	if (!get_text) {
609	// don't get text
610	return result;
611	}
612
613	// Build a request to obtain some document content
614	Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
615	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
616	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
617	dc_message.appendChild(dc_request);
618
619	// Create a parameter list to specify the request parameters - empty for now
620	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
621	if (service_params != null)
622	{
623	GSXML.addParametersToList(dc_param_list, service_params);
624	}
625
626	dc_request.appendChild(dc_param_list);
627
628	// get the content
629	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
630	if (expand_document)
631	{
632	dc_request.appendChild(dm_doc_list);
633	}
634	else
635	{
636	dc_request.appendChild(basic_doc_list);
637	}
638	Element dc_response_message = (Element) this.mr.process(dc_message);
639
640	if (processErrorElements(dc_response_message, page_response))
641	{
642	return result;
643
644	}
645	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
646
647	if (expand_document)
648	{
649	// Merge the content with the structure information
650	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
651	for (int i = 0; i < doc_nodes.getLength(); i++)
652	{
653	String node_id = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
654	//Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), GSXML.NODE_CONTENT_ELEM);
655	Node docNode = GSXML.getNamedElement(dc_response_doc_list, "documentNode", GSXML.NODE_ID_ATT, node_id);
656	Node content = GSXML.getChildByTagName(docNode, GSXML.NODE_CONTENT_ELEM);
657	if (content != null)
658	{
659	if (highlight_query_terms)
660	{
661
662	content = highlightQueryTerms(request, node_id, (Element) content);
663	}
664
665	doc_nodes.item(i).appendChild(doc.importNode(content, true));
666	}
667	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
668	}
669	if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
670	Element dummy_node = (Element) doc_nodes.item(0);
671	the_document.removeChild(dummy_node);
672	the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
673	NodeList dummy_children = dummy_node.getChildNodes();
674	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
675	{
676	// special case as we don't want more than one metadata list
677	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
678	{
679	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
680	}
681	else
682	{
683	the_document.appendChild(dummy_children.item(i));
684	}
685	}
686	}
687	}
688	else
689	{
690	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
691	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
692	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
693	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
694
695	if (dc_response_doc_content == null)
696	{
697	// no content to add
698	if (dc_response_doc.getAttribute("external").equals("true"))
699	{
700
701	//if (dc_response_doc_external != null)
702	//{
703	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
704
705	the_document.setAttribute("selectedNode", href_id);
706	the_document.setAttribute("external", href_id);
707	}
708	return result;
709	}
710	if (highlight_query_terms)
711	{
712	dc_response_doc.removeChild(dc_response_doc_content);
713
714	dc_response_doc_content = highlightQueryTerms(request, null, dc_response_doc_content);
715	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
716	}
717
718	if (provide_annotations)
719	{
720	String service_selected = (String) params.get(ENRICH_DOC_ARG);
721	if (service_selected != null && service_selected.equals("1"))
722	{
723	// now we can modifiy the response doc if needed
724	String enrich_service = (String) params.get(GSParams.SERVICE);
725	// send a message to the service
726	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
727	Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
728	enrich_message.appendChild(enrich_request);
729	// check for parameters
730	HashMap e_service_params = (HashMap) params.get("s1");
731	if (e_service_params != null)
732	{
733	Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
734	GSXML.addParametersToList(enrich_pl, e_service_params);
735	enrich_request.appendChild(enrich_pl);
736	}
737	Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
738	enrich_request.appendChild(e_doc_list);
739	e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
740
741	Node enrich_response = this.mr.process(enrich_message);
742
743	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
744	path = GSPath.createPath(links);
745	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
746
747	}
748	} // if provide_annotations
749
750	// use the returned id rather than the sent one cos there may have
751	// been modifiers such as .pr that are removed.
752	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
753	the_document.setAttribute("selectedNode", modified_doc_id);
754	if (has_dummy)
755	{
756	// change the id if necessary and add the content
757	Element dummy_node = (Element) doc_nodes.item(0);
758
759	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
760	dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
761	// hack for simple type
762	if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
763	{
764	// we dont want the internal docNode, just want the content and metadata in the document
765	// rethink this!!
766	the_document.removeChild(dummy_node);
767
768	NodeList dummy_children = dummy_node.getChildNodes();
769	//for (int i=0; i<dummy_children.getLength(); i++) {
770	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
771	{
772	// special case as we don't want more than one metadata list
773	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
774	{
775	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
776	}
777	else
778	{
779	the_document.appendChild(dummy_children.item(i));
780	}
781	}
782	}
783
784	the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
785	}
786	else
787	{
788	// Merge the document content with the metadata and structure information
789	for (int i = 0; i < doc_nodes.getLength(); i++)
790	{
791	Node dn = doc_nodes.item(i);
792	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
793	if (dn_id.equals(modified_doc_id))
794	{
795	dn.appendChild(doc.importNode(dc_response_doc_content, true));
796	break;
797	}
798	}
799	}
800	}
801	//logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
802	return result;
803	}
804
805
806	private boolean needSectionContent(HashMap<String, Serializable> params) {
807	String document_id = (String) params.get(GSParams.DOCUMENT);
808	String ilt = (String) params.get(GSParams.INLINE_TEMPLATE);
809	String iltPrefix = "<xsl:template match=\"/\"><text><xsl:for-each select=\"/page/pageResponse/document//documentNode[@nodeID =";
810	if (ilt != null && ilt.startsWith(iltPrefix) && document_id != null) {
811	return true;
812	}
813
814	return false;
815	}
816	/**
817	* this method gets the collection description, the format info, the list of
818	* enrich services, etc - stuff that is needed for the page, but is the same
819	* whatever the query is - should be cached
820	*/
821	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
822	{
823	Document doc = page_response.getOwnerDocument();
824
825	// create a message to process - contains requests for the collection
826	// description, the format element, the enrich services on offer
827	// these could all be cached
828	Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
829	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
830	// the format request - ignore for now, where does this request go to??
831	Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
832	info_message.appendChild(format_request);
833
834	// the enrich_services request - only do this if provide_annotations is true
835
836	if (provide_annotations)
837	{
838	Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
839	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
840	info_message.appendChild(enrich_services_request);
841	}
842
843	Element info_response = (Element) this.mr.process(info_message);
844
845	// the collection is the first response
846	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
847	Element format_resp = (Element) responses.item(0);
848
849	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
850	if (format_elem != null)
851	{
852	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
853	if (global_format_elem != null)
854	{
855	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
856	}
857
858	// set the format type
859	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
860	page_response.appendChild(doc.importNode(format_elem, true));
861	}
862
863	if (provide_annotations)
864	{
865	Element services_resp = (Element) responses.item(1);
866
867	// a new message for the mr
868	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
869	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
870	boolean service_found = false;
871	for (int j = 0; j < e_services.getLength(); j++)
872	{
873	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
874	{
875	Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
876	enrich_message.appendChild(s);
877	service_found = true;
878	}
879	}
880	if (service_found)
881	{
882	Element enrich_response = (Element) this.mr.process(enrich_message);
883
884	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
885	Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
886	for (int i = 0; i < e_responses.getLength(); i++)
887	{
888	Element e_resp = (Element) e_responses.item(i);
889	Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
890	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
891	service_list.appendChild(e_service);
892	}
893	page_response.appendChild(service_list);
894	}
895	} // if provide_annotations
896	return true;
897
898	}
899
900	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
901	{
902	Document doc = basic_doc_list.getOwnerDocument();
903
904	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
905	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
906	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
907	ds_message.appendChild(ds_request);
908
909	// Create a parameter list to specify the required structure information
910	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
911	Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
912	ds_param_list.appendChild(ds_param);
913	ds_param.setAttribute(GSXML.NAME_ATT, "info");
914	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
915
916	ds_request.appendChild(ds_param_list);
917
918	// add the node list we created earlier
919	ds_request.appendChild(basic_doc_list);
920
921	// Process the document structure retrieve message
922	Element ds_response_message = (Element) this.mr.process(ds_message);
923	if (processErrorElements(ds_response_message, page_response))
924	{
925	return null;
926	}
927
928	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
929	String path = GSPath.createPath(links);
930	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
931	if (info_elem == null) {
932	return null;
933	}
934	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
935	if (doctype_elem != null)
936	{
937	String doc_type = doctype_elem.getAttribute("value");
938	return doc_type;
939	}
940	return null;
941	}
942
943	// Recursive method to set the docType, nodeType and nodeID attributes of each docNode
944	// The docType remains constant as in parameter document_type
945	// The nodeID for the first (root) docNode is already set. For all children, the rootNode id
946	// is updated to be <parent-id>.<num-child>, where the first parent-id is rootNode id.
947	// The nodeType is root if rootNode, internal if there are children and leaf if no children
948	protected void insertDocNodeAttributes(Element docNode, String document_type, String id) {
949
950	boolean isRoot = false;
951	if(id == null) { // rootNode, get the root nodeID to work with recursively
952	id = docNode.getAttribute(GSXML.NODE_ID_ATT);
953	isRoot = true;
954	} else { // for all but the root node, need to still set the nodeID
955	docNode.setAttribute(GSXML.NODE_ID_ATT, id);
956	}
957
958	docNode.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
959
960	NodeList docNodes = GSXML.getChildrenByTagName(docNode, GSXML.DOC_NODE_ELEM);
961	if(docNodes.getLength() > 0) {
962	docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
963	for(int i = 0; i < docNodes.getLength(); i++) {
964	Element childDocNode = (Element)docNodes.item(i);
965
966	// work out the child docNode's nodeID based on current id
967	String nodeID = id + "." + (i+1);
968	insertDocNodeAttributes(childDocNode, document_type, nodeID); //recursion step
969	}
970	} else {
971	docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
972	}
973
974	// rootNode's nodeType is a special case: it's "root", not "leaf" or "internal"
975	if(isRoot) docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
976
977	}
978
979	/** run the XSLT transform which converts from doc.xml format to our internal document format */
980	protected Element transformArchiveToDocument(Element section) {
981
982	String stylesheet_filename = GSFile.stylesheetFile(GlobalProperties.getGSDL3Home(), (String) this.config_params.get(GSConstants.SITE_NAME), "", (String) this.config_params.get(GSConstants.INTERFACE_NAME), (ArrayList<String>) this.config_params.get(GSConstants.BASE_INTERFACES), "archive2document.xsl");
983	if (stylesheet_filename == null) {
984	logger.error("Couldn't find stylesheet archive2document.xsl");
985	return section;
986	}
987
988	Document stylesheet_doc = XMLConverter.getDOM(new File(stylesheet_filename));
989	if (stylesheet_doc == null) {
990	logger.error("Couldn't load in stylesheet "+stylesheet_filename);
991	return section;
992	}
993
994	Document section_doc = XMLConverter.newDOM();
995	section_doc.appendChild(section_doc.importNode(section, true));
996	Node result = this.transformer.transform(stylesheet_doc, section_doc);
997	logger.debug("transform result = "+XMLConverter.getPrettyString(result));
998
999	Element new_element;
1000	if (result.getNodeType() == Node.DOCUMENT_NODE) {
1001	new_element = ((Document) result).getDocumentElement();
1002	} else {
1003	new_element = (Element) result;
1004	}
1005
1006
1007	return new_element;
1008
1009	}
1010
1011
1012	/**
1013	* this involves a bit of a hack to get the equivalent query terms - has to
1014	* requery the query service - uses the last selected service name. (if it
1015	* ends in query). should this action do the query or should it send a
1016	* message to the query action? but that will involve lots of extra stuff.
1017	* also doesn't handle phrases properly - just highlights all the terms
1018	* found in the text.
1019	*/
1020	protected Element highlightQueryTerms(Element request, String current_node_id, Element dc_response_doc_content)
1021	{
1022	Document doc = request.getOwnerDocument();
1023
1024	// do the query again to get term info
1025	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1026	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
1027
1028	HashMap previous_params = (HashMap) params.get("p");
1029	if (previous_params == null)
1030	{
1031	return dc_response_doc_content;
1032	}
1033	String service_name = (String) previous_params.get(GSParams.SERVICE);
1034	if (service_name == null \|\| !service_name.endsWith("Query"))
1035	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
1036	logger.debug("invalid service, not doing highlighting");
1037	return dc_response_doc_content;
1038	}
1039	String collection = (String) params.get(GSParams.COLLECTION);
1040	UserContext userContext = new UserContext(request);
1041	String to = GSPath.appendLink(collection, service_name);
1042
1043	Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
1044	Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
1045	mr_query_message.appendChild(mr_query_request);
1046
1047	// paramList
1048	HashMap service_params = (HashMap) params.get("s1");
1049
1050	Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1051	GSXML.addParametersToList(query_param_list, service_params);
1052	if (current_node_id != null) {
1053	GSXML.addParameterToList(query_param_list, "hldocOID", current_node_id);
1054	} else {
1055	GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
1056	}
1057	mr_query_request.appendChild(query_param_list);
1058	// do the query
1059	Element mr_query_response = (Element) this.mr.process(mr_query_message);
1060	String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
1061	Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
1062	// For SOLR, the above query may come back with a nodeContent element, which is the hldocOID section content, with search terms marked up. We send it back to the documnetContentRetrieve service so that resolveTextMacros can be applied, and it can be properly encased in documentNode etc elements
1063	if (highlighted_Node != null)
1064	{
1065	// Build a request to process highlighted text
1066
1067	Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
1068	to = GSPath.appendLink(collection, "DocumentContentRetrieve");
1069	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
1070	hl_message.appendChild(dc_request);
1071
1072	// Create a parameter list to specify the request parameters - empty for now
1073	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1074	dc_request.appendChild(dc_param_list);
1075
1076	// get the content
1077	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
1078	dc_request.appendChild(doc_list);
1079	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
1080	doc_list.appendChild(current_doc);
1081	current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
1082	//Append highlighted content to request for processing
1083	dc_request.appendChild(doc.importNode(highlighted_Node, true));
1084	Element hl_response_message = (Element) this.mr.process(hl_message);
1085
1086	//Get results
1087	NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
1088	Element content = (Element) contentList.item(0);
1089	return content;
1090	}
1091	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
1092	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
1093	if (query_term_list_element == null)
1094	{
1095	// no term info
1096	logger.error("No query term information.\n");
1097	return dc_response_doc_content;
1098	}
1099
1100	String content = GSXML.getNodeText(dc_response_doc_content);
1101
1102	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
1103	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
1104
1105	HashSet<String> query_term_variants = new HashSet<String>();
1106	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
1107	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
1108	{
1109	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
1110	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
1111	{
1112	for (int i = 0; i < terms_nodelist.getLength(); i++)
1113	{
1114	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
1115	String termValueU = null;
1116	String termValueL = null;
1117
1118	if (termValue.length() > 1)
1119	{
1120	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
1121	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
1122	}
1123	else
1124	{
1125	termValueU = termValue.substring(0, 1).toUpperCase();
1126	termValueL = termValue.substring(0, 1).toLowerCase();
1127	}
1128
1129	query_term_variants.add(termValueU);
1130	query_term_variants.add(termValueL);
1131	}
1132	}
1133	}
1134	else
1135	{
1136	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
1137	{
1138	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
1139	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
1140	for (int j = 0; j < equivalent_terms.length; j++)
1141	{
1142	query_term_variants.add(equivalent_terms[j]);
1143	}
1144	}
1145	}
1146
1147	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
1148
1149	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
1150	String performed_query = GSXML.getNodeText(query_element) + " ";
1151
1152	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1153	int term_start = 0;
1154	boolean in_term = false;
1155	boolean in_phrase = false;
1156	for (int i = 0; i < performed_query.length(); i++)
1157	{
1158	char character = performed_query.charAt(i);
1159	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
1160
1161	// Has a query term just started?
1162	if (in_term == false && is_character_letter_or_digit == true)
1163	{
1164	in_term = true;
1165	term_start = i;
1166	}
1167
1168	// Or has a term just finished?
1169	else if (in_term == true && is_character_letter_or_digit == false)
1170	{
1171	in_term = false;
1172	String term = performed_query.substring(term_start, i);
1173
1174	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
1175	if (term_element != null)
1176	{
1177
1178	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
1179
1180	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
1181	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
1182	{
1183	String termValueU = null;
1184	String termValueL = null;
1185
1186	if (term.length() > 1)
1187	{
1188	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
1189	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
1190	}
1191	else
1192	{
1193	termValueU = term.substring(0, 1).toUpperCase();
1194	termValueL = term.substring(0, 1).toLowerCase();
1195	}
1196
1197	phrase_query_p_term_x_variants.add(termValueU);
1198	phrase_query_p_term_x_variants.add(termValueL);
1199	}
1200	else
1201	{
1202	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
1203	{
1204	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
1205	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
1206	for (int k = 0; k < term_equivalent_terms.length; k++)
1207	{
1208	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
1209	}
1210	}
1211	}
1212	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
1213
1214	if (in_phrase == false)
1215	{
1216	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1217	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1218	}
1219	}
1220	}
1221	// Watch for phrases (surrounded by quotes)
1222	if (character == '\"')
1223	{
1224	// Has a phrase just started?
1225	if (in_phrase == false)
1226	{
1227	in_phrase = true;
1228	}
1229	// Or has a phrase just finished?
1230	else if (in_phrase == true)
1231	{
1232	in_phrase = false;
1233	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1234	}
1235
1236	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1237	}
1238	}
1239
1240	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
1241	}
1242
1243	/**
1244	* Highlights query terms in a piece of text.
1245	*/
1246	private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
1247	{
1248	// Convert the content string to an array of characters for speed
1249	char[] content_characters = new char[content.length()];
1250	content.getChars(0, content.length(), content_characters, 0);
1251
1252	// Now skim through the content, identifying word matches
1253	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
1254	int word_start = 0;
1255	boolean in_word = false;
1256	boolean preceding_word_matched = false;
1257	boolean inTag = false;
1258	for (int i = 0; i < content_characters.length; i++)
1259	{
1260	//We don't want to find words inside HTML tags
1261	if (content_characters[i] == '<')
1262	{
1263	inTag = true;
1264	continue;
1265	}
1266	else if (inTag && content_characters[i] == '>')
1267	{
1268	inTag = false;
1269	}
1270	else if (inTag)
1271	{
1272	continue;
1273	}
1274
1275	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1276
1277	// Has a word just started?
1278	if (in_word == false && is_character_letter_or_digit == true)
1279	{
1280	in_word = true;
1281	word_start = i;
1282	}
1283
1284	// Or has a word just finished?
1285	else if (in_word == true && is_character_letter_or_digit == false)
1286	{
1287	in_word = false;
1288
1289	// Check if the word matches any of the query term equivalents
1290	String word = new String(content_characters, word_start, (i - word_start));
1291	if (query_term_variants.contains(word))
1292	{
1293	// We have found a matching word, so remember its location
1294	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1295	preceding_word_matched = true;
1296	}
1297	else
1298	{
1299	preceding_word_matched = false;
1300	}
1301	}
1302	}
1303
1304	// Don't forget the last word...
1305	if (in_word == true)
1306	{
1307	// Check if the word matches any of the query term equivalents
1308	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1309	if (query_term_variants.contains(word))
1310	{
1311	// We have found a matching word, so remember its location
1312	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1313	}
1314	}
1315
1316	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1317	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1318
1319	// Deal with phrases now
1320	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1321	for (int i = 0; i < word_matches.size(); i++)
1322	{
1323	WordMatch word_match = word_matches.get(i);
1324
1325	// See if any partial phrase matches are extended by this word
1326	if (word_match.preceding_word_matched)
1327	{
1328	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1329	{
1330	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1331	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1332	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1333	if (phrase_query_p_term_x_variants.contains(word_match.word))
1334	{
1335	partial_phrase_match.num_words_matched++;
1336
1337	// Has a complete phrase match occurred?
1338	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1339	{
1340	// Check for overlaps by looking at the previous highlight range
1341	if (!highlight_end_positions.isEmpty())
1342	{
1343	int last_highlight_index = highlight_end_positions.size() - 1;
1344	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1345	if (last_highlight_end > partial_phrase_match.start_position)
1346	{
1347	// There is an overlap, so remove the previous phrase match
1348	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1349	highlight_end_positions.remove(last_highlight_index);
1350	partial_phrase_match.start_position = last_highlight_start;
1351	}
1352	}
1353
1354	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1355	highlight_end_positions.add(new Integer(word_match.end_position));
1356	}
1357	// No, but add the partial match back into the list for next time
1358	else
1359	{
1360	partial_phrase_matches.add(partial_phrase_match);
1361	}
1362	}
1363	}
1364	}
1365	else
1366	{
1367	partial_phrase_matches.clear();
1368	}
1369
1370	// See if this word is at the start of any of the phrases
1371	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1372	{
1373	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1374	if (phrase_query_p_term_variants_list.size()>0) {
1375	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1376	if (phrase_query_p_term_1_variants.contains(word_match.word))
1377	{
1378	// If this phrase is just one word long, we have a complete match
1379	if (phrase_query_p_term_variants_list.size() == 1)
1380	{
1381	highlight_start_positions.add(new Integer(word_match.start_position));
1382	highlight_end_positions.add(new Integer(word_match.end_position));
1383	}
1384	// Otherwise we have the start of a potential phrase match
1385	else
1386	{
1387	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1388	}
1389	}
1390	}
1391	}
1392	}
1393
1394	// Now add the annotation tags into the document at the correct points
1395	Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1396
1397	int last_wrote = 0;
1398	for (int i = 0; i < highlight_start_positions.size(); i++)
1399	{
1400	int highlight_start = highlight_start_positions.get(i).intValue();
1401	int highlight_end = highlight_end_positions.get(i).intValue();
1402
1403	// Print anything before the highlight range
1404	if (last_wrote < highlight_start)
1405	{
1406	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1407	content_element.appendChild(doc.createTextNode(preceding_text));
1408	}
1409
1410	// Print the highlight text, annotated
1411	if (highlight_end > last_wrote)
1412	{
1413	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1414	Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
1415	annotation_element.setAttribute("type", "query_term");
1416	content_element.appendChild(annotation_element);
1417	last_wrote = highlight_end;
1418	}
1419	}
1420
1421	// Finish off any unwritten text
1422	if (last_wrote < content_characters.length)
1423	{
1424	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1425	content_element.appendChild(doc.createTextNode(remaining_text));
1426	}
1427	return content_element;
1428	}
1429
1430	static private class WordMatch
1431	{
1432	public String word;
1433	public int start_position;
1434	public int end_position;
1435	public boolean preceding_word_matched;
1436
1437	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1438	{
1439	this.word = word;
1440	this.start_position = start_position;
1441	this.end_position = end_position;
1442	this.preceding_word_matched = preceding_word_matched;
1443	}
1444	}
1445
1446	static private class PartialPhraseMatch
1447	{
1448	public int start_position;
1449	public int query_phrase_number;
1450	public int num_words_matched;
1451
1452	public PartialPhraseMatch(int start_position, int query_phrase_number)
1453	{
1454	this.start_position = start_position;
1455	this.query_phrase_number = query_phrase_number;
1456	this.num_words_matched = 1;
1457	}
1458	}
1459	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: