Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 32128

Last change on this file since 32128 was 32128, checked in by Georgiy Litvinov, 6 years ago
Remove all sections from requests except needed by inline template.
Property svn:keywords set to `Author Date Id Revision`
File size: 54.2 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24	import org.greenstone.util.GlobalProperties;
25
26	// XML classes
27	import org.w3c.dom.Document;
28	import org.w3c.dom.Element;
29	import org.w3c.dom.Node;
30	import org.w3c.dom.Text;
31	import org.w3c.dom.NodeList;
32
33	// General Java classes
34	import java.util.ArrayList;
35	import java.util.HashMap;
36	import java.util.HashSet;
37	import java.io.File;
38	import java.io.Serializable;
39
40	import org.apache.log4j.*;
41
42	/** Action class for retrieving Documents via the message router */
43	public class DocumentAction extends Action
44	{
45
46	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
47
48	// this is used to specify that the sibling nodes of a selected one should be obtained
49	public static final String SIBLING_ARG = "sib";
50	public static final String GOTO_PAGE_ARG = "gp";
51	public static final String ENRICH_DOC_ARG = "end";
52	public static final String EXPAND_DOCUMENT_ARG = "ed";
53	public static final String EXPAND_CONTENTS_ARG = "ec";
54	public static final String REALISTIC_BOOK_ARG = "book";
55	public static final String NO_TEXT_ARG = "noText";
56	public static final String DOC_EDIT_ARG = "docEdit";
57
58	/**
59	* if this is set to true, when a document is displayed, any annotation type
60	* services (enrich) will be offered to the user as well
61	*/
62	protected boolean provide_annotations = false;
63
64	protected boolean highlight_query_terms = false;
65
66	public boolean configure()
67	{
68	super.configure();
69	String highlight = (String) config_params.get("highlightQueryTerms");
70	if (highlight != null && highlight.equals("true"))
71	{
72	highlight_query_terms = true;
73	}
74	String annotate = (String) config_params.get("displayAnnotationService");
75	if (annotate != null && annotate.equals("true"))
76	{
77	provide_annotations = true;
78	}
79	return true;
80	}
81
82	public Node process(Node message_node)
83	{
84	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
85
86	Element message = GSXML.nodeToElement(message_node);
87	Document doc = XMLConverter.newDOM(); //message.getOwnerDocument();
88
89	// the response
90	Element result = doc.createElement(GSXML.MESSAGE_ELEM);
91	Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
92	result.appendChild(page_response);
93
94	// get the request - assume only one
95	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
96	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
97	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
98
99	// just in case there are some that need to get passed to the services
100	HashMap service_params = (HashMap) params.get("s0");
101
102	String collection = (String) params.get(GSParams.COLLECTION);
103	String document_id = (String) params.get(GSParams.DOCUMENT);
104	if (document_id != null && document_id.equals(""))
105	{
106	document_id = null;
107	}
108	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
109	if (href != null && href.equals(""))
110	{
111	href = null;
112	}
113	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
114	if (document_id == null && href == null)
115	{
116	logger.error("no document specified!");
117	return result;
118	}
119	if (rl != null && rl.equals("0"))
120	{
121	// this is a true external link, we should have been directed to a different page or action
122	logger.error("rl value was 0, shouldn't get here");
123	return result;
124	}
125
126	UserContext userContext = new UserContext(request);
127
128	//append site metadata
129	addSiteMetadata(page_response, userContext);
130	addInterfaceOptions(page_response);
131
132	// get the additional data needed for the page
133	getBackgroundData(page_response, collection, userContext);
134	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
135
136	if (format_elem != null) {
137	// lets look for param defaults set in config file
138	NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
139	for (int i=0; i<param_defaults.getLength(); i++) {
140	Element p = (Element)param_defaults.item(i);
141	String name = p.getAttribute(GSXML.NAME_ATT);
142	if (params.get(name) ==null) {
143	// wasn't set from interface
144	String value = p.getAttribute(GSXML.VALUE_ATT);
145	params.put(name, value );
146	// also add into request param xml so that xslt knows it too
147	GSXML.addParameterToList(cgi_paramList, name, value);
148	}
149	}
150	}
151
152	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
153	if (document_type != null && document_type.equals(""))
154	{
155	//document_type = "hierarchy";
156	document_type = null; // we'll get it later if not already specified
157	}
158	// what if it is null here?? Anu to check...
159
160
161	boolean editing_document = false;
162	String doc_edit = (String) params.get(DOC_EDIT_ARG);
163	if (doc_edit != null && doc_edit.equals("1")) {
164	editing_document = true;
165	}
166
167	// are we editing mode? just get the archive document, convert to our internal doc format, and return it
168	if (editing_document) {
169
170	// call get archive doc
171	Element dx_message = doc.createElement(GSXML.MESSAGE_ELEM);
172	String to = "DocXMLGetSection";
173	Element dx_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
174	dx_message.appendChild(dx_request);
175	Element dx_section = doc.createElement(GSXML.DOCXML_SECTION_ELEM);
176	dx_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
177	dx_section.setAttribute(GSXML.COLLECTION_ATT, collection);
178	dx_request.appendChild(dx_section);
179
180	Element dx_response_message = (Element) this.mr.process(dx_message);
181	if (processErrorElements(dx_response_message, page_response))
182	{
183	return result;
184	}
185
186	// get the section out
187	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOCXML_SECTION_ELEM);
188	Element section = (Element) GSXML.getNodeByPath(dx_response_message, path);
189	if (section == null) {
190	logger.error("no archive doc returned for "+document_id);
191	return result;
192	}
193	// convert the archive format into the internal format that the page response requires
194
195	// work out doctype
196	// NOTE: this will be coming from collection database in index
197	// the archive file doesn't store this. So we have to assume
198	// that the doc type will not be changing with any
199	// modifications happening to archives.
200
201	// if doc type is null, then we need to work it out.
202	// create a basic doc list containing the current node
203
204	if (document_type == null) {
205	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
206	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
207	basic_doc_list.appendChild(current_doc);
208	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id);
209	basic_doc_list.appendChild(current_doc);
210	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
211	}
212
213	if (document_type == null) {
214	logger.debug("@@@ doctype is null, setting to simple");
215	document_type = GSXML.DOC_TYPE_SIMPLE;
216	}
217
218	Element doc_elem = doc.createElement(GSXML.DOCUMENT_ELEM);
219	doc_elem.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
220	page_response.appendChild(doc_elem);
221
222	Element transformed_section = transformArchiveToDocument(section);
223	if (document_type == GSXML.DOC_TYPE_SIMPLE) {
224	// simple doc, only returning a single document node, which is the top level section.
225	doc_elem.setAttribute(GSXML.NODE_ID_ATT, document_id);
226	GSXML.mergeElements(doc_elem, transformed_section);
227	return result;
228	}
229
230	// multi sectioned document.
231	transformed_section.setAttribute(GSXML.NODE_ID_ATT, document_id);
232	// In docEdit mode, we obtain the text from archives, from doc.xml
233	// Now the transformation has replaced <Section> with <documentNode>
234	// Need to add nodeID, nodeType and docType attributes to each docNode
235	// as doc.xml doesn't store that.
236	insertDocNodeAttributes(transformed_section, document_type, null);
237	doc_elem.appendChild(doc.importNode(transformed_section, true));
238	logger.debug("dx result = "+XMLConverter.getPrettyString(result));
239
240	return result;
241	}
242
243	//whether to retrieve siblings or not
244	boolean get_siblings = false;
245	String sibs = (String) params.get(SIBLING_ARG);
246	if (sibs != null && sibs.equals("1"))
247	{
248	get_siblings = true;
249	}
250
251	String doc_id_modifier = "";
252	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
253	if (sibling_num != null && !sibling_num.equals(""))
254	{
255	// we have to modify the doc name
256	doc_id_modifier = "." + sibling_num + ".ss";
257	}
258
259	boolean expand_document = false;
260	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
261	if (ed_arg != null && ed_arg.equals("1"))
262	{
263	expand_document = true;
264	}
265
266	boolean expand_contents = false;
267	if (expand_document)
268	{ // we always expand the contents with the text
269	expand_contents = true;
270	}
271	else
272	{
273	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
274	if (ec_arg != null && ec_arg.equals("1"))
275	{
276	expand_contents = true;
277	}
278	}
279
280	// do we want text content? Not if no_text=1.
281	// expand_document overrides this. - should it??
282	boolean get_text = true;
283	String nt_arg = (String) params.get(NO_TEXT_ARG);
284
285	if (!expand_document && nt_arg!=null && nt_arg.equals("1")) {
286	logger.debug("SETTING GET TEXT TO FALSE");
287	get_text = false;
288	} else {
289	logger.debug("GET TEXT REMAINS TRUE");
290	}
291
292	// the_document is where all the doc info - structure and metadata etc
293	// is added into, to be returned in the page
294	Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
295	page_response.appendChild(the_document);
296
297	// create a basic doc list containing the current node
298	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
299	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
300	basic_doc_list.appendChild(current_doc);
301	if (document_id != null)
302	{
303	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
304	}
305	else
306	{
307	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
308	// do we need this??
309	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
310	}
311
312	if (document_type == null)
313	{
314	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
315	}
316	if (document_type == null)
317	{
318	logger.debug("##### doctype is null, setting to simple");
319	document_type = GSXML.DOC_TYPE_SIMPLE;
320	}
321
322	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
323
324	// Create a parameter list to specify the required structure information
325	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
326
327	if (service_params != null)
328	{
329	GSXML.addParametersToList(ds_param_list, service_params);
330	}
331
332	Element ds_param = null;
333	boolean get_structure = false;
334	boolean get_structure_info = false;
335	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
336	{
337	get_structure_info = true;
338
339	if (expand_contents)
340	{
341	ds_param = doc.createElement(GSXML.PARAM_ELEM);
342	ds_param_list.appendChild(ds_param);
343	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
344	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
345	}
346
347	// get the info needed for paged naviagtion
348	ds_param = doc.createElement(GSXML.PARAM_ELEM);
349	ds_param_list.appendChild(ds_param);
350	ds_param.setAttribute(GSXML.NAME_ATT, "info");
351	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
352	ds_param = doc.createElement(GSXML.PARAM_ELEM);
353	ds_param_list.appendChild(ds_param);
354	ds_param.setAttribute(GSXML.NAME_ATT, "info");
355	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
356	ds_param = doc.createElement(GSXML.PARAM_ELEM);
357	ds_param_list.appendChild(ds_param);
358	ds_param.setAttribute(GSXML.NAME_ATT, "info");
359	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
360
361	if (get_siblings)
362	{
363	ds_param = doc.createElement(GSXML.PARAM_ELEM);
364	ds_param_list.appendChild(ds_param);
365	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
366	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
367	}
368
369	}
370	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\| document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
371	{
372	get_structure = true;
373	if (expand_contents)
374	{
375	ds_param = doc.createElement(GSXML.PARAM_ELEM);
376	ds_param_list.appendChild(ds_param);
377	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
378	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
379	}
380	else
381	{
382	// get the info needed for table of contents
383	ds_param = doc.createElement(GSXML.PARAM_ELEM);
384	ds_param_list.appendChild(ds_param);
385	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
386	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
387	ds_param = doc.createElement(GSXML.PARAM_ELEM);
388	ds_param_list.appendChild(ds_param);
389	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
390	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
391	if (get_siblings)
392	{
393	ds_param = doc.createElement(GSXML.PARAM_ELEM);
394	ds_param_list.appendChild(ds_param);
395	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
396	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
397	}
398	}
399	}
400	else
401	{
402	// we dont need any structure
403	}
404
405	boolean has_dummy = false;
406	if (get_structure \|\| get_structure_info)
407	{
408
409	// Build a request to obtain the document structure
410	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
411	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
412	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
413	ds_message.appendChild(ds_request);
414	ds_request.appendChild(ds_param_list);
415
416	// add the node list we created earlier
417	ds_request.appendChild(basic_doc_list);
418
419	// Process the document structure retrieve message
420	Element ds_response_message = (Element) this.mr.process(ds_message);
421	if (processErrorElements(ds_response_message, page_response))
422	{
423	return result;
424	}
425
426	// get the info and print out
427	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
428	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
429	path = GSPath.appendLink(path, "nodeStructureInfo");
430	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
431	// get the doc_node bit
432	if (ds_response_struct_info != null)
433	{
434	the_document.appendChild(doc.importNode(ds_response_struct_info, true));
435	}
436	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
437	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
438	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
439	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
440
441	if (ds_response_structure != null)
442	{
443	// add the contents of the structure bit into the_document
444	NodeList structs = ds_response_structure.getChildNodes();
445	for (int i = 0; i < structs.getLength(); i++)
446	{
447	the_document.appendChild(doc.importNode(structs.item(i), true));
448	}
449	}
450	else
451	{
452	// no structure nodes, so put in a dummy doc node
453	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
454	if (document_id != null)
455	{
456	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
457	}
458	else
459	{
460	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
461
462	}
463	the_document.appendChild(doc_node);
464	has_dummy = true;
465	}
466	}
467	else
468	{ // a simple type - we dont have a dummy node for simple
469	// should think about this more
470	// no structure request, so just put in a dummy doc node
471	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
472	if (document_id != null)
473	{
474	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
475	}
476	else
477	{
478	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
479	}
480	the_document.appendChild(doc_node);
481	has_dummy = true;
482	}
483
484	// Build a request to obtain some document metadata
485	Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
486	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
487	Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
488	dm_message.appendChild(dm_request);
489	// Create a parameter list to specify the required metadata information
490
491	HashSet<String> meta_names = new HashSet<String>();
492	meta_names.add("Title"); // the default
493	if (format_elem != null)
494	{
495	getRequiredMetadataNames(format_elem, meta_names);
496	}
497
498	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
499	if (extraMetaListElem != null)
500	{
501	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
502	for (int i = 0; i < extraMetaList.getLength(); i++)
503	{
504	meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
505	}
506	}
507
508	Element dm_param_list = createMetadataParamList(doc,meta_names);
509	if (service_params != null)
510	{
511	GSXML.addParametersToList(dm_param_list, service_params);
512	}
513
514	dm_request.appendChild(dm_param_list);
515
516	// create the doc node list for the metadata request
517	Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
518	dm_request.appendChild(dm_doc_list);
519
520	// Add each node from the structure response into the metadata request
521	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
522	for (int i = 0; i < doc_nodes.getLength(); i++)
523	{
524	Element doc_node = (Element) doc_nodes.item(i);
525	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
526
527	// Add the documentNode to the list
528	Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
529	if (needSectionContent(params)) {
530	if (doc_node_id.equals(document_id)) {
531	dm_doc_list.appendChild(dm_doc_node);
532	}
533	} else {
534	dm_doc_list.appendChild(dm_doc_node);
535	}
536	//dm_doc_list.appendChild(dm_doc_node);
537	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
538	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
539	if (document_id == null){
540	dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
541	}
542
543	}
544	// we also want a metadata request to the top level document to get
545	// assocfilepath - this could be cached too
546	Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
547	dm_message.appendChild(doc_meta_request);
548	Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
549	if (service_params != null)
550	{
551	GSXML.addParametersToList(doc_meta_param_list, service_params);
552	}
553
554	doc_meta_request.appendChild(doc_meta_param_list);
555	Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
556	doc_meta_param_list.appendChild(doc_param);
557	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
558	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
559
560	// create the doc node list for the metadata request
561	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
562	doc_meta_request.appendChild(doc_list);
563
564	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
565	// the node we want is the root document node
566	if (document_id != null)
567	{
568	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
569	}
570	/*else
571	{
572	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
573	// can we assume that href is always a top level doc??
574	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
575	//doc_node.setAttribute("externalURL", has_rl);
576	}*/
577	doc_list.appendChild(doc_node);
578
579	Element dm_response_message = (Element) this.mr.process(dm_message);
580	if (processErrorElements(dm_response_message, page_response))
581	{
582	return result;
583	}
584
585	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
586	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
587
588	// Merge the metadata with the structure information
589	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
590	for (int i = 0; i < doc_nodes.getLength(); i++)
591	{
592	String node_idd = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
593	Node dcNode = GSXML.getNamedElement(dm_response_doc_list, "documentNode", GSXML.NODE_ID_ATT, node_idd);
594	GSXML.mergeMetadataLists(doc_nodes.item(i), dcNode);
595	}
596	// get the top level doc metadata out
597	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
598	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
599	GSXML.mergeMetadataLists(the_document, top_doc_node);
600
601	// do we want doc text content? If not, we are done.
602	if (!get_text) {
603	// don't get text
604	return result;
605	}
606
607	// Build a request to obtain some document content
608	Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
609	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
610	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
611	dc_message.appendChild(dc_request);
612
613	// Create a parameter list to specify the request parameters - empty for now
614	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
615	if (service_params != null)
616	{
617	GSXML.addParametersToList(dc_param_list, service_params);
618	}
619
620	dc_request.appendChild(dc_param_list);
621
622	// get the content
623	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
624	if (expand_document)
625	{
626	dc_request.appendChild(dm_doc_list);
627	}
628	else
629	{
630	dc_request.appendChild(basic_doc_list);
631	}
632	Element dc_response_message = (Element) this.mr.process(dc_message);
633
634	if (processErrorElements(dc_response_message, page_response))
635	{
636	return result;
637
638	}
639	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
640
641	if (expand_document)
642	{
643	// Merge the content with the structure information
644	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
645	for (int i = 0; i < doc_nodes.getLength(); i++)
646	{
647	String node_id = ((Element)doc_nodes.item(i)).getAttribute(GSXML.NODE_ID_ATT);
648	//Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), GSXML.NODE_CONTENT_ELEM);
649	Node docNode = GSXML.getNamedElement(dc_response_doc_list, "documentNode", GSXML.NODE_ID_ATT, node_id);
650	Node content = GSXML.getChildByTagName(docNode, GSXML.NODE_CONTENT_ELEM);
651	if (content != null)
652	{
653	if (highlight_query_terms)
654	{
655
656	content = highlightQueryTerms(request, node_id, (Element) content);
657	}
658
659	doc_nodes.item(i).appendChild(doc.importNode(content, true));
660	}
661	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
662	}
663	if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
664	Element dummy_node = (Element) doc_nodes.item(0);
665	the_document.removeChild(dummy_node);
666	the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
667	NodeList dummy_children = dummy_node.getChildNodes();
668	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
669	{
670	// special case as we don't want more than one metadata list
671	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
672	{
673	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
674	}
675	else
676	{
677	the_document.appendChild(dummy_children.item(i));
678	}
679	}
680	}
681	}
682	else
683	{
684	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
685	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
686	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
687	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
688
689	if (dc_response_doc_content == null)
690	{
691	// no content to add
692	if (dc_response_doc.getAttribute("external").equals("true"))
693	{
694
695	//if (dc_response_doc_external != null)
696	//{
697	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
698
699	the_document.setAttribute("selectedNode", href_id);
700	the_document.setAttribute("external", href_id);
701	}
702	return result;
703	}
704	if (highlight_query_terms)
705	{
706	dc_response_doc.removeChild(dc_response_doc_content);
707
708	dc_response_doc_content = highlightQueryTerms(request, null, dc_response_doc_content);
709	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
710	}
711
712	if (provide_annotations)
713	{
714	String service_selected = (String) params.get(ENRICH_DOC_ARG);
715	if (service_selected != null && service_selected.equals("1"))
716	{
717	// now we can modifiy the response doc if needed
718	String enrich_service = (String) params.get(GSParams.SERVICE);
719	// send a message to the service
720	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
721	Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
722	enrich_message.appendChild(enrich_request);
723	// check for parameters
724	HashMap e_service_params = (HashMap) params.get("s1");
725	if (e_service_params != null)
726	{
727	Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
728	GSXML.addParametersToList(enrich_pl, e_service_params);
729	enrich_request.appendChild(enrich_pl);
730	}
731	Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
732	enrich_request.appendChild(e_doc_list);
733	e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
734
735	Node enrich_response = this.mr.process(enrich_message);
736
737	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
738	path = GSPath.createPath(links);
739	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
740
741	}
742	} // if provide_annotations
743
744	// use the returned id rather than the sent one cos there may have
745	// been modifiers such as .pr that are removed.
746	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
747	the_document.setAttribute("selectedNode", modified_doc_id);
748	if (has_dummy)
749	{
750	// change the id if necessary and add the content
751	Element dummy_node = (Element) doc_nodes.item(0);
752
753	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
754	dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
755	// hack for simple type
756	if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
757	{
758	// we dont want the internal docNode, just want the content and metadata in the document
759	// rethink this!!
760	the_document.removeChild(dummy_node);
761
762	NodeList dummy_children = dummy_node.getChildNodes();
763	//for (int i=0; i<dummy_children.getLength(); i++) {
764	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
765	{
766	// special case as we don't want more than one metadata list
767	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
768	{
769	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
770	}
771	else
772	{
773	the_document.appendChild(dummy_children.item(i));
774	}
775	}
776	}
777
778	the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
779	}
780	else
781	{
782	// Merge the document content with the metadata and structure information
783	for (int i = 0; i < doc_nodes.getLength(); i++)
784	{
785	Node dn = doc_nodes.item(i);
786	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
787	if (dn_id.equals(modified_doc_id))
788	{
789	dn.appendChild(doc.importNode(dc_response_doc_content, true));
790	break;
791	}
792	}
793	}
794	}
795	//logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
796	return result;
797	}
798
799	/**
800	* tell the param class what its arguments are if an action has its own
801	* arguments, this should add them to the params object - particularly
802	* important for args that should not be saved
803	*/
804	public boolean addActionParameters(GSParams params)
805	{
806	params.addParameter(GOTO_PAGE_ARG, false);
807	params.addParameter(ENRICH_DOC_ARG, false);
808	params.addParameter(EXPAND_DOCUMENT_ARG, false);
809	params.addParameter(EXPAND_CONTENTS_ARG, false);
810	params.addParameter(REALISTIC_BOOK_ARG, false);
811
812	return true;
813	}
814
815	private boolean needSectionContent(HashMap<String, Serializable> params) {
816	String document_id = (String) params.get(GSParams.DOCUMENT);
817	String ilt = (String) params.get(GSParams.INLINE_TEMPLATE);
818	String iltPrefix = "<xsl:template match=\"/\"><text><xsl:for-each select=\"/page/pageResponse/document//documentNode[@nodeID =";
819	if (ilt != null && ilt.startsWith(iltPrefix) && document_id != null) {
820	return true;
821	}
822
823	return false;
824	}
825	/**
826	* this method gets the collection description, the format info, the list of
827	* enrich services, etc - stuff that is needed for the page, but is the same
828	* whatever the query is - should be cached
829	*/
830	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
831	{
832	Document doc = page_response.getOwnerDocument();
833
834	// create a message to process - contains requests for the collection
835	// description, the format element, the enrich services on offer
836	// these could all be cached
837	Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
838	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
839	// the format request - ignore for now, where does this request go to??
840	Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
841	info_message.appendChild(format_request);
842
843	// the enrich_services request - only do this if provide_annotations is true
844
845	if (provide_annotations)
846	{
847	Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
848	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
849	info_message.appendChild(enrich_services_request);
850	}
851
852	Element info_response = (Element) this.mr.process(info_message);
853
854	// the collection is the first response
855	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
856	Element format_resp = (Element) responses.item(0);
857
858	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
859	if (format_elem != null)
860	{
861	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
862	if (global_format_elem != null)
863	{
864	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
865	}
866
867	// set the format type
868	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
869	page_response.appendChild(doc.importNode(format_elem, true));
870	}
871
872	if (provide_annotations)
873	{
874	Element services_resp = (Element) responses.item(1);
875
876	// a new message for the mr
877	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
878	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
879	boolean service_found = false;
880	for (int j = 0; j < e_services.getLength(); j++)
881	{
882	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
883	{
884	Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
885	enrich_message.appendChild(s);
886	service_found = true;
887	}
888	}
889	if (service_found)
890	{
891	Element enrich_response = (Element) this.mr.process(enrich_message);
892
893	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
894	Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
895	for (int i = 0; i < e_responses.getLength(); i++)
896	{
897	Element e_resp = (Element) e_responses.item(i);
898	Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
899	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
900	service_list.appendChild(e_service);
901	}
902	page_response.appendChild(service_list);
903	}
904	} // if provide_annotations
905	return true;
906
907	}
908
909	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
910	{
911	Document doc = basic_doc_list.getOwnerDocument();
912
913	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
914	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
915	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
916	ds_message.appendChild(ds_request);
917
918	// Create a parameter list to specify the required structure information
919	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
920	Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
921	ds_param_list.appendChild(ds_param);
922	ds_param.setAttribute(GSXML.NAME_ATT, "info");
923	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
924
925	ds_request.appendChild(ds_param_list);
926
927	// add the node list we created earlier
928	ds_request.appendChild(basic_doc_list);
929
930	// Process the document structure retrieve message
931	Element ds_response_message = (Element) this.mr.process(ds_message);
932	if (processErrorElements(ds_response_message, page_response))
933	{
934	return null;
935	}
936
937	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
938	String path = GSPath.createPath(links);
939	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
940	if (info_elem == null) {
941	return null;
942	}
943	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
944	if (doctype_elem != null)
945	{
946	String doc_type = doctype_elem.getAttribute("value");
947	return doc_type;
948	}
949	return null;
950	}
951
952	// Recursive method to set the docType, nodeType and nodeID attributes of each docNode
953	// The docType remains constant as in parameter document_type
954	// The nodeID for the first (root) docNode is already set. For all children, the rootNode id
955	// is updated to be <parent-id>.<num-child>, where the first parent-id is rootNode id.
956	// The nodeType is root if rootNode, internal if there are children and leaf if no children
957	protected void insertDocNodeAttributes(Element docNode, String document_type, String id) {
958
959	boolean isRoot = false;
960	if(id == null) { // rootNode, get the root nodeID to work with recursively
961	id = docNode.getAttribute(GSXML.NODE_ID_ATT);
962	isRoot = true;
963	} else { // for all but the root node, need to still set the nodeID
964	docNode.setAttribute(GSXML.NODE_ID_ATT, id);
965	}
966
967	docNode.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
968
969	NodeList docNodes = GSXML.getChildrenByTagName(docNode, GSXML.DOC_NODE_ELEM);
970	if(docNodes.getLength() > 0) {
971	docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_INTERNAL);
972	for(int i = 0; i < docNodes.getLength(); i++) {
973	Element childDocNode = (Element)docNodes.item(i);
974
975	// work out the child docNode's nodeID based on current id
976	String nodeID = id + "." + (i+1);
977	insertDocNodeAttributes(childDocNode, document_type, nodeID); //recursion step
978	}
979	} else {
980	docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_LEAF);
981	}
982
983	// rootNode's nodeType is a special case: it's "root", not "leaf" or "internal"
984	if(isRoot) docNode.setAttribute(GSXML.NODE_TYPE_ATT, GSXML.NODE_TYPE_ROOT);
985
986	}
987
988	/** run the XSLT transform which converts from doc.xml format to our internal document format */
989	protected Element transformArchiveToDocument(Element section) {
990
991	String stylesheet_filename = GSFile.stylesheetFile(GlobalProperties.getGSDL3Home(), (String) this.config_params.get(GSConstants.SITE_NAME), "", (String) this.config_params.get(GSConstants.INTERFACE_NAME), (ArrayList<String>) this.config_params.get(GSConstants.BASE_INTERFACES), "archive2document.xsl");
992	if (stylesheet_filename == null) {
993	logger.error("Couldn't find stylesheet archive2document.xsl");
994	return section;
995	}
996
997	Document stylesheet_doc = XMLConverter.getDOM(new File(stylesheet_filename));
998	if (stylesheet_doc == null) {
999	logger.error("Couldn't load in stylesheet "+stylesheet_filename);
1000	return section;
1001	}
1002
1003	Document section_doc = XMLConverter.newDOM();
1004	section_doc.appendChild(section_doc.importNode(section, true));
1005	Node result = this.transformer.transform(stylesheet_doc, section_doc);
1006	logger.debug("transform result = "+XMLConverter.getPrettyString(result));
1007
1008	Element new_element;
1009	if (result.getNodeType() == Node.DOCUMENT_NODE) {
1010	new_element = ((Document) result).getDocumentElement();
1011	} else {
1012	new_element = (Element) result;
1013	}
1014
1015
1016	return new_element;
1017
1018	}
1019
1020
1021	/**
1022	* this involves a bit of a hack to get the equivalent query terms - has to
1023	* requery the query service - uses the last selected service name. (if it
1024	* ends in query). should this action do the query or should it send a
1025	* message to the query action? but that will involve lots of extra stuff.
1026	* also doesn't handle phrases properly - just highlights all the terms
1027	* found in the text.
1028	*/
1029	protected Element highlightQueryTerms(Element request, String current_node_id, Element dc_response_doc_content)
1030	{
1031	Document doc = request.getOwnerDocument();
1032
1033	// do the query again to get term info
1034	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1035	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
1036
1037	HashMap previous_params = (HashMap) params.get("p");
1038	if (previous_params == null)
1039	{
1040	return dc_response_doc_content;
1041	}
1042	String service_name = (String) previous_params.get(GSParams.SERVICE);
1043	if (service_name == null \|\| !service_name.endsWith("Query"))
1044	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
1045	logger.debug("invalid service, not doing highlighting");
1046	return dc_response_doc_content;
1047	}
1048	String collection = (String) params.get(GSParams.COLLECTION);
1049	UserContext userContext = new UserContext(request);
1050	String to = GSPath.appendLink(collection, service_name);
1051
1052	Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
1053	Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
1054	mr_query_message.appendChild(mr_query_request);
1055
1056	// paramList
1057	HashMap service_params = (HashMap) params.get("s1");
1058
1059	Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1060	GSXML.addParametersToList(query_param_list, service_params);
1061	if (current_node_id != null) {
1062	GSXML.addParameterToList(query_param_list, "hldocOID", current_node_id);
1063	} else {
1064	GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
1065	}
1066	mr_query_request.appendChild(query_param_list);
1067	// do the query
1068	Element mr_query_response = (Element) this.mr.process(mr_query_message);
1069	String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
1070	Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
1071	// For SOLR, the above query may come back with a nodeContent element, which is the hldocOID section content, with search terms marked up. We send it back to the documnetContentRetrieve service so that resolveTextMacros can be applied, and it can be properly encased in documentNode etc elements
1072	if (highlighted_Node != null)
1073	{
1074	// Build a request to process highlighted text
1075
1076	Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
1077	to = GSPath.appendLink(collection, "DocumentContentRetrieve");
1078	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
1079	hl_message.appendChild(dc_request);
1080
1081	// Create a parameter list to specify the request parameters - empty for now
1082	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1083	dc_request.appendChild(dc_param_list);
1084
1085	// get the content
1086	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
1087	dc_request.appendChild(doc_list);
1088	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
1089	doc_list.appendChild(current_doc);
1090	current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
1091	//Append highlighted content to request for processing
1092	dc_request.appendChild(doc.importNode(highlighted_Node, true));
1093	Element hl_response_message = (Element) this.mr.process(hl_message);
1094
1095	//Get results
1096	NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
1097	Element content = (Element) contentList.item(0);
1098	return content;
1099	}
1100	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
1101	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
1102	if (query_term_list_element == null)
1103	{
1104	// no term info
1105	logger.error("No query term information.\n");
1106	return dc_response_doc_content;
1107	}
1108
1109	String content = GSXML.getNodeText(dc_response_doc_content);
1110
1111	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
1112	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
1113
1114	HashSet<String> query_term_variants = new HashSet<String>();
1115	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
1116	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
1117	{
1118	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
1119	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
1120	{
1121	for (int i = 0; i < terms_nodelist.getLength(); i++)
1122	{
1123	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
1124	String termValueU = null;
1125	String termValueL = null;
1126
1127	if (termValue.length() > 1)
1128	{
1129	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
1130	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
1131	}
1132	else
1133	{
1134	termValueU = termValue.substring(0, 1).toUpperCase();
1135	termValueL = termValue.substring(0, 1).toLowerCase();
1136	}
1137
1138	query_term_variants.add(termValueU);
1139	query_term_variants.add(termValueL);
1140	}
1141	}
1142	}
1143	else
1144	{
1145	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
1146	{
1147	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
1148	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
1149	for (int j = 0; j < equivalent_terms.length; j++)
1150	{
1151	query_term_variants.add(equivalent_terms[j]);
1152	}
1153	}
1154	}
1155
1156	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
1157
1158	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
1159	String performed_query = GSXML.getNodeText(query_element) + " ";
1160
1161	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1162	int term_start = 0;
1163	boolean in_term = false;
1164	boolean in_phrase = false;
1165	for (int i = 0; i < performed_query.length(); i++)
1166	{
1167	char character = performed_query.charAt(i);
1168	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
1169
1170	// Has a query term just started?
1171	if (in_term == false && is_character_letter_or_digit == true)
1172	{
1173	in_term = true;
1174	term_start = i;
1175	}
1176
1177	// Or has a term just finished?
1178	else if (in_term == true && is_character_letter_or_digit == false)
1179	{
1180	in_term = false;
1181	String term = performed_query.substring(term_start, i);
1182
1183	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
1184	if (term_element != null)
1185	{
1186
1187	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
1188
1189	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
1190	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
1191	{
1192	String termValueU = null;
1193	String termValueL = null;
1194
1195	if (term.length() > 1)
1196	{
1197	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
1198	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
1199	}
1200	else
1201	{
1202	termValueU = term.substring(0, 1).toUpperCase();
1203	termValueL = term.substring(0, 1).toLowerCase();
1204	}
1205
1206	phrase_query_p_term_x_variants.add(termValueU);
1207	phrase_query_p_term_x_variants.add(termValueL);
1208	}
1209	else
1210	{
1211	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
1212	{
1213	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
1214	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
1215	for (int k = 0; k < term_equivalent_terms.length; k++)
1216	{
1217	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
1218	}
1219	}
1220	}
1221	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
1222
1223	if (in_phrase == false)
1224	{
1225	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1226	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1227	}
1228	}
1229	}
1230	// Watch for phrases (surrounded by quotes)
1231	if (character == '\"')
1232	{
1233	// Has a phrase just started?
1234	if (in_phrase == false)
1235	{
1236	in_phrase = true;
1237	}
1238	// Or has a phrase just finished?
1239	else if (in_phrase == true)
1240	{
1241	in_phrase = false;
1242	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1243	}
1244
1245	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1246	}
1247	}
1248
1249	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
1250	}
1251
1252	/**
1253	* Highlights query terms in a piece of text.
1254	*/
1255	private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
1256	{
1257	// Convert the content string to an array of characters for speed
1258	char[] content_characters = new char[content.length()];
1259	content.getChars(0, content.length(), content_characters, 0);
1260
1261	// Now skim through the content, identifying word matches
1262	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
1263	int word_start = 0;
1264	boolean in_word = false;
1265	boolean preceding_word_matched = false;
1266	boolean inTag = false;
1267	for (int i = 0; i < content_characters.length; i++)
1268	{
1269	//We don't want to find words inside HTML tags
1270	if (content_characters[i] == '<')
1271	{
1272	inTag = true;
1273	continue;
1274	}
1275	else if (inTag && content_characters[i] == '>')
1276	{
1277	inTag = false;
1278	}
1279	else if (inTag)
1280	{
1281	continue;
1282	}
1283
1284	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1285
1286	// Has a word just started?
1287	if (in_word == false && is_character_letter_or_digit == true)
1288	{
1289	in_word = true;
1290	word_start = i;
1291	}
1292
1293	// Or has a word just finished?
1294	else if (in_word == true && is_character_letter_or_digit == false)
1295	{
1296	in_word = false;
1297
1298	// Check if the word matches any of the query term equivalents
1299	String word = new String(content_characters, word_start, (i - word_start));
1300	if (query_term_variants.contains(word))
1301	{
1302	// We have found a matching word, so remember its location
1303	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1304	preceding_word_matched = true;
1305	}
1306	else
1307	{
1308	preceding_word_matched = false;
1309	}
1310	}
1311	}
1312
1313	// Don't forget the last word...
1314	if (in_word == true)
1315	{
1316	// Check if the word matches any of the query term equivalents
1317	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1318	if (query_term_variants.contains(word))
1319	{
1320	// We have found a matching word, so remember its location
1321	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1322	}
1323	}
1324
1325	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1326	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1327
1328	// Deal with phrases now
1329	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1330	for (int i = 0; i < word_matches.size(); i++)
1331	{
1332	WordMatch word_match = word_matches.get(i);
1333
1334	// See if any partial phrase matches are extended by this word
1335	if (word_match.preceding_word_matched)
1336	{
1337	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1338	{
1339	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1340	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1341	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1342	if (phrase_query_p_term_x_variants.contains(word_match.word))
1343	{
1344	partial_phrase_match.num_words_matched++;
1345
1346	// Has a complete phrase match occurred?
1347	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1348	{
1349	// Check for overlaps by looking at the previous highlight range
1350	if (!highlight_end_positions.isEmpty())
1351	{
1352	int last_highlight_index = highlight_end_positions.size() - 1;
1353	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1354	if (last_highlight_end > partial_phrase_match.start_position)
1355	{
1356	// There is an overlap, so remove the previous phrase match
1357	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1358	highlight_end_positions.remove(last_highlight_index);
1359	partial_phrase_match.start_position = last_highlight_start;
1360	}
1361	}
1362
1363	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1364	highlight_end_positions.add(new Integer(word_match.end_position));
1365	}
1366	// No, but add the partial match back into the list for next time
1367	else
1368	{
1369	partial_phrase_matches.add(partial_phrase_match);
1370	}
1371	}
1372	}
1373	}
1374	else
1375	{
1376	partial_phrase_matches.clear();
1377	}
1378
1379	// See if this word is at the start of any of the phrases
1380	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1381	{
1382	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1383	if (phrase_query_p_term_variants_list.size()>0) {
1384	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1385	if (phrase_query_p_term_1_variants.contains(word_match.word))
1386	{
1387	// If this phrase is just one word long, we have a complete match
1388	if (phrase_query_p_term_variants_list.size() == 1)
1389	{
1390	highlight_start_positions.add(new Integer(word_match.start_position));
1391	highlight_end_positions.add(new Integer(word_match.end_position));
1392	}
1393	// Otherwise we have the start of a potential phrase match
1394	else
1395	{
1396	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1397	}
1398	}
1399	}
1400	}
1401	}
1402
1403	// Now add the annotation tags into the document at the correct points
1404	Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1405
1406	int last_wrote = 0;
1407	for (int i = 0; i < highlight_start_positions.size(); i++)
1408	{
1409	int highlight_start = highlight_start_positions.get(i).intValue();
1410	int highlight_end = highlight_end_positions.get(i).intValue();
1411
1412	// Print anything before the highlight range
1413	if (last_wrote < highlight_start)
1414	{
1415	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1416	content_element.appendChild(doc.createTextNode(preceding_text));
1417	}
1418
1419	// Print the highlight text, annotated
1420	if (highlight_end > last_wrote)
1421	{
1422	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1423	Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
1424	annotation_element.setAttribute("type", "query_term");
1425	content_element.appendChild(annotation_element);
1426	last_wrote = highlight_end;
1427	}
1428	}
1429
1430	// Finish off any unwritten text
1431	if (last_wrote < content_characters.length)
1432	{
1433	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1434	content_element.appendChild(doc.createTextNode(remaining_text));
1435	}
1436	return content_element;
1437	}
1438
1439	static private class WordMatch
1440	{
1441	public String word;
1442	public int start_position;
1443	public int end_position;
1444	public boolean preceding_word_matched;
1445
1446	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1447	{
1448	this.word = word;
1449	this.start_position = start_position;
1450	this.end_position = end_position;
1451	this.preceding_word_matched = preceding_word_matched;
1452	}
1453	}
1454
1455	static private class PartialPhraseMatch
1456	{
1457	public int start_position;
1458	public int query_phrase_number;
1459	public int num_words_matched;
1460
1461	public PartialPhraseMatch(int start_position, int query_phrase_number)
1462	{
1463	this.start_position = start_position;
1464	this.query_phrase_number = query_phrase_number;
1465	this.num_words_matched = 1;
1466	}
1467	}
1468	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: