Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 30553

Last change on this file since 30553 was 30553, checked in by kjdon, 8 years ago
added ability for the collectionConfig.xml file to carry additional stuff. Can have extraInfo element at the top level (inside collectionConfig). For now this is used to add extra items to the navigation bar (<navigationTab type=external-link
Property svn:keywords set to `Author Date Id Revision`
File size: 51.1 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.Arrays;
35	import java.util.HashMap;
36	import java.util.HashSet;
37	import java.io.File;
38	import java.io.Serializable;
39
40	import org.apache.log4j.*;
41
42	/** Action class for retrieving Documents via the message router */
43	public class DocumentAction extends Action
44	{
45
46	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName()); // log4j logger named after this class
47
48	// Request argument: when its value is "1", process() also asks for the sibling nodes of the selected node
49	public static final String SIBLING_ARG = "sib";
50	public static final String GOTO_PAGE_ARG = "gp"; // page/sibling number; process() turns value n into the id suffix ".n.ss"
51	public static final String ENRICH_DOC_ARG = "end"; // "1" makes process() send the content to the selected enrich service (only when provide_annotations is set)
52	public static final String EXPAND_DOCUMENT_ARG = "ed"; // "1" makes process() fetch the content of every node; this also forces contents expansion
53	public static final String EXPAND_CONTENTS_ARG = "ec"; // "1" makes process() request the entire structure instead of ancestors/children
54	public static final String REALISTIC_BOOK_ARG = "book"; // only registered in addActionParameters(); not read anywhere in the visible code
55
56	/**
57	* if this is set to true, when a document is displayed, any annotation type
58	* services (enrich) will be offered to the user as well.
	* Set by configure() when the "displayAnnotationService" config param is "true".
59	*/
60	protected boolean provide_annotations = false;
61
62	protected boolean highlight_query_terms = false; // set by configure() when the "highlightQueryTerms" config param is "true"; enables term highlighting of retrieved content
63
64	public boolean configure()
65	{
66	super.configure();
67	String highlight = (String) config_params.get("highlightQueryTerms");
68	if (highlight != null && highlight.equals("true"))
69	{
70	highlight_query_terms = true;
71	}
72	String annotate = (String) config_params.get("displayAnnotationService");
73	if (annotate != null && annotate.equals("true"))
74	{
75	provide_annotations = true;
76	}
77	return true;
78	}
79
80	public Node process(Node message_node)
81	{
82	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
83
84	Element message = GSXML.nodeToElement(message_node);
85	Document doc = message.getOwnerDocument();
86
87	// the response
88	Element result = doc.createElement(GSXML.MESSAGE_ELEM);
89	Element page_response = doc.createElement(GSXML.RESPONSE_ELEM);
90	result.appendChild(page_response);
91
92	// get the request - assume only one
93	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
94	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
95	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
96
97	// just in case there are some that need to get passed to the services
98	HashMap service_params = (HashMap) params.get("s0");
99
100	String collection = (String) params.get(GSParams.COLLECTION);
101	String document_id = (String) params.get(GSParams.DOCUMENT);
102	if (document_id != null && document_id.equals(""))
103	{
104	document_id = null;
105	}
106	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
107	if (href != null && href.equals(""))
108	{
109	href = null;
110	}
111	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
112	if (document_id == null && href == null)
113	{
114	logger.error("no document specified!");
115	return result;
116	}
117	if (rl != null && rl.equals("0"))
118	{
119	// this is a true external link, we should have been directed to a different page or action
120	logger.error("rl value was 0, shouldn't get here");
121	return result;
122	}
123
124	String query_terms = (String) params.get("terms");
125	logger.error("terms = "+query_terms);
126	String query = (String) params.get("query");
127	UserContext userContext = new UserContext(request);
128
129	//append site metadata
130	addSiteMetadata(page_response, userContext);
131	addInterfaceOptions(page_response);
132
133	// get the additional data needed for the page
134	getBackgroundData(page_response, collection, userContext);
135	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
136
137	if (format_elem != null) {
138	// lets look for param defaults set in config file
139	NodeList param_defaults = format_elem.getElementsByTagName("paramDefault");
140	for (int i=0; i<param_defaults.getLength(); i++) {
141	Element p = (Element)param_defaults.item(i);
142	String name = p.getAttribute(GSXML.NAME_ATT);
143	if (params.get(name) ==null) {
144	// wasn't set from interface
145	String value = p.getAttribute(GSXML.VALUE_ATT);
146	params.put(name, value );
147	// also add into request param xml so that xslt knows it too
148	GSXML.addParameterToList(cgi_paramList, name, value);
149	}
150	}
151	}
152	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
153	if (document_type != null && document_type.equals(""))
154	{
155	//document_type = "hierarchy";
156	document_type = null; // we'll get it later if not already specified
157	}
158	//whether to retrieve siblings or not
159	boolean get_siblings = false;
160	String sibs = (String) params.get(SIBLING_ARG);
161	if (sibs != null && sibs.equals("1"))
162	{
163	get_siblings = true;
164	}
165
166	String doc_id_modifier = "";
167	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
168	if (sibling_num != null && !sibling_num.equals(""))
169	{
170	// we have to modify the doc name
171	doc_id_modifier = "." + sibling_num + ".ss";
172	}
173
174	boolean expand_document = false;
175	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
176	if (ed_arg != null && ed_arg.equals("1"))
177	{
178	expand_document = true;
179	}
180
181	boolean expand_contents = false;
182	if (expand_document)
183	{ // we always expand the contents with the text
184	expand_contents = true;
185	}
186	else
187	{
188	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
189	if (ec_arg != null && ec_arg.equals("1"))
190	{
191	expand_contents = true;
192	}
193	}
194
195	// UserContext userContext = new UserContext(request);
196
197	// //append site metadata
198	// addSiteMetadata(page_response, userContext);
199	// addInterfaceOptions(page_response);
200
201	// // get the additional data needed for the page
202	// getBackgroundData(page_response, collection, userContext);
203	// Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
204
205	// the_document is where all the doc info - structure and metadata etc
206	// is added into, to be returned in the page
207	Element the_document = doc.createElement(GSXML.DOCUMENT_ELEM);
208	page_response.appendChild(the_document);
209
210	// create a basic doc list containing the current node
211	Element basic_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
212	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
213	basic_doc_list.appendChild(current_doc);
214	if (document_id != null)
215	{
216	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
217	}
218	else
219	{
220	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
221	// do we need this??
222	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
223	}
224
225	if (document_type == null)
226	{
227	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
228	}
229	if (document_type == null)
230	{
231	logger.error("doctype is null!!!***********");
232	document_type = GSXML.DOC_TYPE_SIMPLE;
233	}
234
235	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
236
237
238	// Create a parameter list to specify the required structure information
239	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
240
241	if (service_params != null)
242	{
243	GSXML.addParametersToList(ds_param_list, service_params);
244	}
245
246	Element ds_param = null;
247	boolean get_structure = false;
248	boolean get_structure_info = false;
249	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
250	{
251	get_structure_info = true;
252
253	if (expand_contents)
254	{
255	ds_param = doc.createElement(GSXML.PARAM_ELEM);
256	ds_param_list.appendChild(ds_param);
257	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
258	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
259	}
260
261	// get the info needed for paged naviagtion
262	ds_param = doc.createElement(GSXML.PARAM_ELEM);
263	ds_param_list.appendChild(ds_param);
264	ds_param.setAttribute(GSXML.NAME_ATT, "info");
265	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
266	ds_param = doc.createElement(GSXML.PARAM_ELEM);
267	ds_param_list.appendChild(ds_param);
268	ds_param.setAttribute(GSXML.NAME_ATT, "info");
269	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
270	ds_param = doc.createElement(GSXML.PARAM_ELEM);
271	ds_param_list.appendChild(ds_param);
272	ds_param.setAttribute(GSXML.NAME_ATT, "info");
273	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
274
275	if (get_siblings)
276	{
277	ds_param = doc.createElement(GSXML.PARAM_ELEM);
278	ds_param_list.appendChild(ds_param);
279	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
280	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
281	}
282
283	}
284	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\| document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY))
285	{
286	get_structure = true;
287	if (expand_contents)
288	{
289	ds_param = doc.createElement(GSXML.PARAM_ELEM);
290	ds_param_list.appendChild(ds_param);
291	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
292	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
293	}
294	else
295	{
296	// get the info needed for table of contents
297	ds_param = doc.createElement(GSXML.PARAM_ELEM);
298	ds_param_list.appendChild(ds_param);
299	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
300	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
301	ds_param = doc.createElement(GSXML.PARAM_ELEM);
302	ds_param_list.appendChild(ds_param);
303	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
304	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
305	if (get_siblings)
306	{
307	ds_param = doc.createElement(GSXML.PARAM_ELEM);
308	ds_param_list.appendChild(ds_param);
309	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
310	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
311	}
312	}
313	}
314	else
315	{
316	// we dont need any structure
317	}
318
319	boolean has_dummy = false;
320	if (get_structure \|\| get_structure_info)
321	{
322
323	// Build a request to obtain the document structure
324	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
325	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
326	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
327	ds_message.appendChild(ds_request);
328	ds_request.appendChild(ds_param_list);
329
330	// add the node list we created earlier
331	ds_request.appendChild(basic_doc_list);
332
333	// Process the document structure retrieve message
334	Element ds_response_message = (Element) this.mr.process(ds_message);
335	if (processErrorElements(ds_response_message, page_response))
336	{
337	return result;
338	}
339
340	// get the info and print out
341	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
342	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
343	path = GSPath.appendLink(path, "nodeStructureInfo");
344	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
345	// get the doc_node bit
346	if (ds_response_struct_info != null)
347	{
348	the_document.appendChild(doc.importNode(ds_response_struct_info, true));
349	}
350	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
351	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
352	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
353	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
354
355	if (ds_response_structure != null)
356	{
357	// add the contents of the structure bit into the_document
358	NodeList structs = ds_response_structure.getChildNodes();
359	for (int i = 0; i < structs.getLength(); i++)
360	{
361	the_document.appendChild(doc.importNode(structs.item(i), true));
362	}
363	}
364	else
365	{
366	// no structure nodes, so put in a dummy doc node
367	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
368	if (document_id != null)
369	{
370	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
371	}
372	else
373	{
374	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
375
376	}
377	the_document.appendChild(doc_node);
378	has_dummy = true;
379	}
380	}
381	else
382	{ // a simple type - we dont have a dummy node for simple
383	// should think about this more
384	// no structure request, so just put in a dummy doc node
385	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
386	if (document_id != null)
387	{
388	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
389	}
390	else
391	{
392	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
393	}
394	the_document.appendChild(doc_node);
395	has_dummy = true;
396	}
397
398	// Build a request to obtain some document metadata
399	Element dm_message = doc.createElement(GSXML.MESSAGE_ELEM);
400	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
401	Element dm_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
402	dm_message.appendChild(dm_request);
403	// Create a parameter list to specify the required metadata information
404
405	HashSet<String> meta_names = new HashSet<String>();
406	meta_names.add("Title"); // the default
407	if (format_elem != null)
408	{
409	getRequiredMetadataNames(format_elem, meta_names);
410	}
411
412	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
413	if (extraMetaListElem != null)
414	{
415	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
416	for (int i = 0; i < extraMetaList.getLength(); i++)
417	{
418	meta_names.add(((Element) extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
419	}
420	}
421
422	Element dm_param_list = createMetadataParamList(doc,meta_names);
423	if (service_params != null)
424	{
425	GSXML.addParametersToList(dm_param_list, service_params);
426	}
427
428	dm_request.appendChild(dm_param_list);
429
430	// create the doc node list for the metadata request
431	Element dm_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
432	dm_request.appendChild(dm_doc_list);
433
434	// Add each node from the structure response into the metadata request
435	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
436	for (int i = 0; i < doc_nodes.getLength(); i++)
437	{
438	Element doc_node = (Element) doc_nodes.item(i);
439	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
440
441	// Add the documentNode to the list
442	Element dm_doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
443	dm_doc_list.appendChild(dm_doc_node);
444	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
445	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
446	if (document_id == null){
447	dm_doc_node.setAttribute(GSXML.HREF_ID_ATT, href );
448	}
449
450	}
451
452	// we also want a metadata request to the top level document to get
453	// assocfilepath - this could be cached too
454	Element doc_meta_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
455	dm_message.appendChild(doc_meta_request);
456	Element doc_meta_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
457	if (service_params != null)
458	{
459	GSXML.addParametersToList(doc_meta_param_list, service_params);
460	}
461
462	doc_meta_request.appendChild(doc_meta_param_list);
463	Element doc_param = doc.createElement(GSXML.PARAM_ELEM);
464	doc_meta_param_list.appendChild(doc_param);
465	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
466	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
467
468	// create the doc node list for the metadata request
469	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
470	doc_meta_request.appendChild(doc_list);
471
472	Element doc_node = doc.createElement(GSXML.DOC_NODE_ELEM);
473	// the node we want is the root document node
474	if (document_id != null)
475	{
476	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
477	}
478	/*else
479	{
480	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
481	// can we assume that href is always a top level doc??
482	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
483	//doc_node.setAttribute("externalURL", has_rl);
484	}*/
485	doc_list.appendChild(doc_node);
486
487	Element dm_response_message = (Element) this.mr.process(dm_message);
488	if (processErrorElements(dm_response_message, page_response))
489	{
490	return result;
491	}
492
493	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
494	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
495
496	// Merge the metadata with the structure information
497	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
498	for (int i = 0; i < doc_nodes.getLength(); i++)
499	{
500	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
501	}
502	// get the top level doc metadata out
503	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
504	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
505	GSXML.mergeMetadataLists(the_document, top_doc_node);
506
507	// Build a request to obtain some document content
508	Element dc_message = doc.createElement(GSXML.MESSAGE_ELEM);
509	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
510	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
511	dc_message.appendChild(dc_request);
512
513	// Create a parameter list to specify the request parameters - empty for now
514	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
515	if (service_params != null)
516	{
517	GSXML.addParametersToList(dc_param_list, service_params);
518	}
519
520	dc_request.appendChild(dc_param_list);
521
522	// get the content
523	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
524	if (expand_document)
525	{
526	dc_request.appendChild(dm_doc_list);
527	}
528	else
529	{
530	dc_request.appendChild(basic_doc_list);
531	}
532	logger.debug("request = " + XMLConverter.getString(dc_message));
533	Element dc_response_message = (Element) this.mr.process(dc_message);
534	if (processErrorElements(dc_response_message, page_response))
535	{
536	return result;
537	}
538
539	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
540
541	if (expand_document)
542	{
543	// Merge the content with the structure information
544	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
545	for (int i = 0; i < doc_nodes.getLength(); i++)
546	{
547	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
548	if (content != null)
549	{
550	if (highlight_query_terms)
551	{
552	content = highlightQueryTermsOld(request, (Element) content); // highlightQueryTerms(query_terms, query, request.getOwnerDocument(), (Element) content); //request, (Element) content);
553	}
554	doc_nodes.item(i).appendChild(doc.importNode(content, true));
555	}
556	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
557	}
558	if (has_dummy && document_type.equals(GSXML.DOC_TYPE_SIMPLE)) {
559	Element dummy_node = (Element) doc_nodes.item(0);
560	the_document.removeChild(dummy_node);
561	the_document.setAttribute(GSXML.NODE_ID_ATT, dummy_node.getAttribute(GSXML.NODE_ID_ATT));
562	NodeList dummy_children = dummy_node.getChildNodes();
563	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
564	{
565	// special case as we don't want more than one metadata list
566	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
567	{
568	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
569	}
570	else
571	{
572	the_document.appendChild(dummy_children.item(i));
573	}
574	}
575	}
576	}
577	else
578	{
579	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
580	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
581	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
582	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
583
584	if (dc_response_doc_content == null)
585	{
586	// no content to add
587	if (dc_response_doc.getAttribute("external").equals("true"))
588	{
589
590	//if (dc_response_doc_external != null)
591	//{
592	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
593
594	the_document.setAttribute("selectedNode", href_id);
595	the_document.setAttribute("external", href_id);
596	}
597	return result;
598	}
599	if (highlight_query_terms)
600	{
601	dc_response_doc.removeChild(dc_response_doc_content);
602
603	dc_response_doc_content = highlightQueryTermsOld(request, dc_response_doc_content); //highlightQueryTerms(query_terms, query, request.getOwnerDocument(), dc_response_doc_content); //request, dc_response_doc_content);
604	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
605	}
606
607	if (provide_annotations)
608	{
609	String service_selected = (String) params.get(ENRICH_DOC_ARG);
610	if (service_selected != null && service_selected.equals("1"))
611	{
612	// now we can modifiy the response doc if needed
613	String enrich_service = (String) params.get(GSParams.SERVICE);
614	// send a message to the service
615	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
616	Element enrich_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
617	enrich_message.appendChild(enrich_request);
618	// check for parameters
619	HashMap e_service_params = (HashMap) params.get("s1");
620	if (e_service_params != null)
621	{
622	Element enrich_pl = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
623	GSXML.addParametersToList(enrich_pl, e_service_params);
624	enrich_request.appendChild(enrich_pl);
625	}
626	Element e_doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
627	enrich_request.appendChild(e_doc_list);
628	e_doc_list.appendChild(doc.importNode(dc_response_doc, true));
629
630	Node enrich_response = this.mr.process(enrich_message);
631
632	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
633	path = GSPath.createPath(links);
634	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
635
636	}
637	} // if provide_annotations
638
639	// use the returned id rather than the sent one cos there may have
640	// been modifiers such as .pr that are removed.
641	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
642	the_document.setAttribute("selectedNode", modified_doc_id);
643	if (has_dummy)
644	{
645	// change the id if necessary and add the content
646	Element dummy_node = (Element) doc_nodes.item(0);
647
648	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
649	dummy_node.appendChild(doc.importNode(dc_response_doc_content, true));
650	// hack for simple type
651	if (document_type.equals(GSXML.DOC_TYPE_SIMPLE))
652	{
653	// we dont want the internal docNode, just want the content and metadata in the document
654	// rethink this!!
655	the_document.removeChild(dummy_node);
656
657	NodeList dummy_children = dummy_node.getChildNodes();
658	//for (int i=0; i<dummy_children.getLength(); i++) {
659	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
660	{
661	// special case as we don't want more than one metadata list
662	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
663	{
664	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
665	}
666	else
667	{
668	the_document.appendChild(dummy_children.item(i));
669	}
670	}
671	}
672
673	the_document.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
674	}
675	else
676	{
677	// Merge the document content with the metadata and structure information
678	for (int i = 0; i < doc_nodes.getLength(); i++)
679	{
680	Node dn = doc_nodes.item(i);
681	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
682	if (dn_id.equals(modified_doc_id))
683	{
684	dn.appendChild(doc.importNode(dc_response_doc_content, true));
685	break;
686	}
687	}
688	}
689	}
690	//logger.debug("(DocumentAction) Page:\n" + GSXML.xmlNodeToString(result));
691	return result;
692	}
693
694	/**
695	* tell the param class what its arguments are if an action has its own
696	* arguments, this should add them to the params object - particularly
697	* important for args that should not be saved
698	*/
699	public boolean addActionParameters(GSParams params)
700	{
701	params.addParameter(GOTO_PAGE_ARG, false);
702	params.addParameter(ENRICH_DOC_ARG, false);
703	params.addParameter(EXPAND_DOCUMENT_ARG, false);
704	params.addParameter(EXPAND_CONTENTS_ARG, false);
705	params.addParameter(REALISTIC_BOOK_ARG, false);
706
707	return true;
708	}
709
710	/**
711	* this method gets the collection description, the format info, the list of
712	* enrich services, etc - stuff that is needed for the page, but is the same
713	* whatever the query is - should be cached
714	*/
715	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
716	{
717	Document doc = page_response.getOwnerDocument();
718
719	// create a message to process - contains requests for the collection
720	// description, the format element, the enrich services on offer
721	// these could all be cached
722	Element info_message = doc.createElement(GSXML.MESSAGE_ELEM);
723	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
724	// the format request - ignore for now, where does this request go to??
725	Element format_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
726	info_message.appendChild(format_request);
727
728	// the enrich_services request - only do this if provide_annotations is true
729
730	if (provide_annotations)
731	{
732	Element enrich_services_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
733	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
734	info_message.appendChild(enrich_services_request);
735	}
736
737	Element info_response = (Element) this.mr.process(info_message);
738
739	// the collection is the first response
740	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
741	Element format_resp = (Element) responses.item(0);
742
743	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
744	if (format_elem != null)
745	{
746	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
747	if (global_format_elem != null)
748	{
749	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
750	}
751
752	// set the format type
753	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
754	page_response.appendChild(doc.importNode(format_elem, true));
755	}
756
757	if (provide_annotations)
758	{
759	Element services_resp = (Element) responses.item(1);
760
761	// a new message for the mr
762	Element enrich_message = doc.createElement(GSXML.MESSAGE_ELEM);
763	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
764	boolean service_found = false;
765	for (int j = 0; j < e_services.getLength(); j++)
766	{
767	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
768	{
769	Element s = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
770	enrich_message.appendChild(s);
771	service_found = true;
772	}
773	}
774	if (service_found)
775	{
776	Element enrich_response = (Element) this.mr.process(enrich_message);
777
778	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
779	Element service_list = doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
780	for (int i = 0; i < e_responses.getLength(); i++)
781	{
782	Element e_resp = (Element) e_responses.item(i);
783	Element e_service = (Element) doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
784	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
785	service_list.appendChild(e_service);
786	}
787	page_response.appendChild(service_list);
788	}
789	} // if provide_annotations
790	return true;
791
792	}
793
794	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
795	{
796	Document doc = basic_doc_list.getOwnerDocument();
797
798	Element ds_message = doc.createElement(GSXML.MESSAGE_ELEM);
799	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
800	Element ds_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
801	ds_message.appendChild(ds_request);
802
803	// Create a parameter list to specify the required structure information
804	Element ds_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
805	Element ds_param = doc.createElement(GSXML.PARAM_ELEM);
806	ds_param_list.appendChild(ds_param);
807	ds_param.setAttribute(GSXML.NAME_ATT, "info");
808	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
809
810	ds_request.appendChild(ds_param_list);
811
812	// add the node list we created earlier
813	ds_request.appendChild(basic_doc_list);
814
815	// Process the document structure retrieve message
816	Element ds_response_message = (Element) this.mr.process(ds_message);
817	if (processErrorElements(ds_response_message, page_response))
818	{
819	return null;
820	}
821
822	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
823	String path = GSPath.createPath(links);
824	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
825	if (info_elem == null) {
826	return null;
827	}
828	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
829	if (doctype_elem != null)
830	{
831	String doc_type = doctype_elem.getAttribute("value");
832	return doc_type;
833	}
834	return null;
835	}
836
837	/**
838	* this involves a bit of a hack to get the equivalent query terms - has to
839	* requery the query service - uses the last selected service name. (if it
840	* ends in query). should this action do the query or should it send a
841	* message to the query action? but that will involve lots of extra stuff.
842	* also doesn't handle phrases properly - just highlights all the terms
843	* found in the text.
844	*/
845	protected Element highlightQueryTerms(String terms, String performed_query, Document doc, Element dc_response_doc_content) {
846	logger.error("in highlight, terms = "+terms);
847	if (terms == null \|\| performed_query == null) {
848	return dc_response_doc_content;
849	}
850	HashMap<String, HashSet<String>> term_to_variants_map = new HashMap<String, HashSet<String>>();
851	HashSet<String> query_term_variants = new HashSet<String>();
852
853	// terms in the form snail:snail,SNAILS,Snail;farm:farm,farming,Farming
854	String[] term_list = terms.split(";");
855	for (int i=0; i<term_list.length; i++) {
856	String term_x = term_list[i];
857	int colon_index = term_x.indexOf(';');
858	String main_term;
859	String term_variants;
860	if (colon_index == -1) {
861	main_term = term_x;
862	term_variants = main_term;
863	} else {
864	main_term = term_x.substring(0, colon_index);
865	term_variants = term_x.substring(colon_index+1);
866	}
867	query_term_variants.add(main_term);
868	term_to_variants_map.put(main_term, new HashSet<String>(Arrays.asList(term_variants.split(","))));
869	}
870
871	String content = GSXML.getNodeText(dc_response_doc_content);
872
873	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
874
875	//Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
876	//String performed_query = //GSXML.getNodeText(query_element) + " ";
877
878	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
879	int term_start = 0;
880	boolean in_term = false;
881	boolean in_phrase = false;
882	for (int i = 0; i < performed_query.length(); i++)
883	{
884	char character = performed_query.charAt(i);
885	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
886
887	// Has a query term just started?
888	if (in_term == false && is_character_letter_or_digit == true)
889	{
890	in_term = true;
891	term_start = i;
892	}
893
894	// Or has a term just finished?
895	else if (in_term == true && is_character_letter_or_digit == false)
896	{
897	in_term = false;
898	String term = performed_query.substring(term_start, i);
899	HashSet<String> phrase_query_p_term_x_variants = term_to_variants_map.get(term);
900	// Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
901	// if (term_element != null)
902	// {
903
904	// HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
905
906	// NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
907	// if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
908	// {
909	// String termValueU = null;
910	// String termValueL = null;
911
912	// if (term.length() > 1)
913	// {
914	// termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
915	// termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
916	// }
917	// else
918	// {
919	// termValueU = term.substring(0, 1).toUpperCase();
920	// termValueL = term.substring(0, 1).toLowerCase();
921	// }
922
923	// phrase_query_p_term_x_variants.add(termValueU);
924	// phrase_query_p_term_x_variants.add(termValueL);
925	// }
926	// else
927	// {
928	// for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
929	// {
930	// Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
931	// String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
932	// for (int k = 0; k < term_equivalent_terms.length; k++)
933	// {
934	// phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
935	// }
936	// }
937	// }
938	if (phrase_query_p_term_x_variants != null) {
939	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
940
941	if (in_phrase == false)
942	{
943	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
944	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
945	}
946	}
947	//}
948	}
949	// Watch for phrases (surrounded by quotes)
950	if (character == '\"')
951	{
952	// Has a phrase just started?
953	if (in_phrase == false)
954	{
955	in_phrase = true;
956	}
957	// Or has a phrase just finished?
958	else if (in_phrase == true)
959	{
960	in_phrase = false;
961	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
962	}
963
964	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
965	}
966	}
967
968	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
969	}
970	protected Element highlightQueryTermsOld(Element request, Element dc_response_doc_content)
971	{
972	Document doc = request.getOwnerDocument();
973
974	// do the query again to get term info
975	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
976	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
977
978	HashMap previous_params = (HashMap) params.get("p");
979	if (previous_params == null)
980	{
981	return dc_response_doc_content;
982	}
983	String service_name = (String) previous_params.get(GSParams.SERVICE);
984	if (service_name == null \|\| !service_name.endsWith("Query"))
985	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
986	logger.debug("invalid service, not doing highlighting");
987	return dc_response_doc_content;
988	}
989	String collection = (String) params.get(GSParams.COLLECTION);
990	UserContext userContext = new UserContext(request);
991	String to = GSPath.appendLink(collection, service_name);
992
993	Element mr_query_message = doc.createElement(GSXML.MESSAGE_ELEM);
994	Element mr_query_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
995	mr_query_message.appendChild(mr_query_request);
996
997	// paramList
998	HashMap service_params = (HashMap) params.get("s1");
999
1000	Element query_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1001	GSXML.addParametersToList(query_param_list, service_params);
1002	GSXML.addParameterToList(query_param_list, "hldocOID", (String) params.get(GSParams.DOCUMENT));
1003	mr_query_request.appendChild(query_param_list);
1004
1005	// do the query
1006	Element mr_query_response = (Element) this.mr.process(mr_query_message);
1007
1008	String pathNode = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.NODE_CONTENT_ELEM);
1009	Element highlighted_Node = (Element) GSXML.getNodeByPath(mr_query_response, pathNode);
1010	if (highlighted_Node != null)
1011	{
1012	// Build a request to process highlighted text
1013
1014	Element hl_message = doc.createElement(GSXML.MESSAGE_ELEM);
1015	to = GSPath.appendLink(collection, "DocumentContentRetrieve");
1016	Element dc_request = GSXML.createBasicRequest(doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
1017	hl_message.appendChild(dc_request);
1018
1019	// Create a parameter list to specify the request parameters - empty for now
1020	Element dc_param_list = doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
1021	dc_request.appendChild(dc_param_list);
1022
1023	// get the content
1024	Element doc_list = doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
1025	dc_request.appendChild(doc_list);
1026	Element current_doc = doc.createElement(GSXML.DOC_NODE_ELEM);
1027	doc_list.appendChild(current_doc);
1028	current_doc.setAttribute(GSXML.NODE_ID_ATT, (String) params.get(GSParams.DOCUMENT));
1029	//Append highlighted content to request for processing
1030	dc_request.appendChild(doc.importNode(highlighted_Node, true));
1031
1032	Element hl_response_message = (Element) this.mr.process(hl_message);
1033	//Get results
1034	NodeList contentList = hl_response_message.getElementsByTagName(GSXML.NODE_CONTENT_ELEM);
1035	Element content = (Element) contentList.item(0);
1036	return content;
1037	}
1038
1039	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
1040	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
1041	if (query_term_list_element == null)
1042	{
1043	// no term info
1044	logger.error("No query term information.\n");
1045	return dc_response_doc_content;
1046	}
1047
1048	String content = GSXML.getNodeText(dc_response_doc_content);
1049
1050	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
1051	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
1052
1053	HashSet<String> query_term_variants = new HashSet<String>();
1054	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
1055	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
1056	{
1057	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
1058	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
1059	{
1060	for (int i = 0; i < terms_nodelist.getLength(); i++)
1061	{
1062	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
1063	String termValueU = null;
1064	String termValueL = null;
1065
1066	if (termValue.length() > 1)
1067	{
1068	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
1069	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
1070	}
1071	else
1072	{
1073	termValueU = termValue.substring(0, 1).toUpperCase();
1074	termValueL = termValue.substring(0, 1).toLowerCase();
1075	}
1076
1077	query_term_variants.add(termValueU);
1078	query_term_variants.add(termValueL);
1079	}
1080	}
1081	}
1082	else
1083	{
1084	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
1085	{
1086	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
1087	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
1088	for (int j = 0; j < equivalent_terms.length; j++)
1089	{
1090	query_term_variants.add(equivalent_terms[j]);
1091	}
1092	}
1093	}
1094
1095	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
1096
1097	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
1098	String performed_query = GSXML.getNodeText(query_element) + " ";
1099
1100	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1101	int term_start = 0;
1102	boolean in_term = false;
1103	boolean in_phrase = false;
1104	for (int i = 0; i < performed_query.length(); i++)
1105	{
1106	char character = performed_query.charAt(i);
1107	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
1108
1109	// Has a query term just started?
1110	if (in_term == false && is_character_letter_or_digit == true)
1111	{
1112	in_term = true;
1113	term_start = i;
1114	}
1115
1116	// Or has a term just finished?
1117	else if (in_term == true && is_character_letter_or_digit == false)
1118	{
1119	in_term = false;
1120	String term = performed_query.substring(term_start, i);
1121
1122	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
1123	if (term_element != null)
1124	{
1125
1126	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
1127
1128	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
1129	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
1130	{
1131	String termValueU = null;
1132	String termValueL = null;
1133
1134	if (term.length() > 1)
1135	{
1136	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
1137	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
1138	}
1139	else
1140	{
1141	termValueU = term.substring(0, 1).toUpperCase();
1142	termValueL = term.substring(0, 1).toLowerCase();
1143	}
1144
1145	phrase_query_p_term_x_variants.add(termValueU);
1146	phrase_query_p_term_x_variants.add(termValueL);
1147	}
1148	else
1149	{
1150	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
1151	{
1152	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
1153	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
1154	for (int k = 0; k < term_equivalent_terms.length; k++)
1155	{
1156	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
1157	}
1158	}
1159	}
1160	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
1161
1162	if (in_phrase == false)
1163	{
1164	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1165	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1166	}
1167	}
1168	}
1169	// Watch for phrases (surrounded by quotes)
1170	if (character == '\"')
1171	{
1172	// Has a phrase just started?
1173	if (in_phrase == false)
1174	{
1175	in_phrase = true;
1176	}
1177	// Or has a phrase just finished?
1178	else if (in_phrase == true)
1179	{
1180	in_phrase = false;
1181	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
1182	}
1183
1184	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
1185	}
1186	}
1187
1188	return highlightQueryTermsInternal(doc, content, query_term_variants, phrase_query_term_variants_hierarchy);
1189	}
1190
1191	/**
1192	* Highlights query terms in a piece of text.
1193	*/
1194	private Element highlightQueryTermsInternal(Document doc, String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
1195	{
1196
1197	logger.error("size = "+ query_term_variants.size());
1198	// Convert the content string to an array of characters for speed
1199	char[] content_characters = new char[content.length()];
1200	content.getChars(0, content.length(), content_characters, 0);
1201
1202	// Now skim through the content, identifying word matches
1203	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
1204	int word_start = 0;
1205	boolean in_word = false;
1206	boolean preceding_word_matched = false;
1207	boolean inTag = false;
1208	for (int i = 0; i < content_characters.length; i++)
1209	{
1210	//We don't want to find words inside HTML tags
1211	if (content_characters[i] == '<')
1212	{
1213	inTag = true;
1214	continue;
1215	}
1216	else if (inTag && content_characters[i] == '>')
1217	{
1218	inTag = false;
1219	}
1220	else if (inTag)
1221	{
1222	continue;
1223	}
1224
1225	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1226
1227	// Has a word just started?
1228	if (in_word == false && is_character_letter_or_digit == true)
1229	{
1230	in_word = true;
1231	word_start = i;
1232	}
1233
1234	// Or has a word just finished?
1235	else if (in_word == true && is_character_letter_or_digit == false)
1236	{
1237	in_word = false;
1238
1239	// Check if the word matches any of the query term equivalents
1240	String word = new String(content_characters, word_start, (i - word_start));
1241	if (query_term_variants.contains(word))
1242	{
1243	// We have found a matching word, so remember its location
1244	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1245	preceding_word_matched = true;
1246	}
1247	else
1248	{
1249	preceding_word_matched = false;
1250	}
1251	}
1252	}
1253
1254	// Don't forget the last word...
1255	if (in_word == true)
1256	{
1257	// Check if the word matches any of the query term equivalents
1258	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1259	if (query_term_variants.contains(word))
1260	{
1261	// We have found a matching word, so remember its location
1262	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1263	}
1264	}
1265
1266	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1267	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1268
1269	// Deal with phrases now
1270	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1271	for (int i = 0; i < word_matches.size(); i++)
1272	{
1273	WordMatch word_match = word_matches.get(i);
1274
1275	// See if any partial phrase matches are extended by this word
1276	if (word_match.preceding_word_matched)
1277	{
1278	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1279	{
1280	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1281	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1282	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1283	if (phrase_query_p_term_x_variants.contains(word_match.word))
1284	{
1285	partial_phrase_match.num_words_matched++;
1286
1287	// Has a complete phrase match occurred?
1288	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1289	{
1290	// Check for overlaps by looking at the previous highlight range
1291	if (!highlight_end_positions.isEmpty())
1292	{
1293	int last_highlight_index = highlight_end_positions.size() - 1;
1294	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1295	if (last_highlight_end > partial_phrase_match.start_position)
1296	{
1297	// There is an overlap, so remove the previous phrase match
1298	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1299	highlight_end_positions.remove(last_highlight_index);
1300	partial_phrase_match.start_position = last_highlight_start;
1301	}
1302	}
1303
1304	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1305	highlight_end_positions.add(new Integer(word_match.end_position));
1306	}
1307	// No, but add the partial match back into the list for next time
1308	else
1309	{
1310	partial_phrase_matches.add(partial_phrase_match);
1311	}
1312	}
1313	}
1314	}
1315	else
1316	{
1317	partial_phrase_matches.clear();
1318	}
1319
1320	// See if this word is at the start of any of the phrases
1321	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1322	{
1323	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1324	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1325	if (phrase_query_p_term_1_variants.contains(word_match.word))
1326	{
1327	// If this phrase is just one word long, we have a complete match
1328	if (phrase_query_p_term_variants_list.size() == 1)
1329	{
1330	highlight_start_positions.add(new Integer(word_match.start_position));
1331	highlight_end_positions.add(new Integer(word_match.end_position));
1332	}
1333	// Otherwise we have the start of a potential phrase match
1334	else
1335	{
1336	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1337	}
1338	}
1339	}
1340	}
1341
1342	// Now add the annotation tags into the document at the correct points
1343	Element content_element = doc.createElement(GSXML.NODE_CONTENT_ELEM);
1344
1345	int last_wrote = 0;
1346	for (int i = 0; i < highlight_start_positions.size(); i++)
1347	{
1348	int highlight_start = highlight_start_positions.get(i).intValue();
1349	int highlight_end = highlight_end_positions.get(i).intValue();
1350
1351	// Print anything before the highlight range
1352	if (last_wrote < highlight_start)
1353	{
1354	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1355	content_element.appendChild(doc.createTextNode(preceding_text));
1356	}
1357
1358	// Print the highlight text, annotated
1359	if (highlight_end > last_wrote)
1360	{
1361	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1362	Element annotation_element = GSXML.createTextElement(doc, "annotation", highlight_text);
1363	annotation_element.setAttribute("type", "query_term");
1364	content_element.appendChild(annotation_element);
1365	last_wrote = highlight_end;
1366	}
1367	}
1368
1369	// Finish off any unwritten text
1370	if (last_wrote < content_characters.length)
1371	{
1372	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1373	content_element.appendChild(doc.createTextNode(remaining_text));
1374	}
1375	return content_element;
1376	}
1377
1378	static private class WordMatch
1379	{
1380	public String word;
1381	public int start_position;
1382	public int end_position;
1383	public boolean preceding_word_matched;
1384
1385	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1386	{
1387	this.word = word;
1388	this.start_position = start_position;
1389	this.end_position = end_position;
1390	this.preceding_word_matched = preceding_word_matched;
1391	}
1392	}
1393
1394	static private class PartialPhraseMatch
1395	{
1396	public int start_position;
1397	public int query_phrase_number;
1398	public int num_words_matched;
1399
1400	public PartialPhraseMatch(int start_position, int query_phrase_number)
1401	{
1402	this.start_position = start_position;
1403	this.query_phrase_number = query_phrase_number;
1404	this.num_words_matched = 1;
1405	}
1406	}
1407	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: