Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25985

Last change on this file since 25985 was 25985, checked in by sjm84, 12 years ago
All of the actions that use format statements will now merge in the global format statement
Property svn:keywords set to `Author Date Id Revision`
File size: 42.4 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37	import java.io.Serializable;
38
39	import org.apache.log4j.*;
40
41	/** Action class for retrieving Documents via the message router */
42	public class DocumentAction extends Action
43	{
44
45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47	// this is used to specify that the sibling nodes of a selected one should be obtained
48	public static final String SIBLING_ARG = "sib";
49	public static final String GOTO_PAGE_ARG = "gp";
50	public static final String ENRICH_DOC_ARG = "end";
51	public static final String EXPAND_DOCUMENT_ARG = "ed";
52	public static final String EXPAND_CONTENTS_ARG = "ec";
53	public static final String REALISTIC_BOOK_ARG = "book";
54
55	/**
56	* if this is set to true, when a document is displayed, any annotation type
57	* services (enrich) will be offered to the user as well
58	*/
59	protected boolean provide_annotations = false;
60
61	protected boolean highlight_query_terms = false;
62
63	public boolean configure()
64	{
65	super.configure();
66	String highlight = (String) config_params.get("highlightQueryTerms");
67	if (highlight != null && highlight.equals("true"))
68	{
69	highlight_query_terms = true;
70	}
71	String annotate = (String) config_params.get("displayAnnotationService");
72	if (annotate != null && annotate.equals("true"))
73	{
74	provide_annotations = true;
75	}
76	return true;
77	}
78
79	public Node process(Node message_node)
80	{
81	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83	Element message = this.converter.nodeToElement(message_node);
84
85	// the response
86	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
87	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
88	result.appendChild(page_response);
89
90	// get the request - assume only one
91	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
92	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
93	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
94
95	// just in case there are some that need to get passed to the services
96	HashMap service_params = (HashMap) params.get("s0");
97
98	String collection = (String) params.get(GSParams.COLLECTION);
99	String document_id = (String) params.get(GSParams.DOCUMENT);
100	if (document_id != null && document_id.equals(""))
101	{
102	document_id = null;
103	}
104	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
105	if (href != null && href.equals(""))
106	{
107	href = null;
108	}
109	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
110	if (document_id == null && href == null)
111	{
112	logger.error("no document specified!");
113	return result;
114	}
115	if (rl != null && rl.equals("0"))
116	{
117	// this is a true external link, we should have been directed to a different page or action
118	logger.error("rl value was 0, shouldn't get here");
119	return result;
120	}
121	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
122	if (document_type != null && document_type.equals(""))
123	{
124	//document_type = "hierarchy";
125	document_type = null; // we'll get it later if not already specified
126	}
127	//whether to retrieve siblings or not
128	boolean get_siblings = false;
129	String sibs = (String) params.get(SIBLING_ARG);
130	if (sibs != null && sibs.equals("1"))
131	{
132	get_siblings = true;
133	}
134
135	String doc_id_modifier = "";
136	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
137	if (sibling_num != null && !sibling_num.equals(""))
138	{
139	// we have to modify the doc name
140	doc_id_modifier = "." + sibling_num + ".ss";
141	}
142
143	boolean expand_document = false;
144	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
145	if (ed_arg != null && ed_arg.equals("1"))
146	{
147	expand_document = true;
148	}
149
150	boolean expand_contents = false;
151	if (expand_document)
152	{ // we always expand the contents with the text
153	expand_contents = true;
154	}
155	else
156	{
157	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
158	if (ec_arg != null && ec_arg.equals("1"))
159	{
160	expand_contents = true;
161	}
162	}
163
164	UserContext userContext = new UserContext(request);
165
166	//append site metadata
167	addSiteMetadata(page_response, userContext);
168	addInterfaceOptions(page_response);
169
170	// get the additional data needed for the page
171	getBackgroundData(page_response, collection, userContext);
172	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
173
174	// the_document is where all the doc info - structure and metadata etc
175	// is added into, to be returned in the page
176	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
177	page_response.appendChild(the_document);
178
179	// create a basic doc list containing the current node
180	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
181	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
182	basic_doc_list.appendChild(current_doc);
183	if (document_id != null)
184	{
185	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
186	}
187	else
188	{
189	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
190	// do we need this??
191	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
192	}
193
194	if (document_type == null)
195	{
196	logger.error("getting document type");
197	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
198	logger.error("new doc type = " + document_type);
199	}
200	if (document_type != null)
201	{
202	// set the doctype from the cgi arg or from the server as an attribute
203	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
204	}
205	else
206	{
207	logger.error("doctype is null!!!***********");
208	}
209
210	// Create a parameter list to specify the required structure information
211	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
212
213	if (service_params != null)
214	{
215	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
216	}
217
218	Element ds_param = null;
219	boolean get_structure = false;
220	boolean get_structure_info = false;
221	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
222	{
223	get_structure_info = true;
224
225	if (expand_contents)
226	{
227	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
228	ds_param_list.appendChild(ds_param);
229	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
230	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
231	}
232
233	// get the info needed for paged naviagtion
234	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
235	ds_param_list.appendChild(ds_param);
236	ds_param.setAttribute(GSXML.NAME_ATT, "info");
237	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
238	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
239	ds_param_list.appendChild(ds_param);
240	ds_param.setAttribute(GSXML.NAME_ATT, "info");
241	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
242	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
243	ds_param_list.appendChild(ds_param);
244	ds_param.setAttribute(GSXML.NAME_ATT, "info");
245	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
246
247	if (get_siblings)
248	{
249	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
250	ds_param_list.appendChild(ds_param);
251	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
252	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
253	}
254
255	}
256	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\|document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY) )
257	{
258	get_structure = true;
259	if (expand_contents)
260	{
261	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
262	ds_param_list.appendChild(ds_param);
263	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
264	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
265	}
266	else
267	{
268	// get the info needed for table of contents
269	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
270	ds_param_list.appendChild(ds_param);
271	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
272	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
273	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
274	ds_param_list.appendChild(ds_param);
275	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
276	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
277	if (get_siblings)
278	{
279	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
280	ds_param_list.appendChild(ds_param);
281	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
282	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
283	}
284	}
285	}
286	else
287	{
288	// we dont need any structure
289	}
290
291	boolean has_dummy = false;
292	if (get_structure \|\| get_structure_info)
293	{
294
295	// Build a request to obtain the document structure
296	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
297	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
298	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
299	ds_message.appendChild(ds_request);
300	ds_request.appendChild(ds_param_list);
301
302	// add the node list we created earlier
303	ds_request.appendChild(basic_doc_list);
304
305	// Process the document structure retrieve message
306	Element ds_response_message = (Element) this.mr.process(ds_message);
307	if (processErrorElements(ds_response_message, page_response))
308	{
309	return result;
310	}
311
312	// get the info and print out
313	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
314	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
315	path = GSPath.appendLink(path, "nodeStructureInfo");
316	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
317	// get the doc_node bit
318	if (ds_response_struct_info != null)
319	{
320	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
321	}
322	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
323	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
324	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
325	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
326
327	if (ds_response_structure != null)
328	{
329	// add the contents of the structure bit into the_document
330	NodeList structs = ds_response_structure.getChildNodes();
331	for (int i = 0; i < structs.getLength(); i++)
332	{
333	the_document.appendChild(this.doc.importNode(structs.item(i), true));
334	}
335	}
336	else
337	{
338	// no structure nodes, so put in a dummy doc node
339	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
340	if (document_id != null)
341	{
342	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
343	}
344	else
345	{
346	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
347
348	}
349	the_document.appendChild(doc_node);
350	has_dummy = true;
351	}
352	}
353	else
354	{ // a simple type - we dont have a dummy node for simple
355	// should think about this more
356	// no structure request, so just put in a dummy doc node
357	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
358	if (document_id != null)
359	{
360	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
361	}
362	else
363	{
364	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
365	}
366	the_document.appendChild(doc_node);
367	has_dummy = true;
368	}
369
370	// Build a request to obtain some document metadata
371	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
372	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
373	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
374	dm_message.appendChild(dm_request);
375	// Create a parameter list to specify the required metadata information
376
377	HashSet<String> meta_names = new HashSet<String>();
378	meta_names.add("Title"); // the default
379	if (format_elem != null)
380	{
381	getRequiredMetadataNames(format_elem, meta_names);
382	}
383
384	Element dm_param_list = createMetadataParamList(meta_names);
385	if (service_params != null)
386	{
387	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
388	}
389
390	dm_request.appendChild(dm_param_list);
391
392	// create the doc node list for the metadata request
393	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
394	dm_request.appendChild(dm_doc_list);
395
396	// Add each node from the structure response into the metadata request
397	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
398	for (int i = 0; i < doc_nodes.getLength(); i++)
399	{
400	Element doc_node = (Element) doc_nodes.item(i);
401	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
402
403	// Add the documentNode to the list
404	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
405	dm_doc_list.appendChild(dm_doc_node);
406	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
407	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
408	}
409
410	// we also want a metadata request to the top level document to get
411	// assocfilepath - this could be cached too
412	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
413	dm_message.appendChild(doc_meta_request);
414	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
415	if (service_params != null)
416	{
417	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
418	}
419
420	doc_meta_request.appendChild(doc_meta_param_list);
421	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
422	doc_meta_param_list.appendChild(doc_param);
423	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
424	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
425
426	// create the doc node list for the metadata request
427	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
428	doc_meta_request.appendChild(doc_list);
429
430	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
431	// the node we want is the root document node
432	if (document_id != null)
433	{
434	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
435	}
436	else
437	{
438	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
439	// can we assume that href is always a top level doc??
440	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
441	//doc_node.setAttribute("externalURL", has_rl);
442	}
443	doc_list.appendChild(doc_node);
444
445	Element dm_response_message = (Element) this.mr.process(dm_message);
446	if (processErrorElements(dm_response_message, page_response))
447	{
448	return result;
449	}
450
451	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
452	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
453
454	// Merge the metadata with the structure information
455	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
456	for (int i = 0; i < doc_nodes.getLength(); i++)
457	{
458	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
459	}
460	// get the top level doc metadata out
461	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
462	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
463	GSXML.mergeMetadataLists(the_document, top_doc_node);
464
465	// Build a request to obtain some document content
466	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
467	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
468	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
469	dc_message.appendChild(dc_request);
470
471	// Create a parameter list to specify the request parameters - empty for now
472	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
473	if (service_params != null)
474	{
475	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
476	}
477
478	dc_request.appendChild(dc_param_list);
479
480	// get the content
481	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
482	if (expand_document)
483	{
484	dc_request.appendChild(dm_doc_list);
485	}
486	else
487	{
488	dc_request.appendChild(basic_doc_list);
489	}
490	logger.debug("request = " + XMLConverter.getString(dc_message));
491	Element dc_response_message = (Element) this.mr.process(dc_message);
492	if (processErrorElements(dc_response_message, page_response))
493	{
494	return result;
495	}
496
497	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
498
499	if (expand_document)
500	{
501	// Merge the content with the structure information
502	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
503	for (int i = 0; i < doc_nodes.getLength(); i++)
504	{
505	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
506	if (content != null)
507	{
508	if (highlight_query_terms)
509	{
510	content = highlightQueryTerms(request, (Element) content);
511	}
512	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
513	}
514	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
515	}
516	}
517	else
518	{
519	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
520	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
521	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
522	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
523
524	if (dc_response_doc_content == null)
525	{
526	// no content to add
527	if (dc_response_doc.getAttribute("external").equals("true"))
528	{
529
530	//if (dc_response_doc_external != null)
531	//{
532	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
533
534	the_document.setAttribute("selectedNode", href_id);
535	the_document.setAttribute("external", href_id);
536	}
537	return result;
538	}
539	if (highlight_query_terms)
540	{
541	dc_response_doc.removeChild(dc_response_doc_content);
542
543	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
544	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
545	}
546
547	if (provide_annotations)
548	{
549	String service_selected = (String) params.get(ENRICH_DOC_ARG);
550	if (service_selected != null && service_selected.equals("1"))
551	{
552	// now we can modifiy the response doc if needed
553	String enrich_service = (String) params.get(GSParams.SERVICE);
554	// send a message to the service
555	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
556	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
557	enrich_message.appendChild(enrich_request);
558	// check for parameters
559	HashMap e_service_params = (HashMap) params.get("s1");
560	if (e_service_params != null)
561	{
562	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
563	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
564	enrich_request.appendChild(enrich_pl);
565	}
566	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
567	enrich_request.appendChild(e_doc_list);
568	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
569
570	Node enrich_response = this.mr.process(enrich_message);
571
572	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
573	path = GSPath.createPath(links);
574	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
575
576	}
577	} // if provide_annotations
578
579	// use the returned id rather than the sent one cos there may have
580	// been modifiers such as .pr that are removed.
581	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
582	the_document.setAttribute("selectedNode", modified_doc_id);
583	if (has_dummy)
584	{
585	// change the id if necessary and add the content
586	Element dummy_node = (Element) doc_nodes.item(0);
587
588	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
589	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
590	// hack for simple type
591	if (document_type.equals("simple"))
592	{
593	// we dont want the internal docNode, just want the content and metadata in the document
594	// rethink this!!
595	the_document.removeChild(dummy_node);
596
597	NodeList dummy_children = dummy_node.getChildNodes();
598	//for (int i=0; i<dummy_children.getLength(); i++) {
599	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
600	{
601	// special case as we don't want more than one metadata list
602	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
603	{
604	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
605	}
606	else
607	{
608	the_document.appendChild(dummy_children.item(i));
609	}
610	}
611	}
612	}
613	else
614	{
615	// Merge the document content with the metadata and structure information
616	for (int i = 0; i < doc_nodes.getLength(); i++)
617	{
618	Node dn = doc_nodes.item(i);
619	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
620	if (dn_id.equals(modified_doc_id))
621	{
622	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
623	break;
624	}
625	}
626	}
627	}
628	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
629	return result;
630	}
631
632	/**
633	* tell the param class what its arguments are if an action has its own
634	* arguments, this should add them to the params object - particularly
635	* important for args that should not be saved
636	*/
637	public boolean addActionParameters(GSParams params)
638	{
639	params.addParameter(GOTO_PAGE_ARG, false);
640	params.addParameter(ENRICH_DOC_ARG, false);
641	params.addParameter(EXPAND_DOCUMENT_ARG, false);
642	params.addParameter(EXPAND_CONTENTS_ARG, false);
643	params.addParameter(REALISTIC_BOOK_ARG, false);
644
645	return true;
646	}
647
648	/**
649	* this method gets the collection description, the format info, the list of
650	* enrich services, etc - stuff that is needed for the page, but is the same
651	* whatever the query is - should be cached
652	*/
653	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
654	{
655
656	// create a message to process - contains requests for the collection
657	// description, the format element, the enrich services on offer
658	// these could all be cached
659	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
660	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
661	// the format request - ignore for now, where does this request go to??
662	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
663	info_message.appendChild(format_request);
664
665	// the enrich_services request - only do this if provide_annotations is true
666
667	if (provide_annotations)
668	{
669	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
670	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
671	info_message.appendChild(enrich_services_request);
672	}
673
674	Element info_response = (Element) this.mr.process(info_message);
675
676	// the collection is the first response
677	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
678	Element format_resp = (Element) responses.item(0);
679
680	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
681	if (format_elem != null)
682	{
683	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
684	if(global_format_elem != null)
685	{
686	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
687	}
688
689	// set the format type
690	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
691	page_response.appendChild(this.doc.importNode(format_elem, true));
692	}
693
694	if (provide_annotations)
695	{
696	Element services_resp = (Element) responses.item(1);
697
698	// a new message for the mr
699	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
700	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
701	boolean service_found = false;
702	for (int j = 0; j < e_services.getLength(); j++)
703	{
704	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
705	{
706	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
707	enrich_message.appendChild(s);
708	service_found = true;
709	}
710	}
711	if (service_found)
712	{
713	Element enrich_response = (Element) this.mr.process(enrich_message);
714
715	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
716	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
717	for (int i = 0; i < e_responses.getLength(); i++)
718	{
719	Element e_resp = (Element) e_responses.item(i);
720	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
721	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
722	service_list.appendChild(e_service);
723	}
724	page_response.appendChild(service_list);
725	}
726	} // if provide_annotations
727	return true;
728
729	}
730
731	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
732	{
733	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
734	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
735	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
736	ds_message.appendChild(ds_request);
737
738	// Create a parameter list to specify the required structure information
739	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
740	Element ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
741	ds_param_list.appendChild(ds_param);
742	ds_param.setAttribute(GSXML.NAME_ATT, "info");
743	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
744
745	ds_request.appendChild(ds_param_list);
746
747	// add the node list we created earlier
748	ds_request.appendChild(basic_doc_list);
749
750	// Process the document structure retrieve message
751	Element ds_response_message = (Element) this.mr.process(ds_message);
752	if (processErrorElements(ds_response_message, page_response))
753	{
754	return null;
755	}
756
757	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
758	String path = GSPath.createPath(links);
759	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
760	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
761	if (doctype_elem != null)
762	{
763	String doc_type = doctype_elem.getAttribute("value");
764	return doc_type;
765	}
766	return null;
767	}
768
769	/**
770	* this involves a bit of a hack to get the equivalent query terms - has to
771	* requery the query service - uses the last selected service name. (if it
772	* ends in query). should this action do the query or should it send a
773	* message to the query action? but that will involve lots of extra stuff.
774	* also doesn't handle phrases properly - just highlights all the terms
775	* found in the text.
776	*/
777	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
778	{
779	// do the query again to get term info
780	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
781	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
782
783	HashMap previous_params = (HashMap) params.get("p");
784	if (previous_params == null)
785	{
786	return dc_response_doc_content;
787	}
788	String service_name = (String) previous_params.get(GSParams.SERVICE);
789	if (service_name == null \|\| !service_name.endsWith("Query"))
790	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
791	logger.debug("invalid service, not doing highlighting");
792	return dc_response_doc_content;
793	}
794	String collection = (String) params.get(GSParams.COLLECTION);
795	UserContext userContext = new UserContext(request);
796	String to = GSPath.appendLink(collection, service_name);
797
798	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
799	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
800	mr_query_message.appendChild(mr_query_request);
801
802	// paramList
803	HashMap service_params = (HashMap) params.get("s1");
804
805	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
806	GSXML.addParametersToList(this.doc, query_param_list, service_params);
807	mr_query_request.appendChild(query_param_list);
808
809	// do the query
810	Element mr_query_response = (Element) this.mr.process(mr_query_message);
811
812	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
813	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
814	if (query_term_list_element == null)
815	{
816	// no term info
817	logger.error("No query term information.\n");
818	return dc_response_doc_content;
819	}
820
821	String content = GSXML.getNodeText(dc_response_doc_content);
822
823	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
824	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
825
826	HashSet<String> query_term_variants = new HashSet<String>();
827	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
828	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
829	{
830	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
831	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
832	{
833	for (int i = 0; i < terms_nodelist.getLength(); i++)
834	{
835	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
836	String termValueU = null;
837	String termValueL = null;
838
839	if (termValue.length() > 1)
840	{
841	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
842	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
843	}
844	else
845	{
846	termValueU = termValue.substring(0, 1).toUpperCase();
847	termValueL = termValue.substring(0, 1).toLowerCase();
848	}
849
850	query_term_variants.add(termValueU);
851	query_term_variants.add(termValueL);
852	}
853	}
854	}
855	else
856	{
857	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
858	{
859	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
860	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
861	for (int j = 0; j < equivalent_terms.length; j++)
862	{
863	query_term_variants.add(equivalent_terms[j]);
864	}
865	}
866	}
867
868	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
869
870	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
871	String performed_query = GSXML.getNodeText(query_element) + " ";
872
873	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
874	int term_start = 0;
875	boolean in_term = false;
876	boolean in_phrase = false;
877	for (int i = 0; i < performed_query.length(); i++)
878	{
879	char character = performed_query.charAt(i);
880	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
881
882	// Has a query term just started?
883	if (in_term == false && is_character_letter_or_digit == true)
884	{
885	in_term = true;
886	term_start = i;
887	}
888
889	// Or has a term just finished?
890	else if (in_term == true && is_character_letter_or_digit == false)
891	{
892	in_term = false;
893	String term = performed_query.substring(term_start, i);
894
895	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
896	if (term_element != null)
897	{
898
899	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
900
901	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
902	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
903	{
904	String termValueU = null;
905	String termValueL = null;
906
907	if (term.length() > 1)
908	{
909	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
910	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
911	}
912	else
913	{
914	termValueU = term.substring(0, 1).toUpperCase();
915	termValueL = term.substring(0, 1).toLowerCase();
916	}
917
918	phrase_query_p_term_x_variants.add(termValueU);
919	phrase_query_p_term_x_variants.add(termValueL);
920	}
921	else
922	{
923	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
924	{
925	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
926	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
927	for (int k = 0; k < term_equivalent_terms.length; k++)
928	{
929	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
930	}
931	}
932	}
933	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
934
935	if (in_phrase == false)
936	{
937	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
938	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
939	}
940	}
941	}
942	// Watch for phrases (surrounded by quotes)
943	if (character == '\"')
944	{
945	// Has a phrase just started?
946	if (in_phrase == false)
947	{
948	in_phrase = true;
949	}
950	// Or has a phrase just finished?
951	else if (in_phrase == true)
952	{
953	in_phrase = false;
954	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
955	}
956
957	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
958	}
959	}
960
961	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
962	}
963
964	/**
965	* Highlights query terms in a piece of text.
966	*/
967	private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
968	{
969	// Convert the content string to an array of characters for speed
970	char[] content_characters = new char[content.length()];
971	content.getChars(0, content.length(), content_characters, 0);
972
973	// Now skim through the content, identifying word matches
974	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
975	int word_start = 0;
976	boolean in_word = false;
977	boolean preceding_word_matched = false;
978	boolean inTag = false;
979	for (int i = 0; i < content_characters.length; i++)
980	{
981	//We don't want to find words inside HTML tags
982	if (content_characters[i] == '<')
983	{
984	inTag = true;
985	continue;
986	}
987	else if (inTag && content_characters[i] == '>')
988	{
989	inTag = false;
990	}
991	else if (inTag)
992	{
993	continue;
994	}
995
996	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
997
998	// Has a word just started?
999	if (in_word == false && is_character_letter_or_digit == true)
1000	{
1001	in_word = true;
1002	word_start = i;
1003	}
1004
1005	// Or has a word just finished?
1006	else if (in_word == true && is_character_letter_or_digit == false)
1007	{
1008	in_word = false;
1009
1010	// Check if the word matches any of the query term equivalents
1011	String word = new String(content_characters, word_start, (i - word_start));
1012	if (query_term_variants.contains(word))
1013	{
1014	// We have found a matching word, so remember its location
1015	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1016	preceding_word_matched = true;
1017	}
1018	else
1019	{
1020	preceding_word_matched = false;
1021	}
1022	}
1023	}
1024
1025	// Don't forget the last word...
1026	if (in_word == true)
1027	{
1028	// Check if the word matches any of the query term equivalents
1029	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1030	if (query_term_variants.contains(word))
1031	{
1032	// We have found a matching word, so remember its location
1033	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1034	}
1035	}
1036
1037	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1038	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1039
1040	// Deal with phrases now
1041	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1042	for (int i = 0; i < word_matches.size(); i++)
1043	{
1044	WordMatch word_match = word_matches.get(i);
1045
1046	// See if any partial phrase matches are extended by this word
1047	if (word_match.preceding_word_matched)
1048	{
1049	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1050	{
1051	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1052	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1053	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1054	if (phrase_query_p_term_x_variants.contains(word_match.word))
1055	{
1056	partial_phrase_match.num_words_matched++;
1057
1058	// Has a complete phrase match occurred?
1059	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1060	{
1061	// Check for overlaps by looking at the previous highlight range
1062	if (!highlight_end_positions.isEmpty())
1063	{
1064	int last_highlight_index = highlight_end_positions.size() - 1;
1065	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1066	if (last_highlight_end > partial_phrase_match.start_position)
1067	{
1068	// There is an overlap, so remove the previous phrase match
1069	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1070	highlight_end_positions.remove(last_highlight_index);
1071	partial_phrase_match.start_position = last_highlight_start;
1072	}
1073	}
1074
1075	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1076	highlight_end_positions.add(new Integer(word_match.end_position));
1077	}
1078	// No, but add the partial match back into the list for next time
1079	else
1080	{
1081	partial_phrase_matches.add(partial_phrase_match);
1082	}
1083	}
1084	}
1085	}
1086	else
1087	{
1088	partial_phrase_matches.clear();
1089	}
1090
1091	// See if this word is at the start of any of the phrases
1092	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1093	{
1094	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1095	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1096	if (phrase_query_p_term_1_variants.contains(word_match.word))
1097	{
1098	// If this phrase is just one word long, we have a complete match
1099	if (phrase_query_p_term_variants_list.size() == 1)
1100	{
1101	highlight_start_positions.add(new Integer(word_match.start_position));
1102	highlight_end_positions.add(new Integer(word_match.end_position));
1103	}
1104	// Otherwise we have the start of a potential phrase match
1105	else
1106	{
1107	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1108	}
1109	}
1110	}
1111	}
1112
1113	// Now add the annotation tags into the document at the correct points
1114	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1115
1116	int last_wrote = 0;
1117	for (int i = 0; i < highlight_start_positions.size(); i++)
1118	{
1119	int highlight_start = highlight_start_positions.get(i).intValue();
1120	int highlight_end = highlight_end_positions.get(i).intValue();
1121
1122	// Print anything before the highlight range
1123	if (last_wrote < highlight_start)
1124	{
1125	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1126	content_element.appendChild(this.doc.createTextNode(preceding_text));
1127	}
1128
1129	// Print the highlight text, annotated
1130	if (highlight_end > last_wrote)
1131	{
1132	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1133	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1134	annotation_element.setAttribute("type", "query_term");
1135	content_element.appendChild(annotation_element);
1136	last_wrote = highlight_end;
1137	}
1138	}
1139
1140	// Finish off any unwritten text
1141	if (last_wrote < content_characters.length)
1142	{
1143	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1144	content_element.appendChild(this.doc.createTextNode(remaining_text));
1145	}
1146
1147	return content_element;
1148	}
1149
1150	static private class WordMatch
1151	{
1152	public String word;
1153	public int start_position;
1154	public int end_position;
1155	public boolean preceding_word_matched;
1156
1157	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1158	{
1159	this.word = word;
1160	this.start_position = start_position;
1161	this.end_position = end_position;
1162	this.preceding_word_matched = preceding_word_matched;
1163	}
1164	}
1165
1166	static private class PartialPhraseMatch
1167	{
1168	public int start_position;
1169	public int query_phrase_number;
1170	public int num_words_matched;
1171
1172	public PartialPhraseMatch(int start_position, int query_phrase_number)
1173	{
1174	this.start_position = start_position;
1175	this.query_phrase_number = query_phrase_number;
1176	this.num_words_matched = 1;
1177	}
1178	}
1179	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: