Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25305

Last change on this file since 25305 was 25305, checked in by kjdon, 12 years ago
tidying up handling of external links and hrefs that are relative greenstone links
Property svn:keywords set to `Author Date Id Revision`
File size: 40.0 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router */
41	public class DocumentAction extends Action
42	{
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50	public static final String EXPAND_DOCUMENT_ARG = "ed";
51	public static final String EXPAND_CONTENTS_ARG = "ec";
52	public static final String REALISTIC_BOOK_ARG = "book";
53
54	/**
55	* if this is set to true, when a document is displayed, any annotation type
56	* services (enrich) will be offered to the user as well
57	*/
58	protected boolean provide_annotations = false;
59
60	protected boolean highlight_query_terms = false;
61
62	public boolean configure()
63	{
64	super.configure();
65	String highlight = (String) config_params.get("highlightQueryTerms");
66	if (highlight != null && highlight.equals("true"))
67	{
68	highlight_query_terms = true;
69	}
70	String annotate = (String) config_params.get("displayAnnotationService");
71	if (annotate != null && annotate.equals("true"))
72	{
73	provide_annotations = true;
74	}
75	return true;
76	}
77
78	public Node process(Node message_node)
79	{
80	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
81
82	Element message = this.converter.nodeToElement(message_node);
83
84	// the response
85	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
86	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
87	result.appendChild(page_response);
88
89	// get the request - assume only one
90	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
91	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
92	HashMap params = GSXML.extractParams(cgi_paramList, false);
93
94	// just in case there are some that need to get passed to the services
95	HashMap service_params = (HashMap) params.get("s0");
96
97	String collection = (String) params.get(GSParams.COLLECTION);
98	String document_id = (String) params.get(GSParams.DOCUMENT);
99	if (document_id != null && document_id.equals("")) {
100	document_id = null;
101	}
102	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
103	if (href != null && href.equals("")) {
104	href = null;
105	}
106	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
107	if (document_id == null && href == null)
108	{
109	logger.error("no document specified!");
110	return result;
111	}
112	if (rl != null && rl.equals("0")) {
113	// this is a true external link, we should have been directed to a different page or action
114	logger.error("rl value was 0, shouldn't get here");
115	return result;
116	}
117	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
118	if (document_type == null \|\| document_type.equals(""))
119	{
120	document_type = "simple";
121	}
122	//whether to retrieve siblings or not
123	boolean get_siblings = false;
124	String sibs = (String) params.get(SIBLING_ARG);
125	if (sibs != null && sibs.equals("1"))
126	{
127	get_siblings = true;
128	}
129
130	String doc_id_modifier = "";
131	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
132	if (sibling_num != null && !sibling_num.equals(""))
133	{
134	// we have to modify the doc name
135	doc_id_modifier = "." + sibling_num + ".ss";
136	}
137
138	boolean expand_document = false;
139	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
140	if (ed_arg != null && ed_arg.equals("1"))
141	{
142	expand_document = true;
143	}
144
145	boolean expand_contents = false;
146	if (expand_document)
147	{ // we always expand the contents with the text
148	expand_contents = true;
149	}
150	else
151	{
152	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
153	if (ec_arg != null && ec_arg.equals("1"))
154	{
155	expand_contents = true;
156	}
157	}
158
159	UserContext userContext = new UserContext(request);
160
161	//append site metadata
162	addSiteMetadata(page_response, userContext);
163	addInterfaceOptions(page_response);
164
165	// get the additional data needed for the page
166	getBackgroundData(page_response, collection, userContext);
167	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
168
169	// the_document is where all the doc info - structure and metadata etc
170	// is added into, to be returned in the page
171	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
172	page_response.appendChild(the_document);
173
174	// set the doctype from the cgi arg as an attribute
175	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
176
177	// create a basic doc list containing the current node
178	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
179	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
180	basic_doc_list.appendChild(current_doc);
181	if (document_id != null)
182	{
183	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id+doc_id_modifier);
184	}
185	else
186	{
187	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
188	// do we need this??
189	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
190	}
191
192	// Create a parameter list to specify the required structure information
193	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
194
195	if (service_params != null)
196	{
197	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
198	}
199
200	Element ds_param = null;
201	boolean get_structure = false;
202	boolean get_structure_info = false;
203	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
204	{
205	get_structure_info = true;
206
207	if (expand_contents)
208	{
209	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
210	ds_param_list.appendChild(ds_param);
211	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
212	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
213	}
214
215	// get the info needed for paged naviagtion
216	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
217	ds_param_list.appendChild(ds_param);
218	ds_param.setAttribute(GSXML.NAME_ATT, "info");
219	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
220	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
221	ds_param_list.appendChild(ds_param);
222	ds_param.setAttribute(GSXML.NAME_ATT, "info");
223	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
224	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
225	ds_param_list.appendChild(ds_param);
226	ds_param.setAttribute(GSXML.NAME_ATT, "info");
227	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
228
229	if (get_siblings)
230	{
231	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
232	ds_param_list.appendChild(ds_param);
233	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
234	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
235	}
236
237	}
238	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
239	{
240	get_structure = true;
241	if (expand_contents)
242	{
243	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
244	ds_param_list.appendChild(ds_param);
245	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
246	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
247	}
248	else
249	{
250	// get the info needed for table of contents
251	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
252	ds_param_list.appendChild(ds_param);
253	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
254	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
255	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
256	ds_param_list.appendChild(ds_param);
257	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
258	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
259	if (get_siblings)
260	{
261	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
262	ds_param_list.appendChild(ds_param);
263	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
264	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
265	}
266	}
267	}
268	else
269	{
270	// we dont need any structure
271	}
272
273	boolean has_dummy = false;
274	if (get_structure \|\| get_structure_info)
275	{
276
277	// Build a request to obtain the document structure
278	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
279	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
280	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
281	ds_message.appendChild(ds_request);
282	ds_request.appendChild(ds_param_list);
283
284	// create a doc_node_list and put in the doc_node that we are interested in
285	ds_request.appendChild(basic_doc_list);
286
287	// Process the document structure retrieve message
288	Element ds_response_message = (Element) this.mr.process(ds_message);
289	if (processErrorElements(ds_response_message, page_response))
290	{
291	return result;
292	}
293
294	// get the info and print out
295	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
296	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
297	path = GSPath.appendLink(path, "nodeStructureInfo");
298	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
299	// get the doc_node bit
300	if (ds_response_struct_info != null)
301	{
302	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
303	}
304	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
305	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
306	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
307	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
308
309	if (ds_response_structure != null)
310	{
311	// add the contents of the structure bit into the_document
312	NodeList structs = ds_response_structure.getChildNodes();
313	for (int i = 0; i < structs.getLength(); i++)
314	{
315	the_document.appendChild(this.doc.importNode(structs.item(i), true));
316	}
317	}
318	else
319	{
320	// no structure nodes, so put in a dummy doc node
321	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
322	if (document_id != null)
323	{
324	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
325	}
326	else
327	{
328	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
329
330	}
331	the_document.appendChild(doc_node);
332	has_dummy = true;
333	}
334	}
335	else
336	{ // a simple type - we dont have a dummy node for simple
337	// should think about this more
338	// no structure request, so just put in a dummy doc node
339	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
340	if (document_id != null)
341	{
342	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
343	}
344	else
345	{
346	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
347	}
348	the_document.appendChild(doc_node);
349	has_dummy = true;
350	}
351
352	// Build a request to obtain some document metadata
353	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
354	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
355	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
356	dm_message.appendChild(dm_request);
357	// Create a parameter list to specify the required metadata information
358
359	HashSet meta_names = new HashSet();
360	meta_names.add("Title"); // the default
361	if (format_elem != null)
362	{
363	getRequiredMetadataNames(format_elem, meta_names);
364	}
365
366	Element dm_param_list = createMetadataParamList(meta_names);
367	if (service_params != null)
368	{
369	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
370	}
371
372	dm_request.appendChild(dm_param_list);
373
374	// create the doc node list for the metadata request
375	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
376	dm_request.appendChild(dm_doc_list);
377
378	// Add each node from the structure response into the metadata request
379	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
380	for (int i = 0; i < doc_nodes.getLength(); i++)
381	{
382	Element doc_node = (Element) doc_nodes.item(i);
383	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
384
385	// Add the documentNode to the list
386	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
387	dm_doc_list.appendChild(dm_doc_node);
388	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
389	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
390	}
391
392	// we also want a metadata request to the top level document to get
393	// assocfilepath - this could be cached too
394	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
395	dm_message.appendChild(doc_meta_request);
396	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
397	if (service_params != null)
398	{
399	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
400	}
401
402	doc_meta_request.appendChild(doc_meta_param_list);
403	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
404	doc_meta_param_list.appendChild(doc_param);
405	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
406	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
407
408	// create the doc node list for the metadata request
409	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
410	doc_meta_request.appendChild(doc_list);
411
412	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
413	// the node we want is the root document node
414	if (document_id != null)
415	{
416	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
417	}
418	else
419	{
420	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
421	// can we assume that href is always a top level doc??
422	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
423	//doc_node.setAttribute("externalURL", has_rl);
424	}
425	doc_list.appendChild(doc_node);
426
427	Element dm_response_message = (Element) this.mr.process(dm_message);
428	if (processErrorElements(dm_response_message, page_response))
429	{
430	return result;
431	}
432
433	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
434	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
435
436	// Merge the metadata with the structure information
437	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
438	for (int i = 0; i < doc_nodes.getLength(); i++)
439	{
440	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
441	}
442	// get the top level doc metadata out
443	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
444	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
445	GSXML.mergeMetadataLists(the_document, top_doc_node);
446
447	// Build a request to obtain some document content
448	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
449	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
450	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
451	dc_message.appendChild(dc_request);
452
453	// Create a parameter list to specify the request parameters - empty for now
454	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
455	if (service_params != null)
456	{
457	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
458	}
459
460	dc_request.appendChild(dc_param_list);
461
462	// get the content
463	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
464	if (expand_document)
465	{
466	dc_request.appendChild(dm_doc_list);
467	}
468	else
469	{
470	dc_request.appendChild(basic_doc_list);
471	}
472	logger.debug("request = " + converter.getString(dc_message));
473	Element dc_response_message = (Element) this.mr.process(dc_message);
474	if (processErrorElements(dc_response_message, page_response))
475	{
476	return result;
477	}
478
479	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
480
481	if (expand_document)
482	{
483	// Merge the content with the structure information
484	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
485	for (int i = 0; i < doc_nodes.getLength(); i++)
486	{
487	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
488	if (content != null)
489	{
490	if (highlight_query_terms)
491	{
492	content = highlightQueryTerms(request, (Element) content);
493	}
494	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
495	}
496	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
497	}
498	}
499	else
500	{
501	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
502	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
503	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
504	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
505
506	if (dc_response_doc_content == null)
507	{
508	// no content to add
509	if (dc_response_doc.getAttribute("external").equals("true")) {
510
511	//if (dc_response_doc_external != null)
512	//{
513	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
514
515	the_document.setAttribute("selectedNode", href_id);
516	the_document.setAttribute("external", href_id);
517	}
518	return result;
519	}
520	if (highlight_query_terms)
521	{
522	dc_response_doc.removeChild(dc_response_doc_content);
523
524	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
525	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
526	}
527
528	if (provide_annotations)
529	{
530	String service_selected = (String) params.get(ENRICH_DOC_ARG);
531	if (service_selected != null && service_selected.equals("1"))
532	{
533	// now we can modifiy the response doc if needed
534	String enrich_service = (String) params.get(GSParams.SERVICE);
535	// send a message to the service
536	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
537	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
538	enrich_message.appendChild(enrich_request);
539	// check for parameters
540	HashMap e_service_params = (HashMap) params.get("s1");
541	if (e_service_params != null)
542	{
543	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
544	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
545	enrich_request.appendChild(enrich_pl);
546	}
547	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
548	enrich_request.appendChild(e_doc_list);
549	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
550
551	Node enrich_response = this.mr.process(enrich_message);
552
553	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
554	path = GSPath.createPath(links);
555	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
556
557	}
558	} // if provide_annotations
559
560	// use the returned id rather than the sent one cos there may have
561	// been modifiers such as .pr that are removed.
562	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
563	the_document.setAttribute("selectedNode", modified_doc_id);
564	if (has_dummy)
565	{
566	// change the id if necessary and add the content
567	Element dummy_node = (Element) doc_nodes.item(0);
568
569	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
570	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
571	// hack for simple type
572	if (document_type.equals("simple"))
573	{
574	// we dont want the internal docNode, just want the content and metadata in the document
575	// rethink this!!
576	the_document.removeChild(dummy_node);
577
578	NodeList dummy_children = dummy_node.getChildNodes();
579	//for (int i=0; i<dummy_children.getLength(); i++) {
580	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
581	{
582	// special case as we don't want more than one metadata list
583	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
584	{
585	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
586	}
587	else
588	{
589	the_document.appendChild(dummy_children.item(i));
590	}
591	}
592	}
593	}
594	else
595	{
596	// Merge the document content with the metadata and structure information
597	for (int i = 0; i < doc_nodes.getLength(); i++)
598	{
599	Node dn = doc_nodes.item(i);
600	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
601	if (dn_id.equals(modified_doc_id))
602	{
603	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
604	break;
605	}
606	}
607	}
608	}
609	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
610	return result;
611	}
612
613	/**
614	* tell the param class what its arguments are if an action has its own
615	* arguments, this should add them to the params object - particularly
616	* important for args that should not be saved
617	*/
618	public boolean addActionParameters(GSParams params)
619	{
620	params.addParameter(GOTO_PAGE_ARG, false);
621	params.addParameter(ENRICH_DOC_ARG, false);
622	params.addParameter(EXPAND_DOCUMENT_ARG, false);
623	params.addParameter(EXPAND_CONTENTS_ARG, false);
624	params.addParameter(REALISTIC_BOOK_ARG, false);
625
626	return true;
627	}
628
629	/**
630	* this method gets the collection description, the format info, the list of
631	* enrich services, etc - stuff that is needed for the page, but is the same
632	* whatever the query is - should be cached
633	*/
634	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
635	{
636
637	// create a message to process - contains requests for the collection
638	// description, the format element, the enrich services on offer
639	// these could all be cached
640	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
641	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
642	// the format request - ignore for now, where does this request go to??
643	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
644	info_message.appendChild(format_request);
645
646	// the enrich_services request - only do this if provide_annotations is true
647
648	if (provide_annotations)
649	{
650	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
651	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
652	info_message.appendChild(enrich_services_request);
653	}
654
655	Element info_response = (Element) this.mr.process(info_message);
656
657	// the collection is the first response
658	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
659	Element format_resp = (Element) responses.item(0);
660
661	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
662	if (format_elem != null)
663	{
664	logger.debug("doc action found a format statement");
665	// set teh format type
666	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
667	page_response.appendChild(this.doc.importNode(format_elem, true));
668	}
669
670	if (provide_annotations)
671	{
672	Element services_resp = (Element) responses.item(1);
673
674	// a new message for the mr
675	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
676
677	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
678	boolean service_found = false;
679	for (int j = 0; j < e_services.getLength(); j++)
680	{
681	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
682	{
683	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
684	enrich_message.appendChild(s);
685	service_found = true;
686	}
687	}
688	if (service_found)
689	{
690	Element enrich_response = (Element) this.mr.process(enrich_message);
691
692	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
693	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
694	for (int i = 0; i < e_responses.getLength(); i++)
695	{
696	Element e_resp = (Element) e_responses.item(i);
697	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
698	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
699	service_list.appendChild(e_service);
700	}
701	page_response.appendChild(service_list);
702	}
703	} // if provide_annotations
704	return true;
705
706	}
707
708	/**
709	* this involves a bit of a hack to get the equivalent query terms - has to
710	* requery the query service - uses the last selected service name. (if it
711	* ends in query). should this action do the query or should it send a
712	* message to the query action? but that will involve lots of extra stuff.
713	* also doesn't handle phrases properly - just highlights all the terms
714	* found in the text.
715	*/
716	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
717	{
718
719	// do the query again to get term info
720	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
721	HashMap params = GSXML.extractParams(cgi_param_list, false);
722
723	HashMap previous_params = (HashMap) params.get("p");
724	if (previous_params == null)
725	{
726	return dc_response_doc_content;
727	}
728	String service_name = (String) previous_params.get(GSParams.SERVICE);
729	if (service_name == null \|\| !service_name.endsWith("Query"))
730	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
731	logger.debug("invalid service, not doing highlighting");
732	return dc_response_doc_content;
733	}
734	String collection = (String) params.get(GSParams.COLLECTION);
735	UserContext userContext = new UserContext(request);
736	String to = GSPath.appendLink(collection, service_name);
737
738	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
739	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
740	mr_query_message.appendChild(mr_query_request);
741
742	// paramList
743	HashMap service_params = (HashMap) params.get("s1");
744
745	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
746	GSXML.addParametersToList(this.doc, query_param_list, service_params);
747	mr_query_request.appendChild(query_param_list);
748
749	// do the query
750	Element mr_query_response = (Element) this.mr.process(mr_query_message);
751
752	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
753	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
754	if (query_term_list_element == null)
755	{
756	// no term info
757	logger.error("No query term information.\n");
758	return dc_response_doc_content;
759	}
760
761	String content = GSXML.getNodeText(dc_response_doc_content);
762
763	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
764	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
765
766	HashSet query_term_variants = new HashSet();
767	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
768	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
769	{
770	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
771	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
772	{
773	for (int i = 0; i < terms_nodelist.getLength(); i++)
774	{
775	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
776	String termValueU = null;
777	String termValueL = null;
778
779	if (termValue.length() > 1)
780	{
781	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
782	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
783	}
784	else
785	{
786	termValueU = termValue.substring(0, 1).toUpperCase();
787	termValueL = termValue.substring(0, 1).toLowerCase();
788	}
789
790	query_term_variants.add(termValueU);
791	query_term_variants.add(termValueL);
792	}
793	}
794	}
795	else
796	{
797	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
798	{
799	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
800	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
801	for (int j = 0; j < equivalent_terms.length; j++)
802	{
803	query_term_variants.add(equivalent_terms[j]);
804	}
805	}
806	}
807
808	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
809
810	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
811	String performed_query = GSXML.getNodeText(query_element) + " ";
812
813	ArrayList phrase_query_p_term_variants_list = new ArrayList();
814	int term_start = 0;
815	boolean in_term = false;
816	boolean in_phrase = false;
817	for (int i = 0; i < performed_query.length(); i++)
818	{
819	char character = performed_query.charAt(i);
820	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
821
822	// Has a query term just started?
823	if (in_term == false && is_character_letter_or_digit == true)
824	{
825	in_term = true;
826	term_start = i;
827	}
828
829	// Or has a term just finished?
830	else if (in_term == true && is_character_letter_or_digit == false)
831	{
832	in_term = false;
833	String term = performed_query.substring(term_start, i);
834
835	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
836	if (term_element != null)
837	{
838
839	HashSet phrase_query_p_term_x_variants = new HashSet();
840
841	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
842	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
843	{
844	String termValueU = null;
845	String termValueL = null;
846
847	if (term.length() > 1)
848	{
849	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
850	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
851	}
852	else
853	{
854	termValueU = term.substring(0, 1).toUpperCase();
855	termValueL = term.substring(0, 1).toLowerCase();
856	}
857
858	phrase_query_p_term_x_variants.add(termValueU);
859	phrase_query_p_term_x_variants.add(termValueL);
860	}
861	else
862	{
863	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
864	{
865	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
866	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
867	for (int k = 0; k < term_equivalent_terms.length; k++)
868	{
869	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
870	}
871	}
872	}
873	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
874
875	if (in_phrase == false)
876	{
877	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
878	phrase_query_p_term_variants_list = new ArrayList();
879	}
880	}
881	}
882	// Watch for phrases (surrounded by quotes)
883	if (character == '\"')
884	{
885	// Has a phrase just started?
886	if (in_phrase == false)
887	{
888	in_phrase = true;
889	}
890	// Or has a phrase just finished?
891	else if (in_phrase == true)
892	{
893	in_phrase = false;
894	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
895	}
896
897	phrase_query_p_term_variants_list = new ArrayList();
898	}
899	}
900
901	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
902	}
903
904	/**
905	* Highlights query terms in a piece of text.
906	*/
907	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
908	{
909	// Convert the content string to an array of characters for speed
910	char[] content_characters = new char[content.length()];
911	content.getChars(0, content.length(), content_characters, 0);
912
913	// Now skim through the content, identifying word matches
914	ArrayList word_matches = new ArrayList();
915	int word_start = 0;
916	boolean in_word = false;
917	boolean preceding_word_matched = false;
918	boolean inTag = false;
919	for (int i = 0; i < content_characters.length; i++)
920	{
921	//We don't want to find words inside HTML tags
922	if (content_characters[i] == '<')
923	{
924	inTag = true;
925	continue;
926	}
927	else if (inTag && content_characters[i] == '>')
928	{
929	inTag = false;
930	}
931	else if (inTag)
932	{
933	continue;
934	}
935
936	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
937
938	// Has a word just started?
939	if (in_word == false && is_character_letter_or_digit == true)
940	{
941	in_word = true;
942	word_start = i;
943	}
944
945	// Or has a word just finished?
946	else if (in_word == true && is_character_letter_or_digit == false)
947	{
948	in_word = false;
949
950	// Check if the word matches any of the query term equivalents
951	String word = new String(content_characters, word_start, (i - word_start));
952	if (query_term_variants.contains(word))
953	{
954	// We have found a matching word, so remember its location
955	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
956	preceding_word_matched = true;
957	}
958	else
959	{
960	preceding_word_matched = false;
961	}
962	}
963	}
964
965	// Don't forget the last word...
966	if (in_word == true)
967	{
968	// Check if the word matches any of the query term equivalents
969	String word = new String(content_characters, word_start, (content_characters.length - word_start));
970	if (query_term_variants.contains(word))
971	{
972	// We have found a matching word, so remember its location
973	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
974	}
975	}
976
977	ArrayList highlight_start_positions = new ArrayList();
978	ArrayList highlight_end_positions = new ArrayList();
979
980	// Deal with phrases now
981	ArrayList partial_phrase_matches = new ArrayList();
982	for (int i = 0; i < word_matches.size(); i++)
983	{
984	WordMatch word_match = (WordMatch) word_matches.get(i);
985
986	// See if any partial phrase matches are extended by this word
987	if (word_match.preceding_word_matched)
988	{
989	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
990	{
991	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
992	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
993	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
994	if (phrase_query_p_term_x_variants.contains(word_match.word))
995	{
996	partial_phrase_match.num_words_matched++;
997
998	// Has a complete phrase match occurred?
999	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1000	{
1001	// Check for overlaps by looking at the previous highlight range
1002	if (!highlight_end_positions.isEmpty())
1003	{
1004	int last_highlight_index = highlight_end_positions.size() - 1;
1005	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
1006	if (last_highlight_end > partial_phrase_match.start_position)
1007	{
1008	// There is an overlap, so remove the previous phrase match
1009	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
1010	highlight_end_positions.remove(last_highlight_index);
1011	partial_phrase_match.start_position = last_highlight_start;
1012	}
1013	}
1014
1015	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1016	highlight_end_positions.add(new Integer(word_match.end_position));
1017	}
1018	// No, but add the partial match back into the list for next time
1019	else
1020	{
1021	partial_phrase_matches.add(partial_phrase_match);
1022	}
1023	}
1024	}
1025	}
1026	else
1027	{
1028	partial_phrase_matches.clear();
1029	}
1030
1031	// See if this word is at the start of any of the phrases
1032	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1033	{
1034	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
1035	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1036	if (phrase_query_p_term_1_variants.contains(word_match.word))
1037	{
1038	// If this phrase is just one word long, we have a complete match
1039	if (phrase_query_p_term_variants_list.size() == 1)
1040	{
1041	highlight_start_positions.add(new Integer(word_match.start_position));
1042	highlight_end_positions.add(new Integer(word_match.end_position));
1043	}
1044	// Otherwise we have the start of a potential phrase match
1045	else
1046	{
1047	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1048	}
1049	}
1050	}
1051	}
1052
1053	// Now add the annotation tags into the document at the correct points
1054	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1055
1056	int last_wrote = 0;
1057	for (int i = 0; i < highlight_start_positions.size(); i++)
1058	{
1059	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
1060	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
1061
1062	// Print anything before the highlight range
1063	if (last_wrote < highlight_start)
1064	{
1065	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1066	content_element.appendChild(this.doc.createTextNode(preceding_text));
1067	}
1068
1069	// Print the highlight text, annotated
1070	if (highlight_end > last_wrote)
1071	{
1072	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1073	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1074	annotation_element.setAttribute("type", "query_term");
1075	content_element.appendChild(annotation_element);
1076	last_wrote = highlight_end;
1077	}
1078	}
1079
1080	// Finish off any unwritten text
1081	if (last_wrote < content_characters.length)
1082	{
1083	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1084	content_element.appendChild(this.doc.createTextNode(remaining_text));
1085	}
1086
1087	return content_element;
1088	}
1089
1090	static private class WordMatch
1091	{
1092	public String word;
1093	public int start_position;
1094	public int end_position;
1095	public boolean preceding_word_matched;
1096
1097	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1098	{
1099	this.word = word;
1100	this.start_position = start_position;
1101	this.end_position = end_position;
1102	this.preceding_word_matched = preceding_word_matched;
1103	}
1104	}
1105
1106	static private class PartialPhraseMatch
1107	{
1108	public int start_position;
1109	public int query_phrase_number;
1110	public int num_words_matched;
1111
1112	public PartialPhraseMatch(int start_position, int query_phrase_number)
1113	{
1114	this.start_position = start_position;
1115	this.query_phrase_number = query_phrase_number;
1116	this.num_words_matched = 1;
1117	}
1118	}
1119	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: