Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25490

Last change on this file since 25490 was 25355, checked in by sjm84, 12 years ago
Some fixes to the file formatting
Property svn:keywords set to `Author Date Id Revision`
File size: 40.0 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router */
41	public class DocumentAction extends Action
42	{
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50	public static final String EXPAND_DOCUMENT_ARG = "ed";
51	public static final String EXPAND_CONTENTS_ARG = "ec";
52	public static final String REALISTIC_BOOK_ARG = "book";
53
54	/**
55	* if this is set to true, when a document is displayed, any annotation type
56	* services (enrich) will be offered to the user as well
57	*/
58	protected boolean provide_annotations = false;
59
60	protected boolean highlight_query_terms = false;
61
62	public boolean configure()
63	{
64	super.configure();
65	String highlight = (String) config_params.get("highlightQueryTerms");
66	if (highlight != null && highlight.equals("true"))
67	{
68	highlight_query_terms = true;
69	}
70	String annotate = (String) config_params.get("displayAnnotationService");
71	if (annotate != null && annotate.equals("true"))
72	{
73	provide_annotations = true;
74	}
75	return true;
76	}
77
78	public Node process(Node message_node)
79	{
80	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
81
82	Element message = this.converter.nodeToElement(message_node);
83
84	// the response
85	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
86	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
87	result.appendChild(page_response);
88
89	// get the request - assume only one
90	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
91	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
92	HashMap params = GSXML.extractParams(cgi_paramList, false);
93
94	// just in case there are some that need to get passed to the services
95	HashMap service_params = (HashMap) params.get("s0");
96
97	String collection = (String) params.get(GSParams.COLLECTION);
98	String document_id = (String) params.get(GSParams.DOCUMENT);
99	if (document_id != null && document_id.equals(""))
100	{
101	document_id = null;
102	}
103	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
104	if (href != null && href.equals(""))
105	{
106	href = null;
107	}
108	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
109	if (document_id == null && href == null)
110	{
111	logger.error("no document specified!");
112	return result;
113	}
114	if (rl != null && rl.equals("0"))
115	{
116	// this is a true external link, we should have been directed to a different page or action
117	logger.error("rl value was 0, shouldn't get here");
118	return result;
119	}
120	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
121	if (document_type == null \|\| document_type.equals(""))
122	{
123	document_type = "simple";
124	}
125	//whether to retrieve siblings or not
126	boolean get_siblings = false;
127	String sibs = (String) params.get(SIBLING_ARG);
128	if (sibs != null && sibs.equals("1"))
129	{
130	get_siblings = true;
131	}
132
133	String doc_id_modifier = "";
134	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
135	if (sibling_num != null && !sibling_num.equals(""))
136	{
137	// we have to modify the doc name
138	doc_id_modifier = "." + sibling_num + ".ss";
139	}
140
141	boolean expand_document = false;
142	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
143	if (ed_arg != null && ed_arg.equals("1"))
144	{
145	expand_document = true;
146	}
147
148	boolean expand_contents = false;
149	if (expand_document)
150	{ // we always expand the contents with the text
151	expand_contents = true;
152	}
153	else
154	{
155	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
156	if (ec_arg != null && ec_arg.equals("1"))
157	{
158	expand_contents = true;
159	}
160	}
161
162	UserContext userContext = new UserContext(request);
163
164	//append site metadata
165	addSiteMetadata(page_response, userContext);
166	addInterfaceOptions(page_response);
167
168	// get the additional data needed for the page
169	getBackgroundData(page_response, collection, userContext);
170	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
171
172	// the_document is where all the doc info - structure and metadata etc
173	// is added into, to be returned in the page
174	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
175	page_response.appendChild(the_document);
176
177	// set the doctype from the cgi arg as an attribute
178	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
179
180	// create a basic doc list containing the current node
181	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
182	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
183	basic_doc_list.appendChild(current_doc);
184	if (document_id != null)
185	{
186	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
187	}
188	else
189	{
190	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
191	// do we need this??
192	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
193	}
194
195	// Create a parameter list to specify the required structure information
196	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
197
198	if (service_params != null)
199	{
200	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
201	}
202
203	Element ds_param = null;
204	boolean get_structure = false;
205	boolean get_structure_info = false;
206	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
207	{
208	get_structure_info = true;
209
210	if (expand_contents)
211	{
212	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
213	ds_param_list.appendChild(ds_param);
214	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
215	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
216	}
217
218	// get the info needed for paged naviagtion
219	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
220	ds_param_list.appendChild(ds_param);
221	ds_param.setAttribute(GSXML.NAME_ATT, "info");
222	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
223	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
224	ds_param_list.appendChild(ds_param);
225	ds_param.setAttribute(GSXML.NAME_ATT, "info");
226	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
227	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
228	ds_param_list.appendChild(ds_param);
229	ds_param.setAttribute(GSXML.NAME_ATT, "info");
230	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
231
232	if (get_siblings)
233	{
234	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
235	ds_param_list.appendChild(ds_param);
236	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
237	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
238	}
239
240	}
241	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
242	{
243	get_structure = true;
244	if (expand_contents)
245	{
246	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
247	ds_param_list.appendChild(ds_param);
248	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
249	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
250	}
251	else
252	{
253	// get the info needed for table of contents
254	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
255	ds_param_list.appendChild(ds_param);
256	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
257	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
258	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
259	ds_param_list.appendChild(ds_param);
260	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
261	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
262	if (get_siblings)
263	{
264	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
265	ds_param_list.appendChild(ds_param);
266	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
267	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
268	}
269	}
270	}
271	else
272	{
273	// we dont need any structure
274	}
275
276	boolean has_dummy = false;
277	if (get_structure \|\| get_structure_info)
278	{
279
280	// Build a request to obtain the document structure
281	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
282	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
283	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
284	ds_message.appendChild(ds_request);
285	ds_request.appendChild(ds_param_list);
286
287	// create a doc_node_list and put in the doc_node that we are interested in
288	ds_request.appendChild(basic_doc_list);
289
290	// Process the document structure retrieve message
291	Element ds_response_message = (Element) this.mr.process(ds_message);
292	if (processErrorElements(ds_response_message, page_response))
293	{
294	return result;
295	}
296
297	// get the info and print out
298	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
299	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
300	path = GSPath.appendLink(path, "nodeStructureInfo");
301	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
302	// get the doc_node bit
303	if (ds_response_struct_info != null)
304	{
305	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
306	}
307	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
308	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
309	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
310	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
311
312	if (ds_response_structure != null)
313	{
314	// add the contents of the structure bit into the_document
315	NodeList structs = ds_response_structure.getChildNodes();
316	for (int i = 0; i < structs.getLength(); i++)
317	{
318	the_document.appendChild(this.doc.importNode(structs.item(i), true));
319	}
320	}
321	else
322	{
323	// no structure nodes, so put in a dummy doc node
324	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
325	if (document_id != null)
326	{
327	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
328	}
329	else
330	{
331	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
332
333	}
334	the_document.appendChild(doc_node);
335	has_dummy = true;
336	}
337	}
338	else
339	{ // a simple type - we dont have a dummy node for simple
340	// should think about this more
341	// no structure request, so just put in a dummy doc node
342	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
343	if (document_id != null)
344	{
345	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
346	}
347	else
348	{
349	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
350	}
351	the_document.appendChild(doc_node);
352	has_dummy = true;
353	}
354
355	// Build a request to obtain some document metadata
356	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
357	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
358	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
359	dm_message.appendChild(dm_request);
360	// Create a parameter list to specify the required metadata information
361
362	HashSet meta_names = new HashSet();
363	meta_names.add("Title"); // the default
364	if (format_elem != null)
365	{
366	getRequiredMetadataNames(format_elem, meta_names);
367	}
368
369	Element dm_param_list = createMetadataParamList(meta_names);
370	if (service_params != null)
371	{
372	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
373	}
374
375	dm_request.appendChild(dm_param_list);
376
377	// create the doc node list for the metadata request
378	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
379	dm_request.appendChild(dm_doc_list);
380
381	// Add each node from the structure response into the metadata request
382	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
383	for (int i = 0; i < doc_nodes.getLength(); i++)
384	{
385	Element doc_node = (Element) doc_nodes.item(i);
386	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
387
388	// Add the documentNode to the list
389	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
390	dm_doc_list.appendChild(dm_doc_node);
391	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
392	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
393	}
394
395	// we also want a metadata request to the top level document to get
396	// assocfilepath - this could be cached too
397	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
398	dm_message.appendChild(doc_meta_request);
399	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
400	if (service_params != null)
401	{
402	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
403	}
404
405	doc_meta_request.appendChild(doc_meta_param_list);
406	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
407	doc_meta_param_list.appendChild(doc_param);
408	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
409	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
410
411	// create the doc node list for the metadata request
412	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
413	doc_meta_request.appendChild(doc_list);
414
415	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
416	// the node we want is the root document node
417	if (document_id != null)
418	{
419	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
420	}
421	else
422	{
423	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
424	// can we assume that href is always a top level doc??
425	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
426	//doc_node.setAttribute("externalURL", has_rl);
427	}
428	doc_list.appendChild(doc_node);
429
430	Element dm_response_message = (Element) this.mr.process(dm_message);
431	if (processErrorElements(dm_response_message, page_response))
432	{
433	return result;
434	}
435
436	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
437	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
438
439	// Merge the metadata with the structure information
440	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
441	for (int i = 0; i < doc_nodes.getLength(); i++)
442	{
443	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
444	}
445	// get the top level doc metadata out
446	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
447	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
448	GSXML.mergeMetadataLists(the_document, top_doc_node);
449
450	// Build a request to obtain some document content
451	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
452	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
453	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
454	dc_message.appendChild(dc_request);
455
456	// Create a parameter list to specify the request parameters - empty for now
457	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
458	if (service_params != null)
459	{
460	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
461	}
462
463	dc_request.appendChild(dc_param_list);
464
465	// get the content
466	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
467	if (expand_document)
468	{
469	dc_request.appendChild(dm_doc_list);
470	}
471	else
472	{
473	dc_request.appendChild(basic_doc_list);
474	}
475	logger.debug("request = " + converter.getString(dc_message));
476	Element dc_response_message = (Element) this.mr.process(dc_message);
477	if (processErrorElements(dc_response_message, page_response))
478	{
479	return result;
480	}
481
482	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
483
484	if (expand_document)
485	{
486	// Merge the content with the structure information
487	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
488	for (int i = 0; i < doc_nodes.getLength(); i++)
489	{
490	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
491	if (content != null)
492	{
493	if (highlight_query_terms)
494	{
495	content = highlightQueryTerms(request, (Element) content);
496	}
497	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
498	}
499	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
500	}
501	}
502	else
503	{
504	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
505	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
506	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
507	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
508
509	if (dc_response_doc_content == null)
510	{
511	// no content to add
512	if (dc_response_doc.getAttribute("external").equals("true"))
513	{
514
515	//if (dc_response_doc_external != null)
516	//{
517	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
518
519	the_document.setAttribute("selectedNode", href_id);
520	the_document.setAttribute("external", href_id);
521	}
522	return result;
523	}
524	if (highlight_query_terms)
525	{
526	dc_response_doc.removeChild(dc_response_doc_content);
527
528	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
529	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
530	}
531
532	if (provide_annotations)
533	{
534	String service_selected = (String) params.get(ENRICH_DOC_ARG);
535	if (service_selected != null && service_selected.equals("1"))
536	{
537	// now we can modifiy the response doc if needed
538	String enrich_service = (String) params.get(GSParams.SERVICE);
539	// send a message to the service
540	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
541	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
542	enrich_message.appendChild(enrich_request);
543	// check for parameters
544	HashMap e_service_params = (HashMap) params.get("s1");
545	if (e_service_params != null)
546	{
547	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
548	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
549	enrich_request.appendChild(enrich_pl);
550	}
551	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
552	enrich_request.appendChild(e_doc_list);
553	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
554
555	Node enrich_response = this.mr.process(enrich_message);
556
557	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
558	path = GSPath.createPath(links);
559	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
560
561	}
562	} // if provide_annotations
563
564	// use the returned id rather than the sent one cos there may have
565	// been modifiers such as .pr that are removed.
566	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
567	the_document.setAttribute("selectedNode", modified_doc_id);
568	if (has_dummy)
569	{
570	// change the id if necessary and add the content
571	Element dummy_node = (Element) doc_nodes.item(0);
572
573	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
574	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
575	// hack for simple type
576	if (document_type.equals("simple"))
577	{
578	// we dont want the internal docNode, just want the content and metadata in the document
579	// rethink this!!
580	the_document.removeChild(dummy_node);
581
582	NodeList dummy_children = dummy_node.getChildNodes();
583	//for (int i=0; i<dummy_children.getLength(); i++) {
584	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
585	{
586	// special case as we don't want more than one metadata list
587	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
588	{
589	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
590	}
591	else
592	{
593	the_document.appendChild(dummy_children.item(i));
594	}
595	}
596	}
597	}
598	else
599	{
600	// Merge the document content with the metadata and structure information
601	for (int i = 0; i < doc_nodes.getLength(); i++)
602	{
603	Node dn = doc_nodes.item(i);
604	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
605	if (dn_id.equals(modified_doc_id))
606	{
607	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
608	break;
609	}
610	}
611	}
612	}
613	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
614	return result;
615	}
616
617	/**
618	* tell the param class what its arguments are if an action has its own
619	* arguments, this should add them to the params object - particularly
620	* important for args that should not be saved
621	*/
622	public boolean addActionParameters(GSParams params)
623	{
624	params.addParameter(GOTO_PAGE_ARG, false);
625	params.addParameter(ENRICH_DOC_ARG, false);
626	params.addParameter(EXPAND_DOCUMENT_ARG, false);
627	params.addParameter(EXPAND_CONTENTS_ARG, false);
628	params.addParameter(REALISTIC_BOOK_ARG, false);
629
630	return true;
631	}
632
633	/**
634	* this method gets the collection description, the format info, the list of
635	* enrich services, etc - stuff that is needed for the page, but is the same
636	* whatever the query is - should be cached
637	*/
638	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
639	{
640
641	// create a message to process - contains requests for the collection
642	// description, the format element, the enrich services on offer
643	// these could all be cached
644	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
645	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
646	// the format request - ignore for now, where does this request go to??
647	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
648	info_message.appendChild(format_request);
649
650	// the enrich_services request - only do this if provide_annotations is true
651
652	if (provide_annotations)
653	{
654	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
655	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
656	info_message.appendChild(enrich_services_request);
657	}
658
659	Element info_response = (Element) this.mr.process(info_message);
660
661	// the collection is the first response
662	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
663	Element format_resp = (Element) responses.item(0);
664
665	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
666	if (format_elem != null)
667	{
668	logger.debug("doc action found a format statement");
669	// set teh format type
670	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
671	page_response.appendChild(this.doc.importNode(format_elem, true));
672	}
673
674	if (provide_annotations)
675	{
676	Element services_resp = (Element) responses.item(1);
677
678	// a new message for the mr
679	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
680
681	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
682	boolean service_found = false;
683	for (int j = 0; j < e_services.getLength(); j++)
684	{
685	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
686	{
687	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
688	enrich_message.appendChild(s);
689	service_found = true;
690	}
691	}
692	if (service_found)
693	{
694	Element enrich_response = (Element) this.mr.process(enrich_message);
695
696	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
697	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
698	for (int i = 0; i < e_responses.getLength(); i++)
699	{
700	Element e_resp = (Element) e_responses.item(i);
701	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
702	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
703	service_list.appendChild(e_service);
704	}
705	page_response.appendChild(service_list);
706	}
707	} // if provide_annotations
708	return true;
709
710	}
711
712	/**
713	* this involves a bit of a hack to get the equivalent query terms - has to
714	* requery the query service - uses the last selected service name. (if it
715	* ends in query). should this action do the query or should it send a
716	* message to the query action? but that will involve lots of extra stuff.
717	* also doesn't handle phrases properly - just highlights all the terms
718	* found in the text.
719	*/
720	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
721	{
722	// do the query again to get term info
723	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
724	HashMap params = GSXML.extractParams(cgi_param_list, false);
725
726	HashMap previous_params = (HashMap) params.get("p");
727	if (previous_params == null)
728	{
729	return dc_response_doc_content;
730	}
731	String service_name = (String) previous_params.get(GSParams.SERVICE);
732	if (service_name == null \|\| !service_name.endsWith("Query"))
733	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
734	logger.debug("invalid service, not doing highlighting");
735	return dc_response_doc_content;
736	}
737	String collection = (String) params.get(GSParams.COLLECTION);
738	UserContext userContext = new UserContext(request);
739	String to = GSPath.appendLink(collection, service_name);
740
741	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
742	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
743	mr_query_message.appendChild(mr_query_request);
744
745	// paramList
746	HashMap service_params = (HashMap) params.get("s1");
747
748	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
749	GSXML.addParametersToList(this.doc, query_param_list, service_params);
750	mr_query_request.appendChild(query_param_list);
751
752	// do the query
753	Element mr_query_response = (Element) this.mr.process(mr_query_message);
754
755	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
756	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
757	if (query_term_list_element == null)
758	{
759	// no term info
760	logger.error("No query term information.\n");
761	return dc_response_doc_content;
762	}
763
764	String content = GSXML.getNodeText(dc_response_doc_content);
765
766	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
767	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
768
769	HashSet query_term_variants = new HashSet();
770	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
771	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
772	{
773	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
774	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
775	{
776	for (int i = 0; i < terms_nodelist.getLength(); i++)
777	{
778	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
779	String termValueU = null;
780	String termValueL = null;
781
782	if (termValue.length() > 1)
783	{
784	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
785	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
786	}
787	else
788	{
789	termValueU = termValue.substring(0, 1).toUpperCase();
790	termValueL = termValue.substring(0, 1).toLowerCase();
791	}
792
793	query_term_variants.add(termValueU);
794	query_term_variants.add(termValueL);
795	}
796	}
797	}
798	else
799	{
800	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
801	{
802	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
803	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
804	for (int j = 0; j < equivalent_terms.length; j++)
805	{
806	query_term_variants.add(equivalent_terms[j]);
807	}
808	}
809	}
810
811	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
812
813	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
814	String performed_query = GSXML.getNodeText(query_element) + " ";
815
816	ArrayList phrase_query_p_term_variants_list = new ArrayList();
817	int term_start = 0;
818	boolean in_term = false;
819	boolean in_phrase = false;
820	for (int i = 0; i < performed_query.length(); i++)
821	{
822	char character = performed_query.charAt(i);
823	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
824
825	// Has a query term just started?
826	if (in_term == false && is_character_letter_or_digit == true)
827	{
828	in_term = true;
829	term_start = i;
830	}
831
832	// Or has a term just finished?
833	else if (in_term == true && is_character_letter_or_digit == false)
834	{
835	in_term = false;
836	String term = performed_query.substring(term_start, i);
837
838	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
839	if (term_element != null)
840	{
841
842	HashSet phrase_query_p_term_x_variants = new HashSet();
843
844	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
845	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
846	{
847	String termValueU = null;
848	String termValueL = null;
849
850	if (term.length() > 1)
851	{
852	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
853	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
854	}
855	else
856	{
857	termValueU = term.substring(0, 1).toUpperCase();
858	termValueL = term.substring(0, 1).toLowerCase();
859	}
860
861	phrase_query_p_term_x_variants.add(termValueU);
862	phrase_query_p_term_x_variants.add(termValueL);
863	}
864	else
865	{
866	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
867	{
868	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
869	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
870	for (int k = 0; k < term_equivalent_terms.length; k++)
871	{
872	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
873	}
874	}
875	}
876	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
877
878	if (in_phrase == false)
879	{
880	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
881	phrase_query_p_term_variants_list = new ArrayList();
882	}
883	}
884	}
885	// Watch for phrases (surrounded by quotes)
886	if (character == '\"')
887	{
888	// Has a phrase just started?
889	if (in_phrase == false)
890	{
891	in_phrase = true;
892	}
893	// Or has a phrase just finished?
894	else if (in_phrase == true)
895	{
896	in_phrase = false;
897	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
898	}
899
900	phrase_query_p_term_variants_list = new ArrayList();
901	}
902	}
903
904	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
905	}
906
907	/**
908	* Highlights query terms in a piece of text.
909	*/
910	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
911	{
912	// Convert the content string to an array of characters for speed
913	char[] content_characters = new char[content.length()];
914	content.getChars(0, content.length(), content_characters, 0);
915
916	// Now skim through the content, identifying word matches
917	ArrayList word_matches = new ArrayList();
918	int word_start = 0;
919	boolean in_word = false;
920	boolean preceding_word_matched = false;
921	boolean inTag = false;
922	for (int i = 0; i < content_characters.length; i++)
923	{
924	//We don't want to find words inside HTML tags
925	if (content_characters[i] == '<')
926	{
927	inTag = true;
928	continue;
929	}
930	else if (inTag && content_characters[i] == '>')
931	{
932	inTag = false;
933	}
934	else if (inTag)
935	{
936	continue;
937	}
938
939	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
940
941	// Has a word just started?
942	if (in_word == false && is_character_letter_or_digit == true)
943	{
944	in_word = true;
945	word_start = i;
946	}
947
948	// Or has a word just finished?
949	else if (in_word == true && is_character_letter_or_digit == false)
950	{
951	in_word = false;
952
953	// Check if the word matches any of the query term equivalents
954	String word = new String(content_characters, word_start, (i - word_start));
955	if (query_term_variants.contains(word))
956	{
957	// We have found a matching word, so remember its location
958	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
959	preceding_word_matched = true;
960	}
961	else
962	{
963	preceding_word_matched = false;
964	}
965	}
966	}
967
968	// Don't forget the last word...
969	if (in_word == true)
970	{
971	// Check if the word matches any of the query term equivalents
972	String word = new String(content_characters, word_start, (content_characters.length - word_start));
973	if (query_term_variants.contains(word))
974	{
975	// We have found a matching word, so remember its location
976	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
977	}
978	}
979
980	ArrayList highlight_start_positions = new ArrayList();
981	ArrayList highlight_end_positions = new ArrayList();
982
983	// Deal with phrases now
984	ArrayList partial_phrase_matches = new ArrayList();
985	for (int i = 0; i < word_matches.size(); i++)
986	{
987	WordMatch word_match = (WordMatch) word_matches.get(i);
988
989	// See if any partial phrase matches are extended by this word
990	if (word_match.preceding_word_matched)
991	{
992	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
993	{
994	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
995	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
996	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
997	if (phrase_query_p_term_x_variants.contains(word_match.word))
998	{
999	partial_phrase_match.num_words_matched++;
1000
1001	// Has a complete phrase match occurred?
1002	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1003	{
1004	// Check for overlaps by looking at the previous highlight range
1005	if (!highlight_end_positions.isEmpty())
1006	{
1007	int last_highlight_index = highlight_end_positions.size() - 1;
1008	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
1009	if (last_highlight_end > partial_phrase_match.start_position)
1010	{
1011	// There is an overlap, so remove the previous phrase match
1012	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
1013	highlight_end_positions.remove(last_highlight_index);
1014	partial_phrase_match.start_position = last_highlight_start;
1015	}
1016	}
1017
1018	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1019	highlight_end_positions.add(new Integer(word_match.end_position));
1020	}
1021	// No, but add the partial match back into the list for next time
1022	else
1023	{
1024	partial_phrase_matches.add(partial_phrase_match);
1025	}
1026	}
1027	}
1028	}
1029	else
1030	{
1031	partial_phrase_matches.clear();
1032	}
1033
1034	// See if this word is at the start of any of the phrases
1035	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1036	{
1037	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
1038	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1039	if (phrase_query_p_term_1_variants.contains(word_match.word))
1040	{
1041	// If this phrase is just one word long, we have a complete match
1042	if (phrase_query_p_term_variants_list.size() == 1)
1043	{
1044	highlight_start_positions.add(new Integer(word_match.start_position));
1045	highlight_end_positions.add(new Integer(word_match.end_position));
1046	}
1047	// Otherwise we have the start of a potential phrase match
1048	else
1049	{
1050	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1051	}
1052	}
1053	}
1054	}
1055
1056	// Now add the annotation tags into the document at the correct points
1057	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1058
1059	int last_wrote = 0;
1060	for (int i = 0; i < highlight_start_positions.size(); i++)
1061	{
1062	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
1063	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
1064
1065	// Print anything before the highlight range
1066	if (last_wrote < highlight_start)
1067	{
1068	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1069	content_element.appendChild(this.doc.createTextNode(preceding_text));
1070	}
1071
1072	// Print the highlight text, annotated
1073	if (highlight_end > last_wrote)
1074	{
1075	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1076	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1077	annotation_element.setAttribute("type", "query_term");
1078	content_element.appendChild(annotation_element);
1079	last_wrote = highlight_end;
1080	}
1081	}
1082
1083	// Finish off any unwritten text
1084	if (last_wrote < content_characters.length)
1085	{
1086	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1087	content_element.appendChild(this.doc.createTextNode(remaining_text));
1088	}
1089
1090	return content_element;
1091	}
1092
1093	static private class WordMatch
1094	{
1095	public String word;
1096	public int start_position;
1097	public int end_position;
1098	public boolean preceding_word_matched;
1099
1100	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1101	{
1102	this.word = word;
1103	this.start_position = start_position;
1104	this.end_position = end_position;
1105	this.preceding_word_matched = preceding_word_matched;
1106	}
1107	}
1108
1109	static private class PartialPhraseMatch
1110	{
1111	public int start_position;
1112	public int query_phrase_number;
1113	public int num_words_matched;
1114
1115	public PartialPhraseMatch(int start_position, int query_phrase_number)
1116	{
1117	this.start_position = start_position;
1118	this.query_phrase_number = query_phrase_number;
1119	this.num_words_matched = 1;
1120	}
1121	}
1122	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: