Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25128

Last change on this file since 25128 was 25128, checked in by sjm84, 12 years ago
Second round of changes adding in the login ability, also interface options are now returned whenever site metadata is returned
Property svn:keywords set to `Author Date Id Revision`
File size: 39.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router */
41	public class DocumentAction extends Action
42	{
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50
51	/**
52	* if this is set to true, when a document is displayed, any annotation type
53	* services (enrich) will be offered to the user as well
54	*/
55	protected boolean provide_annotations = false;
56
57	protected boolean highlight_query_terms = false;
58
59	public boolean configure()
60	{
61	super.configure();
62	String highlight = (String) config_params.get("highlightQueryTerms");
63	if (highlight != null && highlight.equals("true"))
64	{
65	highlight_query_terms = true;
66	}
67	String annotate = (String) config_params.get("displayAnnotationService");
68	if (annotate != null && annotate.equals("true"))
69	{
70	provide_annotations = true;
71	}
72	return true;
73	}
74
75	public Node process(Node message_node)
76	{
77	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
78
79	Element message = this.converter.nodeToElement(message_node);
80
81	// the response
82	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
83	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
84	result.appendChild(page_response);
85
86	// get the request - assume only one
87	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
88	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
89	HashMap params = GSXML.extractParams(cgi_paramList, false);
90
91	// just in case there are some that need to get passed to the services
92	HashMap service_params = (HashMap) params.get("s0");
93
94	String has_rl = null;
95	String has_href = null;
96	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
97	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
98	String collection = (String) params.get(GSParams.COLLECTION);
99	UserContext userContext = new UserContext(request);
100	String document_name = (String) params.get(GSParams.DOCUMENT);
101	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals("")))
102	{
103	logger.error("no document specified!");
104	return result;
105	}
106	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
107	if (document_type == null)
108	{
109	document_type = "simple";
110	}
111	//whether to retrieve siblings or not
112	boolean get_siblings = false;
113	String sibs = (String) params.get(SIBLING_ARG);
114	if (sibs != null && sibs.equals("1"))
115	{
116	get_siblings = true;
117	}
118
119	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
120	if (sibling_num != null && !sibling_num.equals(""))
121	{
122	// we have to modify the doc name
123	document_name = document_name + "." + sibling_num + ".ss";
124	}
125
126	boolean expand_document = false;
127	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
128	if (ed_arg != null && ed_arg.equals("1"))
129	{
130	expand_document = true;
131	}
132
133	boolean expand_contents = false;
134	if (expand_document)
135	{ // we always expand the contents with the text
136	expand_contents = true;
137	}
138	else
139	{
140	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
141	if (ec_arg != null && ec_arg.equals("1"))
142	{
143	expand_contents = true;
144	}
145	}
146
147	//append site metadata
148	addSiteMetadata(page_response, userContext);
149	addInterfaceOptions(page_response);
150
151	// get the additional data needed for the page
152	getBackgroundData(page_response, collection, userContext);
153	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
154
155	// the_document is where all the doc info - structure and metadata etc
156	// is added into, to be returned in the page
157	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
158	page_response.appendChild(the_document);
159
160	// set the doctype from the cgi arg as an attribute
161	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
162
163	// create a basic doc list containing the current node
164	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
165	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
166	basic_doc_list.appendChild(current_doc);
167	if (document_name.length() != 0)
168	{
169	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
170	}
171	else if (has_href.length() != 0)
172	{
173	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
174	current_doc.setAttribute("externalURL", has_rl);
175	}
176
177	// Create a parameter list to specify the required structure information
178	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
179
180	if (service_params != null)
181	{
182	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
183	}
184
185	Element ds_param = null;
186	boolean get_structure = false;
187	boolean get_structure_info = false;
188	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
189	{
190	get_structure_info = true;
191
192	if (expand_contents)
193	{
194	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
195	ds_param_list.appendChild(ds_param);
196	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
197	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
198	}
199
200	// get teh info needed for paged naviagtion
201	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
202	ds_param_list.appendChild(ds_param);
203	ds_param.setAttribute(GSXML.NAME_ATT, "info");
204	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
205	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206	ds_param_list.appendChild(ds_param);
207	ds_param.setAttribute(GSXML.NAME_ATT, "info");
208	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
209	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
210	ds_param_list.appendChild(ds_param);
211	ds_param.setAttribute(GSXML.NAME_ATT, "info");
212	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
213
214	if (get_siblings)
215	{
216	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
217	ds_param_list.appendChild(ds_param);
218	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
219	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
220	}
221
222	}
223	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
224	{
225	get_structure = true;
226	if (expand_contents)
227	{
228	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
229	ds_param_list.appendChild(ds_param);
230	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
231	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
232	}
233	else
234	{
235	// get the info needed for table of contents
236	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
237	ds_param_list.appendChild(ds_param);
238	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
239	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
240	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
241	ds_param_list.appendChild(ds_param);
242	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
243	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
244	if (get_siblings)
245	{
246	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
247	ds_param_list.appendChild(ds_param);
248	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
249	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
250	}
251	}
252	}
253	else
254	{
255	// we dont need any structure
256	}
257
258	boolean has_dummy = false;
259	if (get_structure \|\| get_structure_info)
260	{
261
262	// Build a request to obtain the document structure
263	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
264	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
265	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
266	ds_message.appendChild(ds_request);
267	ds_request.appendChild(ds_param_list);
268
269	// create a doc_node_list and put in the doc_node that we are interested in
270	ds_request.appendChild(basic_doc_list);
271
272	// Process the document structure retrieve message
273	Element ds_response_message = (Element) this.mr.process(ds_message);
274	if (processErrorElements(ds_response_message, page_response))
275	{
276	return result;
277	}
278
279	// get the info and print out
280	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
281	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
282	path = GSPath.appendLink(path, "nodeStructureInfo");
283	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
284	// get the doc_node bit
285	if (ds_response_struct_info != null)
286	{
287	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
288	}
289	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
290	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
291	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
292	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
293
294	if (ds_response_structure != null)
295	{
296	// add the contents of the structure bit into the_document
297	NodeList structs = ds_response_structure.getChildNodes();
298	for (int i = 0; i < structs.getLength(); i++)
299	{
300	the_document.appendChild(this.doc.importNode(structs.item(i), true));
301	}
302	}
303	else
304	{
305	// no structure nodes, so put in a dummy doc node
306	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
307	if (document_name.length() != 0)
308	{
309	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
310	}
311	else if (has_href.length() != 0)
312	{
313	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
314	doc_node.setAttribute("externalURL", has_rl);
315	}
316	the_document.appendChild(doc_node);
317	has_dummy = true;
318	}
319	}
320	else
321	{ // a simple type - we dont have a dummy node for simple
322	// should think about this more
323	// no structure request, so just put in a dummy doc node
324	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
325	if (document_name.length() != 0)
326	{
327	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
328	}
329	else if (has_href.length() != 0)
330	{
331	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
332	doc_node.setAttribute("externalURL", has_rl);
333	}
334	the_document.appendChild(doc_node);
335	has_dummy = true;
336	}
337
338	// Build a request to obtain some document metadata
339	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
340	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
341	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
342	dm_message.appendChild(dm_request);
343	// Create a parameter list to specify the required metadata information
344
345	HashSet meta_names = new HashSet();
346	meta_names.add("Title"); // the default
347	if (format_elem != null)
348	{
349	getRequiredMetadataNames(format_elem, meta_names);
350	}
351
352	Element dm_param_list = createMetadataParamList(meta_names);
353	if (service_params != null)
354	{
355	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
356	}
357
358	dm_request.appendChild(dm_param_list);
359
360	// create the doc node list for the metadata request
361	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
362	dm_request.appendChild(dm_doc_list);
363
364	// Add each node from the structure response into the metadata request
365	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
366	for (int i = 0; i < doc_nodes.getLength(); i++)
367	{
368	Element doc_node = (Element) doc_nodes.item(i);
369	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
370
371	// Add the documentNode to the list
372	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
373	dm_doc_list.appendChild(dm_doc_node);
374	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
375	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
376	}
377
378	// we also want a metadata request to the top level document to get
379	// assocfilepath - this could be cached too
380	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
381	dm_message.appendChild(doc_meta_request);
382	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
383	if (service_params != null)
384	{
385	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
386	}
387
388	doc_meta_request.appendChild(doc_meta_param_list);
389	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
390	doc_meta_param_list.appendChild(doc_param);
391	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
392	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
393
394	// create the doc node list for the metadata request
395	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
396	doc_meta_request.appendChild(doc_list);
397
398	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
399	// the node we want is the root document node
400	if (document_name.length() != 0)
401	{
402	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name + ".rt");
403	}
404	else if (has_href.length() != 0)
405	{
406	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href + ".rt");
407	doc_node.setAttribute("externalURL", has_rl);
408	}
409	doc_list.appendChild(doc_node);
410
411	Element dm_response_message = (Element) this.mr.process(dm_message);
412	if (processErrorElements(dm_response_message, page_response))
413	{
414	return result;
415	}
416
417	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
418	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
419
420	// Merge the metadata with the structure information
421	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
422	for (int i = 0; i < doc_nodes.getLength(); i++)
423	{
424	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
425	}
426	// get the top level doc metadata out
427	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
428	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
429	GSXML.mergeMetadataLists(the_document, top_doc_node);
430
431	// Build a request to obtain some document content
432	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
433	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
434	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
435	dc_message.appendChild(dc_request);
436
437	// Create a parameter list to specify the request parameters - empty for now
438	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
439	if (service_params != null)
440	{
441	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
442	}
443
444	dc_request.appendChild(dc_param_list);
445
446	// get the content
447	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
448	if (expand_document)
449	{
450	dc_request.appendChild(dm_doc_list);
451	}
452	else
453	{
454	dc_request.appendChild(basic_doc_list);
455	}
456	logger.debug("request = " + converter.getString(dc_message));
457	Element dc_response_message = (Element) this.mr.process(dc_message);
458	if (processErrorElements(dc_response_message, page_response))
459	{
460	return result;
461	}
462
463	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
464
465	if (expand_document)
466	{
467	// Merge the content with the structure information
468	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
469	for (int i = 0; i < doc_nodes.getLength(); i++)
470	{
471	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
472	if (content != null)
473	{
474	if (highlight_query_terms)
475	{
476	content = highlightQueryTerms(request, (Element) content);
477	}
478	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
479	}
480	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
481	}
482	}
483	else
484	{
485	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
486	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
487	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
488	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
489
490	if (dc_response_doc_content == null)
491	{
492	// no content to add
493	if (dc_response_doc_external != null)
494	{
495	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
496
497	the_document.setAttribute("selectedNode", modified_doc_id);
498	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
499	}
500	return result;
501	}
502	if (highlight_query_terms)
503	{
504	dc_response_doc.removeChild(dc_response_doc_content);
505
506	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
507	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
508	}
509
510	if (provide_annotations)
511	{
512	String service_selected = (String) params.get(ENRICH_DOC_ARG);
513	if (service_selected != null && service_selected.equals("1"))
514	{
515	// now we can modifiy the response doc if needed
516	String enrich_service = (String) params.get(GSParams.SERVICE);
517	// send a message to the service
518	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
519	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
520	enrich_message.appendChild(enrich_request);
521	// check for parameters
522	HashMap e_service_params = (HashMap) params.get("s1");
523	if (e_service_params != null)
524	{
525	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
526	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
527	enrich_request.appendChild(enrich_pl);
528	}
529	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
530	enrich_request.appendChild(e_doc_list);
531	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
532
533	Node enrich_response = this.mr.process(enrich_message);
534
535	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
536	path = GSPath.createPath(links);
537	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
538
539	}
540	} // if provide_annotations
541
542	// use the returned id rather than the sent one cos there may have
543	// been modifiers such as .pr that are removed.
544	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
545	the_document.setAttribute("selectedNode", modified_doc_id);
546	if (has_dummy)
547	{
548	// change the id if necessary and add the content
549	Element dummy_node = (Element) doc_nodes.item(0);
550
551	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
552	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
553	// hack for simple type
554	if (document_type.equals("simple"))
555	{
556	// we dont want the internal docNode, just want the content and metadata in the document
557	// rethink this!!
558	the_document.removeChild(dummy_node);
559
560	NodeList dummy_children = dummy_node.getChildNodes();
561	//for (int i=0; i<dummy_children.getLength(); i++) {
562	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
563	{
564	// special case as we don't want more than one metadata list
565	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
566	{
567	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
568	}
569	else
570	{
571	the_document.appendChild(dummy_children.item(i));
572	}
573	}
574	}
575	}
576	else
577	{
578	// Merge the document content with the metadata and structure information
579	for (int i = 0; i < doc_nodes.getLength(); i++)
580	{
581	Node dn = doc_nodes.item(i);
582	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
583	if (dn_id.equals(modified_doc_id))
584	{
585	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
586	break;
587	}
588	}
589	}
590	}
591	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
592	return result;
593	}
594
595	/**
596	* tell the param class what its arguments are if an action has its own
597	* arguments, this should add them to the params object - particularly
598	* important for args that should not be saved
599	*/
600	public boolean getActionParameters(GSParams params)
601	{
602	params.addParameter(GOTO_PAGE_ARG, false);
603	params.addParameter(ENRICH_DOC_ARG, false);
604	return true;
605	}
606
607	/**
608	* this method gets the collection description, the format info, the list of
609	* enrich services, etc - stuff that is needed for the page, but is the same
610	* whatever the query is - should be cached
611	*/
612	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
613	{
614
615	// create a message to process - contains requests for the collection
616	// description, the format element, the enrich services on offer
617	// these could all be cached
618	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
619	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
620	// the format request - ignore for now, where does this request go to??
621	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
622	info_message.appendChild(format_request);
623
624	// the enrich_services request - only do this if provide_annotations is true
625
626	if (provide_annotations)
627	{
628	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
629	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
630	info_message.appendChild(enrich_services_request);
631	}
632
633	Element info_response = (Element) this.mr.process(info_message);
634
635	// the collection is the first response
636	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
637	Element format_resp = (Element) responses.item(0);
638
639	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
640	if (format_elem != null)
641	{
642	logger.debug("doc action found a format statement");
643	// set teh format type
644	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
645	page_response.appendChild(this.doc.importNode(format_elem, true));
646	}
647
648	if (provide_annotations)
649	{
650	Element services_resp = (Element) responses.item(1);
651
652	// a new message for the mr
653	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
654
655	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
656	boolean service_found = false;
657	for (int j = 0; j < e_services.getLength(); j++)
658	{
659	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
660	{
661	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
662	enrich_message.appendChild(s);
663	service_found = true;
664	}
665	}
666	if (service_found)
667	{
668	Element enrich_response = (Element) this.mr.process(enrich_message);
669
670	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
671	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
672	for (int i = 0; i < e_responses.getLength(); i++)
673	{
674	Element e_resp = (Element) e_responses.item(i);
675	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
676	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
677	service_list.appendChild(e_service);
678	}
679	page_response.appendChild(service_list);
680	}
681	} // if provide_annotations
682	return true;
683
684	}
685
686	/**
687	* this involves a bit of a hack to get the equivalent query terms - has to
688	* requery the query service - uses the last selected service name. (if it
689	* ends in query). should this action do the query or should it send a
690	* message to the query action? but that will involve lots of extra stuff.
691	* also doesn't handle phrases properly - just highlights all the terms
692	* found in the text.
693	*/
694	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
695	{
696
697	// do the query again to get term info
698	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
699	HashMap params = GSXML.extractParams(cgi_param_list, false);
700
701	HashMap previous_params = (HashMap) params.get("p");
702	if (previous_params == null)
703	{
704	return dc_response_doc_content;
705	}
706	String service_name = (String) previous_params.get(GSParams.SERVICE);
707	if (service_name == null \|\| !service_name.endsWith("Query"))
708	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
709	logger.debug("invalid service, not doing highlighting");
710	return dc_response_doc_content;
711	}
712	String collection = (String) params.get(GSParams.COLLECTION);
713	UserContext userContext = new UserContext(request);
714	String to = GSPath.appendLink(collection, service_name);
715
716	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
717	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
718	mr_query_message.appendChild(mr_query_request);
719
720	// paramList
721	HashMap service_params = (HashMap) params.get("s1");
722
723	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
724	GSXML.addParametersToList(this.doc, query_param_list, service_params);
725	mr_query_request.appendChild(query_param_list);
726
727	// do the query
728	Element mr_query_response = (Element) this.mr.process(mr_query_message);
729
730	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
731	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
732	if (query_term_list_element == null)
733	{
734	// no term info
735	logger.error("No query term information.\n");
736	return dc_response_doc_content;
737	}
738
739	String content = GSXML.getNodeText(dc_response_doc_content);
740
741	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
742	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
743
744	HashSet query_term_variants = new HashSet();
745	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
746	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
747	{
748	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
749	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
750	{
751	for (int i = 0; i < terms_nodelist.getLength(); i++)
752	{
753	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
754	String termValueU = null;
755	String termValueL = null;
756
757	if (termValue.length() > 1)
758	{
759	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
760	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
761	}
762	else
763	{
764	termValueU = termValue.substring(0, 1).toUpperCase();
765	termValueL = termValue.substring(0, 1).toLowerCase();
766	}
767
768	query_term_variants.add(termValueU);
769	query_term_variants.add(termValueL);
770	}
771	}
772	}
773	else
774	{
775	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
776	{
777	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
778	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
779	for (int j = 0; j < equivalent_terms.length; j++)
780	{
781	query_term_variants.add(equivalent_terms[j]);
782	}
783	}
784	}
785
786	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
787
788	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
789	String performed_query = GSXML.getNodeText(query_element) + " ";
790
791	ArrayList phrase_query_p_term_variants_list = new ArrayList();
792	int term_start = 0;
793	boolean in_term = false;
794	boolean in_phrase = false;
795	for (int i = 0; i < performed_query.length(); i++)
796	{
797	char character = performed_query.charAt(i);
798	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
799
800	// Has a query term just started?
801	if (in_term == false && is_character_letter_or_digit == true)
802	{
803	in_term = true;
804	term_start = i;
805	}
806
807	// Or has a term just finished?
808	else if (in_term == true && is_character_letter_or_digit == false)
809	{
810	in_term = false;
811	String term = performed_query.substring(term_start, i);
812
813	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
814	if (term_element != null)
815	{
816
817	HashSet phrase_query_p_term_x_variants = new HashSet();
818
819	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
820	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
821	{
822	String termValueU = null;
823	String termValueL = null;
824
825	if (term.length() > 1)
826	{
827	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
828	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
829	}
830	else
831	{
832	termValueU = term.substring(0, 1).toUpperCase();
833	termValueL = term.substring(0, 1).toLowerCase();
834	}
835
836	phrase_query_p_term_x_variants.add(termValueU);
837	phrase_query_p_term_x_variants.add(termValueL);
838	}
839	else
840	{
841	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
842	{
843	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
844	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
845	for (int k = 0; k < term_equivalent_terms.length; k++)
846	{
847	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
848	}
849	}
850	}
851	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
852
853	if (in_phrase == false)
854	{
855	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
856	phrase_query_p_term_variants_list = new ArrayList();
857	}
858	}
859	}
860	// Watch for phrases (surrounded by quotes)
861	if (character == '\"')
862	{
863	// Has a phrase just started?
864	if (in_phrase == false)
865	{
866	in_phrase = true;
867	}
868	// Or has a phrase just finished?
869	else if (in_phrase == true)
870	{
871	in_phrase = false;
872	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
873	}
874
875	phrase_query_p_term_variants_list = new ArrayList();
876	}
877	}
878
879	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
880	}
881
882	/**
883	* Highlights query terms in a piece of text.
884	*/
885	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
886	{
887	// Convert the content string to an array of characters for speed
888	char[] content_characters = new char[content.length()];
889	content.getChars(0, content.length(), content_characters, 0);
890
891	// Now skim through the content, identifying word matches
892	ArrayList word_matches = new ArrayList();
893	int word_start = 0;
894	boolean in_word = false;
895	boolean preceding_word_matched = false;
896	boolean inTag = false;
897	for (int i = 0; i < content_characters.length; i++)
898	{
899	//We don't want to find words inside HTML tags
900	if (content_characters[i] == '<')
901	{
902	inTag = true;
903	continue;
904	}
905	else if (inTag && content_characters[i] == '>')
906	{
907	inTag = false;
908	}
909	else if (inTag)
910	{
911	continue;
912	}
913
914	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
915
916	// Has a word just started?
917	if (in_word == false && is_character_letter_or_digit == true)
918	{
919	in_word = true;
920	word_start = i;
921	}
922
923	// Or has a word just finished?
924	else if (in_word == true && is_character_letter_or_digit == false)
925	{
926	in_word = false;
927
928	// Check if the word matches any of the query term equivalents
929	String word = new String(content_characters, word_start, (i - word_start));
930	if (query_term_variants.contains(word))
931	{
932	// We have found a matching word, so remember its location
933	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
934	preceding_word_matched = true;
935	}
936	else
937	{
938	preceding_word_matched = false;
939	}
940	}
941	}
942
943	// Don't forget the last word...
944	if (in_word == true)
945	{
946	// Check if the word matches any of the query term equivalents
947	String word = new String(content_characters, word_start, (content_characters.length - word_start));
948	if (query_term_variants.contains(word))
949	{
950	// We have found a matching word, so remember its location
951	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
952	}
953	}
954
955	ArrayList highlight_start_positions = new ArrayList();
956	ArrayList highlight_end_positions = new ArrayList();
957
958	// Deal with phrases now
959	ArrayList partial_phrase_matches = new ArrayList();
960	for (int i = 0; i < word_matches.size(); i++)
961	{
962	WordMatch word_match = (WordMatch) word_matches.get(i);
963
964	// See if any partial phrase matches are extended by this word
965	if (word_match.preceding_word_matched)
966	{
967	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
968	{
969	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
970	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
971	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
972	if (phrase_query_p_term_x_variants.contains(word_match.word))
973	{
974	partial_phrase_match.num_words_matched++;
975
976	// Has a complete phrase match occurred?
977	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
978	{
979	// Check for overlaps by looking at the previous highlight range
980	if (!highlight_end_positions.isEmpty())
981	{
982	int last_highlight_index = highlight_end_positions.size() - 1;
983	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
984	if (last_highlight_end > partial_phrase_match.start_position)
985	{
986	// There is an overlap, so remove the previous phrase match
987	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
988	highlight_end_positions.remove(last_highlight_index);
989	partial_phrase_match.start_position = last_highlight_start;
990	}
991	}
992
993	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
994	highlight_end_positions.add(new Integer(word_match.end_position));
995	}
996	// No, but add the partial match back into the list for next time
997	else
998	{
999	partial_phrase_matches.add(partial_phrase_match);
1000	}
1001	}
1002	}
1003	}
1004	else
1005	{
1006	partial_phrase_matches.clear();
1007	}
1008
1009	// See if this word is at the start of any of the phrases
1010	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1011	{
1012	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
1013	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1014	if (phrase_query_p_term_1_variants.contains(word_match.word))
1015	{
1016	// If this phrase is just one word long, we have a complete match
1017	if (phrase_query_p_term_variants_list.size() == 1)
1018	{
1019	highlight_start_positions.add(new Integer(word_match.start_position));
1020	highlight_end_positions.add(new Integer(word_match.end_position));
1021	}
1022	// Otherwise we have the start of a potential phrase match
1023	else
1024	{
1025	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1026	}
1027	}
1028	}
1029	}
1030
1031	// Now add the annotation tags into the document at the correct points
1032	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1033
1034	int last_wrote = 0;
1035	for (int i = 0; i < highlight_start_positions.size(); i++)
1036	{
1037	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
1038	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
1039
1040	// Print anything before the highlight range
1041	if (last_wrote < highlight_start)
1042	{
1043	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1044	content_element.appendChild(this.doc.createTextNode(preceding_text));
1045	}
1046
1047	// Print the highlight text, annotated
1048	if (highlight_end > last_wrote)
1049	{
1050	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1051	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1052	annotation_element.setAttribute("type", "query_term");
1053	content_element.appendChild(annotation_element);
1054	last_wrote = highlight_end;
1055	}
1056	}
1057
1058	// Finish off any unwritten text
1059	if (last_wrote < content_characters.length)
1060	{
1061	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1062	content_element.appendChild(this.doc.createTextNode(remaining_text));
1063	}
1064
1065	return content_element;
1066	}
1067
1068	static private class WordMatch
1069	{
1070	public String word;
1071	public int start_position;
1072	public int end_position;
1073	public boolean preceding_word_matched;
1074
1075	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1076	{
1077	this.word = word;
1078	this.start_position = start_position;
1079	this.end_position = end_position;
1080	this.preceding_word_matched = preceding_word_matched;
1081	}
1082	}
1083
1084	static private class PartialPhraseMatch
1085	{
1086	public int start_position;
1087	public int query_phrase_number;
1088	public int num_words_matched;
1089
1090	public PartialPhraseMatch(int start_position, int query_phrase_number)
1091	{
1092	this.start_position = start_position;
1093	this.query_phrase_number = query_phrase_number;
1094	this.num_words_matched = 1;
1095	}
1096	}
1097	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: