Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25816

Last change on this file since 25816 was 25816, checked in by kjdon, 12 years ago
if no document type is specified in cgi params, then get it from the collection, don't just assume hierarchy
Property svn:keywords set to `Author Date Id Revision`
File size: 42.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37	import java.io.Serializable;
38
39	import org.apache.log4j.*;
40
41	/** Action class for retrieving Documents via the message router */
42	public class DocumentAction extends Action
43	{
44
45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47	// this is used to specify that the sibling nodes of a selected one should be obtained
48	public static final String SIBLING_ARG = "sib";
49	public static final String GOTO_PAGE_ARG = "gp";
50	public static final String ENRICH_DOC_ARG = "end";
51	public static final String EXPAND_DOCUMENT_ARG = "ed";
52	public static final String EXPAND_CONTENTS_ARG = "ec";
53	public static final String REALISTIC_BOOK_ARG = "book";
54
55	/**
56	* if this is set to true, when a document is displayed, any annotation type
57	* services (enrich) will be offered to the user as well
58	*/
59	protected boolean provide_annotations = false;
60
61	protected boolean highlight_query_terms = false;
62
63	public boolean configure()
64	{
65	super.configure();
66	String highlight = (String) config_params.get("highlightQueryTerms");
67	if (highlight != null && highlight.equals("true"))
68	{
69	highlight_query_terms = true;
70	}
71	String annotate = (String) config_params.get("displayAnnotationService");
72	if (annotate != null && annotate.equals("true"))
73	{
74	provide_annotations = true;
75	}
76	return true; }
77
78	public Node process(Node message_node)
79	{
80	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
81
82	Element message = this.converter.nodeToElement(message_node);
83
84	// the response
85	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
86	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
87	result.appendChild(page_response);
88
89	// get the request - assume only one
90	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
91	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
92	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
93
94	// just in case there are some that need to get passed to the services
95	HashMap service_params = (HashMap) params.get("s0");
96
97	String collection = (String) params.get(GSParams.COLLECTION);
98	String document_id = (String) params.get(GSParams.DOCUMENT);
99	if (document_id != null && document_id.equals(""))
100	{
101	document_id = null;
102	}
103	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
104	if (href != null && href.equals(""))
105	{
106	href = null;
107	}
108	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
109	if (document_id == null && href == null)
110	{
111	logger.error("no document specified!");
112	return result;
113	}
114	if (rl != null && rl.equals("0"))
115	{
116	// this is a true external link, we should have been directed to a different page or action
117	logger.error("rl value was 0, shouldn't get here");
118	return result;
119	}
120	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
121	if (document_type != null && document_type.equals(""))
122	{
123	//document_type = "hierarchy";
124	document_type = null; // we'll get it later if not already specified
125	}
126	//whether to retrieve siblings or not
127	boolean get_siblings = false;
128	String sibs = (String) params.get(SIBLING_ARG);
129	if (sibs != null && sibs.equals("1"))
130	{
131	get_siblings = true;
132	}
133
134	String doc_id_modifier = "";
135	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
136	if (sibling_num != null && !sibling_num.equals(""))
137	{
138	// we have to modify the doc name
139	doc_id_modifier = "." + sibling_num + ".ss";
140	}
141
142	boolean expand_document = false;
143	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
144	if (ed_arg != null && ed_arg.equals("1"))
145	{
146	expand_document = true;
147	}
148
149	boolean expand_contents = false;
150	if (expand_document)
151	{ // we always expand the contents with the text
152	expand_contents = true;
153	}
154	else
155	{
156	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
157	if (ec_arg != null && ec_arg.equals("1"))
158	{
159	expand_contents = true;
160	}
161	}
162
163	UserContext userContext = new UserContext(request);
164
165	//append site metadata
166	addSiteMetadata(page_response, userContext);
167	addInterfaceOptions(page_response);
168
169	// get the additional data needed for the page
170	getBackgroundData(page_response, collection, userContext);
171	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
172
173	// the_document is where all the doc info - structure and metadata etc
174	// is added into, to be returned in the page
175	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
176	page_response.appendChild(the_document);
177
178	// create a basic doc list containing the current node
179	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
180	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
181	basic_doc_list.appendChild(current_doc);
182	if (document_id != null)
183	{
184	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
185	}
186	else
187	{
188	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
189	// do we need this??
190	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
191	}
192
193	if (document_type == null) {
194	logger.error("getting document type");
195	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
196	logger.error("new doc type = "+document_type);
197	}
198	if (document_type != null) {
199	// set the doctype from the cgi arg or from the server as an attribute
200	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
201	}
202	else {
203	logger.error("doctype is null!!!***********");
204	}
205
206	// Create a parameter list to specify the required structure information
207	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
208
209	if (service_params != null)
210	{
211	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
212	}
213
214	Element ds_param = null;
215	boolean get_structure = false;
216	boolean get_structure_info = false;
217	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
218	{
219	get_structure_info = true;
220
221	if (expand_contents)
222	{
223	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
224	ds_param_list.appendChild(ds_param);
225	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
226	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
227	}
228
229	// get the info needed for paged naviagtion
230	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
231	ds_param_list.appendChild(ds_param);
232	ds_param.setAttribute(GSXML.NAME_ATT, "info");
233	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
234	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
235	ds_param_list.appendChild(ds_param);
236	ds_param.setAttribute(GSXML.NAME_ATT, "info");
237	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
238	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
239	ds_param_list.appendChild(ds_param);
240	ds_param.setAttribute(GSXML.NAME_ATT, "info");
241	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
242
243	if (get_siblings)
244	{
245	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
246	ds_param_list.appendChild(ds_param);
247	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
248	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
249	}
250
251	}
252	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
253	{
254	get_structure = true;
255	if (expand_contents)
256	{
257	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
258	ds_param_list.appendChild(ds_param);
259	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
260	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
261	}
262	else
263	{
264	// get the info needed for table of contents
265	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
266	ds_param_list.appendChild(ds_param);
267	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
268	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
269	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
270	ds_param_list.appendChild(ds_param);
271	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
272	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
273	if (get_siblings)
274	{
275	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
276	ds_param_list.appendChild(ds_param);
277	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
278	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
279	}
280	}
281	}
282	else
283	{
284	// we dont need any structure
285	}
286
287	boolean has_dummy = false;
288	if (get_structure \|\| get_structure_info)
289	{
290
291	// Build a request to obtain the document structure
292	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
293	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
294	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
295	ds_message.appendChild(ds_request);
296	ds_request.appendChild(ds_param_list);
297
298	// add the node list we created earlier
299	ds_request.appendChild(basic_doc_list);
300
301	// Process the document structure retrieve message
302	Element ds_response_message = (Element) this.mr.process(ds_message);
303	if (processErrorElements(ds_response_message, page_response))
304	{
305	return result;
306	}
307
308	// get the info and print out
309	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
310	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
311	path = GSPath.appendLink(path, "nodeStructureInfo");
312	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
313	// get the doc_node bit
314	if (ds_response_struct_info != null)
315	{
316	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
317	}
318	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
319	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
320	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
321	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
322
323	if (ds_response_structure != null)
324	{
325	// add the contents of the structure bit into the_document
326	NodeList structs = ds_response_structure.getChildNodes();
327	for (int i = 0; i < structs.getLength(); i++)
328	{
329	the_document.appendChild(this.doc.importNode(structs.item(i), true));
330	}
331	}
332	else
333	{
334	// no structure nodes, so put in a dummy doc node
335	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
336	if (document_id != null)
337	{
338	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
339	}
340	else
341	{
342	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
343
344	}
345	the_document.appendChild(doc_node);
346	has_dummy = true;
347	}
348	}
349	else
350	{ // a simple type - we dont have a dummy node for simple
351	// should think about this more
352	// no structure request, so just put in a dummy doc node
353	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
354	if (document_id != null)
355	{
356	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
357	}
358	else
359	{
360	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
361	}
362	the_document.appendChild(doc_node);
363	has_dummy = true;
364	}
365
366	// Build a request to obtain some document metadata
367	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
368	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
369	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
370	dm_message.appendChild(dm_request);
371	// Create a parameter list to specify the required metadata information
372
373	HashSet<String> meta_names = new HashSet<String>();
374	meta_names.add("Title"); // the default
375	if (format_elem != null)
376	{
377	getRequiredMetadataNames(format_elem, meta_names);
378	}
379
380	Element dm_param_list = createMetadataParamList(meta_names);
381	if (service_params != null)
382	{
383	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
384	}
385
386	dm_request.appendChild(dm_param_list);
387
388	// create the doc node list for the metadata request
389	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
390	dm_request.appendChild(dm_doc_list);
391
392	// Add each node from the structure response into the metadata request
393	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
394	for (int i = 0; i < doc_nodes.getLength(); i++)
395	{
396	Element doc_node = (Element) doc_nodes.item(i);
397	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
398
399	// Add the documentNode to the list
400	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
401	dm_doc_list.appendChild(dm_doc_node);
402	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
403	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
404	}
405
406	// we also want a metadata request to the top level document to get
407	// assocfilepath - this could be cached too
408	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
409	dm_message.appendChild(doc_meta_request);
410	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
411	if (service_params != null)
412	{
413	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
414	}
415
416	doc_meta_request.appendChild(doc_meta_param_list);
417	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
418	doc_meta_param_list.appendChild(doc_param);
419	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
420	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
421
422	// create the doc node list for the metadata request
423	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
424	doc_meta_request.appendChild(doc_list);
425
426	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
427	// the node we want is the root document node
428	if (document_id != null)
429	{
430	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
431	}
432	else
433	{
434	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
435	// can we assume that href is always a top level doc??
436	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
437	//doc_node.setAttribute("externalURL", has_rl);
438	}
439	doc_list.appendChild(doc_node);
440
441	Element dm_response_message = (Element) this.mr.process(dm_message);
442	if (processErrorElements(dm_response_message, page_response))
443	{
444	return result;
445	}
446
447	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
448	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
449
450	// Merge the metadata with the structure information
451	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
452	for (int i = 0; i < doc_nodes.getLength(); i++)
453	{
454	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
455	}
456	// get the top level doc metadata out
457	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
458	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
459	GSXML.mergeMetadataLists(the_document, top_doc_node);
460
461	// Build a request to obtain some document content
462	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
463	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
464	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
465	dc_message.appendChild(dc_request);
466
467	// Create a parameter list to specify the request parameters - empty for now
468	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
469	if (service_params != null)
470	{
471	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
472	}
473
474	dc_request.appendChild(dc_param_list);
475
476	// get the content
477	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
478	if (expand_document)
479	{
480	dc_request.appendChild(dm_doc_list);
481	}
482	else
483	{
484	dc_request.appendChild(basic_doc_list);
485	}
486	logger.debug("request = " + XMLConverter.getString(dc_message));
487	Element dc_response_message = (Element) this.mr.process(dc_message);
488	if (processErrorElements(dc_response_message, page_response))
489	{
490	return result;
491	}
492
493	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
494
495	if (expand_document)
496	{
497	// Merge the content with the structure information
498	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
499	for (int i = 0; i < doc_nodes.getLength(); i++)
500	{
501	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
502	if (content != null)
503	{
504	if (highlight_query_terms)
505	{
506	content = highlightQueryTerms(request, (Element) content);
507	}
508	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
509	}
510	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
511	}
512	}
513	else
514	{
515	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
516	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
517	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
518	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
519
520	if (dc_response_doc_content == null)
521	{
522	// no content to add
523	if (dc_response_doc.getAttribute("external").equals("true"))
524	{
525
526	//if (dc_response_doc_external != null)
527	//{
528	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
529
530	the_document.setAttribute("selectedNode", href_id);
531	the_document.setAttribute("external", href_id);
532	}
533	return result;
534	}
535	if (highlight_query_terms)
536	{
537	dc_response_doc.removeChild(dc_response_doc_content);
538
539	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
540	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
541	}
542
543	if (provide_annotations)
544	{
545	String service_selected = (String) params.get(ENRICH_DOC_ARG);
546	if (service_selected != null && service_selected.equals("1"))
547	{
548	// now we can modifiy the response doc if needed
549	String enrich_service = (String) params.get(GSParams.SERVICE);
550	// send a message to the service
551	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
552	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
553	enrich_message.appendChild(enrich_request);
554	// check for parameters
555	HashMap e_service_params = (HashMap) params.get("s1");
556	if (e_service_params != null)
557	{
558	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
559	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
560	enrich_request.appendChild(enrich_pl);
561	}
562	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
563	enrich_request.appendChild(e_doc_list);
564	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
565
566	Node enrich_response = this.mr.process(enrich_message);
567
568	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
569	path = GSPath.createPath(links);
570	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
571
572	}
573	} // if provide_annotations
574
575	// use the returned id rather than the sent one cos there may have
576	// been modifiers such as .pr that are removed.
577	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
578	the_document.setAttribute("selectedNode", modified_doc_id);
579	if (has_dummy)
580	{
581	// change the id if necessary and add the content
582	Element dummy_node = (Element) doc_nodes.item(0);
583
584	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
585	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
586	// hack for simple type
587	if (document_type.equals("simple"))
588	{
589	// we dont want the internal docNode, just want the content and metadata in the document
590	// rethink this!!
591	the_document.removeChild(dummy_node);
592
593	NodeList dummy_children = dummy_node.getChildNodes();
594	//for (int i=0; i<dummy_children.getLength(); i++) {
595	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
596	{
597	// special case as we don't want more than one metadata list
598	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
599	{
600	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
601	}
602	else
603	{
604	the_document.appendChild(dummy_children.item(i));
605	}
606	}
607	}
608	}
609	else
610	{
611	// Merge the document content with the metadata and structure information
612	for (int i = 0; i < doc_nodes.getLength(); i++)
613	{
614	Node dn = doc_nodes.item(i);
615	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
616	if (dn_id.equals(modified_doc_id))
617	{
618	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
619	break;
620	}
621	}
622	}
623	}
624	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
625	return result;
626	}
627
628	/**
629	* tell the param class what its arguments are if an action has its own
630	* arguments, this should add them to the params object - particularly
631	* important for args that should not be saved
632	*/
633	public boolean addActionParameters(GSParams params)
634	{
635	params.addParameter(GOTO_PAGE_ARG, false);
636	params.addParameter(ENRICH_DOC_ARG, false);
637	params.addParameter(EXPAND_DOCUMENT_ARG, false);
638	params.addParameter(EXPAND_CONTENTS_ARG, false);
639	params.addParameter(REALISTIC_BOOK_ARG, false);
640
641	return true;
642	}
643
644	/**
645	* this method gets the collection description, the format info, the list of
646	* enrich services, etc - stuff that is needed for the page, but is the same
647	* whatever the query is - should be cached
648	*/
649	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
650	{
651
652	// create a message to process - contains requests for the collection
653	// description, the format element, the enrich services on offer
654	// these could all be cached
655	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
656	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
657	// the format request - ignore for now, where does this request go to??
658	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
659	info_message.appendChild(format_request);
660
661	// the enrich_services request - only do this if provide_annotations is true
662
663	if (provide_annotations)
664	{
665	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
666	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
667	info_message.appendChild(enrich_services_request);
668	}
669
670	Element info_response = (Element) this.mr.process(info_message);
671
672	// the collection is the first response
673	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
674	Element format_resp = (Element) responses.item(0);
675
676	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
677	if (format_elem != null)
678	{
679	logger.debug("doc action found a format statement");
680	// set teh format type
681	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
682	page_response.appendChild(this.doc.importNode(format_elem, true));
683	}
684
685	if (provide_annotations)
686	{
687	Element services_resp = (Element) responses.item(1);
688
689	// a new message for the mr
690	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
691	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
692	boolean service_found = false;
693	for (int j = 0; j < e_services.getLength(); j++)
694	{
695	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
696	{
697	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
698	enrich_message.appendChild(s);
699	service_found = true;
700	}
701	}
702	if (service_found)
703	{
704	Element enrich_response = (Element) this.mr.process(enrich_message);
705
706	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
707	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
708	for (int i = 0; i < e_responses.getLength(); i++)
709	{
710	Element e_resp = (Element) e_responses.item(i);
711	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
712	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
713	service_list.appendChild(e_service);
714	}
715	page_response.appendChild(service_list);
716	}
717	} // if provide_annotations
718	return true;
719
720	}
721
722	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
723	{
724
725	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
726	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
727	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
728	ds_message.appendChild(ds_request);
729
730	// Create a parameter list to specify the required structure information
731	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
732	Element ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
733	ds_param_list.appendChild(ds_param);
734	ds_param.setAttribute(GSXML.NAME_ATT, "info");
735	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
736
737	ds_request.appendChild(ds_param_list);
738
739	// add the node list we created earlier
740	ds_request.appendChild(basic_doc_list);
741
742	logger.error("doctype request = "+this.converter.getPrettyString(ds_request));
743	// Process the document structure retrieve message
744	Element ds_response_message = (Element) this.mr.process(ds_message);
745	logger.error("doctype response = "+this.converter.getPrettyString(ds_response_message));
746	if (processErrorElements(ds_response_message, page_response))
747	{
748	return null;
749	}
750
751	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo"};
752	String path = GSPath.createPath(links);
753	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
754	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
755	if (doctype_elem != null) {
756	String doc_type = doctype_elem.getAttribute("value");
757	return doc_type;
758	}
759	return null;
760	}
761
762	/**
763	* this involves a bit of a hack to get the equivalent query terms - has to
764	* requery the query service - uses the last selected service name. (if it
765	* ends in query). should this action do the query or should it send a
766	* message to the query action? but that will involve lots of extra stuff.
767	* also doesn't handle phrases properly - just highlights all the terms
768	* found in the text.
769	*/
770	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
771	{
772	// do the query again to get term info
773	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
774	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
775
776	HashMap previous_params = (HashMap) params.get("p");
777	if (previous_params == null)
778	{
779	return dc_response_doc_content;
780	}
781	String service_name = (String) previous_params.get(GSParams.SERVICE);
782	if (service_name == null \|\| !service_name.endsWith("Query"))
783	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
784	logger.debug("invalid service, not doing highlighting");
785	return dc_response_doc_content;
786	}
787	String collection = (String) params.get(GSParams.COLLECTION);
788	UserContext userContext = new UserContext(request);
789	String to = GSPath.appendLink(collection, service_name);
790
791	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
792	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
793	mr_query_message.appendChild(mr_query_request);
794
795	// paramList
796	HashMap service_params = (HashMap) params.get("s1");
797
798	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
799	GSXML.addParametersToList(this.doc, query_param_list, service_params);
800	mr_query_request.appendChild(query_param_list);
801
802	// do the query
803	Element mr_query_response = (Element) this.mr.process(mr_query_message);
804
805	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
806	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
807	if (query_term_list_element == null)
808	{
809	// no term info
810	logger.error("No query term information.\n");
811	return dc_response_doc_content;
812	}
813
814	String content = GSXML.getNodeText(dc_response_doc_content);
815
816	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
817	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
818
819	HashSet<String> query_term_variants = new HashSet<String>();
820	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
821	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
822	{
823	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
824	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
825	{
826	for (int i = 0; i < terms_nodelist.getLength(); i++)
827	{
828	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
829	String termValueU = null;
830	String termValueL = null;
831
832	if (termValue.length() > 1)
833	{
834	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
835	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
836	}
837	else
838	{
839	termValueU = termValue.substring(0, 1).toUpperCase();
840	termValueL = termValue.substring(0, 1).toLowerCase();
841	}
842
843	query_term_variants.add(termValueU);
844	query_term_variants.add(termValueL);
845	}
846	}
847	}
848	else
849	{
850	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
851	{
852	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
853	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
854	for (int j = 0; j < equivalent_terms.length; j++)
855	{
856	query_term_variants.add(equivalent_terms[j]);
857	}
858	}
859	}
860
861	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
862
863	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
864	String performed_query = GSXML.getNodeText(query_element) + " ";
865
866	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
867	int term_start = 0;
868	boolean in_term = false;
869	boolean in_phrase = false;
870	for (int i = 0; i < performed_query.length(); i++)
871	{
872	char character = performed_query.charAt(i);
873	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
874
875	// Has a query term just started?
876	if (in_term == false && is_character_letter_or_digit == true)
877	{
878	in_term = true;
879	term_start = i;
880	}
881
882	// Or has a term just finished?
883	else if (in_term == true && is_character_letter_or_digit == false)
884	{
885	in_term = false;
886	String term = performed_query.substring(term_start, i);
887
888	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
889	if (term_element != null)
890	{
891
892	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
893
894	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
895	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
896	{
897	String termValueU = null;
898	String termValueL = null;
899
900	if (term.length() > 1)
901	{
902	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
903	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
904	}
905	else
906	{
907	termValueU = term.substring(0, 1).toUpperCase();
908	termValueL = term.substring(0, 1).toLowerCase();
909	}
910
911	phrase_query_p_term_x_variants.add(termValueU);
912	phrase_query_p_term_x_variants.add(termValueL);
913	}
914	else
915	{
916	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
917	{
918	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
919	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
920	for (int k = 0; k < term_equivalent_terms.length; k++)
921	{
922	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
923	}
924	}
925	}
926	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
927
928	if (in_phrase == false)
929	{
930	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
931	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
932	}
933	}
934	}
935	// Watch for phrases (surrounded by quotes)
936	if (character == '\"')
937	{
938	// Has a phrase just started?
939	if (in_phrase == false)
940	{
941	in_phrase = true;
942	}
943	// Or has a phrase just finished?
944	else if (in_phrase == true)
945	{
946	in_phrase = false;
947	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
948	}
949
950	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
951	}
952	}
953
954	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
955	}
956
957	/**
958	* Highlights query terms in a piece of text.
959	*/
960	private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
961	{
962	// Convert the content string to an array of characters for speed
963	char[] content_characters = new char[content.length()];
964	content.getChars(0, content.length(), content_characters, 0);
965
966	// Now skim through the content, identifying word matches
967	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
968	int word_start = 0;
969	boolean in_word = false;
970	boolean preceding_word_matched = false;
971	boolean inTag = false;
972	for (int i = 0; i < content_characters.length; i++)
973	{
974	//We don't want to find words inside HTML tags
975	if (content_characters[i] == '<')
976	{
977	inTag = true;
978	continue;
979	}
980	else if (inTag && content_characters[i] == '>')
981	{
982	inTag = false;
983	}
984	else if (inTag)
985	{
986	continue;
987	}
988
989	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
990
991	// Has a word just started?
992	if (in_word == false && is_character_letter_or_digit == true)
993	{
994	in_word = true;
995	word_start = i;
996	}
997
998	// Or has a word just finished?
999	else if (in_word == true && is_character_letter_or_digit == false)
1000	{
1001	in_word = false;
1002
1003	// Check if the word matches any of the query term equivalents
1004	String word = new String(content_characters, word_start, (i - word_start));
1005	if (query_term_variants.contains(word))
1006	{
1007	// We have found a matching word, so remember its location
1008	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1009	preceding_word_matched = true;
1010	}
1011	else
1012	{
1013	preceding_word_matched = false;
1014	}
1015	}
1016	}
1017
1018	// Don't forget the last word...
1019	if (in_word == true)
1020	{
1021	// Check if the word matches any of the query term equivalents
1022	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1023	if (query_term_variants.contains(word))
1024	{
1025	// We have found a matching word, so remember its location
1026	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1027	}
1028	}
1029
1030	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1031	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1032
1033	// Deal with phrases now
1034	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1035	for (int i = 0; i < word_matches.size(); i++)
1036	{
1037	WordMatch word_match = word_matches.get(i);
1038
1039	// See if any partial phrase matches are extended by this word
1040	if (word_match.preceding_word_matched)
1041	{
1042	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1043	{
1044	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1045	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1046	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1047	if (phrase_query_p_term_x_variants.contains(word_match.word))
1048	{
1049	partial_phrase_match.num_words_matched++;
1050
1051	// Has a complete phrase match occurred?
1052	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1053	{
1054	// Check for overlaps by looking at the previous highlight range
1055	if (!highlight_end_positions.isEmpty())
1056	{
1057	int last_highlight_index = highlight_end_positions.size() - 1;
1058	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1059	if (last_highlight_end > partial_phrase_match.start_position)
1060	{
1061	// There is an overlap, so remove the previous phrase match
1062	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1063	highlight_end_positions.remove(last_highlight_index);
1064	partial_phrase_match.start_position = last_highlight_start;
1065	}
1066	}
1067
1068	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1069	highlight_end_positions.add(new Integer(word_match.end_position));
1070	}
1071	// No, but add the partial match back into the list for next time
1072	else
1073	{
1074	partial_phrase_matches.add(partial_phrase_match);
1075	}
1076	}
1077	}
1078	}
1079	else
1080	{
1081	partial_phrase_matches.clear();
1082	}
1083
1084	// See if this word is at the start of any of the phrases
1085	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1086	{
1087	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1088	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1089	if (phrase_query_p_term_1_variants.contains(word_match.word))
1090	{
1091	// If this phrase is just one word long, we have a complete match
1092	if (phrase_query_p_term_variants_list.size() == 1)
1093	{
1094	highlight_start_positions.add(new Integer(word_match.start_position));
1095	highlight_end_positions.add(new Integer(word_match.end_position));
1096	}
1097	// Otherwise we have the start of a potential phrase match
1098	else
1099	{
1100	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1101	}
1102	}
1103	}
1104	}
1105
1106	// Now add the annotation tags into the document at the correct points
1107	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1108
1109	int last_wrote = 0;
1110	for (int i = 0; i < highlight_start_positions.size(); i++)
1111	{
1112	int highlight_start = highlight_start_positions.get(i).intValue();
1113	int highlight_end = highlight_end_positions.get(i).intValue();
1114
1115	// Print anything before the highlight range
1116	if (last_wrote < highlight_start)
1117	{
1118	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1119	content_element.appendChild(this.doc.createTextNode(preceding_text));
1120	}
1121
1122	// Print the highlight text, annotated
1123	if (highlight_end > last_wrote)
1124	{
1125	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1126	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1127	annotation_element.setAttribute("type", "query_term");
1128	content_element.appendChild(annotation_element);
1129	last_wrote = highlight_end;
1130	}
1131	}
1132
1133	// Finish off any unwritten text
1134	if (last_wrote < content_characters.length)
1135	{
1136	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1137	content_element.appendChild(this.doc.createTextNode(remaining_text));
1138	}
1139
1140	return content_element;
1141	}
1142
1143	static private class WordMatch
1144	{
1145	public String word;
1146	public int start_position;
1147	public int end_position;
1148	public boolean preceding_word_matched;
1149
1150	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1151	{
1152	this.word = word;
1153	this.start_position = start_position;
1154	this.end_position = end_position;
1155	this.preceding_word_matched = preceding_word_matched;
1156	}
1157	}
1158
1159	static private class PartialPhraseMatch
1160	{
1161	public int start_position;
1162	public int query_phrase_number;
1163	public int num_words_matched;
1164
1165	public PartialPhraseMatch(int start_position, int query_phrase_number)
1166	{
1167	this.start_position = start_position;
1168	this.query_phrase_number = query_phrase_number;
1169	this.num_words_matched = 1;
1170	}
1171	}
1172	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: