Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 25642

Last change on this file since 25642 was 25642, checked in by sjm84, 12 years ago
A few minor changes to DocumentAction
Property svn:keywords set to `Author Date Id Revision`
File size: 40.3 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37	import java.io.Serializable;
38
39	import org.apache.log4j.*;
40
41	/** Action class for retrieving Documents via the message router */
42	public class DocumentAction extends Action
43	{
44
// log4j logger shared by all instances, named after this class
45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47	// this is used to specify that the sibling nodes of a selected one should be obtained
48	public static final String SIBLING_ARG = "sib";
// request argument naming a sibling/page number; process() appends it to the
// document id as ".<value>.ss"
49	public static final String GOTO_PAGE_ARG = "gp";
// request argument; when "1" (and provide_annotations is on) process() sends
// the retrieved document node to the selected enrich service
50	public static final String ENRICH_DOC_ARG = "end";
// request argument; when "1" process() retrieves content for every document
// node, not just the selected one (this also forces the contents to be expanded)
51	public static final String EXPAND_DOCUMENT_ARG = "ed";
// request argument; when "1" process() asks for the "entire" document structure
52	public static final String EXPAND_CONTENTS_ARG = "ec";
// registered in addActionParameters(); not otherwise read in the part of the
// file reviewed here
53	public static final String REALISTIC_BOOK_ARG = "book";
54
55	/**
56	* if this is set to true, when a document is displayed, any annotation type
57	* services (enrich) will be offered to the user as well
58	*/
59	protected boolean provide_annotations = false;
60
// when true, retrieved node content is passed through highlightQueryTerms()
// before being added to the page; switched on in configure() by the
// "highlightQueryTerms" config parameter
61	protected boolean highlight_query_terms = false;
62
63	public boolean configure()
64	{
65	super.configure();
66	String highlight = (String) config_params.get("highlightQueryTerms");
67	if (highlight != null && highlight.equals("true"))
68	{
69	highlight_query_terms = true;
70	}
71	String annotate = (String) config_params.get("displayAnnotationService");
72	if (annotate != null && annotate.equals("true"))
73	{
74	provide_annotations = true;
75	}
76	return true;
77	}
78
/**
 * Handles a document page request and builds the page response.
 *
 * The request's parameter list supplies the collection, the document id or
 * (for external links) an href, the document type (defaulting to
 * "hierarchy") and the sib/gp/ed/ec switches. The method then:
 * - adds site metadata, interface options and the background data
 *   (format statement, enrich services) to the page response;
 * - for paged and hierarchy document types, asks the collection's
 *   DocumentStructureRetrieve service for structure/structure info and
 *   copies it into a new document element; otherwise (or when no structure
 *   comes back) a single placeholder document node is used;
 * - asks DocumentMetadataRetrieve for metadata for every document node,
 *   plus "assocfilepath" for the root node, and merges the results in;
 * - asks DocumentContentRetrieve for content (all nodes when the document
 *   is expanded, else only the selected node), optionally highlighting
 *   query terms and passing the node through an enrich service.
 *
 * The message is returned early, without document data, when neither a
 * document id nor an href is given or when rl is "0"; it is returned
 * partially filled when processErrorElements() returns true for a service
 * response or when the selected node has no content.
 *
 * @param message_node the incoming message; only its first request element is used
 * @return the response message element
 */
79	public Node process(Node message_node)
80	{
81	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83	Element message = this.converter.nodeToElement(message_node);
84
85	// the response
86	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
87	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
88	result.appendChild(page_response);
89
90	// get the request - assume only one
91	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
92	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
93	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
94
95	// just in case there are some that need to get passed to the services
96	HashMap service_params = (HashMap) params.get("s0");
97
98	String collection = (String) params.get(GSParams.COLLECTION);
99	String document_id = (String) params.get(GSParams.DOCUMENT);
100	if (document_id != null && document_id.equals(""))
101	{
102	document_id = null;
103	}
104	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
105	if (href != null && href.equals(""))
106	{
107	href = null;
108	}
109	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
110	if (document_id == null && href == null)
111	{
112	logger.error("no document specified!");
113	return result;
114	}
115	if (rl != null && rl.equals("0"))
116	{
117	// this is a true external link, we should have been directed to a different page or action
118	logger.error("rl value was 0, shouldn't get here");
119	return result;
120	}
121	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
122	if (document_type == null \|\| document_type.equals(""))
123	{
124	document_type = "hierarchy";
125	}
126	//whether to retrieve siblings or not
127	boolean get_siblings = false;
128	String sibs = (String) params.get(SIBLING_ARG);
129	if (sibs != null && sibs.equals("1"))
130	{
131	get_siblings = true;
132	}
133
134	String doc_id_modifier = "";
135	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
136	if (sibling_num != null && !sibling_num.equals(""))
137	{
138	// we have to modify the doc name
139	doc_id_modifier = "." + sibling_num + ".ss";
140	}
141
142	boolean expand_document = false;
143	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
144	if (ed_arg != null && ed_arg.equals("1"))
145	{
146	expand_document = true;
147	}
148
149	boolean expand_contents = false;
150	if (expand_document)
151	{ // we always expand the contents with the text
152	expand_contents = true;
153	}
154	else
155	{
156	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
157	if (ec_arg != null && ec_arg.equals("1"))
158	{
159	expand_contents = true;
160	}
161	}
162
163	UserContext userContext = new UserContext(request);
164
165	//append site metadata
166	addSiteMetadata(page_response, userContext);
167	addInterfaceOptions(page_response);
168
169	// get the additional data needed for the page
170	getBackgroundData(page_response, collection, userContext);
171	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
172
173	// the_document is where all the doc info - structure and metadata etc
174	// is added into, to be returned in the page
175	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
176	page_response.appendChild(the_document);
177
178	// set the doctype from the cgi arg as an attribute
179	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
180
181	// create a basic doc list containing the current node
182	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
183	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
184	basic_doc_list.appendChild(current_doc);
185	if (document_id != null)
186	{
187	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
188	}
189	else
190	{
191	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
192	// do we need this??
193	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
194	}
195
196	// Create a parameter list to specify the required structure information
197	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
198
199	if (service_params != null)
200	{
201	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
202	}
203
204	Element ds_param = null;
205	boolean get_structure = false;
206	boolean get_structure_info = false;
207	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
208	{
209	get_structure_info = true;
210
211	if (expand_contents)
212	{
213	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
214	ds_param_list.appendChild(ds_param);
215	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
216	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
217	}
218
219	// get the info needed for paged navigation
220	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
221	ds_param_list.appendChild(ds_param);
222	ds_param.setAttribute(GSXML.NAME_ATT, "info");
223	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
224	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
225	ds_param_list.appendChild(ds_param);
226	ds_param.setAttribute(GSXML.NAME_ATT, "info");
227	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
228	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
229	ds_param_list.appendChild(ds_param);
230	ds_param.setAttribute(GSXML.NAME_ATT, "info");
231	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
232
233	if (get_siblings)
234	{
235	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
236	ds_param_list.appendChild(ds_param);
237	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
238	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
239	}
240
241	}
242	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
243	{
244	get_structure = true;
245	if (expand_contents)
246	{
247	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
248	ds_param_list.appendChild(ds_param);
249	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
250	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
251	}
252	else
253	{
254	// get the info needed for table of contents
255	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
256	ds_param_list.appendChild(ds_param);
257	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
258	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
259	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
260	ds_param_list.appendChild(ds_param);
261	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
262	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
263	if (get_siblings)
264	{
265	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
266	ds_param_list.appendChild(ds_param);
267	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
268	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
269	}
270	}
271	}
272	else
273	{
274	// we dont need any structure
275	}
276
277	boolean has_dummy = false; // set when the_document gets a placeholder node instead of retrieved structure
278	if (get_structure \|\| get_structure_info)
279	{
280
281	// Build a request to obtain the document structure
282	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
283	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
284	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
285	ds_message.appendChild(ds_request);
286	ds_request.appendChild(ds_param_list);
287
288	// create a doc_node_list and put in the doc_node that we are interested in
289	ds_request.appendChild(basic_doc_list);
290
291	// Process the document structure retrieve message
292	Element ds_response_message = (Element) this.mr.process(ds_message);
293	if (processErrorElements(ds_response_message, page_response))
294	{
295	return result;
296	}
297
298	// get the info and print out
299	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
300	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
301	path = GSPath.appendLink(path, "nodeStructureInfo");
302	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
303	// get the doc_node bit
304	if (ds_response_struct_info != null)
305	{
306	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
307	}
308	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
309	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
310	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
311	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
312
313	if (ds_response_structure != null)
314	{
315	// add the contents of the structure bit into the_document
316	NodeList structs = ds_response_structure.getChildNodes();
317	for (int i = 0; i < structs.getLength(); i++)
318	{
319	the_document.appendChild(this.doc.importNode(structs.item(i), true));
320	}
321	}
322	else
323	{
324	// no structure nodes, so put in a dummy doc node
325	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
326	if (document_id != null)
327	{
328	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
329	}
330	else
331	{
332	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
333
334	}
335	the_document.appendChild(doc_node);
336	has_dummy = true;
337	}
338	}
339	else
340	{ // a simple type - we dont have a dummy node for simple
341	// should think about this more
342	// no structure request, so just put in a dummy doc node
343	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
344	if (document_id != null)
345	{
346	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
347	}
348	else
349	{
350	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
351	}
352	the_document.appendChild(doc_node);
353	has_dummy = true;
354	}
355
356	// Build a request to obtain some document metadata
357	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
358	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
359	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
360	dm_message.appendChild(dm_request);
361	// Create a parameter list to specify the required metadata information
362
363	HashSet<String> meta_names = new HashSet<String>();
364	meta_names.add("Title"); // the default
365	if (format_elem != null)
366	{
367	getRequiredMetadataNames(format_elem, meta_names);
368	}
369
370	Element dm_param_list = createMetadataParamList(meta_names);
371	if (service_params != null)
372	{
373	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
374	}
375
376	dm_request.appendChild(dm_param_list);
377
378	// create the doc node list for the metadata request
379	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
380	dm_request.appendChild(dm_doc_list);
381
382	// Add each node from the structure response into the metadata request
383	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
384	for (int i = 0; i < doc_nodes.getLength(); i++)
385	{
386	Element doc_node = (Element) doc_nodes.item(i);
387	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
388
389	// Add the documentNode to the list
390	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
391	dm_doc_list.appendChild(dm_doc_node);
392	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
393	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
394	}
395
396	// we also want a metadata request to the top level document to get
397	// assocfilepath - this could be cached too
398	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
399	dm_message.appendChild(doc_meta_request);
400	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
401	if (service_params != null)
402	{
403	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
404	}
405
406	doc_meta_request.appendChild(doc_meta_param_list);
407	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
408	doc_meta_param_list.appendChild(doc_param);
409	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
410	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
411
412	// create the doc node list for the metadata request
413	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
414	doc_meta_request.appendChild(doc_list);
415
416	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
417	// the node we want is the root document node
418	if (document_id != null)
419	{
420	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
421	}
422	else
423	{
424	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
425	// can we assume that href is always a top level doc??
426	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
427	//doc_node.setAttribute("externalURL", has_rl);
428	}
429	doc_list.appendChild(doc_node);
430
431	Element dm_response_message = (Element) this.mr.process(dm_message);
432	if (processErrorElements(dm_response_message, page_response))
433	{
434	return result;
435	}
436
437	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
438	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
439
440	// Merge the metadata with the structure information
441	NodeList dm_response_docs = dm_response_doc_list.getChildNodes(); // NOTE(review): dm_response_doc_list is not null-checked before this call
442	for (int i = 0; i < doc_nodes.getLength(); i++)
443	{
444	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i)); // NOTE(review): pairs nodes purely by index; assumes the response lists the nodes in request order with no other child nodes - confirm
445	}
446	// get the top level doc metadata out
447	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1); // second response; presumably answers doc_meta_request, which was appended second - confirm
448	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
449	GSXML.mergeMetadataLists(the_document, top_doc_node);
450
451	// Build a request to obtain some document content
452	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
453	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
454	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
455	dc_message.appendChild(dc_request);
456
457	// Create a parameter list to specify the request parameters - empty for now
458	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
459	if (service_params != null)
460	{
461	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
462	}
463
464	dc_request.appendChild(dc_param_list);
465
466	// get the content
467	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
468	if (expand_document)
469	{
470	dc_request.appendChild(dm_doc_list);
471	}
472	else
473	{
474	dc_request.appendChild(basic_doc_list);
475	}
476	logger.debug("request = " + XMLConverter.getString(dc_message));
477	Element dc_response_message = (Element) this.mr.process(dc_message);
478	if (processErrorElements(dc_response_message, page_response))
479	{
480	return result;
481	}
482
483	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path); // path is still response/<doc node list> from the metadata step above
484
485	if (expand_document)
486	{
487	// Merge the content with the structure information
488	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
489	for (int i = 0; i < doc_nodes.getLength(); i++)
490	{
491	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
492	if (content != null)
493	{
494	if (highlight_query_terms)
495	{
496	content = highlightQueryTerms(request, (Element) content);
497	}
498	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
499	}
500	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
501	}
502	}
503	else
504	{
505	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
506	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
507	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
508	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
509
510	if (dc_response_doc_content == null)
511	{
512	// no content to add
513	if (dc_response_doc.getAttribute("external").equals("true")) // NOTE(review): dc_response_doc itself is not null-checked
514	{
515
516	//if (dc_response_doc_external != null)
517	//{
518	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
519
520	the_document.setAttribute("selectedNode", href_id);
521	the_document.setAttribute("external", href_id);
522	}
523	return result;
524	}
525	if (highlight_query_terms)
526	{
// swap the original content element for the highlighted version
527	dc_response_doc.removeChild(dc_response_doc_content);
528
529	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
530	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
531	}
532
533	if (provide_annotations)
534	{
535	String service_selected = (String) params.get(ENRICH_DOC_ARG);
536	if (service_selected != null && service_selected.equals("1"))
537	{
538	// now we can modify the response doc if needed
539	String enrich_service = (String) params.get(GSParams.SERVICE);
540	// send a message to the service
541	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
542	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
543	enrich_message.appendChild(enrich_request);
544	// check for parameters
545	HashMap e_service_params = (HashMap) params.get("s1");
546	if (e_service_params != null)
547	{
548	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
549	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
550	enrich_request.appendChild(enrich_pl);
551	}
552	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
553	enrich_request.appendChild(e_doc_list);
554	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
555
556	Node enrich_response = this.mr.process(enrich_message);
557
558	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
559	path = GSPath.createPath(links);
560	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path); // NOTE(review): result is not null-checked, yet it is imported below
561
562	}
563	} // if provide_annotations
564
565	// use the returned id rather than the sent one cos there may have
566	// been modifiers such as .pr that are removed.
567	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
568	the_document.setAttribute("selectedNode", modified_doc_id);
569	if (has_dummy)
570	{
571	// change the id if necessary and add the content
572	Element dummy_node = (Element) doc_nodes.item(0);
573
574	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
575	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
576	// hack for simple type
577	if (document_type.equals("simple"))
578	{
579	// we dont want the internal docNode, just want the content and metadata in the document
580	// rethink this!!
581	the_document.removeChild(dummy_node);
582
583	NodeList dummy_children = dummy_node.getChildNodes();
584	//for (int i=0; i<dummy_children.getLength(); i++) {
// walk backwards: appendChild below moves the child out of dummy_node, so the
// child list shrinks as we go (children end up in reverse order in the_document)
585	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
586	{
587	// special case as we don't want more than one metadata list
588	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
589	{
590	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
591	}
592	else
593	{
594	the_document.appendChild(dummy_children.item(i));
595	}
596	}
597	}
598	}
599	else
600	{
601	// Merge the document content with the metadata and structure information
602	for (int i = 0; i < doc_nodes.getLength(); i++)
603	{
604	Node dn = doc_nodes.item(i);
605	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
606	if (dn_id.equals(modified_doc_id))
607	{
608	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
609	break;
610	}
611	}
612	}
613	}
614	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
615	return result;
616	}
617
618	/**
619	* tell the param class what its arguments are if an action has its own
620	* arguments, this should add them to the params object - particularly
621	* important for args that should not be saved
622	*/
623	public boolean addActionParameters(GSParams params)
624	{
625	params.addParameter(GOTO_PAGE_ARG, false);
626	params.addParameter(ENRICH_DOC_ARG, false);
627	params.addParameter(EXPAND_DOCUMENT_ARG, false);
628	params.addParameter(EXPAND_CONTENTS_ARG, false);
629	params.addParameter(REALISTIC_BOOK_ARG, false);
630
631	return true;
632	}
633
634	/**
635	* this method gets the collection description, the format info, the list of
636	* enrich services, etc - stuff that is needed for the page, but is the same
637	* whatever the query is - should be cached
638	*/
639	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
640	{
641
642	// create a message to process - contains requests for the collection
643	// description, the format element, the enrich services on offer
644	// these could all be cached
645	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
646	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
647	// the format request - ignore for now, where does this request go to??
648	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
649	info_message.appendChild(format_request);
650
651	// the enrich_services request - only do this if provide_annotations is true
652
653	if (provide_annotations)
654	{
655	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
656	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
657	info_message.appendChild(enrich_services_request);
658	}
659
660	Element info_response = (Element) this.mr.process(info_message);
661
662	// the collection is the first response
663	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
664	Element format_resp = (Element) responses.item(0);
665
666	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
667	if (format_elem != null)
668	{
669	logger.debug("doc action found a format statement");
670	// set teh format type
671	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
672	page_response.appendChild(this.doc.importNode(format_elem, true));
673	}
674
675	if (provide_annotations)
676	{
677	Element services_resp = (Element) responses.item(1);
678
679	// a new message for the mr
680	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
681
682	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
683	boolean service_found = false;
684	for (int j = 0; j < e_services.getLength(); j++)
685	{
686	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
687	{
688	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
689	enrich_message.appendChild(s);
690	service_found = true;
691	}
692	}
693	if (service_found)
694	{
695	Element enrich_response = (Element) this.mr.process(enrich_message);
696
697	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
698	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
699	for (int i = 0; i < e_responses.getLength(); i++)
700	{
701	Element e_resp = (Element) e_responses.item(i);
702	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
703	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
704	service_list.appendChild(e_service);
705	}
706	page_response.appendChild(service_list);
707	}
708	} // if provide_annotations
709	return true;
710
711	}
712
713	/**
714	* this involves a bit of a hack to get the equivalent query terms - has to
715	* requery the query service - uses the last selected service name. (if it
716	* ends in query). should this action do the query or should it send a
717	* message to the query action? but that will involve lots of extra stuff.
718	* also doesn't handle phrases properly - just highlights all the terms
719	* found in the text.
720	*/
721	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
722	{
723	// do the query again to get term info
724	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
725	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
726
727	HashMap previous_params = (HashMap) params.get("p");
728	if (previous_params == null)
729	{
730	return dc_response_doc_content;
731	}
732	String service_name = (String) previous_params.get(GSParams.SERVICE);
733	if (service_name == null \|\| !service_name.endsWith("Query"))
734	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
735	logger.debug("invalid service, not doing highlighting");
736	return dc_response_doc_content;
737	}
738	String collection = (String) params.get(GSParams.COLLECTION);
739	UserContext userContext = new UserContext(request);
740	String to = GSPath.appendLink(collection, service_name);
741
742	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
743	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
744	mr_query_message.appendChild(mr_query_request);
745
746	// paramList
747	HashMap service_params = (HashMap) params.get("s1");
748
749	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
750	GSXML.addParametersToList(this.doc, query_param_list, service_params);
751	mr_query_request.appendChild(query_param_list);
752
753	// do the query
754	Element mr_query_response = (Element) this.mr.process(mr_query_message);
755
756	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
757	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
758	if (query_term_list_element == null)
759	{
760	// no term info
761	logger.error("No query term information.\n");
762	return dc_response_doc_content;
763	}
764
765	String content = GSXML.getNodeText(dc_response_doc_content);
766
767	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
768	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
769
770	HashSet<String> query_term_variants = new HashSet<String>();
771	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
772	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
773	{
774	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
775	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
776	{
777	for (int i = 0; i < terms_nodelist.getLength(); i++)
778	{
779	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
780	String termValueU = null;
781	String termValueL = null;
782
783	if (termValue.length() > 1)
784	{
785	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
786	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
787	}
788	else
789	{
790	termValueU = termValue.substring(0, 1).toUpperCase();
791	termValueL = termValue.substring(0, 1).toLowerCase();
792	}
793
794	query_term_variants.add(termValueU);
795	query_term_variants.add(termValueL);
796	}
797	}
798	}
799	else
800	{
801	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
802	{
803	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
804	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
805	for (int j = 0; j < equivalent_terms.length; j++)
806	{
807	query_term_variants.add(equivalent_terms[j]);
808	}
809	}
810	}
811
812	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
813
814	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
815	String performed_query = GSXML.getNodeText(query_element) + " ";
816
817	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
818	int term_start = 0;
819	boolean in_term = false;
820	boolean in_phrase = false;
821	for (int i = 0; i < performed_query.length(); i++)
822	{
823	char character = performed_query.charAt(i);
824	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
825
826	// Has a query term just started?
827	if (in_term == false && is_character_letter_or_digit == true)
828	{
829	in_term = true;
830	term_start = i;
831	}
832
833	// Or has a term just finished?
834	else if (in_term == true && is_character_letter_or_digit == false)
835	{
836	in_term = false;
837	String term = performed_query.substring(term_start, i);
838
839	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
840	if (term_element != null)
841	{
842
843	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
844
845	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
846	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
847	{
848	String termValueU = null;
849	String termValueL = null;
850
851	if (term.length() > 1)
852	{
853	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
854	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
855	}
856	else
857	{
858	termValueU = term.substring(0, 1).toUpperCase();
859	termValueL = term.substring(0, 1).toLowerCase();
860	}
861
862	phrase_query_p_term_x_variants.add(termValueU);
863	phrase_query_p_term_x_variants.add(termValueL);
864	}
865	else
866	{
867	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
868	{
869	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
870	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
871	for (int k = 0; k < term_equivalent_terms.length; k++)
872	{
873	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
874	}
875	}
876	}
877	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
878
879	if (in_phrase == false)
880	{
881	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
882	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
883	}
884	}
885	}
886	// Watch for phrases (surrounded by quotes)
887	if (character == '\"')
888	{
889	// Has a phrase just started?
890	if (in_phrase == false)
891	{
892	in_phrase = true;
893	}
894	// Or has a phrase just finished?
895	else if (in_phrase == true)
896	{
897	in_phrase = false;
898	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
899	}
900
901	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
902	}
903	}
904
905	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
906	}
907
908	/**
909	* Highlights query terms in a piece of text.
910	*/
911	private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
912	{
913	// Convert the content string to an array of characters for speed
914	char[] content_characters = new char[content.length()];
915	content.getChars(0, content.length(), content_characters, 0);
916
917	// Now skim through the content, identifying word matches
918	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
919	int word_start = 0;
920	boolean in_word = false;
921	boolean preceding_word_matched = false;
922	boolean inTag = false;
923	for (int i = 0; i < content_characters.length; i++)
924	{
925	//We don't want to find words inside HTML tags
926	if (content_characters[i] == '<')
927	{
928	inTag = true;
929	continue;
930	}
931	else if (inTag && content_characters[i] == '>')
932	{
933	inTag = false;
934	}
935	else if (inTag)
936	{
937	continue;
938	}
939
940	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
941
942	// Has a word just started?
943	if (in_word == false && is_character_letter_or_digit == true)
944	{
945	in_word = true;
946	word_start = i;
947	}
948
949	// Or has a word just finished?
950	else if (in_word == true && is_character_letter_or_digit == false)
951	{
952	in_word = false;
953
954	// Check if the word matches any of the query term equivalents
955	String word = new String(content_characters, word_start, (i - word_start));
956	if (query_term_variants.contains(word))
957	{
958	// We have found a matching word, so remember its location
959	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
960	preceding_word_matched = true;
961	}
962	else
963	{
964	preceding_word_matched = false;
965	}
966	}
967	}
968
969	// Don't forget the last word...
970	if (in_word == true)
971	{
972	// Check if the word matches any of the query term equivalents
973	String word = new String(content_characters, word_start, (content_characters.length - word_start));
974	if (query_term_variants.contains(word))
975	{
976	// We have found a matching word, so remember its location
977	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
978	}
979	}
980
981	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
982	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
983
984	// Deal with phrases now
985	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
986	for (int i = 0; i < word_matches.size(); i++)
987	{
988	WordMatch word_match = word_matches.get(i);
989
990	// See if any partial phrase matches are extended by this word
991	if (word_match.preceding_word_matched)
992	{
993	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
994	{
995	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
996	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
997	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
998	if (phrase_query_p_term_x_variants.contains(word_match.word))
999	{
1000	partial_phrase_match.num_words_matched++;
1001
1002	// Has a complete phrase match occurred?
1003	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1004	{
1005	// Check for overlaps by looking at the previous highlight range
1006	if (!highlight_end_positions.isEmpty())
1007	{
1008	int last_highlight_index = highlight_end_positions.size() - 1;
1009	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1010	if (last_highlight_end > partial_phrase_match.start_position)
1011	{
1012	// There is an overlap, so remove the previous phrase match
1013	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1014	highlight_end_positions.remove(last_highlight_index);
1015	partial_phrase_match.start_position = last_highlight_start;
1016	}
1017	}
1018
1019	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1020	highlight_end_positions.add(new Integer(word_match.end_position));
1021	}
1022	// No, but add the partial match back into the list for next time
1023	else
1024	{
1025	partial_phrase_matches.add(partial_phrase_match);
1026	}
1027	}
1028	}
1029	}
1030	else
1031	{
1032	partial_phrase_matches.clear();
1033	}
1034
1035	// See if this word is at the start of any of the phrases
1036	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1037	{
1038	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1039	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1040	if (phrase_query_p_term_1_variants.contains(word_match.word))
1041	{
1042	// If this phrase is just one word long, we have a complete match
1043	if (phrase_query_p_term_variants_list.size() == 1)
1044	{
1045	highlight_start_positions.add(new Integer(word_match.start_position));
1046	highlight_end_positions.add(new Integer(word_match.end_position));
1047	}
1048	// Otherwise we have the start of a potential phrase match
1049	else
1050	{
1051	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1052	}
1053	}
1054	}
1055	}
1056
1057	// Now add the annotation tags into the document at the correct points
1058	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1059
1060	int last_wrote = 0;
1061	for (int i = 0; i < highlight_start_positions.size(); i++)
1062	{
1063	int highlight_start = highlight_start_positions.get(i).intValue();
1064	int highlight_end = highlight_end_positions.get(i).intValue();
1065
1066	// Print anything before the highlight range
1067	if (last_wrote < highlight_start)
1068	{
1069	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1070	content_element.appendChild(this.doc.createTextNode(preceding_text));
1071	}
1072
1073	// Print the highlight text, annotated
1074	if (highlight_end > last_wrote)
1075	{
1076	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1077	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1078	annotation_element.setAttribute("type", "query_term");
1079	content_element.appendChild(annotation_element);
1080	last_wrote = highlight_end;
1081	}
1082	}
1083
1084	// Finish off any unwritten text
1085	if (last_wrote < content_characters.length)
1086	{
1087	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1088	content_element.appendChild(this.doc.createTextNode(remaining_text));
1089	}
1090
1091	return content_element;
1092	}
1093
1094	static private class WordMatch
1095	{
1096	public String word;
1097	public int start_position;
1098	public int end_position;
1099	public boolean preceding_word_matched;
1100
1101	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1102	{
1103	this.word = word;
1104	this.start_position = start_position;
1105	this.end_position = end_position;
1106	this.preceding_word_matched = preceding_word_matched;
1107	}
1108	}
1109
1110	static private class PartialPhraseMatch
1111	{
1112	public int start_position;
1113	public int query_phrase_number;
1114	public int num_words_matched;
1115
1116	public PartialPhraseMatch(int start_position, int query_phrase_number)
1117	{
1118	this.start_position = start_position;
1119	this.query_phrase_number = query_phrase_number;
1120	this.num_words_matched = 1;
1121	}
1122	}
1123	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: