Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 26044

Last change on this file since 26044 was 26044, checked in by kjdon, 12 years ago
removed a couple of debug print statements
Property svn:keywords set to `Author Date Id Revision`
File size: 42.7 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37	import java.io.Serializable;
38
39	import org.apache.log4j.*;
40
41	/** Action class for retrieving Documents via the message router */
42	public class DocumentAction extends Action
43	{
44
45	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
46
47	// this is used to specify that the sibling nodes of a selected one should be obtained
48	public static final String SIBLING_ARG = "sib";
49	public static final String GOTO_PAGE_ARG = "gp";
50	public static final String ENRICH_DOC_ARG = "end";
51	public static final String EXPAND_DOCUMENT_ARG = "ed";
52	public static final String EXPAND_CONTENTS_ARG = "ec";
53	public static final String REALISTIC_BOOK_ARG = "book";
54
55	/**
56	* if this is set to true, when a document is displayed, any annotation type
57	* services (enrich) will be offered to the user as well
58	*/
59	protected boolean provide_annotations = false;
60
61	protected boolean highlight_query_terms = false;
62
63	public boolean configure()
64	{
65	super.configure();
66	String highlight = (String) config_params.get("highlightQueryTerms");
67	if (highlight != null && highlight.equals("true"))
68	{
69	highlight_query_terms = true;
70	}
71	String annotate = (String) config_params.get("displayAnnotationService");
72	if (annotate != null && annotate.equals("true"))
73	{
74	provide_annotations = true;
75	}
76	return true;
77	}
78
79	public Node process(Node message_node)
80	{
81	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
82
83	Element message = this.converter.nodeToElement(message_node);
84
85	// the response
86	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
87	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
88	result.appendChild(page_response);
89
90	// get the request - assume only one
91	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
92	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
93	HashMap<String, Serializable> params = GSXML.extractParams(cgi_paramList, false);
94
95	// just in case there are some that need to get passed to the services
96	HashMap service_params = (HashMap) params.get("s0");
97
98	String collection = (String) params.get(GSParams.COLLECTION);
99	String document_id = (String) params.get(GSParams.DOCUMENT);
100	if (document_id != null && document_id.equals(""))
101	{
102	document_id = null;
103	}
104	String href = (String) params.get(GSParams.HREF);//for an external link : get the href URL if it is existing in the params list
105	if (href != null && href.equals(""))
106	{
107	href = null;
108	}
109	String rl = (String) params.get(GSParams.RELATIVE_LINK);//for an external link : get the rl value if it is existing in the params list
110	if (document_id == null && href == null)
111	{
112	logger.error("no document specified!");
113	return result;
114	}
115	if (rl != null && rl.equals("0"))
116	{
117	// this is a true external link, we should have been directed to a different page or action
118	logger.error("rl value was 0, shouldn't get here");
119	return result;
120	}
121	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
122	if (document_type != null && document_type.equals(""))
123	{
124	//document_type = "hierarchy";
125	document_type = null; // we'll get it later if not already specified
126	}
127	//whether to retrieve siblings or not
128	boolean get_siblings = false;
129	String sibs = (String) params.get(SIBLING_ARG);
130	if (sibs != null && sibs.equals("1"))
131	{
132	get_siblings = true;
133	}
134
135	String doc_id_modifier = "";
136	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
137	if (sibling_num != null && !sibling_num.equals(""))
138	{
139	// we have to modify the doc name
140	doc_id_modifier = "." + sibling_num + ".ss";
141	}
142
143	boolean expand_document = false;
144	String ed_arg = (String) params.get(EXPAND_DOCUMENT_ARG);
145	if (ed_arg != null && ed_arg.equals("1"))
146	{
147	expand_document = true;
148	}
149
150	boolean expand_contents = false;
151	if (expand_document)
152	{ // we always expand the contents with the text
153	expand_contents = true;
154	}
155	else
156	{
157	String ec_arg = (String) params.get(EXPAND_CONTENTS_ARG);
158	if (ec_arg != null && ec_arg.equals("1"))
159	{
160	expand_contents = true;
161	}
162	}
163
164	UserContext userContext = new UserContext(request);
165
166	//append site metadata
167	addSiteMetadata(page_response, userContext);
168	addInterfaceOptions(page_response);
169
170	// get the additional data needed for the page
171	getBackgroundData(page_response, collection, userContext);
172	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
173
174	// the_document is where all the doc info - structure and metadata etc
175	// is added into, to be returned in the page
176	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
177	page_response.appendChild(the_document);
178
179	// create a basic doc list containing the current node
180	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
181	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
182	basic_doc_list.appendChild(current_doc);
183	if (document_id != null)
184	{
185	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_id + doc_id_modifier);
186	}
187	else
188	{
189	current_doc.setAttribute(GSXML.HREF_ID_ATT, href);
190	// do we need this??
191	current_doc.setAttribute(GSXML.ID_MOD_ATT, doc_id_modifier);
192	}
193
194	if (document_type == null)
195	{
196	document_type = getDocumentType(basic_doc_list, collection, userContext, page_response);
197	}
198	if (document_type != null)
199	{
200	// set the doctype from the cgi arg or from the server as an attribute
201	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
202	}
203	else
204	{
205	logger.error("doctype is null!!!***********");
206	}
207
208	// Create a parameter list to specify the required structure information
209	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
210
211	if (service_params != null)
212	{
213	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
214	}
215
216	Element ds_param = null;
217	boolean get_structure = false;
218	boolean get_structure_info = false;
219	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
220	{
221	get_structure_info = true;
222
223	if (expand_contents)
224	{
225	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
226	ds_param_list.appendChild(ds_param);
227	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
228	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
229	}
230
231	// get the info needed for paged naviagtion
232	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
233	ds_param_list.appendChild(ds_param);
234	ds_param.setAttribute(GSXML.NAME_ATT, "info");
235	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
236	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
237	ds_param_list.appendChild(ds_param);
238	ds_param.setAttribute(GSXML.NAME_ATT, "info");
239	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
240	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
241	ds_param_list.appendChild(ds_param);
242	ds_param.setAttribute(GSXML.NAME_ATT, "info");
243	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
244
245	if (get_siblings)
246	{
247	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
248	ds_param_list.appendChild(ds_param);
249	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
250	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
251	}
252
253	}
254	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY) \|\|document_type.equals(GSXML.DOC_TYPE_PAGED_HIERARCHY) )
255	{
256	get_structure = true;
257	if (expand_contents)
258	{
259	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
260	ds_param_list.appendChild(ds_param);
261	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
262	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
263	}
264	else
265	{
266	// get the info needed for table of contents
267	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
268	ds_param_list.appendChild(ds_param);
269	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
270	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
271	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
272	ds_param_list.appendChild(ds_param);
273	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
274	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
275	if (get_siblings)
276	{
277	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
278	ds_param_list.appendChild(ds_param);
279	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
280	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
281	}
282	}
283	}
284	else
285	{
286	// we dont need any structure
287	}
288
289	boolean has_dummy = false;
290	if (get_structure \|\| get_structure_info)
291	{
292
293	// Build a request to obtain the document structure
294	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
295	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
296	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
297	ds_message.appendChild(ds_request);
298	ds_request.appendChild(ds_param_list);
299
300	// add the node list we created earlier
301	ds_request.appendChild(basic_doc_list);
302
303	// Process the document structure retrieve message
304	Element ds_response_message = (Element) this.mr.process(ds_message);
305	if (processErrorElements(ds_response_message, page_response))
306	{
307	return result;
308	}
309
310	// get the info and print out
311	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
312	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
313	path = GSPath.appendLink(path, "nodeStructureInfo");
314	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
315	// get the doc_node bit
316	if (ds_response_struct_info != null)
317	{
318	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
319	}
320	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
321	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
322	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
323	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
324
325	if (ds_response_structure != null)
326	{
327	// add the contents of the structure bit into the_document
328	NodeList structs = ds_response_structure.getChildNodes();
329	for (int i = 0; i < structs.getLength(); i++)
330	{
331	the_document.appendChild(this.doc.importNode(structs.item(i), true));
332	}
333	}
334	else
335	{
336	// no structure nodes, so put in a dummy doc node
337	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
338	if (document_id != null)
339	{
340	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
341	}
342	else
343	{
344	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
345
346	}
347	the_document.appendChild(doc_node);
348	has_dummy = true;
349	}
350	}
351	else
352	{ // a simple type - we dont have a dummy node for simple
353	// should think about this more
354	// no structure request, so just put in a dummy doc node
355	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
356	if (document_id != null)
357	{
358	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id);
359	}
360	else
361	{
362	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);
363	}
364	the_document.appendChild(doc_node);
365	has_dummy = true;
366	}
367
368	// Build a request to obtain some document metadata
369	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
370	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
371	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
372	dm_message.appendChild(dm_request);
373	// Create a parameter list to specify the required metadata information
374
375	HashSet<String> meta_names = new HashSet<String>();
376	meta_names.add("Title"); // the default
377	if (format_elem != null)
378	{
379	getRequiredMetadataNames(format_elem, meta_names);
380	}
381
382	Element extraMetaListElem = (Element) GSXML.getChildByTagName(request, GSXML.EXTRA_METADATA + GSXML.LIST_MODIFIER);
383	if(extraMetaListElem != null)
384	{
385	NodeList extraMetaList = extraMetaListElem.getElementsByTagName(GSXML.EXTRA_METADATA);
386	for(int i = 0; i < extraMetaList.getLength(); i++)
387	{
388	meta_names.add(((Element)extraMetaList.item(i)).getAttribute(GSXML.NAME_ATT));
389	}
390	}
391
392	Element dm_param_list = createMetadataParamList(meta_names);
393	if (service_params != null)
394	{
395	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
396	}
397
398	dm_request.appendChild(dm_param_list);
399
400	// create the doc node list for the metadata request
401	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
402	dm_request.appendChild(dm_doc_list);
403
404	// Add each node from the structure response into the metadata request
405	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
406	for (int i = 0; i < doc_nodes.getLength(); i++)
407	{
408	Element doc_node = (Element) doc_nodes.item(i);
409	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
410
411	// Add the documentNode to the list
412	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
413	dm_doc_list.appendChild(dm_doc_node);
414	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
415	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
416	}
417
418	// we also want a metadata request to the top level document to get
419	// assocfilepath - this could be cached too
420	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
421	dm_message.appendChild(doc_meta_request);
422	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
423	if (service_params != null)
424	{
425	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
426	}
427
428	doc_meta_request.appendChild(doc_meta_param_list);
429	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
430	doc_meta_param_list.appendChild(doc_param);
431	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
432	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
433
434	// create the doc node list for the metadata request
435	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
436	doc_meta_request.appendChild(doc_list);
437
438	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
439	// the node we want is the root document node
440	if (document_id != null)
441	{
442	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_id + ".rt");
443	}
444	else
445	{
446	doc_node.setAttribute(GSXML.HREF_ID_ATT, href);// + ".rt");
447	// can we assume that href is always a top level doc??
448	//doc_node.setAttribute(GSXML.ID_MOD_ATT, ".rt");
449	//doc_node.setAttribute("externalURL", has_rl);
450	}
451	doc_list.appendChild(doc_node);
452
453	Element dm_response_message = (Element) this.mr.process(dm_message);
454	if (processErrorElements(dm_response_message, page_response))
455	{
456	return result;
457	}
458
459	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
460	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
461
462	// Merge the metadata with the structure information
463	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
464	for (int i = 0; i < doc_nodes.getLength(); i++)
465	{
466	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
467	}
468	// get the top level doc metadata out
469	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
470	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
471	GSXML.mergeMetadataLists(the_document, top_doc_node);
472
473	// Build a request to obtain some document content
474	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
475	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
476	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
477	dc_message.appendChild(dc_request);
478
479	// Create a parameter list to specify the request parameters - empty for now
480	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
481	if (service_params != null)
482	{
483	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
484	}
485
486	dc_request.appendChild(dc_param_list);
487
488	// get the content
489	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
490	if (expand_document)
491	{
492	dc_request.appendChild(dm_doc_list);
493	}
494	else
495	{
496	dc_request.appendChild(basic_doc_list);
497	}
498	logger.debug("request = " + XMLConverter.getString(dc_message));
499	Element dc_response_message = (Element) this.mr.process(dc_message);
500	if (processErrorElements(dc_response_message, page_response))
501	{
502	return result;
503	}
504
505	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
506
507	if (expand_document)
508	{
509	// Merge the content with the structure information
510	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
511	for (int i = 0; i < doc_nodes.getLength(); i++)
512	{
513	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
514	if (content != null)
515	{
516	if (highlight_query_terms)
517	{
518	content = highlightQueryTerms(request, (Element) content);
519	}
520	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
521	}
522	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
523	}
524	}
525	else
526	{
527	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
528	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
529	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
530	//Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
531
532	if (dc_response_doc_content == null)
533	{
534	// no content to add
535	if (dc_response_doc.getAttribute("external").equals("true"))
536	{
537
538	//if (dc_response_doc_external != null)
539	//{
540	String href_id = dc_response_doc.getAttribute(GSXML.HREF_ID_ATT);
541
542	the_document.setAttribute("selectedNode", href_id);
543	the_document.setAttribute("external", href_id);
544	}
545	return result;
546	}
547	if (highlight_query_terms)
548	{
549	dc_response_doc.removeChild(dc_response_doc_content);
550
551	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
552	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
553	}
554
555	if (provide_annotations)
556	{
557	String service_selected = (String) params.get(ENRICH_DOC_ARG);
558	if (service_selected != null && service_selected.equals("1"))
559	{
560	// now we can modifiy the response doc if needed
561	String enrich_service = (String) params.get(GSParams.SERVICE);
562	// send a message to the service
563	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
564	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, userContext);
565	enrich_message.appendChild(enrich_request);
566	// check for parameters
567	HashMap e_service_params = (HashMap) params.get("s1");
568	if (e_service_params != null)
569	{
570	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
571	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
572	enrich_request.appendChild(enrich_pl);
573	}
574	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
575	enrich_request.appendChild(e_doc_list);
576	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
577
578	Node enrich_response = this.mr.process(enrich_message);
579
580	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
581	path = GSPath.createPath(links);
582	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
583
584	}
585	} // if provide_annotations
586
587	// use the returned id rather than the sent one cos there may have
588	// been modifiers such as .pr that are removed.
589	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
590	the_document.setAttribute("selectedNode", modified_doc_id);
591	if (has_dummy)
592	{
593	// change the id if necessary and add the content
594	Element dummy_node = (Element) doc_nodes.item(0);
595
596	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
597	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
598	// hack for simple type
599	if (document_type.equals("simple"))
600	{
601	// we dont want the internal docNode, just want the content and metadata in the document
602	// rethink this!!
603	the_document.removeChild(dummy_node);
604
605	NodeList dummy_children = dummy_node.getChildNodes();
606	//for (int i=0; i<dummy_children.getLength(); i++) {
607	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
608	{
609	// special case as we don't want more than one metadata list
610	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
611	{
612	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
613	}
614	else
615	{
616	the_document.appendChild(dummy_children.item(i));
617	}
618	}
619	}
620	}
621	else
622	{
623	// Merge the document content with the metadata and structure information
624	for (int i = 0; i < doc_nodes.getLength(); i++)
625	{
626	Node dn = doc_nodes.item(i);
627	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
628	if (dn_id.equals(modified_doc_id))
629	{
630	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
631	break;
632	}
633	}
634	}
635	}
636	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
637	return result;
638	}
639
640	/**
641	* tell the param class what its arguments are if an action has its own
642	* arguments, this should add them to the params object - particularly
643	* important for args that should not be saved
644	*/
645	public boolean addActionParameters(GSParams params)
646	{
647	params.addParameter(GOTO_PAGE_ARG, false);
648	params.addParameter(ENRICH_DOC_ARG, false);
649	params.addParameter(EXPAND_DOCUMENT_ARG, false);
650	params.addParameter(EXPAND_CONTENTS_ARG, false);
651	params.addParameter(REALISTIC_BOOK_ARG, false);
652
653	return true;
654	}
655
656	/**
657	* this method gets the collection description, the format info, the list of
658	* enrich services, etc - stuff that is needed for the page, but is the same
659	* whatever the query is - should be cached
660	*/
661	protected boolean getBackgroundData(Element page_response, String collection, UserContext userContext)
662	{
663
664	// create a message to process - contains requests for the collection
665	// description, the format element, the enrich services on offer
666	// these could all be cached
667	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
668	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
669	// the format request - ignore for now, where does this request go to??
670	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, userContext);
671	info_message.appendChild(format_request);
672
673	// the enrich_services request - only do this if provide_annotations is true
674
675	if (provide_annotations)
676	{
677	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", userContext);
678	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
679	info_message.appendChild(enrich_services_request);
680	}
681
682	Element info_response = (Element) this.mr.process(info_message);
683
684	// the collection is the first response
685	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
686	Element format_resp = (Element) responses.item(0);
687
688	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
689	if (format_elem != null)
690	{
691	Element global_format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.GLOBAL_FORMAT_ELEM);
692	if(global_format_elem != null)
693	{
694	GSXSLT.mergeFormatElements(format_elem, global_format_elem, false);
695	}
696
697	// set the format type
698	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
699	page_response.appendChild(this.doc.importNode(format_elem, true));
700	}
701
702	if (provide_annotations)
703	{
704	Element services_resp = (Element) responses.item(1);
705
706	// a new message for the mr
707	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
708	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
709	boolean service_found = false;
710	for (int j = 0; j < e_services.getLength(); j++)
711	{
712	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
713	{
714	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), userContext);
715	enrich_message.appendChild(s);
716	service_found = true;
717	}
718	}
719	if (service_found)
720	{
721	Element enrich_response = (Element) this.mr.process(enrich_message);
722
723	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
724	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
725	for (int i = 0; i < e_responses.getLength(); i++)
726	{
727	Element e_resp = (Element) e_responses.item(i);
728	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
729	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
730	service_list.appendChild(e_service);
731	}
732	page_response.appendChild(service_list);
733	}
734	} // if provide_annotations
735	return true;
736
737	}
738
739	protected String getDocumentType(Element basic_doc_list, String collection, UserContext userContext, Element page_response)
740	{
741	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
742	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
743	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
744	ds_message.appendChild(ds_request);
745
746	// Create a parameter list to specify the required structure information
747	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
748	Element ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
749	ds_param_list.appendChild(ds_param);
750	ds_param.setAttribute(GSXML.NAME_ATT, "info");
751	ds_param.setAttribute(GSXML.VALUE_ATT, "documentType");
752
753	ds_request.appendChild(ds_param_list);
754
755	// add the node list we created earlier
756	ds_request.appendChild(basic_doc_list);
757
758	// Process the document structure retrieve message
759	Element ds_response_message = (Element) this.mr.process(ds_message);
760	if (processErrorElements(ds_response_message, page_response))
761	{
762	return null;
763	}
764
765	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, "nodeStructureInfo" };
766	String path = GSPath.createPath(links);
767	Element info_elem = (Element) GSXML.getNodeByPath(ds_response_message, path);
768	Element doctype_elem = GSXML.getNamedElement(info_elem, "info", "name", "documentType");
769	if (doctype_elem != null)
770	{
771	String doc_type = doctype_elem.getAttribute("value");
772	return doc_type;
773	}
774	return null;
775	}
776
777	/**
778	* this involves a bit of a hack to get the equivalent query terms - has to
779	* requery the query service - uses the last selected service name. (if it
780	* ends in query). should this action do the query or should it send a
781	* message to the query action? but that will involve lots of extra stuff.
782	* also doesn't handle phrases properly - just highlights all the terms
783	* found in the text.
784	*/
785	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
786	{
787	// do the query again to get term info
788	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
789	HashMap<String, Serializable> params = GSXML.extractParams(cgi_param_list, false);
790
791	HashMap previous_params = (HashMap) params.get("p");
792	if (previous_params == null)
793	{
794	return dc_response_doc_content;
795	}
796	String service_name = (String) previous_params.get(GSParams.SERVICE);
797	if (service_name == null \|\| !service_name.endsWith("Query"))
798	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
799	logger.debug("invalid service, not doing highlighting");
800	return dc_response_doc_content;
801	}
802	String collection = (String) params.get(GSParams.COLLECTION);
803	UserContext userContext = new UserContext(request);
804	String to = GSPath.appendLink(collection, service_name);
805
806	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
807	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, userContext);
808	mr_query_message.appendChild(mr_query_request);
809
810	// paramList
811	HashMap service_params = (HashMap) params.get("s1");
812
813	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
814	GSXML.addParametersToList(this.doc, query_param_list, service_params);
815	mr_query_request.appendChild(query_param_list);
816
817	// do the query
818	Element mr_query_response = (Element) this.mr.process(mr_query_message);
819
820	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
821	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
822	if (query_term_list_element == null)
823	{
824	// no term info
825	logger.error("No query term information.\n");
826	return dc_response_doc_content;
827	}
828
829	String content = GSXML.getNodeText(dc_response_doc_content);
830
831	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
832	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
833
834	HashSet<String> query_term_variants = new HashSet<String>();
835	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
836	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
837	{
838	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
839	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
840	{
841	for (int i = 0; i < terms_nodelist.getLength(); i++)
842	{
843	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
844	String termValueU = null;
845	String termValueL = null;
846
847	if (termValue.length() > 1)
848	{
849	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
850	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
851	}
852	else
853	{
854	termValueU = termValue.substring(0, 1).toUpperCase();
855	termValueL = termValue.substring(0, 1).toLowerCase();
856	}
857
858	query_term_variants.add(termValueU);
859	query_term_variants.add(termValueL);
860	}
861	}
862	}
863	else
864	{
865	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
866	{
867	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
868	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
869	for (int j = 0; j < equivalent_terms.length; j++)
870	{
871	query_term_variants.add(equivalent_terms[j]);
872	}
873	}
874	}
875
876	ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy = new ArrayList<ArrayList<HashSet<String>>>();
877
878	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
879	String performed_query = GSXML.getNodeText(query_element) + " ";
880
881	ArrayList<HashSet<String>> phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
882	int term_start = 0;
883	boolean in_term = false;
884	boolean in_phrase = false;
885	for (int i = 0; i < performed_query.length(); i++)
886	{
887	char character = performed_query.charAt(i);
888	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
889
890	// Has a query term just started?
891	if (in_term == false && is_character_letter_or_digit == true)
892	{
893	in_term = true;
894	term_start = i;
895	}
896
897	// Or has a term just finished?
898	else if (in_term == true && is_character_letter_or_digit == false)
899	{
900	in_term = false;
901	String term = performed_query.substring(term_start, i);
902
903	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
904	if (term_element != null)
905	{
906
907	HashSet<String> phrase_query_p_term_x_variants = new HashSet<String>();
908
909	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
910	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
911	{
912	String termValueU = null;
913	String termValueL = null;
914
915	if (term.length() > 1)
916	{
917	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
918	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
919	}
920	else
921	{
922	termValueU = term.substring(0, 1).toUpperCase();
923	termValueL = term.substring(0, 1).toLowerCase();
924	}
925
926	phrase_query_p_term_x_variants.add(termValueU);
927	phrase_query_p_term_x_variants.add(termValueL);
928	}
929	else
930	{
931	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
932	{
933	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
934	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
935	for (int k = 0; k < term_equivalent_terms.length; k++)
936	{
937	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
938	}
939	}
940	}
941	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
942
943	if (in_phrase == false)
944	{
945	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
946	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
947	}
948	}
949	}
950	// Watch for phrases (surrounded by quotes)
951	if (character == '\"')
952	{
953	// Has a phrase just started?
954	if (in_phrase == false)
955	{
956	in_phrase = true;
957	}
958	// Or has a phrase just finished?
959	else if (in_phrase == true)
960	{
961	in_phrase = false;
962	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
963	}
964
965	phrase_query_p_term_variants_list = new ArrayList<HashSet<String>>();
966	}
967	}
968
969	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
970	}
971
972	/**
973	* Highlights query terms in a piece of text.
974	*/
975	private Element highlightQueryTermsInternal(String content, HashSet<String> query_term_variants, ArrayList<ArrayList<HashSet<String>>> phrase_query_term_variants_hierarchy)
976	{
977	// Convert the content string to an array of characters for speed
978	char[] content_characters = new char[content.length()];
979	content.getChars(0, content.length(), content_characters, 0);
980
981	// Now skim through the content, identifying word matches
982	ArrayList<WordMatch> word_matches = new ArrayList<WordMatch>();
983	int word_start = 0;
984	boolean in_word = false;
985	boolean preceding_word_matched = false;
986	boolean inTag = false;
987	for (int i = 0; i < content_characters.length; i++)
988	{
989	//We don't want to find words inside HTML tags
990	if (content_characters[i] == '<')
991	{
992	inTag = true;
993	continue;
994	}
995	else if (inTag && content_characters[i] == '>')
996	{
997	inTag = false;
998	}
999	else if (inTag)
1000	{
1001	continue;
1002	}
1003
1004	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
1005
1006	// Has a word just started?
1007	if (in_word == false && is_character_letter_or_digit == true)
1008	{
1009	in_word = true;
1010	word_start = i;
1011	}
1012
1013	// Or has a word just finished?
1014	else if (in_word == true && is_character_letter_or_digit == false)
1015	{
1016	in_word = false;
1017
1018	// Check if the word matches any of the query term equivalents
1019	String word = new String(content_characters, word_start, (i - word_start));
1020	if (query_term_variants.contains(word))
1021	{
1022	// We have found a matching word, so remember its location
1023	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
1024	preceding_word_matched = true;
1025	}
1026	else
1027	{
1028	preceding_word_matched = false;
1029	}
1030	}
1031	}
1032
1033	// Don't forget the last word...
1034	if (in_word == true)
1035	{
1036	// Check if the word matches any of the query term equivalents
1037	String word = new String(content_characters, word_start, (content_characters.length - word_start));
1038	if (query_term_variants.contains(word))
1039	{
1040	// We have found a matching word, so remember its location
1041	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
1042	}
1043	}
1044
1045	ArrayList<Integer> highlight_start_positions = new ArrayList<Integer>();
1046	ArrayList<Integer> highlight_end_positions = new ArrayList<Integer>();
1047
1048	// Deal with phrases now
1049	ArrayList<PartialPhraseMatch> partial_phrase_matches = new ArrayList<PartialPhraseMatch>();
1050	for (int i = 0; i < word_matches.size(); i++)
1051	{
1052	WordMatch word_match = word_matches.get(i);
1053
1054	// See if any partial phrase matches are extended by this word
1055	if (word_match.preceding_word_matched)
1056	{
1057	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
1058	{
1059	PartialPhraseMatch partial_phrase_match = partial_phrase_matches.remove(j);
1060	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
1061	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
1062	if (phrase_query_p_term_x_variants.contains(word_match.word))
1063	{
1064	partial_phrase_match.num_words_matched++;
1065
1066	// Has a complete phrase match occurred?
1067	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
1068	{
1069	// Check for overlaps by looking at the previous highlight range
1070	if (!highlight_end_positions.isEmpty())
1071	{
1072	int last_highlight_index = highlight_end_positions.size() - 1;
1073	int last_highlight_end = highlight_end_positions.get(last_highlight_index).intValue();
1074	if (last_highlight_end > partial_phrase_match.start_position)
1075	{
1076	// There is an overlap, so remove the previous phrase match
1077	int last_highlight_start = highlight_start_positions.remove(last_highlight_index).intValue();
1078	highlight_end_positions.remove(last_highlight_index);
1079	partial_phrase_match.start_position = last_highlight_start;
1080	}
1081	}
1082
1083	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
1084	highlight_end_positions.add(new Integer(word_match.end_position));
1085	}
1086	// No, but add the partial match back into the list for next time
1087	else
1088	{
1089	partial_phrase_matches.add(partial_phrase_match);
1090	}
1091	}
1092	}
1093	}
1094	else
1095	{
1096	partial_phrase_matches.clear();
1097	}
1098
1099	// See if this word is at the start of any of the phrases
1100	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1101	{
1102	ArrayList phrase_query_p_term_variants_list = phrase_query_term_variants_hierarchy.get(p);
1103	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1104	if (phrase_query_p_term_1_variants.contains(word_match.word))
1105	{
1106	// If this phrase is just one word long, we have a complete match
1107	if (phrase_query_p_term_variants_list.size() == 1)
1108	{
1109	highlight_start_positions.add(new Integer(word_match.start_position));
1110	highlight_end_positions.add(new Integer(word_match.end_position));
1111	}
1112	// Otherwise we have the start of a potential phrase match
1113	else
1114	{
1115	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1116	}
1117	}
1118	}
1119	}
1120
1121	// Now add the annotation tags into the document at the correct points
1122	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1123
1124	int last_wrote = 0;
1125	for (int i = 0; i < highlight_start_positions.size(); i++)
1126	{
1127	int highlight_start = highlight_start_positions.get(i).intValue();
1128	int highlight_end = highlight_end_positions.get(i).intValue();
1129
1130	// Print anything before the highlight range
1131	if (last_wrote < highlight_start)
1132	{
1133	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1134	content_element.appendChild(this.doc.createTextNode(preceding_text));
1135	}
1136
1137	// Print the highlight text, annotated
1138	if (highlight_end > last_wrote)
1139	{
1140	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1141	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1142	annotation_element.setAttribute("type", "query_term");
1143	content_element.appendChild(annotation_element);
1144	last_wrote = highlight_end;
1145	}
1146	}
1147
1148	// Finish off any unwritten text
1149	if (last_wrote < content_characters.length)
1150	{
1151	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1152	content_element.appendChild(this.doc.createTextNode(remaining_text));
1153	}
1154
1155	return content_element;
1156	}
1157
1158	static private class WordMatch
1159	{
1160	public String word;
1161	public int start_position;
1162	public int end_position;
1163	public boolean preceding_word_matched;
1164
1165	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1166	{
1167	this.word = word;
1168	this.start_position = start_position;
1169	this.end_position = end_position;
1170	this.preceding_word_matched = preceding_word_matched;
1171	}
1172	}
1173
1174	static private class PartialPhraseMatch
1175	{
1176	public int start_position;
1177	public int query_phrase_number;
1178	public int num_words_matched;
1179
1180	public PartialPhraseMatch(int start_position, int query_phrase_number)
1181	{
1182	this.start_position = start_position;
1183	this.query_phrase_number = query_phrase_number;
1184	this.num_words_matched = 1;
1185	}
1186	}
1187	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: