Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 24889

Last change on this file since 24889 was 24889, checked in by sjm84, 12 years ago
Adjusted how metadata names (to return) are acquired
Property svn:keywords set to `Author Date Id Revision`
File size: 39.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router */
41	public class DocumentAction extends Action
42	{
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50
51	/**
52	* if this is set to true, when a document is displayed, any annotation type
53	* services (enrich) will be offered to the user as well
54	*/
55	protected boolean provide_annotations = false;
56
57	protected boolean highlight_query_terms = false;
58
59	public boolean configure()
60	{
61	super.configure();
62	String highlight = (String) config_params.get("highlightQueryTerms");
63	if (highlight != null && highlight.equals("true"))
64	{
65	highlight_query_terms = true;
66	}
67	String annotate = (String) config_params.get("displayAnnotationService");
68	if (annotate != null && annotate.equals("true"))
69	{
70	provide_annotations = true;
71	}
72	return true;
73	}
74
75	public Node process(Node message_node)
76	{
77	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
78
79	Element message = this.converter.nodeToElement(message_node);
80
81	// the response
82	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
83	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
84	result.appendChild(page_response);
85
86	// get the request - assume only one
87	Element request = (Element) GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
88	Element cgi_paramList = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
89	HashMap params = GSXML.extractParams(cgi_paramList, false);
90
91	// just in case there are some that need to get passed to the services
92	HashMap service_params = (HashMap) params.get("s0");
93
94	String has_rl = null;
95	String has_href = null;
96	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
97	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
98	String collection = (String) params.get(GSParams.COLLECTION);
99	String lang = request.getAttribute(GSXML.LANG_ATT);
100	String uid = request.getAttribute(GSXML.USER_ID_ATT);
101	String document_name = (String) params.get(GSParams.DOCUMENT);
102	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals("")))
103	{
104	logger.error("no document specified!");
105	return result;
106	}
107	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
108	if (document_type == null)
109	{
110	document_type = "simple";
111	}
112	//whether to retrieve siblings or not
113	boolean get_siblings = false;
114	String sibs = (String) params.get(SIBLING_ARG);
115	if (sibs != null && sibs.equals("1"))
116	{
117	get_siblings = true;
118	}
119
120	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
121	if (sibling_num != null && !sibling_num.equals(""))
122	{
123	// we have to modify the doc name
124	document_name = document_name + "." + sibling_num + ".ss";
125	}
126
127	boolean expand_document = false;
128	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
129	if (ed_arg != null && ed_arg.equals("1"))
130	{
131	expand_document = true;
132	}
133
134	boolean expand_contents = false;
135	if (expand_document)
136	{ // we always expand the contents with the text
137	expand_contents = true;
138	}
139	else
140	{
141	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
142	if (ec_arg != null && ec_arg.equals("1"))
143	{
144	expand_contents = true;
145	}
146	}
147
148	//append site metadata
149	addSiteMetadata(page_response, lang, uid);
150
151	// get the additional data needed for the page
152	getBackgroundData(page_response, collection, lang, uid);
153	Element format_elem = (Element) GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
154
155	// the_document is where all the doc info - structure and metadata etc
156	// is added into, to be returned in the page
157	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
158	page_response.appendChild(the_document);
159
160	// set the doctype from the cgi arg as an attribute
161	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
162
163	// create a basic doc list containing the current node
164	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
165	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
166	basic_doc_list.appendChild(current_doc);
167	if (document_name.length() != 0)
168	{
169	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
170	}
171	else if (has_href.length() != 0)
172	{
173	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
174	current_doc.setAttribute("externalURL", has_rl);
175	}
176
177	// Create a parameter list to specify the required structure information
178	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
179
180	if (service_params != null)
181	{
182	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
183	}
184
185	Element ds_param = null;
186	boolean get_structure = false;
187	boolean get_structure_info = false;
188	if (document_type.equals(GSXML.DOC_TYPE_PAGED))
189	{
190	get_structure_info = true;
191
192	if (expand_contents)
193	{
194	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
195	ds_param_list.appendChild(ds_param);
196	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
197	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
198	}
199
200	// get teh info needed for paged naviagtion
201	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
202	ds_param_list.appendChild(ds_param);
203	ds_param.setAttribute(GSXML.NAME_ATT, "info");
204	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
205	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206	ds_param_list.appendChild(ds_param);
207	ds_param.setAttribute(GSXML.NAME_ATT, "info");
208	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
209	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
210	ds_param_list.appendChild(ds_param);
211	ds_param.setAttribute(GSXML.NAME_ATT, "info");
212	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
213
214	if (get_siblings)
215	{
216	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
217	ds_param_list.appendChild(ds_param);
218	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
219	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
220	}
221
222	}
223	else if (document_type.equals(GSXML.DOC_TYPE_HIERARCHY))
224	{
225	get_structure = true;
226	if (expand_contents)
227	{
228	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
229	ds_param_list.appendChild(ds_param);
230	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
231	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
232	}
233	else
234	{
235	// get the info needed for table of contents
236	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
237	ds_param_list.appendChild(ds_param);
238	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
239	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
240	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
241	ds_param_list.appendChild(ds_param);
242	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
243	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
244	if (get_siblings)
245	{
246	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
247	ds_param_list.appendChild(ds_param);
248	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
249	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
250	}
251	}
252	}
253	else
254	{
255	// we dont need any structure
256	}
257
258	boolean has_dummy = false;
259	if (get_structure \|\| get_structure_info)
260	{
261
262	// Build a request to obtain the document structure
263	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
264	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
265	Element ds_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
266	ds_message.appendChild(ds_request);
267	ds_request.appendChild(ds_param_list);
268
269	// create a doc_node_list and put in the doc_node that we are interested in
270	ds_request.appendChild(basic_doc_list);
271
272	// Process the document structure retrieve message
273	Element ds_response_message = (Element) this.mr.process(ds_message);
274	if (processErrorElements(ds_response_message, page_response))
275	{
276	return result;
277	}
278
279	// get the info and print out
280	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
281	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
282	path = GSPath.appendLink(path, "nodeStructureInfo");
283	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
284	// get the doc_node bit
285	if (ds_response_struct_info != null)
286	{
287	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
288	}
289	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
290	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
291	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
292	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
293
294	if (ds_response_structure != null)
295	{
296	// add the contents of the structure bit into the_document
297	NodeList structs = ds_response_structure.getChildNodes();
298	for (int i = 0; i < structs.getLength(); i++)
299	{
300	the_document.appendChild(this.doc.importNode(structs.item(i), true));
301	}
302	}
303	else
304	{
305	// no structure nodes, so put in a dummy doc node
306	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
307	if (document_name.length() != 0)
308	{
309	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
310	}
311	else if (has_href.length() != 0)
312	{
313	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
314	doc_node.setAttribute("externalURL", has_rl);
315	}
316	the_document.appendChild(doc_node);
317	has_dummy = true;
318	}
319	}
320	else
321	{ // a simple type - we dont have a dummy node for simple
322	// should think about this more
323	// no structure request, so just put in a dummy doc node
324	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
325	if (document_name.length() != 0)
326	{
327	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
328	}
329	else if (has_href.length() != 0)
330	{
331	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
332	doc_node.setAttribute("externalURL", has_rl);
333	}
334	the_document.appendChild(doc_node);
335	has_dummy = true;
336	}
337
338	// Build a request to obtain some document metadata
339	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
340	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
341	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
342	dm_message.appendChild(dm_request);
343	// Create a parameter list to specify the required metadata information
344
345	HashSet meta_names = new HashSet();
346	meta_names.add("Title"); // the default
347	if (format_elem != null)
348	{
349	getRequiredMetadataNames(format_elem, meta_names);
350	}
351
352	Element dm_param_list = createMetadataParamList(meta_names);
353	if (service_params != null)
354	{
355	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
356	}
357
358	dm_request.appendChild(dm_param_list);
359
360	// create the doc node list for the metadata request
361	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
362	dm_request.appendChild(dm_doc_list);
363
364	// Add each node from the structure response into the metadata request
365	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
366	for (int i = 0; i < doc_nodes.getLength(); i++)
367	{
368	Element doc_node = (Element) doc_nodes.item(i);
369	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
370
371	// Add the documentNode to the list
372	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
373	dm_doc_list.appendChild(dm_doc_node);
374	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
375	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT, doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
376	}
377
378	// we also want a metadata request to the top level document to get
379	// assocfilepath - this could be cached too
380	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
381	dm_message.appendChild(doc_meta_request);
382	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
383	if (service_params != null)
384	{
385	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
386	}
387
388	doc_meta_request.appendChild(doc_meta_param_list);
389	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
390	doc_meta_param_list.appendChild(doc_param);
391	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
392	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
393
394	// create the doc node list for the metadata request
395	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
396	doc_meta_request.appendChild(doc_list);
397
398	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
399	// the node we want is the root document node
400	if (document_name.length() != 0)
401	{
402	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name + ".rt");
403	}
404	else if (has_href.length() != 0)
405	{
406	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href + ".rt");
407	doc_node.setAttribute("externalURL", has_rl);
408	}
409	doc_list.appendChild(doc_node);
410
411	Element dm_response_message = (Element) this.mr.process(dm_message);
412	if (processErrorElements(dm_response_message, page_response))
413	{
414	return result;
415	}
416
417	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
418	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
419
420	// Merge the metadata with the structure information
421	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
422	for (int i = 0; i < doc_nodes.getLength(); i++)
423	{
424	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
425	}
426	// get the top level doc metadata out
427	Element doc_meta_response = (Element) dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
428	Element top_doc_node = (Element) GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
429	GSXML.mergeMetadataLists(the_document, top_doc_node);
430
431	// Build a request to obtain some document content
432	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
433	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
434	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
435	dc_message.appendChild(dc_request);
436
437	// Create a parameter list to specify the request parameters - empty for now
438	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
439	if (service_params != null)
440	{
441	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
442	}
443
444	dc_request.appendChild(dc_param_list);
445
446	// get the content
447	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
448	if (expand_document)
449	{
450	dc_request.appendChild(dm_doc_list);
451	}
452	else
453	{
454	dc_request.appendChild(basic_doc_list);
455	}
456	logger.debug("request = " + converter.getString(dc_message));
457	Element dc_response_message = (Element) this.mr.process(dc_message);
458	if (processErrorElements(dc_response_message, page_response))
459	{
460	return result;
461	}
462
463	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
464
465	if (expand_document)
466	{
467	// Merge the content with the structure information
468	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
469	for (int i = 0; i < doc_nodes.getLength(); i++)
470	{
471	Node content = GSXML.getChildByTagName((Element) dc_response_docs.item(i), "nodeContent");
472	if (content != null)
473	{
474	if (highlight_query_terms)
475	{
476	content = highlightQueryTerms(request, (Element) content);
477	}
478	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
479	}
480	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
481	}
482	}
483	else
484	{
485	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
486	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
487	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
488	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
489
490	if (dc_response_doc_content == null)
491	{
492	// no content to add
493	if (dc_response_doc_external != null)
494	{
495	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
496
497	the_document.setAttribute("selectedNode", modified_doc_id);
498	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
499	}
500	return result;
501	}
502	if (highlight_query_terms)
503	{
504	dc_response_doc.removeChild(dc_response_doc_content);
505
506	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
507	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
508	}
509
510	if (provide_annotations)
511	{
512	String service_selected = (String) params.get(ENRICH_DOC_ARG);
513	if (service_selected != null && service_selected.equals("1"))
514	{
515	// now we can modifiy the response doc if needed
516	String enrich_service = (String) params.get(GSParams.SERVICE);
517	// send a message to the service
518	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
519	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
520	enrich_message.appendChild(enrich_request);
521	// check for parameters
522	HashMap e_service_params = (HashMap) params.get("s1");
523	if (e_service_params != null)
524	{
525	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
526	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
527	enrich_request.appendChild(enrich_pl);
528	}
529	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER);
530	enrich_request.appendChild(e_doc_list);
531	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
532
533	Node enrich_response = this.mr.process(enrich_message);
534
535	String[] links = { GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM + GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM };
536	path = GSPath.createPath(links);
537	dc_response_doc_content = (Element) GSXML.getNodeByPath(enrich_response, path);
538
539	}
540	} // if provide_annotations
541
542	// use the returned id rather than the sent one cos there may have
543	// been modifiers such as .pr that are removed.
544	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
545	the_document.setAttribute("selectedNode", modified_doc_id);
546	if (has_dummy)
547	{
548	// change the id if necessary and add the content
549	Element dummy_node = (Element) doc_nodes.item(0);
550
551	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
552	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
553	// hack for simple type
554	if (document_type.equals("simple"))
555	{
556	// we dont want the internal docNode, just want the content and metadata in the document
557	// rethink this!!
558	the_document.removeChild(dummy_node);
559
560	NodeList dummy_children = dummy_node.getChildNodes();
561	//for (int i=0; i<dummy_children.getLength(); i++) {
562	for (int i = dummy_children.getLength() - 1; i >= 0; i--)
563	{
564	// special case as we don't want more than one metadata list
565	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER))
566	{
567	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
568	}
569	else
570	{
571	the_document.appendChild(dummy_children.item(i));
572	}
573	}
574	}
575	}
576	else
577	{
578	// Merge the document content with the metadata and structure information
579	for (int i = 0; i < doc_nodes.getLength(); i++)
580	{
581	Node dn = doc_nodes.item(i);
582	String dn_id = ((Element) dn).getAttribute(GSXML.NODE_ID_ATT);
583	if (dn_id.equals(modified_doc_id))
584	{
585	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
586	break;
587	}
588	}
589	}
590	}
591	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
592	return result;
593	}
594
595	/**
596	* tell the param class what its arguments are if an action has its own
597	* arguments, this should add them to the params object - particularly
598	* important for args that should not be saved
599	*/
600	public boolean getActionParameters(GSParams params)
601	{
602	params.addParameter(GOTO_PAGE_ARG, false);
603	params.addParameter(ENRICH_DOC_ARG, false);
604	return true;
605	}
606
607	/**
608	* this method gets the collection description, the format info, the list of
609	* enrich services, etc - stuff that is needed for the page, but is the same
610	* whatever the query is - should be cached
611	*/
612	protected boolean getBackgroundData(Element page_response, String collection, String lang, String uid)
613	{
614
615	// create a message to process - contains requests for the collection
616	// description, the format element, the enrich services on offer
617	// these could all be cached
618	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
619	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
620	// the format request - ignore for now, where does this request go to??
621	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
622	info_message.appendChild(format_request);
623
624	// the enrich_services request - only do this if provide_annotations is true
625
626	if (provide_annotations)
627	{
628	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
629	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
630	info_message.appendChild(enrich_services_request);
631	}
632
633	Element info_response = (Element) this.mr.process(info_message);
634
635	// the collection is the first response
636	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
637	Element format_resp = (Element) responses.item(0);
638
639	Element format_elem = (Element) GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
640	if (format_elem != null)
641	{
642	logger.debug("doc action found a format statement");
643	// set teh format type
644	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
645	page_response.appendChild(this.doc.importNode(format_elem, true));
646	}
647
648	if (provide_annotations)
649	{
650	Element services_resp = (Element) responses.item(1);
651
652	// a new message for the mr
653	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
654
655	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
656	boolean service_found = false;
657	for (int j = 0; j < e_services.getLength(); j++)
658	{
659	if (((Element) e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich"))
660	{
661	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element) e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
662	enrich_message.appendChild(s);
663	service_found = true;
664	}
665	}
666	if (service_found)
667	{
668	Element enrich_response = (Element) this.mr.process(enrich_message);
669
670	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
671	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
672	for (int i = 0; i < e_responses.getLength(); i++)
673	{
674	Element e_resp = (Element) e_responses.item(i);
675	Element e_service = (Element) this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
676	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
677	service_list.appendChild(e_service);
678	}
679	page_response.appendChild(service_list);
680	}
681	} // if provide_annotations
682	return true;
683
684	}
685
686	/**
687	* this involves a bit of a hack to get the equivalent query terms - has to
688	* requery the query service - uses the last selected service name. (if it
689	* ends in query). should this action do the query or should it send a
690	* message to the query action? but that will involve lots of extra stuff.
691	* also doesn't handle phrases properly - just highlights all the terms
692	* found in the text.
693	*/
694	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content)
695	{
696
697	// do the query again to get term info
698	Element cgi_param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
699	HashMap params = GSXML.extractParams(cgi_param_list, false);
700
701	HashMap previous_params = (HashMap) params.get("p");
702	if (previous_params == null)
703	{
704	return dc_response_doc_content;
705	}
706	String service_name = (String) previous_params.get(GSParams.SERVICE);
707	if (service_name == null \|\| !service_name.endsWith("Query"))
708	{ // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
709	logger.debug("invalid service, not doing highlighting");
710	return dc_response_doc_content;
711	}
712	String collection = (String) params.get(GSParams.COLLECTION);
713	String lang = request.getAttribute(GSXML.LANG_ATT);
714	String uid = request.getAttribute(GSXML.USER_ID_ATT);
715	String to = GSPath.appendLink(collection, service_name);
716
717	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
718	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
719	mr_query_message.appendChild(mr_query_request);
720
721	// paramList
722	HashMap service_params = (HashMap) params.get("s1");
723
724	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
725	GSXML.addParametersToList(this.doc, query_param_list, service_params);
726	mr_query_request.appendChild(query_param_list);
727
728	// do the query
729	Element mr_query_response = (Element) this.mr.process(mr_query_message);
730
731	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM + GSXML.LIST_MODIFIER);
732	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
733	if (query_term_list_element == null)
734	{
735	// no term info
736	logger.error("No query term information.\n");
737	return dc_response_doc_content;
738	}
739
740	String content = GSXML.getNodeText(dc_response_doc_content);
741
742	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM + GSXML.LIST_MODIFIER);
743	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
744
745	HashSet query_term_variants = new HashSet();
746	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
747	if (equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
748	{
749	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
750	if (terms_nodelist != null && terms_nodelist.getLength() > 0)
751	{
752	for (int i = 0; i < terms_nodelist.getLength(); i++)
753	{
754	String termValue = ((Element) terms_nodelist.item(i)).getAttribute("name");
755	String termValueU = null;
756	String termValueL = null;
757
758	if (termValue.length() > 1)
759	{
760	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
761	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
762	}
763	else
764	{
765	termValueU = termValue.substring(0, 1).toUpperCase();
766	termValueL = termValue.substring(0, 1).toLowerCase();
767	}
768
769	query_term_variants.add(termValueU);
770	query_term_variants.add(termValueL);
771	}
772	}
773	}
774	else
775	{
776	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++)
777	{
778	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
779	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
780	for (int j = 0; j < equivalent_terms.length; j++)
781	{
782	query_term_variants.add(equivalent_terms[j]);
783	}
784	}
785	}
786
787	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
788
789	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
790	String performed_query = GSXML.getNodeText(query_element) + " ";
791
792	ArrayList phrase_query_p_term_variants_list = new ArrayList();
793	int term_start = 0;
794	boolean in_term = false;
795	boolean in_phrase = false;
796	for (int i = 0; i < performed_query.length(); i++)
797	{
798	char character = performed_query.charAt(i);
799	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
800
801	// Has a query term just started?
802	if (in_term == false && is_character_letter_or_digit == true)
803	{
804	in_term = true;
805	term_start = i;
806	}
807
808	// Or has a term just finished?
809	else if (in_term == true && is_character_letter_or_digit == false)
810	{
811	in_term = false;
812	String term = performed_query.substring(term_start, i);
813
814	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
815	if (term_element != null)
816	{
817
818	HashSet phrase_query_p_term_x_variants = new HashSet();
819
820	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
821	if (term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
822	{
823	String termValueU = null;
824	String termValueL = null;
825
826	if (term.length() > 1)
827	{
828	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
829	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
830	}
831	else
832	{
833	termValueU = term.substring(0, 1).toUpperCase();
834	termValueL = term.substring(0, 1).toLowerCase();
835	}
836
837	phrase_query_p_term_x_variants.add(termValueU);
838	phrase_query_p_term_x_variants.add(termValueL);
839	}
840	else
841	{
842	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++)
843	{
844	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
845	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
846	for (int k = 0; k < term_equivalent_terms.length; k++)
847	{
848	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
849	}
850	}
851	}
852	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
853
854	if (in_phrase == false)
855	{
856	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
857	phrase_query_p_term_variants_list = new ArrayList();
858	}
859	}
860	}
861	// Watch for phrases (surrounded by quotes)
862	if (character == '\"')
863	{
864	// Has a phrase just started?
865	if (in_phrase == false)
866	{
867	in_phrase = true;
868	}
869	// Or has a phrase just finished?
870	else if (in_phrase == true)
871	{
872	in_phrase = false;
873	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
874	}
875
876	phrase_query_p_term_variants_list = new ArrayList();
877	}
878	}
879
880	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
881	}
882
883	/**
884	* Highlights query terms in a piece of text.
885	*/
886	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
887	{
888	// Convert the content string to an array of characters for speed
889	char[] content_characters = new char[content.length()];
890	content.getChars(0, content.length(), content_characters, 0);
891
892	// Now skim through the content, identifying word matches
893	ArrayList word_matches = new ArrayList();
894	int word_start = 0;
895	boolean in_word = false;
896	boolean preceding_word_matched = false;
897	boolean inTag = false;
898	for (int i = 0; i < content_characters.length; i++)
899	{
900	//We don't want to find words inside HTML tags
901	if(content_characters[i] == '<')
902	{
903	inTag = true;
904	continue;
905	}
906	else if (inTag && content_characters[i] == '>')
907	{
908	inTag = false;
909	}
910	else if (inTag)
911	{
912	continue;
913	}
914
915	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
916
917	// Has a word just started?
918	if (in_word == false && is_character_letter_or_digit == true)
919	{
920	in_word = true;
921	word_start = i;
922	}
923
924	// Or has a word just finished?
925	else if (in_word == true && is_character_letter_or_digit == false)
926	{
927	in_word = false;
928
929	// Check if the word matches any of the query term equivalents
930	String word = new String(content_characters, word_start, (i - word_start));
931	if (query_term_variants.contains(word))
932	{
933	// We have found a matching word, so remember its location
934	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
935	preceding_word_matched = true;
936	}
937	else
938	{
939	preceding_word_matched = false;
940	}
941	}
942	}
943
944	// Don't forget the last word...
945	if (in_word == true)
946	{
947	// Check if the word matches any of the query term equivalents
948	String word = new String(content_characters, word_start, (content_characters.length - word_start));
949	if (query_term_variants.contains(word))
950	{
951	// We have found a matching word, so remember its location
952	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
953	}
954	}
955
956	ArrayList highlight_start_positions = new ArrayList();
957	ArrayList highlight_end_positions = new ArrayList();
958
959	// Deal with phrases now
960	ArrayList partial_phrase_matches = new ArrayList();
961	for (int i = 0; i < word_matches.size(); i++)
962	{
963	WordMatch word_match = (WordMatch) word_matches.get(i);
964
965	// See if any partial phrase matches are extended by this word
966	if (word_match.preceding_word_matched)
967	{
968	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--)
969	{
970	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
971	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
972	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
973	if (phrase_query_p_term_x_variants.contains(word_match.word))
974	{
975	partial_phrase_match.num_words_matched++;
976
977	// Has a complete phrase match occurred?
978	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size())
979	{
980	// Check for overlaps by looking at the previous highlight range
981	if (!highlight_end_positions.isEmpty())
982	{
983	int last_highlight_index = highlight_end_positions.size() - 1;
984	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
985	if (last_highlight_end > partial_phrase_match.start_position)
986	{
987	// There is an overlap, so remove the previous phrase match
988	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
989	highlight_end_positions.remove(last_highlight_index);
990	partial_phrase_match.start_position = last_highlight_start;
991	}
992	}
993
994	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
995	highlight_end_positions.add(new Integer(word_match.end_position));
996	}
997	// No, but add the partial match back into the list for next time
998	else
999	{
1000	partial_phrase_matches.add(partial_phrase_match);
1001	}
1002	}
1003	}
1004	}
1005	else
1006	{
1007	partial_phrase_matches.clear();
1008	}
1009
1010	// See if this word is at the start of any of the phrases
1011	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++)
1012	{
1013	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
1014	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
1015	if (phrase_query_p_term_1_variants.contains(word_match.word))
1016	{
1017	// If this phrase is just one word long, we have a complete match
1018	if (phrase_query_p_term_variants_list.size() == 1)
1019	{
1020	highlight_start_positions.add(new Integer(word_match.start_position));
1021	highlight_end_positions.add(new Integer(word_match.end_position));
1022	}
1023	// Otherwise we have the start of a potential phrase match
1024	else
1025	{
1026	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
1027	}
1028	}
1029	}
1030	}
1031
1032	// Now add the annotation tags into the document at the correct points
1033	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
1034
1035	int last_wrote = 0;
1036	for (int i = 0; i < highlight_start_positions.size(); i++)
1037	{
1038	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
1039	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
1040
1041	// Print anything before the highlight range
1042	if (last_wrote < highlight_start)
1043	{
1044	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
1045	content_element.appendChild(this.doc.createTextNode(preceding_text));
1046	}
1047
1048	// Print the highlight text, annotated
1049	if (highlight_end > last_wrote)
1050	{
1051	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
1052	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
1053	annotation_element.setAttribute("type", "query_term");
1054	content_element.appendChild(annotation_element);
1055	last_wrote = highlight_end;
1056	}
1057	}
1058
1059	// Finish off any unwritten text
1060	if (last_wrote < content_characters.length)
1061	{
1062	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
1063	content_element.appendChild(this.doc.createTextNode(remaining_text));
1064	}
1065
1066	return content_element;
1067	}
1068
1069	static private class WordMatch
1070	{
1071	public String word;
1072	public int start_position;
1073	public int end_position;
1074	public boolean preceding_word_matched;
1075
1076	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
1077	{
1078	this.word = word;
1079	this.start_position = start_position;
1080	this.end_position = end_position;
1081	this.preceding_word_matched = preceding_word_matched;
1082	}
1083	}
1084
1085	static private class PartialPhraseMatch
1086	{
1087	public int start_position;
1088	public int query_phrase_number;
1089	public int num_words_matched;
1090
1091	public PartialPhraseMatch(int start_position, int query_phrase_number)
1092	{
1093	this.start_position = start_position;
1094	this.query_phrase_number = query_phrase_number;
1095	this.num_words_matched = 1;
1096	}
1097	}
1098	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: