Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 16688

Last change on this file since 16688 was 16688, checked in by davidb, 16 years ago
Changed 'Element process(Element)' in ModuleInterface to 'Node process(Node)'. After some deliberation it was decided this is a more useful (generic) layer of the DOM to pass information around in. Helps with the DocType problem when producing XSL Transformed pages, for example. When this was an Element, it would lose track of its DocType. Supporting method provided in XMLConverter 'Element nodeToElement(Node)' which checks a node's docType and casts to Element if appropriate, or if a Document, typecasts to that and then extracts the top-level Element. With this fundamental change in ModuleInterface, around 20 files needed to be updated (Actions, Services, etc.) that build on top of 'process()' to reflect this change, and use nodeToElement where necessary.
Property svn:keywords set to `Author Date Id Revision`
File size: 36.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router
41	*/
42	public class DocumentAction extends Action {
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50
51	/** if this is set to true, when a document is displayed, any annotation
52	* type services (enrich) will be offered to the user as well */
53	protected boolean provide_annotations = false;
54
55	protected boolean highlight_query_terms = false;
56
57	public boolean configure() {
58	super.configure();
59	String highlight = (String)config_params.get("highlightQueryTerms");
60	if (highlight != null && highlight.equals("true")) {
61	highlight_query_terms = true;
62	}
63	String annotate = (String)config_params.get("displayAnnotationService");
64	if (annotate != null && annotate.equals("true")) {
65	provide_annotations = true;
66	}
67	return true;
68	}
69	public Node process (Node message_node)
70	{
71	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73	Element message = this.converter.nodeToElement(message_node);
74
75	// the response
76	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78	result.appendChild(page_response);
79
80	// get the request - assume only one
81	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83	HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85	// just in case there are some that need to get passed to the services
86	HashMap service_params = (HashMap)params.get("s0");
87
88	String has_rl = null;
89	String has_href = null;
90	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
91	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
92	String collection = (String) params.get(GSParams.COLLECTION);
93	String lang = request.getAttribute(GSXML.LANG_ATT);
94	String uid = request.getAttribute(GSXML.USER_ID_ATT);
95	String document_name = (String) params.get(GSParams.DOCUMENT);
96	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals(""))) {
97	logger.error("no document specified!");
98	return result;
99	}
100	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
101	if (document_type == null) {
102	document_type = "simple";
103	}
104	//whether to retrieve siblings or not
105	boolean get_siblings = false;
106	String sibs = (String) params.get(SIBLING_ARG);
107	if (sibs != null && sibs.equals("1")) {
108	get_siblings = true;
109	}
110
111	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
112	if (sibling_num != null && !sibling_num.equals("")) {
113	// we have to modify the doc name
114	document_name = document_name+"."+sibling_num+".ss";
115	}
116
117	boolean expand_document = false;
118	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
119	if (ed_arg != null && ed_arg.equals("1")) {
120	expand_document = true;
121	}
122
123
124	boolean expand_contents = false;
125	if (expand_document) { // we always expand the contents with the text
126	expand_contents = true;
127	} else {
128	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
129	if (ec_arg != null && ec_arg.equals("1")) {
130	expand_contents = true;
131	}
132	}
133	// get the additional data needed for the page
134	getBackgroundData(page_response, collection, lang, uid);
135	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
136
137	// the_document is where all the doc info - structure and metadata etc
138	// is added into, to be returned in the page
139	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
140	page_response.appendChild(the_document);
141
142	// set the doctype from the cgi arg as an attribute
143	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
144
145	// create a basic doc list containing the current node
146	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
147	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
148	basic_doc_list.appendChild(current_doc);
149	if (document_name.length()!=0){
150	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
151	}else if (has_href.length()!=0){
152	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
153	current_doc.setAttribute("externalURL", has_rl);
154	}
155
156	// Create a parameter list to specify the required structure information
157	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
158
159	if (service_params != null) {
160	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
161	}
162
163	Element ds_param = null;
164	boolean get_structure = false;
165	boolean get_structure_info = false;
166	if (document_type.equals("paged")) {
167	get_structure_info = true;
168	// get teh info needed for paged naviagtion
169	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
170	ds_param_list.appendChild(ds_param);
171	ds_param.setAttribute(GSXML.NAME_ATT, "info");
172	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
173	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
174	ds_param_list.appendChild(ds_param);
175	ds_param.setAttribute(GSXML.NAME_ATT, "info");
176	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
177	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
178	ds_param_list.appendChild(ds_param);
179	ds_param.setAttribute(GSXML.NAME_ATT, "info");
180	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
181
182	} else if (document_type.equals("hierarchy")){
183	get_structure = true;
184	if (expand_contents) {
185	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
186	ds_param_list.appendChild(ds_param);
187	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
188	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
189	} else {
190	// get the info needed for table of contents
191	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
192	ds_param_list.appendChild(ds_param);
193	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
194	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
195	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
196	ds_param_list.appendChild(ds_param);
197	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
198	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
199	if (get_siblings) {
200	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201	ds_param_list.appendChild(ds_param);
202	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
204	}
205	}
206	} else {
207	// we dont need any structure
208	}
209
210	boolean has_dummy = false;
211	if (get_structure \|\| get_structure_info) {
212
213	// Build a request to obtain the document structure
214	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
215	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
216	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
217	ds_message.appendChild(ds_request);
218	ds_request.appendChild(ds_param_list);
219
220	// create a doc_node_list and put in the doc_node that we are interested in
221	ds_request.appendChild(basic_doc_list);
222
223	// Process the document structure retrieve message
224	Element ds_response_message = (Element) this.mr.process(ds_message);
225	if (processErrorElements(ds_response_message, page_response)) {
226	return result;
227	}
228
229	// get the info and print out
230	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
231	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
232	path = GSPath.appendLink(path, "nodeStructureInfo");
233	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
234	// get the doc_node bit
235	if (ds_response_struct_info != null) {
236	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
237	}
238	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
239	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
240	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
241	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
242
243	if (ds_response_structure != null) {
244	// add the contents of the structure bit into the_document
245	NodeList structs = ds_response_structure.getChildNodes();
246	for (int i=0; i<structs.getLength();i++) {
247	the_document.appendChild(this.doc.importNode(structs.item(i), true));
248	}
249	} else {
250	// no structure nodes, so put in a dummy doc node
251	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
252	if (document_name.length()!=0){
253	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
254	}else if (has_href.length()!=0){
255	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
256	doc_node.setAttribute("externalURL", has_rl);
257	}
258	the_document.appendChild(doc_node);
259	has_dummy = true;
260	}
261	} else { // a simple type - we dont have a dummy node for simple
262	// should think about this more
263	// no structure request, so just put in a dummy doc node
264	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
265	if (document_name.length()!=0){
266	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
267	}else if (has_href.length()!=0){
268	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
269	doc_node.setAttribute("externalURL", has_rl);
270	}
271	the_document.appendChild(doc_node);
272	has_dummy = true;
273	}
274
275	// Build a request to obtain some document metadata
276	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
277	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
278	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
279	dm_message.appendChild(dm_request);
280	// Create a parameter list to specify the required metadata information
281
282	HashSet meta_names = new HashSet();
283	meta_names.add("Title"); // the default
284	if (format_elem != null) {
285	extractMetadataNames(format_elem, meta_names);
286	}
287
288	Element dm_param_list = createMetadataParamList(meta_names);
289	if (service_params != null) {
290	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
291	}
292
293	dm_request.appendChild(dm_param_list);
294
295
296	// create the doc node list for the metadata request
297	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
298	dm_request.appendChild(dm_doc_list);
299
300	// Add each node from the structure response into the metadata request
301	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
302	for (int i = 0; i < doc_nodes.getLength(); i++) {
303	Element doc_node = (Element) doc_nodes.item(i);
304	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
305
306	// Add the documentNode to the list
307	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
308	dm_doc_list.appendChild(dm_doc_node);
309	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
310	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
311	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
312	}
313
314	// we also want a metadata request to the top level document to get
315	// assocfilepath - this could be cached too
316	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
317	dm_message.appendChild(doc_meta_request);
318	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
319	if (service_params != null) {
320	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
321	}
322
323	doc_meta_request.appendChild(doc_meta_param_list);
324	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
325	doc_meta_param_list.appendChild(doc_param);
326	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
327	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
328
329	// create the doc node list for the metadata request
330	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
331	doc_meta_request.appendChild(doc_list);
332
333	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
334	// the node we want is the root document node
335	if (document_name.length()!=0){
336	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
337	}else if (has_href.length()!=0){
338	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
339	doc_node.setAttribute("externalURL", has_rl);
340	}
341	doc_list.appendChild(doc_node);
342	Element dm_response_message = (Element) this.mr.process(dm_message);
343	if (processErrorElements(dm_response_message, page_response)) {
344	return result;
345	}
346
347	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
348	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
349
350	// Merge the metadata with the structure information
351	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
352	for (int i = 0; i < doc_nodes.getLength(); i++) {
353	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
354	}
355	// get teh top level doc metadata out
356	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
357	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
358	if (doc_meta_list != null) {
359	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
360	}
361	// Build a request to obtain some document content
362	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
363	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
364	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
365	dc_message.appendChild(dc_request);
366
367
368	// Create a parameter list to specify the request parameters - empty for now
369	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
370	if (service_params != null) {
371	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
372	}
373
374	dc_request.appendChild(dc_param_list);
375
376	// get the content
377	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
378	if (expand_document) {
379	dc_request.appendChild(dm_doc_list);
380	} else {
381	dc_request.appendChild(basic_doc_list);
382	}
383	logger.debug("request = "+converter.getString(dc_message));
384	Element dc_response_message = (Element) this.mr.process(dc_message);
385	if (processErrorElements(dc_response_message, page_response)) {
386	return result;
387	}
388
389	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
390
391	if (expand_document) {
392	// Merge the content with the structure information
393	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
394	for (int i = 0; i < doc_nodes.getLength(); i++) {
395	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
396	if (content != null) {
397	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
398	}
399	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
400	}
401	} else {
402	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
403	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
404	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
405	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
406
407	if (dc_response_doc_content == null) {
408	// no content to add
409	if (dc_response_doc_external !=null){
410	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
411
412	the_document.setAttribute("selectedNode", modified_doc_id);
413	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
414	}
415	return result;
416	}
417	if (highlight_query_terms) {
418	dc_response_doc.removeChild(dc_response_doc_content);
419
420	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
421	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
422	}
423
424
425	if (provide_annotations) {
426	String service_selected = (String)params.get(ENRICH_DOC_ARG);
427	if (service_selected != null && service_selected.equals("1")) {
428	// now we can modifiy the response doc if needed
429	String enrich_service = (String)params.get(GSParams.SERVICE);
430	// send a message to the service
431	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
432	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
433	enrich_message.appendChild(enrich_request);
434	// check for parameters
435	HashMap e_service_params = (HashMap)params.get("s1");
436	if (e_service_params != null) {
437	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
438	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
439	enrich_request.appendChild(enrich_pl);
440	}
441	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
442	enrich_request.appendChild(e_doc_list);
443	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
444
445	Node enrich_response = this.mr.process(enrich_message);
446
447	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
448	path = GSPath.createPath(links);
449	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
450
451	}
452	} // if provide_annotations
453
454
455	// use the returned id rather than the sent one cos there may have
456	// been modifiers such as .pr that are removed.
457	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
458	the_document.setAttribute("selectedNode", modified_doc_id);
459	if (has_dummy) {
460	// change the id if necessary and add the content
461	Element dummy_node = (Element)doc_nodes.item(0);
462
463	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
464	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
465	// hack for simple type
466	if (document_type.equals("simple")) {
467	// we dont want the internal docNode, just want the content and metadata in the document
468	// rethink this!!
469	the_document.removeChild(dummy_node);
470
471	NodeList dummy_children = dummy_node.getChildNodes();
472	//for (int i=0; i<dummy_children.getLength(); i++) {
473	for (int i=dummy_children.getLength()-1; i>=0; i--) {
474	the_document.appendChild(dummy_children.item(i));
475
476	}
477	}
478	} else {
479	// Merge the document content with the metadata and structure information
480	for (int i = 0; i < doc_nodes.getLength(); i++) {
481	Node dn = doc_nodes.item(i);
482	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
483	if (dn_id.equals(modified_doc_id)) {
484	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
485	break;
486	}
487	}
488	}
489	}
490	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
491	return result;
492	}
493
494	/** tell the param class what its arguments are
495	* if an action has its own arguments, this should add them to the params
496	* object - particularly important for args that should not be saved */
497	public boolean getActionParameters(GSParams params) {
498	params.addParameter(GOTO_PAGE_ARG, false);
499	params.addParameter(ENRICH_DOC_ARG, false);
500	return true;
501	}
502
503
504	/** this method gets the collection description, the format info, the
505	* list of enrich services, etc - stuff that is needed for the page,
506	* but is the same whatever the query is - should be cached */
507	protected boolean getBackgroundData(Element page_response,
508	String collection, String lang,
509	String uid) {
510
511	// create a message to process - contains requests for the collection
512	// description, the format element, the enrich services on offer
513	// these could all be cached
514	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
515	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
516	// the format request - ignore for now, where does this request go to??
517	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
518	info_message.appendChild(format_request);
519
520	// the enrich_services request - only do this if provide_annotations is true
521
522	if (provide_annotations) {
523	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
524	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
525	info_message.appendChild(enrich_services_request);
526	}
527
528	Element info_response = (Element)this.mr.process(info_message);
529
530	// the collection is the first response
531	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
532	Element format_resp = (Element) responses.item(0);
533
534	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
535	if (format_elem != null) {
536	logger.debug("doc action found a format statement");
537	// set teh format type
538	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
539	page_response.appendChild(this.doc.importNode(format_elem, true));
540	}
541
542	if (provide_annotations) {
543	Element services_resp = (Element)responses.item(1);
544
545	// a new message for the mr
546	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
547
548	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
549	boolean service_found = false;
550	for (int j=0; j<e_services.getLength(); j++) {
551	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
552	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
553	enrich_message.appendChild(s);
554	service_found = true;
555	}
556	}
557	if (service_found) {
558	Element enrich_response = (Element)this.mr.process(enrich_message);
559
560	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
561	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
562	for (int i=0; i<e_responses.getLength(); i++) {
563	Element e_resp = (Element)e_responses.item(i);
564	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
565	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
566	service_list.appendChild(e_service);
567	}
568	page_response.appendChild(service_list);
569	}
570	} // if provide_annotations
571	return true;
572
573	}
574
575	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
576	*/
577	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
578
579	// do the query again to get term info
580	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
581	HashMap params = GSXML.extractParams(cgi_param_list, false);
582
583	HashMap previous_params = (HashMap)params.get("p");
584	if (previous_params == null) {
585	return dc_response_doc_content;
586	}
587	String service_name = (String)previous_params.get(GSParams.SERVICE);
588	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
589	logger.error("invalid service, not doing highlighting");
590	return dc_response_doc_content;
591	}
592	String collection = (String)params.get(GSParams.COLLECTION);
593	String lang = request.getAttribute(GSXML.LANG_ATT);
594	String uid = request.getAttribute(GSXML.USER_ID_ATT);
595	String to = GSPath.appendLink(collection, service_name);
596
597	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
598	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
599	mr_query_message.appendChild(mr_query_request);
600
601	// paramList
602	HashMap service_params = (HashMap)params.get("s1");
603
604	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
605	GSXML.addParametersToList(this.doc, query_param_list, service_params);
606	mr_query_request.appendChild(query_param_list);
607
608	// do the query
609	Element mr_query_response = (Element)this.mr.process(mr_query_message);
610
611	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
612	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
613	if (query_term_list_element == null) {
614	// no term info
615	logger.error("No query term information.\n");
616	return dc_response_doc_content;
617	}
618
619	String content = GSXML.getNodeText(dc_response_doc_content);
620
621	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
622	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
623
624	HashSet query_term_variants = new HashSet();
625	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
626	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
627	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
628	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
629	for (int j = 0; j < equivalent_terms.length; j++) {
630	query_term_variants.add(equivalent_terms[j]);
631	}
632	}
633
634	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
635
636	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
637	String performed_query = GSXML.getNodeText(query_element) + " ";
638
639	ArrayList phrase_query_p_term_variants_list = new ArrayList();
640	int term_start = 0;
641	boolean in_term = false;
642	boolean in_phrase = false;
643	for (int i = 0; i < performed_query.length(); i++) {
644	char character = performed_query.charAt(i);
645	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
646
647	// Has a query term just started?
648	if (in_term == false && is_character_letter_or_digit == true) {
649	in_term = true;
650	term_start = i;
651	}
652
653	// Or has a term just finished?
654	else if (in_term == true && is_character_letter_or_digit == false) {
655	in_term = false;
656	String term = performed_query.substring(term_start, i);
657
658	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
659	if (term_element != null) {
660
661	HashSet phrase_query_p_term_x_variants = new HashSet();
662
663	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
664	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
665	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
666	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
667	for (int k = 0; k < term_equivalent_terms.length; k++) {
668	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
669	}
670	}
671	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
672
673	if (in_phrase == false) {
674	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
675	phrase_query_p_term_variants_list = new ArrayList();
676	}
677	}
678	}
679	// Watch for phrases (surrounded by quotes)
680	if (character == '\"') {
681	// Has a phrase just started?
682	if (in_phrase == false) {
683	in_phrase = true;
684	}
685	// Or has a phrase just finished?
686	else if (in_phrase == true) {
687	in_phrase = false;
688	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
689	}
690
691	phrase_query_p_term_variants_list = new ArrayList();
692	}
693	}
694
695	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
696	}
697
698
699	/**
700	* Highlights query terms in a piece of text.
701	*/
702	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
703	{
704	// Convert the content string to an array of characters for speed
705	char[] content_characters = new char[content.length()];
706	content.getChars(0, content.length(), content_characters, 0);
707
708	// Now skim through the content, identifying word matches
709	ArrayList word_matches = new ArrayList();
710	int word_start = 0;
711	boolean in_word = false;
712	boolean preceding_word_matched = false;
713	for (int i = 0; i < content_characters.length; i++) {
714	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
715
716	// Has a word just started?
717	if (in_word == false && is_character_letter_or_digit == true) {
718	in_word = true;
719	word_start = i;
720	}
721
722	// Or has a word just finished?
723	else if (in_word == true && is_character_letter_or_digit == false) {
724	in_word = false;
725
726	// Check if the word matches any of the query term equivalents
727	String word = new String(content_characters, word_start, (i - word_start));
728	if (query_term_variants.contains(word)) {
729	// We have found a matching word, so remember its location
730	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
731	preceding_word_matched = true;
732	}
733	else {
734	preceding_word_matched = false;
735	}
736	}
737	}
738
739	// Don't forget the last word...
740	if (in_word == true) {
741	// Check if the word matches any of the query term equivalents
742	String word = new String(content_characters, word_start, (content_characters.length - word_start));
743	if (query_term_variants.contains(word)) {
744	// We have found a matching word, so remember its location
745	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
746	}
747	}
748
749	ArrayList highlight_start_positions = new ArrayList();
750	ArrayList highlight_end_positions = new ArrayList();
751
752	// Deal with phrases now
753	ArrayList partial_phrase_matches = new ArrayList();
754	for (int i = 0; i < word_matches.size(); i++) {
755	WordMatch word_match = (WordMatch) word_matches.get(i);
756
757	// See if any partial phrase matches are extended by this word
758	if (word_match.preceding_word_matched) {
759	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
760	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
761	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
762	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
763	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
764	partial_phrase_match.num_words_matched++;
765
766	// Has a complete phrase match occurred?
767	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
768	// Check for overlaps by looking at the previous highlight range
769	if (!highlight_end_positions.isEmpty()) {
770	int last_highlight_index = highlight_end_positions.size() - 1;
771	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
772	if (last_highlight_end > partial_phrase_match.start_position) {
773	// There is an overlap, so remove the previous phrase match
774	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
775	highlight_end_positions.remove(last_highlight_index);
776	partial_phrase_match.start_position = last_highlight_start;
777	}
778	}
779
780	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
781	highlight_end_positions.add(new Integer(word_match.end_position));
782	}
783	// No, but add the partial match back into the list for next time
784	else {
785	partial_phrase_matches.add(partial_phrase_match);
786	}
787	}
788	}
789	}
790	else {
791	partial_phrase_matches.clear();
792	}
793
794	// See if this word is at the start of any of the phrases
795	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
796	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
797	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
798	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
799	// If this phrase is just one word long, we have a complete match
800	if (phrase_query_p_term_variants_list.size() == 1) {
801	highlight_start_positions.add(new Integer(word_match.start_position));
802	highlight_end_positions.add(new Integer(word_match.end_position));
803	}
804	// Otherwise we have the start of a potential phrase match
805	else {
806	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
807	}
808	}
809	}
810	}
811
812	// Now add the annotation tags into the document at the correct points
813	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
814
815	int last_wrote = 0;
816	for (int i = 0; i < highlight_start_positions.size(); i++) {
817	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
818	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
819
820	// Print anything before the highlight range
821	if (last_wrote < highlight_start) {
822	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
823	// System.err.print(preceding_text);
824	content_element.appendChild(this.doc.createTextNode(preceding_text));
825	}
826
827	// Print the highlight text, annotated
828	if (highlight_end > last_wrote) {
829	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
830	// System.err.print("\|" + highlight_text + "\|");
831	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
832	annotation_element.setAttribute("type", "query_term");
833	content_element.appendChild(annotation_element);
834	last_wrote = highlight_end;
835	}
836	}
837
838	// Finish off any unwritten text
839	if (last_wrote < content_characters.length) {
840	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
841	// System.err.print(remaining_text);
842	content_element.appendChild(this.doc.createTextNode(remaining_text));
843	}
844
845	return content_element;
846	}
847
848
849	static private class WordMatch
850	{
851	public String word;
852	public int start_position;
853	public int end_position;
854	public boolean preceding_word_matched;
855
856	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
857	{
858	this.word = word;
859	this.start_position = start_position;
860	this.end_position = end_position;
861	this.preceding_word_matched = preceding_word_matched;
862	}
863	}
864
865
866	static private class PartialPhraseMatch
867	{
868	public int start_position;
869	public int query_phrase_number;
870	public int num_words_matched;
871
872	public PartialPhraseMatch(int start_position, int query_phrase_number)
873	{
874	this.start_position = start_position;
875	this.query_phrase_number = query_phrase_number;
876	this.num_words_matched = 1;
877	}
878	}
879	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: