Context Navigation

DocumentAction.java@ 23632

Last change on this file since 23632 was 23632, checked in by sjm84, 13 years ago
Adding the latest trunk changes as well as tidying up several files and removing more -m32 stuff
Property svn:keywords set to `Author Date Id Revision`
File size: 36.7 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router
41	*/
42	public class DocumentAction extends Action {
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50
51	/** if this is set to true, when a document is displayed, any annotation
52	* type services (enrich) will be offered to the user as well */
53	protected boolean provide_annotations = false;
54
55	protected boolean highlight_query_terms = false;
56
57	public boolean configure() {
58	super.configure();
59	String highlight = (String)config_params.get("highlightQueryTerms");
60	if (highlight != null && highlight.equals("true")) {
61	highlight_query_terms = true;
62	}
63	String annotate = (String)config_params.get("displayAnnotationService");
64	if (annotate != null && annotate.equals("true")) {
65	provide_annotations = true;
66	}
67	return true;
68	}
69	public Node process (Node message_node)
70	{
71	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73	Element message = this.converter.nodeToElement(message_node);
74
75	// the response
76	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78	result.appendChild(page_response);
79
80	// get the request - assume only one
81	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83	HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85	// just in case there are some that need to get passed to the services
86	HashMap service_params = (HashMap)params.get("s0");
87
88
89	String has_rl = null;
90	String has_href = null;
91	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
92	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
93	String collection = (String) params.get(GSParams.COLLECTION);
94	String lang = request.getAttribute(GSXML.LANG_ATT);
95	String uid = request.getAttribute(GSXML.USER_ID_ATT);
96	String document_name = (String) params.get(GSParams.DOCUMENT);
97	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals(""))) {
98	logger.error("no document specified!");
99	return result;
100	}
101	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
102	if (document_type == null) {
103	document_type = "simple";
104	}
105	//whether to retrieve siblings or not
106	boolean get_siblings = false;
107	String sibs = (String) params.get(SIBLING_ARG);
108	if (sibs != null && sibs.equals("1")) {
109	get_siblings = true;
110	}
111
112	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
113	if (sibling_num != null && !sibling_num.equals("")) {
114	// we have to modify the doc name
115	document_name = document_name+"."+sibling_num+".ss";
116	}
117
118	boolean expand_document = false;
119	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
120	if (ed_arg != null && ed_arg.equals("1")) {
121	expand_document = true;
122	}
123
124
125	boolean expand_contents = false;
126	if (expand_document) { // we always expand the contents with the text
127	expand_contents = true;
128	} else {
129	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
130	if (ec_arg != null && ec_arg.equals("1")) {
131	expand_contents = true;
132	}
133	}
134
135	//append site metadata
136	addSiteMetadata( page_response, lang, uid);
137
138	// get the additional data needed for the page
139	getBackgroundData(page_response, collection, lang, uid);
140	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
141
142	// the_document is where all the doc info - structure and metadata etc
143	// is added into, to be returned in the page
144	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
145	page_response.appendChild(the_document);
146
147	// set the doctype from the cgi arg as an attribute
148	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
149
150	// create a basic doc list containing the current node
151	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
152	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
153	basic_doc_list.appendChild(current_doc);
154	if (document_name.length()!=0){
155	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
156	}else if (has_href.length()!=0){
157	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
158	current_doc.setAttribute("externalURL", has_rl);
159	}
160
161	// Create a parameter list to specify the required structure information
162	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
163
164	if (service_params != null) {
165	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
166	}
167
168	Element ds_param = null;
169	boolean get_structure = false;
170	boolean get_structure_info = false;
171	if (document_type.equals("paged")) {
172	get_structure_info = true;
173	// get teh info needed for paged naviagtion
174	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175	ds_param_list.appendChild(ds_param);
176	ds_param.setAttribute(GSXML.NAME_ATT, "info");
177	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
178	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
179	ds_param_list.appendChild(ds_param);
180	ds_param.setAttribute(GSXML.NAME_ATT, "info");
181	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
182	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
183	ds_param_list.appendChild(ds_param);
184	ds_param.setAttribute(GSXML.NAME_ATT, "info");
185	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
186
187	} else if (document_type.equals("hierarchy")){
188	get_structure = true;
189	if (expand_contents) {
190	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
191	ds_param_list.appendChild(ds_param);
192	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
193	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
194	} else {
195	// get the info needed for table of contents
196	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
197	ds_param_list.appendChild(ds_param);
198	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
199	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
200	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201	ds_param_list.appendChild(ds_param);
202	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
204	if (get_siblings) {
205	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206	ds_param_list.appendChild(ds_param);
207	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
208	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
209	}
210	}
211	} else {
212	// we dont need any structure
213	}
214
215	boolean has_dummy = false;
216	if (get_structure \|\| get_structure_info) {
217
218	// Build a request to obtain the document structure
219	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
220	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
221	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
222	ds_message.appendChild(ds_request);
223	ds_request.appendChild(ds_param_list);
224
225	// create a doc_node_list and put in the doc_node that we are interested in
226	ds_request.appendChild(basic_doc_list);
227
228	// Process the document structure retrieve message
229	Element ds_response_message = (Element) this.mr.process(ds_message);
230	if (processErrorElements(ds_response_message, page_response)) {
231	return result;
232	}
233
234	// get the info and print out
235	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
236	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
237	path = GSPath.appendLink(path, "nodeStructureInfo");
238	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
239	// get the doc_node bit
240	if (ds_response_struct_info != null) {
241	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
242	}
243	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
244	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
245	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
246	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
247
248	if (ds_response_structure != null) {
249	// add the contents of the structure bit into the_document
250	NodeList structs = ds_response_structure.getChildNodes();
251	for (int i=0; i<structs.getLength();i++) {
252	the_document.appendChild(this.doc.importNode(structs.item(i), true));
253	}
254	} else {
255	// no structure nodes, so put in a dummy doc node
256	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
257	if (document_name.length()!=0){
258	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
259	}else if (has_href.length()!=0){
260	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
261	doc_node.setAttribute("externalURL", has_rl);
262	}
263	the_document.appendChild(doc_node);
264	has_dummy = true;
265	}
266	} else { // a simple type - we dont have a dummy node for simple
267	// should think about this more
268	// no structure request, so just put in a dummy doc node
269	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
270	if (document_name.length()!=0){
271	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
272	}else if (has_href.length()!=0){
273	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
274	doc_node.setAttribute("externalURL", has_rl);
275	}
276	the_document.appendChild(doc_node);
277	has_dummy = true;
278	}
279
280	// Build a request to obtain some document metadata
281	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
282	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
283	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
284	dm_message.appendChild(dm_request);
285	// Create a parameter list to specify the required metadata information
286
287	HashSet meta_names = new HashSet();
288	meta_names.add("Title"); // the default
289	if (format_elem != null) {
290	extractMetadataNames(format_elem, meta_names);
291	}
292
293	Element dm_param_list = createMetadataParamList(meta_names);
294	if (service_params != null) {
295	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
296	}
297
298	dm_request.appendChild(dm_param_list);
299
300
301	// create the doc node list for the metadata request
302	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
303	dm_request.appendChild(dm_doc_list);
304
305	// Add each node from the structure response into the metadata request
306	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
307	for (int i = 0; i < doc_nodes.getLength(); i++) {
308	Element doc_node = (Element) doc_nodes.item(i);
309	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
310
311	// Add the documentNode to the list
312	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313	dm_doc_list.appendChild(dm_doc_node);
314	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
315	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
316	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
317	}
318
319	// we also want a metadata request to the top level document to get
320	// assocfilepath - this could be cached too
321	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
322	dm_message.appendChild(doc_meta_request);
323	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
324	if (service_params != null) {
325	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
326	}
327
328	doc_meta_request.appendChild(doc_meta_param_list);
329	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
330	doc_meta_param_list.appendChild(doc_param);
331	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
332	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
333
334	// create the doc node list for the metadata request
335	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
336	doc_meta_request.appendChild(doc_list);
337
338	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
339	// the node we want is the root document node
340	if (document_name.length()!=0){
341	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
342	}else if (has_href.length()!=0){
343	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
344	doc_node.setAttribute("externalURL", has_rl);
345	}
346	doc_list.appendChild(doc_node);
347	Element dm_response_message = (Element) this.mr.process(dm_message);
348	if (processErrorElements(dm_response_message, page_response)) {
349	return result;
350	}
351
352	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
353	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
354
355	// Merge the metadata with the structure information
356	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
357	for (int i = 0; i < doc_nodes.getLength(); i++) {
358	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
359	}
360	// get the top level doc metadata out
361	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
362	Element top_doc_node = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
363	GSXML.mergeMetadataLists(the_document, top_doc_node);
364
365	// Build a request to obtain some document content
366	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
367	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
368	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
369	dc_message.appendChild(dc_request);
370
371
372	// Create a parameter list to specify the request parameters - empty for now
373	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
374	if (service_params != null) {
375	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
376	}
377
378	dc_request.appendChild(dc_param_list);
379
380	// get the content
381	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
382	if (expand_document) {
383	dc_request.appendChild(dm_doc_list);
384	} else {
385	dc_request.appendChild(basic_doc_list);
386	}
387	logger.debug("request = "+converter.getString(dc_message));
388	Element dc_response_message = (Element) this.mr.process(dc_message);
389	if (processErrorElements(dc_response_message, page_response)) {
390	return result;
391	}
392
393	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
394
395	if (expand_document) {
396	// Merge the content with the structure information
397	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
398	for (int i = 0; i < doc_nodes.getLength(); i++) {
399	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
400	if (content != null) {
401	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
402	}
403	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
404	}
405	} else {
406	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
407	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
408	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
409	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
410
411	if (dc_response_doc_content == null) {
412	// no content to add
413	if (dc_response_doc_external !=null){
414	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
415
416	the_document.setAttribute("selectedNode", modified_doc_id);
417	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
418	}
419	return result;
420	}
421	if (highlight_query_terms) {
422	dc_response_doc.removeChild(dc_response_doc_content);
423
424	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
425	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
426	}
427
428
429	if (provide_annotations) {
430	String service_selected = (String)params.get(ENRICH_DOC_ARG);
431	if (service_selected != null && service_selected.equals("1")) {
432	// now we can modifiy the response doc if needed
433	String enrich_service = (String)params.get(GSParams.SERVICE);
434	// send a message to the service
435	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
436	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
437	enrich_message.appendChild(enrich_request);
438	// check for parameters
439	HashMap e_service_params = (HashMap)params.get("s1");
440	if (e_service_params != null) {
441	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
442	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
443	enrich_request.appendChild(enrich_pl);
444	}
445	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
446	enrich_request.appendChild(e_doc_list);
447	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
448
449	Node enrich_response = this.mr.process(enrich_message);
450
451	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
452	path = GSPath.createPath(links);
453	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
454
455	}
456	} // if provide_annotations
457
458
459	// use the returned id rather than the sent one cos there may have
460	// been modifiers such as .pr that are removed.
461	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
462	the_document.setAttribute("selectedNode", modified_doc_id);
463	if (has_dummy) {
464	// change the id if necessary and add the content
465	Element dummy_node = (Element)doc_nodes.item(0);
466
467	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
468	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
469	// hack for simple type
470	if (document_type.equals("simple")) {
471	// we dont want the internal docNode, just want the content and metadata in the document
472	// rethink this!!
473	the_document.removeChild(dummy_node);
474
475	NodeList dummy_children = dummy_node.getChildNodes();
476	//for (int i=0; i<dummy_children.getLength(); i++) {
477	for (int i=dummy_children.getLength()-1; i>=0; i--) {
478	// special case as we don't want more than one metadata list
479	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER)) {
480	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
481	} else {
482	the_document.appendChild(dummy_children.item(i));
483	}
484	}
485	}
486	} else {
487	// Merge the document content with the metadata and structure information
488	for (int i = 0; i < doc_nodes.getLength(); i++) {
489	Node dn = doc_nodes.item(i);
490	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
491	if (dn_id.equals(modified_doc_id)) {
492	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
493	break;
494	}
495	}
496	}
497	}
498	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
499	return result;
500	}
501
502	/** tell the param class what its arguments are
503	* if an action has its own arguments, this should add them to the params
504	* object - particularly important for args that should not be saved */
505	public boolean getActionParameters(GSParams params) {
506	params.addParameter(GOTO_PAGE_ARG, false);
507	params.addParameter(ENRICH_DOC_ARG, false);
508	return true;
509	}
510
511
512	/** this method gets the collection description, the format info, the
513	* list of enrich services, etc - stuff that is needed for the page,
514	* but is the same whatever the query is - should be cached */
515	protected boolean getBackgroundData(Element page_response,
516	String collection, String lang,
517	String uid) {
518
519	// create a message to process - contains requests for the collection
520	// description, the format element, the enrich services on offer
521	// these could all be cached
522	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
523	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
524	// the format request - ignore for now, where does this request go to??
525	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
526	info_message.appendChild(format_request);
527
528	// the enrich_services request - only do this if provide_annotations is true
529
530	if (provide_annotations) {
531	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
532	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
533	info_message.appendChild(enrich_services_request);
534	}
535
536	Element info_response = (Element)this.mr.process(info_message);
537
538	// the collection is the first response
539	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
540	Element format_resp = (Element) responses.item(0);
541
542	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
543	if (format_elem != null) {
544	logger.debug("doc action found a format statement");
545	// set teh format type
546	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
547	page_response.appendChild(this.doc.importNode(format_elem, true));
548	}
549
550	if (provide_annotations) {
551	Element services_resp = (Element)responses.item(1);
552
553	// a new message for the mr
554	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
555
556	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
557	boolean service_found = false;
558	for (int j=0; j<e_services.getLength(); j++) {
559	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
560	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
561	enrich_message.appendChild(s);
562	service_found = true;
563	}
564	}
565	if (service_found) {
566	Element enrich_response = (Element)this.mr.process(enrich_message);
567
568	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
569	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
570	for (int i=0; i<e_responses.getLength(); i++) {
571	Element e_resp = (Element)e_responses.item(i);
572	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
573	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
574	service_list.appendChild(e_service);
575	}
576	page_response.appendChild(service_list);
577	}
578	} // if provide_annotations
579	return true;
580
581	}
582
583	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
584	*/
585	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
586
587	// do the query again to get term info
588	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
589	HashMap params = GSXML.extractParams(cgi_param_list, false);
590
591	HashMap previous_params = (HashMap)params.get("p");
592	if (previous_params == null) {
593	return dc_response_doc_content;
594	}
595	String service_name = (String)previous_params.get(GSParams.SERVICE);
596	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
597	logger.debug("invalid service, not doing highlighting");
598	return dc_response_doc_content;
599	}
600	String collection = (String)params.get(GSParams.COLLECTION);
601	String lang = request.getAttribute(GSXML.LANG_ATT);
602	String uid = request.getAttribute(GSXML.USER_ID_ATT);
603	String to = GSPath.appendLink(collection, service_name);
604
605	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
606	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
607	mr_query_message.appendChild(mr_query_request);
608
609	// paramList
610	HashMap service_params = (HashMap)params.get("s1");
611
612	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
613	GSXML.addParametersToList(this.doc, query_param_list, service_params);
614	mr_query_request.appendChild(query_param_list);
615
616	// do the query
617	Element mr_query_response = (Element)this.mr.process(mr_query_message);
618
619	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
620	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
621	if (query_term_list_element == null) {
622	// no term info
623	logger.error("No query term information.\n");
624	return dc_response_doc_content;
625	}
626
627	String content = GSXML.getNodeText(dc_response_doc_content);
628
629	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
630	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
631
632	HashSet query_term_variants = new HashSet();
633	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
634	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
635	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
636	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
637	for (int j = 0; j < equivalent_terms.length; j++) {
638	query_term_variants.add(equivalent_terms[j]);
639	}
640	}
641
642	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
643
644	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
645	String performed_query = GSXML.getNodeText(query_element) + " ";
646
647	ArrayList phrase_query_p_term_variants_list = new ArrayList();
648	int term_start = 0;
649	boolean in_term = false;
650	boolean in_phrase = false;
651	for (int i = 0; i < performed_query.length(); i++) {
652	char character = performed_query.charAt(i);
653	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
654
655	// Has a query term just started?
656	if (in_term == false && is_character_letter_or_digit == true) {
657	in_term = true;
658	term_start = i;
659	}
660
661	// Or has a term just finished?
662	else if (in_term == true && is_character_letter_or_digit == false) {
663	in_term = false;
664	String term = performed_query.substring(term_start, i);
665
666	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
667	if (term_element != null) {
668
669	HashSet phrase_query_p_term_x_variants = new HashSet();
670
671	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
672	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
673	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
674	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
675	for (int k = 0; k < term_equivalent_terms.length; k++) {
676	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
677	}
678	}
679	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
680
681	if (in_phrase == false) {
682	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
683	phrase_query_p_term_variants_list = new ArrayList();
684	}
685	}
686	}
687	// Watch for phrases (surrounded by quotes)
688	if (character == '\"') {
689	// Has a phrase just started?
690	if (in_phrase == false) {
691	in_phrase = true;
692	}
693	// Or has a phrase just finished?
694	else if (in_phrase == true) {
695	in_phrase = false;
696	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
697	}
698
699	phrase_query_p_term_variants_list = new ArrayList();
700	}
701	}
702
703	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
704	}
705
706
707	/**
708	* Highlights query terms in a piece of text.
709	*/
710	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
711	{
712	// Convert the content string to an array of characters for speed
713	char[] content_characters = new char[content.length()];
714	content.getChars(0, content.length(), content_characters, 0);
715
716	// Now skim through the content, identifying word matches
717	ArrayList word_matches = new ArrayList();
718	int word_start = 0;
719	boolean in_word = false;
720	boolean preceding_word_matched = false;
721	for (int i = 0; i < content_characters.length; i++) {
722	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
723
724	// Has a word just started?
725	if (in_word == false && is_character_letter_or_digit == true) {
726	in_word = true;
727	word_start = i;
728	}
729
730	// Or has a word just finished?
731	else if (in_word == true && is_character_letter_or_digit == false) {
732	in_word = false;
733
734	// Check if the word matches any of the query term equivalents
735	String word = new String(content_characters, word_start, (i - word_start));
736	if (query_term_variants.contains(word)) {
737	// We have found a matching word, so remember its location
738	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
739	preceding_word_matched = true;
740	}
741	else {
742	preceding_word_matched = false;
743	}
744	}
745	}
746
747	// Don't forget the last word...
748	if (in_word == true) {
749	// Check if the word matches any of the query term equivalents
750	String word = new String(content_characters, word_start, (content_characters.length - word_start));
751	if (query_term_variants.contains(word)) {
752	// We have found a matching word, so remember its location
753	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
754	}
755	}
756
757	ArrayList highlight_start_positions = new ArrayList();
758	ArrayList highlight_end_positions = new ArrayList();
759
760	// Deal with phrases now
761	ArrayList partial_phrase_matches = new ArrayList();
762	for (int i = 0; i < word_matches.size(); i++) {
763	WordMatch word_match = (WordMatch) word_matches.get(i);
764
765	// See if any partial phrase matches are extended by this word
766	if (word_match.preceding_word_matched) {
767	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
768	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
769	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
770	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
771	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
772	partial_phrase_match.num_words_matched++;
773
774	// Has a complete phrase match occurred?
775	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
776	// Check for overlaps by looking at the previous highlight range
777	if (!highlight_end_positions.isEmpty()) {
778	int last_highlight_index = highlight_end_positions.size() - 1;
779	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
780	if (last_highlight_end > partial_phrase_match.start_position) {
781	// There is an overlap, so remove the previous phrase match
782	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
783	highlight_end_positions.remove(last_highlight_index);
784	partial_phrase_match.start_position = last_highlight_start;
785	}
786	}
787
788	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
789	highlight_end_positions.add(new Integer(word_match.end_position));
790	}
791	// No, but add the partial match back into the list for next time
792	else {
793	partial_phrase_matches.add(partial_phrase_match);
794	}
795	}
796	}
797	}
798	else {
799	partial_phrase_matches.clear();
800	}
801
802	// See if this word is at the start of any of the phrases
803	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
804	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
805	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
806	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
807	// If this phrase is just one word long, we have a complete match
808	if (phrase_query_p_term_variants_list.size() == 1) {
809	highlight_start_positions.add(new Integer(word_match.start_position));
810	highlight_end_positions.add(new Integer(word_match.end_position));
811	}
812	// Otherwise we have the start of a potential phrase match
813	else {
814	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
815	}
816	}
817	}
818	}
819
820	// Now add the annotation tags into the document at the correct points
821	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
822
823	int last_wrote = 0;
824	for (int i = 0; i < highlight_start_positions.size(); i++) {
825	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
826	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
827
828	// Print anything before the highlight range
829	if (last_wrote < highlight_start) {
830	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
831	content_element.appendChild(this.doc.createTextNode(preceding_text));
832	}
833
834	// Print the highlight text, annotated
835	if (highlight_end > last_wrote) {
836	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
837	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
838	annotation_element.setAttribute("type", "query_term");
839	content_element.appendChild(annotation_element);
840	last_wrote = highlight_end;
841	}
842	}
843
844	// Finish off any unwritten text
845	if (last_wrote < content_characters.length) {
846	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
847	content_element.appendChild(this.doc.createTextNode(remaining_text));
848	}
849
850	return content_element;
851	}
852
853
854	static private class WordMatch
855	{
856	public String word;
857	public int start_position;
858	public int end_position;
859	public boolean preceding_word_matched;
860
861	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
862	{
863	this.word = word;
864	this.start_position = start_position;
865	this.end_position = end_position;
866	this.preceding_word_matched = preceding_word_matched;
867	}
868	}
869
870
871	static private class PartialPhraseMatch
872	{
873	public int start_position;
874	public int query_phrase_number;
875	public int num_words_matched;
876
877	public PartialPhraseMatch(int start_position, int query_phrase_number)
878	{
879	this.start_position = start_position;
880	this.query_phrase_number = query_phrase_number;
881	this.num_words_matched = 1;
882	}
883	}
884	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 23632

Download in other formats: