Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 24219

Last change on this file since 24219 was 24116, checked in by sjm84, 13 years ago
Fixed search term highlighting in Lucene
Property svn:keywords set to `Author Date Id Revision`
File size: 38.4 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router
41	*/
42	public class DocumentAction extends Action {
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50
51	/** if this is set to true, when a document is displayed, any annotation
52	* type services (enrich) will be offered to the user as well */
53	protected boolean provide_annotations = false;
54
55	protected boolean highlight_query_terms = false;
56
57	public boolean configure() {
58	super.configure();
59	String highlight = (String)config_params.get("highlightQueryTerms");
60	if (highlight != null && highlight.equals("true")) {
61	highlight_query_terms = true;
62	}
63	String annotate = (String)config_params.get("displayAnnotationService");
64	if (annotate != null && annotate.equals("true")) {
65	provide_annotations = true;
66	}
67	return true;
68	}
69	public Node process (Node message_node)
70	{
71	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73	Element message = this.converter.nodeToElement(message_node);
74
75	// the response
76	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78	result.appendChild(page_response);
79
80	// get the request - assume only one
81	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83	HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85	// just in case there are some that need to get passed to the services
86	HashMap service_params = (HashMap)params.get("s0");
87
88
89	String has_rl = null;
90	String has_href = null;
91	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
92	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
93	String collection = (String) params.get(GSParams.COLLECTION);
94	String lang = request.getAttribute(GSXML.LANG_ATT);
95	String uid = request.getAttribute(GSXML.USER_ID_ATT);
96	String document_name = (String) params.get(GSParams.DOCUMENT);
97	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals(""))) {
98	logger.error("no document specified!");
99	return result;
100	}
101	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
102	if (document_type == null) {
103	document_type = "simple";
104	}
105	//whether to retrieve siblings or not
106	boolean get_siblings = false;
107	String sibs = (String) params.get(SIBLING_ARG);
108	if (sibs != null && sibs.equals("1")) {
109	get_siblings = true;
110	}
111
112	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
113	if (sibling_num != null && !sibling_num.equals("")) {
114	// we have to modify the doc name
115	document_name = document_name+"."+sibling_num+".ss";
116	}
117
118	boolean expand_document = false;
119	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
120	if (ed_arg != null && ed_arg.equals("1")) {
121	expand_document = true;
122	}
123
124
125	boolean expand_contents = false;
126	if (expand_document) { // we always expand the contents with the text
127	expand_contents = true;
128	} else {
129	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
130	if (ec_arg != null && ec_arg.equals("1")) {
131	expand_contents = true;
132	}
133	}
134
135	//append site metadata
136	addSiteMetadata( page_response, lang, uid);
137
138	// get the additional data needed for the page
139	getBackgroundData(page_response, collection, lang, uid);
140	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
141
142	// the_document is where all the doc info - structure and metadata etc
143	// is added into, to be returned in the page
144	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
145	page_response.appendChild(the_document);
146
147	// set the doctype from the cgi arg as an attribute
148	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
149
150	// create a basic doc list containing the current node
151	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
152	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
153	basic_doc_list.appendChild(current_doc);
154	if (document_name.length()!=0){
155	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
156	}else if (has_href.length()!=0){
157	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
158	current_doc.setAttribute("externalURL", has_rl);
159	}
160
161	// Create a parameter list to specify the required structure information
162	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
163
164	if (service_params != null) {
165	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
166	}
167
168	Element ds_param = null;
169	boolean get_structure = false;
170	boolean get_structure_info = false;
171	if (document_type.equals("paged")) {
172	get_structure_info = true;
173	// get teh info needed for paged naviagtion
174	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175	ds_param_list.appendChild(ds_param);
176	ds_param.setAttribute(GSXML.NAME_ATT, "info");
177	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
178	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
179	ds_param_list.appendChild(ds_param);
180	ds_param.setAttribute(GSXML.NAME_ATT, "info");
181	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
182	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
183	ds_param_list.appendChild(ds_param);
184	ds_param.setAttribute(GSXML.NAME_ATT, "info");
185	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
186
187	} else if (document_type.equals("hierarchy")){
188	get_structure = true;
189	if (expand_contents) {
190	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
191	ds_param_list.appendChild(ds_param);
192	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
193	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
194	} else {
195	// get the info needed for table of contents
196	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
197	ds_param_list.appendChild(ds_param);
198	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
199	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
200	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201	ds_param_list.appendChild(ds_param);
202	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
204	if (get_siblings) {
205	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206	ds_param_list.appendChild(ds_param);
207	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
208	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
209	}
210	}
211	} else {
212	// we dont need any structure
213	}
214
215	boolean has_dummy = false;
216	if (get_structure \|\| get_structure_info) {
217
218	// Build a request to obtain the document structure
219	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
220	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
221	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
222	ds_message.appendChild(ds_request);
223	ds_request.appendChild(ds_param_list);
224
225	// create a doc_node_list and put in the doc_node that we are interested in
226	ds_request.appendChild(basic_doc_list);
227
228	// Process the document structure retrieve message
229	Element ds_response_message = (Element) this.mr.process(ds_message);
230	if (processErrorElements(ds_response_message, page_response)) {
231	return result;
232	}
233
234	// get the info and print out
235	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
236	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
237	path = GSPath.appendLink(path, "nodeStructureInfo");
238	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
239	// get the doc_node bit
240	if (ds_response_struct_info != null) {
241	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
242	}
243	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
244	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
245	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
246	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
247
248	if (ds_response_structure != null) {
249	// add the contents of the structure bit into the_document
250	NodeList structs = ds_response_structure.getChildNodes();
251	for (int i=0; i<structs.getLength();i++) {
252	the_document.appendChild(this.doc.importNode(structs.item(i), true));
253	}
254	} else {
255	// no structure nodes, so put in a dummy doc node
256	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
257	if (document_name.length()!=0){
258	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
259	}else if (has_href.length()!=0){
260	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
261	doc_node.setAttribute("externalURL", has_rl);
262	}
263	the_document.appendChild(doc_node);
264	has_dummy = true;
265	}
266	} else { // a simple type - we dont have a dummy node for simple
267	// should think about this more
268	// no structure request, so just put in a dummy doc node
269	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
270	if (document_name.length()!=0){
271	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
272	}else if (has_href.length()!=0){
273	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
274	doc_node.setAttribute("externalURL", has_rl);
275	}
276	the_document.appendChild(doc_node);
277	has_dummy = true;
278	}
279
280	// Build a request to obtain some document metadata
281	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
282	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
283	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
284	dm_message.appendChild(dm_request);
285	// Create a parameter list to specify the required metadata information
286
287	HashSet meta_names = new HashSet();
288	meta_names.add("Title"); // the default
289	if (format_elem != null) {
290	extractMetadataNames(format_elem, meta_names);
291	}
292
293	Element dm_param_list = createMetadataParamList(meta_names);
294	if (service_params != null) {
295	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
296	}
297
298	dm_request.appendChild(dm_param_list);
299
300
301	// create the doc node list for the metadata request
302	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
303	dm_request.appendChild(dm_doc_list);
304
305	// Add each node from the structure response into the metadata request
306	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
307	for (int i = 0; i < doc_nodes.getLength(); i++) {
308	Element doc_node = (Element) doc_nodes.item(i);
309	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
310
311	// Add the documentNode to the list
312	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313	dm_doc_list.appendChild(dm_doc_node);
314	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
315	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
316	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
317	}
318
319	// we also want a metadata request to the top level document to get
320	// assocfilepath - this could be cached too
321	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
322	dm_message.appendChild(doc_meta_request);
323	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
324	if (service_params != null) {
325	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
326	}
327
328	doc_meta_request.appendChild(doc_meta_param_list);
329	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
330	doc_meta_param_list.appendChild(doc_param);
331	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
332	doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
333
334	// create the doc node list for the metadata request
335	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
336	doc_meta_request.appendChild(doc_list);
337
338	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
339	// the node we want is the root document node
340	if (document_name.length()!=0){
341	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
342	}else if (has_href.length()!=0){
343	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
344	doc_node.setAttribute("externalURL", has_rl);
345	}
346	doc_list.appendChild(doc_node);
347	Element dm_response_message = (Element) this.mr.process(dm_message);
348	if (processErrorElements(dm_response_message, page_response)) {
349	return result;
350	}
351
352	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
353	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
354
355	// Merge the metadata with the structure information
356	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
357	for (int i = 0; i < doc_nodes.getLength(); i++) {
358	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
359	}
360	// get the top level doc metadata out
361	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
362	Element top_doc_node = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
363	GSXML.mergeMetadataLists(the_document, top_doc_node);
364
365	// Build a request to obtain some document content
366	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
367	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
368	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
369	dc_message.appendChild(dc_request);
370
371
372	// Create a parameter list to specify the request parameters - empty for now
373	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
374	if (service_params != null) {
375	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
376	}
377
378	dc_request.appendChild(dc_param_list);
379
380	// get the content
381	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
382	if (expand_document) {
383	dc_request.appendChild(dm_doc_list);
384	} else {
385	dc_request.appendChild(basic_doc_list);
386	}
387	logger.debug("request = "+converter.getString(dc_message));
388	Element dc_response_message = (Element) this.mr.process(dc_message);
389	if (processErrorElements(dc_response_message, page_response)) {
390	return result;
391	}
392
393	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
394
395	if (expand_document) {
396	// Merge the content with the structure information
397	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
398	for (int i = 0; i < doc_nodes.getLength(); i++) {
399	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
400	if (content != null) {
401	if (highlight_query_terms) {
402	content = highlightQueryTerms(request, (Element)content);
403	}
404	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
405	}
406	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
407	}
408	} else {
409	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
410	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
411	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
412	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
413
414	if (dc_response_doc_content == null) {
415	// no content to add
416	if (dc_response_doc_external !=null){
417	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
418
419	the_document.setAttribute("selectedNode", modified_doc_id);
420	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
421	}
422	return result;
423	}
424	if (highlight_query_terms) {
425	dc_response_doc.removeChild(dc_response_doc_content);
426
427	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
428	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
429	}
430
431
432	if (provide_annotations) {
433	String service_selected = (String)params.get(ENRICH_DOC_ARG);
434	if (service_selected != null && service_selected.equals("1")) {
435	// now we can modifiy the response doc if needed
436	String enrich_service = (String)params.get(GSParams.SERVICE);
437	// send a message to the service
438	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
439	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
440	enrich_message.appendChild(enrich_request);
441	// check for parameters
442	HashMap e_service_params = (HashMap)params.get("s1");
443	if (e_service_params != null) {
444	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
445	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
446	enrich_request.appendChild(enrich_pl);
447	}
448	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
449	enrich_request.appendChild(e_doc_list);
450	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
451
452	Node enrich_response = this.mr.process(enrich_message);
453
454	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
455	path = GSPath.createPath(links);
456	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
457
458	}
459	} // if provide_annotations
460
461
462	// use the returned id rather than the sent one cos there may have
463	// been modifiers such as .pr that are removed.
464	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
465	the_document.setAttribute("selectedNode", modified_doc_id);
466	if (has_dummy) {
467	// change the id if necessary and add the content
468	Element dummy_node = (Element)doc_nodes.item(0);
469
470	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
471	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
472	// hack for simple type
473	if (document_type.equals("simple")) {
474	// we dont want the internal docNode, just want the content and metadata in the document
475	// rethink this!!
476	the_document.removeChild(dummy_node);
477
478	NodeList dummy_children = dummy_node.getChildNodes();
479	//for (int i=0; i<dummy_children.getLength(); i++) {
480	for (int i=dummy_children.getLength()-1; i>=0; i--) {
481	// special case as we don't want more than one metadata list
482	if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER)) {
483	GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
484	} else {
485	the_document.appendChild(dummy_children.item(i));
486	}
487	}
488	}
489	} else {
490	// Merge the document content with the metadata and structure information
491	for (int i = 0; i < doc_nodes.getLength(); i++) {
492	Node dn = doc_nodes.item(i);
493	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
494	if (dn_id.equals(modified_doc_id)) {
495	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
496	break;
497	}
498	}
499	}
500	}
501	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
502	return result;
503	}
504
505	/** tell the param class what its arguments are
506	* if an action has its own arguments, this should add them to the params
507	* object - particularly important for args that should not be saved */
508	public boolean getActionParameters(GSParams params) {
509	params.addParameter(GOTO_PAGE_ARG, false);
510	params.addParameter(ENRICH_DOC_ARG, false);
511	return true;
512	}
513
514
515	/** this method gets the collection description, the format info, the
516	* list of enrich services, etc - stuff that is needed for the page,
517	* but is the same whatever the query is - should be cached */
518	protected boolean getBackgroundData(Element page_response,
519	String collection, String lang,
520	String uid) {
521
522	// create a message to process - contains requests for the collection
523	// description, the format element, the enrich services on offer
524	// these could all be cached
525	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
526	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
527	// the format request - ignore for now, where does this request go to??
528	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
529	info_message.appendChild(format_request);
530
531	// the enrich_services request - only do this if provide_annotations is true
532
533	if (provide_annotations) {
534	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
535	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
536	info_message.appendChild(enrich_services_request);
537	}
538
539	Element info_response = (Element)this.mr.process(info_message);
540
541	// the collection is the first response
542	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
543	Element format_resp = (Element) responses.item(0);
544
545	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
546	if (format_elem != null) {
547	logger.debug("doc action found a format statement");
548	// set teh format type
549	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
550	page_response.appendChild(this.doc.importNode(format_elem, true));
551	}
552
553	if (provide_annotations) {
554	Element services_resp = (Element)responses.item(1);
555
556	// a new message for the mr
557	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
558
559	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
560	boolean service_found = false;
561	for (int j=0; j<e_services.getLength(); j++) {
562	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
563	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
564	enrich_message.appendChild(s);
565	service_found = true;
566	}
567	}
568	if (service_found) {
569	Element enrich_response = (Element)this.mr.process(enrich_message);
570
571	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
572	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
573	for (int i=0; i<e_responses.getLength(); i++) {
574	Element e_resp = (Element)e_responses.item(i);
575	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
576	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
577	service_list.appendChild(e_service);
578	}
579	page_response.appendChild(service_list);
580	}
581	} // if provide_annotations
582	return true;
583
584	}
585
586	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
587	*/
588	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
589
590	// do the query again to get term info
591	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
592	HashMap params = GSXML.extractParams(cgi_param_list, false);
593
594	HashMap previous_params = (HashMap)params.get("p");
595	if (previous_params == null) {
596	return dc_response_doc_content;
597	}
598	String service_name = (String)previous_params.get(GSParams.SERVICE);
599	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
600	logger.debug("invalid service, not doing highlighting");
601	return dc_response_doc_content;
602	}
603	String collection = (String)params.get(GSParams.COLLECTION);
604	String lang = request.getAttribute(GSXML.LANG_ATT);
605	String uid = request.getAttribute(GSXML.USER_ID_ATT);
606	String to = GSPath.appendLink(collection, service_name);
607
608	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
609	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
610	mr_query_message.appendChild(mr_query_request);
611
612	// paramList
613	HashMap service_params = (HashMap)params.get("s1");
614
615	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
616	GSXML.addParametersToList(this.doc, query_param_list, service_params);
617	mr_query_request.appendChild(query_param_list);
618
619	// do the query
620	Element mr_query_response = (Element)this.mr.process(mr_query_message);
621
622	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
623	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
624	if (query_term_list_element == null) {
625	// no term info
626	logger.error("No query term information.\n");
627	return dc_response_doc_content;
628	}
629
630	String content = GSXML.getNodeText(dc_response_doc_content);
631
632	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
633	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
634
635	HashSet query_term_variants = new HashSet();
636	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
637	if(equivalent_terms_nodelist == null \|\| equivalent_terms_nodelist.getLength() == 0)
638	{
639	NodeList terms_nodelist = query_term_list_element.getElementsByTagName("term");
640	if(terms_nodelist != null && terms_nodelist.getLength() > 0)
641	{
642	for(int i = 0; i < terms_nodelist.getLength(); i++)
643	{
644	String termValue = ((Element)terms_nodelist.item(i)).getAttribute("name");
645	String termValueU = null;
646	String termValueL = null;
647
648	if(termValue.length() > 1)
649	{
650	termValueU = termValue.substring(0, 1).toUpperCase() + termValue.substring(1);
651	termValueL = termValue.substring(0, 1).toLowerCase() + termValue.substring(1);
652	}
653	else
654	{
655	termValueU = termValue.substring(0, 1).toUpperCase();
656	termValueL = termValue.substring(0, 1).toLowerCase();
657	}
658
659	query_term_variants.add(termValueU);
660	query_term_variants.add(termValueL);
661	}
662	}
663	}
664	else
665	{
666	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
667	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
668	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
669	for (int j = 0; j < equivalent_terms.length; j++) {
670	query_term_variants.add(equivalent_terms[j]);
671	}
672	}
673	}
674
675	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
676
677	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
678	String performed_query = GSXML.getNodeText(query_element) + " ";
679
680	ArrayList phrase_query_p_term_variants_list = new ArrayList();
681	int term_start = 0;
682	boolean in_term = false;
683	boolean in_phrase = false;
684	for (int i = 0; i < performed_query.length(); i++) {
685	char character = performed_query.charAt(i);
686	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
687
688	// Has a query term just started?
689	if (in_term == false && is_character_letter_or_digit == true) {
690	in_term = true;
691	term_start = i;
692	}
693
694	// Or has a term just finished?
695	else if (in_term == true && is_character_letter_or_digit == false) {
696	in_term = false;
697	String term = performed_query.substring(term_start, i);
698
699	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
700	if (term_element != null) {
701
702	HashSet phrase_query_p_term_x_variants = new HashSet();
703
704	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
705	if(term_equivalent_terms_nodelist == null \|\| term_equivalent_terms_nodelist.getLength() == 0)
706	{
707	String termValueU = null;
708	String termValueL = null;
709
710	if(term.length() > 1)
711	{
712	termValueU = term.substring(0, 1).toUpperCase() + term.substring(1);
713	termValueL = term.substring(0, 1).toLowerCase() + term.substring(1);
714	}
715	else
716	{
717	termValueU = term.substring(0, 1).toUpperCase();
718	termValueL = term.substring(0, 1).toLowerCase();
719	}
720
721	phrase_query_p_term_x_variants.add(termValueU);
722	phrase_query_p_term_x_variants.add(termValueL);
723	}
724	else
725	{
726	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
727	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
728	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
729	for (int k = 0; k < term_equivalent_terms.length; k++) {
730	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
731	}
732	}
733	}
734	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
735
736	if (in_phrase == false) {
737	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
738	phrase_query_p_term_variants_list = new ArrayList();
739	}
740	}
741	}
742	// Watch for phrases (surrounded by quotes)
743	if (character == '\"') {
744	// Has a phrase just started?
745	if (in_phrase == false) {
746	in_phrase = true;
747	}
748	// Or has a phrase just finished?
749	else if (in_phrase == true) {
750	in_phrase = false;
751	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
752	}
753
754	phrase_query_p_term_variants_list = new ArrayList();
755	}
756	}
757
758	System.err.println(query_term_variants + " *** " + phrase_query_term_variants_hierarchy);
759	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
760	}
761
762
763	/**
764	* Highlights query terms in a piece of text.
765	*/
766	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
767	{
768	// Convert the content string to an array of characters for speed
769	char[] content_characters = new char[content.length()];
770	content.getChars(0, content.length(), content_characters, 0);
771
772	// Now skim through the content, identifying word matches
773	ArrayList word_matches = new ArrayList();
774	int word_start = 0;
775	boolean in_word = false;
776	boolean preceding_word_matched = false;
777	for (int i = 0; i < content_characters.length; i++) {
778	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
779
780	// Has a word just started?
781	if (in_word == false && is_character_letter_or_digit == true) {
782	in_word = true;
783	word_start = i;
784	}
785
786	// Or has a word just finished?
787	else if (in_word == true && is_character_letter_or_digit == false) {
788	in_word = false;
789
790	// Check if the word matches any of the query term equivalents
791	String word = new String(content_characters, word_start, (i - word_start));
792	if (query_term_variants.contains(word)) {
793	// We have found a matching word, so remember its location
794	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
795	preceding_word_matched = true;
796	}
797	else {
798	preceding_word_matched = false;
799	}
800	}
801	}
802
803	// Don't forget the last word...
804	if (in_word == true) {
805	// Check if the word matches any of the query term equivalents
806	String word = new String(content_characters, word_start, (content_characters.length - word_start));
807	if (query_term_variants.contains(word)) {
808	// We have found a matching word, so remember its location
809	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
810	}
811	}
812
813	ArrayList highlight_start_positions = new ArrayList();
814	ArrayList highlight_end_positions = new ArrayList();
815
816	// Deal with phrases now
817	ArrayList partial_phrase_matches = new ArrayList();
818	for (int i = 0; i < word_matches.size(); i++) {
819	WordMatch word_match = (WordMatch) word_matches.get(i);
820
821	// See if any partial phrase matches are extended by this word
822	if (word_match.preceding_word_matched) {
823	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
824	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
825	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
826	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
827	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
828	partial_phrase_match.num_words_matched++;
829
830	// Has a complete phrase match occurred?
831	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
832	// Check for overlaps by looking at the previous highlight range
833	if (!highlight_end_positions.isEmpty()) {
834	int last_highlight_index = highlight_end_positions.size() - 1;
835	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
836	if (last_highlight_end > partial_phrase_match.start_position) {
837	// There is an overlap, so remove the previous phrase match
838	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
839	highlight_end_positions.remove(last_highlight_index);
840	partial_phrase_match.start_position = last_highlight_start;
841	}
842	}
843
844	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
845	highlight_end_positions.add(new Integer(word_match.end_position));
846	}
847	// No, but add the partial match back into the list for next time
848	else {
849	partial_phrase_matches.add(partial_phrase_match);
850	}
851	}
852	}
853	}
854	else {
855	partial_phrase_matches.clear();
856	}
857
858	// See if this word is at the start of any of the phrases
859	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
860	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
861	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
862	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
863	// If this phrase is just one word long, we have a complete match
864	if (phrase_query_p_term_variants_list.size() == 1) {
865	highlight_start_positions.add(new Integer(word_match.start_position));
866	highlight_end_positions.add(new Integer(word_match.end_position));
867	}
868	// Otherwise we have the start of a potential phrase match
869	else {
870	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
871	}
872	}
873	}
874	}
875
876	// Now add the annotation tags into the document at the correct points
877	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
878
879	int last_wrote = 0;
880	for (int i = 0; i < highlight_start_positions.size(); i++) {
881	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
882	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
883
884	// Print anything before the highlight range
885	if (last_wrote < highlight_start) {
886	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
887	content_element.appendChild(this.doc.createTextNode(preceding_text));
888	}
889
890	// Print the highlight text, annotated
891	if (highlight_end > last_wrote) {
892	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
893	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
894	annotation_element.setAttribute("type", "query_term");
895	content_element.appendChild(annotation_element);
896	last_wrote = highlight_end;
897	}
898	}
899
900	// Finish off any unwritten text
901	if (last_wrote < content_characters.length) {
902	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
903	content_element.appendChild(this.doc.createTextNode(remaining_text));
904	}
905
906	return content_element;
907	}
908
909
910	static private class WordMatch
911	{
912	public String word;
913	public int start_position;
914	public int end_position;
915	public boolean preceding_word_matched;
916
917	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
918	{
919	this.word = word;
920	this.start_position = start_position;
921	this.end_position = end_position;
922	this.preceding_word_matched = preceding_word_matched;
923	}
924	}
925
926
927	static private class PartialPhraseMatch
928	{
929	public int start_position;
930	public int query_phrase_number;
931	public int num_words_matched;
932
933	public PartialPhraseMatch(int start_position, int query_phrase_number)
934	{
935	this.start_position = start_position;
936	this.query_phrase_number = query_phrase_number;
937	this.num_words_matched = 1;
938	}
939	}
940	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: