Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 20292

Last change on this file since 20292 was 20292, checked in by kjdon, 15 years ago
removed some System.err debug messages, which don't look like they are needed.
Property svn:keywords set to `Author Date Id Revision`
File size: 36.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router
41	*/
42	public class DocumentAction extends Action {
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp";
49	public static final String ENRICH_DOC_ARG = "end";
50
51	/** if this is set to true, when a document is displayed, any annotation
52	* type services (enrich) will be offered to the user as well */
53	protected boolean provide_annotations = false;
54
55	protected boolean highlight_query_terms = false;
56
57	public boolean configure() {
58	super.configure();
59	String highlight = (String)config_params.get("highlightQueryTerms");
60	if (highlight != null && highlight.equals("true")) {
61	highlight_query_terms = true;
62	}
63	String annotate = (String)config_params.get("displayAnnotationService");
64	if (annotate != null && annotate.equals("true")) {
65	provide_annotations = true;
66	}
67	return true;
68	}
69	public Node process (Node message_node)
70	{
71	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73	Element message = this.converter.nodeToElement(message_node);
74
75	// the response
76	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78	result.appendChild(page_response);
79
80	// get the request - assume only one
81	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83	HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85	// just in case there are some that need to get passed to the services
86	HashMap service_params = (HashMap)params.get("s0");
87
88
89	String has_rl = null;
90	String has_href = null;
91	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
92	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
93	String collection = (String) params.get(GSParams.COLLECTION);
94	String lang = request.getAttribute(GSXML.LANG_ATT);
95	String uid = request.getAttribute(GSXML.USER_ID_ATT);
96	String document_name = (String) params.get(GSParams.DOCUMENT);
97	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals(""))) {
98	logger.error("no document specified!");
99	return result;
100	}
101	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
102	if (document_type == null) {
103	document_type = "simple";
104	}
105	//whether to retrieve siblings or not
106	boolean get_siblings = false;
107	String sibs = (String) params.get(SIBLING_ARG);
108	if (sibs != null && sibs.equals("1")) {
109	get_siblings = true;
110	}
111
112	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
113	if (sibling_num != null && !sibling_num.equals("")) {
114	// we have to modify the doc name
115	document_name = document_name+"."+sibling_num+".ss";
116	}
117
118	boolean expand_document = false;
119	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
120	if (ed_arg != null && ed_arg.equals("1")) {
121	expand_document = true;
122	}
123
124
125	boolean expand_contents = false;
126	if (expand_document) { // we always expand the contents with the text
127	expand_contents = true;
128	} else {
129	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
130	if (ec_arg != null && ec_arg.equals("1")) {
131	expand_contents = true;
132	}
133	}
134
135	//append site metadata
136	addSiteMetadata( page_response, lang, uid);
137
138	// get the additional data needed for the page
139	getBackgroundData(page_response, collection, lang, uid);
140	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
141
142	// the_document is where all the doc info - structure and metadata etc
143	// is added into, to be returned in the page
144	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
145	page_response.appendChild(the_document);
146
147	// set the doctype from the cgi arg as an attribute
148	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
149
150	// create a basic doc list containing the current node
151	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
152	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
153	basic_doc_list.appendChild(current_doc);
154	if (document_name.length()!=0){
155	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
156	}else if (has_href.length()!=0){
157	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
158	current_doc.setAttribute("externalURL", has_rl);
159	}
160
161	// Create a parameter list to specify the required structure information
162	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
163
164	if (service_params != null) {
165	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
166	}
167
168	Element ds_param = null;
169	boolean get_structure = false;
170	boolean get_structure_info = false;
171	if (document_type.equals("paged")) {
172	get_structure_info = true;
173	// get teh info needed for paged naviagtion
174	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175	ds_param_list.appendChild(ds_param);
176	ds_param.setAttribute(GSXML.NAME_ATT, "info");
177	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
178	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
179	ds_param_list.appendChild(ds_param);
180	ds_param.setAttribute(GSXML.NAME_ATT, "info");
181	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
182	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
183	ds_param_list.appendChild(ds_param);
184	ds_param.setAttribute(GSXML.NAME_ATT, "info");
185	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
186
187	} else if (document_type.equals("hierarchy")){
188	get_structure = true;
189	if (expand_contents) {
190	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
191	ds_param_list.appendChild(ds_param);
192	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
193	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
194	} else {
195	// get the info needed for table of contents
196	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
197	ds_param_list.appendChild(ds_param);
198	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
199	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
200	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201	ds_param_list.appendChild(ds_param);
202	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
204	if (get_siblings) {
205	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206	ds_param_list.appendChild(ds_param);
207	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
208	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
209	}
210	}
211	} else {
212	// we dont need any structure
213	}
214
215	boolean has_dummy = false;
216	if (get_structure \|\| get_structure_info) {
217
218	// Build a request to obtain the document structure
219	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
220	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
221	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
222	ds_message.appendChild(ds_request);
223	ds_request.appendChild(ds_param_list);
224
225	// create a doc_node_list and put in the doc_node that we are interested in
226	ds_request.appendChild(basic_doc_list);
227
228	// Process the document structure retrieve message
229	Element ds_response_message = (Element) this.mr.process(ds_message);
230	if (processErrorElements(ds_response_message, page_response)) {
231	return result;
232	}
233
234	// get the info and print out
235	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
236	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
237	path = GSPath.appendLink(path, "nodeStructureInfo");
238	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
239	// get the doc_node bit
240	if (ds_response_struct_info != null) {
241	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
242	}
243	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
244	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
245	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
246	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
247
248	if (ds_response_structure != null) {
249	// add the contents of the structure bit into the_document
250	NodeList structs = ds_response_structure.getChildNodes();
251	for (int i=0; i<structs.getLength();i++) {
252	the_document.appendChild(this.doc.importNode(structs.item(i), true));
253	}
254	} else {
255	// no structure nodes, so put in a dummy doc node
256	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
257	if (document_name.length()!=0){
258	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
259	}else if (has_href.length()!=0){
260	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
261	doc_node.setAttribute("externalURL", has_rl);
262	}
263	the_document.appendChild(doc_node);
264	has_dummy = true;
265	}
266	} else { // a simple type - we dont have a dummy node for simple
267	// should think about this more
268	// no structure request, so just put in a dummy doc node
269	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
270	if (document_name.length()!=0){
271	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
272	}else if (has_href.length()!=0){
273	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
274	doc_node.setAttribute("externalURL", has_rl);
275	}
276	the_document.appendChild(doc_node);
277	has_dummy = true;
278	}
279
280	// Build a request to obtain some document metadata
281	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
282	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
283	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
284	dm_message.appendChild(dm_request);
285	// Create a parameter list to specify the required metadata information
286
287	HashSet meta_names = new HashSet();
288	meta_names.add("Title"); // the default
289	if (format_elem != null) {
290	extractMetadataNames(format_elem, meta_names);
291	}
292
293	Element dm_param_list = createMetadataParamList(meta_names);
294	if (service_params != null) {
295	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
296	}
297
298	dm_request.appendChild(dm_param_list);
299
300
301	// create the doc node list for the metadata request
302	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
303	dm_request.appendChild(dm_doc_list);
304
305	// Add each node from the structure response into the metadata request
306	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
307	for (int i = 0; i < doc_nodes.getLength(); i++) {
308	Element doc_node = (Element) doc_nodes.item(i);
309	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
310
311	// Add the documentNode to the list
312	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313	dm_doc_list.appendChild(dm_doc_node);
314	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
315	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
316	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
317	}
318
319	// we also want a metadata request to the top level document to get
320	// assocfilepath - this could be cached too
321	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
322	dm_message.appendChild(doc_meta_request);
323	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
324	if (service_params != null) {
325	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
326	}
327
328	doc_meta_request.appendChild(doc_meta_param_list);
329	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
330	doc_meta_param_list.appendChild(doc_param);
331	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
332	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
333
334	// create the doc node list for the metadata request
335	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
336	doc_meta_request.appendChild(doc_list);
337
338	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
339	// the node we want is the root document node
340	if (document_name.length()!=0){
341	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
342	}else if (has_href.length()!=0){
343	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
344	doc_node.setAttribute("externalURL", has_rl);
345	}
346	doc_list.appendChild(doc_node);
347	Element dm_response_message = (Element) this.mr.process(dm_message);
348	if (processErrorElements(dm_response_message, page_response)) {
349	return result;
350	}
351
352	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
353	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
354
355	// Merge the metadata with the structure information
356	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
357	for (int i = 0; i < doc_nodes.getLength(); i++) {
358	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
359	}
360	// get teh top level doc metadata out
361	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
362	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
363	if (doc_meta_list != null) {
364	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
365	}
366	// Build a request to obtain some document content
367	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
368	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
369	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
370	dc_message.appendChild(dc_request);
371
372
373	// Create a parameter list to specify the request parameters - empty for now
374	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
375	if (service_params != null) {
376	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
377	}
378
379	dc_request.appendChild(dc_param_list);
380
381	// get the content
382	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
383	if (expand_document) {
384	dc_request.appendChild(dm_doc_list);
385	} else {
386	dc_request.appendChild(basic_doc_list);
387	}
388	logger.debug("request = "+converter.getString(dc_message));
389	Element dc_response_message = (Element) this.mr.process(dc_message);
390	if (processErrorElements(dc_response_message, page_response)) {
391	return result;
392	}
393
394	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
395
396	if (expand_document) {
397	// Merge the content with the structure information
398	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
399	for (int i = 0; i < doc_nodes.getLength(); i++) {
400	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
401	if (content != null) {
402	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
403	}
404	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
405	}
406	} else {
407	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
408	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
409	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
410	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
411
412	if (dc_response_doc_content == null) {
413	// no content to add
414	if (dc_response_doc_external !=null){
415	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
416
417	the_document.setAttribute("selectedNode", modified_doc_id);
418	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
419	}
420	return result;
421	}
422	if (highlight_query_terms) {
423	dc_response_doc.removeChild(dc_response_doc_content);
424
425	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
426	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
427	}
428
429
430	if (provide_annotations) {
431	String service_selected = (String)params.get(ENRICH_DOC_ARG);
432	if (service_selected != null && service_selected.equals("1")) {
433	// now we can modifiy the response doc if needed
434	String enrich_service = (String)params.get(GSParams.SERVICE);
435	// send a message to the service
436	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
437	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
438	enrich_message.appendChild(enrich_request);
439	// check for parameters
440	HashMap e_service_params = (HashMap)params.get("s1");
441	if (e_service_params != null) {
442	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
443	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
444	enrich_request.appendChild(enrich_pl);
445	}
446	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
447	enrich_request.appendChild(e_doc_list);
448	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
449
450	Node enrich_response = this.mr.process(enrich_message);
451
452	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
453	path = GSPath.createPath(links);
454	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
455
456	}
457	} // if provide_annotations
458
459
460	// use the returned id rather than the sent one cos there may have
461	// been modifiers such as .pr that are removed.
462	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
463	the_document.setAttribute("selectedNode", modified_doc_id);
464	if (has_dummy) {
465	// change the id if necessary and add the content
466	Element dummy_node = (Element)doc_nodes.item(0);
467
468	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
469	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
470	// hack for simple type
471	if (document_type.equals("simple")) {
472	// we dont want the internal docNode, just want the content and metadata in the document
473	// rethink this!!
474	the_document.removeChild(dummy_node);
475
476	NodeList dummy_children = dummy_node.getChildNodes();
477	//for (int i=0; i<dummy_children.getLength(); i++) {
478	for (int i=dummy_children.getLength()-1; i>=0; i--) {
479	the_document.appendChild(dummy_children.item(i));
480
481	}
482	}
483	} else {
484	// Merge the document content with the metadata and structure information
485	for (int i = 0; i < doc_nodes.getLength(); i++) {
486	Node dn = doc_nodes.item(i);
487	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
488	if (dn_id.equals(modified_doc_id)) {
489	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
490	break;
491	}
492	}
493	}
494	}
495	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
496	return result;
497	}
498
499	/** tell the param class what its arguments are
500	* if an action has its own arguments, this should add them to the params
501	* object - particularly important for args that should not be saved */
502	public boolean getActionParameters(GSParams params) {
503	params.addParameter(GOTO_PAGE_ARG, false);
504	params.addParameter(ENRICH_DOC_ARG, false);
505	return true;
506	}
507
508
509	/** this method gets the collection description, the format info, the
510	* list of enrich services, etc - stuff that is needed for the page,
511	* but is the same whatever the query is - should be cached */
512	protected boolean getBackgroundData(Element page_response,
513	String collection, String lang,
514	String uid) {
515
516	// create a message to process - contains requests for the collection
517	// description, the format element, the enrich services on offer
518	// these could all be cached
519	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
520	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
521	// the format request - ignore for now, where does this request go to??
522	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
523	info_message.appendChild(format_request);
524
525	// the enrich_services request - only do this if provide_annotations is true
526
527	if (provide_annotations) {
528	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
529	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
530	info_message.appendChild(enrich_services_request);
531	}
532
533	Element info_response = (Element)this.mr.process(info_message);
534
535	// the collection is the first response
536	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
537	Element format_resp = (Element) responses.item(0);
538
539	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
540	if (format_elem != null) {
541	logger.debug("doc action found a format statement");
542	// set teh format type
543	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
544	page_response.appendChild(this.doc.importNode(format_elem, true));
545	}
546
547	if (provide_annotations) {
548	Element services_resp = (Element)responses.item(1);
549
550	// a new message for the mr
551	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
552
553	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
554	boolean service_found = false;
555	for (int j=0; j<e_services.getLength(); j++) {
556	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
557	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
558	enrich_message.appendChild(s);
559	service_found = true;
560	}
561	}
562	if (service_found) {
563	Element enrich_response = (Element)this.mr.process(enrich_message);
564
565	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
566	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
567	for (int i=0; i<e_responses.getLength(); i++) {
568	Element e_resp = (Element)e_responses.item(i);
569	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
570	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
571	service_list.appendChild(e_service);
572	}
573	page_response.appendChild(service_list);
574	}
575	} // if provide_annotations
576	return true;
577
578	}
579
580	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
581	*/
582	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
583
584	// do the query again to get term info
585	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
586	HashMap params = GSXML.extractParams(cgi_param_list, false);
587
588	HashMap previous_params = (HashMap)params.get("p");
589	if (previous_params == null) {
590	return dc_response_doc_content;
591	}
592	String service_name = (String)previous_params.get(GSParams.SERVICE);
593	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
594	logger.debug("invalid service, not doing highlighting");
595	return dc_response_doc_content;
596	}
597	String collection = (String)params.get(GSParams.COLLECTION);
598	String lang = request.getAttribute(GSXML.LANG_ATT);
599	String uid = request.getAttribute(GSXML.USER_ID_ATT);
600	String to = GSPath.appendLink(collection, service_name);
601
602	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
603	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
604	mr_query_message.appendChild(mr_query_request);
605
606	// paramList
607	HashMap service_params = (HashMap)params.get("s1");
608
609	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
610	GSXML.addParametersToList(this.doc, query_param_list, service_params);
611	mr_query_request.appendChild(query_param_list);
612
613	// do the query
614	Element mr_query_response = (Element)this.mr.process(mr_query_message);
615
616	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
617	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
618	if (query_term_list_element == null) {
619	// no term info
620	logger.error("No query term information.\n");
621	return dc_response_doc_content;
622	}
623
624	String content = GSXML.getNodeText(dc_response_doc_content);
625
626	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
627	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
628
629	HashSet query_term_variants = new HashSet();
630	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
631	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
632	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
633	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
634	for (int j = 0; j < equivalent_terms.length; j++) {
635	query_term_variants.add(equivalent_terms[j]);
636	}
637	}
638
639	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
640
641	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
642	String performed_query = GSXML.getNodeText(query_element) + " ";
643
644	ArrayList phrase_query_p_term_variants_list = new ArrayList();
645	int term_start = 0;
646	boolean in_term = false;
647	boolean in_phrase = false;
648	for (int i = 0; i < performed_query.length(); i++) {
649	char character = performed_query.charAt(i);
650	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
651
652	// Has a query term just started?
653	if (in_term == false && is_character_letter_or_digit == true) {
654	in_term = true;
655	term_start = i;
656	}
657
658	// Or has a term just finished?
659	else if (in_term == true && is_character_letter_or_digit == false) {
660	in_term = false;
661	String term = performed_query.substring(term_start, i);
662
663	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
664	if (term_element != null) {
665
666	HashSet phrase_query_p_term_x_variants = new HashSet();
667
668	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
669	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
670	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
671	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
672	for (int k = 0; k < term_equivalent_terms.length; k++) {
673	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
674	}
675	}
676	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
677
678	if (in_phrase == false) {
679	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
680	phrase_query_p_term_variants_list = new ArrayList();
681	}
682	}
683	}
684	// Watch for phrases (surrounded by quotes)
685	if (character == '\"') {
686	// Has a phrase just started?
687	if (in_phrase == false) {
688	in_phrase = true;
689	}
690	// Or has a phrase just finished?
691	else if (in_phrase == true) {
692	in_phrase = false;
693	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
694	}
695
696	phrase_query_p_term_variants_list = new ArrayList();
697	}
698	}
699
700	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
701	}
702
703
704	/**
705	* Highlights query terms in a piece of text.
706	*/
707	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
708	{
709	// Convert the content string to an array of characters for speed
710	char[] content_characters = new char[content.length()];
711	content.getChars(0, content.length(), content_characters, 0);
712
713	// Now skim through the content, identifying word matches
714	ArrayList word_matches = new ArrayList();
715	int word_start = 0;
716	boolean in_word = false;
717	boolean preceding_word_matched = false;
718	for (int i = 0; i < content_characters.length; i++) {
719	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
720
721	// Has a word just started?
722	if (in_word == false && is_character_letter_or_digit == true) {
723	in_word = true;
724	word_start = i;
725	}
726
727	// Or has a word just finished?
728	else if (in_word == true && is_character_letter_or_digit == false) {
729	in_word = false;
730
731	// Check if the word matches any of the query term equivalents
732	String word = new String(content_characters, word_start, (i - word_start));
733	if (query_term_variants.contains(word)) {
734	// We have found a matching word, so remember its location
735	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
736	preceding_word_matched = true;
737	}
738	else {
739	preceding_word_matched = false;
740	}
741	}
742	}
743
744	// Don't forget the last word...
745	if (in_word == true) {
746	// Check if the word matches any of the query term equivalents
747	String word = new String(content_characters, word_start, (content_characters.length - word_start));
748	if (query_term_variants.contains(word)) {
749	// We have found a matching word, so remember its location
750	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
751	}
752	}
753
754	ArrayList highlight_start_positions = new ArrayList();
755	ArrayList highlight_end_positions = new ArrayList();
756
757	// Deal with phrases now
758	ArrayList partial_phrase_matches = new ArrayList();
759	for (int i = 0; i < word_matches.size(); i++) {
760	WordMatch word_match = (WordMatch) word_matches.get(i);
761
762	// See if any partial phrase matches are extended by this word
763	if (word_match.preceding_word_matched) {
764	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
765	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
766	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
767	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
768	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
769	partial_phrase_match.num_words_matched++;
770
771	// Has a complete phrase match occurred?
772	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
773	// Check for overlaps by looking at the previous highlight range
774	if (!highlight_end_positions.isEmpty()) {
775	int last_highlight_index = highlight_end_positions.size() - 1;
776	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
777	if (last_highlight_end > partial_phrase_match.start_position) {
778	// There is an overlap, so remove the previous phrase match
779	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
780	highlight_end_positions.remove(last_highlight_index);
781	partial_phrase_match.start_position = last_highlight_start;
782	}
783	}
784
785	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
786	highlight_end_positions.add(new Integer(word_match.end_position));
787	}
788	// No, but add the partial match back into the list for next time
789	else {
790	partial_phrase_matches.add(partial_phrase_match);
791	}
792	}
793	}
794	}
795	else {
796	partial_phrase_matches.clear();
797	}
798
799	// See if this word is at the start of any of the phrases
800	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
801	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
802	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
803	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
804	// If this phrase is just one word long, we have a complete match
805	if (phrase_query_p_term_variants_list.size() == 1) {
806	highlight_start_positions.add(new Integer(word_match.start_position));
807	highlight_end_positions.add(new Integer(word_match.end_position));
808	}
809	// Otherwise we have the start of a potential phrase match
810	else {
811	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
812	}
813	}
814	}
815	}
816
817	// Now add the annotation tags into the document at the correct points
818	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
819
820	int last_wrote = 0;
821	for (int i = 0; i < highlight_start_positions.size(); i++) {
822	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
823	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
824
825	// Print anything before the highlight range
826	if (last_wrote < highlight_start) {
827	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
828	content_element.appendChild(this.doc.createTextNode(preceding_text));
829	}
830
831	// Print the highlight text, annotated
832	if (highlight_end > last_wrote) {
833	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
834	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
835	annotation_element.setAttribute("type", "query_term");
836	content_element.appendChild(annotation_element);
837	last_wrote = highlight_end;
838	}
839	}
840
841	// Finish off any unwritten text
842	if (last_wrote < content_characters.length) {
843	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
844	content_element.appendChild(this.doc.createTextNode(remaining_text));
845	}
846
847	return content_element;
848	}
849
850
851	static private class WordMatch
852	{
853	public String word;
854	public int start_position;
855	public int end_position;
856	public boolean preceding_word_matched;
857
858	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
859	{
860	this.word = word;
861	this.start_position = start_position;
862	this.end_position = end_position;
863	this.preceding_word_matched = preceding_word_matched;
864	}
865	}
866
867
868	static private class PartialPhraseMatch
869	{
870	public int start_position;
871	public int query_phrase_number;
872	public int num_words_matched;
873
874	public PartialPhraseMatch(int start_position, int query_phrase_number)
875	{
876	this.start_position = start_position;
877	this.query_phrase_number = query_phrase_number;
878	this.num_words_matched = 1;
879	}
880	}
881	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: