Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 13270

Last change on this file since 13270 was 13270, checked in by shaoqun, 17 years ago
replace Category class which is deprecated with Logger class
Property svn:keywords set to `Author Date Id Revision`
File size: 35.0 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router
41	*/
42	public class DocumentAction extends Action {
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName()); // log4j logger shared by all instances of this action
45
46	// this is used to specify that the sibling nodes of a selected one should be obtained
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp"; // when present, process() appends "." + value + ".ss" to the requested document id
49	public static final String ENRICH_DOC_ARG = "end"; // when "1" (and annotations are enabled), process() sends the content to the selected enrich service
50
51	/** if this is set to true, when a document is displayed, any annotation
52	* type services (enrich) will be offered to the user as well */
53	protected boolean provide_annotations = false;
54
55	protected boolean highlight_query_terms = false; // set by configure() from "highlightQueryTerms"; makes process() call highlightQueryTerms()
56
57	public boolean configure() {
58	super.configure();
59	String highlight = (String)config_params.get("highlightQueryTerms");
60	if (highlight != null && highlight.equals("true")) {
61	highlight_query_terms = true;
62	}
63	String annotate = (String)config_params.get("displayAnnotationService");
64	if (annotate != null && annotate.equals("true")) {
65	provide_annotations = true;
66	}
67	return true;
68	}
69	public Element process (Element message)
70	{
71	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73	// the response
74	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
75	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
76	result.appendChild(page_response);
77
78	// get the request - assume only one
79	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
80	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
81	HashMap params = GSXML.extractParams(cgi_paramList, false);
82
83	// just in case there are some that need to get passed to the services
84	HashMap service_params = (HashMap)params.get("s0");
85
86	String collection = (String) params.get(GSParams.COLLECTION);
87	String lang = request.getAttribute(GSXML.LANG_ATT);
88	String uid = request.getAttribute(GSXML.USER_ID_ATT);
89	String document_name = (String) params.get(GSParams.DOCUMENT);
90	if (document_name == null \|\| document_name.equals("")) {
91	logger.error("no document specified!");
92	return result;
93	}
94	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
95	if (document_type == null) {
96	document_type = "simple";
97	}
98	//whether to retrieve siblings or not
99	boolean get_siblings = false;
100	String sibs = (String) params.get(SIBLING_ARG);
101	if (sibs != null && sibs.equals("1")) {
102	get_siblings = true;
103	}
104
105	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
106	if (sibling_num != null && !sibling_num.equals("")) {
107	// we have to modify the doc name
108	document_name = document_name+"."+sibling_num+".ss";
109	}
110
111	boolean expand_document = false;
112	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
113	if (ed_arg != null && ed_arg.equals("1")) {
114	expand_document = true;
115	}
116
117
118	boolean expand_contents = false;
119	if (expand_document) { // we always expand the contents with the text
120	expand_contents = true;
121	} else {
122	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
123	if (ec_arg != null && ec_arg.equals("1")) {
124	expand_contents = true;
125	}
126	}
127	// get the additional data needed for the page
128	getBackgroundData(page_response, collection, lang, uid);
129	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
130
131	// the_document is where all the doc info - structure and metadata etc
132	// is added into, to be returned in the page
133	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
134	page_response.appendChild(the_document);
135
136	// set the doctype from the cgi arg as an attribute
137	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
138
139	// create a basic doc list containing the current node
140	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
141	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
142	basic_doc_list.appendChild(current_doc);
143	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
144
145	// Create a parameter list to specify the required structure information
146	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
147
148	if (service_params != null) {
149	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
150	}
151
152	Element ds_param = null;
153	boolean get_structure = false;
154	boolean get_structure_info = false;
155	if (document_type.equals("paged")) {
156	get_structure_info = true;
157	// get teh info needed for paged naviagtion
158	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
159	ds_param_list.appendChild(ds_param);
160	ds_param.setAttribute(GSXML.NAME_ATT, "info");
161	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
162	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
163	ds_param_list.appendChild(ds_param);
164	ds_param.setAttribute(GSXML.NAME_ATT, "info");
165	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
166	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
167	ds_param_list.appendChild(ds_param);
168	ds_param.setAttribute(GSXML.NAME_ATT, "info");
169	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
170
171	} else if (document_type.equals("hierarchy")){
172	get_structure = true;
173	if (expand_contents) {
174	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175	ds_param_list.appendChild(ds_param);
176	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
177	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
178	} else {
179	// get the info needed for table of contents
180	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
181	ds_param_list.appendChild(ds_param);
182	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
183	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
184	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
185	ds_param_list.appendChild(ds_param);
186	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
187	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
188	if (get_siblings) {
189	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
190	ds_param_list.appendChild(ds_param);
191	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
192	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
193	}
194	}
195	} else {
196	// we dont need any structure
197	}
198
199	boolean has_dummy = false;
200	if (get_structure \|\| get_structure_info) {
201
202	// Build a request to obtain the document structure
203	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
204	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
205	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
206	ds_message.appendChild(ds_request);
207	ds_request.appendChild(ds_param_list);
208
209	// create a doc_node_list and put in the doc_node that we are interested in
210	ds_request.appendChild(basic_doc_list);
211
212	// Process the document structure retrieve message
213	Element ds_response_message = (Element) this.mr.process(ds_message);
214	if (processErrorElements(ds_response_message, page_response)) {
215	return result;
216	}
217
218	// get the info and print out
219	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
220	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
221	path = GSPath.appendLink(path, "nodeStructureInfo");
222	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
223	// get the doc_node bit
224	if (ds_response_struct_info != null) {
225	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
226	}
227	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
228	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
229	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
230	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
231
232	if (ds_response_structure != null) {
233	// add the contents of the structure bit into the_document
234	NodeList structs = ds_response_structure.getChildNodes();
235	for (int i=0; i<structs.getLength();i++) {
236	the_document.appendChild(this.doc.importNode(structs.item(i), true));
237	}
238	} else {
239	// no structure nodes, so put in a dummy doc node
240	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
241	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
242	the_document.appendChild(doc_node);
243	has_dummy = true;
244	}
245	} else { // a simple type - we dont have a dummy node for simple
246	// should think about this more
247	// no structure request, so just put in a dummy doc node
248	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
249	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
250	the_document.appendChild(doc_node);
251	has_dummy = true;
252	}
253
254	// Build a request to obtain some document metadata
255	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
256	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
257	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
258	dm_message.appendChild(dm_request);
259	// Create a parameter list to specify the required metadata information
260
261	HashSet meta_names = new HashSet();
262	meta_names.add("Title"); // the default
263	if (format_elem != null) {
264	extractMetadataNames(format_elem, meta_names);
265	}
266
267	Element dm_param_list = createMetadataParamList(meta_names);
268	if (service_params != null) {
269	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
270	}
271
272	dm_request.appendChild(dm_param_list);
273
274
275	// create the doc node list for the metadata request
276	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
277	dm_request.appendChild(dm_doc_list);
278
279	// Add each node from the structure response into the metadata request
280	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
281	for (int i = 0; i < doc_nodes.getLength(); i++) {
282	Element doc_node = (Element) doc_nodes.item(i);
283	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
284
285	// Add the documentNode to the list
286	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
287	dm_doc_list.appendChild(dm_doc_node);
288	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
289	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
290	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
291	}
292
293	// we also want a metadata request to the top level document to get
294	// assocfilepath - this could be cached too
295	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
296	dm_message.appendChild(doc_meta_request);
297	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
298	if (service_params != null) {
299	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
300	}
301
302	doc_meta_request.appendChild(doc_meta_param_list);
303	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
304	doc_meta_param_list.appendChild(doc_param);
305	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
306	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
307
308	// create the doc node list for the metadata request
309	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
310	doc_meta_request.appendChild(doc_list);
311
312	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313	// the node we want is the root document node
314	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
315	doc_list.appendChild(doc_node);
316	Element dm_response_message = (Element) this.mr.process(dm_message);
317	if (processErrorElements(dm_response_message, page_response)) {
318	return result;
319	}
320
321	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
322	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
323
324	// Merge the metadata with the structure information
325	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
326	for (int i = 0; i < doc_nodes.getLength(); i++) {
327	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
328	}
329	// get teh top level doc metadata out
330	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
331	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
332	if (doc_meta_list != null) {
333	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
334	}
335	// Build a request to obtain some document content
336	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
337	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
338	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
339	dc_message.appendChild(dc_request);
340
341
342	// Create a parameter list to specify the request parameters - empty for now
343	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
344	if (service_params != null) {
345	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
346	}
347
348	dc_request.appendChild(dc_param_list);
349
350	// get the content
351	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
352	if (expand_document) {
353	dc_request.appendChild(dm_doc_list);
354	} else {
355	dc_request.appendChild(basic_doc_list);
356	}
357	logger.debug("request = "+converter.getString(dc_message));
358	Element dc_response_message = (Element) this.mr.process(dc_message);
359	if (processErrorElements(dc_response_message, page_response)) {
360	return result;
361	}
362
363	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
364
365	if (expand_document) {
366	// Merge the content with the structure information
367	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
368	for (int i = 0; i < doc_nodes.getLength(); i++) {
369	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
370	if (content != null) {
371	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
372	}
373	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
374	}
375	} else {
376
377	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
378	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
379	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
380
381	if (dc_response_doc_content == null) {
382	// no content to add
383	return result;
384	}
385	if (highlight_query_terms) {
386	dc_response_doc.removeChild(dc_response_doc_content);
387
388	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
389	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
390	}
391
392
393	if (provide_annotations) {
394	String service_selected = (String)params.get(ENRICH_DOC_ARG);
395	if (service_selected != null && service_selected.equals("1")) {
396	// now we can modifiy the response doc if needed
397	String enrich_service = (String)params.get(GSParams.SERVICE);
398	// send a message to the service
399	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
400	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
401	enrich_message.appendChild(enrich_request);
402	// check for parameters
403	HashMap e_service_params = (HashMap)params.get("s1");
404	if (e_service_params != null) {
405	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
406	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
407	enrich_request.appendChild(enrich_pl);
408	}
409	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
410	enrich_request.appendChild(e_doc_list);
411	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
412
413	Element enrich_response = this.mr.process(enrich_message);
414
415	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
416	path = GSPath.createPath(links);
417	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
418
419	}
420	} // if provide_annotations
421
422
423	// use the returned id rather than the sent one cos there may have
424	// been modifiers such as .pr that are removed.
425	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
426	the_document.setAttribute("selectedNode", modified_doc_id);
427	if (has_dummy) {
428	// change the id if necessary and add the content
429	Element dummy_node = (Element)doc_nodes.item(0);
430
431	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
432	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
433	// hack for simple type
434	if (document_type.equals("simple")) {
435	// we dont want the internal docNode, just want the content and metadata in the document
436	// rethink this!!
437	the_document.removeChild(dummy_node);
438
439	NodeList dummy_children = dummy_node.getChildNodes();
440	//for (int i=0; i<dummy_children.getLength(); i++) {
441	for (int i=dummy_children.getLength()-1; i>=0; i--) {
442	the_document.appendChild(dummy_children.item(i));
443
444	}
445	}
446	} else {
447	// Merge the document content with the metadata and structure information
448	for (int i = 0; i < doc_nodes.getLength(); i++) {
449	Node dn = doc_nodes.item(i);
450	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
451	if (dn_id.equals(modified_doc_id)) {
452	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
453	break;
454	}
455	}
456	}
457	}
458	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
459	return result;
460	}
461
462	/** tell the param class what its arguments are
463	* if an action has its own arguments, this should add them to the params
464	* object - particularly important for args that should not be saved */
465	public boolean getActionParameters(GSParams params) {
466	params.addParameter(GOTO_PAGE_ARG, false);
467	params.addParameter(ENRICH_DOC_ARG, false);
468	return true;
469	}
470
471
472	/** this method gets the collection description, the format info, the
473	* list of enrich services, etc - stuff that is needed for the page,
474	* but is the same whatever the query is - should be cached */
475	protected boolean getBackgroundData(Element page_response,
476	String collection, String lang,
477	String uid) {
478
479	// create a message to process - contains requests for the collection
480	// description, the format element, the enrich services on offer
481	// these could all be cached
482	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
483	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
484	// the format request - ignore for now, where does this request go to??
485	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
486	info_message.appendChild(format_request);
487
488	// the enrich_services request - only do this if provide_annotations is true
489
490	if (provide_annotations) {
491	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
492	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
493	info_message.appendChild(enrich_services_request);
494	}
495
496	Element info_response = (Element)this.mr.process(info_message);
497
498	// the collection is the first response
499	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
500	Element format_resp = (Element) responses.item(0);
501
502	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
503	if (format_elem != null) {
504	logger.debug("doc action found a format statement");
505	// set teh format type
506	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
507	page_response.appendChild(this.doc.importNode(format_elem, true));
508	}
509
510	if (provide_annotations) {
511	Element services_resp = (Element)responses.item(1);
512
513	// a new message for the mr
514	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
515
516	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
517	boolean service_found = false;
518	for (int j=0; j<e_services.getLength(); j++) {
519	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
520	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
521	enrich_message.appendChild(s);
522	service_found = true;
523	}
524	}
525	if (service_found) {
526	Element enrich_response = this.mr.process(enrich_message);
527
528	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
529	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
530	for (int i=0; i<e_responses.getLength(); i++) {
531	Element e_resp = (Element)e_responses.item(i);
532	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
533	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
534	service_list.appendChild(e_service);
535	}
536	page_response.appendChild(service_list);
537	}
538	} // if provide_annotations
539	return true;
540
541	}
542
543	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
544	*/
545	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
546
547	// do the query again to get term info
548	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
549	HashMap params = GSXML.extractParams(cgi_param_list, false);
550
551	HashMap previous_params = (HashMap)params.get("p");
552	if (previous_params == null) {
553	return dc_response_doc_content;
554	}
555	String service_name = (String)previous_params.get(GSParams.SERVICE);
556	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
557	logger.error("invalid service, not doing highlighting");
558	return dc_response_doc_content;
559	}
560	String collection = (String)params.get(GSParams.COLLECTION);
561	String lang = request.getAttribute(GSXML.LANG_ATT);
562	String uid = request.getAttribute(GSXML.USER_ID_ATT);
563	String to = GSPath.appendLink(collection, service_name);
564
565	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
566	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
567	mr_query_message.appendChild(mr_query_request);
568
569	// paramList
570	HashMap service_params = (HashMap)params.get("s1");
571
572	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
573	GSXML.addParametersToList(this.doc, query_param_list, service_params);
574	mr_query_request.appendChild(query_param_list);
575
576	// do the query
577	Element mr_query_response = (Element)this.mr.process(mr_query_message);
578
579	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
580	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
581	if (query_term_list_element == null) {
582	// no term info
583	logger.error("No query term information.\n");
584	return dc_response_doc_content;
585	}
586
587	String content = GSXML.getNodeText(dc_response_doc_content);
588
589	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
590	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
591
592	HashSet query_term_variants = new HashSet();
593	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
594	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
595	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
596	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
597	for (int j = 0; j < equivalent_terms.length; j++) {
598	query_term_variants.add(equivalent_terms[j]);
599	}
600	}
601
602	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
603
604	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
605	String performed_query = GSXML.getNodeText(query_element) + " ";
606
607	ArrayList phrase_query_p_term_variants_list = new ArrayList();
608	int term_start = 0;
609	boolean in_term = false;
610	boolean in_phrase = false;
611	for (int i = 0; i < performed_query.length(); i++) {
612	char character = performed_query.charAt(i);
613	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
614
615	// Has a query term just started?
616	if (in_term == false && is_character_letter_or_digit == true) {
617	in_term = true;
618	term_start = i;
619	}
620
621	// Or has a term just finished?
622	else if (in_term == true && is_character_letter_or_digit == false) {
623	in_term = false;
624	String term = performed_query.substring(term_start, i);
625
626	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
627	if (term_element != null) {
628
629	HashSet phrase_query_p_term_x_variants = new HashSet();
630
631	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
632	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
633	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
634	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
635	for (int k = 0; k < term_equivalent_terms.length; k++) {
636	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
637	}
638	}
639	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
640
641	if (in_phrase == false) {
642	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
643	phrase_query_p_term_variants_list = new ArrayList();
644	}
645	}
646	}
647	// Watch for phrases (surrounded by quotes)
648	if (character == '\"') {
649	// Has a phrase just started?
650	if (in_phrase == false) {
651	in_phrase = true;
652	}
653	// Or has a phrase just finished?
654	else if (in_phrase == true) {
655	in_phrase = false;
656	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
657	}
658
659	phrase_query_p_term_variants_list = new ArrayList();
660	}
661	}
662
663	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
664	}
665
666
667	/**
668	* Highlights query terms in a piece of text.
669	*/
670	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
671	{
672	// Convert the content string to an array of characters for speed
673	char[] content_characters = new char[content.length()];
674	content.getChars(0, content.length(), content_characters, 0);
675
676	// Now skim through the content, identifying word matches
677	ArrayList word_matches = new ArrayList();
678	int word_start = 0;
679	boolean in_word = false;
680	boolean preceding_word_matched = false;
681	for (int i = 0; i < content_characters.length; i++) {
682	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
683
684	// Has a word just started?
685	if (in_word == false && is_character_letter_or_digit == true) {
686	in_word = true;
687	word_start = i;
688	}
689
690	// Or has a word just finished?
691	else if (in_word == true && is_character_letter_or_digit == false) {
692	in_word = false;
693
694	// Check if the word matches any of the query term equivalents
695	String word = new String(content_characters, word_start, (i - word_start));
696	if (query_term_variants.contains(word)) {
697	// We have found a matching word, so remember its location
698	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
699	preceding_word_matched = true;
700	}
701	else {
702	preceding_word_matched = false;
703	}
704	}
705	}
706
707	// Don't forget the last word...
708	if (in_word == true) {
709	// Check if the word matches any of the query term equivalents
710	String word = new String(content_characters, word_start, (content_characters.length - word_start));
711	if (query_term_variants.contains(word)) {
712	// We have found a matching word, so remember its location
713	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
714	}
715	}
716
717	ArrayList highlight_start_positions = new ArrayList();
718	ArrayList highlight_end_positions = new ArrayList();
719
720	// Deal with phrases now
721	ArrayList partial_phrase_matches = new ArrayList();
722	for (int i = 0; i < word_matches.size(); i++) {
723	WordMatch word_match = (WordMatch) word_matches.get(i);
724
725	// See if any partial phrase matches are extended by this word
726	if (word_match.preceding_word_matched) {
727	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
728	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
729	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
730	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
731	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
732	partial_phrase_match.num_words_matched++;
733
734	// Has a complete phrase match occurred?
735	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
736	// Check for overlaps by looking at the previous highlight range
737	if (!highlight_end_positions.isEmpty()) {
738	int last_highlight_index = highlight_end_positions.size() - 1;
739	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
740	if (last_highlight_end > partial_phrase_match.start_position) {
741	// There is an overlap, so remove the previous phrase match
742	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
743	highlight_end_positions.remove(last_highlight_index);
744	partial_phrase_match.start_position = last_highlight_start;
745	}
746	}
747
748	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
749	highlight_end_positions.add(new Integer(word_match.end_position));
750	}
751	// No, but add the partial match back into the list for next time
752	else {
753	partial_phrase_matches.add(partial_phrase_match);
754	}
755	}
756	}
757	}
758	else {
759	partial_phrase_matches.clear();
760	}
761
762	// See if this word is at the start of any of the phrases
763	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
764	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
765	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
766	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
767	// If this phrase is just one word long, we have a complete match
768	if (phrase_query_p_term_variants_list.size() == 1) {
769	highlight_start_positions.add(new Integer(word_match.start_position));
770	highlight_end_positions.add(new Integer(word_match.end_position));
771	}
772	// Otherwise we have the start of a potential phrase match
773	else {
774	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
775	}
776	}
777	}
778	}
779
780	// Now add the annotation tags into the document at the correct points
781	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
782
783	int last_wrote = 0;
784	for (int i = 0; i < highlight_start_positions.size(); i++) {
785	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
786	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
787
788	// Print anything before the highlight range
789	if (last_wrote < highlight_start) {
790	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
791	// System.err.print(preceding_text);
792	content_element.appendChild(this.doc.createTextNode(preceding_text));
793	}
794
795	// Print the highlight text, annotated
796	if (highlight_end > last_wrote) {
797	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
798	// System.err.print("\|" + highlight_text + "\|");
799	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
800	annotation_element.setAttribute("type", "query_term");
801	content_element.appendChild(annotation_element);
802	last_wrote = highlight_end;
803	}
804	}
805
806	// Finish off any unwritten text
807	if (last_wrote < content_characters.length) {
808	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
809	// System.err.print(remaining_text);
810	content_element.appendChild(this.doc.createTextNode(remaining_text));
811	}
812
813	return content_element;
814	}
815
816
817	static private class WordMatch
818	{
819	public String word;
820	public int start_position;
821	public int end_position;
822	public boolean preceding_word_matched;
823
824	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
825	{
826	this.word = word;
827	this.start_position = start_position;
828	this.end_position = end_position;
829	this.preceding_word_matched = preceding_word_matched;
830	}
831	}
832
833
834	static private class PartialPhraseMatch
835	{
836	public int start_position;
837	public int query_phrase_number;
838	public int num_words_matched;
839
840	public PartialPhraseMatch(int start_position, int query_phrase_number)
841	{
842	this.start_position = start_position;
843	this.query_phrase_number = query_phrase_number;
844	this.num_words_matched = 1;
845	}
846	}
847	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: