Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 9007

Last change on this file since 9007 was 9007, checked in by kjdon, 19 years ago
added a check for null term element in search term highlighting code. fields for mgpp were being parsed as a term, but don't match a term element
Property svn:keywords set to `Author Date Id Revision`
File size: 34.8 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38
39	/** Action class for retrieving Documents via the message router
40	*/
41	public class DocumentAction extends Action {
42
43	// this is used to specify that the sibling nodes of a selected one should be obtained
44	public static final String SIBLING_ARG = "sib";
45	public static final String GOTO_PAGE_ARG = "gp";
46	public static final String ENRICH_DOC_ARG = "end";
47
48	/** if this is set to true, when a document is displayed, any annotation
49	* type services (enrich) will be offered to the user as well */
50	protected boolean provide_annotations = false;
51
52	protected boolean highlight_query_terms = false;
53
54	public boolean configure() {
55	super.configure();
56	String highlight = (String)config_params.get("highlightQueryTerms");
57	if (highlight != null && highlight.equals("true")) {
58	highlight_query_terms = true;
59	}
60	String annotate = (String)config_params.get("displayAnnotationService");
61	if (annotate != null && annotate.equals("true")) {
62	provide_annotations = true;
63	}
64	return true;
65	}
66	public Element process (Element message)
67	{
68	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
69
70	// the response
71	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
72	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
73	result.appendChild(page_response);
74
75	// get the request - assume only one
76	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
77	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
78	HashMap params = GSXML.extractParams(cgi_paramList, false);
79
80	// just in case there are some that need to get passed to the services
81	HashMap service_params = (HashMap)params.get("s0");
82
83	String collection = (String) params.get(GSParams.COLLECTION);
84	String lang = request.getAttribute(GSXML.LANG_ATT);
85	String uid = request.getAttribute(GSXML.USER_ID_ATT);
86	String document_name = (String) params.get(GSParams.DOCUMENT);
87	if (document_name == null \|\| document_name.equals("")) {
88	System.err.println("DocumentAction Error: no document specified!");
89	return result;
90	}
91	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
92	if (document_type == null) {
93	document_type = "simple";
94	}
95	//whether to retrieve siblings or not
96	boolean get_siblings = false;
97	String sibs = (String) params.get(SIBLING_ARG);
98	if (sibs != null && sibs.equals("1")) {
99	get_siblings = true;
100	}
101
102	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
103	if (sibling_num != null && !sibling_num.equals("")) {
104	// we have to modify the doc name
105	document_name = document_name+"."+sibling_num+".ss";
106	}
107
108	boolean expand_document = false;
109	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
110	if (ed_arg != null && ed_arg.equals("1")) {
111	expand_document = true;
112	}
113
114
115	boolean expand_contents = false;
116	if (expand_document) { // we always expand the contents with the text
117	expand_contents = true;
118	} else {
119	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
120	if (ec_arg != null && ec_arg.equals("1")) {
121	expand_contents = true;
122	}
123	}
124	// get the additional data needed for the page
125	getBackgroundData(page_response, collection, lang, uid);
126	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
127
128	// the_document is where all the doc info - structure and metadata etc
129	// is added into, to be returned in the page
130	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
131	page_response.appendChild(the_document);
132
133	// set the doctype from the cgi arg as an attribute
134	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
135
136	// create a basic doc list containing the current node
137	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
138	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
139	basic_doc_list.appendChild(current_doc);
140	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
141
142	// Create a parameter list to specify the required structure information
143	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
144
145	if (service_params != null) {
146	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
147	}
148
149	Element ds_param = null;
150	boolean get_structure = false;
151	boolean get_structure_info = false;
152	if (document_type.equals("paged")) {
153	get_structure_info = true;
154	// get teh info needed for paged naviagtion
155	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
156	ds_param_list.appendChild(ds_param);
157	ds_param.setAttribute(GSXML.NAME_ATT, "info");
158	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
159	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
160	ds_param_list.appendChild(ds_param);
161	ds_param.setAttribute(GSXML.NAME_ATT, "info");
162	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
163	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
164	ds_param_list.appendChild(ds_param);
165	ds_param.setAttribute(GSXML.NAME_ATT, "info");
166	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
167
168	} else if (document_type.equals("hierarchy")){
169	get_structure = true;
170	if (expand_contents) {
171	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
172	ds_param_list.appendChild(ds_param);
173	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
174	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
175	} else {
176	// get the info needed for table of contents
177	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
178	ds_param_list.appendChild(ds_param);
179	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
180	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
181	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
182	ds_param_list.appendChild(ds_param);
183	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
184	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
185	if (get_siblings) {
186	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
187	ds_param_list.appendChild(ds_param);
188	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
189	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
190	}
191	}
192	} else {
193	// we dont need any structure
194	}
195
196	boolean has_dummy = false;
197	if (get_structure \|\| get_structure_info) {
198
199	// Build a request to obtain the document structure
200	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
201	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
202	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
203	ds_message.appendChild(ds_request);
204	ds_request.appendChild(ds_param_list);
205
206	// create a doc_node_list and put in the doc_node that we are interested in
207	ds_request.appendChild(basic_doc_list);
208
209	// Process the document structure retrieve message
210	Element ds_response_message = (Element) this.mr.process(ds_message);
211
212	// get the info and print out
213	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
214	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
215	path = GSPath.appendLink(path, "nodeStructureInfo");
216	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
217	// get the doc_node bit
218	if (ds_response_struct_info != null) {
219	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
220	}
221	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
222	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
223	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
224	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
225
226	if (ds_response_structure != null) {
227	// add the contents of the structure bit into the_document
228	NodeList structs = ds_response_structure.getChildNodes();
229	for (int i=0; i<structs.getLength();i++) {
230	the_document.appendChild(this.doc.importNode(structs.item(i), true));
231	}
232	} else {
233	// no structure nodes, so put in a dummy doc node
234	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
235	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
236	the_document.appendChild(doc_node);
237	has_dummy = true;
238	}
239	} else { // a simple type - we dont have a dummy node for simple
240	// should think about this more
241	// no structure request, so just put in a dummy doc node
242	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
243	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
244	the_document.appendChild(doc_node);
245	has_dummy = true;
246	}
247
248	// Build a request to obtain some document metadata
249	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
250	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
251	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
252	dm_message.appendChild(dm_request);
253	// Create a parameter list to specify the required metadata information
254
255	HashSet meta_names = new HashSet();
256	meta_names.add("Title"); // the default
257	if (format_elem != null) {
258	extractMetadataNames(format_elem, meta_names);
259	}
260
261	Element dm_param_list = createMetadataParamList(meta_names);
262	if (service_params != null) {
263	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
264	}
265
266	dm_request.appendChild(dm_param_list);
267
268
269	// create the doc node list for the metadata request
270	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
271	dm_request.appendChild(dm_doc_list);
272
273	// Add each node from the structure response into the metadata request
274	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
275	for (int i = 0; i < doc_nodes.getLength(); i++) {
276	Element doc_node = (Element) doc_nodes.item(i);
277	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
278
279	// Add the documentNode to the list
280	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
281	dm_doc_list.appendChild(dm_doc_node);
282	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
283	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
284	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
285	}
286
287	// we also want a metadata request to the top level document to get
288	// assocfilepath - this could be cached too
289	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
290	dm_message.appendChild(doc_meta_request);
291	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
292	if (service_params != null) {
293	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
294	}
295
296	doc_meta_request.appendChild(doc_meta_param_list);
297	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
298	doc_meta_param_list.appendChild(doc_param);
299	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
300	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
301
302	// create the doc node list for the metadata request
303	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
304	doc_meta_request.appendChild(doc_list);
305
306	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
307	// teh node we want is the root document node
308	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
309	doc_list.appendChild(doc_node);
310	Element dm_response_message = (Element) this.mr.process(dm_message);
311
312	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
313	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
314
315	// Merge the metadata with the structure information
316	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
317	for (int i = 0; i < doc_nodes.getLength(); i++) {
318	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
319	}
320	// get teh top level doc metadata out
321	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
322	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
323	if (doc_meta_list != null) {
324	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
325	}
326	// Build a request to obtain some document content
327	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
328	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
329	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
330	dc_message.appendChild(dc_request);
331
332
333	// Create a parameter list to specify the request parameters - empty for now
334	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
335	if (service_params != null) {
336	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
337	}
338
339	dc_request.appendChild(dc_param_list);
340
341	// get the content
342	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
343	if (expand_document) {
344	dc_request.appendChild(dm_doc_list);
345	} else {
346	dc_request.appendChild(basic_doc_list);
347	}
348	System.err.println("request = "+converter.getString(dc_message));
349	Element dc_response_message = (Element) this.mr.process(dc_message);
350	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
351
352	if (expand_document) {
353	// Merge the content with the structure information
354	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
355	for (int i = 0; i < doc_nodes.getLength(); i++) {
356	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
357	if (content != null) {
358	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
359	}
360	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
361	}
362	} else {
363
364	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
365	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
366	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
367
368	if (dc_response_doc_content == null) {
369	// no content to add
370	return result;
371	}
372	if (highlight_query_terms) {
373	dc_response_doc.removeChild(dc_response_doc_content);
374
375	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
376	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
377	}
378
379
380	if (provide_annotations) {
381	String service_selected = (String)params.get(ENRICH_DOC_ARG);
382	if (service_selected != null && service_selected.equals("1")) {
383	// now we can modifiy the response doc if needed
384	String enrich_service = (String)params.get(GSParams.SERVICE);
385	// send a message to the service
386	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
387	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
388	enrich_message.appendChild(enrich_request);
389	// check for parameters
390	HashMap e_service_params = (HashMap)params.get("s1");
391	if (e_service_params != null) {
392	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
393	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
394	enrich_request.appendChild(enrich_pl);
395	}
396	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
397	enrich_request.appendChild(e_doc_list);
398	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
399
400	Element enrich_response = this.mr.process(enrich_message);
401
402	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
403	path = GSPath.createPath(links);
404	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
405
406	}
407	} // if provide_annotations
408
409
410	// use the returned id rather than the sent one cos there may have
411	// been modifiers such as .pr that are removed.
412	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
413	the_document.setAttribute("selectedNode", modified_doc_id);
414	if (has_dummy) {
415	// change the id if necessary and add the content
416	Element dummy_node = (Element)doc_nodes.item(0);
417
418	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
419	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
420	// hack for simple type
421	if (document_type.equals("simple")) {
422	// we dont want the internal docNode, just want the content and metadata in the document
423	// rethink this!!
424	the_document.removeChild(dummy_node);
425
426	NodeList dummy_children = dummy_node.getChildNodes();
427	//for (int i=0; i<dummy_children.getLength(); i++) {
428	for (int i=dummy_children.getLength()-1; i>=0; i--) {
429	the_document.appendChild(dummy_children.item(i));
430
431	}
432	}
433	} else {
434	// Merge the document content with the metadata and structure information
435	for (int i = 0; i < doc_nodes.getLength(); i++) {
436	Node dn = doc_nodes.item(i);
437	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
438	if (dn_id.equals(modified_doc_id)) {
439	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
440	break;
441	}
442	}
443	}
444	}
445	///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
446	return result;
447	}
448
449	/** tell the param class what its arguments are
450	* if an action has its own arguments, this should add them to the params
451	* object - particularly important for args that should not be saved */
452	public boolean getActionParameters(GSParams params) {
453	params.addParameter(GOTO_PAGE_ARG, false);
454	params.addParameter(ENRICH_DOC_ARG, false);
455	return true;
456	}
457
458
459	/** this method gets the collection description, the format info, the
460	* list of enrich services, etc - stuff that is needed for the page,
461	* but is the same whatever the query is - should be cached */
462	protected boolean getBackgroundData(Element page_response,
463	String collection, String lang,
464	String uid) {
465
466	// create a message to process - contains requests for the collection
467	// description, the format element, the enrich services on offer
468	// these could all be cached
469	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
470	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
471	// the format request - ignore for now, where does this request go to??
472	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
473	info_message.appendChild(format_request);
474
475	// the enrich_services request - only do this if provide_annotations is true
476
477	if (provide_annotations) {
478	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
479	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
480	info_message.appendChild(enrich_services_request);
481	}
482
483	Element info_response = (Element)this.mr.process(info_message);
484
485	// the collection is the first response
486	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
487	Element format_resp = (Element) responses.item(0);
488
489	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
490	if (format_elem != null) {
491	///ystem.out.println("doc action found a format statement");
492	// set teh format type
493	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
494	page_response.appendChild(this.doc.importNode(format_elem, true));
495	}
496
497	if (provide_annotations) {
498	Element services_resp = (Element)responses.item(1);
499
500	// a new message for the mr
501	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
502
503	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
504	boolean service_found = false;
505	for (int j=0; j<e_services.getLength(); j++) {
506	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
507	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
508	enrich_message.appendChild(s);
509	service_found = true;
510	}
511	}
512	if (service_found) {
513	Element enrich_response = this.mr.process(enrich_message);
514
515	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
516	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
517	for (int i=0; i<e_responses.getLength(); i++) {
518	Element e_resp = (Element)e_responses.item(i);
519	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
520	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
521	service_list.appendChild(e_service);
522	}
523	page_response.appendChild(service_list);
524	}
525	} // if provide_annotations
526	return true;
527
528	}
529
530	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
531	*/
532	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
533
534	// do the query again to get term info
535	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
536	HashMap params = GSXML.extractParams(cgi_param_list, false);
537
538
539	String service_name = (String)((HashMap)params.get("p")).get(GSParams.SERVICE);
540	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
541	System.err.println("DocumentAction: invalid service, not doing highlighting");
542	return dc_response_doc_content;
543	}
544	String collection = (String)params.get(GSParams.COLLECTION);
545	String lang = request.getAttribute(GSXML.LANG_ATT);
546	String uid = request.getAttribute(GSXML.USER_ID_ATT);
547	String to = GSPath.appendLink(collection, service_name);
548
549	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
550	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
551	mr_query_message.appendChild(mr_query_request);
552
553	// paramList
554	HashMap service_params = (HashMap)params.get("s1");
555
556	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
557	GSXML.addParametersToList(this.doc, query_param_list, service_params);
558	mr_query_request.appendChild(query_param_list);
559
560	// do the query
561	Element mr_query_response = (Element)this.mr.process(mr_query_message);
562
563	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
564	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
565	if (query_term_list_element == null) {
566	// no term info
567	System.err.println("DocumentAction: Warning: No query term information.\n");
568	return dc_response_doc_content;
569	}
570
571	String content = GSXML.getNodeText(dc_response_doc_content);
572
573	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
574	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
575
576	HashSet query_term_variants = new HashSet();
577	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
578	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
579	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
580	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
581	for (int j = 0; j < equivalent_terms.length; j++) {
582	System.err.println("Adding query term variant: " + equivalent_terms[j]);
583	query_term_variants.add(equivalent_terms[j]);
584	}
585	}
586
587	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
588
589	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
590	String performed_query = GSXML.getNodeText(query_element) + " ";
591
592	ArrayList phrase_query_p_term_variants_list = new ArrayList();
593	int term_start = 0;
594	boolean in_term = false;
595	boolean in_phrase = false;
596	for (int i = 0; i < performed_query.length(); i++) {
597	char character = performed_query.charAt(i);
598	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
599
600	// Has a query term just started?
601	if (in_term == false && is_character_letter_or_digit == true) {
602	in_term = true;
603	term_start = i;
604	}
605
606	// Or has a term just finished?
607	else if (in_term == true && is_character_letter_or_digit == false) {
608	in_term = false;
609	String term = performed_query.substring(term_start, i);
610	System.err.println("Term: " + term);
611
612	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
613	if (term_element != null) {
614
615	HashSet phrase_query_p_term_x_variants = new HashSet();
616
617	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
618	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
619	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
620	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
621	for (int k = 0; k < term_equivalent_terms.length; k++) {
622	System.err.println("Adding query term variant: " + term_equivalent_terms[k]);
623	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
624	}
625	}
626	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
627
628	if (in_phrase == false) {
629	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
630	phrase_query_p_term_variants_list = new ArrayList();
631	}
632	}
633	}
634	// Watch for phrases (surrounded by quotes)
635	if (character == '\"') {
636	// Has a phrase just started?
637	if (in_phrase == false) {
638	in_phrase = true;
639	}
640	// Or has a phrase just finished?
641	else if (in_phrase == true) {
642	in_phrase = false;
643	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
644	}
645
646	phrase_query_p_term_variants_list = new ArrayList();
647	}
648	}
649
650	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
651	}
652
653
654	/**
655	* Highlights query terms in a piece of text.
656	*/
657	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
658	{
659	// Convert the content string to an array of characters for speed
660	char[] content_characters = new char[content.length()];
661	content.getChars(0, content.length(), content_characters, 0);
662
663	// Now skim through the content, identifying word matches
664	ArrayList word_matches = new ArrayList();
665	int word_start = 0;
666	boolean in_word = false;
667	boolean preceding_word_matched = false;
668	for (int i = 0; i < content_characters.length; i++) {
669	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
670
671	// Has a word just started?
672	if (in_word == false && is_character_letter_or_digit == true) {
673	in_word = true;
674	word_start = i;
675	}
676
677	// Or has a word just finished?
678	else if (in_word == true && is_character_letter_or_digit == false) {
679	in_word = false;
680
681	// Check if the word matches any of the query term equivalents
682	String word = new String(content_characters, word_start, (i - word_start));
683	if (query_term_variants.contains(word)) {
684	// We have found a matching word, so remember its location
685	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
686	preceding_word_matched = true;
687	}
688	else {
689	preceding_word_matched = false;
690	}
691	}
692	}
693
694	// Don't forget the last word...
695	if (in_word == true) {
696	// Check if the word matches any of the query term equivalents
697	String word = new String(content_characters, word_start, (content_characters.length - word_start));
698	if (query_term_variants.contains(word)) {
699	// We have found a matching word, so remember its location
700	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
701	}
702	}
703
704	ArrayList highlight_start_positions = new ArrayList();
705	ArrayList highlight_end_positions = new ArrayList();
706
707	// Deal with phrases now
708	ArrayList partial_phrase_matches = new ArrayList();
709	for (int i = 0; i < word_matches.size(); i++) {
710	WordMatch word_match = (WordMatch) word_matches.get(i);
711
712	// See if any partial phrase matches are extended by this word
713	if (word_match.preceding_word_matched) {
714	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
715	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
716	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
717	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
718	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
719	partial_phrase_match.num_words_matched++;
720
721	// Has a complete phrase match occurred?
722	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
723	// Check for overlaps by looking at the previous highlight range
724	if (!highlight_end_positions.isEmpty()) {
725	int last_highlight_index = highlight_end_positions.size() - 1;
726	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
727	if (last_highlight_end > partial_phrase_match.start_position) {
728	// There is an overlap, so remove the previous phrase match
729	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
730	highlight_end_positions.remove(last_highlight_index);
731	partial_phrase_match.start_position = last_highlight_start;
732	}
733	}
734
735	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
736	highlight_end_positions.add(new Integer(word_match.end_position));
737	}
738	// No, but add the partial match back into the list for next time
739	else {
740	partial_phrase_matches.add(partial_phrase_match);
741	}
742	}
743	}
744	}
745	else {
746	partial_phrase_matches.clear();
747	}
748
749	// See if this word is at the start of any of the phrases
750	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
751	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
752	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
753	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
754	// If this phrase is just one word long, we have a complete match
755	if (phrase_query_p_term_variants_list.size() == 1) {
756	highlight_start_positions.add(new Integer(word_match.start_position));
757	highlight_end_positions.add(new Integer(word_match.end_position));
758	}
759	// Otherwise we have the start of a potential phrase match
760	else {
761	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
762	}
763	}
764	}
765	}
766
767	// Now add the annotation tags into the document at the correct points
768	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
769
770	int last_wrote = 0;
771	for (int i = 0; i < highlight_start_positions.size(); i++) {
772	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
773	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
774
775	// Print anything before the highlight range
776	if (last_wrote < highlight_start) {
777	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
778	// System.err.print(preceding_text);
779	content_element.appendChild(this.doc.createTextNode(preceding_text));
780	}
781
782	// Print the highlight text, annotated
783	if (highlight_end > last_wrote) {
784	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
785	// System.err.print("\|" + highlight_text + "\|");
786	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
787	annotation_element.setAttribute("type", "query_term");
788	content_element.appendChild(annotation_element);
789	last_wrote = highlight_end;
790	}
791	}
792
793	// Finish off any unwritten text
794	if (last_wrote < content_characters.length) {
795	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
796	// System.err.print(remaining_text);
797	content_element.appendChild(this.doc.createTextNode(remaining_text));
798	}
799
800	return content_element;
801	}
802
803
804	static private class WordMatch
805	{
806	public String word;
807	public int start_position;
808	public int end_position;
809	public boolean preceding_word_matched;
810
811	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
812	{
813	this.word = word;
814	this.start_position = start_position;
815	this.end_position = end_position;
816	this.preceding_word_matched = preceding_word_matched;
817	}
818	}
819
820
821	static private class PartialPhraseMatch
822	{
823	public int start_position;
824	public int query_phrase_number;
825	public int num_words_matched;
826
827	public PartialPhraseMatch(int start_position, int query_phrase_number)
828	{
829	this.start_position = start_position;
830	this.query_phrase_number = query_phrase_number;
831	this.num_words_matched = 1;
832	}
833	}
834	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: