Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 8731

Last change on this file since 8731 was 8731, checked in by mdewsnip, 19 years ago
Much more advanced query term highlighting. Supports query term highlighting and query phrase highlighting, with all permutations of case folding and stemming.
Property svn:keywords set to `Author Date Id Revision`
File size: 34.1 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38
39	/** Action class for retrieving Documents via the message router
40	*/
41	public class DocumentAction extends Action {
42
43	// this is used to specify that the sibling nodes of a selected one should be obtained
44	public static final String SIBLING_ARG = "sib";
45	public static final String GOTO_PAGE_ARG = "gp";
46	public static final String ENRICH_DOC_ARG = "end";
47
48	/** if this is set to true, when a document is displayed, any annotation
49	* type services (enrich) will be offered to the user as well */
50	protected static final boolean provide_annotations = false; //true;
51
52
53	public Element process (Element message)
54	{
55	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
56
57	// the response
58	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
59	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
60	result.appendChild(page_response);
61
62	// get the request - assume only one
63	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
64	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
65	HashMap params = GSXML.extractParams(cgi_paramList, false);
66
67	// just in case there are some that need to get passed to the services
68	HashMap service_params = (HashMap)params.get("s0");
69
70	String collection = (String) params.get(GSParams.COLLECTION);
71	String lang = request.getAttribute(GSXML.LANG_ATT);
72	String uid = request.getAttribute(GSXML.USER_ID_ATT);
73	String document_name = (String) params.get(GSParams.DOCUMENT);
74	if (document_name == null \|\| document_name.equals("")) {
75	System.err.println("DocumentAction Error: no document specified!");
76	return result;
77	}
78	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
79	if (document_type == null) {
80	document_type = "simple";
81	}
82	//whether to retrieve siblings or not
83	boolean get_siblings = false;
84	String sibs = (String) params.get(SIBLING_ARG);
85	if (sibs != null && sibs.equals("1")) {
86	get_siblings = true;
87	}
88
89	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
90	if (sibling_num != null && !sibling_num.equals("")) {
91	// we have to modify the doc name
92	document_name = document_name+"."+sibling_num+".ss";
93	}
94
95	boolean expand_document = false;
96	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
97	if (ed_arg != null && ed_arg.equals("1")) {
98	expand_document = true;
99	}
100
101
102	boolean expand_contents = false;
103	if (expand_document) { // we always expand the contents with the text
104	expand_contents = true;
105	} else {
106	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
107	if (ec_arg != null && ec_arg.equals("1")) {
108	expand_contents = true;
109	}
110	}
111	// get the additional data needed for the page
112	getBackgroundData(page_response, collection, lang, uid);
113	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
114
115	// the_document is where all the doc info - structure and metadata etc
116	// is added into, to be returned in the page
117	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
118	page_response.appendChild(the_document);
119
120	// set the doctype from the cgi arg as an attribute
121	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
122
123	// create a basic doc list containing the current node
124	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
125	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
126	basic_doc_list.appendChild(current_doc);
127	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
128
129	// Create a parameter list to specify the required structure information
130	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
131
132	if (service_params != null) {
133	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
134	}
135
136	Element ds_param = null;
137	boolean get_structure = false;
138	boolean get_structure_info = false;
139	if (document_type.equals("paged")) {
140	get_structure_info = true;
141	// get teh info needed for paged naviagtion
142	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
143	ds_param_list.appendChild(ds_param);
144	ds_param.setAttribute(GSXML.NAME_ATT, "info");
145	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
146	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
147	ds_param_list.appendChild(ds_param);
148	ds_param.setAttribute(GSXML.NAME_ATT, "info");
149	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
150	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
151	ds_param_list.appendChild(ds_param);
152	ds_param.setAttribute(GSXML.NAME_ATT, "info");
153	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
154
155	} else if (document_type.equals("hierarchy")){
156	get_structure = true;
157	if (expand_contents) {
158	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
159	ds_param_list.appendChild(ds_param);
160	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
161	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
162	} else {
163	// get the info needed for table of contents
164	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
165	ds_param_list.appendChild(ds_param);
166	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
167	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
168	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
169	ds_param_list.appendChild(ds_param);
170	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
171	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
172	if (get_siblings) {
173	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
174	ds_param_list.appendChild(ds_param);
175	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
176	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
177	}
178	}
179	} else {
180	// we dont need any structure
181	}
182
183	boolean has_dummy = false;
184	if (get_structure \|\| get_structure_info) {
185
186	// Build a request to obtain the document structure
187	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
188	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
189	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
190	ds_message.appendChild(ds_request);
191	ds_request.appendChild(ds_param_list);
192
193	// create a doc_node_list and put in the doc_node that we are interested in
194	ds_request.appendChild(basic_doc_list);
195
196	// Process the document structure retrieve message
197	Element ds_response_message = (Element) this.mr.process(ds_message);
198
199	// get the info and print out
200	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
201	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
202	path = GSPath.appendLink(path, "nodeStructureInfo");
203	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
204	// get the doc_node bit
205	if (ds_response_struct_info != null) {
206	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
207	}
208	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
209	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
210	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
211	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
212
213	if (ds_response_structure != null) {
214	// add the contents of the structure bit into the_document
215	NodeList structs = ds_response_structure.getChildNodes();
216	for (int i=0; i<structs.getLength();i++) {
217	the_document.appendChild(this.doc.importNode(structs.item(i), true));
218	}
219	} else {
220	// no structure nodes, so put in a dummy doc node
221	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
222	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
223	the_document.appendChild(doc_node);
224	has_dummy = true;
225	}
226	} else { // a simple type - we dont have a dummy node for simple
227	// should think about this more
228	// no structure request, so just put in a dummy doc node
229	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
230	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
231	the_document.appendChild(doc_node);
232	has_dummy = true;
233	}
234
235	// Build a request to obtain some document metadata
236	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
237	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
238	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
239	dm_message.appendChild(dm_request);
240	// Create a parameter list to specify the required metadata information
241
242	HashSet meta_names = new HashSet();
243	meta_names.add("Title"); // the default
244	if (format_elem != null) {
245	extractMetadataNames(format_elem, meta_names);
246	}
247
248	Element dm_param_list = createMetadataParamList(meta_names);
249	if (service_params != null) {
250	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
251	}
252
253	dm_request.appendChild(dm_param_list);
254
255
256	// create the doc node list for the metadata request
257	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
258	dm_request.appendChild(dm_doc_list);
259
260	// Add each node from the structure response into the metadata request
261	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
262	for (int i = 0; i < doc_nodes.getLength(); i++) {
263	Element doc_node = (Element) doc_nodes.item(i);
264	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
265
266	// Add the documentNode to the list
267	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
268	dm_doc_list.appendChild(dm_doc_node);
269	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
270	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
271	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
272	}
273
274	// we also want a metadata request to the top level document to get
275	// assocfilepath - this could be cached too
276	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
277	dm_message.appendChild(doc_meta_request);
278	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
279	if (service_params != null) {
280	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
281	}
282
283	doc_meta_request.appendChild(doc_meta_param_list);
284	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
285	doc_meta_param_list.appendChild(doc_param);
286	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
287	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
288
289	// create the doc node list for the metadata request
290	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
291	doc_meta_request.appendChild(doc_list);
292
293	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
294	// teh node we want is the root document node
295	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
296	doc_list.appendChild(doc_node);
297	Element dm_response_message = (Element) this.mr.process(dm_message);
298
299	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
300	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
301
302	// Merge the metadata with the structure information
303	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
304	for (int i = 0; i < doc_nodes.getLength(); i++) {
305	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
306	}
307	// get teh top level doc metadata out
308	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
309	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
310	if (doc_meta_list != null) {
311	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
312	}
313	// Build a request to obtain some document content
314	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
315	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
316	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
317	dc_message.appendChild(dc_request);
318
319
320	// Create a parameter list to specify the request parameters - empty for now
321	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
322	if (service_params != null) {
323	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
324	}
325
326	dc_request.appendChild(dc_param_list);
327
328	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
329	if (expand_document) {
330	dc_request.appendChild(dm_doc_list);
331	} else {
332	dc_request.appendChild(basic_doc_list);
333	}
334	Element dc_response_message = (Element) this.mr.process(dc_message);
335	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
336
337	if (expand_document) {
338	// Merge the content with the structure information
339	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
340	for (int i = 0; i < doc_nodes.getLength(); i++) {
341	doc_nodes.item(i).appendChild(this.doc.importNode(GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent"), true));
342	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
343	}
344	} else {
345
346	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
347	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
348	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
349
350
351	boolean highlight_query_terms = true;
352	if (highlight_query_terms) {
353	dc_response_doc.removeChild(dc_response_doc_content);
354
355	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
356	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
357	}
358
359
360	if (provide_annotations) {
361	String service_selected = (String)params.get(ENRICH_DOC_ARG);
362	if (service_selected != null && service_selected.equals("1")) {
363	// now we can modifiy the response doc if needed
364	String enrich_service = (String)params.get(GSParams.SERVICE);
365	// send a message to the service
366	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
367	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
368	enrich_message.appendChild(enrich_request);
369	// check for parameters
370	HashMap e_service_params = (HashMap)params.get("s1");
371	if (e_service_params != null) {
372	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
373	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
374	enrich_request.appendChild(enrich_pl);
375	}
376	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
377	enrich_request.appendChild(e_doc_list);
378	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
379
380	Element enrich_response = this.mr.process(enrich_message);
381
382	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
383	path = GSPath.createPath(links);
384	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
385
386	}
387	}
388
389
390	// use the returned id rather than the sent one cos there may have
391	// been modifiers such as .pr that are removed.
392	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
393	the_document.setAttribute("selectedNode", modified_doc_id);
394	if (has_dummy) {
395	// change the id if necessary and add the content
396	Element dummy_node = (Element)doc_nodes.item(0);
397
398	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
399	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
400	// hack for simple type
401	if (document_type.equals("simple")) {
402	// we dont want the internal docNode, just want the content and metadata in the document
403	// rethink this!!
404	the_document.removeChild(dummy_node);
405
406	NodeList dummy_children = dummy_node.getChildNodes();
407	//for (int i=0; i<dummy_children.getLength(); i++) {
408	for (int i=dummy_children.getLength()-1; i>=0; i--) {
409	the_document.appendChild(dummy_children.item(i));
410
411	}
412	}
413	} else {
414	// Merge the document content with the metadata and structure information
415	for (int i = 0; i < doc_nodes.getLength(); i++) {
416	Node dn = doc_nodes.item(i);
417	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
418	if (dn_id.equals(modified_doc_id)) {
419	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
420	break;
421	}
422	}
423	}
424	}
425	///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
426	return result;
427	}
428
429	/** tell the param class what its arguments are
430	* if an action has its own arguments, this should add them to the params
431	* object - particularly important for args that should not be saved */
432	public boolean getActionParameters(GSParams params) {
433	params.addParameter(GOTO_PAGE_ARG, false);
434	params.addParameter(ENRICH_DOC_ARG, false);
435	return true;
436	}
437
438
439	/** this method gets the collection description, the format info, the
440	* list of enrich services, etc - stuff that is needed for the page,
441	* but is the same whatever the query is - should be cached */
442	protected boolean getBackgroundData(Element page_response,
443	String collection, String lang,
444	String uid) {
445
446	// create a message to process - contains requests for the collection
447	// description, the format element, the enrich services on offer
448	// these could all be cached
449	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
450	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
451	// the format request - ignore for now, where does this request go to??
452	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
453	info_message.appendChild(format_request);
454
455	// the enrich_services request - only do this if provide_annotations is true
456
457	if (provide_annotations) {
458	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
459	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
460	info_message.appendChild(enrich_services_request);
461	}
462
463	Element info_response = (Element)this.mr.process(info_message);
464
465	// the collection is the first response
466	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
467	Element format_resp = (Element) responses.item(0);
468
469	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
470	if (format_elem != null) {
471	///ystem.out.println("doc action found a format statement");
472	// set teh format type
473	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
474	page_response.appendChild(this.doc.importNode(format_elem, true));
475	}
476
477	if (provide_annotations) {
478	Element services_resp = (Element)responses.item(1);
479
480	// a new message for the mr
481	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
482
483	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
484	boolean service_found = false;
485	for (int j=0; j<e_services.getLength(); j++) {
486	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
487	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
488	enrich_message.appendChild(s);
489	service_found = true;
490	}
491	}
492	if (service_found) {
493	Element enrich_response = this.mr.process(enrich_message);
494
495	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
496	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
497	for (int i=0; i<e_responses.getLength(); i++) {
498	Element e_resp = (Element)e_responses.item(i);
499	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
500	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
501	service_list.appendChild(e_service);
502	}
503	page_response.appendChild(service_list);
504	}
505	} // if provide_annotations
506	return true;
507
508	}
509
510	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
511	*/
512	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
513
514	// do the query again to get term info
515	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
516	HashMap params = GSXML.extractParams(cgi_param_list, false);
517
518
519	String service_name = (String)((HashMap)params.get("p")).get(GSParams.SERVICE);
520	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
521	System.err.println("DocumentAction: invalid service, not doing highlighting");
522	return dc_response_doc_content;
523	}
524	String collection = (String)params.get(GSParams.COLLECTION);
525	String lang = request.getAttribute(GSXML.LANG_ATT);
526	String uid = request.getAttribute(GSXML.USER_ID_ATT);
527	String to = GSPath.appendLink(collection, service_name);
528
529	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
530	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
531	mr_query_message.appendChild(mr_query_request);
532
533	// paramList
534	HashMap service_params = (HashMap)params.get("s1");
535
536	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
537	GSXML.addParametersToList(this.doc, query_param_list, service_params);
538	mr_query_request.appendChild(query_param_list);
539
540	// do the query
541	Element mr_query_response = (Element)this.mr.process(mr_query_message);
542
543	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
544	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
545	if (query_term_list_element == null) {
546	// no term info
547	System.err.println("DocumentAction: Warning: No query term information.\n");
548	return dc_response_doc_content;
549	}
550
551	String content = GSXML.getNodeText(dc_response_doc_content);
552
553	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
554	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
555
556	HashSet query_term_variants = new HashSet();
557	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
558	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
559	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
560	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
561	for (int j = 0; j < equivalent_terms.length; j++) {
562	System.err.println("Adding query term variant: " + equivalent_terms[j]);
563	query_term_variants.add(equivalent_terms[j]);
564	}
565	}
566
567	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
568
569	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
570	String performed_query = GSXML.getNodeText(query_element) + " ";
571
572	ArrayList phrase_query_p_term_variants_list = new ArrayList();
573	int term_start = 0;
574	boolean in_term = false;
575	boolean in_phrase = false;
576	for (int i = 0; i < performed_query.length(); i++) {
577	char character = performed_query.charAt(i);
578	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
579
580	// Has a query term just started?
581	if (in_term == false && is_character_letter_or_digit == true) {
582	in_term = true;
583	term_start = i;
584	}
585
586	// Or has a term just finished?
587	else if (in_term == true && is_character_letter_or_digit == false) {
588	in_term = false;
589	String term = performed_query.substring(term_start, i);
590	System.err.println("Term: " + term);
591
592	HashSet phrase_query_p_term_x_variants = new HashSet();
593	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
594	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
595	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
596	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
597	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
598	for (int k = 0; k < term_equivalent_terms.length; k++) {
599	System.err.println("Adding query term variant: " + term_equivalent_terms[k]);
600	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
601	}
602	}
603	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
604
605	if (in_phrase == false) {
606	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
607	phrase_query_p_term_variants_list = new ArrayList();
608	}
609	}
610
611	// Watch for phrases (surrounded by quotes)
612	if (character == '\"') {
613	// Has a phrase just started?
614	if (in_phrase == false) {
615	in_phrase = true;
616	}
617	// Or has a phrase just finished?
618	else if (in_phrase == true) {
619	in_phrase = false;
620	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
621	}
622
623	phrase_query_p_term_variants_list = new ArrayList();
624	}
625	}
626
627	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
628	}
629
630
631	/**
632	* Highlights query terms in a piece of text.
633	*/
634	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
635	{
636	// Convert the content string to an array of characters for speed
637	char[] content_characters = new char[content.length()];
638	content.getChars(0, content.length(), content_characters, 0);
639
640	// Now skim through the content, identifying word matches
641	ArrayList word_matches = new ArrayList();
642	int word_start = 0;
643	boolean in_word = false;
644	boolean preceding_word_matched = false;
645	for (int i = 0; i < content_characters.length; i++) {
646	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
647
648	// Has a word just started?
649	if (in_word == false && is_character_letter_or_digit == true) {
650	in_word = true;
651	word_start = i;
652	}
653
654	// Or has a word just finished?
655	else if (in_word == true && is_character_letter_or_digit == false) {
656	in_word = false;
657
658	// Check if the word matches any of the query term equivalents
659	String word = new String(content_characters, word_start, (i - word_start));
660	if (query_term_variants.contains(word)) {
661	// We have found a matching word, so remember its location
662	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
663	preceding_word_matched = true;
664	}
665	else {
666	preceding_word_matched = false;
667	}
668	}
669	}
670
671	// Don't forget the last word...
672	if (in_word == true) {
673	// Check if the word matches any of the query term equivalents
674	String word = new String(content_characters, word_start, (content_characters.length - word_start));
675	if (query_term_variants.contains(word)) {
676	// We have found a matching word, so remember its location
677	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
678	}
679	}
680
681	ArrayList highlight_start_positions = new ArrayList();
682	ArrayList highlight_end_positions = new ArrayList();
683
684	// Deal with phrases now
685	ArrayList partial_phrase_matches = new ArrayList();
686	for (int i = 0; i < word_matches.size(); i++) {
687	WordMatch word_match = (WordMatch) word_matches.get(i);
688
689	// See if any partial phrase matches are extended by this word
690	if (word_match.preceding_word_matched) {
691	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
692	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
693	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
694	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
695	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
696	partial_phrase_match.num_words_matched++;
697
698	// Has a complete phrase match occurred?
699	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
700	// Check for overlaps by looking at the previous highlight range
701	if (!highlight_end_positions.isEmpty()) {
702	int last_highlight_index = highlight_end_positions.size() - 1;
703	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
704	if (last_highlight_end > partial_phrase_match.start_position) {
705	// There is an overlap, so remove the previous phrase match
706	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
707	highlight_end_positions.remove(last_highlight_index);
708	partial_phrase_match.start_position = last_highlight_start;
709	}
710	}
711
712	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
713	highlight_end_positions.add(new Integer(word_match.end_position));
714	}
715	// No, but add the partial match back into the list for next time
716	else {
717	partial_phrase_matches.add(partial_phrase_match);
718	}
719	}
720	}
721	}
722	else {
723	partial_phrase_matches.clear();
724	}
725
726	// See if this word is at the start of any of the phrases
727	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
728	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
729	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
730	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
731	// If this phrase is just one word long, we have a complete match
732	if (phrase_query_p_term_variants_list.size() == 1) {
733	highlight_start_positions.add(new Integer(word_match.start_position));
734	highlight_end_positions.add(new Integer(word_match.end_position));
735	}
736	// Otherwise we have the start of a potential phrase match
737	else {
738	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
739	}
740	}
741	}
742	}
743
744	// Now add the annotation tags into the document at the correct points
745	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
746
747	int last_wrote = 0;
748	for (int i = 0; i < highlight_start_positions.size(); i++) {
749	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
750	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
751
752	// Print anything before the highlight range
753	if (last_wrote < highlight_start) {
754	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
755	// System.err.print(preceding_text);
756	content_element.appendChild(this.doc.createTextNode(preceding_text));
757	}
758
759	// Print the highlight text, annotated
760	if (highlight_end > last_wrote) {
761	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
762	// System.err.print("\|" + highlight_text + "\|");
763	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
764	annotation_element.setAttribute("type", "query_term");
765	content_element.appendChild(annotation_element);
766	last_wrote = highlight_end;
767	}
768	}
769
770	// Finish off any unwritten text
771	if (last_wrote < content_characters.length) {
772	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
773	// System.err.print(remaining_text);
774	content_element.appendChild(this.doc.createTextNode(remaining_text));
775	}
776
777	return content_element;
778	}
779
780
781	static private class WordMatch
782	{
783	public String word;
784	public int start_position;
785	public int end_position;
786	public boolean preceding_word_matched;
787
788	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
789	{
790	this.word = word;
791	this.start_position = start_position;
792	this.end_position = end_position;
793	this.preceding_word_matched = preceding_word_matched;
794	}
795	}
796
797
798	static private class PartialPhraseMatch
799	{
800	public int start_position;
801	public int query_phrase_number;
802	public int num_words_matched;
803
804	public PartialPhraseMatch(int start_position, int query_phrase_number)
805	{
806	this.start_position = start_position;
807	this.query_phrase_number = query_phrase_number;
808	this.num_words_matched = 1;
809	}
810	}
811	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: