Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago
merged from branch ant-install-branch: merge 1
Property svn:keywords set to `Author Date Id Revision`
File size: 35.2 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38
39	/** Action class for retrieving Documents via the message router
40	*/
41	public class DocumentAction extends Action {
42
43	// request argument: a value of "1" asks for the sibling nodes of the selected node to be retrieved as well
44	public static final String SIBLING_ARG = "sib";
45	public static final String GOTO_PAGE_ARG = "gp"; // request argument: when non-empty, process() appends "." + value + ".ss" to the document id
46	public static final String ENRICH_DOC_ARG = "end"; // request argument: a value of "1" sends the retrieved content to the selected enrich service
47
48	/** If true, enrich (annotation) services are described on the document page
49	* and may be applied to the document content. Set by configure() from the "displayAnnotationService" parameter. */
50	protected boolean provide_annotations = false;
51
52	protected boolean highlight_query_terms = false; // set by configure() from "highlightQueryTerms"; when true, process() passes the content through highlightQueryTerms()
53
54	public boolean configure() {
55	super.configure();
56	String highlight = (String)config_params.get("highlightQueryTerms");
57	if (highlight != null && highlight.equals("true")) {
58	highlight_query_terms = true;
59	}
60	String annotate = (String)config_params.get("displayAnnotationService");
61	if (annotate != null && annotate.equals("true")) {
62	provide_annotations = true;
63	}
64	return true;
65	}
66	public Element process (Element message)
67	{
68	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
69
70	// the response
71	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
72	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
73	result.appendChild(page_response);
74
75	// get the request - assume only one
76	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
77	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
78	HashMap params = GSXML.extractParams(cgi_paramList, false);
79
80	// just in case there are some that need to get passed to the services
81	HashMap service_params = (HashMap)params.get("s0");
82
83	String collection = (String) params.get(GSParams.COLLECTION);
84	String lang = request.getAttribute(GSXML.LANG_ATT);
85	String uid = request.getAttribute(GSXML.USER_ID_ATT);
86	String document_name = (String) params.get(GSParams.DOCUMENT);
87	if (document_name == null \|\| document_name.equals("")) {
88	System.err.println("DocumentAction Error: no document specified!");
89	return result;
90	}
91	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
92	if (document_type == null) {
93	document_type = "simple";
94	}
95	//whether to retrieve siblings or not
96	boolean get_siblings = false;
97	String sibs = (String) params.get(SIBLING_ARG);
98	if (sibs != null && sibs.equals("1")) {
99	get_siblings = true;
100	}
101
102	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
103	if (sibling_num != null && !sibling_num.equals("")) {
104	// we have to modify the doc name
105	document_name = document_name+"."+sibling_num+".ss";
106	}
107
108	boolean expand_document = false;
109	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
110	if (ed_arg != null && ed_arg.equals("1")) {
111	expand_document = true;
112	}
113
114
115	boolean expand_contents = false;
116	if (expand_document) { // we always expand the contents with the text
117	expand_contents = true;
118	} else {
119	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
120	if (ec_arg != null && ec_arg.equals("1")) {
121	expand_contents = true;
122	}
123	}
124	// get the additional data needed for the page
125	getBackgroundData(page_response, collection, lang, uid);
126	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
127
128	// the_document is where all the doc info - structure and metadata etc
129	// is added into, to be returned in the page
130	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
131	page_response.appendChild(the_document);
132
133	// set the doctype from the cgi arg as an attribute
134	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
135
136	// create a basic doc list containing the current node
137	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
138	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
139	basic_doc_list.appendChild(current_doc);
140	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
141
142	// Create a parameter list to specify the required structure information
143	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
144
145	if (service_params != null) {
146	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
147	}
148
149	Element ds_param = null;
150	boolean get_structure = false;
151	boolean get_structure_info = false;
152	if (document_type.equals("paged")) {
153	get_structure_info = true;
154	// get teh info needed for paged naviagtion
155	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
156	ds_param_list.appendChild(ds_param);
157	ds_param.setAttribute(GSXML.NAME_ATT, "info");
158	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
159	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
160	ds_param_list.appendChild(ds_param);
161	ds_param.setAttribute(GSXML.NAME_ATT, "info");
162	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
163	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
164	ds_param_list.appendChild(ds_param);
165	ds_param.setAttribute(GSXML.NAME_ATT, "info");
166	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
167
168	} else if (document_type.equals("hierarchy")){
169	get_structure = true;
170	if (expand_contents) {
171	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
172	ds_param_list.appendChild(ds_param);
173	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
174	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
175	} else {
176	// get the info needed for table of contents
177	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
178	ds_param_list.appendChild(ds_param);
179	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
180	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
181	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
182	ds_param_list.appendChild(ds_param);
183	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
184	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
185	if (get_siblings) {
186	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
187	ds_param_list.appendChild(ds_param);
188	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
189	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
190	}
191	}
192	} else {
193	// we dont need any structure
194	}
195
196	boolean has_dummy = false;
197	if (get_structure \|\| get_structure_info) {
198
199	// Build a request to obtain the document structure
200	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
201	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
202	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
203	ds_message.appendChild(ds_request);
204	ds_request.appendChild(ds_param_list);
205
206	// create a doc_node_list and put in the doc_node that we are interested in
207	ds_request.appendChild(basic_doc_list);
208
209	// Process the document structure retrieve message
210	Element ds_response_message = (Element) this.mr.process(ds_message);
211	if (processErrorElements(ds_response_message, page_response)) {
212	return result;
213	}
214
215	// get the info and print out
216	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
217	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
218	path = GSPath.appendLink(path, "nodeStructureInfo");
219	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
220	// get the doc_node bit
221	if (ds_response_struct_info != null) {
222	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
223	}
224	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
225	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
226	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
227	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
228
229	if (ds_response_structure != null) {
230	// add the contents of the structure bit into the_document
231	NodeList structs = ds_response_structure.getChildNodes();
232	for (int i=0; i<structs.getLength();i++) {
233	the_document.appendChild(this.doc.importNode(structs.item(i), true));
234	}
235	} else {
236	// no structure nodes, so put in a dummy doc node
237	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
238	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
239	the_document.appendChild(doc_node);
240	has_dummy = true;
241	}
242	} else { // a simple type - we dont have a dummy node for simple
243	// should think about this more
244	// no structure request, so just put in a dummy doc node
245	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
246	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
247	the_document.appendChild(doc_node);
248	has_dummy = true;
249	}
250
251	// Build a request to obtain some document metadata
252	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
253	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
254	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
255	dm_message.appendChild(dm_request);
256	// Create a parameter list to specify the required metadata information
257
258	HashSet meta_names = new HashSet();
259	meta_names.add("Title"); // the default
260	if (format_elem != null) {
261	extractMetadataNames(format_elem, meta_names);
262	}
263
264	Element dm_param_list = createMetadataParamList(meta_names);
265	if (service_params != null) {
266	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
267	}
268
269	dm_request.appendChild(dm_param_list);
270
271
272	// create the doc node list for the metadata request
273	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
274	dm_request.appendChild(dm_doc_list);
275
276	// Add each node from the structure response into the metadata request
277	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
278	for (int i = 0; i < doc_nodes.getLength(); i++) {
279	Element doc_node = (Element) doc_nodes.item(i);
280	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
281
282	// Add the documentNode to the list
283	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
284	dm_doc_list.appendChild(dm_doc_node);
285	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
286	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
287	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
288	}
289
290	// we also want a metadata request to the top level document to get
291	// assocfilepath - this could be cached too
292	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
293	dm_message.appendChild(doc_meta_request);
294	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
295	if (service_params != null) {
296	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
297	}
298
299	doc_meta_request.appendChild(doc_meta_param_list);
300	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
301	doc_meta_param_list.appendChild(doc_param);
302	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
303	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
304
305	// create the doc node list for the metadata request
306	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
307	doc_meta_request.appendChild(doc_list);
308
309	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
310	// teh node we want is the root document node
311	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
312	doc_list.appendChild(doc_node);
313	Element dm_response_message = (Element) this.mr.process(dm_message);
314	if (processErrorElements(dm_response_message, page_response)) {
315	return result;
316	}
317
318	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
319	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
320
321	// Merge the metadata with the structure information
322	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
323	for (int i = 0; i < doc_nodes.getLength(); i++) {
324	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
325	}
326	// get teh top level doc metadata out
327	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
328	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
329	if (doc_meta_list != null) {
330	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
331	}
332	// Build a request to obtain some document content
333	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
334	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
335	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
336	dc_message.appendChild(dc_request);
337
338
339	// Create a parameter list to specify the request parameters - empty for now
340	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
341	if (service_params != null) {
342	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
343	}
344
345	dc_request.appendChild(dc_param_list);
346
347	// get the content
348	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
349	if (expand_document) {
350	dc_request.appendChild(dm_doc_list);
351	} else {
352	dc_request.appendChild(basic_doc_list);
353	}
354	System.err.println("request = "+converter.getString(dc_message));
355	Element dc_response_message = (Element) this.mr.process(dc_message);
356	if (processErrorElements(dc_response_message, page_response)) {
357	return result;
358	}
359
360	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
361
362	if (expand_document) {
363	// Merge the content with the structure information
364	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
365	for (int i = 0; i < doc_nodes.getLength(); i++) {
366	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
367	if (content != null) {
368	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
369	}
370	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
371	}
372	} else {
373
374	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
375	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
376	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
377
378	if (dc_response_doc_content == null) {
379	// no content to add
380	return result;
381	}
382	if (highlight_query_terms) {
383	dc_response_doc.removeChild(dc_response_doc_content);
384
385	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
386	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
387	}
388
389
390	if (provide_annotations) {
391	String service_selected = (String)params.get(ENRICH_DOC_ARG);
392	if (service_selected != null && service_selected.equals("1")) {
393	// now we can modifiy the response doc if needed
394	String enrich_service = (String)params.get(GSParams.SERVICE);
395	// send a message to the service
396	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
397	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
398	enrich_message.appendChild(enrich_request);
399	// check for parameters
400	HashMap e_service_params = (HashMap)params.get("s1");
401	if (e_service_params != null) {
402	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
403	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
404	enrich_request.appendChild(enrich_pl);
405	}
406	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
407	enrich_request.appendChild(e_doc_list);
408	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
409
410	Element enrich_response = this.mr.process(enrich_message);
411
412	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
413	path = GSPath.createPath(links);
414	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
415
416	}
417	} // if provide_annotations
418
419
420	// use the returned id rather than the sent one cos there may have
421	// been modifiers such as .pr that are removed.
422	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
423	the_document.setAttribute("selectedNode", modified_doc_id);
424	if (has_dummy) {
425	// change the id if necessary and add the content
426	Element dummy_node = (Element)doc_nodes.item(0);
427
428	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
429	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
430	// hack for simple type
431	if (document_type.equals("simple")) {
432	// we dont want the internal docNode, just want the content and metadata in the document
433	// rethink this!!
434	the_document.removeChild(dummy_node);
435
436	NodeList dummy_children = dummy_node.getChildNodes();
437	//for (int i=0; i<dummy_children.getLength(); i++) {
438	for (int i=dummy_children.getLength()-1; i>=0; i--) {
439	the_document.appendChild(dummy_children.item(i));
440
441	}
442	}
443	} else {
444	// Merge the document content with the metadata and structure information
445	for (int i = 0; i < doc_nodes.getLength(); i++) {
446	Node dn = doc_nodes.item(i);
447	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
448	if (dn_id.equals(modified_doc_id)) {
449	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
450	break;
451	}
452	}
453	}
454	}
455	///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
456	return result;
457	}
458
459	/** tell the param class what its arguments are
460	* if an action has its own arguments, this should add them to the params
461	* object - particularly important for args that should not be saved */
462	public boolean getActionParameters(GSParams params) {
463	params.addParameter(GOTO_PAGE_ARG, false);
464	params.addParameter(ENRICH_DOC_ARG, false);
465	return true;
466	}
467
468
469	/** this method gets the collection description, the format info, the
470	* list of enrich services, etc - stuff that is needed for the page,
471	* but is the same whatever the query is - should be cached */
472	protected boolean getBackgroundData(Element page_response,
473	String collection, String lang,
474	String uid) {
475
476	// create a message to process - contains requests for the collection
477	// description, the format element, the enrich services on offer
478	// these could all be cached
479	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
480	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
481	// the format request - ignore for now, where does this request go to??
482	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
483	info_message.appendChild(format_request);
484
485	// the enrich_services request - only do this if provide_annotations is true
486
487	if (provide_annotations) {
488	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
489	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
490	info_message.appendChild(enrich_services_request);
491	}
492
493	Element info_response = (Element)this.mr.process(info_message);
494
495	// the collection is the first response
496	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
497	Element format_resp = (Element) responses.item(0);
498
499	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
500	if (format_elem != null) {
501	///ystem.out.println("doc action found a format statement");
502	// set teh format type
503	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
504	page_response.appendChild(this.doc.importNode(format_elem, true));
505	}
506
507	if (provide_annotations) {
508	Element services_resp = (Element)responses.item(1);
509
510	// a new message for the mr
511	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
512
513	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
514	boolean service_found = false;
515	for (int j=0; j<e_services.getLength(); j++) {
516	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
517	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
518	enrich_message.appendChild(s);
519	service_found = true;
520	}
521	}
522	if (service_found) {
523	Element enrich_response = this.mr.process(enrich_message);
524
525	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
526	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
527	for (int i=0; i<e_responses.getLength(); i++) {
528	Element e_resp = (Element)e_responses.item(i);
529	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
530	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
531	service_list.appendChild(e_service);
532	}
533	page_response.appendChild(service_list);
534	}
535	} // if provide_annotations
536	return true;
537
538	}
539
540	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
541	*/
542	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
543
544	// do the query again to get term info
545	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
546	HashMap params = GSXML.extractParams(cgi_param_list, false);
547
548	HashMap previous_params = (HashMap)params.get("p");
549	if (previous_params == null) {
550	return dc_response_doc_content;
551	}
552	String service_name = (String)previous_params.get(GSParams.SERVICE);
553	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
554	System.err.println("DocumentAction: invalid service, not doing highlighting");
555	return dc_response_doc_content;
556	}
557	String collection = (String)params.get(GSParams.COLLECTION);
558	String lang = request.getAttribute(GSXML.LANG_ATT);
559	String uid = request.getAttribute(GSXML.USER_ID_ATT);
560	String to = GSPath.appendLink(collection, service_name);
561
562	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
563	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
564	mr_query_message.appendChild(mr_query_request);
565
566	// paramList
567	HashMap service_params = (HashMap)params.get("s1");
568
569	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
570	GSXML.addParametersToList(this.doc, query_param_list, service_params);
571	mr_query_request.appendChild(query_param_list);
572
573	// do the query
574	Element mr_query_response = (Element)this.mr.process(mr_query_message);
575
576	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
577	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
578	if (query_term_list_element == null) {
579	// no term info
580	System.err.println("DocumentAction: Warning: No query term information.\n");
581	return dc_response_doc_content;
582	}
583
584	String content = GSXML.getNodeText(dc_response_doc_content);
585
586	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
587	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
588
589	HashSet query_term_variants = new HashSet();
590	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
591	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
592	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
593	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
594	for (int j = 0; j < equivalent_terms.length; j++) {
595	System.err.println("Adding query term variant: " + equivalent_terms[j]);
596	query_term_variants.add(equivalent_terms[j]);
597	}
598	}
599
600	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
601
602	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
603	String performed_query = GSXML.getNodeText(query_element) + " ";
604
605	ArrayList phrase_query_p_term_variants_list = new ArrayList();
606	int term_start = 0;
607	boolean in_term = false;
608	boolean in_phrase = false;
609	for (int i = 0; i < performed_query.length(); i++) {
610	char character = performed_query.charAt(i);
611	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
612
613	// Has a query term just started?
614	if (in_term == false && is_character_letter_or_digit == true) {
615	in_term = true;
616	term_start = i;
617	}
618
619	// Or has a term just finished?
620	else if (in_term == true && is_character_letter_or_digit == false) {
621	in_term = false;
622	String term = performed_query.substring(term_start, i);
623	System.err.println("Term: " + term);
624
625	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
626	if (term_element != null) {
627
628	HashSet phrase_query_p_term_x_variants = new HashSet();
629
630	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
631	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
632	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
633	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
634	for (int k = 0; k < term_equivalent_terms.length; k++) {
635	System.err.println("Adding query term variant: " + term_equivalent_terms[k]);
636	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
637	}
638	}
639	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
640
641	if (in_phrase == false) {
642	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
643	phrase_query_p_term_variants_list = new ArrayList();
644	}
645	}
646	}
647	// Watch for phrases (surrounded by quotes)
648	if (character == '\"') {
649	// Has a phrase just started?
650	if (in_phrase == false) {
651	in_phrase = true;
652	}
653	// Or has a phrase just finished?
654	else if (in_phrase == true) {
655	in_phrase = false;
656	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
657	}
658
659	phrase_query_p_term_variants_list = new ArrayList();
660	}
661	}
662
663	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
664	}
665
666
667	/**
668	* Highlights query terms in a piece of text.
669	*/
670	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
671	{
672	// Convert the content string to an array of characters for speed
673	char[] content_characters = new char[content.length()];
674	content.getChars(0, content.length(), content_characters, 0);
675
676	// Now skim through the content, identifying word matches
677	ArrayList word_matches = new ArrayList();
678	int word_start = 0;
679	boolean in_word = false;
680	boolean preceding_word_matched = false;
681	for (int i = 0; i < content_characters.length; i++) {
682	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
683
684	// Has a word just started?
685	if (in_word == false && is_character_letter_or_digit == true) {
686	in_word = true;
687	word_start = i;
688	}
689
690	// Or has a word just finished?
691	else if (in_word == true && is_character_letter_or_digit == false) {
692	in_word = false;
693
694	// Check if the word matches any of the query term equivalents
695	String word = new String(content_characters, word_start, (i - word_start));
696	if (query_term_variants.contains(word)) {
697	// We have found a matching word, so remember its location
698	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
699	preceding_word_matched = true;
700	}
701	else {
702	preceding_word_matched = false;
703	}
704	}
705	}
706
707	// Don't forget the last word...
708	if (in_word == true) {
709	// Check if the word matches any of the query term equivalents
710	String word = new String(content_characters, word_start, (content_characters.length - word_start));
711	if (query_term_variants.contains(word)) {
712	// We have found a matching word, so remember its location
713	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
714	}
715	}
716
717	ArrayList highlight_start_positions = new ArrayList();
718	ArrayList highlight_end_positions = new ArrayList();
719
720	// Deal with phrases now
721	ArrayList partial_phrase_matches = new ArrayList();
722	for (int i = 0; i < word_matches.size(); i++) {
723	WordMatch word_match = (WordMatch) word_matches.get(i);
724
725	// See if any partial phrase matches are extended by this word
726	if (word_match.preceding_word_matched) {
727	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
728	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
729	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
730	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
731	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
732	partial_phrase_match.num_words_matched++;
733
734	// Has a complete phrase match occurred?
735	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
736	// Check for overlaps by looking at the previous highlight range
737	if (!highlight_end_positions.isEmpty()) {
738	int last_highlight_index = highlight_end_positions.size() - 1;
739	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
740	if (last_highlight_end > partial_phrase_match.start_position) {
741	// There is an overlap, so remove the previous phrase match
742	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
743	highlight_end_positions.remove(last_highlight_index);
744	partial_phrase_match.start_position = last_highlight_start;
745	}
746	}
747
748	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
749	highlight_end_positions.add(new Integer(word_match.end_position));
750	}
751	// No, but add the partial match back into the list for next time
752	else {
753	partial_phrase_matches.add(partial_phrase_match);
754	}
755	}
756	}
757	}
758	else {
759	partial_phrase_matches.clear();
760	}
761
762	// See if this word is at the start of any of the phrases
763	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
764	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
765	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
766	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
767	// If this phrase is just one word long, we have a complete match
768	if (phrase_query_p_term_variants_list.size() == 1) {
769	highlight_start_positions.add(new Integer(word_match.start_position));
770	highlight_end_positions.add(new Integer(word_match.end_position));
771	}
772	// Otherwise we have the start of a potential phrase match
773	else {
774	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
775	}
776	}
777	}
778	}
779
780	// Now add the annotation tags into the document at the correct points
781	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
782
783	int last_wrote = 0;
784	for (int i = 0; i < highlight_start_positions.size(); i++) {
785	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
786	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
787
788	// Print anything before the highlight range
789	if (last_wrote < highlight_start) {
790	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
791	// System.err.print(preceding_text);
792	content_element.appendChild(this.doc.createTextNode(preceding_text));
793	}
794
795	// Print the highlight text, annotated
796	if (highlight_end > last_wrote) {
797	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
798	// System.err.print("\|" + highlight_text + "\|");
799	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
800	annotation_element.setAttribute("type", "query_term");
801	content_element.appendChild(annotation_element);
802	last_wrote = highlight_end;
803	}
804	}
805
806	// Finish off any unwritten text
807	if (last_wrote < content_characters.length) {
808	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
809	// System.err.print(remaining_text);
810	content_element.appendChild(this.doc.createTextNode(remaining_text));
811	}
812
813	return content_element;
814	}
815
816
817	static private class WordMatch
818	{
819	public String word;
820	public int start_position;
821	public int end_position;
822	public boolean preceding_word_matched;
823
824	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
825	{
826	this.word = word;
827	this.start_position = start_position;
828	this.end_position = end_position;
829	this.preceding_word_matched = preceding_word_matched;
830	}
831	}
832
833
834	static private class PartialPhraseMatch
835	{
836	public int start_position;
837	public int query_phrase_number;
838	public int num_words_matched;
839
840	public PartialPhraseMatch(int start_position, int query_phrase_number)
841	{
842	this.start_position = start_position;
843	this.query_phrase_number = query_phrase_number;
844	this.num_words_matched = 1;
845	}
846	}
847	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: