Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 14525

Last change on this file since 14525 was 14525, checked in by qq6, 17 years ago
adding href and rl values into the document node, if they can be identified from the params list
Property svn:keywords set to `Author Date Id Revision`
File size: 36.5 KB

Line
1	/*
2	* DocumentAction.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.action;
20
21	// Greenstone classes
22	import org.greenstone.gsdl3.core.ModuleInterface;
23	import org.greenstone.gsdl3.util.*;
24
25	// XML classes
26	import org.w3c.dom.Document;
27	import org.w3c.dom.Element;
28	import org.w3c.dom.Node;
29	import org.w3c.dom.Text;
30	import org.w3c.dom.NodeList;
31
32	// General Java classes
33	import java.util.ArrayList;
34	import java.util.HashMap;
35	import java.util.HashSet;
36	import java.io.File;
37
38	import org.apache.log4j.*;
39
40	/** Action class for retrieving Documents via the message router
41	*/
42	public class DocumentAction extends Action {
43
44	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName()); // log4j logger shared by all instances of this action
45
46	// CGI argument: a value of "1" asks process() to also retrieve the sibling nodes of the selected node (only consulted for "hierarchy" documents)
47	public static final String SIBLING_ARG = "sib";
48	public static final String GOTO_PAGE_ARG = "gp"; // CGI argument: when non-empty, process() appends ".<value>.ss" to the requested document name
49	public static final String ENRICH_DOC_ARG = "end"; // CGI argument: a value of "1" makes process() send the retrieved content to the selected enrich service (only when provide_annotations is true)
50
51	/** if this is set to true, when a document is displayed, any annotation
52	* type services (enrich) will be offered to the user as well.
    * configure() turns it on when the "displayAnnotationService" config parameter equals "true". */
53	protected boolean provide_annotations = false;
54
55	protected boolean highlight_query_terms = false; // when true, process() passes the document content through highlightQueryTerms(); configure() turns it on when the "highlightQueryTerms" config parameter equals "true"
56
57	public boolean configure() {
58	super.configure();
59	String highlight = (String)config_params.get("highlightQueryTerms");
60	if (highlight != null && highlight.equals("true")) {
61	highlight_query_terms = true;
62	}
63	String annotate = (String)config_params.get("displayAnnotationService");
64	if (annotate != null && annotate.equals("true")) {
65	provide_annotations = true;
66	}
67	return true;
68	}
69	public Element process (Element message)
70	{
71	// for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73	// the response
74	Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
75	Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
76	result.appendChild(page_response);
77
78	// get the request - assume only one
79	Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
80	Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
81	HashMap params = GSXML.extractParams(cgi_paramList, false);
82
83	// just in case there are some that need to get passed to the services
84	HashMap service_params = (HashMap)params.get("s0");
85
86	String has_rl = null;
87	String has_href = null;
88	has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
89	has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
90	String collection = (String) params.get(GSParams.COLLECTION);
91	String lang = request.getAttribute(GSXML.LANG_ATT);
92	String uid = request.getAttribute(GSXML.USER_ID_ATT);
93	String document_name = (String) params.get(GSParams.DOCUMENT);
94	if ((document_name == null \|\| document_name.equals("")) && (has_href == null \|\| has_href.equals(""))) {
95	logger.error("no document specified!");
96	return result;
97	}
98	String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
99	if (document_type == null) {
100	document_type = "simple";
101	}
102	//whether to retrieve siblings or not
103	boolean get_siblings = false;
104	String sibs = (String) params.get(SIBLING_ARG);
105	if (sibs != null && sibs.equals("1")) {
106	get_siblings = true;
107	}
108
109	String sibling_num = (String) params.get(GOTO_PAGE_ARG);
110	if (sibling_num != null && !sibling_num.equals("")) {
111	// we have to modify the doc name
112	document_name = document_name+"."+sibling_num+".ss";
113	}
114
115	boolean expand_document = false;
116	String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
117	if (ed_arg != null && ed_arg.equals("1")) {
118	expand_document = true;
119	}
120
121
122	boolean expand_contents = false;
123	if (expand_document) { // we always expand the contents with the text
124	expand_contents = true;
125	} else {
126	String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
127	if (ec_arg != null && ec_arg.equals("1")) {
128	expand_contents = true;
129	}
130	}
131	// get the additional data needed for the page
132	getBackgroundData(page_response, collection, lang, uid);
133	Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
134
135	// the_document is where all the doc info - structure and metadata etc
136	// is added into, to be returned in the page
137	Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
138	page_response.appendChild(the_document);
139
140	// set the doctype from the cgi arg as an attribute
141	the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
142
143	// create a basic doc list containing the current node
144	Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
145	Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
146	basic_doc_list.appendChild(current_doc);
147	if (document_name.length()!=0){
148	current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
149	}else if (has_href.length()!=0){
150	current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
151	current_doc.setAttribute("externalURL", has_rl);
152	}
153
154	// Create a parameter list to specify the required structure information
155	Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
156
157	if (service_params != null) {
158	GSXML.addParametersToList(this.doc, ds_param_list, service_params);
159	}
160
161	Element ds_param = null;
162	boolean get_structure = false;
163	boolean get_structure_info = false;
164	if (document_type.equals("paged")) {
165	get_structure_info = true;
166	// get teh info needed for paged naviagtion
167	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
168	ds_param_list.appendChild(ds_param);
169	ds_param.setAttribute(GSXML.NAME_ATT, "info");
170	ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
171	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
172	ds_param_list.appendChild(ds_param);
173	ds_param.setAttribute(GSXML.NAME_ATT, "info");
174	ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
175	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
176	ds_param_list.appendChild(ds_param);
177	ds_param.setAttribute(GSXML.NAME_ATT, "info");
178	ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
179
180	} else if (document_type.equals("hierarchy")){
181	get_structure = true;
182	if (expand_contents) {
183	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
184	ds_param_list.appendChild(ds_param);
185	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
186	ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
187	} else {
188	// get the info needed for table of contents
189	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
190	ds_param_list.appendChild(ds_param);
191	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
192	ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
193	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
194	ds_param_list.appendChild(ds_param);
195	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
196	ds_param.setAttribute(GSXML.VALUE_ATT, "children");
197	if (get_siblings) {
198	ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
199	ds_param_list.appendChild(ds_param);
200	ds_param.setAttribute(GSXML.NAME_ATT, "structure");
201	ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
202	}
203	}
204	} else {
205	// we dont need any structure
206	}
207
208	boolean has_dummy = false;
209	if (get_structure \|\| get_structure_info) {
210
211	// Build a request to obtain the document structure
212	Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
213	String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
214	Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
215	ds_message.appendChild(ds_request);
216	ds_request.appendChild(ds_param_list);
217
218	// create a doc_node_list and put in the doc_node that we are interested in
219	ds_request.appendChild(basic_doc_list);
220
221	// Process the document structure retrieve message
222	Element ds_response_message = (Element) this.mr.process(ds_message);
223	if (processErrorElements(ds_response_message, page_response)) {
224	return result;
225	}
226
227	// get the info and print out
228	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
229	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
230	path = GSPath.appendLink(path, "nodeStructureInfo");
231	Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
232	// get the doc_node bit
233	if (ds_response_struct_info != null) {
234	the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
235	}
236	path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
237	path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
238	path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
239	Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
240
241	if (ds_response_structure != null) {
242	// add the contents of the structure bit into the_document
243	NodeList structs = ds_response_structure.getChildNodes();
244	for (int i=0; i<structs.getLength();i++) {
245	the_document.appendChild(this.doc.importNode(structs.item(i), true));
246	}
247	} else {
248	// no structure nodes, so put in a dummy doc node
249	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
250	if (document_name.length()!=0){
251	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
252	}else if (has_href.length()!=0){
253	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
254	doc_node.setAttribute("externalURL", has_rl);
255	}
256	the_document.appendChild(doc_node);
257	has_dummy = true;
258	}
259	} else { // a simple type - we dont have a dummy node for simple
260	// should think about this more
261	// no structure request, so just put in a dummy doc node
262	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
263	if (document_name.length()!=0){
264	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
265	}else if (has_href.length()!=0){
266	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
267	doc_node.setAttribute("externalURL", has_rl);
268	}
269	the_document.appendChild(doc_node);
270	has_dummy = true;
271	}
272
273	// Build a request to obtain some document metadata
274	Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
275	String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
276	Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
277	dm_message.appendChild(dm_request);
278	// Create a parameter list to specify the required metadata information
279
280	HashSet meta_names = new HashSet();
281	meta_names.add("Title"); // the default
282	if (format_elem != null) {
283	extractMetadataNames(format_elem, meta_names);
284	}
285
286	Element dm_param_list = createMetadataParamList(meta_names);
287	if (service_params != null) {
288	GSXML.addParametersToList(this.doc, dm_param_list, service_params);
289	}
290
291	dm_request.appendChild(dm_param_list);
292
293
294	// create the doc node list for the metadata request
295	Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
296	dm_request.appendChild(dm_doc_list);
297
298	// Add each node from the structure response into the metadata request
299	NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
300	for (int i = 0; i < doc_nodes.getLength(); i++) {
301	Element doc_node = (Element) doc_nodes.item(i);
302	String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
303
304	// Add the documentNode to the list
305	Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
306	dm_doc_list.appendChild(dm_doc_node);
307	dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
308	dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
309	doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
310	}
311
312	// we also want a metadata request to the top level document to get
313	// assocfilepath - this could be cached too
314	Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
315	dm_message.appendChild(doc_meta_request);
316	Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
317	if (service_params != null) {
318	GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
319	}
320
321	doc_meta_request.appendChild(doc_meta_param_list);
322	Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
323	doc_meta_param_list.appendChild(doc_param);
324	doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
325	doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
326
327	// create the doc node list for the metadata request
328	Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
329	doc_meta_request.appendChild(doc_list);
330
331	Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
332	// the node we want is the root document node
333	if (document_name.length()!=0){
334	doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
335	}else if (has_href.length()!=0){
336	doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
337	doc_node.setAttribute("externalURL", has_rl);
338	}
339	doc_list.appendChild(doc_node);
340	Element dm_response_message = (Element) this.mr.process(dm_message);
341	if (processErrorElements(dm_response_message, page_response)) {
342	return result;
343	}
344
345	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
346	Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
347
348	// Merge the metadata with the structure information
349	NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
350	for (int i = 0; i < doc_nodes.getLength(); i++) {
351	GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
352	}
353	// get teh top level doc metadata out
354	Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
355	Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
356	if (doc_meta_list != null) {
357	the_document.appendChild(this.doc.importNode(doc_meta_list, true));
358	}
359	// Build a request to obtain some document content
360	Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
361	to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
362	Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
363	dc_message.appendChild(dc_request);
364
365
366	// Create a parameter list to specify the request parameters - empty for now
367	Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
368	if (service_params != null) {
369	GSXML.addParametersToList(this.doc, dc_param_list, service_params);
370	}
371
372	dc_request.appendChild(dc_param_list);
373
374	// get the content
375	// the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
376	if (expand_document) {
377	dc_request.appendChild(dm_doc_list);
378	} else {
379	dc_request.appendChild(basic_doc_list);
380	}
381	logger.debug("request = "+converter.getString(dc_message));
382	Element dc_response_message = (Element) this.mr.process(dc_message);
383	if (processErrorElements(dc_response_message, page_response)) {
384	return result;
385	}
386
387	Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
388
389	if (expand_document) {
390	// Merge the content with the structure information
391	NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
392	for (int i = 0; i < doc_nodes.getLength(); i++) {
393	Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
394	if (content != null) {
395	doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
396	}
397	//GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
398	}
399	} else {
400	//path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
401	Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
402	Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
403	Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
404
405	if (dc_response_doc_content == null) {
406	// no content to add
407	if (dc_response_doc_external !=null){
408	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
409
410	the_document.setAttribute("selectedNode", modified_doc_id);
411	the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
412	}
413	return result;
414	}
415	if (highlight_query_terms) {
416	dc_response_doc.removeChild(dc_response_doc_content);
417
418	dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
419	dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
420	}
421
422
423	if (provide_annotations) {
424	String service_selected = (String)params.get(ENRICH_DOC_ARG);
425	if (service_selected != null && service_selected.equals("1")) {
426	// now we can modifiy the response doc if needed
427	String enrich_service = (String)params.get(GSParams.SERVICE);
428	// send a message to the service
429	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
430	Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
431	enrich_message.appendChild(enrich_request);
432	// check for parameters
433	HashMap e_service_params = (HashMap)params.get("s1");
434	if (e_service_params != null) {
435	Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
436	GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
437	enrich_request.appendChild(enrich_pl);
438	}
439	Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
440	enrich_request.appendChild(e_doc_list);
441	e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
442
443	Element enrich_response = this.mr.process(enrich_message);
444
445	String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
446	path = GSPath.createPath(links);
447	dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
448
449	}
450	} // if provide_annotations
451
452
453	// use the returned id rather than the sent one cos there may have
454	// been modifiers such as .pr that are removed.
455	String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
456	the_document.setAttribute("selectedNode", modified_doc_id);
457	if (has_dummy) {
458	// change the id if necessary and add the content
459	Element dummy_node = (Element)doc_nodes.item(0);
460
461	dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
462	dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
463	// hack for simple type
464	if (document_type.equals("simple")) {
465	// we dont want the internal docNode, just want the content and metadata in the document
466	// rethink this!!
467	the_document.removeChild(dummy_node);
468
469	NodeList dummy_children = dummy_node.getChildNodes();
470	//for (int i=0; i<dummy_children.getLength(); i++) {
471	for (int i=dummy_children.getLength()-1; i>=0; i--) {
472	the_document.appendChild(dummy_children.item(i));
473
474	}
475	}
476	} else {
477	// Merge the document content with the metadata and structure information
478	for (int i = 0; i < doc_nodes.getLength(); i++) {
479	Node dn = doc_nodes.item(i);
480	String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
481	if (dn_id.equals(modified_doc_id)) {
482	dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
483	break;
484	}
485	}
486	}
487	}
488	logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
489	return result;
490	}
491
492	/** tell the param class what its arguments are
493	* if an action has its own arguments, this should add them to the params
494	* object - particularly important for args that should not be saved */
495	public boolean getActionParameters(GSParams params) {
496	params.addParameter(GOTO_PAGE_ARG, false);
497	params.addParameter(ENRICH_DOC_ARG, false);
498	return true;
499	}
500
501
502	/** this method gets the collection description, the format info, the
503	* list of enrich services, etc - stuff that is needed for the page,
504	* but is the same whatever the query is - should be cached */
505	protected boolean getBackgroundData(Element page_response,
506	String collection, String lang,
507	String uid) {
508
509	// create a message to process - contains requests for the collection
510	// description, the format element, the enrich services on offer
511	// these could all be cached
512	Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
513	String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
514	// the format request - ignore for now, where does this request go to??
515	Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
516	info_message.appendChild(format_request);
517
518	// the enrich_services request - only do this if provide_annotations is true
519
520	if (provide_annotations) {
521	Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
522	enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
523	info_message.appendChild(enrich_services_request);
524	}
525
526	Element info_response = (Element)this.mr.process(info_message);
527
528	// the collection is the first response
529	NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
530	Element format_resp = (Element) responses.item(0);
531
532	Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
533	if (format_elem != null) {
534	logger.debug("doc action found a format statement");
535	// set teh format type
536	format_elem.setAttribute(GSXML.TYPE_ATT, "display");
537	page_response.appendChild(this.doc.importNode(format_elem, true));
538	}
539
540	if (provide_annotations) {
541	Element services_resp = (Element)responses.item(1);
542
543	// a new message for the mr
544	Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
545
546	NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
547	boolean service_found = false;
548	for (int j=0; j<e_services.getLength(); j++) {
549	if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
550	Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
551	enrich_message.appendChild(s);
552	service_found = true;
553	}
554	}
555	if (service_found) {
556	Element enrich_response = this.mr.process(enrich_message);
557
558	NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
559	Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
560	for (int i=0; i<e_responses.getLength(); i++) {
561	Element e_resp = (Element)e_responses.item(i);
562	Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
563	e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
564	service_list.appendChild(e_service);
565	}
566	page_response.appendChild(service_list);
567	}
568	} // if provide_annotations
569	return true;
570
571	}
572
573	/** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
574	*/
575	protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
576
577	// do the query again to get term info
578	Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
579	HashMap params = GSXML.extractParams(cgi_param_list, false);
580
581	HashMap previous_params = (HashMap)params.get("p");
582	if (previous_params == null) {
583	return dc_response_doc_content;
584	}
585	String service_name = (String)previous_params.get(GSParams.SERVICE);
586	if (service_name == null \|\| !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
587	logger.error("invalid service, not doing highlighting");
588	return dc_response_doc_content;
589	}
590	String collection = (String)params.get(GSParams.COLLECTION);
591	String lang = request.getAttribute(GSXML.LANG_ATT);
592	String uid = request.getAttribute(GSXML.USER_ID_ATT);
593	String to = GSPath.appendLink(collection, service_name);
594
595	Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
596	Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
597	mr_query_message.appendChild(mr_query_request);
598
599	// paramList
600	HashMap service_params = (HashMap)params.get("s1");
601
602	Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
603	GSXML.addParametersToList(this.doc, query_param_list, service_params);
604	mr_query_request.appendChild(query_param_list);
605
606	// do the query
607	Element mr_query_response = (Element)this.mr.process(mr_query_message);
608
609	String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
610	Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
611	if (query_term_list_element == null) {
612	// no term info
613	logger.error("No query term information.\n");
614	return dc_response_doc_content;
615	}
616
617	String content = GSXML.getNodeText(dc_response_doc_content);
618
619	String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
620	Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
621
622	HashSet query_term_variants = new HashSet();
623	NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
624	for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
625	Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
626	String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
627	for (int j = 0; j < equivalent_terms.length; j++) {
628	query_term_variants.add(equivalent_terms[j]);
629	}
630	}
631
632	ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
633
634	Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
635	String performed_query = GSXML.getNodeText(query_element) + " ";
636
637	ArrayList phrase_query_p_term_variants_list = new ArrayList();
638	int term_start = 0;
639	boolean in_term = false;
640	boolean in_phrase = false;
641	for (int i = 0; i < performed_query.length(); i++) {
642	char character = performed_query.charAt(i);
643	boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
644
645	// Has a query term just started?
646	if (in_term == false && is_character_letter_or_digit == true) {
647	in_term = true;
648	term_start = i;
649	}
650
651	// Or has a term just finished?
652	else if (in_term == true && is_character_letter_or_digit == false) {
653	in_term = false;
654	String term = performed_query.substring(term_start, i);
655
656	Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
657	if (term_element != null) {
658
659	HashSet phrase_query_p_term_x_variants = new HashSet();
660
661	NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
662	for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
663	Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
664	String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
665	for (int k = 0; k < term_equivalent_terms.length; k++) {
666	phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
667	}
668	}
669	phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
670
671	if (in_phrase == false) {
672	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
673	phrase_query_p_term_variants_list = new ArrayList();
674	}
675	}
676	}
677	// Watch for phrases (surrounded by quotes)
678	if (character == '\"') {
679	// Has a phrase just started?
680	if (in_phrase == false) {
681	in_phrase = true;
682	}
683	// Or has a phrase just finished?
684	else if (in_phrase == true) {
685	in_phrase = false;
686	phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
687	}
688
689	phrase_query_p_term_variants_list = new ArrayList();
690	}
691	}
692
693	return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
694	}
695
696
697	/**
698	* Highlights query terms in a piece of text.
699	*/
700	private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
701	{
702	// Convert the content string to an array of characters for speed
703	char[] content_characters = new char[content.length()];
704	content.getChars(0, content.length(), content_characters, 0);
705
706	// Now skim through the content, identifying word matches
707	ArrayList word_matches = new ArrayList();
708	int word_start = 0;
709	boolean in_word = false;
710	boolean preceding_word_matched = false;
711	for (int i = 0; i < content_characters.length; i++) {
712	boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
713
714	// Has a word just started?
715	if (in_word == false && is_character_letter_or_digit == true) {
716	in_word = true;
717	word_start = i;
718	}
719
720	// Or has a word just finished?
721	else if (in_word == true && is_character_letter_or_digit == false) {
722	in_word = false;
723
724	// Check if the word matches any of the query term equivalents
725	String word = new String(content_characters, word_start, (i - word_start));
726	if (query_term_variants.contains(word)) {
727	// We have found a matching word, so remember its location
728	word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
729	preceding_word_matched = true;
730	}
731	else {
732	preceding_word_matched = false;
733	}
734	}
735	}
736
737	// Don't forget the last word...
738	if (in_word == true) {
739	// Check if the word matches any of the query term equivalents
740	String word = new String(content_characters, word_start, (content_characters.length - word_start));
741	if (query_term_variants.contains(word)) {
742	// We have found a matching word, so remember its location
743	word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
744	}
745	}
746
747	ArrayList highlight_start_positions = new ArrayList();
748	ArrayList highlight_end_positions = new ArrayList();
749
750	// Deal with phrases now
751	ArrayList partial_phrase_matches = new ArrayList();
752	for (int i = 0; i < word_matches.size(); i++) {
753	WordMatch word_match = (WordMatch) word_matches.get(i);
754
755	// See if any partial phrase matches are extended by this word
756	if (word_match.preceding_word_matched) {
757	for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
758	PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
759	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
760	HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
761	if (phrase_query_p_term_x_variants.contains(word_match.word)) {
762	partial_phrase_match.num_words_matched++;
763
764	// Has a complete phrase match occurred?
765	if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
766	// Check for overlaps by looking at the previous highlight range
767	if (!highlight_end_positions.isEmpty()) {
768	int last_highlight_index = highlight_end_positions.size() - 1;
769	int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
770	if (last_highlight_end > partial_phrase_match.start_position) {
771	// There is an overlap, so remove the previous phrase match
772	int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
773	highlight_end_positions.remove(last_highlight_index);
774	partial_phrase_match.start_position = last_highlight_start;
775	}
776	}
777
778	highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
779	highlight_end_positions.add(new Integer(word_match.end_position));
780	}
781	// No, but add the partial match back into the list for next time
782	else {
783	partial_phrase_matches.add(partial_phrase_match);
784	}
785	}
786	}
787	}
788	else {
789	partial_phrase_matches.clear();
790	}
791
792	// See if this word is at the start of any of the phrases
793	for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
794	ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
795	HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
796	if (phrase_query_p_term_1_variants.contains(word_match.word)) {
797	// If this phrase is just one word long, we have a complete match
798	if (phrase_query_p_term_variants_list.size() == 1) {
799	highlight_start_positions.add(new Integer(word_match.start_position));
800	highlight_end_positions.add(new Integer(word_match.end_position));
801	}
802	// Otherwise we have the start of a potential phrase match
803	else {
804	partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
805	}
806	}
807	}
808	}
809
810	// Now add the annotation tags into the document at the correct points
811	Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
812
813	int last_wrote = 0;
814	for (int i = 0; i < highlight_start_positions.size(); i++) {
815	int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
816	int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
817
818	// Print anything before the highlight range
819	if (last_wrote < highlight_start) {
820	String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
821	// System.err.print(preceding_text);
822	content_element.appendChild(this.doc.createTextNode(preceding_text));
823	}
824
825	// Print the highlight text, annotated
826	if (highlight_end > last_wrote) {
827	String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
828	// System.err.print("\|" + highlight_text + "\|");
829	Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
830	annotation_element.setAttribute("type", "query_term");
831	content_element.appendChild(annotation_element);
832	last_wrote = highlight_end;
833	}
834	}
835
836	// Finish off any unwritten text
837	if (last_wrote < content_characters.length) {
838	String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
839	// System.err.print(remaining_text);
840	content_element.appendChild(this.doc.createTextNode(remaining_text));
841	}
842
843	return content_element;
844	}
845
846
847	static private class WordMatch
848	{
849	public String word;
850	public int start_position;
851	public int end_position;
852	public boolean preceding_word_matched;
853
854	public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
855	{
856	this.word = word;
857	this.start_position = start_position;
858	this.end_position = end_position;
859	this.preceding_word_matched = preceding_word_matched;
860	}
861	}
862
863
864	static private class PartialPhraseMatch
865	{
866	public int start_position;
867	public int query_phrase_number;
868	public int num_words_matched;
869
870	public PartialPhraseMatch(int start_position, int query_phrase_number)
871	{
872	this.start_position = start_position;
873	this.query_phrase_number = query_phrase_number;
874	this.num_words_matched = 1;
875	}
876	}
877	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: