source: main/branches/64_bit_Greenstone/greenstone3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 23632

Last change on this file since 23632 was 23632, checked in by sjm84, 13 years ago

Adding the latest trunk changes as well as tidying up several files and removing more -m32 stuff

  • Property svn:keywords set to Author Date Id Revision
File size: 36.7 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router
41 */
42public class DocumentAction extends Action {
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /** if this is set to true, when a document is displayed, any annotation
52 * type services (enrich) will be offered to the user as well */
53 protected boolean provide_annotations = false;
54
55 protected boolean highlight_query_terms = false;
56
57 public boolean configure() {
58 super.configure();
59 String highlight = (String)config_params.get("highlightQueryTerms");
60 if (highlight != null && highlight.equals("true")) {
61 highlight_query_terms = true;
62 }
63 String annotate = (String)config_params.get("displayAnnotationService");
64 if (annotate != null && annotate.equals("true")) {
65 provide_annotations = true;
66 }
67 return true;
68 }
69 public Node process (Node message_node)
70 {
71 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73 Element message = this.converter.nodeToElement(message_node);
74
75 // the response
76 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78 result.appendChild(page_response);
79
80 // get the request - assume only one
81 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83 HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85 // just in case there are some that need to get passed to the services
86 HashMap service_params = (HashMap)params.get("s0");
87
88
89 String has_rl = null;
90 String has_href = null;
91 has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
92 has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
93 String collection = (String) params.get(GSParams.COLLECTION);
94 String lang = request.getAttribute(GSXML.LANG_ATT);
95 String uid = request.getAttribute(GSXML.USER_ID_ATT);
96 String document_name = (String) params.get(GSParams.DOCUMENT);
97 if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals(""))) {
98 logger.error("no document specified!");
99 return result;
100 }
101 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
102 if (document_type == null) {
103 document_type = "simple";
104 }
105 //whether to retrieve siblings or not
106 boolean get_siblings = false;
107 String sibs = (String) params.get(SIBLING_ARG);
108 if (sibs != null && sibs.equals("1")) {
109 get_siblings = true;
110 }
111
112 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
113 if (sibling_num != null && !sibling_num.equals("")) {
114 // we have to modify the doc name
115 document_name = document_name+"."+sibling_num+".ss";
116 }
117
118 boolean expand_document = false;
119 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
120 if (ed_arg != null && ed_arg.equals("1")) {
121 expand_document = true;
122 }
123
124
125 boolean expand_contents = false;
126 if (expand_document) { // we always expand the contents with the text
127 expand_contents = true;
128 } else {
129 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
130 if (ec_arg != null && ec_arg.equals("1")) {
131 expand_contents = true;
132 }
133 }
134
135 //append site metadata
136 addSiteMetadata( page_response, lang, uid);
137
138 // get the additional data needed for the page
139 getBackgroundData(page_response, collection, lang, uid);
140 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
141
142 // the_document is where all the doc info - structure and metadata etc
143 // is added into, to be returned in the page
144 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
145 page_response.appendChild(the_document);
146
147 // set the doctype from the cgi arg as an attribute
148 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
149
150 // create a basic doc list containing the current node
151 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
152 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
153 basic_doc_list.appendChild(current_doc);
154 if (document_name.length()!=0){
155 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
156 }else if (has_href.length()!=0){
157 current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
158 current_doc.setAttribute("externalURL", has_rl);
159 }
160
161 // Create a parameter list to specify the required structure information
162 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
163
164 if (service_params != null) {
165 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
166 }
167
168 Element ds_param = null;
169 boolean get_structure = false;
170 boolean get_structure_info = false;
171 if (document_type.equals("paged")) {
172 get_structure_info = true;
173 // get teh info needed for paged naviagtion
174 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175 ds_param_list.appendChild(ds_param);
176 ds_param.setAttribute(GSXML.NAME_ATT, "info");
177 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
178 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
179 ds_param_list.appendChild(ds_param);
180 ds_param.setAttribute(GSXML.NAME_ATT, "info");
181 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
182 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
183 ds_param_list.appendChild(ds_param);
184 ds_param.setAttribute(GSXML.NAME_ATT, "info");
185 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
186
187 } else if (document_type.equals("hierarchy")){
188 get_structure = true;
189 if (expand_contents) {
190 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
191 ds_param_list.appendChild(ds_param);
192 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
193 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
194 } else {
195 // get the info needed for table of contents
196 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
197 ds_param_list.appendChild(ds_param);
198 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
199 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
200 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201 ds_param_list.appendChild(ds_param);
202 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
204 if (get_siblings) {
205 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206 ds_param_list.appendChild(ds_param);
207 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
208 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
209 }
210 }
211 } else {
212 // we dont need any structure
213 }
214
215 boolean has_dummy = false;
216 if (get_structure || get_structure_info) {
217
218 // Build a request to obtain the document structure
219 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
220 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
221 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
222 ds_message.appendChild(ds_request);
223 ds_request.appendChild(ds_param_list);
224
225 // create a doc_node_list and put in the doc_node that we are interested in
226 ds_request.appendChild(basic_doc_list);
227
228 // Process the document structure retrieve message
229 Element ds_response_message = (Element) this.mr.process(ds_message);
230 if (processErrorElements(ds_response_message, page_response)) {
231 return result;
232 }
233
234 // get the info and print out
235 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
236 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
237 path = GSPath.appendLink(path, "nodeStructureInfo");
238 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
239 // get the doc_node bit
240 if (ds_response_struct_info != null) {
241 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
242 }
243 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
244 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
245 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
246 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
247
248 if (ds_response_structure != null) {
249 // add the contents of the structure bit into the_document
250 NodeList structs = ds_response_structure.getChildNodes();
251 for (int i=0; i<structs.getLength();i++) {
252 the_document.appendChild(this.doc.importNode(structs.item(i), true));
253 }
254 } else {
255 // no structure nodes, so put in a dummy doc node
256 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
257 if (document_name.length()!=0){
258 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
259 }else if (has_href.length()!=0){
260 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
261 doc_node.setAttribute("externalURL", has_rl);
262 }
263 the_document.appendChild(doc_node);
264 has_dummy = true;
265 }
266 } else { // a simple type - we dont have a dummy node for simple
267 // should think about this more
268 // no structure request, so just put in a dummy doc node
269 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
270 if (document_name.length()!=0){
271 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
272 }else if (has_href.length()!=0){
273 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
274 doc_node.setAttribute("externalURL", has_rl);
275 }
276 the_document.appendChild(doc_node);
277 has_dummy = true;
278 }
279
280 // Build a request to obtain some document metadata
281 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
282 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
283 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
284 dm_message.appendChild(dm_request);
285 // Create a parameter list to specify the required metadata information
286
287 HashSet meta_names = new HashSet();
288 meta_names.add("Title"); // the default
289 if (format_elem != null) {
290 extractMetadataNames(format_elem, meta_names);
291 }
292
293 Element dm_param_list = createMetadataParamList(meta_names);
294 if (service_params != null) {
295 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
296 }
297
298 dm_request.appendChild(dm_param_list);
299
300
301 // create the doc node list for the metadata request
302 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
303 dm_request.appendChild(dm_doc_list);
304
305 // Add each node from the structure response into the metadata request
306 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
307 for (int i = 0; i < doc_nodes.getLength(); i++) {
308 Element doc_node = (Element) doc_nodes.item(i);
309 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
310
311 // Add the documentNode to the list
312 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313 dm_doc_list.appendChild(dm_doc_node);
314 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
315 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
316 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
317 }
318
319 // we also want a metadata request to the top level document to get
320 // assocfilepath - this could be cached too
321 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
322 dm_message.appendChild(doc_meta_request);
323 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
324 if (service_params != null) {
325 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
326 }
327
328 doc_meta_request.appendChild(doc_meta_param_list);
329 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
330 doc_meta_param_list.appendChild(doc_param);
331 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
332 doc_param.setAttribute(GSXML.VALUE_ATT, "assocfilepath");
333
334 // create the doc node list for the metadata request
335 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
336 doc_meta_request.appendChild(doc_list);
337
338 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
339 // the node we want is the root document node
340 if (document_name.length()!=0){
341 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
342 }else if (has_href.length()!=0){
343 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
344 doc_node.setAttribute("externalURL", has_rl);
345 }
346 doc_list.appendChild(doc_node);
347 Element dm_response_message = (Element) this.mr.process(dm_message);
348 if (processErrorElements(dm_response_message, page_response)) {
349 return result;
350 }
351
352 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
353 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
354
355 // Merge the metadata with the structure information
356 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
357 for (int i = 0; i < doc_nodes.getLength(); i++) {
358 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
359 }
360 // get the top level doc metadata out
361 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
362 Element top_doc_node = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode");
363 GSXML.mergeMetadataLists(the_document, top_doc_node);
364
365 // Build a request to obtain some document content
366 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
367 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
368 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
369 dc_message.appendChild(dc_request);
370
371
372 // Create a parameter list to specify the request parameters - empty for now
373 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
374 if (service_params != null) {
375 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
376 }
377
378 dc_request.appendChild(dc_param_list);
379
380 // get the content
381 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
382 if (expand_document) {
383 dc_request.appendChild(dm_doc_list);
384 } else {
385 dc_request.appendChild(basic_doc_list);
386 }
387 logger.debug("request = "+converter.getString(dc_message));
388 Element dc_response_message = (Element) this.mr.process(dc_message);
389 if (processErrorElements(dc_response_message, page_response)) {
390 return result;
391 }
392
393 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
394
395 if (expand_document) {
396 // Merge the content with the structure information
397 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
398 for (int i = 0; i < doc_nodes.getLength(); i++) {
399 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
400 if (content != null) {
401 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
402 }
403 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
404 }
405 } else {
406 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
407 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
408 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
409 Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
410
411 if (dc_response_doc_content == null) {
412 // no content to add
413 if (dc_response_doc_external !=null){
414 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
415
416 the_document.setAttribute("selectedNode", modified_doc_id);
417 the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
418 }
419 return result;
420 }
421 if (highlight_query_terms) {
422 dc_response_doc.removeChild(dc_response_doc_content);
423
424 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
425 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
426 }
427
428
429 if (provide_annotations) {
430 String service_selected = (String)params.get(ENRICH_DOC_ARG);
431 if (service_selected != null && service_selected.equals("1")) {
432 // now we can modifiy the response doc if needed
433 String enrich_service = (String)params.get(GSParams.SERVICE);
434 // send a message to the service
435 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
436 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
437 enrich_message.appendChild(enrich_request);
438 // check for parameters
439 HashMap e_service_params = (HashMap)params.get("s1");
440 if (e_service_params != null) {
441 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
442 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
443 enrich_request.appendChild(enrich_pl);
444 }
445 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
446 enrich_request.appendChild(e_doc_list);
447 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
448
449 Node enrich_response = this.mr.process(enrich_message);
450
451 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
452 path = GSPath.createPath(links);
453 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
454
455 }
456 } // if provide_annotations
457
458
459 // use the returned id rather than the sent one cos there may have
460 // been modifiers such as .pr that are removed.
461 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
462 the_document.setAttribute("selectedNode", modified_doc_id);
463 if (has_dummy) {
464 // change the id if necessary and add the content
465 Element dummy_node = (Element)doc_nodes.item(0);
466
467 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
468 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
469 // hack for simple type
470 if (document_type.equals("simple")) {
471 // we dont want the internal docNode, just want the content and metadata in the document
472 // rethink this!!
473 the_document.removeChild(dummy_node);
474
475 NodeList dummy_children = dummy_node.getChildNodes();
476 //for (int i=0; i<dummy_children.getLength(); i++) {
477 for (int i=dummy_children.getLength()-1; i>=0; i--) {
478 // special case as we don't want more than one metadata list
479 if (dummy_children.item(i).getNodeName().equals(GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER)) {
480 GSXML.mergeMetadataFromList(the_document, dummy_children.item(i));
481 } else {
482 the_document.appendChild(dummy_children.item(i));
483 }
484 }
485 }
486 } else {
487 // Merge the document content with the metadata and structure information
488 for (int i = 0; i < doc_nodes.getLength(); i++) {
489 Node dn = doc_nodes.item(i);
490 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
491 if (dn_id.equals(modified_doc_id)) {
492 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
493 break;
494 }
495 }
496 }
497 }
498 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
499 return result;
500 }
501
502 /** tell the param class what its arguments are
503 * if an action has its own arguments, this should add them to the params
504 * object - particularly important for args that should not be saved */
505 public boolean getActionParameters(GSParams params) {
506 params.addParameter(GOTO_PAGE_ARG, false);
507 params.addParameter(ENRICH_DOC_ARG, false);
508 return true;
509 }
510
511
512 /** this method gets the collection description, the format info, the
513 * list of enrich services, etc - stuff that is needed for the page,
514 * but is the same whatever the query is - should be cached */
515 protected boolean getBackgroundData(Element page_response,
516 String collection, String lang,
517 String uid) {
518
519 // create a message to process - contains requests for the collection
520 // description, the format element, the enrich services on offer
521 // these could all be cached
522 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
523 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
524 // the format request - ignore for now, where does this request go to??
525 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
526 info_message.appendChild(format_request);
527
528 // the enrich_services request - only do this if provide_annotations is true
529
530 if (provide_annotations) {
531 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
532 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
533 info_message.appendChild(enrich_services_request);
534 }
535
536 Element info_response = (Element)this.mr.process(info_message);
537
538 // the collection is the first response
539 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
540 Element format_resp = (Element) responses.item(0);
541
542 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
543 if (format_elem != null) {
544 logger.debug("doc action found a format statement");
545 // set teh format type
546 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
547 page_response.appendChild(this.doc.importNode(format_elem, true));
548 }
549
550 if (provide_annotations) {
551 Element services_resp = (Element)responses.item(1);
552
553 // a new message for the mr
554 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
555
556 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
557 boolean service_found = false;
558 for (int j=0; j<e_services.getLength(); j++) {
559 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
560 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
561 enrich_message.appendChild(s);
562 service_found = true;
563 }
564 }
565 if (service_found) {
566 Element enrich_response = (Element)this.mr.process(enrich_message);
567
568 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
569 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
570 for (int i=0; i<e_responses.getLength(); i++) {
571 Element e_resp = (Element)e_responses.item(i);
572 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
573 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
574 service_list.appendChild(e_service);
575 }
576 page_response.appendChild(service_list);
577 }
578 } // if provide_annotations
579 return true;
580
581 }
582
583 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
584 */
585 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
586
587 // do the query again to get term info
588 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
589 HashMap params = GSXML.extractParams(cgi_param_list, false);
590
591 HashMap previous_params = (HashMap)params.get("p");
592 if (previous_params == null) {
593 return dc_response_doc_content;
594 }
595 String service_name = (String)previous_params.get(GSParams.SERVICE);
596 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
597 logger.debug("invalid service, not doing highlighting");
598 return dc_response_doc_content;
599 }
600 String collection = (String)params.get(GSParams.COLLECTION);
601 String lang = request.getAttribute(GSXML.LANG_ATT);
602 String uid = request.getAttribute(GSXML.USER_ID_ATT);
603 String to = GSPath.appendLink(collection, service_name);
604
605 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
606 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
607 mr_query_message.appendChild(mr_query_request);
608
609 // paramList
610 HashMap service_params = (HashMap)params.get("s1");
611
612 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
613 GSXML.addParametersToList(this.doc, query_param_list, service_params);
614 mr_query_request.appendChild(query_param_list);
615
616 // do the query
617 Element mr_query_response = (Element)this.mr.process(mr_query_message);
618
619 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
620 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
621 if (query_term_list_element == null) {
622 // no term info
623 logger.error("No query term information.\n");
624 return dc_response_doc_content;
625 }
626
627 String content = GSXML.getNodeText(dc_response_doc_content);
628
629 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
630 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
631
632 HashSet query_term_variants = new HashSet();
633 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
634 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
635 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
636 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
637 for (int j = 0; j < equivalent_terms.length; j++) {
638 query_term_variants.add(equivalent_terms[j]);
639 }
640 }
641
642 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
643
644 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
645 String performed_query = GSXML.getNodeText(query_element) + " ";
646
647 ArrayList phrase_query_p_term_variants_list = new ArrayList();
648 int term_start = 0;
649 boolean in_term = false;
650 boolean in_phrase = false;
651 for (int i = 0; i < performed_query.length(); i++) {
652 char character = performed_query.charAt(i);
653 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
654
655 // Has a query term just started?
656 if (in_term == false && is_character_letter_or_digit == true) {
657 in_term = true;
658 term_start = i;
659 }
660
661 // Or has a term just finished?
662 else if (in_term == true && is_character_letter_or_digit == false) {
663 in_term = false;
664 String term = performed_query.substring(term_start, i);
665
666 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
667 if (term_element != null) {
668
669 HashSet phrase_query_p_term_x_variants = new HashSet();
670
671 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
672 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
673 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
674 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
675 for (int k = 0; k < term_equivalent_terms.length; k++) {
676 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
677 }
678 }
679 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
680
681 if (in_phrase == false) {
682 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
683 phrase_query_p_term_variants_list = new ArrayList();
684 }
685 }
686 }
687 // Watch for phrases (surrounded by quotes)
688 if (character == '\"') {
689 // Has a phrase just started?
690 if (in_phrase == false) {
691 in_phrase = true;
692 }
693 // Or has a phrase just finished?
694 else if (in_phrase == true) {
695 in_phrase = false;
696 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
697 }
698
699 phrase_query_p_term_variants_list = new ArrayList();
700 }
701 }
702
703 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
704 }
705
706
707 /**
708 * Highlights query terms in a piece of text.
709 */
710 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
711 {
712 // Convert the content string to an array of characters for speed
713 char[] content_characters = new char[content.length()];
714 content.getChars(0, content.length(), content_characters, 0);
715
716 // Now skim through the content, identifying word matches
717 ArrayList word_matches = new ArrayList();
718 int word_start = 0;
719 boolean in_word = false;
720 boolean preceding_word_matched = false;
721 for (int i = 0; i < content_characters.length; i++) {
722 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
723
724 // Has a word just started?
725 if (in_word == false && is_character_letter_or_digit == true) {
726 in_word = true;
727 word_start = i;
728 }
729
730 // Or has a word just finished?
731 else if (in_word == true && is_character_letter_or_digit == false) {
732 in_word = false;
733
734 // Check if the word matches any of the query term equivalents
735 String word = new String(content_characters, word_start, (i - word_start));
736 if (query_term_variants.contains(word)) {
737 // We have found a matching word, so remember its location
738 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
739 preceding_word_matched = true;
740 }
741 else {
742 preceding_word_matched = false;
743 }
744 }
745 }
746
747 // Don't forget the last word...
748 if (in_word == true) {
749 // Check if the word matches any of the query term equivalents
750 String word = new String(content_characters, word_start, (content_characters.length - word_start));
751 if (query_term_variants.contains(word)) {
752 // We have found a matching word, so remember its location
753 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
754 }
755 }
756
757 ArrayList highlight_start_positions = new ArrayList();
758 ArrayList highlight_end_positions = new ArrayList();
759
760 // Deal with phrases now
761 ArrayList partial_phrase_matches = new ArrayList();
762 for (int i = 0; i < word_matches.size(); i++) {
763 WordMatch word_match = (WordMatch) word_matches.get(i);
764
765 // See if any partial phrase matches are extended by this word
766 if (word_match.preceding_word_matched) {
767 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
768 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
769 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
770 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
771 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
772 partial_phrase_match.num_words_matched++;
773
774 // Has a complete phrase match occurred?
775 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
776 // Check for overlaps by looking at the previous highlight range
777 if (!highlight_end_positions.isEmpty()) {
778 int last_highlight_index = highlight_end_positions.size() - 1;
779 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
780 if (last_highlight_end > partial_phrase_match.start_position) {
781 // There is an overlap, so remove the previous phrase match
782 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
783 highlight_end_positions.remove(last_highlight_index);
784 partial_phrase_match.start_position = last_highlight_start;
785 }
786 }
787
788 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
789 highlight_end_positions.add(new Integer(word_match.end_position));
790 }
791 // No, but add the partial match back into the list for next time
792 else {
793 partial_phrase_matches.add(partial_phrase_match);
794 }
795 }
796 }
797 }
798 else {
799 partial_phrase_matches.clear();
800 }
801
802 // See if this word is at the start of any of the phrases
803 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
804 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
805 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
806 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
807 // If this phrase is just one word long, we have a complete match
808 if (phrase_query_p_term_variants_list.size() == 1) {
809 highlight_start_positions.add(new Integer(word_match.start_position));
810 highlight_end_positions.add(new Integer(word_match.end_position));
811 }
812 // Otherwise we have the start of a potential phrase match
813 else {
814 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
815 }
816 }
817 }
818 }
819
820 // Now add the annotation tags into the document at the correct points
821 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
822
823 int last_wrote = 0;
824 for (int i = 0; i < highlight_start_positions.size(); i++) {
825 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
826 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
827
828 // Print anything before the highlight range
829 if (last_wrote < highlight_start) {
830 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
831 content_element.appendChild(this.doc.createTextNode(preceding_text));
832 }
833
834 // Print the highlight text, annotated
835 if (highlight_end > last_wrote) {
836 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
837 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
838 annotation_element.setAttribute("type", "query_term");
839 content_element.appendChild(annotation_element);
840 last_wrote = highlight_end;
841 }
842 }
843
844 // Finish off any unwritten text
845 if (last_wrote < content_characters.length) {
846 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
847 content_element.appendChild(this.doc.createTextNode(remaining_text));
848 }
849
850 return content_element;
851 }
852
853
854 static private class WordMatch
855 {
856 public String word;
857 public int start_position;
858 public int end_position;
859 public boolean preceding_word_matched;
860
861 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
862 {
863 this.word = word;
864 this.start_position = start_position;
865 this.end_position = end_position;
866 this.preceding_word_matched = preceding_word_matched;
867 }
868 }
869
870
871 static private class PartialPhraseMatch
872 {
873 public int start_position;
874 public int query_phrase_number;
875 public int num_words_matched;
876
877 public PartialPhraseMatch(int start_position, int query_phrase_number)
878 {
879 this.start_position = start_position;
880 this.query_phrase_number = query_phrase_number;
881 this.num_words_matched = 1;
882 }
883 }
884}
Note: See TracBrowser for help on using the repository browser.