source: greenstone3/trunk/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 14525

Last change on this file since 14525 was 14525, checked in by qq6, 17 years ago

adding href and rl values into the document node, if they can be identified from the params list

  • Property svn:keywords set to Author Date Id Revision
File size: 36.5 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router
41 */
42public class DocumentAction extends Action {
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /** if this is set to true, when a document is displayed, any annotation
52 * type services (enrich) will be offered to the user as well */
53 protected boolean provide_annotations = false;
54
55 protected boolean highlight_query_terms = false;
56
57 public boolean configure() {
58 super.configure();
59 String highlight = (String)config_params.get("highlightQueryTerms");
60 if (highlight != null && highlight.equals("true")) {
61 highlight_query_terms = true;
62 }
63 String annotate = (String)config_params.get("displayAnnotationService");
64 if (annotate != null && annotate.equals("true")) {
65 provide_annotations = true;
66 }
67 return true;
68 }
69 public Element process (Element message)
70 {
71 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73 // the response
74 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
75 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
76 result.appendChild(page_response);
77
78 // get the request - assume only one
79 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
80 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
81 HashMap params = GSXML.extractParams(cgi_paramList, false);
82
83 // just in case there are some that need to get passed to the services
84 HashMap service_params = (HashMap)params.get("s0");
85
86 String has_rl = null;
87 String has_href = null;
88 has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
89 has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
90 String collection = (String) params.get(GSParams.COLLECTION);
91 String lang = request.getAttribute(GSXML.LANG_ATT);
92 String uid = request.getAttribute(GSXML.USER_ID_ATT);
93 String document_name = (String) params.get(GSParams.DOCUMENT);
94 if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals(""))) {
95 logger.error("no document specified!");
96 return result;
97 }
98 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
99 if (document_type == null) {
100 document_type = "simple";
101 }
102 //whether to retrieve siblings or not
103 boolean get_siblings = false;
104 String sibs = (String) params.get(SIBLING_ARG);
105 if (sibs != null && sibs.equals("1")) {
106 get_siblings = true;
107 }
108
109 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
110 if (sibling_num != null && !sibling_num.equals("")) {
111 // we have to modify the doc name
112 document_name = document_name+"."+sibling_num+".ss";
113 }
114
115 boolean expand_document = false;
116 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
117 if (ed_arg != null && ed_arg.equals("1")) {
118 expand_document = true;
119 }
120
121
122 boolean expand_contents = false;
123 if (expand_document) { // we always expand the contents with the text
124 expand_contents = true;
125 } else {
126 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
127 if (ec_arg != null && ec_arg.equals("1")) {
128 expand_contents = true;
129 }
130 }
131 // get the additional data needed for the page
132 getBackgroundData(page_response, collection, lang, uid);
133 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
134
135 // the_document is where all the doc info - structure and metadata etc
136 // is added into, to be returned in the page
137 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
138 page_response.appendChild(the_document);
139
140 // set the doctype from the cgi arg as an attribute
141 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
142
143 // create a basic doc list containing the current node
144 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
145 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
146 basic_doc_list.appendChild(current_doc);
147 if (document_name.length()!=0){
148 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
149 }else if (has_href.length()!=0){
150 current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
151 current_doc.setAttribute("externalURL", has_rl);
152 }
153
154 // Create a parameter list to specify the required structure information
155 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
156
157 if (service_params != null) {
158 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
159 }
160
161 Element ds_param = null;
162 boolean get_structure = false;
163 boolean get_structure_info = false;
164 if (document_type.equals("paged")) {
165 get_structure_info = true;
166 // get teh info needed for paged naviagtion
167 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
168 ds_param_list.appendChild(ds_param);
169 ds_param.setAttribute(GSXML.NAME_ATT, "info");
170 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
171 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
172 ds_param_list.appendChild(ds_param);
173 ds_param.setAttribute(GSXML.NAME_ATT, "info");
174 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
175 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
176 ds_param_list.appendChild(ds_param);
177 ds_param.setAttribute(GSXML.NAME_ATT, "info");
178 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
179
180 } else if (document_type.equals("hierarchy")){
181 get_structure = true;
182 if (expand_contents) {
183 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
184 ds_param_list.appendChild(ds_param);
185 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
186 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
187 } else {
188 // get the info needed for table of contents
189 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
190 ds_param_list.appendChild(ds_param);
191 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
192 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
193 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
194 ds_param_list.appendChild(ds_param);
195 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
196 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
197 if (get_siblings) {
198 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
199 ds_param_list.appendChild(ds_param);
200 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
201 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
202 }
203 }
204 } else {
205 // we dont need any structure
206 }
207
208 boolean has_dummy = false;
209 if (get_structure || get_structure_info) {
210
211 // Build a request to obtain the document structure
212 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
213 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
214 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
215 ds_message.appendChild(ds_request);
216 ds_request.appendChild(ds_param_list);
217
218 // create a doc_node_list and put in the doc_node that we are interested in
219 ds_request.appendChild(basic_doc_list);
220
221 // Process the document structure retrieve message
222 Element ds_response_message = (Element) this.mr.process(ds_message);
223 if (processErrorElements(ds_response_message, page_response)) {
224 return result;
225 }
226
227 // get the info and print out
228 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
229 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
230 path = GSPath.appendLink(path, "nodeStructureInfo");
231 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
232 // get the doc_node bit
233 if (ds_response_struct_info != null) {
234 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
235 }
236 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
237 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
238 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
239 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
240
241 if (ds_response_structure != null) {
242 // add the contents of the structure bit into the_document
243 NodeList structs = ds_response_structure.getChildNodes();
244 for (int i=0; i<structs.getLength();i++) {
245 the_document.appendChild(this.doc.importNode(structs.item(i), true));
246 }
247 } else {
248 // no structure nodes, so put in a dummy doc node
249 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
250 if (document_name.length()!=0){
251 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
252 }else if (has_href.length()!=0){
253 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
254 doc_node.setAttribute("externalURL", has_rl);
255 }
256 the_document.appendChild(doc_node);
257 has_dummy = true;
258 }
259 } else { // a simple type - we dont have a dummy node for simple
260 // should think about this more
261 // no structure request, so just put in a dummy doc node
262 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
263 if (document_name.length()!=0){
264 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
265 }else if (has_href.length()!=0){
266 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
267 doc_node.setAttribute("externalURL", has_rl);
268 }
269 the_document.appendChild(doc_node);
270 has_dummy = true;
271 }
272
273 // Build a request to obtain some document metadata
274 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
275 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
276 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
277 dm_message.appendChild(dm_request);
278 // Create a parameter list to specify the required metadata information
279
280 HashSet meta_names = new HashSet();
281 meta_names.add("Title"); // the default
282 if (format_elem != null) {
283 extractMetadataNames(format_elem, meta_names);
284 }
285
286 Element dm_param_list = createMetadataParamList(meta_names);
287 if (service_params != null) {
288 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
289 }
290
291 dm_request.appendChild(dm_param_list);
292
293
294 // create the doc node list for the metadata request
295 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
296 dm_request.appendChild(dm_doc_list);
297
298 // Add each node from the structure response into the metadata request
299 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
300 for (int i = 0; i < doc_nodes.getLength(); i++) {
301 Element doc_node = (Element) doc_nodes.item(i);
302 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
303
304 // Add the documentNode to the list
305 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
306 dm_doc_list.appendChild(dm_doc_node);
307 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
308 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
309 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
310 }
311
312 // we also want a metadata request to the top level document to get
313 // assocfilepath - this could be cached too
314 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
315 dm_message.appendChild(doc_meta_request);
316 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
317 if (service_params != null) {
318 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
319 }
320
321 doc_meta_request.appendChild(doc_meta_param_list);
322 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
323 doc_meta_param_list.appendChild(doc_param);
324 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
325 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
326
327 // create the doc node list for the metadata request
328 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
329 doc_meta_request.appendChild(doc_list);
330
331 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
332 // the node we want is the root document node
333 if (document_name.length()!=0){
334 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
335 }else if (has_href.length()!=0){
336 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
337 doc_node.setAttribute("externalURL", has_rl);
338 }
339 doc_list.appendChild(doc_node);
340 Element dm_response_message = (Element) this.mr.process(dm_message);
341 if (processErrorElements(dm_response_message, page_response)) {
342 return result;
343 }
344
345 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
346 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
347
348 // Merge the metadata with the structure information
349 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
350 for (int i = 0; i < doc_nodes.getLength(); i++) {
351 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
352 }
353 // get teh top level doc metadata out
354 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
355 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
356 if (doc_meta_list != null) {
357 the_document.appendChild(this.doc.importNode(doc_meta_list, true));
358 }
359 // Build a request to obtain some document content
360 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
361 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
362 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
363 dc_message.appendChild(dc_request);
364
365
366 // Create a parameter list to specify the request parameters - empty for now
367 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
368 if (service_params != null) {
369 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
370 }
371
372 dc_request.appendChild(dc_param_list);
373
374 // get the content
375 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
376 if (expand_document) {
377 dc_request.appendChild(dm_doc_list);
378 } else {
379 dc_request.appendChild(basic_doc_list);
380 }
381 logger.debug("request = "+converter.getString(dc_message));
382 Element dc_response_message = (Element) this.mr.process(dc_message);
383 if (processErrorElements(dc_response_message, page_response)) {
384 return result;
385 }
386
387 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
388
389 if (expand_document) {
390 // Merge the content with the structure information
391 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
392 for (int i = 0; i < doc_nodes.getLength(); i++) {
393 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
394 if (content != null) {
395 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
396 }
397 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
398 }
399 } else {
400 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
401 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
402 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
403 Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
404
405 if (dc_response_doc_content == null) {
406 // no content to add
407 if (dc_response_doc_external !=null){
408 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
409
410 the_document.setAttribute("selectedNode", modified_doc_id);
411 the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
412 }
413 return result;
414 }
415 if (highlight_query_terms) {
416 dc_response_doc.removeChild(dc_response_doc_content);
417
418 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
419 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
420 }
421
422
423 if (provide_annotations) {
424 String service_selected = (String)params.get(ENRICH_DOC_ARG);
425 if (service_selected != null && service_selected.equals("1")) {
426 // now we can modifiy the response doc if needed
427 String enrich_service = (String)params.get(GSParams.SERVICE);
428 // send a message to the service
429 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
430 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
431 enrich_message.appendChild(enrich_request);
432 // check for parameters
433 HashMap e_service_params = (HashMap)params.get("s1");
434 if (e_service_params != null) {
435 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
436 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
437 enrich_request.appendChild(enrich_pl);
438 }
439 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
440 enrich_request.appendChild(e_doc_list);
441 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
442
443 Element enrich_response = this.mr.process(enrich_message);
444
445 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
446 path = GSPath.createPath(links);
447 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
448
449 }
450 } // if provide_annotations
451
452
453 // use the returned id rather than the sent one cos there may have
454 // been modifiers such as .pr that are removed.
455 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
456 the_document.setAttribute("selectedNode", modified_doc_id);
457 if (has_dummy) {
458 // change the id if necessary and add the content
459 Element dummy_node = (Element)doc_nodes.item(0);
460
461 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
462 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
463 // hack for simple type
464 if (document_type.equals("simple")) {
465 // we dont want the internal docNode, just want the content and metadata in the document
466 // rethink this!!
467 the_document.removeChild(dummy_node);
468
469 NodeList dummy_children = dummy_node.getChildNodes();
470 //for (int i=0; i<dummy_children.getLength(); i++) {
471 for (int i=dummy_children.getLength()-1; i>=0; i--) {
472 the_document.appendChild(dummy_children.item(i));
473
474 }
475 }
476 } else {
477 // Merge the document content with the metadata and structure information
478 for (int i = 0; i < doc_nodes.getLength(); i++) {
479 Node dn = doc_nodes.item(i);
480 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
481 if (dn_id.equals(modified_doc_id)) {
482 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
483 break;
484 }
485 }
486 }
487 }
488 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
489 return result;
490 }
491
492 /** tell the param class what its arguments are
493 * if an action has its own arguments, this should add them to the params
494 * object - particularly important for args that should not be saved */
495 public boolean getActionParameters(GSParams params) {
496 params.addParameter(GOTO_PAGE_ARG, false);
497 params.addParameter(ENRICH_DOC_ARG, false);
498 return true;
499 }
500
501
502 /** this method gets the collection description, the format info, the
503 * list of enrich services, etc - stuff that is needed for the page,
504 * but is the same whatever the query is - should be cached */
505 protected boolean getBackgroundData(Element page_response,
506 String collection, String lang,
507 String uid) {
508
509 // create a message to process - contains requests for the collection
510 // description, the format element, the enrich services on offer
511 // these could all be cached
512 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
513 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
514 // the format request - ignore for now, where does this request go to??
515 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
516 info_message.appendChild(format_request);
517
518 // the enrich_services request - only do this if provide_annotations is true
519
520 if (provide_annotations) {
521 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
522 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
523 info_message.appendChild(enrich_services_request);
524 }
525
526 Element info_response = (Element)this.mr.process(info_message);
527
528 // the collection is the first response
529 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
530 Element format_resp = (Element) responses.item(0);
531
532 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
533 if (format_elem != null) {
534 logger.debug("doc action found a format statement");
535 // set teh format type
536 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
537 page_response.appendChild(this.doc.importNode(format_elem, true));
538 }
539
540 if (provide_annotations) {
541 Element services_resp = (Element)responses.item(1);
542
543 // a new message for the mr
544 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
545
546 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
547 boolean service_found = false;
548 for (int j=0; j<e_services.getLength(); j++) {
549 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
550 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
551 enrich_message.appendChild(s);
552 service_found = true;
553 }
554 }
555 if (service_found) {
556 Element enrich_response = this.mr.process(enrich_message);
557
558 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
559 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
560 for (int i=0; i<e_responses.getLength(); i++) {
561 Element e_resp = (Element)e_responses.item(i);
562 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
563 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
564 service_list.appendChild(e_service);
565 }
566 page_response.appendChild(service_list);
567 }
568 } // if provide_annotations
569 return true;
570
571 }
572
573 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
574 */
575 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
576
577 // do the query again to get term info
578 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
579 HashMap params = GSXML.extractParams(cgi_param_list, false);
580
581 HashMap previous_params = (HashMap)params.get("p");
582 if (previous_params == null) {
583 return dc_response_doc_content;
584 }
585 String service_name = (String)previous_params.get(GSParams.SERVICE);
586 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
587 logger.error("invalid service, not doing highlighting");
588 return dc_response_doc_content;
589 }
590 String collection = (String)params.get(GSParams.COLLECTION);
591 String lang = request.getAttribute(GSXML.LANG_ATT);
592 String uid = request.getAttribute(GSXML.USER_ID_ATT);
593 String to = GSPath.appendLink(collection, service_name);
594
595 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
596 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
597 mr_query_message.appendChild(mr_query_request);
598
599 // paramList
600 HashMap service_params = (HashMap)params.get("s1");
601
602 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
603 GSXML.addParametersToList(this.doc, query_param_list, service_params);
604 mr_query_request.appendChild(query_param_list);
605
606 // do the query
607 Element mr_query_response = (Element)this.mr.process(mr_query_message);
608
609 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
610 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
611 if (query_term_list_element == null) {
612 // no term info
613 logger.error("No query term information.\n");
614 return dc_response_doc_content;
615 }
616
617 String content = GSXML.getNodeText(dc_response_doc_content);
618
619 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
620 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
621
622 HashSet query_term_variants = new HashSet();
623 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
624 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
625 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
626 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
627 for (int j = 0; j < equivalent_terms.length; j++) {
628 query_term_variants.add(equivalent_terms[j]);
629 }
630 }
631
632 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
633
634 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
635 String performed_query = GSXML.getNodeText(query_element) + " ";
636
637 ArrayList phrase_query_p_term_variants_list = new ArrayList();
638 int term_start = 0;
639 boolean in_term = false;
640 boolean in_phrase = false;
641 for (int i = 0; i < performed_query.length(); i++) {
642 char character = performed_query.charAt(i);
643 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
644
645 // Has a query term just started?
646 if (in_term == false && is_character_letter_or_digit == true) {
647 in_term = true;
648 term_start = i;
649 }
650
651 // Or has a term just finished?
652 else if (in_term == true && is_character_letter_or_digit == false) {
653 in_term = false;
654 String term = performed_query.substring(term_start, i);
655
656 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
657 if (term_element != null) {
658
659 HashSet phrase_query_p_term_x_variants = new HashSet();
660
661 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
662 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
663 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
664 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
665 for (int k = 0; k < term_equivalent_terms.length; k++) {
666 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
667 }
668 }
669 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
670
671 if (in_phrase == false) {
672 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
673 phrase_query_p_term_variants_list = new ArrayList();
674 }
675 }
676 }
677 // Watch for phrases (surrounded by quotes)
678 if (character == '\"') {
679 // Has a phrase just started?
680 if (in_phrase == false) {
681 in_phrase = true;
682 }
683 // Or has a phrase just finished?
684 else if (in_phrase == true) {
685 in_phrase = false;
686 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
687 }
688
689 phrase_query_p_term_variants_list = new ArrayList();
690 }
691 }
692
693 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
694 }
695
696
697 /**
698 * Highlights query terms in a piece of text.
699 */
700 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
701 {
702 // Convert the content string to an array of characters for speed
703 char[] content_characters = new char[content.length()];
704 content.getChars(0, content.length(), content_characters, 0);
705
706 // Now skim through the content, identifying word matches
707 ArrayList word_matches = new ArrayList();
708 int word_start = 0;
709 boolean in_word = false;
710 boolean preceding_word_matched = false;
711 for (int i = 0; i < content_characters.length; i++) {
712 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
713
714 // Has a word just started?
715 if (in_word == false && is_character_letter_or_digit == true) {
716 in_word = true;
717 word_start = i;
718 }
719
720 // Or has a word just finished?
721 else if (in_word == true && is_character_letter_or_digit == false) {
722 in_word = false;
723
724 // Check if the word matches any of the query term equivalents
725 String word = new String(content_characters, word_start, (i - word_start));
726 if (query_term_variants.contains(word)) {
727 // We have found a matching word, so remember its location
728 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
729 preceding_word_matched = true;
730 }
731 else {
732 preceding_word_matched = false;
733 }
734 }
735 }
736
737 // Don't forget the last word...
738 if (in_word == true) {
739 // Check if the word matches any of the query term equivalents
740 String word = new String(content_characters, word_start, (content_characters.length - word_start));
741 if (query_term_variants.contains(word)) {
742 // We have found a matching word, so remember its location
743 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
744 }
745 }
746
747 ArrayList highlight_start_positions = new ArrayList();
748 ArrayList highlight_end_positions = new ArrayList();
749
750 // Deal with phrases now
751 ArrayList partial_phrase_matches = new ArrayList();
752 for (int i = 0; i < word_matches.size(); i++) {
753 WordMatch word_match = (WordMatch) word_matches.get(i);
754
755 // See if any partial phrase matches are extended by this word
756 if (word_match.preceding_word_matched) {
757 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
758 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
759 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
760 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
761 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
762 partial_phrase_match.num_words_matched++;
763
764 // Has a complete phrase match occurred?
765 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
766 // Check for overlaps by looking at the previous highlight range
767 if (!highlight_end_positions.isEmpty()) {
768 int last_highlight_index = highlight_end_positions.size() - 1;
769 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
770 if (last_highlight_end > partial_phrase_match.start_position) {
771 // There is an overlap, so remove the previous phrase match
772 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
773 highlight_end_positions.remove(last_highlight_index);
774 partial_phrase_match.start_position = last_highlight_start;
775 }
776 }
777
778 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
779 highlight_end_positions.add(new Integer(word_match.end_position));
780 }
781 // No, but add the partial match back into the list for next time
782 else {
783 partial_phrase_matches.add(partial_phrase_match);
784 }
785 }
786 }
787 }
788 else {
789 partial_phrase_matches.clear();
790 }
791
792 // See if this word is at the start of any of the phrases
793 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
794 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
795 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
796 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
797 // If this phrase is just one word long, we have a complete match
798 if (phrase_query_p_term_variants_list.size() == 1) {
799 highlight_start_positions.add(new Integer(word_match.start_position));
800 highlight_end_positions.add(new Integer(word_match.end_position));
801 }
802 // Otherwise we have the start of a potential phrase match
803 else {
804 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
805 }
806 }
807 }
808 }
809
810 // Now add the annotation tags into the document at the correct points
811 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
812
813 int last_wrote = 0;
814 for (int i = 0; i < highlight_start_positions.size(); i++) {
815 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
816 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
817
818 // Print anything before the highlight range
819 if (last_wrote < highlight_start) {
820 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
821 // System.err.print(preceding_text);
822 content_element.appendChild(this.doc.createTextNode(preceding_text));
823 }
824
825 // Print the highlight text, annotated
826 if (highlight_end > last_wrote) {
827 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
828 // System.err.print("|" + highlight_text + "|");
829 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
830 annotation_element.setAttribute("type", "query_term");
831 content_element.appendChild(annotation_element);
832 last_wrote = highlight_end;
833 }
834 }
835
836 // Finish off any unwritten text
837 if (last_wrote < content_characters.length) {
838 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
839 // System.err.print(remaining_text);
840 content_element.appendChild(this.doc.createTextNode(remaining_text));
841 }
842
843 return content_element;
844 }
845
846
847 static private class WordMatch
848 {
849 public String word;
850 public int start_position;
851 public int end_position;
852 public boolean preceding_word_matched;
853
854 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
855 {
856 this.word = word;
857 this.start_position = start_position;
858 this.end_position = end_position;
859 this.preceding_word_matched = preceding_word_matched;
860 }
861 }
862
863
864 static private class PartialPhraseMatch
865 {
866 public int start_position;
867 public int query_phrase_number;
868 public int num_words_matched;
869
870 public PartialPhraseMatch(int start_position, int query_phrase_number)
871 {
872 this.start_position = start_position;
873 this.query_phrase_number = query_phrase_number;
874 this.num_words_matched = 1;
875 }
876 }
877}
Note: See TracBrowser for help on using the repository browser.