source: greenstone3/trunk/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 20292

Last change on this file since 20292 was 20292, checked in by kjdon, 15 years ago

removed some System.err debug messages, which don't look like they are needed.

  • Property svn:keywords set to Author Date Id Revision
File size: 36.5 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router
41 */
42public class DocumentAction extends Action {
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /** if this is set to true, when a document is displayed, any annotation
52 * type services (enrich) will be offered to the user as well */
53 protected boolean provide_annotations = false;
54
55 protected boolean highlight_query_terms = false;
56
57 public boolean configure() {
58 super.configure();
59 String highlight = (String)config_params.get("highlightQueryTerms");
60 if (highlight != null && highlight.equals("true")) {
61 highlight_query_terms = true;
62 }
63 String annotate = (String)config_params.get("displayAnnotationService");
64 if (annotate != null && annotate.equals("true")) {
65 provide_annotations = true;
66 }
67 return true;
68 }
69 public Node process (Node message_node)
70 {
71 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73 Element message = this.converter.nodeToElement(message_node);
74
75 // the response
76 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
77 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
78 result.appendChild(page_response);
79
80 // get the request - assume only one
81 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
82 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
83 HashMap params = GSXML.extractParams(cgi_paramList, false);
84
85 // just in case there are some that need to get passed to the services
86 HashMap service_params = (HashMap)params.get("s0");
87
88
89 String has_rl = null;
90 String has_href = null;
91 has_href = (String) params.get("href");//for an external link : get the href URL if it is existing in the params list
92 has_rl = (String) params.get("rl");//for an external link : get the rl value if it is existing in the params list
93 String collection = (String) params.get(GSParams.COLLECTION);
94 String lang = request.getAttribute(GSXML.LANG_ATT);
95 String uid = request.getAttribute(GSXML.USER_ID_ATT);
96 String document_name = (String) params.get(GSParams.DOCUMENT);
97 if ((document_name == null || document_name.equals("")) && (has_href == null || has_href.equals(""))) {
98 logger.error("no document specified!");
99 return result;
100 }
101 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
102 if (document_type == null) {
103 document_type = "simple";
104 }
105 //whether to retrieve siblings or not
106 boolean get_siblings = false;
107 String sibs = (String) params.get(SIBLING_ARG);
108 if (sibs != null && sibs.equals("1")) {
109 get_siblings = true;
110 }
111
112 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
113 if (sibling_num != null && !sibling_num.equals("")) {
114 // we have to modify the doc name
115 document_name = document_name+"."+sibling_num+".ss";
116 }
117
118 boolean expand_document = false;
119 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
120 if (ed_arg != null && ed_arg.equals("1")) {
121 expand_document = true;
122 }
123
124
125 boolean expand_contents = false;
126 if (expand_document) { // we always expand the contents with the text
127 expand_contents = true;
128 } else {
129 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
130 if (ec_arg != null && ec_arg.equals("1")) {
131 expand_contents = true;
132 }
133 }
134
135 //append site metadata
136 addSiteMetadata( page_response, lang, uid);
137
138 // get the additional data needed for the page
139 getBackgroundData(page_response, collection, lang, uid);
140 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
141
142 // the_document is where all the doc info - structure and metadata etc
143 // is added into, to be returned in the page
144 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
145 page_response.appendChild(the_document);
146
147 // set the doctype from the cgi arg as an attribute
148 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
149
150 // create a basic doc list containing the current node
151 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
152 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
153 basic_doc_list.appendChild(current_doc);
154 if (document_name.length()!=0){
155 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
156 }else if (has_href.length()!=0){
157 current_doc.setAttribute(GSXML.NODE_ID_ATT, has_href);
158 current_doc.setAttribute("externalURL", has_rl);
159 }
160
161 // Create a parameter list to specify the required structure information
162 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
163
164 if (service_params != null) {
165 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
166 }
167
168 Element ds_param = null;
169 boolean get_structure = false;
170 boolean get_structure_info = false;
171 if (document_type.equals("paged")) {
172 get_structure_info = true;
173 // get teh info needed for paged naviagtion
174 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175 ds_param_list.appendChild(ds_param);
176 ds_param.setAttribute(GSXML.NAME_ATT, "info");
177 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
178 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
179 ds_param_list.appendChild(ds_param);
180 ds_param.setAttribute(GSXML.NAME_ATT, "info");
181 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
182 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
183 ds_param_list.appendChild(ds_param);
184 ds_param.setAttribute(GSXML.NAME_ATT, "info");
185 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
186
187 } else if (document_type.equals("hierarchy")){
188 get_structure = true;
189 if (expand_contents) {
190 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
191 ds_param_list.appendChild(ds_param);
192 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
193 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
194 } else {
195 // get the info needed for table of contents
196 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
197 ds_param_list.appendChild(ds_param);
198 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
199 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
200 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
201 ds_param_list.appendChild(ds_param);
202 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
203 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
204 if (get_siblings) {
205 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
206 ds_param_list.appendChild(ds_param);
207 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
208 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
209 }
210 }
211 } else {
212 // we dont need any structure
213 }
214
215 boolean has_dummy = false;
216 if (get_structure || get_structure_info) {
217
218 // Build a request to obtain the document structure
219 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
220 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
221 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
222 ds_message.appendChild(ds_request);
223 ds_request.appendChild(ds_param_list);
224
225 // create a doc_node_list and put in the doc_node that we are interested in
226 ds_request.appendChild(basic_doc_list);
227
228 // Process the document structure retrieve message
229 Element ds_response_message = (Element) this.mr.process(ds_message);
230 if (processErrorElements(ds_response_message, page_response)) {
231 return result;
232 }
233
234 // get the info and print out
235 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
236 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
237 path = GSPath.appendLink(path, "nodeStructureInfo");
238 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
239 // get the doc_node bit
240 if (ds_response_struct_info != null) {
241 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
242 }
243 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
244 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
245 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
246 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
247
248 if (ds_response_structure != null) {
249 // add the contents of the structure bit into the_document
250 NodeList structs = ds_response_structure.getChildNodes();
251 for (int i=0; i<structs.getLength();i++) {
252 the_document.appendChild(this.doc.importNode(structs.item(i), true));
253 }
254 } else {
255 // no structure nodes, so put in a dummy doc node
256 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
257 if (document_name.length()!=0){
258 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
259 }else if (has_href.length()!=0){
260 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
261 doc_node.setAttribute("externalURL", has_rl);
262 }
263 the_document.appendChild(doc_node);
264 has_dummy = true;
265 }
266 } else { // a simple type - we dont have a dummy node for simple
267 // should think about this more
268 // no structure request, so just put in a dummy doc node
269 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
270 if (document_name.length()!=0){
271 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
272 }else if (has_href.length()!=0){
273 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href);
274 doc_node.setAttribute("externalURL", has_rl);
275 }
276 the_document.appendChild(doc_node);
277 has_dummy = true;
278 }
279
280 // Build a request to obtain some document metadata
281 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
282 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
283 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
284 dm_message.appendChild(dm_request);
285 // Create a parameter list to specify the required metadata information
286
287 HashSet meta_names = new HashSet();
288 meta_names.add("Title"); // the default
289 if (format_elem != null) {
290 extractMetadataNames(format_elem, meta_names);
291 }
292
293 Element dm_param_list = createMetadataParamList(meta_names);
294 if (service_params != null) {
295 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
296 }
297
298 dm_request.appendChild(dm_param_list);
299
300
301 // create the doc node list for the metadata request
302 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
303 dm_request.appendChild(dm_doc_list);
304
305 // Add each node from the structure response into the metadata request
306 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
307 for (int i = 0; i < doc_nodes.getLength(); i++) {
308 Element doc_node = (Element) doc_nodes.item(i);
309 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
310
311 // Add the documentNode to the list
312 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313 dm_doc_list.appendChild(dm_doc_node);
314 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
315 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
316 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
317 }
318
319 // we also want a metadata request to the top level document to get
320 // assocfilepath - this could be cached too
321 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
322 dm_message.appendChild(doc_meta_request);
323 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
324 if (service_params != null) {
325 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
326 }
327
328 doc_meta_request.appendChild(doc_meta_param_list);
329 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
330 doc_meta_param_list.appendChild(doc_param);
331 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
332 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
333
334 // create the doc node list for the metadata request
335 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
336 doc_meta_request.appendChild(doc_list);
337
338 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
339 // the node we want is the root document node
340 if (document_name.length()!=0){
341 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
342 }else if (has_href.length()!=0){
343 doc_node.setAttribute(GSXML.NODE_ID_ATT, has_href+".rt");
344 doc_node.setAttribute("externalURL", has_rl);
345 }
346 doc_list.appendChild(doc_node);
347 Element dm_response_message = (Element) this.mr.process(dm_message);
348 if (processErrorElements(dm_response_message, page_response)) {
349 return result;
350 }
351
352 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
353 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
354
355 // Merge the metadata with the structure information
356 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
357 for (int i = 0; i < doc_nodes.getLength(); i++) {
358 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
359 }
360 // get teh top level doc metadata out
361 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
362 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
363 if (doc_meta_list != null) {
364 the_document.appendChild(this.doc.importNode(doc_meta_list, true));
365 }
366 // Build a request to obtain some document content
367 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
368 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
369 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
370 dc_message.appendChild(dc_request);
371
372
373 // Create a parameter list to specify the request parameters - empty for now
374 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
375 if (service_params != null) {
376 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
377 }
378
379 dc_request.appendChild(dc_param_list);
380
381 // get the content
382 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
383 if (expand_document) {
384 dc_request.appendChild(dm_doc_list);
385 } else {
386 dc_request.appendChild(basic_doc_list);
387 }
388 logger.debug("request = "+converter.getString(dc_message));
389 Element dc_response_message = (Element) this.mr.process(dc_message);
390 if (processErrorElements(dc_response_message, page_response)) {
391 return result;
392 }
393
394 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
395
396 if (expand_document) {
397 // Merge the content with the structure information
398 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
399 for (int i = 0; i < doc_nodes.getLength(); i++) {
400 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
401 if (content != null) {
402 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
403 }
404 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
405 }
406 } else {
407 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
408 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
409 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
410 Element dc_response_doc_external = (Element) GSXML.getChildByTagName(dc_response_doc, "external");
411
412 if (dc_response_doc_content == null) {
413 // no content to add
414 if (dc_response_doc_external !=null){
415 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
416
417 the_document.setAttribute("selectedNode", modified_doc_id);
418 the_document.setAttribute("external", dc_response_doc_external.getAttribute("external_link"));
419 }
420 return result;
421 }
422 if (highlight_query_terms) {
423 dc_response_doc.removeChild(dc_response_doc_content);
424
425 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
426 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
427 }
428
429
430 if (provide_annotations) {
431 String service_selected = (String)params.get(ENRICH_DOC_ARG);
432 if (service_selected != null && service_selected.equals("1")) {
433 // now we can modifiy the response doc if needed
434 String enrich_service = (String)params.get(GSParams.SERVICE);
435 // send a message to the service
436 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
437 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
438 enrich_message.appendChild(enrich_request);
439 // check for parameters
440 HashMap e_service_params = (HashMap)params.get("s1");
441 if (e_service_params != null) {
442 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
443 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
444 enrich_request.appendChild(enrich_pl);
445 }
446 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
447 enrich_request.appendChild(e_doc_list);
448 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
449
450 Node enrich_response = this.mr.process(enrich_message);
451
452 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
453 path = GSPath.createPath(links);
454 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
455
456 }
457 } // if provide_annotations
458
459
460 // use the returned id rather than the sent one cos there may have
461 // been modifiers such as .pr that are removed.
462 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
463 the_document.setAttribute("selectedNode", modified_doc_id);
464 if (has_dummy) {
465 // change the id if necessary and add the content
466 Element dummy_node = (Element)doc_nodes.item(0);
467
468 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
469 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
470 // hack for simple type
471 if (document_type.equals("simple")) {
472 // we dont want the internal docNode, just want the content and metadata in the document
473 // rethink this!!
474 the_document.removeChild(dummy_node);
475
476 NodeList dummy_children = dummy_node.getChildNodes();
477 //for (int i=0; i<dummy_children.getLength(); i++) {
478 for (int i=dummy_children.getLength()-1; i>=0; i--) {
479 the_document.appendChild(dummy_children.item(i));
480
481 }
482 }
483 } else {
484 // Merge the document content with the metadata and structure information
485 for (int i = 0; i < doc_nodes.getLength(); i++) {
486 Node dn = doc_nodes.item(i);
487 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
488 if (dn_id.equals(modified_doc_id)) {
489 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
490 break;
491 }
492 }
493 }
494 }
495 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
496 return result;
497 }
498
499 /** tell the param class what its arguments are
500 * if an action has its own arguments, this should add them to the params
501 * object - particularly important for args that should not be saved */
502 public boolean getActionParameters(GSParams params) {
503 params.addParameter(GOTO_PAGE_ARG, false);
504 params.addParameter(ENRICH_DOC_ARG, false);
505 return true;
506 }
507
508
509 /** this method gets the collection description, the format info, the
510 * list of enrich services, etc - stuff that is needed for the page,
511 * but is the same whatever the query is - should be cached */
512 protected boolean getBackgroundData(Element page_response,
513 String collection, String lang,
514 String uid) {
515
516 // create a message to process - contains requests for the collection
517 // description, the format element, the enrich services on offer
518 // these could all be cached
519 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
520 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
521 // the format request - ignore for now, where does this request go to??
522 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
523 info_message.appendChild(format_request);
524
525 // the enrich_services request - only do this if provide_annotations is true
526
527 if (provide_annotations) {
528 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
529 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
530 info_message.appendChild(enrich_services_request);
531 }
532
533 Element info_response = (Element)this.mr.process(info_message);
534
535 // the collection is the first response
536 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
537 Element format_resp = (Element) responses.item(0);
538
539 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
540 if (format_elem != null) {
541 logger.debug("doc action found a format statement");
542 // set teh format type
543 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
544 page_response.appendChild(this.doc.importNode(format_elem, true));
545 }
546
547 if (provide_annotations) {
548 Element services_resp = (Element)responses.item(1);
549
550 // a new message for the mr
551 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
552
553 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
554 boolean service_found = false;
555 for (int j=0; j<e_services.getLength(); j++) {
556 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
557 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
558 enrich_message.appendChild(s);
559 service_found = true;
560 }
561 }
562 if (service_found) {
563 Element enrich_response = (Element)this.mr.process(enrich_message);
564
565 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
566 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
567 for (int i=0; i<e_responses.getLength(); i++) {
568 Element e_resp = (Element)e_responses.item(i);
569 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
570 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
571 service_list.appendChild(e_service);
572 }
573 page_response.appendChild(service_list);
574 }
575 } // if provide_annotations
576 return true;
577
578 }
579
580 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
581 */
582 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
583
584 // do the query again to get term info
585 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
586 HashMap params = GSXML.extractParams(cgi_param_list, false);
587
588 HashMap previous_params = (HashMap)params.get("p");
589 if (previous_params == null) {
590 return dc_response_doc_content;
591 }
592 String service_name = (String)previous_params.get(GSParams.SERVICE);
593 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
594 logger.debug("invalid service, not doing highlighting");
595 return dc_response_doc_content;
596 }
597 String collection = (String)params.get(GSParams.COLLECTION);
598 String lang = request.getAttribute(GSXML.LANG_ATT);
599 String uid = request.getAttribute(GSXML.USER_ID_ATT);
600 String to = GSPath.appendLink(collection, service_name);
601
602 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
603 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
604 mr_query_message.appendChild(mr_query_request);
605
606 // paramList
607 HashMap service_params = (HashMap)params.get("s1");
608
609 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
610 GSXML.addParametersToList(this.doc, query_param_list, service_params);
611 mr_query_request.appendChild(query_param_list);
612
613 // do the query
614 Element mr_query_response = (Element)this.mr.process(mr_query_message);
615
616 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
617 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
618 if (query_term_list_element == null) {
619 // no term info
620 logger.error("No query term information.\n");
621 return dc_response_doc_content;
622 }
623
624 String content = GSXML.getNodeText(dc_response_doc_content);
625
626 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
627 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
628
629 HashSet query_term_variants = new HashSet();
630 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
631 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
632 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
633 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
634 for (int j = 0; j < equivalent_terms.length; j++) {
635 query_term_variants.add(equivalent_terms[j]);
636 }
637 }
638
639 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
640
641 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
642 String performed_query = GSXML.getNodeText(query_element) + " ";
643
644 ArrayList phrase_query_p_term_variants_list = new ArrayList();
645 int term_start = 0;
646 boolean in_term = false;
647 boolean in_phrase = false;
648 for (int i = 0; i < performed_query.length(); i++) {
649 char character = performed_query.charAt(i);
650 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
651
652 // Has a query term just started?
653 if (in_term == false && is_character_letter_or_digit == true) {
654 in_term = true;
655 term_start = i;
656 }
657
658 // Or has a term just finished?
659 else if (in_term == true && is_character_letter_or_digit == false) {
660 in_term = false;
661 String term = performed_query.substring(term_start, i);
662
663 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
664 if (term_element != null) {
665
666 HashSet phrase_query_p_term_x_variants = new HashSet();
667
668 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
669 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
670 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
671 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
672 for (int k = 0; k < term_equivalent_terms.length; k++) {
673 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
674 }
675 }
676 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
677
678 if (in_phrase == false) {
679 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
680 phrase_query_p_term_variants_list = new ArrayList();
681 }
682 }
683 }
684 // Watch for phrases (surrounded by quotes)
685 if (character == '\"') {
686 // Has a phrase just started?
687 if (in_phrase == false) {
688 in_phrase = true;
689 }
690 // Or has a phrase just finished?
691 else if (in_phrase == true) {
692 in_phrase = false;
693 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
694 }
695
696 phrase_query_p_term_variants_list = new ArrayList();
697 }
698 }
699
700 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
701 }
702
703
704 /**
705 * Highlights query terms in a piece of text.
706 */
707 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
708 {
709 // Convert the content string to an array of characters for speed
710 char[] content_characters = new char[content.length()];
711 content.getChars(0, content.length(), content_characters, 0);
712
713 // Now skim through the content, identifying word matches
714 ArrayList word_matches = new ArrayList();
715 int word_start = 0;
716 boolean in_word = false;
717 boolean preceding_word_matched = false;
718 for (int i = 0; i < content_characters.length; i++) {
719 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
720
721 // Has a word just started?
722 if (in_word == false && is_character_letter_or_digit == true) {
723 in_word = true;
724 word_start = i;
725 }
726
727 // Or has a word just finished?
728 else if (in_word == true && is_character_letter_or_digit == false) {
729 in_word = false;
730
731 // Check if the word matches any of the query term equivalents
732 String word = new String(content_characters, word_start, (i - word_start));
733 if (query_term_variants.contains(word)) {
734 // We have found a matching word, so remember its location
735 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
736 preceding_word_matched = true;
737 }
738 else {
739 preceding_word_matched = false;
740 }
741 }
742 }
743
744 // Don't forget the last word...
745 if (in_word == true) {
746 // Check if the word matches any of the query term equivalents
747 String word = new String(content_characters, word_start, (content_characters.length - word_start));
748 if (query_term_variants.contains(word)) {
749 // We have found a matching word, so remember its location
750 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
751 }
752 }
753
754 ArrayList highlight_start_positions = new ArrayList();
755 ArrayList highlight_end_positions = new ArrayList();
756
757 // Deal with phrases now
758 ArrayList partial_phrase_matches = new ArrayList();
759 for (int i = 0; i < word_matches.size(); i++) {
760 WordMatch word_match = (WordMatch) word_matches.get(i);
761
762 // See if any partial phrase matches are extended by this word
763 if (word_match.preceding_word_matched) {
764 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
765 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
766 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
767 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
768 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
769 partial_phrase_match.num_words_matched++;
770
771 // Has a complete phrase match occurred?
772 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
773 // Check for overlaps by looking at the previous highlight range
774 if (!highlight_end_positions.isEmpty()) {
775 int last_highlight_index = highlight_end_positions.size() - 1;
776 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
777 if (last_highlight_end > partial_phrase_match.start_position) {
778 // There is an overlap, so remove the previous phrase match
779 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
780 highlight_end_positions.remove(last_highlight_index);
781 partial_phrase_match.start_position = last_highlight_start;
782 }
783 }
784
785 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
786 highlight_end_positions.add(new Integer(word_match.end_position));
787 }
788 // No, but add the partial match back into the list for next time
789 else {
790 partial_phrase_matches.add(partial_phrase_match);
791 }
792 }
793 }
794 }
795 else {
796 partial_phrase_matches.clear();
797 }
798
799 // See if this word is at the start of any of the phrases
800 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
801 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
802 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
803 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
804 // If this phrase is just one word long, we have a complete match
805 if (phrase_query_p_term_variants_list.size() == 1) {
806 highlight_start_positions.add(new Integer(word_match.start_position));
807 highlight_end_positions.add(new Integer(word_match.end_position));
808 }
809 // Otherwise we have the start of a potential phrase match
810 else {
811 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
812 }
813 }
814 }
815 }
816
817 // Now add the annotation tags into the document at the correct points
818 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
819
820 int last_wrote = 0;
821 for (int i = 0; i < highlight_start_positions.size(); i++) {
822 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
823 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
824
825 // Print anything before the highlight range
826 if (last_wrote < highlight_start) {
827 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
828 content_element.appendChild(this.doc.createTextNode(preceding_text));
829 }
830
831 // Print the highlight text, annotated
832 if (highlight_end > last_wrote) {
833 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
834 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
835 annotation_element.setAttribute("type", "query_term");
836 content_element.appendChild(annotation_element);
837 last_wrote = highlight_end;
838 }
839 }
840
841 // Finish off any unwritten text
842 if (last_wrote < content_characters.length) {
843 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
844 content_element.appendChild(this.doc.createTextNode(remaining_text));
845 }
846
847 return content_element;
848 }
849
850
851 static private class WordMatch
852 {
853 public String word;
854 public int start_position;
855 public int end_position;
856 public boolean preceding_word_matched;
857
858 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
859 {
860 this.word = word;
861 this.start_position = start_position;
862 this.end_position = end_position;
863 this.preceding_word_matched = preceding_word_matched;
864 }
865 }
866
867
868 static private class PartialPhraseMatch
869 {
870 public int start_position;
871 public int query_phrase_number;
872 public int num_words_matched;
873
874 public PartialPhraseMatch(int start_position, int query_phrase_number)
875 {
876 this.start_position = start_position;
877 this.query_phrase_number = query_phrase_number;
878 this.num_words_matched = 1;
879 }
880 }
881}
Note: See TracBrowser for help on using the repository browser.