source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 13270

Last change on this file since 13270 was 13270, checked in by shaoqun, 17 years ago

replace Category class which is deprecated with Logger class

  • Property svn:keywords set to Author Date Id Revision
File size: 35.0 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38import org.apache.log4j.*;
39
40/** Action class for retrieving Documents via the message router
41 */
42public class DocumentAction extends Action {
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.action.DocumentAction.class.getName());
45
46 // this is used to specify that the sibling nodes of a selected one should be obtained
47 public static final String SIBLING_ARG = "sib";
48 public static final String GOTO_PAGE_ARG = "gp";
49 public static final String ENRICH_DOC_ARG = "end";
50
51 /** if this is set to true, when a document is displayed, any annotation
52 * type services (enrich) will be offered to the user as well */
53 protected boolean provide_annotations = false;
54
55 protected boolean highlight_query_terms = false;
56
57 public boolean configure() {
58 super.configure();
59 String highlight = (String)config_params.get("highlightQueryTerms");
60 if (highlight != null && highlight.equals("true")) {
61 highlight_query_terms = true;
62 }
63 String annotate = (String)config_params.get("displayAnnotationService");
64 if (annotate != null && annotate.equals("true")) {
65 provide_annotations = true;
66 }
67 return true;
68 }
69 public Element process (Element message)
70 {
71 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
72
73 // the response
74 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
75 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
76 result.appendChild(page_response);
77
78 // get the request - assume only one
79 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
80 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
81 HashMap params = GSXML.extractParams(cgi_paramList, false);
82
83 // just in case there are some that need to get passed to the services
84 HashMap service_params = (HashMap)params.get("s0");
85
86 String collection = (String) params.get(GSParams.COLLECTION);
87 String lang = request.getAttribute(GSXML.LANG_ATT);
88 String uid = request.getAttribute(GSXML.USER_ID_ATT);
89 String document_name = (String) params.get(GSParams.DOCUMENT);
90 if (document_name == null || document_name.equals("")) {
91 logger.error("no document specified!");
92 return result;
93 }
94 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
95 if (document_type == null) {
96 document_type = "simple";
97 }
98 //whether to retrieve siblings or not
99 boolean get_siblings = false;
100 String sibs = (String) params.get(SIBLING_ARG);
101 if (sibs != null && sibs.equals("1")) {
102 get_siblings = true;
103 }
104
105 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
106 if (sibling_num != null && !sibling_num.equals("")) {
107 // we have to modify the doc name
108 document_name = document_name+"."+sibling_num+".ss";
109 }
110
111 boolean expand_document = false;
112 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
113 if (ed_arg != null && ed_arg.equals("1")) {
114 expand_document = true;
115 }
116
117
118 boolean expand_contents = false;
119 if (expand_document) { // we always expand the contents with the text
120 expand_contents = true;
121 } else {
122 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
123 if (ec_arg != null && ec_arg.equals("1")) {
124 expand_contents = true;
125 }
126 }
127 // get the additional data needed for the page
128 getBackgroundData(page_response, collection, lang, uid);
129 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
130
131 // the_document is where all the doc info - structure and metadata etc
132 // is added into, to be returned in the page
133 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
134 page_response.appendChild(the_document);
135
136 // set the doctype from the cgi arg as an attribute
137 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
138
139 // create a basic doc list containing the current node
140 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
141 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
142 basic_doc_list.appendChild(current_doc);
143 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
144
145 // Create a parameter list to specify the required structure information
146 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
147
148 if (service_params != null) {
149 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
150 }
151
152 Element ds_param = null;
153 boolean get_structure = false;
154 boolean get_structure_info = false;
155 if (document_type.equals("paged")) {
156 get_structure_info = true;
157 // get teh info needed for paged naviagtion
158 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
159 ds_param_list.appendChild(ds_param);
160 ds_param.setAttribute(GSXML.NAME_ATT, "info");
161 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
162 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
163 ds_param_list.appendChild(ds_param);
164 ds_param.setAttribute(GSXML.NAME_ATT, "info");
165 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
166 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
167 ds_param_list.appendChild(ds_param);
168 ds_param.setAttribute(GSXML.NAME_ATT, "info");
169 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
170
171 } else if (document_type.equals("hierarchy")){
172 get_structure = true;
173 if (expand_contents) {
174 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
175 ds_param_list.appendChild(ds_param);
176 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
177 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
178 } else {
179 // get the info needed for table of contents
180 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
181 ds_param_list.appendChild(ds_param);
182 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
183 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
184 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
185 ds_param_list.appendChild(ds_param);
186 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
187 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
188 if (get_siblings) {
189 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
190 ds_param_list.appendChild(ds_param);
191 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
192 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
193 }
194 }
195 } else {
196 // we dont need any structure
197 }
198
199 boolean has_dummy = false;
200 if (get_structure || get_structure_info) {
201
202 // Build a request to obtain the document structure
203 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
204 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
205 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
206 ds_message.appendChild(ds_request);
207 ds_request.appendChild(ds_param_list);
208
209 // create a doc_node_list and put in the doc_node that we are interested in
210 ds_request.appendChild(basic_doc_list);
211
212 // Process the document structure retrieve message
213 Element ds_response_message = (Element) this.mr.process(ds_message);
214 if (processErrorElements(ds_response_message, page_response)) {
215 return result;
216 }
217
218 // get the info and print out
219 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
220 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
221 path = GSPath.appendLink(path, "nodeStructureInfo");
222 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
223 // get the doc_node bit
224 if (ds_response_struct_info != null) {
225 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
226 }
227 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
228 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
229 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
230 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
231
232 if (ds_response_structure != null) {
233 // add the contents of the structure bit into the_document
234 NodeList structs = ds_response_structure.getChildNodes();
235 for (int i=0; i<structs.getLength();i++) {
236 the_document.appendChild(this.doc.importNode(structs.item(i), true));
237 }
238 } else {
239 // no structure nodes, so put in a dummy doc node
240 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
241 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
242 the_document.appendChild(doc_node);
243 has_dummy = true;
244 }
245 } else { // a simple type - we dont have a dummy node for simple
246 // should think about this more
247 // no structure request, so just put in a dummy doc node
248 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
249 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
250 the_document.appendChild(doc_node);
251 has_dummy = true;
252 }
253
254 // Build a request to obtain some document metadata
255 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
256 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
257 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
258 dm_message.appendChild(dm_request);
259 // Create a parameter list to specify the required metadata information
260
261 HashSet meta_names = new HashSet();
262 meta_names.add("Title"); // the default
263 if (format_elem != null) {
264 extractMetadataNames(format_elem, meta_names);
265 }
266
267 Element dm_param_list = createMetadataParamList(meta_names);
268 if (service_params != null) {
269 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
270 }
271
272 dm_request.appendChild(dm_param_list);
273
274
275 // create the doc node list for the metadata request
276 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
277 dm_request.appendChild(dm_doc_list);
278
279 // Add each node from the structure response into the metadata request
280 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
281 for (int i = 0; i < doc_nodes.getLength(); i++) {
282 Element doc_node = (Element) doc_nodes.item(i);
283 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
284
285 // Add the documentNode to the list
286 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
287 dm_doc_list.appendChild(dm_doc_node);
288 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
289 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
290 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
291 }
292
293 // we also want a metadata request to the top level document to get
294 // assocfilepath - this could be cached too
295 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
296 dm_message.appendChild(doc_meta_request);
297 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
298 if (service_params != null) {
299 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
300 }
301
302 doc_meta_request.appendChild(doc_meta_param_list);
303 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
304 doc_meta_param_list.appendChild(doc_param);
305 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
306 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
307
308 // create the doc node list for the metadata request
309 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
310 doc_meta_request.appendChild(doc_list);
311
312 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
313 // the node we want is the root document node
314 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
315 doc_list.appendChild(doc_node);
316 Element dm_response_message = (Element) this.mr.process(dm_message);
317 if (processErrorElements(dm_response_message, page_response)) {
318 return result;
319 }
320
321 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
322 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
323
324 // Merge the metadata with the structure information
325 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
326 for (int i = 0; i < doc_nodes.getLength(); i++) {
327 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
328 }
329 // get teh top level doc metadata out
330 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
331 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
332 if (doc_meta_list != null) {
333 the_document.appendChild(this.doc.importNode(doc_meta_list, true));
334 }
335 // Build a request to obtain some document content
336 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
337 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
338 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
339 dc_message.appendChild(dc_request);
340
341
342 // Create a parameter list to specify the request parameters - empty for now
343 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
344 if (service_params != null) {
345 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
346 }
347
348 dc_request.appendChild(dc_param_list);
349
350 // get the content
351 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
352 if (expand_document) {
353 dc_request.appendChild(dm_doc_list);
354 } else {
355 dc_request.appendChild(basic_doc_list);
356 }
357 logger.debug("request = "+converter.getString(dc_message));
358 Element dc_response_message = (Element) this.mr.process(dc_message);
359 if (processErrorElements(dc_response_message, page_response)) {
360 return result;
361 }
362
363 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
364
365 if (expand_document) {
366 // Merge the content with the structure information
367 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
368 for (int i = 0; i < doc_nodes.getLength(); i++) {
369 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
370 if (content != null) {
371 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
372 }
373 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
374 }
375 } else {
376
377 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
378 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
379 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
380
381 if (dc_response_doc_content == null) {
382 // no content to add
383 return result;
384 }
385 if (highlight_query_terms) {
386 dc_response_doc.removeChild(dc_response_doc_content);
387
388 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
389 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
390 }
391
392
393 if (provide_annotations) {
394 String service_selected = (String)params.get(ENRICH_DOC_ARG);
395 if (service_selected != null && service_selected.equals("1")) {
396 // now we can modifiy the response doc if needed
397 String enrich_service = (String)params.get(GSParams.SERVICE);
398 // send a message to the service
399 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
400 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
401 enrich_message.appendChild(enrich_request);
402 // check for parameters
403 HashMap e_service_params = (HashMap)params.get("s1");
404 if (e_service_params != null) {
405 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
406 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
407 enrich_request.appendChild(enrich_pl);
408 }
409 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
410 enrich_request.appendChild(e_doc_list);
411 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
412
413 Element enrich_response = this.mr.process(enrich_message);
414
415 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
416 path = GSPath.createPath(links);
417 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
418
419 }
420 } // if provide_annotations
421
422
423 // use the returned id rather than the sent one cos there may have
424 // been modifiers such as .pr that are removed.
425 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
426 the_document.setAttribute("selectedNode", modified_doc_id);
427 if (has_dummy) {
428 // change the id if necessary and add the content
429 Element dummy_node = (Element)doc_nodes.item(0);
430
431 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
432 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
433 // hack for simple type
434 if (document_type.equals("simple")) {
435 // we dont want the internal docNode, just want the content and metadata in the document
436 // rethink this!!
437 the_document.removeChild(dummy_node);
438
439 NodeList dummy_children = dummy_node.getChildNodes();
440 //for (int i=0; i<dummy_children.getLength(); i++) {
441 for (int i=dummy_children.getLength()-1; i>=0; i--) {
442 the_document.appendChild(dummy_children.item(i));
443
444 }
445 }
446 } else {
447 // Merge the document content with the metadata and structure information
448 for (int i = 0; i < doc_nodes.getLength(); i++) {
449 Node dn = doc_nodes.item(i);
450 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
451 if (dn_id.equals(modified_doc_id)) {
452 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
453 break;
454 }
455 }
456 }
457 }
458 logger.debug("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
459 return result;
460 }
461
462 /** tell the param class what its arguments are
463 * if an action has its own arguments, this should add them to the params
464 * object - particularly important for args that should not be saved */
465 public boolean getActionParameters(GSParams params) {
466 params.addParameter(GOTO_PAGE_ARG, false);
467 params.addParameter(ENRICH_DOC_ARG, false);
468 return true;
469 }
470
471
472 /** this method gets the collection description, the format info, the
473 * list of enrich services, etc - stuff that is needed for the page,
474 * but is the same whatever the query is - should be cached */
475 protected boolean getBackgroundData(Element page_response,
476 String collection, String lang,
477 String uid) {
478
479 // create a message to process - contains requests for the collection
480 // description, the format element, the enrich services on offer
481 // these could all be cached
482 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
483 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
484 // the format request - ignore for now, where does this request go to??
485 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
486 info_message.appendChild(format_request);
487
488 // the enrich_services request - only do this if provide_annotations is true
489
490 if (provide_annotations) {
491 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
492 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
493 info_message.appendChild(enrich_services_request);
494 }
495
496 Element info_response = (Element)this.mr.process(info_message);
497
498 // the collection is the first response
499 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
500 Element format_resp = (Element) responses.item(0);
501
502 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
503 if (format_elem != null) {
504 logger.debug("doc action found a format statement");
505 // set teh format type
506 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
507 page_response.appendChild(this.doc.importNode(format_elem, true));
508 }
509
510 if (provide_annotations) {
511 Element services_resp = (Element)responses.item(1);
512
513 // a new message for the mr
514 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
515
516 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
517 boolean service_found = false;
518 for (int j=0; j<e_services.getLength(); j++) {
519 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
520 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
521 enrich_message.appendChild(s);
522 service_found = true;
523 }
524 }
525 if (service_found) {
526 Element enrich_response = this.mr.process(enrich_message);
527
528 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
529 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
530 for (int i=0; i<e_responses.getLength(); i++) {
531 Element e_resp = (Element)e_responses.item(i);
532 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
533 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
534 service_list.appendChild(e_service);
535 }
536 page_response.appendChild(service_list);
537 }
538 } // if provide_annotations
539 return true;
540
541 }
542
543 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
544 */
545 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
546
547 // do the query again to get term info
548 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
549 HashMap params = GSXML.extractParams(cgi_param_list, false);
550
551 HashMap previous_params = (HashMap)params.get("p");
552 if (previous_params == null) {
553 return dc_response_doc_content;
554 }
555 String service_name = (String)previous_params.get(GSParams.SERVICE);
556 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
557 logger.error("invalid service, not doing highlighting");
558 return dc_response_doc_content;
559 }
560 String collection = (String)params.get(GSParams.COLLECTION);
561 String lang = request.getAttribute(GSXML.LANG_ATT);
562 String uid = request.getAttribute(GSXML.USER_ID_ATT);
563 String to = GSPath.appendLink(collection, service_name);
564
565 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
566 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
567 mr_query_message.appendChild(mr_query_request);
568
569 // paramList
570 HashMap service_params = (HashMap)params.get("s1");
571
572 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
573 GSXML.addParametersToList(this.doc, query_param_list, service_params);
574 mr_query_request.appendChild(query_param_list);
575
576 // do the query
577 Element mr_query_response = (Element)this.mr.process(mr_query_message);
578
579 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
580 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
581 if (query_term_list_element == null) {
582 // no term info
583 logger.error("No query term information.\n");
584 return dc_response_doc_content;
585 }
586
587 String content = GSXML.getNodeText(dc_response_doc_content);
588
589 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
590 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
591
592 HashSet query_term_variants = new HashSet();
593 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
594 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
595 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
596 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
597 for (int j = 0; j < equivalent_terms.length; j++) {
598 query_term_variants.add(equivalent_terms[j]);
599 }
600 }
601
602 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
603
604 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
605 String performed_query = GSXML.getNodeText(query_element) + " ";
606
607 ArrayList phrase_query_p_term_variants_list = new ArrayList();
608 int term_start = 0;
609 boolean in_term = false;
610 boolean in_phrase = false;
611 for (int i = 0; i < performed_query.length(); i++) {
612 char character = performed_query.charAt(i);
613 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
614
615 // Has a query term just started?
616 if (in_term == false && is_character_letter_or_digit == true) {
617 in_term = true;
618 term_start = i;
619 }
620
621 // Or has a term just finished?
622 else if (in_term == true && is_character_letter_or_digit == false) {
623 in_term = false;
624 String term = performed_query.substring(term_start, i);
625
626 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
627 if (term_element != null) {
628
629 HashSet phrase_query_p_term_x_variants = new HashSet();
630
631 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
632 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
633 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
634 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
635 for (int k = 0; k < term_equivalent_terms.length; k++) {
636 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
637 }
638 }
639 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
640
641 if (in_phrase == false) {
642 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
643 phrase_query_p_term_variants_list = new ArrayList();
644 }
645 }
646 }
647 // Watch for phrases (surrounded by quotes)
648 if (character == '\"') {
649 // Has a phrase just started?
650 if (in_phrase == false) {
651 in_phrase = true;
652 }
653 // Or has a phrase just finished?
654 else if (in_phrase == true) {
655 in_phrase = false;
656 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
657 }
658
659 phrase_query_p_term_variants_list = new ArrayList();
660 }
661 }
662
663 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
664 }
665
666
667 /**
668 * Highlights query terms in a piece of text.
669 */
670 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
671 {
672 // Convert the content string to an array of characters for speed
673 char[] content_characters = new char[content.length()];
674 content.getChars(0, content.length(), content_characters, 0);
675
676 // Now skim through the content, identifying word matches
677 ArrayList word_matches = new ArrayList();
678 int word_start = 0;
679 boolean in_word = false;
680 boolean preceding_word_matched = false;
681 for (int i = 0; i < content_characters.length; i++) {
682 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
683
684 // Has a word just started?
685 if (in_word == false && is_character_letter_or_digit == true) {
686 in_word = true;
687 word_start = i;
688 }
689
690 // Or has a word just finished?
691 else if (in_word == true && is_character_letter_or_digit == false) {
692 in_word = false;
693
694 // Check if the word matches any of the query term equivalents
695 String word = new String(content_characters, word_start, (i - word_start));
696 if (query_term_variants.contains(word)) {
697 // We have found a matching word, so remember its location
698 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
699 preceding_word_matched = true;
700 }
701 else {
702 preceding_word_matched = false;
703 }
704 }
705 }
706
707 // Don't forget the last word...
708 if (in_word == true) {
709 // Check if the word matches any of the query term equivalents
710 String word = new String(content_characters, word_start, (content_characters.length - word_start));
711 if (query_term_variants.contains(word)) {
712 // We have found a matching word, so remember its location
713 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
714 }
715 }
716
717 ArrayList highlight_start_positions = new ArrayList();
718 ArrayList highlight_end_positions = new ArrayList();
719
720 // Deal with phrases now
721 ArrayList partial_phrase_matches = new ArrayList();
722 for (int i = 0; i < word_matches.size(); i++) {
723 WordMatch word_match = (WordMatch) word_matches.get(i);
724
725 // See if any partial phrase matches are extended by this word
726 if (word_match.preceding_word_matched) {
727 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
728 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
729 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
730 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
731 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
732 partial_phrase_match.num_words_matched++;
733
734 // Has a complete phrase match occurred?
735 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
736 // Check for overlaps by looking at the previous highlight range
737 if (!highlight_end_positions.isEmpty()) {
738 int last_highlight_index = highlight_end_positions.size() - 1;
739 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
740 if (last_highlight_end > partial_phrase_match.start_position) {
741 // There is an overlap, so remove the previous phrase match
742 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
743 highlight_end_positions.remove(last_highlight_index);
744 partial_phrase_match.start_position = last_highlight_start;
745 }
746 }
747
748 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
749 highlight_end_positions.add(new Integer(word_match.end_position));
750 }
751 // No, but add the partial match back into the list for next time
752 else {
753 partial_phrase_matches.add(partial_phrase_match);
754 }
755 }
756 }
757 }
758 else {
759 partial_phrase_matches.clear();
760 }
761
762 // See if this word is at the start of any of the phrases
763 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
764 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
765 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
766 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
767 // If this phrase is just one word long, we have a complete match
768 if (phrase_query_p_term_variants_list.size() == 1) {
769 highlight_start_positions.add(new Integer(word_match.start_position));
770 highlight_end_positions.add(new Integer(word_match.end_position));
771 }
772 // Otherwise we have the start of a potential phrase match
773 else {
774 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
775 }
776 }
777 }
778 }
779
780 // Now add the annotation tags into the document at the correct points
781 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
782
783 int last_wrote = 0;
784 for (int i = 0; i < highlight_start_positions.size(); i++) {
785 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
786 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
787
788 // Print anything before the highlight range
789 if (last_wrote < highlight_start) {
790 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
791 // System.err.print(preceding_text);
792 content_element.appendChild(this.doc.createTextNode(preceding_text));
793 }
794
795 // Print the highlight text, annotated
796 if (highlight_end > last_wrote) {
797 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
798 // System.err.print("|" + highlight_text + "|");
799 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
800 annotation_element.setAttribute("type", "query_term");
801 content_element.appendChild(annotation_element);
802 last_wrote = highlight_end;
803 }
804 }
805
806 // Finish off any unwritten text
807 if (last_wrote < content_characters.length) {
808 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
809 // System.err.print(remaining_text);
810 content_element.appendChild(this.doc.createTextNode(remaining_text));
811 }
812
813 return content_element;
814 }
815
816
817 static private class WordMatch
818 {
819 public String word;
820 public int start_position;
821 public int end_position;
822 public boolean preceding_word_matched;
823
824 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
825 {
826 this.word = word;
827 this.start_position = start_position;
828 this.end_position = end_position;
829 this.preceding_word_matched = preceding_word_matched;
830 }
831 }
832
833
834 static private class PartialPhraseMatch
835 {
836 public int start_position;
837 public int query_phrase_number;
838 public int num_words_matched;
839
840 public PartialPhraseMatch(int start_position, int query_phrase_number)
841 {
842 this.start_position = start_position;
843 this.query_phrase_number = query_phrase_number;
844 this.num_words_matched = 1;
845 }
846 }
847}
Note: See TracBrowser for help on using the repository browser.