source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 9007

Last change on this file since 9007 was 9007, checked in by kjdon, 19 years ago

Added a check for a null term element in the search term highlighting code. Field names for MGPP were being parsed as terms, but they don't match any term element.

  • Property svn:keywords set to Author Date Id Revision
File size: 34.8 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38
39/** Action class for retrieving Documents via the message router
40 */
41public class DocumentAction extends Action {
42
/** name of the cgi argument used to specify that the sibling nodes of
 * the selected one should be obtained (process() looks for the value "1") */
public static final String SIBLING_ARG = "sib";
/** name of the cgi argument giving the page to go to - process() appends
 * its value to the document id */
public static final String GOTO_PAGE_ARG = "gp";
/** name of the cgi argument that, when set to "1", asks for the document
 * content to be sent through the selected enrich service */
public static final String ENRICH_DOC_ARG = "end";

/** if this is set to true, when a document is displayed, any annotation
 * type services (enrich) will be offered to the user as well.
 * Set by configure() from the "displayAnnotationService" parameter */
protected boolean provide_annotations = false;

/** if this is set to true, query terms are highlighted in the document
 * content. Set by configure() from the "highlightQueryTerms" parameter */
protected boolean highlight_query_terms = false;
53
54 public boolean configure() {
55 super.configure();
56 String highlight = (String)config_params.get("highlightQueryTerms");
57 if (highlight != null && highlight.equals("true")) {
58 highlight_query_terms = true;
59 }
60 String annotate = (String)config_params.get("displayAnnotationService");
61 if (annotate != null && annotate.equals("true")) {
62 provide_annotations = true;
63 }
64 return true;
65 }
66 public Element process (Element message)
67 {
68 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
69
70 // the response
71 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
72 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
73 result.appendChild(page_response);
74
75 // get the request - assume only one
76 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
77 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
78 HashMap params = GSXML.extractParams(cgi_paramList, false);
79
80 // just in case there are some that need to get passed to the services
81 HashMap service_params = (HashMap)params.get("s0");
82
83 String collection = (String) params.get(GSParams.COLLECTION);
84 String lang = request.getAttribute(GSXML.LANG_ATT);
85 String uid = request.getAttribute(GSXML.USER_ID_ATT);
86 String document_name = (String) params.get(GSParams.DOCUMENT);
87 if (document_name == null || document_name.equals("")) {
88 System.err.println("DocumentAction Error: no document specified!");
89 return result;
90 }
91 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
92 if (document_type == null) {
93 document_type = "simple";
94 }
95 //whether to retrieve siblings or not
96 boolean get_siblings = false;
97 String sibs = (String) params.get(SIBLING_ARG);
98 if (sibs != null && sibs.equals("1")) {
99 get_siblings = true;
100 }
101
102 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
103 if (sibling_num != null && !sibling_num.equals("")) {
104 // we have to modify the doc name
105 document_name = document_name+"."+sibling_num+".ss";
106 }
107
108 boolean expand_document = false;
109 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
110 if (ed_arg != null && ed_arg.equals("1")) {
111 expand_document = true;
112 }
113
114
115 boolean expand_contents = false;
116 if (expand_document) { // we always expand the contents with the text
117 expand_contents = true;
118 } else {
119 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
120 if (ec_arg != null && ec_arg.equals("1")) {
121 expand_contents = true;
122 }
123 }
124 // get the additional data needed for the page
125 getBackgroundData(page_response, collection, lang, uid);
126 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
127
128 // the_document is where all the doc info - structure and metadata etc
129 // is added into, to be returned in the page
130 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
131 page_response.appendChild(the_document);
132
133 // set the doctype from the cgi arg as an attribute
134 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
135
136 // create a basic doc list containing the current node
137 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
138 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
139 basic_doc_list.appendChild(current_doc);
140 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
141
142 // Create a parameter list to specify the required structure information
143 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
144
145 if (service_params != null) {
146 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
147 }
148
149 Element ds_param = null;
150 boolean get_structure = false;
151 boolean get_structure_info = false;
152 if (document_type.equals("paged")) {
153 get_structure_info = true;
154 // get teh info needed for paged naviagtion
155 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
156 ds_param_list.appendChild(ds_param);
157 ds_param.setAttribute(GSXML.NAME_ATT, "info");
158 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
159 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
160 ds_param_list.appendChild(ds_param);
161 ds_param.setAttribute(GSXML.NAME_ATT, "info");
162 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
163 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
164 ds_param_list.appendChild(ds_param);
165 ds_param.setAttribute(GSXML.NAME_ATT, "info");
166 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
167
168 } else if (document_type.equals("hierarchy")){
169 get_structure = true;
170 if (expand_contents) {
171 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
172 ds_param_list.appendChild(ds_param);
173 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
174 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
175 } else {
176 // get the info needed for table of contents
177 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
178 ds_param_list.appendChild(ds_param);
179 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
180 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
181 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
182 ds_param_list.appendChild(ds_param);
183 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
184 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
185 if (get_siblings) {
186 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
187 ds_param_list.appendChild(ds_param);
188 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
189 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
190 }
191 }
192 } else {
193 // we dont need any structure
194 }
195
196 boolean has_dummy = false;
197 if (get_structure || get_structure_info) {
198
199 // Build a request to obtain the document structure
200 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
201 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
202 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
203 ds_message.appendChild(ds_request);
204 ds_request.appendChild(ds_param_list);
205
206 // create a doc_node_list and put in the doc_node that we are interested in
207 ds_request.appendChild(basic_doc_list);
208
209 // Process the document structure retrieve message
210 Element ds_response_message = (Element) this.mr.process(ds_message);
211
212 // get the info and print out
213 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
214 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
215 path = GSPath.appendLink(path, "nodeStructureInfo");
216 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
217 // get the doc_node bit
218 if (ds_response_struct_info != null) {
219 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
220 }
221 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
222 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
223 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
224 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
225
226 if (ds_response_structure != null) {
227 // add the contents of the structure bit into the_document
228 NodeList structs = ds_response_structure.getChildNodes();
229 for (int i=0; i<structs.getLength();i++) {
230 the_document.appendChild(this.doc.importNode(structs.item(i), true));
231 }
232 } else {
233 // no structure nodes, so put in a dummy doc node
234 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
235 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
236 the_document.appendChild(doc_node);
237 has_dummy = true;
238 }
239 } else { // a simple type - we dont have a dummy node for simple
240 // should think about this more
241 // no structure request, so just put in a dummy doc node
242 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
243 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
244 the_document.appendChild(doc_node);
245 has_dummy = true;
246 }
247
248 // Build a request to obtain some document metadata
249 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
250 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
251 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
252 dm_message.appendChild(dm_request);
253 // Create a parameter list to specify the required metadata information
254
255 HashSet meta_names = new HashSet();
256 meta_names.add("Title"); // the default
257 if (format_elem != null) {
258 extractMetadataNames(format_elem, meta_names);
259 }
260
261 Element dm_param_list = createMetadataParamList(meta_names);
262 if (service_params != null) {
263 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
264 }
265
266 dm_request.appendChild(dm_param_list);
267
268
269 // create the doc node list for the metadata request
270 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
271 dm_request.appendChild(dm_doc_list);
272
273 // Add each node from the structure response into the metadata request
274 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
275 for (int i = 0; i < doc_nodes.getLength(); i++) {
276 Element doc_node = (Element) doc_nodes.item(i);
277 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
278
279 // Add the documentNode to the list
280 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
281 dm_doc_list.appendChild(dm_doc_node);
282 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
283 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
284 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
285 }
286
287 // we also want a metadata request to the top level document to get
288 // assocfilepath - this could be cached too
289 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
290 dm_message.appendChild(doc_meta_request);
291 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
292 if (service_params != null) {
293 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
294 }
295
296 doc_meta_request.appendChild(doc_meta_param_list);
297 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
298 doc_meta_param_list.appendChild(doc_param);
299 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
300 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
301
302 // create the doc node list for the metadata request
303 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
304 doc_meta_request.appendChild(doc_list);
305
306 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
307 // teh node we want is the root document node
308 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
309 doc_list.appendChild(doc_node);
310 Element dm_response_message = (Element) this.mr.process(dm_message);
311
312 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
313 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
314
315 // Merge the metadata with the structure information
316 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
317 for (int i = 0; i < doc_nodes.getLength(); i++) {
318 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
319 }
320 // get teh top level doc metadata out
321 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
322 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
323 if (doc_meta_list != null) {
324 the_document.appendChild(this.doc.importNode(doc_meta_list, true));
325 }
326 // Build a request to obtain some document content
327 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
328 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
329 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
330 dc_message.appendChild(dc_request);
331
332
333 // Create a parameter list to specify the request parameters - empty for now
334 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
335 if (service_params != null) {
336 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
337 }
338
339 dc_request.appendChild(dc_param_list);
340
341 // get the content
342 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
343 if (expand_document) {
344 dc_request.appendChild(dm_doc_list);
345 } else {
346 dc_request.appendChild(basic_doc_list);
347 }
348 System.err.println("request = "+converter.getString(dc_message));
349 Element dc_response_message = (Element) this.mr.process(dc_message);
350 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
351
352 if (expand_document) {
353 // Merge the content with the structure information
354 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
355 for (int i = 0; i < doc_nodes.getLength(); i++) {
356 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
357 if (content != null) {
358 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
359 }
360 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
361 }
362 } else {
363
364 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
365 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
366 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
367
368 if (dc_response_doc_content == null) {
369 // no content to add
370 return result;
371 }
372 if (highlight_query_terms) {
373 dc_response_doc.removeChild(dc_response_doc_content);
374
375 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
376 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
377 }
378
379
380 if (provide_annotations) {
381 String service_selected = (String)params.get(ENRICH_DOC_ARG);
382 if (service_selected != null && service_selected.equals("1")) {
383 // now we can modifiy the response doc if needed
384 String enrich_service = (String)params.get(GSParams.SERVICE);
385 // send a message to the service
386 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
387 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
388 enrich_message.appendChild(enrich_request);
389 // check for parameters
390 HashMap e_service_params = (HashMap)params.get("s1");
391 if (e_service_params != null) {
392 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
393 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
394 enrich_request.appendChild(enrich_pl);
395 }
396 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
397 enrich_request.appendChild(e_doc_list);
398 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
399
400 Element enrich_response = this.mr.process(enrich_message);
401
402 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
403 path = GSPath.createPath(links);
404 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
405
406 }
407 } // if provide_annotations
408
409
410 // use the returned id rather than the sent one cos there may have
411 // been modifiers such as .pr that are removed.
412 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
413 the_document.setAttribute("selectedNode", modified_doc_id);
414 if (has_dummy) {
415 // change the id if necessary and add the content
416 Element dummy_node = (Element)doc_nodes.item(0);
417
418 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
419 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
420 // hack for simple type
421 if (document_type.equals("simple")) {
422 // we dont want the internal docNode, just want the content and metadata in the document
423 // rethink this!!
424 the_document.removeChild(dummy_node);
425
426 NodeList dummy_children = dummy_node.getChildNodes();
427 //for (int i=0; i<dummy_children.getLength(); i++) {
428 for (int i=dummy_children.getLength()-1; i>=0; i--) {
429 the_document.appendChild(dummy_children.item(i));
430
431 }
432 }
433 } else {
434 // Merge the document content with the metadata and structure information
435 for (int i = 0; i < doc_nodes.getLength(); i++) {
436 Node dn = doc_nodes.item(i);
437 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
438 if (dn_id.equals(modified_doc_id)) {
439 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
440 break;
441 }
442 }
443 }
444 }
445 ///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
446 return result;
447 }
448
/** tell the param class what its arguments are.
 * if an action has its own arguments, this should add them to the params
 * object - particularly important for args that should not be saved.
 * This action adds its go-to-page and enrich-document arguments.
 * @param params the params object to add the arguments to
 * @return always true
 */
public boolean getActionParameters(GSParams params) {
    params.addParameter(GOTO_PAGE_ARG, false);
    params.addParameter(ENRICH_DOC_ARG, false);
    return true;
}
457
458
459 /** this method gets the collection description, the format info, the
460 * list of enrich services, etc - stuff that is needed for the page,
461 * but is the same whatever the query is - should be cached */
462 protected boolean getBackgroundData(Element page_response,
463 String collection, String lang,
464 String uid) {
465
466 // create a message to process - contains requests for the collection
467 // description, the format element, the enrich services on offer
468 // these could all be cached
469 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
470 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
471 // the format request - ignore for now, where does this request go to??
472 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
473 info_message.appendChild(format_request);
474
475 // the enrich_services request - only do this if provide_annotations is true
476
477 if (provide_annotations) {
478 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
479 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
480 info_message.appendChild(enrich_services_request);
481 }
482
483 Element info_response = (Element)this.mr.process(info_message);
484
485 // the collection is the first response
486 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
487 Element format_resp = (Element) responses.item(0);
488
489 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
490 if (format_elem != null) {
491 ///ystem.out.println("doc action found a format statement");
492 // set teh format type
493 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
494 page_response.appendChild(this.doc.importNode(format_elem, true));
495 }
496
497 if (provide_annotations) {
498 Element services_resp = (Element)responses.item(1);
499
500 // a new message for the mr
501 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
502
503 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
504 boolean service_found = false;
505 for (int j=0; j<e_services.getLength(); j++) {
506 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
507 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
508 enrich_message.appendChild(s);
509 service_found = true;
510 }
511 }
512 if (service_found) {
513 Element enrich_response = this.mr.process(enrich_message);
514
515 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
516 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
517 for (int i=0; i<e_responses.getLength(); i++) {
518 Element e_resp = (Element)e_responses.item(i);
519 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
520 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
521 service_list.appendChild(e_service);
522 }
523 page_response.appendChild(service_list);
524 }
525 } // if provide_annotations
526 return true;
527
528 }
529
530 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
531 */
532 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
533
534 // do the query again to get term info
535 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
536 HashMap params = GSXML.extractParams(cgi_param_list, false);
537
538
539 String service_name = (String)((HashMap)params.get("p")).get(GSParams.SERVICE);
540 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
541 System.err.println("DocumentAction: invalid service, not doing highlighting");
542 return dc_response_doc_content;
543 }
544 String collection = (String)params.get(GSParams.COLLECTION);
545 String lang = request.getAttribute(GSXML.LANG_ATT);
546 String uid = request.getAttribute(GSXML.USER_ID_ATT);
547 String to = GSPath.appendLink(collection, service_name);
548
549 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
550 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
551 mr_query_message.appendChild(mr_query_request);
552
553 // paramList
554 HashMap service_params = (HashMap)params.get("s1");
555
556 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
557 GSXML.addParametersToList(this.doc, query_param_list, service_params);
558 mr_query_request.appendChild(query_param_list);
559
560 // do the query
561 Element mr_query_response = (Element)this.mr.process(mr_query_message);
562
563 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
564 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
565 if (query_term_list_element == null) {
566 // no term info
567 System.err.println("DocumentAction: Warning: No query term information.\n");
568 return dc_response_doc_content;
569 }
570
571 String content = GSXML.getNodeText(dc_response_doc_content);
572
573 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
574 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
575
576 HashSet query_term_variants = new HashSet();
577 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
578 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
579 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
580 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
581 for (int j = 0; j < equivalent_terms.length; j++) {
582 System.err.println("Adding query term variant: " + equivalent_terms[j]);
583 query_term_variants.add(equivalent_terms[j]);
584 }
585 }
586
587 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
588
589 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
590 String performed_query = GSXML.getNodeText(query_element) + " ";
591
592 ArrayList phrase_query_p_term_variants_list = new ArrayList();
593 int term_start = 0;
594 boolean in_term = false;
595 boolean in_phrase = false;
596 for (int i = 0; i < performed_query.length(); i++) {
597 char character = performed_query.charAt(i);
598 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
599
600 // Has a query term just started?
601 if (in_term == false && is_character_letter_or_digit == true) {
602 in_term = true;
603 term_start = i;
604 }
605
606 // Or has a term just finished?
607 else if (in_term == true && is_character_letter_or_digit == false) {
608 in_term = false;
609 String term = performed_query.substring(term_start, i);
610 System.err.println("Term: " + term);
611
612 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
613 if (term_element != null) {
614
615 HashSet phrase_query_p_term_x_variants = new HashSet();
616
617 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
618 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
619 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
620 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
621 for (int k = 0; k < term_equivalent_terms.length; k++) {
622 System.err.println("Adding query term variant: " + term_equivalent_terms[k]);
623 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
624 }
625 }
626 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
627
628 if (in_phrase == false) {
629 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
630 phrase_query_p_term_variants_list = new ArrayList();
631 }
632 }
633 }
634 // Watch for phrases (surrounded by quotes)
635 if (character == '\"') {
636 // Has a phrase just started?
637 if (in_phrase == false) {
638 in_phrase = true;
639 }
640 // Or has a phrase just finished?
641 else if (in_phrase == true) {
642 in_phrase = false;
643 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
644 }
645
646 phrase_query_p_term_variants_list = new ArrayList();
647 }
648 }
649
650 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
651 }
652
653
654 /**
655 * Highlights query terms in a piece of text.
656 */
657 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
658 {
659 // Convert the content string to an array of characters for speed
660 char[] content_characters = new char[content.length()];
661 content.getChars(0, content.length(), content_characters, 0);
662
663 // Now skim through the content, identifying word matches
664 ArrayList word_matches = new ArrayList();
665 int word_start = 0;
666 boolean in_word = false;
667 boolean preceding_word_matched = false;
668 for (int i = 0; i < content_characters.length; i++) {
669 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
670
671 // Has a word just started?
672 if (in_word == false && is_character_letter_or_digit == true) {
673 in_word = true;
674 word_start = i;
675 }
676
677 // Or has a word just finished?
678 else if (in_word == true && is_character_letter_or_digit == false) {
679 in_word = false;
680
681 // Check if the word matches any of the query term equivalents
682 String word = new String(content_characters, word_start, (i - word_start));
683 if (query_term_variants.contains(word)) {
684 // We have found a matching word, so remember its location
685 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
686 preceding_word_matched = true;
687 }
688 else {
689 preceding_word_matched = false;
690 }
691 }
692 }
693
694 // Don't forget the last word...
695 if (in_word == true) {
696 // Check if the word matches any of the query term equivalents
697 String word = new String(content_characters, word_start, (content_characters.length - word_start));
698 if (query_term_variants.contains(word)) {
699 // We have found a matching word, so remember its location
700 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
701 }
702 }
703
704 ArrayList highlight_start_positions = new ArrayList();
705 ArrayList highlight_end_positions = new ArrayList();
706
707 // Deal with phrases now
708 ArrayList partial_phrase_matches = new ArrayList();
709 for (int i = 0; i < word_matches.size(); i++) {
710 WordMatch word_match = (WordMatch) word_matches.get(i);
711
712 // See if any partial phrase matches are extended by this word
713 if (word_match.preceding_word_matched) {
714 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
715 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
716 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
717 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
718 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
719 partial_phrase_match.num_words_matched++;
720
721 // Has a complete phrase match occurred?
722 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
723 // Check for overlaps by looking at the previous highlight range
724 if (!highlight_end_positions.isEmpty()) {
725 int last_highlight_index = highlight_end_positions.size() - 1;
726 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
727 if (last_highlight_end > partial_phrase_match.start_position) {
728 // There is an overlap, so remove the previous phrase match
729 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
730 highlight_end_positions.remove(last_highlight_index);
731 partial_phrase_match.start_position = last_highlight_start;
732 }
733 }
734
735 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
736 highlight_end_positions.add(new Integer(word_match.end_position));
737 }
738 // No, but add the partial match back into the list for next time
739 else {
740 partial_phrase_matches.add(partial_phrase_match);
741 }
742 }
743 }
744 }
745 else {
746 partial_phrase_matches.clear();
747 }
748
749 // See if this word is at the start of any of the phrases
750 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
751 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
752 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
753 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
754 // If this phrase is just one word long, we have a complete match
755 if (phrase_query_p_term_variants_list.size() == 1) {
756 highlight_start_positions.add(new Integer(word_match.start_position));
757 highlight_end_positions.add(new Integer(word_match.end_position));
758 }
759 // Otherwise we have the start of a potential phrase match
760 else {
761 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
762 }
763 }
764 }
765 }
766
767 // Now add the annotation tags into the document at the correct points
768 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
769
770 int last_wrote = 0;
771 for (int i = 0; i < highlight_start_positions.size(); i++) {
772 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
773 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
774
775 // Print anything before the highlight range
776 if (last_wrote < highlight_start) {
777 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
778 // System.err.print(preceding_text);
779 content_element.appendChild(this.doc.createTextNode(preceding_text));
780 }
781
782 // Print the highlight text, annotated
783 if (highlight_end > last_wrote) {
784 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
785 // System.err.print("|" + highlight_text + "|");
786 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
787 annotation_element.setAttribute("type", "query_term");
788 content_element.appendChild(annotation_element);
789 last_wrote = highlight_end;
790 }
791 }
792
793 // Finish off any unwritten text
794 if (last_wrote < content_characters.length) {
795 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
796 // System.err.print(remaining_text);
797 content_element.appendChild(this.doc.createTextNode(remaining_text));
798 }
799
800 return content_element;
801 }
802
803
804 static private class WordMatch
805 {
806 public String word;
807 public int start_position;
808 public int end_position;
809 public boolean preceding_word_matched;
810
811 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
812 {
813 this.word = word;
814 this.start_position = start_position;
815 this.end_position = end_position;
816 this.preceding_word_matched = preceding_word_matched;
817 }
818 }
819
820
821 static private class PartialPhraseMatch
822 {
823 public int start_position;
824 public int query_phrase_number;
825 public int num_words_matched;
826
827 public PartialPhraseMatch(int start_position, int query_phrase_number)
828 {
829 this.start_position = start_position;
830 this.query_phrase_number = query_phrase_number;
831 this.num_words_matched = 1;
832 }
833 }
834}
Note: See TracBrowser for help on using the repository browser.