source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago

merged from branch ant-install-branch: merge 1

  • Property svn:keywords set to Author Date Id Revision
File size: 35.2 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.HashSet;
36import java.io.File;
37
38
39/** Action class for retrieving Documents via the message router
40 */
41public class DocumentAction extends Action {
42
// Names of the request parameters that are specific to this action.
43 // this is used to specify that the sibling nodes of a selected one should be obtained - process() only asks for them when the value is "1" (and only for non-expanded hierarchy documents)
44 public static final String SIBLING_ARG = "sib";
// when this parameter is non-empty, process() appends it to the document name (as "." + value + ".ss") to go to another page
45 public static final String GOTO_PAGE_ARG = "gp";
// when this parameter is "1" and provide_annotations is on, process() sends the document content to the selected enrich service
46 public static final String ENRICH_DOC_ARG = "end";
47
48 /** if this is set to true, when a document is displayed, any annotation
49 * type services (enrich) will be offered to the user as well. configure() turns it on when the "displayAnnotationService" config param is "true" */
50 protected boolean provide_annotations = false;
51
// if this is set to true, query terms are highlighted in the retrieved content of the selected node (not done when the whole document is expanded). configure() turns it on when the "highlightQueryTerms" config param is "true"
52 protected boolean highlight_query_terms = false;
53
54 public boolean configure() {
55 super.configure();
56 String highlight = (String)config_params.get("highlightQueryTerms");
57 if (highlight != null && highlight.equals("true")) {
58 highlight_query_terms = true;
59 }
60 String annotate = (String)config_params.get("displayAnnotationService");
61 if (annotate != null && annotate.equals("true")) {
62 provide_annotations = true;
63 }
64 return true;
65 }
66 public Element process (Element message)
67 {
68 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
69
70 // the response
71 Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
72 Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
73 result.appendChild(page_response);
74
75 // get the request - assume only one
76 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
77 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
78 HashMap params = GSXML.extractParams(cgi_paramList, false);
79
80 // just in case there are some that need to get passed to the services
81 HashMap service_params = (HashMap)params.get("s0");
82
83 String collection = (String) params.get(GSParams.COLLECTION);
84 String lang = request.getAttribute(GSXML.LANG_ATT);
85 String uid = request.getAttribute(GSXML.USER_ID_ATT);
86 String document_name = (String) params.get(GSParams.DOCUMENT);
87 if (document_name == null || document_name.equals("")) {
88 System.err.println("DocumentAction Error: no document specified!");
89 return result;
90 }
91 String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
92 if (document_type == null) {
93 document_type = "simple";
94 }
95 //whether to retrieve siblings or not
96 boolean get_siblings = false;
97 String sibs = (String) params.get(SIBLING_ARG);
98 if (sibs != null && sibs.equals("1")) {
99 get_siblings = true;
100 }
101
102 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
103 if (sibling_num != null && !sibling_num.equals("")) {
104 // we have to modify the doc name
105 document_name = document_name+"."+sibling_num+".ss";
106 }
107
108 boolean expand_document = false;
109 String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
110 if (ed_arg != null && ed_arg.equals("1")) {
111 expand_document = true;
112 }
113
114
115 boolean expand_contents = false;
116 if (expand_document) { // we always expand the contents with the text
117 expand_contents = true;
118 } else {
119 String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
120 if (ec_arg != null && ec_arg.equals("1")) {
121 expand_contents = true;
122 }
123 }
124 // get the additional data needed for the page
125 getBackgroundData(page_response, collection, lang, uid);
126 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
127
128 // the_document is where all the doc info - structure and metadata etc
129 // is added into, to be returned in the page
130 Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
131 page_response.appendChild(the_document);
132
133 // set the doctype from the cgi arg as an attribute
134 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
135
136 // create a basic doc list containing the current node
137 Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
138 Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
139 basic_doc_list.appendChild(current_doc);
140 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
141
142 // Create a parameter list to specify the required structure information
143 Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
144
145 if (service_params != null) {
146 GSXML.addParametersToList(this.doc, ds_param_list, service_params);
147 }
148
149 Element ds_param = null;
150 boolean get_structure = false;
151 boolean get_structure_info = false;
152 if (document_type.equals("paged")) {
153 get_structure_info = true;
154 // get teh info needed for paged naviagtion
155 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
156 ds_param_list.appendChild(ds_param);
157 ds_param.setAttribute(GSXML.NAME_ATT, "info");
158 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
159 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
160 ds_param_list.appendChild(ds_param);
161 ds_param.setAttribute(GSXML.NAME_ATT, "info");
162 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
163 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
164 ds_param_list.appendChild(ds_param);
165 ds_param.setAttribute(GSXML.NAME_ATT, "info");
166 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
167
168 } else if (document_type.equals("hierarchy")){
169 get_structure = true;
170 if (expand_contents) {
171 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
172 ds_param_list.appendChild(ds_param);
173 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
174 ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
175 } else {
176 // get the info needed for table of contents
177 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
178 ds_param_list.appendChild(ds_param);
179 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
180 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
181 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
182 ds_param_list.appendChild(ds_param);
183 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
184 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
185 if (get_siblings) {
186 ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
187 ds_param_list.appendChild(ds_param);
188 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
189 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
190 }
191 }
192 } else {
193 // we dont need any structure
194 }
195
196 boolean has_dummy = false;
197 if (get_structure || get_structure_info) {
198
199 // Build a request to obtain the document structure
200 Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
201 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
202 Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
203 ds_message.appendChild(ds_request);
204 ds_request.appendChild(ds_param_list);
205
206 // create a doc_node_list and put in the doc_node that we are interested in
207 ds_request.appendChild(basic_doc_list);
208
209 // Process the document structure retrieve message
210 Element ds_response_message = (Element) this.mr.process(ds_message);
211 if (processErrorElements(ds_response_message, page_response)) {
212 return result;
213 }
214
215 // get the info and print out
216 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
217 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
218 path = GSPath.appendLink(path, "nodeStructureInfo");
219 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
220 // get the doc_node bit
221 if (ds_response_struct_info != null) {
222 the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
223 }
224 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
225 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
226 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
227 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
228
229 if (ds_response_structure != null) {
230 // add the contents of the structure bit into the_document
231 NodeList structs = ds_response_structure.getChildNodes();
232 for (int i=0; i<structs.getLength();i++) {
233 the_document.appendChild(this.doc.importNode(structs.item(i), true));
234 }
235 } else {
236 // no structure nodes, so put in a dummy doc node
237 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
238 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
239 the_document.appendChild(doc_node);
240 has_dummy = true;
241 }
242 } else { // a simple type - we dont have a dummy node for simple
243 // should think about this more
244 // no structure request, so just put in a dummy doc node
245 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
246 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
247 the_document.appendChild(doc_node);
248 has_dummy = true;
249 }
250
251 // Build a request to obtain some document metadata
252 Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
253 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
254 Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
255 dm_message.appendChild(dm_request);
256 // Create a parameter list to specify the required metadata information
257
258 HashSet meta_names = new HashSet();
259 meta_names.add("Title"); // the default
260 if (format_elem != null) {
261 extractMetadataNames(format_elem, meta_names);
262 }
263
264 Element dm_param_list = createMetadataParamList(meta_names);
265 if (service_params != null) {
266 GSXML.addParametersToList(this.doc, dm_param_list, service_params);
267 }
268
269 dm_request.appendChild(dm_param_list);
270
271
272 // create the doc node list for the metadata request
273 Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
274 dm_request.appendChild(dm_doc_list);
275
276 // Add each node from the structure response into the metadata request
277 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
278 for (int i = 0; i < doc_nodes.getLength(); i++) {
279 Element doc_node = (Element) doc_nodes.item(i);
280 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
281
282 // Add the documentNode to the list
283 Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
284 dm_doc_list.appendChild(dm_doc_node);
285 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
286 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
287 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
288 }
289
290 // we also want a metadata request to the top level document to get
291 // assocfilepath - this could be cached too
292 Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
293 dm_message.appendChild(doc_meta_request);
294 Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
295 if (service_params != null) {
296 GSXML.addParametersToList(this.doc, doc_meta_param_list, service_params);
297 }
298
299 doc_meta_request.appendChild(doc_meta_param_list);
300 Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
301 doc_meta_param_list.appendChild(doc_param);
302 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
303 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
304
305 // create the doc node list for the metadata request
306 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
307 doc_meta_request.appendChild(doc_list);
308
309 Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
310 // teh node we want is the root document node
311 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
312 doc_list.appendChild(doc_node);
313 Element dm_response_message = (Element) this.mr.process(dm_message);
314 if (processErrorElements(dm_response_message, page_response)) {
315 return result;
316 }
317
318 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
319 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
320
321 // Merge the metadata with the structure information
322 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
323 for (int i = 0; i < doc_nodes.getLength(); i++) {
324 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
325 }
326 // get teh top level doc metadata out
327 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
328 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
329 if (doc_meta_list != null) {
330 the_document.appendChild(this.doc.importNode(doc_meta_list, true));
331 }
332 // Build a request to obtain some document content
333 Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
334 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
335 Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
336 dc_message.appendChild(dc_request);
337
338
339 // Create a parameter list to specify the request parameters - empty for now
340 Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
341 if (service_params != null) {
342 GSXML.addParametersToList(this.doc, dc_param_list, service_params);
343 }
344
345 dc_request.appendChild(dc_param_list);
346
347 // get the content
348 // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
349 if (expand_document) {
350 dc_request.appendChild(dm_doc_list);
351 } else {
352 dc_request.appendChild(basic_doc_list);
353 }
354 System.err.println("request = "+converter.getString(dc_message));
355 Element dc_response_message = (Element) this.mr.process(dc_message);
356 if (processErrorElements(dc_response_message, page_response)) {
357 return result;
358 }
359
360 Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
361
362 if (expand_document) {
363 // Merge the content with the structure information
364 NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
365 for (int i = 0; i < doc_nodes.getLength(); i++) {
366 Node content = GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent");
367 if (content != null) {
368 doc_nodes.item(i).appendChild(this.doc.importNode(content, true));
369 }
370 //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
371 }
372 } else {
373
374 //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
375 Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
376 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
377
378 if (dc_response_doc_content == null) {
379 // no content to add
380 return result;
381 }
382 if (highlight_query_terms) {
383 dc_response_doc.removeChild(dc_response_doc_content);
384
385 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
386 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
387 }
388
389
390 if (provide_annotations) {
391 String service_selected = (String)params.get(ENRICH_DOC_ARG);
392 if (service_selected != null && service_selected.equals("1")) {
393 // now we can modifiy the response doc if needed
394 String enrich_service = (String)params.get(GSParams.SERVICE);
395 // send a message to the service
396 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
397 Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
398 enrich_message.appendChild(enrich_request);
399 // check for parameters
400 HashMap e_service_params = (HashMap)params.get("s1");
401 if (e_service_params != null) {
402 Element enrich_pl = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
403 GSXML.addParametersToList(this.doc, enrich_pl, e_service_params);
404 enrich_request.appendChild(enrich_pl);
405 }
406 Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
407 enrich_request.appendChild(e_doc_list);
408 e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
409
410 Element enrich_response = this.mr.process(enrich_message);
411
412 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
413 path = GSPath.createPath(links);
414 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
415
416 }
417 } // if provide_annotations
418
419
420 // use the returned id rather than the sent one cos there may have
421 // been modifiers such as .pr that are removed.
422 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
423 the_document.setAttribute("selectedNode", modified_doc_id);
424 if (has_dummy) {
425 // change the id if necessary and add the content
426 Element dummy_node = (Element)doc_nodes.item(0);
427
428 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
429 dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
430 // hack for simple type
431 if (document_type.equals("simple")) {
432 // we dont want the internal docNode, just want the content and metadata in the document
433 // rethink this!!
434 the_document.removeChild(dummy_node);
435
436 NodeList dummy_children = dummy_node.getChildNodes();
437 //for (int i=0; i<dummy_children.getLength(); i++) {
438 for (int i=dummy_children.getLength()-1; i>=0; i--) {
439 the_document.appendChild(dummy_children.item(i));
440
441 }
442 }
443 } else {
444 // Merge the document content with the metadata and structure information
445 for (int i = 0; i < doc_nodes.getLength(); i++) {
446 Node dn = doc_nodes.item(i);
447 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
448 if (dn_id.equals(modified_doc_id)) {
449 dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
450 break;
451 }
452 }
453 }
454 }
455 ///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
456 return result;
457 }
458
459 /** tell the param class what its arguments are
460 * if an action has its own arguments, this should add them to the params
461 * object - particularly important for args that should not be saved */
462 public boolean getActionParameters(GSParams params) {
463 params.addParameter(GOTO_PAGE_ARG, false);
464 params.addParameter(ENRICH_DOC_ARG, false);
465 return true;
466 }
467
468
469 /** this method gets the collection description, the format info, the
470 * list of enrich services, etc - stuff that is needed for the page,
471 * but is the same whatever the query is - should be cached */
472 protected boolean getBackgroundData(Element page_response,
473 String collection, String lang,
474 String uid) {
475
476 // create a message to process - contains requests for the collection
477 // description, the format element, the enrich services on offer
478 // these could all be cached
479 Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
480 String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
481 // the format request - ignore for now, where does this request go to??
482 Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
483 info_message.appendChild(format_request);
484
485 // the enrich_services request - only do this if provide_annotations is true
486
487 if (provide_annotations) {
488 Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
489 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
490 info_message.appendChild(enrich_services_request);
491 }
492
493 Element info_response = (Element)this.mr.process(info_message);
494
495 // the collection is the first response
496 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
497 Element format_resp = (Element) responses.item(0);
498
499 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
500 if (format_elem != null) {
501 ///ystem.out.println("doc action found a format statement");
502 // set teh format type
503 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
504 page_response.appendChild(this.doc.importNode(format_elem, true));
505 }
506
507 if (provide_annotations) {
508 Element services_resp = (Element)responses.item(1);
509
510 // a new message for the mr
511 Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
512
513 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
514 boolean service_found = false;
515 for (int j=0; j<e_services.getLength(); j++) {
516 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
517 Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
518 enrich_message.appendChild(s);
519 service_found = true;
520 }
521 }
522 if (service_found) {
523 Element enrich_response = this.mr.process(enrich_message);
524
525 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
526 Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
527 for (int i=0; i<e_responses.getLength(); i++) {
528 Element e_resp = (Element)e_responses.item(i);
529 Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
530 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
531 service_list.appendChild(e_service);
532 }
533 page_response.appendChild(service_list);
534 }
535 } // if provide_annotations
536 return true;
537
538 }
539
540 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
541 */
542 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
543
544 // do the query again to get term info
545 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
546 HashMap params = GSXML.extractParams(cgi_param_list, false);
547
548 HashMap previous_params = (HashMap)params.get("p");
549 if (previous_params == null) {
550 return dc_response_doc_content;
551 }
552 String service_name = (String)previous_params.get(GSParams.SERVICE);
553 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
554 System.err.println("DocumentAction: invalid service, not doing highlighting");
555 return dc_response_doc_content;
556 }
557 String collection = (String)params.get(GSParams.COLLECTION);
558 String lang = request.getAttribute(GSXML.LANG_ATT);
559 String uid = request.getAttribute(GSXML.USER_ID_ATT);
560 String to = GSPath.appendLink(collection, service_name);
561
562 Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
563 Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
564 mr_query_message.appendChild(mr_query_request);
565
566 // paramList
567 HashMap service_params = (HashMap)params.get("s1");
568
569 Element query_param_list = this.doc.createElement(GSXML.PARAM_ELEM + GSXML.LIST_MODIFIER);
570 GSXML.addParametersToList(this.doc, query_param_list, service_params);
571 mr_query_request.appendChild(query_param_list);
572
573 // do the query
574 Element mr_query_response = (Element)this.mr.process(mr_query_message);
575
576 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
577 Element query_term_list_element = (Element) GSXML.getNodeByPath(mr_query_response, path);
578 if (query_term_list_element == null) {
579 // no term info
580 System.err.println("DocumentAction: Warning: No query term information.\n");
581 return dc_response_doc_content;
582 }
583
584 String content = GSXML.getNodeText(dc_response_doc_content);
585
586 String metadata_path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
587 Element metadata_list = (Element) GSXML.getNodeByPath(mr_query_response, metadata_path);
588
589 HashSet query_term_variants = new HashSet();
590 NodeList equivalent_terms_nodelist = query_term_list_element.getElementsByTagName("equivTermList");
591 for (int i = 0; i < equivalent_terms_nodelist.getLength(); i++) {
592 Element equivalent_terms_element = (Element) equivalent_terms_nodelist.item(i);
593 String[] equivalent_terms = GSXML.getAttributeValuesFromList(equivalent_terms_element, GSXML.NAME_ATT);
594 for (int j = 0; j < equivalent_terms.length; j++) {
595 System.err.println("Adding query term variant: " + equivalent_terms[j]);
596 query_term_variants.add(equivalent_terms[j]);
597 }
598 }
599
600 ArrayList phrase_query_term_variants_hierarchy = new ArrayList();
601
602 Element query_element = GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "query");
603 String performed_query = GSXML.getNodeText(query_element) + " ";
604
605 ArrayList phrase_query_p_term_variants_list = new ArrayList();
606 int term_start = 0;
607 boolean in_term = false;
608 boolean in_phrase = false;
609 for (int i = 0; i < performed_query.length(); i++) {
610 char character = performed_query.charAt(i);
611 boolean is_character_letter_or_digit = Character.isLetterOrDigit(character);
612
613 // Has a query term just started?
614 if (in_term == false && is_character_letter_or_digit == true) {
615 in_term = true;
616 term_start = i;
617 }
618
619 // Or has a term just finished?
620 else if (in_term == true && is_character_letter_or_digit == false) {
621 in_term = false;
622 String term = performed_query.substring(term_start, i);
623 System.err.println("Term: " + term);
624
625 Element term_element = GSXML.getNamedElement(query_term_list_element, GSXML.TERM_ELEM, GSXML.NAME_ATT, term);
626 if (term_element != null) {
627
628 HashSet phrase_query_p_term_x_variants = new HashSet();
629
630 NodeList term_equivalent_terms_nodelist = term_element.getElementsByTagName("equivTermList");
631 for (int j = 0; j < term_equivalent_terms_nodelist.getLength(); j++) {
632 Element term_equivalent_terms_element = (Element) term_equivalent_terms_nodelist.item(j);
633 String[] term_equivalent_terms = GSXML.getAttributeValuesFromList(term_equivalent_terms_element, GSXML.NAME_ATT);
634 for (int k = 0; k < term_equivalent_terms.length; k++) {
635 System.err.println("Adding query term variant: " + term_equivalent_terms[k]);
636 phrase_query_p_term_x_variants.add(term_equivalent_terms[k]);
637 }
638 }
639 phrase_query_p_term_variants_list.add(phrase_query_p_term_x_variants);
640
641 if (in_phrase == false) {
642 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
643 phrase_query_p_term_variants_list = new ArrayList();
644 }
645 }
646 }
647 // Watch for phrases (surrounded by quotes)
648 if (character == '\"') {
649 // Has a phrase just started?
650 if (in_phrase == false) {
651 in_phrase = true;
652 }
653 // Or has a phrase just finished?
654 else if (in_phrase == true) {
655 in_phrase = false;
656 phrase_query_term_variants_hierarchy.add(phrase_query_p_term_variants_list);
657 }
658
659 phrase_query_p_term_variants_list = new ArrayList();
660 }
661 }
662
663 return highlightQueryTermsInternal(content, query_term_variants, phrase_query_term_variants_hierarchy);
664 }
665
666
667 /**
668 * Highlights query terms in a piece of text.
669 */
670 private Element highlightQueryTermsInternal(String content, HashSet query_term_variants, ArrayList phrase_query_term_variants_hierarchy)
671 {
672 // Convert the content string to an array of characters for speed
673 char[] content_characters = new char[content.length()];
674 content.getChars(0, content.length(), content_characters, 0);
675
676 // Now skim through the content, identifying word matches
677 ArrayList word_matches = new ArrayList();
678 int word_start = 0;
679 boolean in_word = false;
680 boolean preceding_word_matched = false;
681 for (int i = 0; i < content_characters.length; i++) {
682 boolean is_character_letter_or_digit = Character.isLetterOrDigit(content_characters[i]);
683
684 // Has a word just started?
685 if (in_word == false && is_character_letter_or_digit == true) {
686 in_word = true;
687 word_start = i;
688 }
689
690 // Or has a word just finished?
691 else if (in_word == true && is_character_letter_or_digit == false) {
692 in_word = false;
693
694 // Check if the word matches any of the query term equivalents
695 String word = new String(content_characters, word_start, (i - word_start));
696 if (query_term_variants.contains(word)) {
697 // We have found a matching word, so remember its location
698 word_matches.add(new WordMatch(word, word_start, i, preceding_word_matched));
699 preceding_word_matched = true;
700 }
701 else {
702 preceding_word_matched = false;
703 }
704 }
705 }
706
707 // Don't forget the last word...
708 if (in_word == true) {
709 // Check if the word matches any of the query term equivalents
710 String word = new String(content_characters, word_start, (content_characters.length - word_start));
711 if (query_term_variants.contains(word)) {
712 // We have found a matching word, so remember its location
713 word_matches.add(new WordMatch(word, word_start, content_characters.length, preceding_word_matched));
714 }
715 }
716
717 ArrayList highlight_start_positions = new ArrayList();
718 ArrayList highlight_end_positions = new ArrayList();
719
720 // Deal with phrases now
721 ArrayList partial_phrase_matches = new ArrayList();
722 for (int i = 0; i < word_matches.size(); i++) {
723 WordMatch word_match = (WordMatch) word_matches.get(i);
724
725 // See if any partial phrase matches are extended by this word
726 if (word_match.preceding_word_matched) {
727 for (int j = partial_phrase_matches.size() - 1; j >= 0; j--) {
728 PartialPhraseMatch partial_phrase_match = (PartialPhraseMatch) partial_phrase_matches.remove(j);
729 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(partial_phrase_match.query_phrase_number);
730 HashSet phrase_query_p_term_x_variants = (HashSet) phrase_query_p_term_variants_list.get(partial_phrase_match.num_words_matched);
731 if (phrase_query_p_term_x_variants.contains(word_match.word)) {
732 partial_phrase_match.num_words_matched++;
733
734 // Has a complete phrase match occurred?
735 if (partial_phrase_match.num_words_matched == phrase_query_p_term_variants_list.size()) {
736 // Check for overlaps by looking at the previous highlight range
737 if (!highlight_end_positions.isEmpty()) {
738 int last_highlight_index = highlight_end_positions.size() - 1;
739 int last_highlight_end = ((Integer) highlight_end_positions.get(last_highlight_index)).intValue();
740 if (last_highlight_end > partial_phrase_match.start_position) {
741 // There is an overlap, so remove the previous phrase match
742 int last_highlight_start = ((Integer) highlight_start_positions.remove(last_highlight_index)).intValue();
743 highlight_end_positions.remove(last_highlight_index);
744 partial_phrase_match.start_position = last_highlight_start;
745 }
746 }
747
748 highlight_start_positions.add(new Integer(partial_phrase_match.start_position));
749 highlight_end_positions.add(new Integer(word_match.end_position));
750 }
751 // No, but add the partial match back into the list for next time
752 else {
753 partial_phrase_matches.add(partial_phrase_match);
754 }
755 }
756 }
757 }
758 else {
759 partial_phrase_matches.clear();
760 }
761
762 // See if this word is at the start of any of the phrases
763 for (int p = 0; p < phrase_query_term_variants_hierarchy.size(); p++) {
764 ArrayList phrase_query_p_term_variants_list = (ArrayList) phrase_query_term_variants_hierarchy.get(p);
765 HashSet phrase_query_p_term_1_variants = (HashSet) phrase_query_p_term_variants_list.get(0);
766 if (phrase_query_p_term_1_variants.contains(word_match.word)) {
767 // If this phrase is just one word long, we have a complete match
768 if (phrase_query_p_term_variants_list.size() == 1) {
769 highlight_start_positions.add(new Integer(word_match.start_position));
770 highlight_end_positions.add(new Integer(word_match.end_position));
771 }
772 // Otherwise we have the start of a potential phrase match
773 else {
774 partial_phrase_matches.add(new PartialPhraseMatch(word_match.start_position, p));
775 }
776 }
777 }
778 }
779
780 // Now add the annotation tags into the document at the correct points
781 Element content_element = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
782
783 int last_wrote = 0;
784 for (int i = 0; i < highlight_start_positions.size(); i++) {
785 int highlight_start = ((Integer) highlight_start_positions.get(i)).intValue();
786 int highlight_end = ((Integer) highlight_end_positions.get(i)).intValue();
787
788 // Print anything before the highlight range
789 if (last_wrote < highlight_start) {
790 String preceding_text = new String(content_characters, last_wrote, (highlight_start - last_wrote));
791 // System.err.print(preceding_text);
792 content_element.appendChild(this.doc.createTextNode(preceding_text));
793 }
794
795 // Print the highlight text, annotated
796 if (highlight_end > last_wrote) {
797 String highlight_text = new String(content_characters, highlight_start, (highlight_end - highlight_start));
798 // System.err.print("|" + highlight_text + "|");
799 Element annotation_element = GSXML.createTextElement(this.doc, "annotation", highlight_text);
800 annotation_element.setAttribute("type", "query_term");
801 content_element.appendChild(annotation_element);
802 last_wrote = highlight_end;
803 }
804 }
805
806 // Finish off any unwritten text
807 if (last_wrote < content_characters.length) {
808 String remaining_text = new String(content_characters, last_wrote, (content_characters.length - last_wrote));
809 // System.err.print(remaining_text);
810 content_element.appendChild(this.doc.createTextNode(remaining_text));
811 }
812
813 return content_element;
814 }
815
816
817 static private class WordMatch
818 {
819 public String word;
820 public int start_position;
821 public int end_position;
822 public boolean preceding_word_matched;
823
824 public WordMatch(String word, int start_position, int end_position, boolean preceding_word_matched)
825 {
826 this.word = word;
827 this.start_position = start_position;
828 this.end_position = end_position;
829 this.preceding_word_matched = preceding_word_matched;
830 }
831 }
832
833
834 static private class PartialPhraseMatch
835 {
836 public int start_position;
837 public int query_phrase_number;
838 public int num_words_matched;
839
840 public PartialPhraseMatch(int start_position, int query_phrase_number)
841 {
842 this.start_position = start_position;
843 this.query_phrase_number = query_phrase_number;
844 this.num_words_matched = 1;
845 }
846 }
847}
Note: See TracBrowser for help on using the repository browser.