source: trunk/gsdl3/src/java/org/greenstone/gsdl3/action/DocumentAction.java@ 4980

Last change on this file since 4980 was 4875, checked in by kjdon, 21 years ago

The goto form uses the gp arg - this is the goto-page arg. If present, we modify the document_name using the ss extension: docnum.pagenum.ss

  • Property svn:keywords set to Author Date Id Revision
File size: 23.3 KB
Line 
1/*
2 * DocumentAction.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.action;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.ModuleInterface;
23import org.greenstone.gsdl3.util.*;
24
25// XML classes
26import org.w3c.dom.Document;
27import org.w3c.dom.Element;
28import org.w3c.dom.Node;
29import org.w3c.dom.Text;
30import org.w3c.dom.NodeList;
31
32// General Java classes
33import java.util.HashMap;
34import java.util.HashSet;
35import java.io.File;
36
37
38/** Action class for retrieving Documents via the message router
39 */
40public class DocumentAction extends Action {
41
42 // CGI argument: when its value is "1", the siblings of the selected node are requested as well (only consulted for "hierarchy" documents)
43 public static final String SIBLING_ARG = "sib";
44 public static final String DOC_TYPE_ARG = "dt"; // CGI argument naming the document type: "paged", "hierarchy", or "simple" (the default when absent)
45 public static final String GOTO_PAGE_ARG = "gp"; // CGI argument from the goto-page form: when non-empty, ".<value>.ss" is appended to the document name
46
47 /** If this is set to true, when a document is displayed, any annotation
48 * type services (enrich) will be offered to the user as well. */
49 protected static final boolean provide_annotations = false; //true;
50
51 public Element process (Element message)
52 {
53 // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
54
55 // the response
56 Element result = doc_.createElement(GSXML.MESSAGE_ELEM);
57 Element page_response = doc_.createElement(GSXML.RESPONSE_ELEM);
58 result.appendChild(page_response);
59
60 // get the request - assume only one
61 Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
62 Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
63 HashMap params = GSXML.extractParams(cgi_paramList, false);
64
65 String collection = (String) params.get(GSCGI.COLLECTION_ARG);
66 String lang = request.getAttribute(GSXML.LANG_ATT);
67 String document_name = (String) params.get(GSCGI.DOCUMENT_ARG);
68 if (document_name == null || document_name.equals("")) {
69 System.err.println("DocumentAction Error: no document specified!");
70 return result;
71 }
72 String document_type = (String) params.get(DOC_TYPE_ARG);
73 if (document_type == null) {
74 document_type = "simple";
75 }
76 //whether to retrieve siblings or not
77 boolean get_siblings = false;
78 String sibs = (String) params.get(SIBLING_ARG);
79 if (sibs != null && sibs.equals("1")) {
80 get_siblings = true;
81 }
82
83 String sibling_num = (String) params.get(GOTO_PAGE_ARG);
84 if (sibling_num != null && !sibling_num.equals("")) {
85 // we have to modify the doc name
86 document_name = document_name+"."+sibling_num+".ss";
87 }
88 // get the additional data needed for the page
89 getBackgroundData(page_response, collection, lang);
90 Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
91
92 // the_document is where all the doc info - structure and metadata etc
93 // is added into, to be returned in the page
94 Element the_document = doc_.createElement(GSXML.DOCUMENT_ELEM);
95 page_response.appendChild(the_document);
96
97 // set the doctype from the cgi arg as an attribute
98 the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
99
100 // create a basic doc list containing the current node
101 Element basic_doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
102 Element current_doc = doc_.createElement(GSXML.DOC_NODE_ELEM);
103 basic_doc_list.appendChild(current_doc);
104 current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
105
106 // Create a parameter list to specify the required structure information
107 Element ds_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
108
109 Element ds_param = null;
110 boolean get_structure = false;
111 boolean get_structure_info = false;
112 if (document_type.equals("paged")) {
113 get_structure_info = true;
114 // get teh info needed for paged naviagtion
115 ds_param = doc_.createElement(GSXML.PARAM_ELEM);
116 ds_param_list.appendChild(ds_param);
117 ds_param.setAttribute(GSXML.NAME_ATT, "info");
118 ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
119 ds_param = doc_.createElement(GSXML.PARAM_ELEM);
120 ds_param_list.appendChild(ds_param);
121 ds_param.setAttribute(GSXML.NAME_ATT, "info");
122 ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
123 ds_param = doc_.createElement(GSXML.PARAM_ELEM);
124 ds_param_list.appendChild(ds_param);
125 ds_param.setAttribute(GSXML.NAME_ATT, "info");
126 ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
127
128 } else if (document_type.equals("hierarchy")){
129 get_structure = true;
130 // get the info needed for table of contents
131 ds_param = doc_.createElement(GSXML.PARAM_ELEM);
132 ds_param_list.appendChild(ds_param);
133 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
134 ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
135 ds_param = doc_.createElement(GSXML.PARAM_ELEM);
136 ds_param_list.appendChild(ds_param);
137 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
138 ds_param.setAttribute(GSXML.VALUE_ATT, "children");
139 if (get_siblings) {
140 ds_param = doc_.createElement(GSXML.PARAM_ELEM);
141 ds_param_list.appendChild(ds_param);
142 ds_param.setAttribute(GSXML.NAME_ATT, "structure");
143 ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
144 }
145 } else {
146 // we dont need any structure
147 }
148
149 boolean has_dummy = false;
150 if (get_structure || get_structure_info) {
151
152 // Build a request to obtain the document structure
153 Element ds_message = doc_.createElement(GSXML.MESSAGE_ELEM);
154 String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
155 Element ds_request = GSXML.createBasicRequest(doc_,GSXML.REQUEST_TYPE_PROCESS, to, lang);
156 ds_message.appendChild(ds_request);
157 ds_request.appendChild(ds_param_list);
158
159 // create a doc_node_list and put in the doc_node that we are interested in
160 ds_request.appendChild(basic_doc_list);
161
162 // Process the document structure retrieve message
163 Element ds_response_message = (Element) mr_.process(ds_message);
164
165 // get the info and print out
166 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
167 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
168 path = GSPath.appendLink(path, "nodeStructureInfo");
169 Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
170 // get the doc_node bit
171 if (ds_response_struct_info != null) {
172 the_document.appendChild(doc_.importNode(ds_response_struct_info, true));
173 }
174 path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
175 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
176 path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
177 Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
178
179 if (ds_response_structure != null) {
180 // add the contents of the structure bit into the_document
181 NodeList structs = ds_response_structure.getChildNodes();
182 for (int i=0; i<structs.getLength();i++) {
183 the_document.appendChild(doc_.importNode(structs.item(i), true));
184 }
185 } else {
186 // no structure nodes, so put in a dummy doc node
187 Element doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
188 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
189 the_document.appendChild(doc_node);
190 has_dummy = true;
191 }
192 } else { // a simple type - we dont have a dummy node for simple
193 // should think about this more
194 // no structure request, so just put in a dummy doc node
195 Element doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
196 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
197 the_document.appendChild(doc_node);
198 has_dummy = true;
199 }
200
201 // Build a request to obtain some document metadata
202 Element dm_message = doc_.createElement(GSXML.MESSAGE_ELEM);
203 String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
204 Element dm_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
205 dm_message.appendChild(dm_request);
206 // Create a parameter list to specify the required metadata information
207
208 HashSet meta_names = new HashSet();
209 meta_names.add("Title"); // the default
210 if (format_elem != null) {
211 extractMetadataNames(format_elem, meta_names);
212 }
213
214 Element dm_param_list = createMetadataParamList(meta_names);
215 dm_request.appendChild(dm_param_list);
216
217 // create the doc node list for the metadata request
218 Element dm_doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
219 dm_request.appendChild(dm_doc_list);
220
221 // Add each node from the structure response into the metadata request
222 NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
223 for (int i = 0; i < doc_nodes.getLength(); i++) {
224 Element doc_node = (Element) doc_nodes.item(i);
225 String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
226
227 // Add the documentNode to the list
228 Element dm_doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
229 dm_doc_list.appendChild(dm_doc_node);
230 dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
231 dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
232 doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
233 }
234
235 // we also want a metadata request to the top level document to get
236 // assocfilepath - this could be cached too
237 Element doc_meta_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
238 dm_message.appendChild(doc_meta_request);
239 Element doc_meta_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
240 doc_meta_request.appendChild(doc_meta_param_list);
241 Element doc_param = doc_.createElement(GSXML.PARAM_ELEM);
242 doc_meta_param_list.appendChild(doc_param);
243 doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
244 doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
245
246 // create the doc node list for the metadata request
247 Element doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
248 doc_meta_request.appendChild(doc_list);
249
250 Element doc_node = doc_.createElement(GSXML.DOC_NODE_ELEM);
251 // teh node we want is the root document node
252 doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
253 doc_list.appendChild(doc_node);
254 Element dm_response_message = (Element) mr_.process(dm_message);
255
256 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
257 Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
258
259 // Merge the metadata with the structure information
260 NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
261 for (int i = 0; i < doc_nodes.getLength(); i++) {
262 GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
263 }
264 // get teh top level doc metadata out
265 Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
266 Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
267 the_document.appendChild(doc_.importNode(doc_meta_list, true));
268
269 // Build a request to obtain some document content
270 Element dc_message = doc_.createElement(GSXML.MESSAGE_ELEM);
271 to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
272 Element dc_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
273 dc_message.appendChild(dc_request);
274
275
276 // Create a parameter list to specify the request parameters - empty for now
277 Element dc_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
278 dc_request.appendChild(dc_param_list);
279
280 // the doc list for the content request is the same as the one for the structure request
281 dc_request.appendChild(basic_doc_list);
282
283 Element dc_response_message = (Element) mr_.process(dc_message);
284 path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
285 Element dc_response_doc = (Element) GSXML.getNodeByPath(dc_response_message, path);
286 Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
287
288
289 boolean highlight_query_terms = true;
290 if (highlight_query_terms) {
291 dc_response_doc.removeChild(dc_response_doc_content);
292
293 dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
294 dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
295 }
296 if (provide_annotations) {
297 // now we can modifiy the response doc if needed
298 String enrich_service = (String)params.get(GSCGI.SERVICE_ARG);
299 if (enrich_service != null && !enrich_service.equals("")) {
300 // send a message to the service
301 Element enrich_message = doc_.createElement(GSXML.MESSAGE_ELEM);
302 Element enrich_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang);
303 enrich_message.appendChild(enrich_request);
304 Element enrich_pl = getServiceParamList(cgi_paramList);
305 enrich_request.appendChild(enrich_pl);
306 Element e_doc_list = doc_.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
307 enrich_request.appendChild(e_doc_list);
308 e_doc_list.appendChild(doc_.importNode(dc_response_doc, true));
309
310 Element enrich_response = mr_.process(enrich_message);
311
312 String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
313 path = GSPath.createPath(links);
314 dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
315
316 }
317 }
318 // use the returned id rather than the sent one cos there may have
319 // been modifiers such as .pr that are removed.
320 String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
321 the_document.setAttribute("selectedNode", modified_doc_id);
322 if (has_dummy) {
323 // change the id if necessary and add the content
324 Element dummy_node = (Element)doc_nodes.item(0);
325
326 dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
327 dummy_node.appendChild(doc_.importNode(dc_response_doc_content, true));
328 // hack for simple type
329 if (document_type.equals("simple")) {
330 // we dont want the internal docNode, just want the content and metadata in the document
331 // rethink this!!
332 the_document.removeChild(dummy_node);
333
334 NodeList dummy_children = dummy_node.getChildNodes();
335 //for (int i=0; i<dummy_children.getLength(); i++) {
336 for (int i=dummy_children.getLength()-1; i>=0; i--) {
337 the_document.appendChild(dummy_children.item(i));
338
339 }
340 }
341 } else {
342 // Merge the document content with the metadata and structure information
343 for (int i = 0; i < doc_nodes.getLength(); i++) {
344 Node dn = doc_nodes.item(i);
345 String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
346 if (dn_id.equals(modified_doc_id)) {
347 dn.appendChild(doc_.importNode(dc_response_doc_content, true));
348 break;
349 }
350 }
351 }
352
353 ///ystem.out.println("(DocumentAction) Page:\n" + converter_.getPrettyString(result));
354 return result;
355 }
356
357 protected Element getServiceParamList(Element cgi_param_list) {
358
359 Element new_param_list = doc_.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
360 Element param;
361 NodeList cgi_params = cgi_param_list.getChildNodes();
362 for (int i=0; i<cgi_params.getLength(); i++) {
363 Element p = (Element) cgi_params.item(i);
364 String name = p.getAttribute(GSXML.NAME_ATT);
365 if (name.equals(GSCGI.SERVICE_ARG) || name.equals(GSCGI.REQUEST_TYPE_ARG) || name.equals(GSCGI.CLUSTER_ARG) || name.equals(GSCGI.DOCUMENT_ARG) || name.equals(SIBLING_ARG) ) {
366 continue;
367 }
368 // esle add it in to the list
369 new_param_list.appendChild(doc_.importNode(p, true));
370 }
371 return new_param_list;
372 }
373
374 /** this method gets the collection description, the format info, the
375 * list of enrich services, etc - stuff that is needed for the page,
376 * but is the same whatever the query is - should be cached */
377 protected boolean getBackgroundData(Element page_response,
378 String collection, String lang) {
379
380 // create a message to process - contains requests for the collection
381 // description, the format element, the enrich services on offer
382 // these could all be cached
383 Element info_message = doc_.createElement(GSXML.MESSAGE_ELEM);
384 String path = GSPath.appendLink(collection, "DocumentMetadataRetrieve");
385 // the format request - ignore for now, where does this request go to??
386 Element format_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_FORMAT, path, lang);
387 info_message.appendChild(format_request);
388
389 // the enrich_services request - only do this if provide_annotations is true
390
391 if (provide_annotations) {
392 Element enrich_services_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_DESCRIBE, "", lang);
393 enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
394 info_message.appendChild(enrich_services_request);
395 }
396
397 Element info_response = (Element)mr_.process(info_message);
398
399 // the collection is the first response
400 NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
401 Element format_resp = (Element) responses.item(0);
402
403 Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
404 if (format_elem != null) {
405 ///ystem.out.println("doc action found a format statement");
406 // set teh format type
407 format_elem.setAttribute(GSXML.TYPE_ATT, "display");
408 page_response.appendChild(doc_.importNode(format_elem, true));
409 }
410
411 if (provide_annotations) {
412 Element services_resp = (Element)responses.item(1);
413
414 // a new message for the mr
415 Element enrich_message = doc_.createElement(GSXML.MESSAGE_ELEM);
416
417 NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
418 boolean service_found = false;
419 for (int j=0; j<e_services.getLength(); j++) {
420 if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
421 Element s = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang);
422 enrich_message.appendChild(s);
423 service_found = true;
424 }
425 }
426 if (service_found) {
427 Element enrich_response = mr_.process(enrich_message);
428
429 NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
430 Element service_list = doc_.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
431 for (int i=0; i<e_responses.getLength(); i++) {
432 Element e_resp = (Element)e_responses.item(i);
433 Element e_service = (Element)doc_.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
434 e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
435 service_list.appendChild(e_service);
436 }
437 page_response.appendChild(service_list);
438 }
439 } // if provide_annotations
440 return true;
441
442 }
443
444 /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
445 */
446 protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
447
448 // do the query again to get term info
449 Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
450 HashMap params = GSXML.extractParams(cgi_param_list, false);
451
452 String service_name = (String)params.get(GSCGI.SERVICE_ARG);
453 if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
454 return dc_response_doc_content;
455 }
456 String collection = (String)params.get(GSCGI.COLLECTION_ARG);
457 String lang = request.getAttribute(GSXML.LANG_ATT);
458 String to = GSPath.appendLink(collection, service_name);
459
460 Element mr_query_message = doc_.createElement(GSXML.MESSAGE_ELEM);
461 Element mr_query_request = GSXML.createBasicRequest(doc_, GSXML.REQUEST_TYPE_PROCESS, to, lang);
462 mr_query_message.appendChild(mr_query_request);
463
464 // paramList
465 Element query_param_list = (Element)doc_.importNode(cgi_param_list, true);
466 mr_query_request.appendChild(query_param_list);
467
468 // do the query
469 Element mr_query_response = (Element)mr_.process(mr_query_message);
470
471 String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
472 Element query_term_info_list = (Element) GSXML.getNodeByPath(mr_query_response, path);
473 if (query_term_info_list == null) {
474 // no term info
475 System.err.println("DocumentAction: Warning: No query term information.\n");
476 return dc_response_doc_content;
477 }
478
479 NodeList equivs = query_term_info_list.getElementsByTagName("equivTermList");
480 HashSet all_terms = new HashSet();
481 for (int i=0; i<equivs.getLength(); i++) {
482
483 // get the terms
484 String [] terms = GSXML.getAttributeValuesFromList((Element)equivs.item(i), GSXML.NAME_ATT);
485 for (int j=0; j<terms.length; j++) {
486
487 all_terms.add(terms[j]);
488 }
489 }
490
491 Element new_content_elem = doc_.createElement(GSXML.NODE_CONTENT_ELEM);
492
493 String content = GSXML.getNodeText(dc_response_doc_content);
494
495 StringBuffer temp = new StringBuffer();
496 StringBuffer temp_content = new StringBuffer();
497
498 for (int i=0; i<content.length(); i++) {
499 char c = content.charAt(i);
500 if (Character.isLetterOrDigit(c)) {
501 // not word boundary
502 temp.append(c);
503 } else {
504 // word boundary
505 // add the last word if there was one
506 if (temp.length()>0) {
507 if (all_terms.contains(temp.toString())) {
508 //if there is anything already present in temp_content, add it as a text node
509 Text t = doc_.createTextNode(temp_content.toString());
510 new_content_elem.appendChild(t);
511 temp_content.delete(0, temp_content.length());
512 Element annot = GSXML.createTextElement(doc_, "annotation", temp.toString());
513 annot.setAttribute("type", "query_term");
514 new_content_elem.appendChild(annot);
515 //new_content.append("<annotation type='query_term'>"+temp+"</annotation>");
516 } else {
517 temp_content.append(temp);
518 }
519 temp.delete(0, temp.length());
520 }
521 if (c=='<') {
522 temp_content.append(c);
523 i++;
524 // skip over html
525 while (i<content.length() && content.charAt(i)!='>') {
526 temp_content.append(content.charAt(i));
527 i++;
528 }
529 temp_content.append(content.charAt(i));
530 //temp_content.append(GSXML.xmlSafe(temp.toString()));
531 //temp.delete(0, temp.length());
532
533 } else {
534 temp_content.append(c);
535 }
536 }
537 }
538 // append anything left of temp_content and temp
539 Text t = doc_.createTextNode(temp_content.toString());
540 new_content_elem.appendChild(t);
541
542 if (temp.length() > 0) {
543 Element annot = GSXML.createTextElement(doc_, "annotation", temp.toString());
544 annot.setAttribute("type", "query_term");
545 new_content_elem.appendChild(annot);
546 }
547 //String content_string = "<nodeContent>"+new_content.toString()+"</nodeContent>";
548 //Element content_elem = converter_.getDOM(content_string).getDocumentElement();
549 return new_content_elem;
550 }
551}
Note: See TracBrowser for help on using the repository browser.