/*
 *    DocumentAction.java
 *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
19 | package org.greenstone.gsdl3.action;
|
---|
20 |
|
---|
21 | // Greenstone classes
|
---|
22 | import org.greenstone.gsdl3.core.ModuleInterface;
|
---|
23 | import org.greenstone.gsdl3.util.*;
|
---|
24 |
|
---|
25 | // XML classes
|
---|
26 | import org.w3c.dom.Document;
|
---|
27 | import org.w3c.dom.Element;
|
---|
28 | import org.w3c.dom.Node;
|
---|
29 | import org.w3c.dom.Text;
|
---|
30 | import org.w3c.dom.NodeList;
|
---|
31 |
|
---|
32 | // General Java classes
|
---|
33 | import java.util.HashMap;
|
---|
34 | import java.util.HashSet;
|
---|
35 | import java.io.File;
|
---|
36 |
|
---|
37 |
|
---|
38 | /** Action class for retrieving Documents via the message router
|
---|
39 | */
|
---|
40 | public class DocumentAction extends Action {
|
---|
41 |
|
---|
42 | // this is used to specify that the sibling nodes of a selected one should be obtained
|
---|
43 | public static final String SIBLING_ARG = "sib";
|
---|
44 | public static final String GOTO_PAGE_ARG = "gp";
|
---|
45 | public static final String ENRICH_DOC_ARG = "end";
|
---|
46 |
|
---|
47 | /** if this is set to true, when a document is displayed, any annotation
|
---|
48 | * type services (enrich) will be offered to the user as well */
|
---|
49 | protected static final boolean provide_annotations = false; //true;
|
---|
50 |
|
---|
51 | public Element process (Element message)
|
---|
52 | {
|
---|
53 | // for now, no subaction eventually we may want to have subactions such as text assoc or something ?
|
---|
54 |
|
---|
55 | // the response
|
---|
56 | Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
57 | Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
|
---|
58 | result.appendChild(page_response);
|
---|
59 |
|
---|
60 | // get the request - assume only one
|
---|
61 | Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
|
---|
62 | Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
63 | HashMap params = GSXML.extractParams(cgi_paramList, false);
|
---|
64 |
|
---|
65 | String collection = (String) params.get(GSParams.COLLECTION);
|
---|
66 | String lang = request.getAttribute(GSXML.LANG_ATT);
|
---|
67 | String uid = request.getAttribute(GSXML.USER_ID_ATT);
|
---|
68 | String document_name = (String) params.get(GSParams.DOCUMENT);
|
---|
69 | if (document_name == null || document_name.equals("")) {
|
---|
70 | System.err.println("DocumentAction Error: no document specified!");
|
---|
71 | return result;
|
---|
72 | }
|
---|
73 | String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
|
---|
74 | if (document_type == null) {
|
---|
75 | document_type = "simple";
|
---|
76 | }
|
---|
77 | //whether to retrieve siblings or not
|
---|
78 | boolean get_siblings = false;
|
---|
79 | String sibs = (String) params.get(SIBLING_ARG);
|
---|
80 | if (sibs != null && sibs.equals("1")) {
|
---|
81 | get_siblings = true;
|
---|
82 | }
|
---|
83 |
|
---|
84 | String sibling_num = (String) params.get(GOTO_PAGE_ARG);
|
---|
85 | if (sibling_num != null && !sibling_num.equals("")) {
|
---|
86 | // we have to modify the doc name
|
---|
87 | document_name = document_name+"."+sibling_num+".ss";
|
---|
88 | }
|
---|
89 |
|
---|
90 | boolean expand_document = false;
|
---|
91 | String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
|
---|
92 | if (ed_arg != null && ed_arg.equals("1")) {
|
---|
93 | expand_document = true;
|
---|
94 | }
|
---|
95 |
|
---|
96 |
|
---|
97 | boolean expand_contents = false;
|
---|
98 | if (expand_document) { // we always expand the contents with the text
|
---|
99 | expand_contents = true;
|
---|
100 | } else {
|
---|
101 | String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
|
---|
102 | if (ec_arg != null && ec_arg.equals("1")) {
|
---|
103 | expand_contents = true;
|
---|
104 | }
|
---|
105 | }
|
---|
106 | // get the additional data needed for the page
|
---|
107 | getBackgroundData(page_response, collection, lang, uid);
|
---|
108 | Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);
|
---|
109 |
|
---|
110 | // the_document is where all the doc info - structure and metadata etc
|
---|
111 | // is added into, to be returned in the page
|
---|
112 | Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
|
---|
113 | page_response.appendChild(the_document);
|
---|
114 |
|
---|
115 | // set the doctype from the cgi arg as an attribute
|
---|
116 | the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);
|
---|
117 |
|
---|
118 | // create a basic doc list containing the current node
|
---|
119 | Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
120 | Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
121 | basic_doc_list.appendChild(current_doc);
|
---|
122 | current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);
|
---|
123 |
|
---|
124 | // Create a parameter list to specify the required structure information
|
---|
125 | Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
126 |
|
---|
127 | Element ds_param = null;
|
---|
128 | boolean get_structure = false;
|
---|
129 | boolean get_structure_info = false;
|
---|
130 | if (document_type.equals("paged")) {
|
---|
131 | get_structure_info = true;
|
---|
132 | // get teh info needed for paged naviagtion
|
---|
133 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
134 | ds_param_list.appendChild(ds_param);
|
---|
135 | ds_param.setAttribute(GSXML.NAME_ATT, "info");
|
---|
136 | ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
|
---|
137 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
138 | ds_param_list.appendChild(ds_param);
|
---|
139 | ds_param.setAttribute(GSXML.NAME_ATT, "info");
|
---|
140 | ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
|
---|
141 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
142 | ds_param_list.appendChild(ds_param);
|
---|
143 | ds_param.setAttribute(GSXML.NAME_ATT, "info");
|
---|
144 | ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");
|
---|
145 |
|
---|
146 | } else if (document_type.equals("hierarchy")){
|
---|
147 | get_structure = true;
|
---|
148 | if (expand_contents) {
|
---|
149 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
150 | ds_param_list.appendChild(ds_param);
|
---|
151 | ds_param.setAttribute(GSXML.NAME_ATT, "structure");
|
---|
152 | ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
|
---|
153 | } else {
|
---|
154 | // get the info needed for table of contents
|
---|
155 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
156 | ds_param_list.appendChild(ds_param);
|
---|
157 | ds_param.setAttribute(GSXML.NAME_ATT, "structure");
|
---|
158 | ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
|
---|
159 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
160 | ds_param_list.appendChild(ds_param);
|
---|
161 | ds_param.setAttribute(GSXML.NAME_ATT, "structure");
|
---|
162 | ds_param.setAttribute(GSXML.VALUE_ATT, "children");
|
---|
163 | if (get_siblings) {
|
---|
164 | ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
165 | ds_param_list.appendChild(ds_param);
|
---|
166 | ds_param.setAttribute(GSXML.NAME_ATT, "structure");
|
---|
167 | ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
|
---|
168 | }
|
---|
169 | }
|
---|
170 | } else {
|
---|
171 | // we dont need any structure
|
---|
172 | }
|
---|
173 |
|
---|
174 | boolean has_dummy = false;
|
---|
175 | if (get_structure || get_structure_info) {
|
---|
176 |
|
---|
177 | // Build a request to obtain the document structure
|
---|
178 | Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
179 | String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
|
---|
180 | Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
181 | ds_message.appendChild(ds_request);
|
---|
182 | ds_request.appendChild(ds_param_list);
|
---|
183 |
|
---|
184 | // create a doc_node_list and put in the doc_node that we are interested in
|
---|
185 | ds_request.appendChild(basic_doc_list);
|
---|
186 |
|
---|
187 | // Process the document structure retrieve message
|
---|
188 | Element ds_response_message = (Element) this.mr.process(ds_message);
|
---|
189 |
|
---|
190 | // get the info and print out
|
---|
191 | String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
192 | path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
|
---|
193 | path = GSPath.appendLink(path, "nodeStructureInfo");
|
---|
194 | Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
|
---|
195 | // get the doc_node bit
|
---|
196 | if (ds_response_struct_info != null) {
|
---|
197 | the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
|
---|
198 | }
|
---|
199 | path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
200 | path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
|
---|
201 | path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
|
---|
202 | Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);
|
---|
203 |
|
---|
204 | if (ds_response_structure != null) {
|
---|
205 | // add the contents of the structure bit into the_document
|
---|
206 | NodeList structs = ds_response_structure.getChildNodes();
|
---|
207 | for (int i=0; i<structs.getLength();i++) {
|
---|
208 | the_document.appendChild(this.doc.importNode(structs.item(i), true));
|
---|
209 | }
|
---|
210 | } else {
|
---|
211 | // no structure nodes, so put in a dummy doc node
|
---|
212 | Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
213 | doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
|
---|
214 | the_document.appendChild(doc_node);
|
---|
215 | has_dummy = true;
|
---|
216 | }
|
---|
217 | } else { // a simple type - we dont have a dummy node for simple
|
---|
218 | // should think about this more
|
---|
219 | // no structure request, so just put in a dummy doc node
|
---|
220 | Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
221 | doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name);
|
---|
222 | the_document.appendChild(doc_node);
|
---|
223 | has_dummy = true;
|
---|
224 | }
|
---|
225 |
|
---|
226 | // Build a request to obtain some document metadata
|
---|
227 | Element dm_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
228 | String to = GSPath.appendLink(collection, "DocumentMetadataRetrieve"); // Hard-wired?
|
---|
229 | Element dm_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
230 | dm_message.appendChild(dm_request);
|
---|
231 | // Create a parameter list to specify the required metadata information
|
---|
232 |
|
---|
233 | HashSet meta_names = new HashSet();
|
---|
234 | meta_names.add("Title"); // the default
|
---|
235 | if (format_elem != null) {
|
---|
236 | extractMetadataNames(format_elem, meta_names);
|
---|
237 | }
|
---|
238 |
|
---|
239 | Element dm_param_list = createMetadataParamList(meta_names);
|
---|
240 | dm_request.appendChild(dm_param_list);
|
---|
241 |
|
---|
242 | // create the doc node list for the metadata request
|
---|
243 | Element dm_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
244 | dm_request.appendChild(dm_doc_list);
|
---|
245 |
|
---|
246 | // Add each node from the structure response into the metadata request
|
---|
247 | NodeList doc_nodes = the_document.getElementsByTagName(GSXML.DOC_NODE_ELEM);
|
---|
248 | for (int i = 0; i < doc_nodes.getLength(); i++) {
|
---|
249 | Element doc_node = (Element) doc_nodes.item(i);
|
---|
250 | String doc_node_id = doc_node.getAttribute(GSXML.NODE_ID_ATT);
|
---|
251 |
|
---|
252 | // Add the documentNode to the list
|
---|
253 | Element dm_doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
254 | dm_doc_list.appendChild(dm_doc_node);
|
---|
255 | dm_doc_node.setAttribute(GSXML.NODE_ID_ATT, doc_node_id);
|
---|
256 | dm_doc_node.setAttribute(GSXML.NODE_TYPE_ATT,
|
---|
257 | doc_node.getAttribute(GSXML.NODE_TYPE_ATT));
|
---|
258 | }
|
---|
259 |
|
---|
260 | // we also want a metadata request to the top level document to get
|
---|
261 | // assocfilepath - this could be cached too
|
---|
262 | Element doc_meta_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
263 | dm_message.appendChild(doc_meta_request);
|
---|
264 | Element doc_meta_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
265 | doc_meta_request.appendChild(doc_meta_param_list);
|
---|
266 | Element doc_param = this.doc.createElement(GSXML.PARAM_ELEM);
|
---|
267 | doc_meta_param_list.appendChild(doc_param);
|
---|
268 | doc_param.setAttribute(GSXML.NAME_ATT, "metadata");
|
---|
269 | doc_param.setAttribute(GSXML.VALUE_ATT, "archivedir");
|
---|
270 |
|
---|
271 | // create the doc node list for the metadata request
|
---|
272 | Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
273 | doc_meta_request.appendChild(doc_list);
|
---|
274 |
|
---|
275 | Element doc_node = this.doc.createElement(GSXML.DOC_NODE_ELEM);
|
---|
276 | // teh node we want is the root document node
|
---|
277 | doc_node.setAttribute(GSXML.NODE_ID_ATT, document_name+".rt");
|
---|
278 | doc_list.appendChild(doc_node);
|
---|
279 | Element dm_response_message = (Element) this.mr.process(dm_message);
|
---|
280 |
|
---|
281 | String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
282 | Element dm_response_doc_list = (Element) GSXML.getNodeByPath(dm_response_message, path);
|
---|
283 |
|
---|
284 | // Merge the metadata with the structure information
|
---|
285 | NodeList dm_response_docs = dm_response_doc_list.getChildNodes();
|
---|
286 | for (int i = 0; i < doc_nodes.getLength(); i++) {
|
---|
287 | GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
|
---|
288 | }
|
---|
289 | // get teh top level doc metadata out
|
---|
290 | Element doc_meta_response = (Element)dm_response_message.getElementsByTagName(GSXML.RESPONSE_ELEM).item(1);
|
---|
291 | Element doc_meta_list = (Element)GSXML.getNodeByPath(doc_meta_response, "documentNodeList/documentNode/metadataList");
|
---|
292 | if (doc_meta_list != null) {
|
---|
293 | the_document.appendChild(this.doc.importNode(doc_meta_list, true));
|
---|
294 | }
|
---|
295 | // Build a request to obtain some document content
|
---|
296 | Element dc_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
297 | to = GSPath.appendLink(collection, "DocumentContentRetrieve"); // Hard-wired?
|
---|
298 | Element dc_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
299 | dc_message.appendChild(dc_request);
|
---|
300 |
|
---|
301 |
|
---|
302 | // Create a parameter list to specify the request parameters - empty for now
|
---|
303 | Element dc_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
304 | dc_request.appendChild(dc_param_list);
|
---|
305 |
|
---|
306 | // the doc list for the content request is the same as the one for the structure request unless we want the whole document, in which case its the same as for the metadata request.
|
---|
307 | if (expand_document) {
|
---|
308 | dc_request.appendChild(dm_doc_list);
|
---|
309 | } else {
|
---|
310 | dc_request.appendChild(basic_doc_list);
|
---|
311 | }
|
---|
312 | Element dc_response_message = (Element) this.mr.process(dc_message);
|
---|
313 | Element dc_response_doc_list = (Element) GSXML.getNodeByPath(dc_response_message, path);
|
---|
314 |
|
---|
315 | if (expand_document) {
|
---|
316 | // Merge the content with the structure information
|
---|
317 | NodeList dc_response_docs = dc_response_doc_list.getChildNodes();
|
---|
318 | for (int i = 0; i < doc_nodes.getLength(); i++) {
|
---|
319 | doc_nodes.item(i).appendChild(this.doc.importNode(GSXML.getChildByTagName((Element)dc_response_docs.item(i), "nodeContent"), true));
|
---|
320 | //GSXML.mergeMetadataLists(doc_nodes.item(i), dm_response_docs.item(i));
|
---|
321 | }
|
---|
322 | } else {
|
---|
323 |
|
---|
324 | //path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
|
---|
325 | Element dc_response_doc = (Element) GSXML.getChildByTagName(dc_response_doc_list, GSXML.DOC_NODE_ELEM);
|
---|
326 | Element dc_response_doc_content = (Element) GSXML.getChildByTagName(dc_response_doc, GSXML.NODE_CONTENT_ELEM);
|
---|
327 |
|
---|
328 |
|
---|
329 | boolean highlight_query_terms = true;
|
---|
330 | if (highlight_query_terms) {
|
---|
331 | dc_response_doc.removeChild(dc_response_doc_content);
|
---|
332 |
|
---|
333 | dc_response_doc_content = highlightQueryTerms(request, dc_response_doc_content);
|
---|
334 | dc_response_doc.appendChild(dc_response_doc.getOwnerDocument().importNode(dc_response_doc_content, true));
|
---|
335 | }
|
---|
336 |
|
---|
337 |
|
---|
338 | if (provide_annotations) {
|
---|
339 | String service_selected = (String)params.get(ENRICH_DOC_ARG);
|
---|
340 | if (service_selected != null && service_selected.equals("1")) {
|
---|
341 | // now we can modifiy the response doc if needed
|
---|
342 | String enrich_service = (String)params.get(GSParams.SERVICE);
|
---|
343 | // send a message to the service
|
---|
344 | Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
345 | Element enrich_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, enrich_service, lang, uid);
|
---|
346 | enrich_message.appendChild(enrich_request);
|
---|
347 | // check for parameters
|
---|
348 | HashMap service_params = (HashMap)params.get("s1");
|
---|
349 | if (service_params != null) {
|
---|
350 | Element enrich_pl = GSXML.createParameterList(this.doc, service_params);
|
---|
351 | enrich_request.appendChild(enrich_pl);
|
---|
352 | }
|
---|
353 | Element e_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
|
---|
354 | enrich_request.appendChild(e_doc_list);
|
---|
355 | e_doc_list.appendChild(this.doc.importNode(dc_response_doc, true));
|
---|
356 |
|
---|
357 | Element enrich_response = this.mr.process(enrich_message);
|
---|
358 |
|
---|
359 | String [] links = {GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER, GSXML.DOC_NODE_ELEM, GSXML.NODE_CONTENT_ELEM};
|
---|
360 | path = GSPath.createPath(links);
|
---|
361 | dc_response_doc_content = (Element)GSXML.getNodeByPath(enrich_response, path);
|
---|
362 |
|
---|
363 | }
|
---|
364 | }
|
---|
365 |
|
---|
366 |
|
---|
367 | // use the returned id rather than the sent one cos there may have
|
---|
368 | // been modifiers such as .pr that are removed.
|
---|
369 | String modified_doc_id = dc_response_doc.getAttribute(GSXML.NODE_ID_ATT);
|
---|
370 | the_document.setAttribute("selectedNode", modified_doc_id);
|
---|
371 | if (has_dummy) {
|
---|
372 | // change the id if necessary and add the content
|
---|
373 | Element dummy_node = (Element)doc_nodes.item(0);
|
---|
374 |
|
---|
375 | dummy_node.setAttribute(GSXML.NODE_ID_ATT, modified_doc_id);
|
---|
376 | dummy_node.appendChild(this.doc.importNode(dc_response_doc_content, true));
|
---|
377 | // hack for simple type
|
---|
378 | if (document_type.equals("simple")) {
|
---|
379 | // we dont want the internal docNode, just want the content and metadata in the document
|
---|
380 | // rethink this!!
|
---|
381 | the_document.removeChild(dummy_node);
|
---|
382 |
|
---|
383 | NodeList dummy_children = dummy_node.getChildNodes();
|
---|
384 | //for (int i=0; i<dummy_children.getLength(); i++) {
|
---|
385 | for (int i=dummy_children.getLength()-1; i>=0; i--) {
|
---|
386 | the_document.appendChild(dummy_children.item(i));
|
---|
387 |
|
---|
388 | }
|
---|
389 | }
|
---|
390 | } else {
|
---|
391 | // Merge the document content with the metadata and structure information
|
---|
392 | for (int i = 0; i < doc_nodes.getLength(); i++) {
|
---|
393 | Node dn = doc_nodes.item(i);
|
---|
394 | String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
|
---|
395 | if (dn_id.equals(modified_doc_id)) {
|
---|
396 | dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
|
---|
397 | break;
|
---|
398 | }
|
---|
399 | }
|
---|
400 | }
|
---|
401 | }
|
---|
402 | ///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
|
---|
403 | return result;
|
---|
404 | }
|
---|
405 |
|
---|
406 | /** tell the param class what its arguments are
|
---|
407 | * if an action has its own arguments, this should add them to the params
|
---|
408 | * object - particularly important for args that should not be saved */
|
---|
409 | public boolean getActionParameters(GSParams params) {
|
---|
410 | params.addParameter(GOTO_PAGE_ARG, false);
|
---|
411 | params.addParameter(ENRICH_DOC_ARG, false);
|
---|
412 | return true;
|
---|
413 | }
|
---|
414 |
|
---|
415 |
|
---|
416 | /** this method gets the collection description, the format info, the
|
---|
417 | * list of enrich services, etc - stuff that is needed for the page,
|
---|
418 | * but is the same whatever the query is - should be cached */
|
---|
419 | protected boolean getBackgroundData(Element page_response,
|
---|
420 | String collection, String lang,
|
---|
421 | String uid) {
|
---|
422 |
|
---|
423 | // create a message to process - contains requests for the collection
|
---|
424 | // description, the format element, the enrich services on offer
|
---|
425 | // these could all be cached
|
---|
426 | Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
427 | String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
|
---|
428 | // the format request - ignore for now, where does this request go to??
|
---|
429 | Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
|
---|
430 | info_message.appendChild(format_request);
|
---|
431 |
|
---|
432 | // the enrich_services request - only do this if provide_annotations is true
|
---|
433 |
|
---|
434 | if (provide_annotations) {
|
---|
435 | Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
|
---|
436 | enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
|
---|
437 | info_message.appendChild(enrich_services_request);
|
---|
438 | }
|
---|
439 |
|
---|
440 | Element info_response = (Element)this.mr.process(info_message);
|
---|
441 |
|
---|
442 | // the collection is the first response
|
---|
443 | NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
|
---|
444 | Element format_resp = (Element) responses.item(0);
|
---|
445 |
|
---|
446 | Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
|
---|
447 | if (format_elem != null) {
|
---|
448 | ///ystem.out.println("doc action found a format statement");
|
---|
449 | // set teh format type
|
---|
450 | format_elem.setAttribute(GSXML.TYPE_ATT, "display");
|
---|
451 | page_response.appendChild(this.doc.importNode(format_elem, true));
|
---|
452 | }
|
---|
453 |
|
---|
454 | if (provide_annotations) {
|
---|
455 | Element services_resp = (Element)responses.item(1);
|
---|
456 |
|
---|
457 | // a new message for the mr
|
---|
458 | Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
459 |
|
---|
460 | NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
|
---|
461 | boolean service_found = false;
|
---|
462 | for (int j=0; j<e_services.getLength(); j++) {
|
---|
463 | if (((Element)e_services.item(j)).getAttribute(GSXML.TYPE_ATT).equals("enrich")) {
|
---|
464 | Element s = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, ((Element)e_services.item(j)).getAttribute(GSXML.NAME_ATT), lang, uid);
|
---|
465 | enrich_message.appendChild(s);
|
---|
466 | service_found = true;
|
---|
467 | }
|
---|
468 | }
|
---|
469 | if (service_found) {
|
---|
470 | Element enrich_response = this.mr.process(enrich_message);
|
---|
471 |
|
---|
472 | NodeList e_responses = enrich_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
|
---|
473 | Element service_list = this.doc.createElement(GSXML.SERVICE_ELEM + GSXML.LIST_MODIFIER);
|
---|
474 | for (int i=0; i<e_responses.getLength(); i++) {
|
---|
475 | Element e_resp = (Element)e_responses.item(i);
|
---|
476 | Element e_service = (Element)this.doc.importNode(GSXML.getChildByTagName(e_resp, GSXML.SERVICE_ELEM), true);
|
---|
477 | e_service.setAttribute(GSXML.NAME_ATT, e_resp.getAttribute(GSXML.FROM_ATT));
|
---|
478 | service_list.appendChild(e_service);
|
---|
479 | }
|
---|
480 | page_response.appendChild(service_list);
|
---|
481 | }
|
---|
482 | } // if provide_annotations
|
---|
483 | return true;
|
---|
484 |
|
---|
485 | }
|
---|
486 |
|
---|
487 | /** this involves a bit of a hack to get the equivalent query terms - has to requery the query service - uses the last selected service name. (if it ends in query). should this action do the query or should it send a message to the query action? but that will involve lots of extra stuff. also doesn't handle phrases properly - just highlights all the terms found in the text.
|
---|
488 | */
|
---|
489 | protected Element highlightQueryTerms(Element request, Element dc_response_doc_content) {
|
---|
490 |
|
---|
491 | // do the query again to get term info
|
---|
492 | Element cgi_param_list = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
|
---|
493 | HashMap params = GSXML.extractParams(cgi_param_list, false);
|
---|
494 |
|
---|
495 | String service_name = (String)params.get(GSParams.SERVICE);
|
---|
496 | if (service_name == null || !service_name.endsWith("Query")) { // hack for now - we only do highlighting if we were in a query last - ie not if we were in a browse thingy
|
---|
497 | return dc_response_doc_content;
|
---|
498 | }
|
---|
499 | String collection = (String)params.get(GSParams.COLLECTION);
|
---|
500 | String lang = request.getAttribute(GSXML.LANG_ATT);
|
---|
501 | String uid = request.getAttribute(GSXML.USER_ID_ATT);
|
---|
502 | String to = GSPath.appendLink(collection, service_name);
|
---|
503 |
|
---|
504 | Element mr_query_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
|
---|
505 | Element mr_query_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
|
---|
506 | mr_query_message.appendChild(mr_query_request);
|
---|
507 |
|
---|
508 | // paramList
|
---|
509 | Element query_param_list = (Element)this.doc.importNode(cgi_param_list, true);
|
---|
510 | mr_query_request.appendChild(query_param_list);
|
---|
511 |
|
---|
512 | // do the query
|
---|
513 | Element mr_query_response = (Element)this.mr.process(mr_query_message);
|
---|
514 |
|
---|
515 | String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.TERM_ELEM+GSXML.LIST_MODIFIER);
|
---|
516 | Element query_term_info_list = (Element) GSXML.getNodeByPath(mr_query_response, path);
|
---|
517 | if (query_term_info_list == null) {
|
---|
518 | // no term info
|
---|
519 | System.err.println("DocumentAction: Warning: No query term information.\n");
|
---|
520 | return dc_response_doc_content;
|
---|
521 | }
|
---|
522 |
|
---|
523 | NodeList equivs = query_term_info_list.getElementsByTagName("equivTermList");
|
---|
524 | HashSet all_terms = new HashSet();
|
---|
525 | for (int i=0; i<equivs.getLength(); i++) {
|
---|
526 |
|
---|
527 | // get the terms
|
---|
528 | String [] terms = GSXML.getAttributeValuesFromList((Element)equivs.item(i), GSXML.NAME_ATT);
|
---|
529 | for (int j=0; j<terms.length; j++) {
|
---|
530 |
|
---|
531 | all_terms.add(terms[j]);
|
---|
532 | }
|
---|
533 | }
|
---|
534 |
|
---|
535 | Element new_content_elem = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
|
---|
536 |
|
---|
537 | String content = GSXML.getNodeText(dc_response_doc_content);
|
---|
538 |
|
---|
539 | StringBuffer temp = new StringBuffer();
|
---|
540 | StringBuffer temp_content = new StringBuffer();
|
---|
541 |
|
---|
542 | for (int i=0; i<content.length(); i++) {
|
---|
543 | char c = content.charAt(i);
|
---|
544 | if (Character.isLetterOrDigit(c)) {
|
---|
545 | // not word boundary
|
---|
546 | temp.append(c);
|
---|
547 | } else {
|
---|
548 | // word boundary
|
---|
549 | // add the last word if there was one
|
---|
550 | if (temp.length()>0) {
|
---|
551 | if (all_terms.contains(temp.toString())) {
|
---|
552 | //if there is anything already present in temp_content, add it as a text node
|
---|
553 | Text t = this.doc.createTextNode(temp_content.toString());
|
---|
554 | new_content_elem.appendChild(t);
|
---|
555 | temp_content.delete(0, temp_content.length());
|
---|
556 | Element annot = GSXML.createTextElement(this.doc, "annotation", temp.toString());
|
---|
557 | annot.setAttribute("type", "query_term");
|
---|
558 | new_content_elem.appendChild(annot);
|
---|
559 | //new_content.append("<annotation type='query_term'>"+temp+"</annotation>");
|
---|
560 | } else {
|
---|
561 | temp_content.append(temp);
|
---|
562 | }
|
---|
563 | temp.delete(0, temp.length());
|
---|
564 | }
|
---|
565 | if (c=='<') {
|
---|
566 | temp_content.append(c);
|
---|
567 | i++;
|
---|
568 | // skip over html
|
---|
569 | while (i<content.length() && content.charAt(i)!='>') {
|
---|
570 | temp_content.append(content.charAt(i));
|
---|
571 | i++;
|
---|
572 | }
|
---|
573 | temp_content.append(content.charAt(i));
|
---|
574 | //temp_content.append(GSXML.xmlSafe(temp.toString()));
|
---|
575 | //temp.delete(0, temp.length());
|
---|
576 |
|
---|
577 | } else {
|
---|
578 | temp_content.append(c);
|
---|
579 | }
|
---|
580 | }
|
---|
581 | }
|
---|
582 | // append anything left of temp_content and temp
|
---|
583 | Text t = this.doc.createTextNode(temp_content.toString());
|
---|
584 | new_content_elem.appendChild(t);
|
---|
585 |
|
---|
586 | if (temp.length() > 0) {
|
---|
587 | Element annot = GSXML.createTextElement(this.doc, "annotation", temp.toString());
|
---|
588 | annot.setAttribute("type", "query_term");
|
---|
589 | new_content_elem.appendChild(annot);
|
---|
590 | }
|
---|
591 | //String content_string = "<nodeContent>"+new_content.toString()+"</nodeContent>";
|
---|
592 | //Element content_elem = this.converter.getDOM(content_string).getDocumentElement();
|
---|
593 | return new_content_elem;
|
---|
594 | }
|
---|
595 | }
|
---|