/*
 *    DocumentAction.java
 *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.greenstone.gsdl3.action;

// Greenstone classes
import org.greenstone.gsdl3.core.ModuleInterface;
import org.greenstone.gsdl3.util.*;

// XML classes
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.w3c.dom.NodeList;

// General Java classes
import java.util.HashMap;
import java.util.HashSet;
import java.io.File;

/**
 * Action class for retrieving Documents via the message router.
 *
 * NOTE(review): this copy of the file is damaged. In three places (marked
 * with NOTE(review) comments below) a span of code that originally sat
 * between a '<' character and a later '>' character is missing, so two
 * unrelated fragments are fused together. The file will not compile until
 * those spans are restored from a clean copy of the source.
 */
public class DocumentAction extends Action {

    // this is used to specify that the sibling nodes of a selected one should be obtained
    public static final String SIBLING_ARG = "sib";
    // name of the parameter whose value is appended to the document name in process()
    public static final String GOTO_PAGE_ARG = "gp";
    // registered in getActionParameters(); not otherwise read in the visible code
    public static final String ENRICH_DOC_ARG = "end";

    /** if this is set to true, when a document is displayed, any annotation
     * type services (enrich) will be offered to the user as well */
    protected static final boolean provide_annotations = false; //true;

    /**
     * Builds the page response for a document request.
     *
     * The visible part of the method does the following:
     * reads the first request child of the message and its parameter list;
     * resolves the document name, document type ("simple" when absent) and
     * the sibling / expand flags; calls getBackgroundData() to add the format
     * information to the page response; creates the document element; and,
     * for "paged" and "hierarchy" document types, sends a
     * DocumentStructureRetrieve request through this.mr and imports the
     * returned structure info into the document element.
     *
     * NOTE(review): the remainder of the method (everything after the start
     * of the loop over the structure children) is missing from this copy.
     *
     * @param message the incoming message element; only its first request
     *                child is looked at
     * @return the response message element; when no document name is given
     *         it contains just the empty response element
     */
    public Element process (Element message) {

        // for now, no subaction eventually we may want to have subactions such as text assoc or something ?

        // the response
        Element result = this.doc.createElement(GSXML.MESSAGE_ELEM);
        Element page_response = this.doc.createElement(GSXML.RESPONSE_ELEM);
        result.appendChild(page_response);

        // get the request - assume only one
        Element request = (Element)GSXML.getChildByTagName(message, GSXML.REQUEST_ELEM);
        Element cgi_paramList = (Element)GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
        HashMap params = GSXML.extractParams(cgi_paramList, false);

        // just in case there are some that need to get passed to the services
        // (they are expected under the "s0" key; null when there are none)
        HashMap service_params = (HashMap)params.get("s0");

        String collection = (String) params.get(GSParams.COLLECTION);
        String lang = request.getAttribute(GSXML.LANG_ATT);
        String uid = request.getAttribute(GSXML.USER_ID_ATT);
        String document_name = (String) params.get(GSParams.DOCUMENT);
        // without a document name there is nothing to retrieve: log and
        // hand back the (empty) response
        if (document_name == null || document_name.equals("")) {
            System.err.println("DocumentAction Error: no document specified!");
            return result;
        }
        String document_type = (String) params.get(GSParams.DOCUMENT_TYPE);
        if (document_type == null) {
            document_type = "simple";
        }
        // whether to retrieve siblings or not
        boolean get_siblings = false;
        String sibs = (String) params.get(SIBLING_ARG);
        if (sibs != null && sibs.equals("1")) {
            get_siblings = true;
        }

        String sibling_num = (String) params.get(GOTO_PAGE_ARG);
        if (sibling_num != null && !sibling_num.equals("")) {
            // we have to modify the doc name
            // NOTE(review): the ".ss" suffix is presumably understood by the
            // retrieval services as "sibling number N" - confirm
            document_name = document_name+"."+sibling_num+".ss";
        }

        boolean expand_document = false;
        String ed_arg = (String) params.get(GSParams.EXPAND_DOCUMENT);
        if (ed_arg != null && ed_arg.equals("1")) {
            expand_document = true;
        }

        boolean expand_contents = false;
        if (expand_document) { // we always expand the contents with the text
            expand_contents = true;
        } else {
            String ec_arg = (String) params.get(GSParams.EXPAND_CONTENTS);
            if (ec_arg != null && ec_arg.equals("1")) {
                expand_contents = true;
            }
        }

        // get the additional data needed for the page
        getBackgroundData(page_response, collection, lang, uid);
        // picks up the format element that getBackgroundData may have added
        // (null if none was added)
        Element format_elem = (Element)GSXML.getChildByTagName(page_response, GSXML.FORMAT_ELEM);

        // the_document is where all the doc info - structure and metadata etc
        // is added into, to be returned in the page
        Element the_document = this.doc.createElement(GSXML.DOCUMENT_ELEM);
        page_response.appendChild(the_document);

        // set the doctype from the cgi arg as an attribute
        the_document.setAttribute(GSXML.DOC_TYPE_ATT, document_type);

        // create a basic doc list containing the current node
        Element basic_doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
        Element current_doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
        basic_doc_list.appendChild(current_doc);
        current_doc.setAttribute(GSXML.NODE_ID_ATT, document_name);

        // Create a parameter list to specify the required structure information
        Element ds_param_list = this.doc.createElement(GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
        if (service_params != null) {
            GSXML.addParametersToList(this.doc, ds_param_list, service_params);
        }

        Element ds_param = null;
        boolean get_structure = false;
        boolean get_structure_info = false;
        if (document_type.equals("paged")) {
            get_structure_info = true;
            // get the info needed for paged navigation: three "info"
            // params (numSiblings, numChildren, siblingPosition)
            ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
            ds_param_list.appendChild(ds_param);
            ds_param.setAttribute(GSXML.NAME_ATT, "info");
            ds_param.setAttribute(GSXML.VALUE_ATT, "numSiblings");
            ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
            ds_param_list.appendChild(ds_param);
            ds_param.setAttribute(GSXML.NAME_ATT, "info");
            ds_param.setAttribute(GSXML.VALUE_ATT, "numChildren");
            ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
            ds_param_list.appendChild(ds_param);
            ds_param.setAttribute(GSXML.NAME_ATT, "info");
            ds_param.setAttribute(GSXML.VALUE_ATT, "siblingPosition");

        } else if (document_type.equals("hierarchy")){
            get_structure = true;
            if (expand_contents) {
                // expanded contents: ask for the entire structure
                ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
                ds_param_list.appendChild(ds_param);
                ds_param.setAttribute(GSXML.NAME_ATT, "structure");
                ds_param.setAttribute(GSXML.VALUE_ATT, "entire");
            } else {
                // get the info needed for table of contents: ancestors and
                // children, plus siblings when requested
                ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
                ds_param_list.appendChild(ds_param);
                ds_param.setAttribute(GSXML.NAME_ATT, "structure");
                ds_param.setAttribute(GSXML.VALUE_ATT, "ancestors");
                ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
                ds_param_list.appendChild(ds_param);
                ds_param.setAttribute(GSXML.NAME_ATT, "structure");
                ds_param.setAttribute(GSXML.VALUE_ATT, "children");
                if (get_siblings) {
                    ds_param = this.doc.createElement(GSXML.PARAM_ELEM);
                    ds_param_list.appendChild(ds_param);
                    ds_param.setAttribute(GSXML.NAME_ATT, "structure");
                    ds_param.setAttribute(GSXML.VALUE_ATT, "siblings");
                }
            }
        } else {
            // we don't need any structure
        }

        boolean has_dummy = false;
        if (get_structure || get_structure_info) {

            // Build a request to obtain the document structure
            Element ds_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
            String to = GSPath.appendLink(collection, "DocumentStructureRetrieve");// Hard-wired?
            Element ds_request = GSXML.createBasicRequest(this.doc,GSXML.REQUEST_TYPE_PROCESS, to, lang, uid);
            ds_message.appendChild(ds_request);
            ds_request.appendChild(ds_param_list);

            // create a doc_node_list and put in the doc_node that we are interested in
            ds_request.appendChild(basic_doc_list);

            // Process the document structure retrieve message
            Element ds_response_message = (Element) this.mr.process(ds_message);

            // get the info and print out
            // (path to the "nodeStructureInfo" child of the returned doc node)
            String path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
            path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
            path = GSPath.appendLink(path, "nodeStructureInfo");
            Element ds_response_struct_info = (Element) GSXML.getNodeByPath(ds_response_message, path);
            // get the doc_node bit
            if (ds_response_struct_info != null) {
                // the node belongs to the response document, so it is
                // imported into this.doc before being attached
                the_document.appendChild(this.doc.importNode(ds_response_struct_info, true));
            }
            // same lookup again, this time for the node structure element
            path = GSPath.appendLink(GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
            path = GSPath.appendLink(path, GSXML.DOC_NODE_ELEM);
            path = GSPath.appendLink(path, GSXML.NODE_STRUCTURE_ELEM);
            Element ds_response_structure = (Element) GSXML.getNodeByPath(ds_response_message, path);

            if (ds_response_structure != null) {
                // add the contents of the structure bit into the_document
                NodeList structs = ds_response_structure.getChildNodes();
                // NOTE(review): SOURCE IS TRUNCATED HERE. The header below is
                // the start of one loop ("for (int i=0; i") fused with the
                // end of a different, later loop header ("=0; i--)").
                // Everything in between is missing, including the
                // declarations of dummy_children, doc_nodes, modified_doc_id
                // and dc_response_doc_content and an unknown number of
                // opening braces, so the indentation from here to the end of
                // the method is a best guess. Restore from a clean copy.
                for (int i=0; i=0; i--) {
                        the_document.appendChild(dummy_children.item(i));
                    }
                }
            } else {
                // Merge the document content with the metadata and structure information
                // by attaching it to the doc node whose id matches modified_doc_id
                for (int i = 0; i < doc_nodes.getLength(); i++) {
                    Node dn = doc_nodes.item(i);
                    String dn_id = ((Element)dn).getAttribute(GSXML.NODE_ID_ATT);
                    if (dn_id.equals(modified_doc_id)) {
                        dn.appendChild(this.doc.importNode(dc_response_doc_content, true));
                        break;
                    }
                }
            }
        }
        ///ystem.out.println("(DocumentAction) Page:\n" + this.converter.getPrettyString(result));
        return result;
    }

    /** tell the param class what its arguments are
     * if an action has its own arguments, this should add them to the params
     * object - particularly important for args that should not be saved
     *
     * Registers GOTO_PAGE_ARG and ENRICH_DOC_ARG, both with the flag false.
     * NOTE(review): the flag presumably means "do not save this argument" -
     * confirm against GSParams.addParameter.
     *
     * @param params the parameter registry to add this action's arguments to
     * @return always true
     */
    public boolean getActionParameters(GSParams params) {
        params.addParameter(GOTO_PAGE_ARG, false);
        params.addParameter(ENRICH_DOC_ARG, false);
        return true;
    }

    /** this method gets the collection description, the format info, the
     * list of enrich services, etc - stuff that is needed for the page,
     * but is the same whatever the query is - should be cached
     *
     * In the visible code: a format request addressed to
     * collection/DocumentContentRetrieve is always sent; a describe request
     * for the service list is added only when provide_annotations is true.
     * If the first response carries a format element, its type attribute is
     * set to "display" and a copy is appended to page_response.
     *
     * NOTE(review): the end of this method (including its return statement)
     * is missing from this copy, so the meaning of the boolean result cannot
     * be stated from here.
     *
     * @param page_response the response element that receives the format info
     * @param collection    name of the collection the requests are sent to
     * @param lang          language attribute passed on to the requests
     * @param uid           user id attribute passed on to the requests
     */
    protected boolean getBackgroundData(Element page_response,
                                        String collection, String lang,
                                        String uid) {

        // create a message to process - contains requests for the collection
        // description, the format element, the enrich services on offer
        // these could all be cached
        Element info_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
        String path = GSPath.appendLink(collection, "DocumentContentRetrieve");
        // the format request - ignore for now, where does this request go to??
        Element format_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_FORMAT, path, lang, uid);
        info_message.appendChild(format_request);

        // the enrich_services request - only do this if provide_annotations is true
        if (provide_annotations) {
            Element enrich_services_request = GSXML.createBasicRequest(this.doc, GSXML.REQUEST_TYPE_DESCRIBE, "", lang, uid);
            enrich_services_request.setAttribute(GSXML.INFO_ATT, "serviceList");
            info_message.appendChild(enrich_services_request);
        }

        Element info_response = (Element)this.mr.process(info_message);

        // the format response is taken to be the first response
        // NOTE(review): assumes responses come back in request order - confirm
        NodeList responses = info_response.getElementsByTagName(GSXML.RESPONSE_ELEM);
        Element format_resp = (Element) responses.item(0);

        Element format_elem = (Element)GSXML.getChildByTagName(format_resp, GSXML.FORMAT_ELEM);
        if (format_elem != null) {
            ///ystem.out.println("doc action found a format statement");
            // set the format type
            format_elem.setAttribute(GSXML.TYPE_ATT, "display");
            page_response.appendChild(this.doc.importNode(format_elem, true));
        }

        if (provide_annotations) {
            // the service list response is taken to be the second response
            Element services_resp = (Element)responses.item(1);

            // a new message for the mr
            Element enrich_message = this.doc.createElement(GSXML.MESSAGE_ELEM);
            NodeList e_services = services_resp.getElementsByTagName(GSXML.SERVICE_ELEM);
            boolean service_found = false;
            // NOTE(review): SOURCE IS TRUNCATED HERE. The line below is the
            // start of the loop over e_services ("for (int j=0; j") fused
            // with the tail of a condition ("0) {") that belongs to a
            // different, later method. The rest of getBackgroundData and the
            // beginning of that later method (its signature and the
            // declarations of all_terms, temp, temp_content,
            // new_content_elem, c and i) are missing. What follows appears to
            // be the inside of a routine that wraps query terms found in a
            // piece of text in "annotation" elements of type "query_term";
            // brace nesting and indentation below are a best guess. Restore
            // from a clean copy.
            for (int j=0; j0) {
                if (all_terms.contains(temp.toString())) {
                    //if there is anything already present in temp_content, add it as a text node
                    Text t = this.doc.createTextNode(temp_content.toString());
                    new_content_elem.appendChild(t);
                    temp_content.delete(0, temp_content.length());

                    // the matched term itself goes into an annotation element
                    Element annot = GSXML.createTextElement(this.doc, "annotation", temp.toString());
                    annot.setAttribute("type", "query_term");
                    new_content_elem.appendChild(annot);
                    //new_content.append(""+temp+"");
                } else {
                    // not a query term: keep it as ordinary text
                    temp_content.append(temp);
                }
                // the current word has been handled either way
                temp.delete(0, temp.length());
            }
            if (c=='<') {
                temp_content.append(c);
                i++;
                // skip over html
                // NOTE(review): SOURCE IS TRUNCATED HERE. "while (i" is the
                // start of the html-skipping loop; " 0) {" is the tail of a
                // later condition. The loop body and the code between the two
                // fragments are missing.
                while (i 0) {
            Element annot = GSXML.createTextElement(this.doc, "annotation", temp.toString());
            annot.setAttribute("type", "query_term");
            new_content_elem.appendChild(annot);
        }
        //String content_string = ""+new_content.toString()+"";
        //Element content_elem = this.converter.getDOM(content_string).getDocumentElement();
        return new_content_elem;
    }
}