/* * ServiceRack.java * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.greenstone.gsdl3.service; // greenstone classes import java.io.StringReader; import java.util.HashMap; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.log4j.Logger; import org.greenstone.gs3client.dlservices.DigitalLibraryServicesAPIA; import org.greenstone.gs3client.dlservices.FedoraServicesAPIA; import org.greenstone.gsdl3.core.MessageRouter; import org.greenstone.gsdl3.util.Dictionary; import org.greenstone.gsdl3.util.GSPath; import org.greenstone.gsdl3.util.GSXML; import org.greenstone.gsdl3.util.MacroResolver; import org.greenstone.gsdl3.util.OID; import org.greenstone.gsdl3.util.XMLConverter; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.apache.commons.lang3.StringUtils; /* // greenstone classes import org.greenstone.gsdl3.util.*; import org.greenstone.gsdl3.core.*; // for fedora import org.greenstone.gs3client.dlservices.*; import org.greenstone.fedora.services.FedoraGS3Exception.CancelledException; // xml classes import 
org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Element; import org.w3c.dom.Document; import org.xml.sax.InputSource; import javax.xml.parsers.*; import org.apache.xpath.XPathAPI; // general java classes import java.io.Reader; import java.io.StringReader; import java.io.File; import java.util.HashMap; import java.util.Locale; import java.util.Properties; import java.util.ResourceBundle; import java.util.regex.*; import java.lang.reflect.Method; */ import org.apache.log4j.*; /** * FedoraServiceProxy - communicates with the FedoraGS3 interface. * * @author Anupama Krishnan */ public class FedoraServiceProxy extends ServiceRack implements OID.OIDTranslatable { public static class BasicTextMacroResolver extends MacroResolver { private static final Pattern p_back_slash = Pattern.compile("\\\"");// create a pattern "\\\"", but it matches both " and \" public String resolve(String text, String lang, String scope, String doc_oid) { if (text == null || text.equals("")) { return text; } if (!scope.equals(SCOPE_TEXT) || text_macros.size()==0) { return text; } java.util.ArrayList macros = text_macros; for (int i=0; i" Matcher m_slash = p_back_slash.matcher(text); String clean_str = ""; int s=0; while (m_slash.find()) { if (!text.substring(m_slash.end()-2, m_slash.end()-1).equals("\\")) { clean_str = clean_str + text.substring(s,m_slash.end()-1); // it matches ", so get a substring before " }else{ clean_str = clean_str + text.substring(s,m_slash.end()-2);// it matches \", so get a substring before \ } s = m_slash.end();// get the index of the last match clean_str = clean_str + "\""; } text = clean_str + text.substring(s,text.length()); } } } } return text; } } static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.FedoraServiceProxy.class.getName()); protected MacroResolver macro_resolver = null; /** The handle to the fedora connection */ private DigitalLibraryServicesAPIA fedoraServicesAPIA; private String prevLanguage = ""; public void 
cleanUp() { super.cleanUp(); } /** sets the message router */ public void setMessageRouter(MessageRouter m) { this.router = m; setLibraryName(m.getLibraryName()); } /** the no-args constructor */ public FedoraServiceProxy() { super(); this.converter = new XMLConverter(); this.doc = this.converter.newDOM(); this.short_service_info = this.doc.createElement(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER); this.macro_resolver = new BasicTextMacroResolver(); } /* configure the service module * * @param info the XML node with name equal * to the class name (of the subclass) * * must configure short_service_info_ and service_info_map_ * @return true if configured ok * must be implemented in subclasses */ /*public boolean configure(Element info) { return configure(info, null); }*/ public boolean configure(Element info, Element extra_info) { // set up the class loader if (!super.configure(info, extra_info)){ return false; } // Try to instantiate a Fedora dl handle try { // Fedora connection settings defaults. 
// Read host and port from global.properties, since by default, we expect the Greenstone server to be used Properties globalProperties = new Properties(); globalProperties.load(Class.forName("org.greenstone.util.GlobalProperties").getClassLoader().getResourceAsStream("global.properties")); String host = globalProperties.getProperty("tomcat.server", "localhost"); String port = globalProperties.getProperty("tomcat.port", "8383"); String protocol = "http"; String username = "fedoraIntCallUser"; //"fedoraAdmin" String password = "changeme"; //"" // See if buildConfig.xml overrides any of the defaults // info is the Element from buildConfig.xml (extra_info are the Elements of collectionConfig.xml) NodeList nodes = info.getElementsByTagName("fedoraConnection"); if(nodes != null && nodes.getLength() > 0) { Element fedoraElement = (Element)nodes.item(0); if(fedoraElement.hasAttribute("protocol")) { protocol = fedoraElement.getAttribute("protocol"); } if(fedoraElement.hasAttribute("host")) { host = fedoraElement.getAttribute("host"); } if(fedoraElement.hasAttribute("port")) { port = fedoraElement.getAttribute("port"); } if(fedoraElement.hasAttribute("username")) { username = fedoraElement.getAttribute("username"); } if(fedoraElement.hasAttribute("password")) { password = fedoraElement.getAttribute("password"); } } fedoraServicesAPIA = new FedoraServicesAPIA(protocol, host, Integer.parseInt(port), username, password); } catch(org.greenstone.fedora.services.FedoraGS3Exception.CancelledException e) { // The user pressed cancel in the fedora services instantiation dialog return false; } catch(Exception e) { logger.error("Error instantiating the interface to the Fedora Repository:\n", e); // second parameter prints e's stacktrace return false; } // Need to put the available services into short_service_info // This is used by DefaultReceptionist.process() has an exception. 
But DefaultReceptionist.addExtraInfo() // isn't helpful, and the problem actually already occurs in // Receptionist.process() -> PageAction.process() -> MessageRouter.process() // -> Collection/ServiceCluster.process() -> ServiceCluster.configureServiceRackList() // -> ServiceRack.process() -> ServiceRack.processDescribe() -> ServiceRack.getServiceList(). // ServiceRack.getServiceList() requires this ServiceRack's services to be filled into the // short_service_info Element which needs to be done in this FedoraServiceProxy.configure(). // get the display and format elements from the coll config file for // the classifiers AbstractBrowse.extractExtraClassifierInfo(info, extra_info); // Copied from IViaProxy.java: String collection = fedoraServicesAPIA.describeCollection(this.cluster_name); Element collNode = getResponseAsDOM(collection); Element serviceList = (Element)collNode.getElementsByTagName(GSXML.SERVICE_ELEM+GSXML.LIST_MODIFIER).item(0); //this.short_service_info.appendChild(short_service_info.getOwnerDocument().importNode(serviceList, true)); // we want the individual service Elements, not the serviceList Element which will wrap it later NodeList services = collNode.getElementsByTagName(GSXML.SERVICE_ELEM); for(int i = 0; i < services.getLength(); i++) { Node service = services.item(i); this.short_service_info.appendChild(short_service_info.getOwnerDocument().importNode(service, true)); } // add some format info to service map if there is any String path = GSPath.appendLink(GSXML.SEARCH_ELEM, GSXML.FORMAT_ELEM); Element search_format = (Element) GSXML.getNodeByPath(extra_info, path); if (search_format != null) { this.format_info_map.put("TextQuery", this.doc.importNode(search_format, true)); } // look for document display format path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM); Element display_format = (Element)GSXML.getNodeByPath(extra_info, path); if (display_format != null) { this.format_info_map.put("DocumentContentRetrieve", 
this.doc.importNode(display_format, true)); // should we make a copy? } // the format info Element cb_format_info = this.doc.createElement(GSXML.FORMAT_ELEM); boolean format_found = false; // look for classifier path = GSPath.appendLink(GSXML.BROWSE_ELEM, GSXML.FORMAT_ELEM); Element browse_format = (Element)GSXML.getNodeByPath(extra_info, path); if (browse_format != null) { cb_format_info.appendChild(GSXML.duplicateWithNewName(this.doc, browse_format, GSXML.DEFAULT_ELEM, true)); format_found = true; } // add in to the description a simplified list of classifiers Element browse = (Element)GSXML.getChildByTagName(extra_info, "browse"); // the NodeList classifiers = browse.getElementsByTagName(GSXML.CLASSIFIER_ELEM); for(int i=0; i 0) { metafields = new String[params.getLength()]; for(int i = 0; i < metafields.length; i++) { Element param = (Element)params.item(i); //if(param.hasAttribute(GSXML.NAME_ATT) && param.getAttribute(GSXML.NAME_ATT).equals("metadata") && param.hasAttribute(GSXML.VALUE_ATT)) { if(param.hasAttribute(GSXML.VALUE_ATT)){ metafields[i] = param.getAttribute(GSXML.VALUE_ATT); } else { metafields[i] = ""; } } } String lang = request.getAttribute(GSXML.LANG_ATT); if(!lang.equals(prevLanguage)) { prevLanguage = lang; fedoraServicesAPIA.setLanguage(lang); } Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveDocumentMetadata( this.cluster_name, docIDs, metafields)); return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0); } protected Element processClassifierBrowseMetadataRetrieve(Element request) { String[] classIDs = parse(request, GSXML.CLASS_NODE_ELEM, GSXML.NODE_ID_ATT); //String[] relLinks = parse(request, GSXML.CLASS_NODE_ELEM, "externalURL"); if(classIDs == null) { logger.error("ClassifierBrowseMetadataRetrieve request specified no classifier nodes.\n"); return this.doc.createElement(GSXML.RESPONSE_ELEM); // empty response } else { for(int i = 0; i < classIDs.length; i++) { classIDs[i] = translateId(classIDs[i]); } } 
NodeList params = request.getElementsByTagName(GSXML.PARAM_ELEM); String[] metafields = {}; if(params.getLength() > 0) { metafields = new String[params.getLength()]; for(int i = 0; i < metafields.length; i++) { Element param = (Element)params.item(i); if(param.hasAttribute(GSXML.VALUE_ATT)){ metafields[i] = param.getAttribute(GSXML.VALUE_ATT); } else { metafields[i] = ""; } } } String lang = request.getAttribute(GSXML.LANG_ATT); if(!lang.equals(prevLanguage)) { prevLanguage = lang; fedoraServicesAPIA.setLanguage(lang); } Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveBrowseMetadata( this.cluster_name, "ClassifierBrowseMetadataRetrieve", classIDs, metafields)); //logger.error("**** Response from retrieveBrowseMeta: " + GSXML.elementToString(response, true)); return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0); } protected Element processClassifierBrowse(Element request) { String collection = this.cluster_name; String lang = request.getAttribute(GSXML.LANG_ATT); if(!lang.equals(prevLanguage)) { prevLanguage = lang; fedoraServicesAPIA.setLanguage(lang); } NodeList classNodes = request.getElementsByTagName(GSXML.CLASS_NODE_ELEM); if(classNodes == null || classNodes.getLength() <= 0) { logger.error("ClassifierBrowse request specified no classifier IDs.\n"); return this.doc.createElement(GSXML.RESPONSE_ELEM); // empty response } String classifierIDs[] = new String[classNodes.getLength()]; for(int i = 0; i < classifierIDs.length; i++) { Element e = (Element)classNodes.item(i); classifierIDs[i] = e.getAttribute(GSXML.NODE_ID_ATT); classifierIDs[i] = translateId(classifierIDs[i]); } NodeList params = request.getElementsByTagName(GSXML.PARAM_ELEM); String structure=""; String info=""; for(int i = 0; i < params.getLength(); i++) { Element param = (Element)params.item(i); if(param.getAttribute("name").equals("structure")) { structure = structure + param.getAttribute("value") + "|"; } else if(param.getAttribute("name").equals("info")) { 
info = info + param.getAttribute("value") + "|"; } } ///structure = structure + "siblings"; //test for getting with classifier browse structure: siblings Element response = getResponseAsDOM(fedoraServicesAPIA.retrieveBrowseStructure(collection, "ClassifierBrowse", classifierIDs, new String[] {structure}, new String[] {info})); //logger.error("**** FedoraServiceProxy - Response from retrieveBrowseStructure: " + GSXML.elementToString(response, true)); return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0); } protected Element processTextQuery(Element request) { return processQuery(request, "TextQuery"); } protected Element processFieldQuery(Element request) { return processQuery(request, "FieldQuery"); } protected Element processQuery(Element request, String querytype) { String collection = this.cluster_name; String lang = request.getAttribute(GSXML.LANG_ATT); if(!lang.equals(prevLanguage)) { prevLanguage = lang; fedoraServicesAPIA.setLanguage(lang); } NodeList paramNodes = request.getElementsByTagName(GSXML.PARAM_ELEM); if(paramNodes.getLength() > 0) { HashMap params = new HashMap(paramNodes.getLength()); for(int i = 0; i < paramNodes.getLength(); i++) { Element param = (Element)paramNodes.item(i); params.put(param.getAttribute(GSXML.NAME_ATT), param.getAttribute(GSXML.VALUE_ATT)); } Element response = getResponseAsDOM(fedoraServicesAPIA.query(collection, querytype, params)); return (Element)response.getElementsByTagName(GSXML.RESPONSE_ELEM).item(0); } else { logger.error("TextQuery request specified no parameters.\n"); return this.doc.createElement(GSXML.RESPONSE_ELEM); // empty response } } // get the requested nodeIDs out of a request message protected String[] parse(Element request, String nodeType, String attribute) { String[] nodevalues = null; int count = 0; Element docList = (Element) GSXML.getChildByTagName(request, nodeType+GSXML.LIST_MODIFIER); if (docList != null) { NodeList docNodes = docList.getElementsByTagName(nodeType); 
if(docNodes.getLength() > 0) { nodevalues = new String[docNodes.getLength()]; for(int i = 0; i < nodevalues.length; i++) { Element e = (Element)docNodes.item(i); String id = e.getAttribute(attribute); // Not sure why there are at times requests for hashXXX.dir, which is not a fedora PID // To skip these: if not requesting an externalURL and if requesting a docNode, // then the ID has to contain the : character special to fedora PIDs if(attribute == "externalURL" || (nodeType != GSXML.DOC_NODE_ELEM || id.contains(":"))) { nodevalues[count++] = id; } } } } if(count == 0) { return null; } String[] tmp = new String[count]; for(int i = 0; i < count; i++) { tmp[i] = nodevalues[i]; } nodevalues = null; nodevalues = tmp; return nodevalues; } /** if id ends in .fc, .pc etc, then translate it to the correct id * For now (for testing things work) the default implementation is to just remove the suffix */ protected String translateId(String id) { if (OID.needsTranslating(id)) { return OID.translateOID(this, id); //return translateOID(id); } return id; } /** if an id is not a greenstone id (an external id) then translate * it to a greenstone one * default implementation: return the id. Custom implementation: * the id is a url that maps to a fedorapid whose dc.title contains the required HASHID */ protected String translateExternalId(String id) { //logger.error("*** to translate an external ID: " + id); /////return id; return this.externalId2OID(id); } /** converts an external id to greenstone OID. External ID is a URL link * that, if relative, maps to a fedorapid that has an entry in fedora. * The dc:title meta for that fedorapid will contain the required OID. 
*/
    public String externalId2OID(String extid) {
        if(extid.endsWith(".rt") && (extid.indexOf('.') != extid.lastIndexOf('.'))) {
            // .rt is not file extension, but Greenstone request for root of document
            // not relevant for external ID
            extid = extid.substring(0, extid.length()-3);
        }

        // the following method is unique to FedoraServicesAPIA
        String response = ((FedoraServicesAPIA)fedoraServicesAPIA).getDocIDforURL(extid, this.cluster_name);
        // no answer at all is treated like an empty answer: keep the external id
        if (response == null || response.equals("")) {
            return extid;
        }
        if (response.indexOf(GSXML.ERROR_ELEM) != -1) {
            logger.error("**** The following error occurred when trying to find externalID for ID " + extid);
            logger.error(response);
            return extid;
        }
        return response;
    }

    /** translates relative oids into proper oids:
     * .pr (parent), .rt (root) .fc (first child), .lc (last child),
     * .ns (next sibling), .ps (previous sibling)
     * .np (next page), .pp (previous page) : links sections in the order that you'd read the document
     * a suffix is expected to be present so test before using
     */
    public String processOID(String doc_id, String top, String suff, int sibling_num) {
        // NOTE(review): this method runs past the end of the visible source, so its
        // logic is left exactly as found; only the line structure was restored.

        // send off request to get sibling etc. information from Fedora
        Element response = null;
        String[] children = null;
        if(doc_id.startsWith("CL")) { // classifiernode
            response = getResponseAsDOM(fedoraServicesAPIA.retrieveBrowseStructure(this.cluster_name, "ClassifierBrowse", new String[]{doc_id},
                new String[]{"children"}, new String[]{"siblingPosition"}));
            NodeList nl = response.getElementsByTagName(GSXML.NODE_STRUCTURE_ELEM);
            if(nl.getLength() > 0) {
                Element nodeStructure = (Element)nl.item(0);

                if(nodeStructure != null) {
                    Element root = (Element) GSXML.getChildByTagName(nodeStructure, GSXML.CLASS_NODE_ELEM);
                    if(root != null) { // get children
                        NodeList classNodes = root.getElementsByTagName(GSXML.CLASS_NODE_ELEM);
                        if(classNodes != null) {
                            children = new String[classNodes.getLength()];
                            for(int i = 0; i < children.length; i++) {
                                Element child = (Element)classNodes.item(i);
                                children[i] = child.getAttribute(GSXML.NODE_ID_ATT);
                            }
                        }
                    }
                }
            }
        } else { // documentnode
            response = getResponseAsDOM(fedoraServicesAPIA.retrieveDocumentStructure(this.cluster_name, new String[]{doc_id},
                new String[]{"children"}, new String[]{"siblingPosition"}));
            String path = GSPath.createPath(new String[]{GSXML.RESPONSE_ELEM, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER,
                GSXML.DOC_NODE_ELEM, GSXML.NODE_STRUCTURE_ELEM, GSXML.DOC_NODE_ELEM});
            Element parentDocNode = (Element) GSXML.getNodeByPath(response, path);

            if (parentDocNode == null) {
                return top;
            } // else
            NodeList docNodes = parentDocNode.getElementsByTagName(GSXML.DOC_NODE_ELEM); // only children should remain, since that's what we requested
            if(docNodes.getLength() > 0) {
                children = new String[docNodes.getLength()];

                for(int i = 0; i < children.length; i++) {
                    Element e = (Element)docNodes.item(i);
                    children[i] = e.getAttribute(GSXML.NODE_ID_ATT);
                }
            } else { // return root node
                children = new String[]{doc_id};
            }
        }

        // NOTE(review): in the classifier branch above, children stays null when the
        // response holds no node structure and may be empty when the node has no
        // child classifier nodes; the array accesses below would then throw. Confirm
        // against the full method before adding a guard.
        if (suff.equals("fc")) {
            return children[0];
        } else if (suff.equals("lc")) {
            return children[children.length-1];
        } else {
            if (suff.equals("ss")) {
                return children[sibling_num-1];
            }
// find the position that we are at. int i=0; while(i