/* * OAIPMH.java * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.greenstone.gsdl3.service; // Greenstone classes import org.greenstone.gsdl3.core.GSException; import org.greenstone.gsdl3.util.GSXML; import org.greenstone.gsdl3.util.OAIXML; import org.greenstone.gsdl3.util.OID; import org.greenstone.gsdl3.util.GSFile; import org.greenstone.gsdl3.util.XMLConverter; import org.greenstone.gsdl3.util.SimpleCollectionDatabase; import org.greenstone.gsdl3.util.DBInfo; // XML classes import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; // General Java classes import java.io.File; import java.util.StringTokenizer; import java.util.Vector; import java.util.Set; import java.util.Iterator; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Map.Entry; import org.apache.log4j.Logger; /** Implements the oai metadata retrieval service for GS3 collections. 
* Dig into each collection's database and retrieve the metadata * */ public class OAIPMH extends ServiceRack { static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName()); protected SimpleCollectionDatabase coll_db = null; protected SimpleCollectionDatabase oaiinf_db = null; protected String site_name = ""; protected String coll_name = ""; // set this up during configure protected Element list_sets_response = null; protected Element meta_formats_definition = null; protected HashMap> format_elements_map = null; protected HashMap format_response_map = null; /** constructor */ public OAIPMH() { } public void cleanUp() { super.cleanUp();//?? this.coll_db.closeDatabase(); this.oaiinf_db.closeDatabase(); } /** configure this service info is the OAIPMH service rack from collectionConfig.xml, and extra_info is buildConfig.xml */ public boolean configure(Element info, Element extra_info) { if (!super.configure(info, extra_info)){ logger.info("Configuring ServiceRack.java returns false."); return false; } //get the names from ServiceRack.java this.site_name = this.router.getSiteName(); this.coll_name = this.cluster_name; logger.info("Configuring OAIPMH..."); this.config_info = info; // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER); String index_stem = ""; String infodb_type = ""; if (metadata_list != null) { Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem"); if (index_stem_elem != null) { index_stem = GSXML.getNodeText(index_stem_elem); } Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType"); if (infodb_type_elem != null) { infodb_type = GSXML.getNodeText(infodb_type_elem); } } if (index_stem == null || index_stem.equals("")) { 
index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is . } if (infodb_type == null || infodb_type.equals("")) { infodb_type = "gdbm"; // the default } coll_db = new SimpleCollectionDatabase(infodb_type); if (!coll_db.databaseOK()) { logger.error("Couldn't create the collection database of type "+infodb_type); return false; } oaiinf_db = new SimpleCollectionDatabase(infodb_type); if (!oaiinf_db.databaseOK()) { logger.error("Couldn't create the oai-inf database of type "+infodb_type); oaiinf_db = null; return false; } // Open databases for querying String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type); if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) { logger.error("Could not open collection database!"); return false; } // the oaiinf_db is called oai-inf. String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type); if (oaiinf_db != null && !this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) { logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!"); } // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present. 
configureSetInfo(); // the short_service_info is used by the message router to find the method names, Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM); list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS); list_records.setAttribute(GSXML.TYPE_ATT, "oai"); this.short_service_info.appendChild(list_records); Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM); list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS); list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai"); this.short_service_info.appendChild(list_identifiers); Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM); list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS); list_sets.setAttribute(GSXML.TYPE_ATT, "oai"); this.short_service_info.appendChild(list_sets); Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM); list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS); list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai"); this.short_service_info.appendChild(list_metadata_formats); Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM); get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD); get_record.setAttribute(GSXML.TYPE_ATT, "oai"); this.short_service_info.appendChild(get_record); return true; } public boolean configureOAI(Element oai_config_elem) { this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS); this.format_response_map = new HashMap(); this.format_elements_map = new HashMap>(); // for now, all we want is the metadata prefix description and the mapping list Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS); if (main_lmf_elem == null) { logger.error("No listMetadataFormats element found in OAIConfig.xml"); return false; } NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT); if (meta_formats_list.getLength() 
== 0) { logger.error("no metadataFormat elements found in OAIPMH serviceRack element"); return false; } boolean found_meta_format = false; for(int i=0; i getAllCollectionElements(Element meta_format) { HashSet meta_name_set = new HashSet(); NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT); for (int i=0; i param_map = GSXML.getParamMap(params); String prefix = param_map.get(OAIXML.METADATA_PREFIX); if (prefix == null || prefix.equals("")) { //Just a double-check logger.error("the value of metadataPrefix att is not present in the request."); return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); } // check that we support this format if (!format_response_map.containsKey(prefix)) { logger.error("metadata prefix is not supported for collection "+this.coll_name); return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); } Document doc = XMLConverter.newDOM(); String oid = param_map.get(OAIXML.OID); // TODO should this be identifier??? boolean OID_is_deleted = false; long millis = -1; DBInfo oai_info = null; if(oaiinf_db != null) { oai_info = this.oaiinf_db.getInfo(oid); if (oai_info == null) { logger.warn("OID: " + oid + " is not present in the collection's oai-inf database."); } else { String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS); if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) { OID_is_deleted = true; // get the right timestamp for deletion: from oaiinf db String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // in seconds presumably, like oailastmodified in the collection index db millis = Long.parseLong(timestamp)*1000; // in milliseconds } } } //get a DBInfo object of the identifier; if this identifier is not present in the database, // null is returned. 
DBInfo info = this.coll_db.getInfo(oid);
    if (info == null) {
      logger.error("OID: " + oid + " is not present in the collection database.");
      if (!OID_is_deleted) {
        // Not in the collection database and not recorded as deleted in oai-inf:
        // there is nothing to build a record from, and passing a null DBInfo on
        // to createMetadataElement() below would fail.
        return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
      }
      // otherwise the document exists as deleted in the oai-inf db; only a header is returned
    } else if (millis == -1) { // so !OID_is_deleted, get oailastmodified from collection's index db
      millis = getDateStampMillis(info);
    }
    String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);

    Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
    Element get_record = doc.createElement(OAIXML.GET_RECORD);
    get_record_response.appendChild(get_record);
    Element record = doc.createElement(OAIXML.RECORD);
    //compose the header element
    record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
    if (!OID_is_deleted) {
      //compose the metadata element
      record.appendChild(createMetadataElement(doc, prefix, info));
    }
    get_record.appendChild(record);
    return get_record_response;
  }

  /**
   * Return a list of records in the specified set, containing metadata from the
   * specified prefix. Delegates to processListIdentifiersOrRecords with
   * include_metadata set to true.
   *
   * @param req the request element carrying the parameters
   * @return the response element produced by processListIdentifiersOrRecords
   */
  protected Element processListRecords(Element req) {
    return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
  }

  /**
   * Return a list of identifiers in the specified set that contain metadata
   * belonging to the specified prefix. Delegates to
   * processListIdentifiersOrRecords with include_metadata set to false.
   *
   * @param req the request element carrying the parameters
   * @return the response element produced by processListIdentifiersOrRecords
   */
  protected Element processListIdentifiers(Element req) {
    return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
  }

  // Get a list of records/identifiers that match the parameters.
protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) { /** arguments: metadataPrefix: required * from: optional * until: optional * set: optional * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist) * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist */ NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM); if(params.getLength() == 0) { logger.error("must at least have the metadataPrefix parameter, can't be none"); return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, ""); } HashMap param_map = GSXML.getParamMap(params); String prefix = ""; Date from_date = null; Date until_date = null; if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) { //Just a double-check logger.error("A param element containing the metadataPrefix is not present."); return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); } prefix = param_map.get(OAIXML.METADATA_PREFIX); if (prefix == null || prefix.equals("")) { //Just a double-check logger.error("the value of metadataPrefix att is not present in the request."); return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); } if(param_map.containsKey(OAIXML.FROM)) { String from = param_map.get(OAIXML.FROM); from_date = OAIXML.getDate(from); } if(param_map.containsKey(OAIXML.UNTIL)) { String until = param_map.get(OAIXML.UNTIL); until_date = OAIXML.getDate(until); } if (!format_response_map.containsKey(prefix)) { logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name); return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); } // get list of oids ArrayList oid_list = null; if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db oid_list = new ArrayList(oaiinf_db.getAllKeys()); if(oid_list == null) { // try getting the OIDs from the oai entries in the index db logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + 
this.cluster_name); oid_list = getChildrenIds(OAIXML.BROWSELIST); } } if (oid_list == null) { logger.error("No matched records found in collection: oai-inf and index db's browselist are empty"); return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, ""); } // all validation is done // get the list of elements that are in this metadata prefix HashSet set_of_elems = format_elements_map.get(prefix); Document doc = XMLConverter.newDOM(); Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM); Element list_items = doc.createElement(response_name); list_items_response.appendChild(list_items); for(int i=0; i element Element metadata = doc.createElement(OAIXML.METADATA); // the element Element prfx_str_elem = OAIXML.getMetadataPrefixElement(doc, prefix, OAIXML.oai_version); metadata.appendChild(prfx_str_elem); Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix); NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT); // for each element in the definition for (int i=0; i values = info.getMultiInfo(meta_name); if (values != null && values.size()!=0) { for (int i=0; i values = info.getMultiInfo(names[i]); if (values == null || values.size()==0) { continue; } for (int j=0; j oid_list = null; if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db oid_list = new ArrayList(oaiinf_db.getAllKeys()); if(oid_list == null) { // try getting the OIDs from the oai entries in the index db oid_list = getChildrenIds(OAIXML.BROWSELIST); } } */ // assume meta formats are only for OIDs that have not been deleted // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs ArrayList oid_list = getChildrenIds(OAIXML.BROWSELIST); if (oid_list == null || oid_list.contains(oid) == false) { logger.error("OID: " + oid + " is not present in the database."); Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, ""); // 
logger.error((new XMLConverter()).getPrettyString (e)); return e; } DBInfo info = null; info = this.coll_db.getInfo(oid); if (info == null) { //just double check return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, ""); } Document doc = XMLConverter.newDOM(); Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM); Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS); list_metadata_formats_response.appendChild(list_metadata_formats); boolean has_meta_format = false; // for each format in format_elements_map Iterator it = format_elements_map.keySet().iterator(); while (it.hasNext()) { String format = it.next(); HashSet set_of_elems = format_elements_map.get(format); if (documentContainsMetadata(info, set_of_elems)) { // add this format into the response has_meta_format = true; list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true)); } } if (has_meta_format == false) { logger.error("Specified metadata names are not contained in the database."); return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, ""); } else { return list_metadata_formats_response; } } protected boolean documentContainsMetadata(DBInfo info, HashSet set_of_elems) { if (set_of_elems.size() == 0) { return false; } Iterator i = set_of_elems.iterator(); while (i.hasNext()) { if (!info.getInfo(i.next()).equals("")) { return true; } } return false; } /** returns a list of the child ids in order, null if no children */ protected ArrayList getChildrenIds(String node_id) { DBInfo info = this.coll_db.getInfo(node_id); if (info == null) { return null; } String contains = info.getInfo("contains"); if (contains.equals("")) { return null; } ArrayList children = new ArrayList(); StringTokenizer st = new StringTokenizer(contains, ";"); while (st.hasMoreTokens()) { String child_id = st.nextToken().replaceAll("\"", node_id); children.add(child_id); } return children; } /**method to check whether any of the 
'metadata_names' is contained in the 'info'. * The name may be in the form: ,, in which the mapped name is * optional. The mapped name is looked up in the DBInfo; if not present, use the first * name which is mandatory. */ protected boolean containsMetadata(DBInfo info, String[] metadata_names) { if (metadata_names == null) return false; logger.info("checking metadata names in db."); for(int i=0; i