/**********************************************************************
 *
 * oaiaction.cpp --
 *
 * Copyright (C) 2004-2010  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "oaiaction.h"
#include "oaitools.h"
#include "recptprototools.h"

// NOTE(review): the bracketed header names of the four system includes below
// were missing from the text under review (bare "#include" directives).  They
// are reconstructed from what this file uses -- ostrstream under
// GSDL_USE_IOS_H, ostringstream otherwise, and time()/gmtime() -- and must be
// confirmed against the build before merging.
#if defined(GSDL_USE_IOS_H)
#  if defined(__WIN32__)
#    include <strstrea.h> // vc4
#  else
#    include <strstream.h>
#  endif
#else
#  include <sstream>
#endif

#include <time.h>

/**********
 * Construct an action with the given name.
 *
 * Opens "oai.log" in append mode and stores the stream in 'logout', clears the
 * configuration pointer and resets the cached earliest datestamp.
 *
 * NOTE(review): the ofstream is allocated with new and no matching delete is
 * visible in this file -- confirm that the destructor (declared elsewhere)
 * releases it.
 */
oaiaction::oaiaction(const text_t &name)
{
  this->logout = new ofstream("oai.log", ios::app);
  this->configuration = NULL;
  this->name = name;
  this->mEarliestDatestamp = "";
}

//----------------------------------------------------------------------------------------------

/**********
 * Default validation: clears any previous error type and accepts the request.
 * Over-ridden by child classes, which are expected to set 'errorType' and
 * return false when the request is invalid.
 */
bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
{
  this->errorType = "";
  return true;
}

//----------------------------------------------------------------------------------------------

/**********
 * Compare the supplied metadataPrefix to all those that
 * are supported. If there is NO match, return true. If
 * it DOES match one, return false.
*/ bool oaiaction::formatNotSupported(text_t &metaFormat) { // is it in our list? if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true; return false; } //---------------------------------------------------------------------------------------------- /********** * Function for outputting the appropriate error(s) with the (version 2.0) request. * The error(s) MUST be included in the response, and take the form: * Description of error */ void oaiaction::output_error(ostream &output, text_t &errorType) { text_t description = ""; if(errorType == "badArgument"){ description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax"; } else if(errorType == "noRecordsMatch"){ description = "No record matches all the requested parameters"; } else if(errorType == "cannotDisseminateFormat"){ description = "The metadata format specified is not supported by the item or by the repository"; } else if(errorType == "idDoesNotExist"){ description = "The value of the identifier is unknown or illegal in this repository"; } else if(errorType == "badVerb"){ description = "Value of the verb argument is illegal, missing, or repeated"; } else if(errorType == "noMetadataFormats"){ description = "There are no metadata formats available for the item"; } else if(errorType == "badResumptionToken"){ description = "The value of the resumptionToken argument is invalid or expired"; } else if(errorType == "noSetHierarchy"){ description = "The repository does not support sets"; } output << " " << description << "\n"; } //---------------------------------------------------------------------------------------------- text_t oaiaction::getName() { return this->name; } //---------------------------------------------------------------------------------------------- /********** * Used in version 2.0 to provide the tag for each document. 
The function is passed * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix * systems) and converts it to YYYY-MM-DD format. */ text_t oaiaction::parseDatestamp(time_t &rawtime) { text_t year, month, day, lastModified; tm *ptm; ptm = gmtime(&rawtime); int raw_month = ptm->tm_mon + 1; int raw_day = ptm->tm_mday; year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format // Need the month in MM format, so if month is 1..9, add a 0 to the front if(raw_month < 10){ month = "0"; month += raw_month; } else month = raw_month; if(raw_day < 10){ day = "0"; day += raw_day; } else day = raw_day; lastModified = year + "-" + month + "-" + day; return lastModified; } //---------------------------------------------------------------------------------------------- /********** * Used by both versions to get the date & time of the client's request. The tag is * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time * is expected to be in UTC, and Z should simply be the character 'Z'. */ void oaiaction::getResponseDate(text_t &date) { time_t rawtime; tm *ptm; time(&rawtime); // Get the epoch time ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object text_t month, day, hour, minute, second; int raw_month = ptm->tm_mon + 1; // Note Jan = 0 ... 
Dec = 11, so add 1 int raw_day = ptm->tm_mday; int raw_hour = ptm->tm_hour; int raw_minute = ptm->tm_min; int raw_second = ptm->tm_sec; // Need the month in MM format, so if month is 1..9, add a 0 to the front if(raw_month < 10){ month = "0"; } month += raw_month; // Same for days, hours, minutes and seconds if(raw_day < 10){ day = "0"; } day += raw_day; if(raw_hour < 10){ hour = "0"; } hour += raw_hour; if(raw_minute < 10){ minute = "0"; } minute += raw_minute; if(raw_second < 10){ second = "0"; } second += raw_second; // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format date += "-"; date += month; date += "-"; date += day; date += "T"; date += hour; date += ":"; date += minute; date += ":"; date += second; // If we're using v1.1, then tack on local time offset, otherwise don't if(this->configuration->getOAIVersion() == 110){ date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from // UTC (GMT), e.g. 
"+8:00", "-5:00" } else date += "Z"; // If v2.0, we put 'Z' on the end rather than the localtime offset } //---------------------------------------------------------------------------------------------- /********** * Does different request tags depending on the version of the OAI protocol running */ void oaiaction::getRequestURL(oaiargs ¶ms, text_t &requestURL) { // Iterators for moving through the list of parameters (keys) specified text_tmap::const_iterator here; text_tmap::const_iterator end; int numArgs = params.getSize(); here = params.begin(); end = params.end(); text_t baseURL = this->configuration->getBaseURL(); int version = this->configuration->getOAIVersion(); switch(version){ case 110: /* Takes the form: * http://baseURL.com/oaimain?verb="someVerb"&key=value */ requestURL = " " + baseURL; if(numArgs == 0) break; // If no args, all done - the error will be picked up later // The following lines will give us the "label=value" syntax requestURL += "?"; requestURL += here->first; requestURL += "="; requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant ++here; while(here != end){ requestURL +="&"; // Stick in the ampersand in URL encoding requestURL += (here->first + "=" + html_safe(here->second)); ++here; } requestURL += "\n"; break; case 200: default: /* Takes the form: * * http://baseURL.com/oaimain */ if(numArgs == 0) { requestURL = " " + baseURL + "\n"; break; } requestURL = " first + "=\"" + html_safe(here->second) + "\""; ++here; while(here != end){ requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\""); ++here; } requestURL += ">\n " + baseURL + "\n"; break; } } //---------------------------------------------------------------------------------------------- /********** * Send the (OAI version-dependent) response text to the output stream */ void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs ¶ms) { bool error; text_t date, requestURL; // Write the response date & 
time into 'date' this->getResponseDate(date); int version = this->configuration->getOAIVersion(); // validate the action error = !this->validateAction(protocol, params); // raise an error for duplicated arguments and set the // error type "manually" here... if (params.hasDuplicateArg() && !error) { this->errorType = "badArgument"; error = true; } // start with the required http header if (version <= 110 && error){ output << "Status: 400 " << this->errorType << "\n"; output << "Content-Type: text/xml\n\n"; return; } output << "Status: 200\n"; output << "Content-Type: text/xml\n\n"; // output xml header parts output << "\n"; if(version <= 110){ // output OAI v1.1 action header tag output << "<" << this->name; output << "\n xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" "; output << "\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "; output << "\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name; output << "\n http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n"; } else { text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot(); output << "\n"; // output OAI v2.0 action header tag output << "\n"; } // output current time for response output << " " << date << "\n"; // output request URL. This differs depending on the OAI protocol version currently running, so // the entire field - including tags - must be put into the text_t variable by getRequestURL() this->getRequestURL(params, requestURL); output << requestURL ; if (error == false) { // a string stream to write the content of the action to; this is done so that we can // avoid outputting the action tag if the action's body is unsuccessful, in which // case the leading tag must be suppressed #if defined(GSDL_USE_IOS_H) ostrstream outstream; #else ostringstream outstream; #endif // Version 2.0 needs an , etc. 
tag after the OAI-PMH header, IF there is no error // // An action that outputs no content should raise an error state to suppress the // matching opening and close tags if it outputs no content in OAI 2.0 error = !this->output_content(outstream, protocol, params); // output the leading tag if no error occurred if (error == false) { if (version >= 200) { this->output_action_tag(output, true); } } // now output the body of the action content #if defined(GSDL_USE_IOS_H) outstream << ends; // Ensure outstream is null-terminated correctly #endif output << outstream.str(); } else { if (version >= 200) { this->output_error(output, this->errorType); } } // close out our response - both versions need this line, but v2.0 only needs it if there was no error if((version == 110) || (version >= 200 && error == false)){ this->output_action_tag(output, false); } if(version >= 200){ output << "\n"; } } void oaiaction::output_action_tag(ostream &output, bool openTag) { output << " <"; if (!openTag) { output << "/"; } output << this->name << ">" << endl; } void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified, const text_tarray &memberOf, int oaiVersion) { output << "
" << endl; output << " " << oaiLabel << "" << endl; output << " " << lastModified << "" << endl; text_t collection_id; // Find the collection id from oai:repos-id:collection:doc oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id); if(oaiVersion >= 200){ text_tarray::const_iterator member = memberOf.begin(); text_tarray::const_iterator memEnd = memberOf.end(); // As well as all the subsets that a doc appears in, it is also a member of the 'collection' set output << " " << collection_id << "" << endl; while (member != memEnd) { text_t oaiSet = *member; oaiclassifier::toOAI(collection_id, oaiSet); output << " " << oaiSet << "" << endl; ++member; } } output << "
" << endl; } void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified) { text_t temp; MetadataInfo_tmap::iterator current = doc_info.metadata.begin(); MetadataInfo_tmap::iterator end = doc_info.metadata.end(); while(current != end){ temp = current->first; lc(temp); // lowercased for string comparison below if(temp == "gs.oaidatestamp" && current->second.values[0] != "") { // user specified a (non-empty) oaidatestamp as gs metadata // assume it is correct format lastModified = current->second.values[0]; return; } else{ if (temp == "oailastmodified" && lastModified == "" && current->second.values.size() >= 1) { lastModified = current->second.values[0]; time_t raw_time = (time_t)lastModified.getint(); lastModified = this->parseDatestamp(raw_time); } } ++current; } } bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection, const text_t &OID, recptproto *protocol, ostream &logout) { FilterResponse_t response; text_tset metadata; bool status_ok = get_info(OID, collection, "", metadata, false, protocol, response, logout); bool not_too_early = false, not_too_recent = false; if(status_ok) { ResultDocInfo_t doc_info = response.docInfo[0]; text_t lastModDate; this->getLastModifiedDate(doc_info, lastModDate); // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison if(from != ""){ if(from <= lastModDate) not_too_early = true; } else not_too_early = true; // If there's no FROM field, then the record can't be too early if(until != ""){ if(lastModDate <= until) not_too_recent = true; } else not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent if(not_too_early && not_too_recent) return true; else return false; } else return false; } text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs ¶ms) { text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet //text_t version = 
(this->configuration->getOAIVersion() <= 110) ? (text_t)"1.1":(text_t)"2.0"; //if(version == "2.0"){ // earliestDatestamp *should* be the YYYY-MM-DD format of the oldest lastmodified record in the // repository, but we're just setting it to be the default oldest possible date - ugly, but judged // not to be worth the effort of trolling through all the lastmodified dates (by others with more // say than me) // The above was before. However, now we mirror GS3 way of dealing with // earliestDatestamp by going through the earliestDatestamp field of each OAI // collection's build.cfg in order to work out earliestdatestamp of this Repository: // by going through all the collections and getting the earliest among the // "earliestDatestamp" values stored for each collection in its build.cfg // (the earliestDatestamp for a collection has already been extracted from // their build.cfg file at this point by collectserver::configure. The field // is declared in comtypes.h) // Get a list of the OAI-enabled collections available text_tarray& collections = this->configuration->getCollectionsList(); if (collections.size() > 0) { // get the identifier from the params text_t identifier = params["identifier"]; text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":"; identifier.replace(oai_OID_prefix, ""); // Get the current collection from the identifier text_t collection_name = ""; oaiclassifier::toGSDL(collection_name, identifier); // Find the starting collection text_tarray::iterator collection_iterator = collections.begin(); while (collection_iterator != collections.end()) { if (collection_name == "" || collection_name == *collection_iterator) { break; } collection_iterator++; } // Now loop through the remaining collections // to work out the earliest datestamp while (collection_iterator != collections.end()) { collection_name = (*collection_iterator); ColInfoResponse_t cinfo; comerror_t err; protocol->get_collectinfo(collection_name, cinfo, err, cerr); if (err 
== noError) { text_t eDatestamp = cinfo.earliestDatestamp; time_t raw_time = (time_t)eDatestamp.getint(); eDatestamp = this->parseDatestamp(raw_time); if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection earliestDatestamp = eDatestamp; } else if(eDatestamp < earliestDatestamp) { earliestDatestamp = eDatestamp; } } collection_iterator++; } } //} // if repository's earliestDatestamp is still unset, default to unix epoch time if(earliestDatestamp == "") { earliestDatestamp = "1970-01-01"; } this->mEarliestDatestamp = earliestDatestamp; return mEarliestDatestamp; }