#include "abstractlistaction.h"
#include "OIDtools.h"
#include "recptprototools.h"
#include "oaitools.h"


// Reports whether get_info() succeeds for the classifier named by set_name
// in the given collection (no metadata is requested).
bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name)
{
  text_tset metadata;
  FilterResponse_t response;
  return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout);
}


// Outputs the records for the request described by params.
//
// When a "set" is specified (directly or through the resumptionToken) only the
// records of that set are output; otherwise every collection returned by the
// configuration is processed in turn, starting from the collection recorded in
// the resumption token position (if any).
//
// Returns true when the output is complete or a resumption token has been
// written. Returns false when there are no collections, or when no records
// were output and the OAI version is >= 200 (a "noRecordsMatch" error is
// output in that case).
bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
{
  // Reset variables
  this->output_docs = 0;

  text_t set_name = params["set"];
  text_t position = "";

  // Process the resumptionToken if there is one
  if (params["resumptionToken"] != "")
  {
    ResumptionToken resumption_token(params["resumptionToken"]);
    set_name = resumption_token.getSet();
    position = resumption_token.getPosition();
  }

  // Case for "set" argument present -- output just the records in the specified set
  if (set_name != "")
  {
    // Separate the collection name and Greenstone classifier OID from the set name
    text_t collection_name = "";
    text_t gsdl_classifier_OID = set_name;
    oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID);

    // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
    if (!output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name))
    {
      return true;
    }
  }

  // Case for no "set" argument present -- output all records in all collections
  else
  {
    // Get a list of the collections available
    text_tarray& collections = this->configuration->getCollectionsList();
    if (collections.empty())
    {
      return false;
    }

    // Get the current collection from the position value
    text_t collection_name = "";
    oaiclassifier::toGSDL(collection_name, position);

    // Find the starting collection: the first one when there is no position,
    // otherwise the one named in the position (end() if it is not found)
    text_tarray::iterator collection_iterator = collections.begin();
    while (collection_iterator != collections.end())
    {
      if (collection_name == "" || collection_name == *collection_iterator)
      {
        break;
      }

      ++collection_iterator;
    }

    // Now loop through the remaining collections
    while (collection_iterator != collections.end())
    {
      // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
      if (!output_content_for_set(output, protocol, params, *collection_iterator, "", ""))
      {
        return true;
      }

      ++collection_iterator;
    }
  }

  // If no records were output throw an error
  if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0)
  {
    errorType = "noRecordsMatch";
    this->output_error(output, errorType);
    return false;
  }

  return true;
}


// Outputs the records of one set (a classifier within a collection, or the
// whole collection when gsdl_classifier_OID is empty, in which case the "oai"
// classifier is used).
//
// The metadata prefix, date range and starting position come from params, or
// from the resumptionToken when one is present. Once this->output_docs reaches
// the configured resumeAfter() value a resumptionToken element is written
// instead of the next record.
//
// Returns false when a resumption token has been output (the caller must stop),
// true when every identifier of the set has been processed.
bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name)
{
  // Check if the set is actually a collection
  if (gsdl_classifier_OID == "")
  {
    gsdl_classifier_OID = "oai";
  }

  text_t metadata_prefix = params["metadataPrefix"];
  text_t from = params["from"];
  text_t until = params["until"];
  text_t position = "";

  // Process the resumptionToken if there is one
  if (params["resumptionToken"] != "")
  {
    ResumptionToken resumption_token(params["resumptionToken"]);
    metadata_prefix = resumption_token.getMetadataPrefix();
    from = resumption_token.getFrom();
    until = resumption_token.getUntil();
    position = resumption_token.getPosition();
  }

  // Get the list of identifiers in this collection
  // Collections should not contain too many identifiers otherwise this will use a lot of time and memory
  text_tset metadata;  // Must be empty for efficiency
  FilterResponse_t identifiers_response;
  get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout);

  // Find the starting position, if necessary. This is only done while nothing
  // has been output yet; with an empty position we start at the first identifier.
  ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin();
  if (this->output_docs == 0 && position != "")
  {
    while (identifier_iterator != identifiers_response.docInfo.end())
    {
      if (position == (collection_name + ":" + identifier_iterator->OID))
      {
        break;
      }

      ++identifier_iterator;
    }
  }

  // Now loop through displaying the next matching records
  while (identifier_iterator != identifiers_response.docInfo.end())
  {
    const text_t identifier_OID = identifier_iterator->OID;

    // Identifiers starting with "oai." are translated through the classifier helper
    text_t document_OID = identifier_OID;
    if (starts_with(document_OID, "oai."))
    {
      document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout);
    }

    // Check this OID is in the (optional) date range specified
    if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until))
    {
      // If we've output the desired number of records return a resumptionToken and we're done
      if (this->output_docs == this->configuration->resumeAfter())
      {
        // If a set has been specified, we can use the collection's buildDate in the resumption token
        text_t date_stamp = "";
        if (set_name != "")
        {
          ColInfoResponse_t cinfo;
          comerror_t err;
          // NOTE(review): this logs to cerr whereas the rest of the file uses *this->logout,
          // and err is not inspected afterwards -- confirm whether that is intentional
          protocol->get_collectinfo(collection_name, cinfo, err, cerr);
          date_stamp = cinfo.buildDate;
        }

        // The position stored in the token is "<collection>:<identifier OID>" of the next record to output
        ResumptionToken resumption_token(date_stamp, set_name, metadata_prefix, from, until, collection_name + ":" + identifier_OID);

        // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
        // (the token string must sit directly between the element's start and end tags)
        output << "  <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
        return false;
      }

      // Otherwise output this record and increment the count
      this->output_document(output, protocol, collection_name, document_OID, metadata_prefix);
      this->output_docs++;
    }

    ++identifier_iterator;
  }

  return true;
}


// Reports whether the document containing oai_OID falls within the optional
// [from, until] range, comparing against the value produced by
// getLastModifiedDate(). An empty bound is treated as "no limit".
//
// Returns false when the document information cannot be retrieved.
// The output and params arguments are not used here.
bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs &params, text_t& collection, text_t oai_OID, text_t from, text_t until)
{
  // If no "from" or "until" value is specified every record matches, so we don't need to go any further
  if (from == "" && until == "")
  {
    return true;
  }

  // Get the datestamp from the document as sections do not have this metadata
  text_t document_OID;
  get_top(oai_OID, document_OID);

  // Request the lastmodified value for this document
  text_tset metadata;
  metadata.insert("lastmodified");
  metadata.insert("gs.OAIDateStamp");
  FilterResponse_t response;
  if (!get_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout))
  {
    return false;
  }

  // Guard against a successful call that returned no document information,
  // which would otherwise make the docInfo[0] access below invalid
  if (response.docInfo.empty())
  {
    return false;
  }

  text_t last_modified_date;
  this->getLastModifiedDate(response.docInfo[0], last_modified_date);

  // Check this record is not before the "from" value, if it exists
  if (from != "" && last_modified_date < from)
  {
    // Too early
    return false;
  }

  // Check this record is not after the "until" value, if it exists
  if (until != "" && last_modified_date > until)
  {
    // Too late
    return false;
  }

  // Just right
  return true;
}