Changeset 20590
- Timestamp:
- 2009-09-11T11:54:17+12:00 (15 years ago)
- Location:
- gsdl/trunk/runtime-src/src/oaiservr
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp
r16835 r20590 1 1 #include "abstractlistaction.h" 2 #include "OIDtools.h" 2 3 #include "recptprototools.h" 3 4 4 5 #include "oaitools.h" 5 6 6 //-------------------------------------------------------------------------------------------------- 7 8 bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name) 9 { 10 text_tset metadata; 11 FilterResponse_t response; 12 return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout); 13 } 14 7 15 8 16 bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs ¶ms) 9 17 { 10 bool prevDocSeen; 11 ResumptionToken *token = NULL; 12 13 // start the call; clear down the total number of output documents 14 this->outputDocs = 0; 15 16 // We don't actually handle resumptionTokens yet; if we get one, ignore it 17 if (params["resumptionToken"] != "") { 18 token = new ResumptionToken(params["resumptionToken"]); 19 } 20 21 this->replyToken = NULL; 22 23 // if we've been asked for a set, then use it! 24 if (params["set"] != "") { 25 // get the children of this set 26 text_t gsdlSet = params["set"]; 27 text_t gsdlCollect = ""; 28 29 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further 30 // set specified after the name of the collection however, then gsdlSet is empty. 31 oaiclassifier::toGSDL(gsdlCollect, gsdlSet); 32 33 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so 34 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then 35 // use recurse_set() to traverse any sub classifiers to find the relevant docs. 
36 if(gsdlSet == ""){ 37 ColInfoResponse_t cinfo; 38 comerror_t err; 39 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params); 40 } 41 else { 42 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) { 43 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token); 44 } 45 } 46 } 47 // output all records in all hierarchies 48 else { 49 this->output_content_for_all(output, protocol, params); 50 } 51 52 // If - regardless of set required - no documents have been seen, throw an error. 53 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) { 18 // Reset variables 19 this->output_docs = 0; 20 21 text_t set_name = params["set"]; 22 text_t position = params["position"]; 23 24 // Process the resumptionToken if there is one 25 if (params["resumptionToken"] != "") 26 { 27 ResumptionToken resumption_token(params["resumptionToken"]); 28 set_name = resumption_token.getSet(); 29 position = resumption_token.getPosition(); 30 } 31 32 // Case for "set" argument present -- output just the records in the specified set 33 if (set_name != "") 34 { 35 // Separate the collection name and Greenstone classifier OID from the set name 36 text_t collection_name = ""; 37 text_t gsdl_classifier_OID = set_name; 38 oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID); 39 40 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop 41 if (output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name) == false) 42 { 43 return true; 44 } 45 } 46 47 // Case for no "set" argument present -- output all records in all collections 48 else 49 { 50 // Get a list of the collections available 51 text_tarray& collections = this->configuration->getCollectionsList(); 52 if (collections.size() == 0) 53 { 54 return false; 55 } 56 57 // Get the current collection from the position value 58 text_t collection_name = ""; 59 oaiclassifier::toGSDL(collection_name, 
position); 60 61 // Find the starting collection 62 text_tarray::iterator collection_iterator = collections.begin(); 63 while (collection_iterator != collections.end()) 64 { 65 if (collection_name == "" || collection_name == *collection_iterator) 66 { 67 break; 68 } 69 70 collection_iterator++; 71 } 72 73 // Now loop through the remaining collections 74 while (collection_iterator != collections.end()) 75 { 76 // If output_content_for_set() returns false a resumption token has been output, so it's time to stop 77 if (output_content_for_set(output, protocol, params, *collection_iterator, "", "") == false) 78 { 79 return true; 80 } 81 82 collection_iterator++; 83 } 84 } 85 86 // If no records were output throw an error 87 if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0) 88 { 54 89 errorType = "noRecordsMatch"; 55 90 this->output_error(output, errorType); 56 57 return false; 58 } 59 60 // do a resumption token if required; errors cancel a token... 61 if (this->replyToken != NULL && this->errorType == "") { 62 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 63 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl; 91 return false; 64 92 } 65 93 … … 67 95 } 68 96 69 //-------------------------------------------------------------------------------------------------- 70 71 void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect, 72 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs ¶ms) 73 { int startDoc = 0; 74 75 text_t metadataPrefix = params["metadataPrefix"]; 76 77 // check resumption token 78 if (params["resumptionToken"] != "") { 79 ResumptionToken token(params["resumptionToken"]); 80 if (token.getCollection() == gsdlCollect) { 81 startDoc = token.getPosition() - 1; // first document is said to be 1.. 
82 metadataPrefix = "oai_dc"; // TO DO: This should come from the resumption token 83 } 84 } 85 86 // Get the OAI nodes from the info db file 87 text_t oai_root_node = "oai"; 97 98 bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs ¶ms, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name) 99 { 100 // Check if the set is actually a collection 101 if (gsdl_classifier_OID == "") 102 { 103 gsdl_classifier_OID = "oai"; 104 } 105 106 text_t metadata_prefix = params["metadataPrefix"]; 107 text_t from = params["from"]; 108 text_t until = params["until"]; 109 text_t position = ""; 110 111 // Process the resumptionToken if there is one 112 if (params["resumptionToken"] != "") 113 { 114 ResumptionToken resumption_token(params["resumptionToken"]); 115 metadata_prefix = resumption_token.getMetadataPrefix(); 116 from = resumption_token.getFrom(); 117 until = resumption_token.getUntil(); 118 position = resumption_token.getPosition(); 119 } 120 121 // Get the list of identifiers in this collection 122 // Collections should not contain too many identifiers otherwise this will use a lot of time and memory 88 123 text_tset metadata; // Must be empty for efficiency 124 FilterResponse_t identifiers_response; 125 get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout); 126 127 // Find the starting position, if necessary 128 ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin(); 129 if (output_docs == 0) 130 { 131 while (identifier_iterator != identifiers_response.docInfo.end()) 132 { 133 if (position == "" || position == (collection_name + ":" + (*identifier_iterator).OID)) 134 { 135 break; 136 } 137 138 identifier_iterator++; 139 } 140 } 141 142 // Now loop through displaying the next matching records 143 while (identifier_iterator != identifiers_response.docInfo.end()) 144 { 145 position = (*identifier_iterator).OID; 146 147 
text_t document_OID = position; 148 if (starts_with(document_OID, "oai.")) 149 { 150 document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout); 151 } 152 153 // Check this OID is in the (optional) date range specified 154 if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until)) 155 { 156 // If we've output the desired number of records return a resumptionToken and we're done 157 if (this->output_docs == this->configuration->resumeAfter()) 158 { 159 // Get the buildDate from the build.cfg file 160 ColInfoResponse_t cinfo; 161 comerror_t err; 162 protocol->get_collectinfo(collection_name, cinfo, err, cerr); 163 164 ResumptionToken resumption_token(cinfo.buildDate, set_name, metadata_prefix, from, until, collection_name + ":" + position); 165 166 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 167 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl; 168 return false; 169 } 170 171 // Otherwise output this record and increment the count 172 this->output_document(output, protocol, collection_name, document_OID, metadata_prefix); 173 this->output_docs++; 174 } 175 176 identifier_iterator++; 177 } 178 179 return true; 180 } 181 182 183 bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs ¶ms, 184 text_t& collection, text_t oai_OID, text_t from, text_t until) 185 { 186 // If no "from" or "until" value is specified every record matches, so we don't need to go any further 187 if (from == "" && until == "") 188 { 189 return true; 190 } 191 192 // Get the datestamp from the document as sections do not have this metadata 193 text_t document_OID; 194 get_top(oai_OID, document_OID); 195 196 // Request the lastmodified value for this document 197 text_tset metadata; 198 metadata.insert("lastmodified"); 89 199 FilterResponse_t response; 90 get_children(oai_root_node, 
gsdlCollect, "", metadata, false, protocol, response, *this->logout); 91 92 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection 93 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has 94 // been set to true by a previous collection that this won't overwrite it to be false). 95 if (response.docInfo.size() > 0) { 96 int errorCount = 0; // Count the number of errors found in the given collection 97 98 for (long i = startDoc; i < response.docInfo.size(); ++i) { 99 if (errorCount > 3) { // If num errors reaches the cut-off value, bail. 100 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect 101 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n"; 102 return; 103 } 104 105 text_t oai_id = "oai."; 106 oai_id += i; 107 108 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout); 109 110 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so 111 ++errorCount; // increase error count 112 continue; 113 } 114 115 116 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within 117 // the required date range (if specified). 118 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) { 119 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, metadataPrefix)) { 120 // this should be an IF statement, where prevDocSeen is only set to true if the above 121 // function call returns true (indicating that the doc supported the metadata prefix) but 122 // for some reason this is always false. This means that if no doc in the requested set supports 123 // the metadata format, the "no records match" error that should be thrown won't be... 
124 // 125 // GRB: the above comment is no longer true; proper checks are made 126 this->prevDocSeen = true; 127 ++this->outputDocs; 128 } 129 } 130 131 // if we've output the number of resumption documents; prepare a resumptionToken 132 if (this->outputDocs == this->configuration->resumeAfter()) { 133 this->replyToken = new ResumptionToken(gsdlCollect, "", ""); 134 this->replyToken->setPosition("", i+2); 135 break; 136 } 137 } 138 139 cinfo.clear(); // Clear for next collection to use (if there is one). 140 } 141 } 142 143 //-------------------------------------------------------------------------------------------- 144 // Returns true if at least one document record is found 145 void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs ¶ms) 146 { 147 ColInfoResponse_t cinfo; 148 comerror_t err; 149 text_tarray collections; 150 text_t gsdlCollect = ""; 151 ResumptionToken *token = NULL; 152 153 // get a list of the collections available 154 collections = this->configuration->getCollectionsList(); 155 // protocol->get_collection_list(collections, err, output); 156 157 if (params["resumptionToken"] != "") { 158 token = new ResumptionToken(params["resumptionToken"]); 159 } 160 161 for(int current_col = 0; current_col < collections.size(); ++current_col){ 162 gsdlCollect = collections[current_col]; 163 164 // ignore all leading collections before the one that matches the resumptiontoken 165 if (token != NULL && 166 token->getCollection() != gsdlCollect) 167 { continue; 168 } 169 170 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params); 171 172 // once we've output at least one collection, continue 173 // outputting all others until the resumption total hits 174 token = NULL; 175 176 if (this->outputDocs == this->configuration->resumeAfter()) { 177 break; 178 } 179 } 180 } 181 182 //------------------------------------------------------------------------------------------------- 183 // Check that the 
requested from/until dates don't include a time, as this would be asking for too 184 // fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown. 185 /* 186 bool abstractlistaction::granularityTooFine(text_t &from, text_t &until) 187 { 188 if (from != "" && from.){ 189 190 } 191 192 } 193 */ 194 //------------------------------------------------------------------------------------------------- 195 196 bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, 197 const text_t &classifier) 198 { text_t topClass; 199 FilterResponse_t response; 200 text_tset metadata; 201 ofstream logout("oai.log", ios::app); 202 203 // exclude false children of a top-level classifier immediately... 204 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) { 205 return false; 206 } 207 208 // now check the top-level parent 209 metadata.insert("supportsmemberof"); 210 211 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.'); 212 if (dot != classifier.end()) { 213 topClass = substr(classifier.begin(), dot); 214 } 215 else { 216 topClass = classifier; 217 } 218 219 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) { 220 return false; 221 } 222 223 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) { 224 return false; 225 } 226 227 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") { 228 return false; 229 } 230 200 if (!get_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout)) 201 { 202 return false; 203 } 204 205 text_t last_modified_date; 206 this->getLastModifiedDate(response.docInfo[0], last_modified_date); 207 208 // Check this record is not before the "from" value, if it exists 209 if (from != "" && last_modified_date < from) 210 { 211 // Too early 212 return false; 213 } 214 215 // Check this record is not after the "until" value, if it 
exists 216 if (until != "" && last_modified_date > until) 217 { 218 // Too late 219 return false; 220 } 221 222 // Just right 231 223 return true; 232 224 } 233 234 void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,235 const text_t &classifier, oaiargs ¶ms, ResumptionToken *resumptionToken)236 {237 // metadata for this call238 FilterResponse_t response;239 text_tset metadata;240 ofstream logout("oai.log", ios::app);241 text_t from = params["from"];242 text_t until = params["until"];243 text_t metadataPrefix = params["metadataPrefix"];244 // ResumptionToken resumptionToken(params["resumptionToken"]);245 int startPos = 0;246 247 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily248 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It249 // is therefore not enough to check that the response object in the current iteration has no docs - we250 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.251 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The252 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,253 // the 'prevDocSeen' flag is still FALSE. 
The function returns false if no docs were seen, allowing us to254 // throw the noRecordsMatch error.255 256 // bool prevDocSeen = false;257 258 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);259 260 if (params["resumptionToken"] != "") {261 // if we're at a resumptionToken262 if (classifier == resumptionToken->getNode()) {263 startPos = resumptionToken->getPosition();264 }265 else {266 text_t fullNode = resumptionToken->getNode();267 text_t::iterator leafIter = fullNode.begin() + classifier.size();268 269 // if the next character isn't a dot, blow up!270 if (*leafIter != '.') {271 // fatal error;272 exit(1);273 }274 275 // get the first '.' after the current classifier point;276 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');277 278 // now, create a new subpath279 text_t nextNode = substr(fullNode.begin(), separator);280 281 // seek forward; TODO: improve performance of this282 for (int c = 0; c < response.numDocs; ++c) {283 if (response.docInfo[c].OID == nextNode) {284 startPos = c;285 break;286 }287 }288 }289 290 // We need to subtract one from the startPos value to turn it into an index value291 startPos--;292 }293 294 for (int c = startPos; c < response.numDocs; ++c) {295 text_t child = response.docInfo[c].OID;296 297 // distinguish classifiers and documents by checking whether OID298 // starts with CL or not299 text_t childHead;300 text_t::const_iterator start = child.begin();301 text_t::const_iterator here = child.begin();302 here += 2;303 childHead = substr(start, here);304 305 // documents we output now306 if (childHead != "CL") {307 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits308 // within the required date range (if specified)309 if (this->inDateRange(from, until, collection, child, protocol, output)) {310 // TODO: check that the document can be disseminated in the required metadataPrefix311 312 if (this->output_document(output, protocol, 
collection, child, metadataPrefix)) {313 this->prevDocSeen = true;314 ++this->outputDocs;315 }316 }317 }318 // children which are classifiers are recursed319 else {320 if (resumptionToken != NULL) {321 int depth = countchar(classifier.begin(), classifier.end(), '.');322 resumptionToken->setOffset(depth, c+2);323 }324 this->recurse_set(output, protocol, collection, child, params, resumptionToken);325 }326 327 if (this->outputDocs == this->configuration->resumeAfter()) {328 this->replyToken = new ResumptionToken(collection, params["set"], "");329 this->replyToken->setPosition(classifier, c+2);330 break;331 }332 }333 }334 335 336 337 -
gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.h
r16712 r20590 8 8 { 9 9 public: 10 abstractlistaction(const text_t &name) : oaiaction(name) {this->prevDocSeen = false;} 10 abstractlistaction(const text_t &name) : oaiaction(name) { } 11 11 12 virtual bool output_document(ostream &output, recptproto *protocol, const text_t &collection, 12 13 const text_t &OID, const text_t &metadataPrefix) = 0; 14 13 15 virtual bool output_content(ostream &output, recptproto *protocol, oaiargs &params); 14 virtual void output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params); 15 virtual void output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect, 16 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params); 17 virtual void recurse_set(ostream &output, recptproto *protocol, const text_t &collection, 18 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken); 16 17 virtual bool output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name); 18 19 19 20 protected: 20 bool prevDocSeen; 21 int outputDocs; 22 ResumptionToken *replyToken; 23 bool check_classifier(recptproto *protocol, const text_t &collection, const text_t &classifier); 21 int output_docs; 22 23 bool check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name); 24 25 bool in_date_range(ostream &output, recptproto *protocol, oaiargs &params, 26 text_t& collection, text_t oai_OID, text_t from, text_t until); 24 27 }; 28 25 29 #endif -
gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.cpp
r20574 r20590 1 1 #include "listsetsaction.h" 2 2 3 #if defined(GSDL_USE_STL_H) 4 #include <fstream.h> 5 #else 6 #include <fstream> 7 #endif 8 3 #include "resumptiontoken.h" 9 4 #include "recptprototools.h" 10 5 #include "oaitools.h" 6 11 7 12 8 bool listsetsaction::validateAction(recptproto *protocol, oaiargs ¶ms) … … 56 52 // Check the resumption token is valid 57 53 ResumptionToken token(params["resumptionToken"]); 58 if (t rue) // TO DO: Fix this (the token.isValid() function is useless for ListSets)54 if (token.isValid()) 59 55 { 60 56 // Everything is fine, and we don't continue further because this is an exclusive argument … … 90 86 bool listsetsaction::output_content(ostream &output, recptproto *protocol, oaiargs ¶ms) 91 87 { 92 // output the total list of classifier points 93 94 // variables required 95 text_t browseOID = "browse"; 96 FilterResponse_t response; 97 comerror_t err; 98 text_tarray & collections = this->configuration->getCollectionsList(); 99 text_tset metadata; 100 ofstream logout("oai.log", ios::app); 101 102 // get a list of the collections available 103 // protocol->get_collection_list(collections, err, output); 104 if (collections.size() == 0) { 105 logout << "Found *no* OAI collections - check main.cfg for oaicollection items and read the OAI documentation.\n"; 106 } 107 108 // check resumption token 109 int startSet = 0; 110 if (params["resumptionToken"] != "") { 111 ResumptionToken token(params["resumptionToken"]); 112 startSet = token.getPosition() - 1; // first document is said to be 1.. 
113 } 114 this->replyToken = NULL; 115 116 this->setNumber = 0; 88 // Reset variables 117 89 this->setsOutput = 0; 118 for(int current_col = 0; current_col < collections.size(); ++current_col) { 119 // output the collection as a set, first, then its children 120 text_t gsdlCollect = collections[current_col]; 121 90 91 text_t collection = ""; 92 93 // Process the resumptionToken if there is one 94 if (params["resumptionToken"] != "") 95 { 96 ResumptionToken resumption_token(params["resumptionToken"]); 97 collection = resumption_token.getSet(); 98 } 99 100 // Get a list of the collections available 101 text_tarray& collections = this->configuration->getCollectionsList(); 102 if (collections.size() == 0) 103 { 104 return false; 105 } 106 107 // Find the starting collection 108 text_tarray::iterator collection_iterator = collections.begin(); 109 while (collection_iterator != collections.end()) 110 { 111 if (collection == "" || collection == *collection_iterator) 112 { 113 break; 114 } 115 116 collection_iterator++; 117 } 118 119 // Now loop through the remaining collections 120 while (collection_iterator != collections.end()) 121 { 122 collection = (*collection_iterator); 123 124 // If we've output the desired number of records return a resumptionToken and we're done 122 125 if (this->setsOutput == this->configuration->resumeAfter()) 123 126 { 124 this->replyToken = new ResumptionToken("", "", ""); 125 this->replyToken->setPosition("", this->setNumber+1); 126 break; 127 } 128 129 if (this->setNumber >= startSet) 130 { 127 // Get the buildDate from the build.cfg file 128 ColInfoResponse_t cinfo; 129 comerror_t err; 130 protocol->get_collectinfo(collection, cinfo, err, cerr); 131 132 ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", ""); 133 134 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 135 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << 
endl; 136 return true; 137 } 138 139 // If output_content_for_col() returns false a resumption token has been output, so it's time to stop 140 if (output_content_for_col(output, protocol, params, collection) == false) 141 { 142 return true; 143 } 144 145 collection_iterator++; 146 } 147 148 return true; 149 } 150 151 152 bool listsetsaction::output_content_for_col(ostream &output, recptproto *protocol, oaiargs ¶ms, text_t collection) 153 { 154 text_t position = ""; 155 156 // Process the resumptionToken if there is one 157 if (params["resumptionToken"] != "") 158 { 159 ResumptionToken resumption_token(params["resumptionToken"]); 160 position = resumption_token.getPosition(); 161 } 162 163 // Get the list of sets in this collection 164 // Collections should not contain too many sets otherwise this will use a lot of time and memory 165 text_tset metadata; // Must be empty for efficiency 166 FilterResponse_t sets_response; 167 get_children("browse", collection, "", metadata, false, protocol, sets_response, *this->logout); 168 169 // Find the starting position, if necessary 170 ResultDocInfo_tarray::iterator set_iterator = sets_response.docInfo.begin(); 171 if (this->setsOutput == 0) 172 { 173 while (set_iterator != sets_response.docInfo.end()) 174 { 175 if (position == "" || position == (*set_iterator).OID) 176 { 177 break; 178 } 179 180 set_iterator++; 181 } 182 } 183 184 // Output the collection as a set 185 if (position == "") 186 { 187 output << " <set>" << endl; 188 output << " <setSpec>" << collection << "</setSpec>" << endl; 189 output << " <setName>" << collection << "</setName>" << endl; 190 output << " </set>" << endl; 191 this->setsOutput++; 192 } 193 194 // Now loop through displaying the next matching records 195 while (set_iterator != sets_response.docInfo.end()) 196 { 197 text_t set = (*set_iterator).OID; 198 199 // Only classifiers with supportsmemberof become OAI sets, for reasons I don't really understand 200 text_tset set_metadata; 201 
set_metadata.insert("supportsmemberof"); 202 set_metadata.insert("Title"); 203 FilterResponse_t set_response; 204 get_info(set, collection, "", set_metadata, false, protocol, set_response, *this->logout); 205 206 if (set_response.docInfo[0].metadata["supportsmemberof"].values.size() > 0 && set_response.docInfo[0].metadata["supportsmemberof"].values[0] == "true") 207 { 208 // If we've output the desired number of records return a resumptionToken and we're done 209 if (this->setsOutput == this->configuration->resumeAfter()) 210 { 211 // Get the buildDate from the build.cfg file 212 ColInfoResponse_t cinfo; 213 comerror_t err; 214 protocol->get_collectinfo(collection, cinfo, err, cerr); 215 216 ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", set); 217 218 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 219 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl; 220 return false; 221 } 222 223 // Otherwise output this set and increment the count 224 text_t set_title = set_response.docInfo[0].metadata["Title"].values[0]; 131 225 output << " <set>" << endl; 132 output << " <setSpec>" << gsdlCollect << "</setSpec>" << endl;;133 output << " <setName>" << gsdlCollect<< "</setName>" << endl;226 output << " <setSpec>" << collection << ":" << set << "</setSpec>" << endl; 227 output << " <setName>" << collection << ":" << set_title << "</setName>" << endl; 134 228 output << " </set>" << endl; 135 229 this->setsOutput++; 136 230 } 137 setNumber++; 138 139 // get all the children of the (relevant) classifier data structures 140 get_children(browseOID, gsdlCollect, "", metadata, false, protocol, response, logout); 141 // and send them to the "recurse_content" list 142 for (int c = 0; c < response.numDocs; ++c) { 143 this->recurse_content(output, protocol, gsdlCollect, response.docInfo[c].OID, gsdlCollect, startSet); 144 } 145 } 146 147 // do a resumption 
token if required; errors cancel a token... 148 if (this->replyToken != NULL && this->errorType == "") { 149 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 150 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl; 231 232 set_iterator++; 151 233 } 152 234 153 235 return true; 154 236 } 155 156 void listsetsaction::recurse_content(ostream &output, recptproto *protocol, text_t &collection,157 const text_t &classifier, text_t setHierarchy, int startSet)158 {159 // metadata for this call160 FilterResponse_t response;161 text_tset metadata;162 ofstream logout("oai.log", ios::app);163 164 if (this->setsOutput == this->configuration->resumeAfter())165 {166 this->replyToken = new ResumptionToken("", "", "");167 this->replyToken->setPosition("", this->setNumber+1);168 return;169 }170 171 metadata.insert("contains");172 metadata.insert("Title");173 metadata.insert("supportsmemberof");174 175 // get the document information176 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {177 //cerr << "recurse content: Bad identifier or protocol " << classifier << endl;178 return;179 }180 181 // check for top-level classifiers, check if the set name includes a '.'; if182 // not, it is a top-level classifier: check for memberof support. 
Those without183 // memberof support will not be supported on OAI184 if (findchar(classifier.begin(), classifier.end(), '.') == classifier.end()) {185 if (response.docInfo[0].metadata["supportsmemberof"].values.size() > 0) {186 text_t memberOf = response.docInfo[0].metadata["supportsmemberof"].values[0];187 if (memberOf != "true") {188 return;189 }190 }191 else {192 return;193 }194 }195 196 MetadataInfo_tmap::iterator here = response.docInfo[0].metadata.begin();197 MetadataInfo_tmap::iterator end = response.docInfo[0].metadata.end();198 text_t title;199 200 while (here != end)201 {202 // Each set should only have one title - hence we only output one title here203 // (it is a set title, not a collection)204 if (here->first == "Title" && here->second.values.size() > 0) {205 title = here->second.values[0];206 }207 208 ++here;209 }210 211 // output the xml for this set; use the classifier id for the name212 // if the title is blank213 // curSet holds the colon-separated sequence of parent sets of the current set214 text_t curSet;215 if (this->setNumber >= startSet)216 {217 output << " <set>" << endl;218 text_t oai_classifier = classifier;219 oaiclassifier::toOAI(collection, oai_classifier);220 output << " <setSpec>" << oai_classifier << "</setSpec>" << endl;221 output << " <setName>";222 if (!title.empty()) {223 curSet = setHierarchy + ":" + title;224 }225 else {226 curSet = classifier; // Pretty much never gets here (shouldn't, at least)227 }228 output << curSet;229 output << "</setName>" << endl;230 output << " </set>" << endl;231 this->setsOutput++;232 }233 this->setNumber++;234 235 // get the children of this classifier and iterate them236 get_children(classifier, collection, "", metadata, false, protocol, response, logout);237 for (int c = 0; c < response.numDocs; ++c) {238 text_t child = response.docInfo[c].OID;239 240 if (child == classifier)241 continue;242 243 // check for non classifier items and exclude them244 text_t childHead;245 text_t::const_iterator 
start = child.begin();246 text_t::const_iterator here = child.begin();247 here += 2;248 childHead = substr(start, here);249 250 if (childHead != "CL")251 continue;252 253 // Recurse for "proper" classifier children. Pass curSet, the colon-separated list of254 // parent sets. curSet is pass-by-value, so that as we step out of recursion we remember255 // old set hierarchies.256 this->recurse_content(output, protocol, collection, child, curSet, startSet);257 }258 259 return;260 } -
gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.h
r20574 r20590 6 6 public: 7 7 listsetsaction() : oaiaction("ListSets") { }; 8 8 9 virtual bool validateAction(recptproto *protocol, oaiargs ¶ms); 9 10 10 11 protected: 11 int setNumber; 12 int setsOutput; 13 ResumptionToken *replyToken; 12 int setsOutput; 13 14 14 bool output_content(ostream &output, recptproto *protocol, oaiargs ¶ms); 15 void recurse_content(ostream &output, recptproto *protocol, text_t &collection, const text_t &classifier, 16 text_t setHierarchy, int startSet);15 16 bool output_content_for_col(ostream &output, recptproto *protocol, oaiargs ¶ms, text_t collection); 17 17 }; -
gsdl/trunk/runtime-src/src/oaiservr/resumptiontoken.cpp
r15380 r20590 1 1 #include "resumptiontoken.h" 2 #include "oaitools.h"3 2 4 /** 5 * Generate an initial resumption token from some basic details. 6 * 7 * TODO: add optional argument to set the server name. 8 */9 ResumptionToken::ResumptionToken(const text_t &collection, const text_t &node, 10 const text_t &buildDate) 11 { this->collection = collection;12 this-> browseNode = node;13 this-> buildDate = buildDate;14 this-> startItem = 0;3 4 ResumptionToken::ResumptionToken(const text_t &build_date, const text_t &set, const text_t &metadata_prefix, 5 const text_t &from, const text_t &until, const text_t &position) 6 { 7 this->build_date = build_date; 8 this->set = set; 9 this->metadata_prefix = metadata_prefix; 10 this->from = from; 11 this->until = until; 12 this->position = position; 13 this->valid = true; 15 14 } 16 15 17 /**18 * Generate a resumption token from a URN-style format.19 *20 * See getToken() for details of the format.21 *22 * TODO: support inclusion of an optional server name.23 */24 ResumptionToken::ResumptionToken(const text_t &URN)25 { text_t::const_iterator first = URN.begin();26 text_t::const_iterator last = URN.end();27 text_t::const_iterator second;28 16 29 this->collection = ""; 30 this->browseNode = ""; 31 this->startItem = -1; 32 33 text_t::const_iterator here = findchar(first, last, ':'); 34 if (here == first) { 35 return; 17 ResumptionToken::ResumptionToken(const text_t &resumption_token_string) 18 { 19 this->build_date = ""; 20 this->set = ""; 21 this->metadata_prefix = ""; 22 this->from = ""; 23 this->until = ""; 24 this->position = ""; 25 26 // This uses custom code into of the text_t splitchar() function because that is buggy 27 text_tarray resumption_token_string_parts; 28 text_t resumption_token_string_part; 29 text_t::const_iterator resumption_token_string_iterator = resumption_token_string.begin(); 30 while (resumption_token_string_iterator != resumption_token_string.end()) 31 { 32 if (*resumption_token_string_iterator == ',') 33 { 
34 resumption_token_string_parts.push_back(resumption_token_string_part); 35 resumption_token_string_part.clear(); 36 } 37 else 38 { 39 resumption_token_string_part.push_back(*resumption_token_string_iterator); 40 } 41 42 resumption_token_string_iterator++; 36 43 } 37 38 text_t oainamespace = substr(first, here); 39 if (oainamespace != "gsdloai") { 44 resumption_token_string_parts.push_back(resumption_token_string_part); 45 46 if (resumption_token_string_parts.size() != 6) 47 { 48 // The resumption token is invalid -- there should be exactly 6 parts 49 this->valid = false; 40 50 return; 41 51 } 42 52 43 // increment past the first colon to get the location 44 first = ++here; 45 46 // get the collection, browseNode 47 here = findchar(first, last, ','); 48 if (here == last) { 49 return; 50 } 51 52 second = findchar(first, here,'.'); 53 this->collection = substr(first, second); 54 55 // cerr << "Collection " << this->collection << endl; 56 57 if (second != here) { 58 // get past the '.' 59 ++second; 60 this->browseNode = substr(second, here); 61 } 62 else { 63 first = here; 64 } 65 // get past the ',' 66 first = ++here; 67 68 // find the second ',' to delimit the position stack 69 second = findchar(first, last, ','); 70 71 // if not found, then get build and start item 72 if (second != first) { 73 // extract list and step past it 74 text_t offsetList = substr(first, second); 75 first = ++second; 76 77 do { 78 second = findchar(offsetList.begin(), offsetList.end(), '.'); 79 if (second == offsetList.end()) 80 break; 81 82 // extract and push the next position 83 text_t thisPos = substr(offsetList.begin(), second); 84 this->browsePosition.push_back(thisPos.getint()); 85 86 // pop the position from the list 87 offsetList = substr(++second, offsetList.end()); 88 } while (true); 89 this->browsePosition.push_back(offsetList.getint()); 90 } 91 else { 92 first ++; 93 } 94 95 // now find the build date marker 96 here = findchar(first, last, '-'); 97 if (here == first) { 98 
this->startItem = substr(first, last).getint(); 99 } 100 else { 101 this->startItem = substr(first, here).getint(); 102 this->buildDate = substr(++here, last); 103 } 53 this->build_date = resumption_token_string_parts[0]; 54 this->set = resumption_token_string_parts[1]; 55 this->metadata_prefix = resumption_token_string_parts[2]; 56 this->from = resumption_token_string_parts[3]; 57 this->until = resumption_token_string_parts[4]; 58 this->position = resumption_token_string_parts[5]; 59 this->valid = true; 104 60 } 105 61 106 /**107 * Get a resumption token in text_t format.108 *109 * Resumption tokens are in the format:110 *111 * gsdloai:<serverName>:collectionname.browseNode,startItem-BuildDate112 *113 * The resumption token format does not currently implement the use of114 * the optional <serverName> item; it is taken to default to the name of115 * the receiving server.116 *117 * TODO: add server identity as an optional argument; also change118 * ResumptionToken(text_t &) accordingly.119 */120 text_t ResumptionToken::getToken()121 { text_t reply = "gsdloai:";122 reply = reply + this->collection;123 if (this->browseNode != "") {124 reply = reply + "." 
+ this->browseNode;125 }126 reply = reply + ",";127 for (int i = 0; i < this->browsePosition.size(); i++) {128 if (i != 0) {129 reply.append(".");130 }131 reply.appendint(i);132 }133 reply = reply + ",";134 reply.append(this->startItem);135 reply = reply + "-" + buildDate;136 62 137 return reply; 63 text_t ResumptionToken::getResumptionTokenString() 64 { 65 return this->build_date + "," + this->set + "," + this->metadata_prefix + "," + this->from + "," + this->until + "," + this->position; 138 66 } 139 67 140 /** 141 * Update the position of an existing resumption token 142 */ 143 void ResumptionToken::setPosition(const text_t &node, int startItem) 144 { this->browseNode = node; 145 this->startItem = startItem; 68 69 bool ResumptionToken::isValid() 70 { 71 return this->valid; 146 72 } 147 148 /**149 * Check if the resumption token is valid - only a very primitive150 * check is done here; one ought to check for an existing collection151 * and valid browse Node, build date and startItem152 *153 * TODO: implement improved validation checking.154 */155 bool ResumptionToken::isValid()156 { return this->collection != "";157 } -
gsdl/trunk/runtime-src/src/oaiservr/resumptiontoken.h
r11769 r20590 2 2 #define _RESUMPTIONTOKEN_H_ 3 3 4 #include <vector>5 6 4 #include "text_t.h" 7 5 8 // use the standard namespace9 #if !defined (GSDL_NAMESPACE_BROKEN)10 #if defined(GSDL_USE_OBJECTSPACE)11 using namespace ospace::std;12 #else13 using namespace std;14 #endif15 #endif16 6 17 7 class ResumptionToken 18 { private: 19 text_t collection; 20 text_t browseNode; 21 vector<int> browsePosition; 22 int startItem; 23 text_t buildDate; 24 // TODO: add a server name to the variables list; see getToken in resumptionToken.cpp 8 { 9 private: 10 text_t build_date; 11 text_t set; 12 text_t metadata_prefix; 13 text_t from; 14 text_t until; 15 text_t position; 16 17 bool valid; 25 18 26 19 public: 27 ResumptionToken(const text_t & collection, const text_t &rootNode, const text_t &buildDate);28 ResumptionToken(const text_t &URN);29 text_t getToken();20 ResumptionToken(const text_t &build_date, const text_t &set, const text_t &metadata_prefix, 21 const text_t &from, const text_t &until, const text_t &position); 22 ResumptionToken(const text_t &resumption_token_string); 30 23 31 void setPosition(const text_t &node, int startItem); 24 text_t getBuildDate() { return build_date; } 25 text_t getSet() { return set; } 26 text_t getMetadataPrefix() { return metadata_prefix; } 27 text_t getFrom() { return from; } 28 text_t getUntil() { return until; } 29 text_t getPosition() { return position; } 32 30 33 text_t getCollection() { return collection; } 34 text_t getNode() { return browseNode; } 35 int getPosition() { return startItem; } 36 int getOffsetDepth() { return this->browsePosition.size(); } 37 void setOffset(int depth, int position) { this->browsePosition[depth] = position; } 38 int getOffset(int offset) { return this->browsePosition[offset]; } 31 text_t getResumptionTokenString(); 39 32 bool isValid(); 40 33 }; 41 34 35 42 36 #endif
Note:
See TracChangeset
for help on using the changeset viewer.