Changeset 20590 for gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.cpp
- Timestamp:
- 2009-09-11T11:54:17+12:00 (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.cpp
r20574 r20590 1 1 #include "listsetsaction.h" 2 2 3 #if defined(GSDL_USE_STL_H) 4 #include <fstream.h> 5 #else 6 #include <fstream> 7 #endif 8 3 #include "resumptiontoken.h" 9 4 #include "recptprototools.h" 10 5 #include "oaitools.h" 6 11 7 12 8 bool listsetsaction::validateAction(recptproto *protocol, oaiargs &params) … … 56 52 // Check the resumption token is valid 57 53 ResumptionToken token(params["resumptionToken"]); 58 if (true) // TO DO: Fix this (the token.isValid() function is useless for ListSets) 54 if (token.isValid()) 59 55 { 60 56 // Everything is fine, and we don't continue further because this is an exclusive argument … … 90 86 bool listsetsaction::output_content(ostream &output, recptproto *protocol, oaiargs &params) 91 87 { 92 // output the total list of classifier points 93 94 // variables required 95 text_t browseOID = "browse"; 96 FilterResponse_t response; 97 comerror_t err; 98 text_tarray & collections = this->configuration->getCollectionsList(); 99 text_tset metadata; 100 ofstream logout("oai.log", ios::app); 101 102 // get a list of the collections available 103 // protocol->get_collection_list(collections, err, output); 104 if (collections.size() == 0) { 105 logout << "Found *no* OAI collections - check main.cfg for oaicollection items and read the OAI documentation.\n"; 106 } 107 108 // check resumption token 109 int startSet = 0; 110 if (params["resumptionToken"] != "") { 111 ResumptionToken token(params["resumptionToken"]); 112 startSet = token.getPosition() - 1; // first document is said to be 1.. 
113 } 114 this->replyToken = NULL; 115 116 this->setNumber = 0; 88 // Reset variables 117 89 this->setsOutput = 0; 118 for(int current_col = 0; current_col < collections.size(); ++current_col) { 119 // output the collection as a set, first, then its children 120 text_t gsdlCollect = collections[current_col]; 121 90 91 text_t collection = ""; 92 93 // Process the resumptionToken if there is one 94 if (params["resumptionToken"] != "") 95 { 96 ResumptionToken resumption_token(params["resumptionToken"]); 97 collection = resumption_token.getSet(); 98 } 99 100 // Get a list of the collections available 101 text_tarray& collections = this->configuration->getCollectionsList(); 102 if (collections.size() == 0) 103 { 104 return false; 105 } 106 107 // Find the starting collection 108 text_tarray::iterator collection_iterator = collections.begin(); 109 while (collection_iterator != collections.end()) 110 { 111 if (collection == "" || collection == *collection_iterator) 112 { 113 break; 114 } 115 116 collection_iterator++; 117 } 118 119 // Now loop through the remaining collections 120 while (collection_iterator != collections.end()) 121 { 122 collection = (*collection_iterator); 123 124 // If we've output the desired number of records return a resumptionToken and we're done 122 125 if (this->setsOutput == this->configuration->resumeAfter()) 123 126 { 124 this->replyToken = new ResumptionToken("", "", ""); 125 this->replyToken->setPosition("", this->setNumber+1); 126 break; 127 } 128 129 if (this->setNumber >= startSet) 130 { 127 // Get the buildDate from the build.cfg file 128 ColInfoResponse_t cinfo; 129 comerror_t err; 130 protocol->get_collectinfo(collection, cinfo, err, cerr); 131 132 ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", ""); 133 134 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 135 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << 
endl; 136 return true; 137 } 138 139 // If output_content_for_col() returns false a resumption token has been output, so it's time to stop 140 if (output_content_for_col(output, protocol, params, collection) == false) 141 { 142 return true; 143 } 144 145 collection_iterator++; 146 } 147 148 return true; 149 } 150 151 152 bool listsetsaction::output_content_for_col(ostream &output, recptproto *protocol, oaiargs &params, text_t collection) 153 { 154 text_t position = ""; 155 156 // Process the resumptionToken if there is one 157 if (params["resumptionToken"] != "") 158 { 159 ResumptionToken resumption_token(params["resumptionToken"]); 160 position = resumption_token.getPosition(); 161 } 162 163 // Get the list of sets in this collection 164 // Collections should not contain too many sets otherwise this will use a lot of time and memory 165 text_tset metadata; // Must be empty for efficiency 166 FilterResponse_t sets_response; 167 get_children("browse", collection, "", metadata, false, protocol, sets_response, *this->logout); 168 169 // Find the starting position, if necessary 170 ResultDocInfo_tarray::iterator set_iterator = sets_response.docInfo.begin(); 171 if (this->setsOutput == 0) 172 { 173 while (set_iterator != sets_response.docInfo.end()) 174 { 175 if (position == "" || position == (*set_iterator).OID) 176 { 177 break; 178 } 179 180 set_iterator++; 181 } 182 } 183 184 // Output the collection as a set 185 if (position == "") 186 { 187 output << " <set>" << endl; 188 output << " <setSpec>" << collection << "</setSpec>" << endl; 189 output << " <setName>" << collection << "</setName>" << endl; 190 output << " </set>" << endl; 191 this->setsOutput++; 192 } 193 194 // Now loop through displaying the next matching records 195 while (set_iterator != sets_response.docInfo.end()) 196 { 197 text_t set = (*set_iterator).OID; 198 199 // Only classifiers with supportsmemberof become OAI sets, for reasons I don't really understand 200 text_tset set_metadata; 201 
set_metadata.insert("supportsmemberof"); 202 set_metadata.insert("Title"); 203 FilterResponse_t set_response; 204 get_info(set, collection, "", set_metadata, false, protocol, set_response, *this->logout); 205 206 if (set_response.docInfo[0].metadata["supportsmemberof"].values.size() > 0 && set_response.docInfo[0].metadata["supportsmemberof"].values[0] == "true") 207 { 208 // If we've output the desired number of records return a resumptionToken and we're done 209 if (this->setsOutput == this->configuration->resumeAfter()) 210 { 211 // Get the buildDate from the build.cfg file 212 ColInfoResponse_t cinfo; 213 comerror_t err; 214 protocol->get_collectinfo(collection, cinfo, err, cerr); 215 216 ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", set); 217 218 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 219 output << " <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl; 220 return false; 221 } 222 223 // Otherwise output this set and increment the count 224 text_t set_title = set_response.docInfo[0].metadata["Title"].values[0]; 131 225 output << " <set>" << endl; 132 output << " <setSpec>" << gsdlCollect << "</setSpec>" << endl;;133 output << " <setName>" << gsdlCollect<< "</setName>" << endl;226 output << " <setSpec>" << collection << ":" << set << "</setSpec>" << endl; 227 output << " <setName>" << collection << ":" << set_title << "</setName>" << endl; 134 228 output << " </set>" << endl; 135 229 this->setsOutput++; 136 230 } 137 setNumber++; 138 139 // get all the children of the (relevant) classifier data structures 140 get_children(browseOID, gsdlCollect, "", metadata, false, protocol, response, logout); 141 // and send them to the "recurse_content" list 142 for (int c = 0; c < response.numDocs; ++c) { 143 this->recurse_content(output, protocol, gsdlCollect, response.docInfo[c].OID, gsdlCollect, startSet); 144 } 145 } 146 147 // do a resumption 
token if required; errors cancel a token... 148 if (this->replyToken != NULL && this->errorType == "") { 149 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators 150 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl; 231 232 set_iterator++; 151 233 } 152 234 153 235 return true; 154 236 } 155 156 void listsetsaction::recurse_content(ostream &output, recptproto *protocol, text_t &collection,157 const text_t &classifier, text_t setHierarchy, int startSet)158 {159 // metadata for this call160 FilterResponse_t response;161 text_tset metadata;162 ofstream logout("oai.log", ios::app);163 164 if (this->setsOutput == this->configuration->resumeAfter())165 {166 this->replyToken = new ResumptionToken("", "", "");167 this->replyToken->setPosition("", this->setNumber+1);168 return;169 }170 171 metadata.insert("contains");172 metadata.insert("Title");173 metadata.insert("supportsmemberof");174 175 // get the document information176 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {177 //cerr << "recurse content: Bad identifier or protocol " << classifier << endl;178 return;179 }180 181 // check for top-level classifiers, check if the set name includes a '.'; if182 // not, it is a top-level classifier: check for memberof support. 
Those without183 // memberof support will not be supported on OAI184 if (findchar(classifier.begin(), classifier.end(), '.') == classifier.end()) {185 if (response.docInfo[0].metadata["supportsmemberof"].values.size() > 0) {186 text_t memberOf = response.docInfo[0].metadata["supportsmemberof"].values[0];187 if (memberOf != "true") {188 return;189 }190 }191 else {192 return;193 }194 }195 196 MetadataInfo_tmap::iterator here = response.docInfo[0].metadata.begin();197 MetadataInfo_tmap::iterator end = response.docInfo[0].metadata.end();198 text_t title;199 200 while (here != end)201 {202 // Each set should only have one title - hence we only output one title here203 // (it is a set title, not a collection)204 if (here->first == "Title" && here->second.values.size() > 0) {205 title = here->second.values[0];206 }207 208 ++here;209 }210 211 // output the xml for this set; use the classifier id for the name212 // if the title is blank213 // curSet holds the colon-separated sequence of parent sets of the current set214 text_t curSet;215 if (this->setNumber >= startSet)216 {217 output << " <set>" << endl;218 text_t oai_classifier = classifier;219 oaiclassifier::toOAI(collection, oai_classifier);220 output << " <setSpec>" << oai_classifier << "</setSpec>" << endl;221 output << " <setName>";222 if (!title.empty()) {223 curSet = setHierarchy + ":" + title;224 }225 else {226 curSet = classifier; // Pretty much never gets here (shouldn't, at least)227 }228 output << curSet;229 output << "</setName>" << endl;230 output << " </set>" << endl;231 this->setsOutput++;232 }233 this->setNumber++;234 235 // get the children of this classifier and iterate them236 get_children(classifier, collection, "", metadata, false, protocol, response, logout);237 for (int c = 0; c < response.numDocs; ++c) {238 text_t child = response.docInfo[c].OID;239 240 if (child == classifier)241 continue;242 243 // check for non classifier items and exclude them244 text_t childHead;245 text_t::const_iterator 
start = child.begin();246 text_t::const_iterator here = child.begin();247 here += 2;248 childHead = substr(start, here);249 250 if (childHead != "CL")251 continue;252 253 // Recurse for "proper" classifier children. Pass curSet, the colon-separated list of254 // parent sets. curSet is pass-by-value, so that as we step out of recursion we remember255 // old set hierarchies.256 this->recurse_content(output, protocol, collection, child, curSet, startSet);257 }258 259 return;260 }
Note:
See TracChangeset
for help on using the changeset viewer.