Ignore:
Timestamp:
2009-09-11T11:54:17+12:00 (15 years ago)
Author:
mdewsnip
Message:

Completely rewrote the resumption token support, as its bugginess finally tipped the "I can't stand it any more" scale...

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.cpp

    r20574 r20590  
    11#include "listsetsaction.h"
    22
    3 #if defined(GSDL_USE_STL_H)
    4 #include <fstream.h>
    5 #else
    6 #include <fstream>
    7 #endif
    8 
     3#include "resumptiontoken.h"
    94#include "recptprototools.h"
    105#include "oaitools.h"
     6
    117
    128bool listsetsaction::validateAction(recptproto *protocol, oaiargs &params)
     
    5652    // Check the resumption token is valid
    5753    ResumptionToken token(params["resumptionToken"]);
    58     if (true)  // TO DO: Fix this (the token.isValid() function is useless for ListSets)
     54    if (token.isValid())
    5955    {
    6056      // Everything is fine, and we don't continue further because this is an exclusive argument
     
    9086bool listsetsaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
    9187{
    92   // output the total list of classifier points
    93 
    94   // variables required
    95   text_t browseOID = "browse";
    96   FilterResponse_t response;
    97   comerror_t       err;
    98   text_tarray &    collections = this->configuration->getCollectionsList();
    99   text_tset        metadata;
    100   ofstream         logout("oai.log", ios::app);
    101  
    102   // get a list of the collections available
    103   //  protocol->get_collection_list(collections, err, output);
    104   if (collections.size() == 0) {
    105     logout << "Found *no* OAI collections - check main.cfg for oaicollection items and read the OAI documentation.\n";
    106   }
    107 
    108   // check resumption token
    109   int startSet = 0;
    110   if (params["resumptionToken"] != "") {
    111     ResumptionToken token(params["resumptionToken"]);
    112     startSet = token.getPosition() - 1; // first document is said to be 1..
    113   }
    114   this->replyToken = NULL;
    115 
    116   this->setNumber = 0;
     88  // Reset variables
    11789  this->setsOutput = 0;
    118   for(int current_col = 0; current_col < collections.size(); ++current_col) {
    119     // output the collection as a set, first, then its children
    120     text_t gsdlCollect = collections[current_col];
    121 
     90
     91  text_t collection = "";
     92
     93  // Process the resumptionToken if there is one
     94  if (params["resumptionToken"] != "")
     95  {
     96    ResumptionToken resumption_token(params["resumptionToken"]);
     97    collection = resumption_token.getSet();
     98  }
     99
     100  // Get a list of the collections available
     101  text_tarray& collections = this->configuration->getCollectionsList();
     102  if (collections.size() == 0)
     103  {
     104    return false;
     105  }
     106
     107  // Find the starting collection
     108  text_tarray::iterator collection_iterator = collections.begin();
     109  while (collection_iterator != collections.end())
     110  {
     111    if (collection == "" || collection == *collection_iterator)
     112    {
     113      break;
     114    }
     115
     116    collection_iterator++;
     117  }
     118
     119  // Now loop through the remaining collections
     120  while (collection_iterator != collections.end())
     121  {
     122    collection = (*collection_iterator);
     123
     124    // If we've output the desired number of records return a resumptionToken and we're done
    122125    if (this->setsOutput == this->configuration->resumeAfter())
    123126    {
    124       this->replyToken = new ResumptionToken("", "", "");
    125       this->replyToken->setPosition("", this->setNumber+1);
    126       break;
    127     }
    128 
    129     if (this->setNumber >= startSet)
    130     {
     127      // Get the buildDate from the build.cfg file
     128      ColInfoResponse_t cinfo;
     129      comerror_t err;
     130      protocol->get_collectinfo(collection, cinfo, err, cerr);
     131
     132      ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", "");
     133
     134      // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
     135      output << "  <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
     136      return true;
     137    }
     138
     139    // If output_content_for_col() returns false a resumption token has been output, so it's time to stop
     140    if (output_content_for_col(output, protocol, params, collection) == false)
     141    {
     142      return true;
     143    }
     144
     145    collection_iterator++;
     146  }
     147
     148  return true;
     149}
     150
     151
     152bool listsetsaction::output_content_for_col(ostream &output, recptproto *protocol, oaiargs &params, text_t collection)
     153{
     154  text_t position = "";
     155
     156  // Process the resumptionToken if there is one
     157  if (params["resumptionToken"] != "")
     158  {
     159    ResumptionToken resumption_token(params["resumptionToken"]);
     160    position = resumption_token.getPosition();
     161  }
     162
     163  // Get the list of sets in this collection
     164  // Collections should not contain too many sets otherwise this will use a lot of time and memory
     165  text_tset metadata;  // Must be empty for efficiency
     166  FilterResponse_t sets_response;
     167  get_children("browse", collection, "", metadata, false, protocol, sets_response, *this->logout);
     168
     169  // Find the starting position, if necessary
     170  ResultDocInfo_tarray::iterator set_iterator = sets_response.docInfo.begin();
     171  if (this->setsOutput == 0)
     172  {
     173    while (set_iterator != sets_response.docInfo.end())
     174    {
     175      if (position == "" || position == (*set_iterator).OID)
     176      {
     177    break;
     178      }
     179
     180      set_iterator++;
     181    }
     182  }
     183
     184  // Output the collection as a set
     185  if (position == "")
     186  {
     187    output << "  <set>" << endl;
     188    output << "    <setSpec>" << collection << "</setSpec>" << endl;
     189    output << "    <setName>" << collection << "</setName>" << endl;
     190    output << "  </set>" << endl;
     191    this->setsOutput++;
     192  }
     193
     194  // Now loop through displaying the next matching records
     195  while (set_iterator != sets_response.docInfo.end())
     196  {
     197    text_t set = (*set_iterator).OID;
     198
     199    // Only classifiers with supportsmemberof become OAI sets, for reasons I don't really understand
     200    text_tset set_metadata;
     201    set_metadata.insert("supportsmemberof");
     202    set_metadata.insert("Title");
     203    FilterResponse_t set_response;
     204    get_info(set, collection, "", set_metadata, false, protocol, set_response, *this->logout);
     205
     206    if (set_response.docInfo[0].metadata["supportsmemberof"].values.size() > 0 && set_response.docInfo[0].metadata["supportsmemberof"].values[0] == "true")
     207    {
     208      // If we've output the desired number of records return a resumptionToken and we're done
     209      if (this->setsOutput == this->configuration->resumeAfter())
     210      {
     211    // Get the buildDate from the build.cfg file
     212    ColInfoResponse_t cinfo;
     213    comerror_t err;
     214    protocol->get_collectinfo(collection, cinfo, err, cerr);
     215
     216    ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", set);
     217
     218    // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
     219    output << "  <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
     220    return false;
     221      }
     222
     223      // Otherwise output this set and increment the count
     224      text_t set_title = set_response.docInfo[0].metadata["Title"].values[0];
    131225      output << "  <set>" << endl;
    132       output << "    <setSpec>" << gsdlCollect << "</setSpec>" << endl;;
    133       output << "    <setName>" << gsdlCollect << "</setName>" << endl;
     226      output << "    <setSpec>" << collection << ":" << set << "</setSpec>" << endl;
     227      output << "    <setName>" << collection << ":" << set_title << "</setName>" << endl;
    134228      output << "  </set>" << endl;
    135229      this->setsOutput++;
    136230    }
    137     setNumber++;
    138 
    139     // get all the children of the (relevant) classifier data structures
    140     get_children(browseOID, gsdlCollect, "", metadata, false, protocol, response, logout);
    141     // and send them to the "recurse_content" list
    142     for (int c = 0; c < response.numDocs; ++c) {
    143       this->recurse_content(output, protocol, gsdlCollect, response.docInfo[c].OID, gsdlCollect, startSet);
    144     }
    145   }
    146 
    147   // do a resumption token if required; errors cancel a token...
    148   if (this->replyToken != NULL && this->errorType == "") {
    149     // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
    150     output << "  <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
     231
     232    set_iterator++;
    151233  }
    152234
    153235  return true;
    154236}
    155 
    156 void listsetsaction::recurse_content(ostream &output, recptproto *protocol, text_t &collection,
    157                      const text_t &classifier, text_t setHierarchy, int startSet)
    158 {
    159   // metadata for this call
    160   FilterResponse_t response;
    161   text_tset        metadata;
    162   ofstream         logout("oai.log", ios::app);
    163 
    164   if (this->setsOutput == this->configuration->resumeAfter())
    165   {
    166     this->replyToken = new ResumptionToken("", "", "");
    167     this->replyToken->setPosition("", this->setNumber+1);
    168     return;
    169   }
    170 
    171   metadata.insert("contains");
    172   metadata.insert("Title");
    173   metadata.insert("supportsmemberof");
    174 
    175   // get the document information
    176   if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
    177     //cerr << "recurse content: Bad identifier or protocol " << classifier << endl;
    178     return;
    179   }
    180 
    181   // check for top-level classifiers, check if the set name includes a '.'; if
    182   // not, it is a top-level classifier: check for memberof support.  Those without
    183   // memberof support will not be supported on OAI
    184   if (findchar(classifier.begin(), classifier.end(), '.') == classifier.end()) {
    185     if (response.docInfo[0].metadata["supportsmemberof"].values.size() > 0) {
    186       text_t memberOf = response.docInfo[0].metadata["supportsmemberof"].values[0];
    187       if (memberOf != "true") {
    188     return;
    189       }
    190     }
    191     else {
    192       return;
    193     }
    194   }
    195 
    196   MetadataInfo_tmap::iterator here = response.docInfo[0].metadata.begin();
    197   MetadataInfo_tmap::iterator end  = response.docInfo[0].metadata.end();
    198   text_t title;
    199 
    200   while (here != end)
    201   {
    202     // Each set should only have one title - hence we only output one title here
    203     // (it is a set title, not a collection)
    204     if (here->first == "Title" && here->second.values.size() > 0) {
    205       title = here->second.values[0];
    206     }
    207 
    208     ++here;
    209   }
    210 
    211   // output the xml for this set; use the classifier id for the name
    212   // if the title is blank
    213   // curSet holds the colon-separated sequence of parent sets of the current set
    214   text_t curSet;
    215   if (this->setNumber >= startSet)
    216   {
    217     output << "  <set>" << endl;
    218     text_t oai_classifier = classifier;
    219     oaiclassifier::toOAI(collection, oai_classifier);
    220     output << "    <setSpec>" << oai_classifier << "</setSpec>" << endl;
    221     output << "    <setName>";
    222     if (!title.empty()) {
    223       curSet = setHierarchy + ":" + title;
    224     }
    225     else {
    226       curSet = classifier; // Pretty much never gets here (shouldn't, at least)
    227     }
    228     output << curSet;
    229     output << "</setName>" << endl;
    230     output << "  </set>" << endl;
    231     this->setsOutput++;
    232   }
    233   this->setNumber++;
    234 
    235   // get the children of this classifier and iterate them
    236   get_children(classifier, collection, "", metadata, false, protocol, response, logout);
    237   for (int c = 0; c < response.numDocs; ++c) {
    238     text_t child = response.docInfo[c].OID;
    239 
    240     if (child == classifier)
    241       continue;
    242 
    243     // check for non classifier items and exclude them
    244     text_t childHead;
    245     text_t::const_iterator start = child.begin();
    246     text_t::const_iterator here  = child.begin();
    247     here += 2;
    248     childHead = substr(start, here);
    249 
    250     if (childHead != "CL")
    251       continue;
    252 
    253     // Recurse for "proper" classifier children. Pass curSet, the colon-separated list of
    254     // parent sets. curSet is pass-by-value, so that as we step out of recursion we remember
    255     // old set hierarchies.
    256     this->recurse_content(output, protocol, collection, child, curSet, startSet);
    257   }
    258  
    259   return;
    260 }
Note: See TracChangeset for help on using the changeset viewer.