Changeset 20590


Ignore:
Timestamp:
2009-09-11T11:54:17+12:00 (15 years ago)
Author:
mdewsnip
Message:

Completely rewrote the resumption token support, as its bugginess finally tipped the "I can't stand it any more" scale...

Location:
gsdl/trunk/runtime-src/src/oaiservr
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp

    r16835 r20590  
    11#include "abstractlistaction.h"
     2#include "OIDtools.h"
    23#include "recptprototools.h"
    34
    45#include "oaitools.h"
    56
    6 //--------------------------------------------------------------------------------------------------
     7
     8bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name)
     9{
     10  text_tset metadata;
     11  FilterResponse_t response;
     12  return get_info(set_name, collection, "", metadata, false, protocol, response, *this->logout);
     13}
     14
    715
    816bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
    917{
    10   bool   prevDocSeen;
    11   ResumptionToken *token = NULL;
    12 
    13   // start the call; clear down the total number of output documents
    14   this->outputDocs = 0;
    15 
    16   // We don't actually handle resumptionTokens yet; if we get one, ignore it
    17   if (params["resumptionToken"] != "") {
    18     token = new ResumptionToken(params["resumptionToken"]);
    19   }
    20 
    21   this->replyToken = NULL;
    22 
    23   // if we've been asked for a set, then use it!
    24   if (params["set"] != "") {
    25     // get the children of this set
    26     text_t gsdlSet = params["set"];
    27     text_t gsdlCollect = "";
    28 
    29     // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
    30     // set specified after the name of the collection however, then gsdlSet is empty.
    31     oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
    32 
    33     // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
    34     // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
    35     // use recurse_set() to traverse any sub classifiers to find the relevant docs.
    36     if(gsdlSet == ""){
    37       ColInfoResponse_t cinfo;
    38       comerror_t err;
    39       this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
    40     }
    41     else {
    42       if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
    43     this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
    44       }
    45     }
    46   }
    47   // output all records in all hierarchies
    48   else {
    49     this->output_content_for_all(output, protocol, params);
    50   }
    51 
    52   // If - regardless of set required - no documents have been seen, throw an error.
    53   if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
     18  // Reset variables
     19  this->output_docs = 0;
     20
     21  text_t set_name = params["set"];
     22  text_t position = params["position"];
     23
     24  // Process the resumptionToken if there is one
     25  if (params["resumptionToken"] != "")
     26  {
     27    ResumptionToken resumption_token(params["resumptionToken"]);
     28    set_name = resumption_token.getSet();
     29    position = resumption_token.getPosition();
     30  }
     31
     32  // Case for "set" argument present -- output just the records in the specified set
     33  if (set_name != "")
     34  {
     35    // Separate the collection name and Greenstone classifier OID from the set name
     36    text_t collection_name = "";
     37    text_t gsdl_classifier_OID = set_name;
     38    oaiclassifier::toGSDL(collection_name, gsdl_classifier_OID);
     39
     40    // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
     41    if (output_content_for_set(output, protocol, params, collection_name, gsdl_classifier_OID, set_name) == false)
     42    {
     43      return true;
     44    }
     45  }
     46
     47  // Case for no "set" argument present -- output all records in all collections
     48  else
     49  {
     50    // Get a list of the collections available
     51    text_tarray& collections = this->configuration->getCollectionsList();
     52    if (collections.size() == 0)
     53    {
     54      return false;
     55    }
     56
     57    // Get the current collection from the position value
     58    text_t collection_name = "";
     59    oaiclassifier::toGSDL(collection_name, position);
     60
     61    // Find the starting collection
     62    text_tarray::iterator collection_iterator = collections.begin();
     63    while (collection_iterator != collections.end())
     64    {
     65      if (collection_name == "" || collection_name == *collection_iterator)
     66      {
     67    break;
     68      }
     69
     70      collection_iterator++;
     71    }
     72
     73    // Now loop through the remaining collections
     74    while (collection_iterator != collections.end())
     75    {
     76      // If output_content_for_set() returns false a resumption token has been output, so it's time to stop
     77      if (output_content_for_set(output, protocol, params, *collection_iterator, "", "") == false)
     78      {
     79    return true;
     80      }
     81
     82      collection_iterator++;
     83    }
     84  }
     85
     86  // If no records were output throw an error
     87  if (this->configuration->getOAIVersion() >= 200 && this->output_docs == 0)
     88  {
    5489    errorType = "noRecordsMatch";
    5590    this->output_error(output, errorType);
    56 
    57     return false;
    58   }
    59 
    60   // do a resumption token if required; errors cancel a token...
    61   if (this->replyToken != NULL && this->errorType == "") {
    62     // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
    63     output << "  <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
     91    return false;
    6492  }
    6593
     
    6795}
    6896
    69 //--------------------------------------------------------------------------------------------------
    70 
    71 void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
    72                         ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
    73 { int startDoc = 0;
    74 
    75   text_t metadataPrefix = params["metadataPrefix"];
    76 
    77   // check resumption token
    78   if (params["resumptionToken"] != "") {
    79     ResumptionToken token(params["resumptionToken"]);
    80     if (token.getCollection() == gsdlCollect) {
    81       startDoc = token.getPosition() - 1; // first document is said to be 1..
    82       metadataPrefix = "oai_dc";  // TO DO: This should come from the resumption token
    83     }
    84   }
    85 
    86   // Get the OAI nodes from the info db file
    87   text_t oai_root_node = "oai";
     97
     98bool abstractlistaction::output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name)
     99{
     100  // Check if the set is actually a collection
     101  if (gsdl_classifier_OID == "")
     102  {
     103    gsdl_classifier_OID = "oai";
     104  }
     105
     106  text_t metadata_prefix = params["metadataPrefix"];
     107  text_t from = params["from"];
     108  text_t until = params["until"];
     109  text_t position = "";
     110
     111  // Process the resumptionToken if there is one
     112  if (params["resumptionToken"] != "")
     113  {
     114    ResumptionToken resumption_token(params["resumptionToken"]);
     115    metadata_prefix = resumption_token.getMetadataPrefix();
     116    from = resumption_token.getFrom();
     117    until = resumption_token.getUntil();
     118    position = resumption_token.getPosition();
     119  }
     120
     121  // Get the list of identifiers in this collection
     122  // Collections should not contain too many identifiers otherwise this will use a lot of time and memory
    88123  text_tset metadata;  // Must be empty for efficiency
     124  FilterResponse_t identifiers_response;
     125  get_children(gsdl_classifier_OID, collection_name, "", metadata, false, protocol, identifiers_response, *this->logout);
     126
     127  // Find the starting position, if necessary
     128  ResultDocInfo_tarray::iterator identifier_iterator = identifiers_response.docInfo.begin();
     129  if (output_docs == 0)
     130  {
     131    while (identifier_iterator != identifiers_response.docInfo.end())
     132    {
     133      if (position == "" || position == (collection_name + ":" + (*identifier_iterator).OID))
     134      {
     135    break;
     136      }
     137
     138      identifier_iterator++;
     139    }
     140  }
     141
     142  // Now loop through displaying the next matching records
     143  while (identifier_iterator != identifiers_response.docInfo.end())
     144  {
     145    position = (*identifier_iterator).OID;
     146
     147    text_t document_OID = position;
     148    if (starts_with(document_OID, "oai."))
     149    {
     150      document_OID = oaiclassifier::getGSDL_OID(collection_name, document_OID, protocol, *this->logout);
     151    }
     152
     153    // Check this OID is in the (optional) date range specified
     154    if (this->in_date_range(output, protocol, params, collection_name, document_OID, from, until))
     155    {
     156      // If we've output the desired number of records return a resumptionToken and we're done
     157      if (this->output_docs == this->configuration->resumeAfter())
     158      {
     159    // Get the buildDate from the build.cfg file
     160    ColInfoResponse_t cinfo;
     161    comerror_t err;
     162    protocol->get_collectinfo(collection_name, cinfo, err, cerr);
     163
     164    ResumptionToken resumption_token(cinfo.buildDate, set_name, metadata_prefix, from, until, collection_name + ":" + position);
     165
     166    // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
     167    output << "  <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
     168    return false;
     169      }
     170
     171      // Otherwise output this record and increment the count
     172      this->output_document(output, protocol, collection_name, document_OID, metadata_prefix);
     173      this->output_docs++;
     174    }
     175
     176    identifier_iterator++;
     177  }
     178
     179  return true;
     180}
     181
     182
     183bool abstractlistaction::in_date_range(ostream &output, recptproto *protocol, oaiargs &params,
     184                       text_t& collection, text_t oai_OID, text_t from, text_t until)
     185{
     186  // If no "from" or "until" value is specified every record matches, so we don't need to go any further
     187  if (from == "" && until == "")
     188  {
     189    return true;
     190  }
     191
     192  // Get the datestamp from the document as sections do not have this metadata
     193  text_t document_OID;
     194  get_top(oai_OID, document_OID);
     195
     196  // Request the lastmodified value for this document
     197  text_tset metadata;
     198  metadata.insert("lastmodified");
    89199  FilterResponse_t response;
    90   get_children(oai_root_node, gsdlCollect, "", metadata, false, protocol, response, *this->logout);
    91  
    92   // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
    93   // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
    94   // been set to true by a previous collection that this won't overwrite it to be false).
    95   if (response.docInfo.size() > 0) {
    96     int errorCount      = 0; // Count the number of errors found in the given collection
    97 
    98     for (long i = startDoc; i < response.docInfo.size(); ++i) {
    99       if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
    100     cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
    101          << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
    102     return;
    103       }
    104      
    105       text_t oai_id = "oai.";
    106       oai_id += i;
    107      
    108       text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
    109      
    110       if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
    111     ++errorCount;     // increase error count
    112     continue;
    113       }
    114 
    115 
    116       // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
    117       // the required date range (if specified).
    118       if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
    119     if (this->output_document(output, protocol, gsdlCollect, gsdl_id, metadataPrefix)) {
    120       // this should be an IF statement, where prevDocSeen is only set to true if the above
    121       // function call returns true (indicating that the doc supported the metadata prefix) but
    122       // for some reason this is always false. This means that if no doc in the requested set supports
    123       // the metadata format, the "no records match" error that should be thrown won't be...
    124       //
    125       // GRB: the above comment is no longer true; proper checks are made
    126       this->prevDocSeen = true;
    127       ++this->outputDocs;
    128     }
    129       }
    130 
    131       // if we've output the number of resumption documents; prepare a resumptionToken
    132       if (this->outputDocs == this->configuration->resumeAfter()) {
    133     this->replyToken = new ResumptionToken(gsdlCollect, "", "");
    134     this->replyToken->setPosition("", i+2);
    135     break;
    136       }
    137     }
    138 
    139     cinfo.clear(); // Clear for next collection to use (if there is one).
    140   }
    141 }
    142 
    143 //--------------------------------------------------------------------------------------------
    144 // Returns true if at least one document record is found
    145 void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
    146 {
    147   ColInfoResponse_t cinfo;
    148   comerror_t        err;
    149   text_tarray       collections;
    150   text_t            gsdlCollect = "";
    151   ResumptionToken   *token = NULL;
    152 
    153   // get a list of the collections available
    154   collections = this->configuration->getCollectionsList();
    155   //  protocol->get_collection_list(collections, err, output);
    156  
    157   if (params["resumptionToken"] != "") {
    158     token = new ResumptionToken(params["resumptionToken"]);
    159   }
    160 
    161   for(int current_col = 0; current_col < collections.size(); ++current_col){
    162     gsdlCollect = collections[current_col];
    163 
    164     // ignore all leading collections before the one that matches the resumptiontoken
    165     if (token != NULL &&
    166     token->getCollection() != gsdlCollect)
    167     { continue;
    168     }
    169 
    170     this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
    171 
    172     // once we've output at least one collection, continue
    173     // outputting all others until the resumption total hits
    174     token = NULL;
    175 
    176     if (this->outputDocs == this->configuration->resumeAfter()) {
    177       break;
    178     }
    179   }
    180 }
    181 
    182 //-------------------------------------------------------------------------------------------------
    183 // Check that the requested from/until dates don't include a time, as this would be asking for too
    184 // fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
    185 /*
    186 bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
    187 {
    188   if (from != "" && from.){
    189    
    190   }
    191  
    192 }
    193 */
    194 //-------------------------------------------------------------------------------------------------
    195 
    196 bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
    197                       const text_t &classifier)
    198 { text_t topClass;
    199   FilterResponse_t response;
    200   text_tset        metadata;
    201   ofstream         logout("oai.log", ios::app);
    202 
    203   // exclude false children of a top-level classifier immediately...
    204   if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
    205     return false;
    206   }
    207  
    208   // now check the top-level parent
    209   metadata.insert("supportsmemberof");
    210 
    211   text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
    212   if (dot != classifier.end()) {
    213     topClass = substr(classifier.begin(), dot);
    214   }
    215   else {
    216     topClass = classifier;
    217   }
    218 
    219   if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
    220     return false;
    221   }
    222  
    223   if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
    224     return false;
    225   }
    226 
    227   if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
    228     return false;
    229   }
    230 
     200  if (!get_info(document_OID, collection, "", metadata, false, protocol, response, *this->logout))
     201  {
     202    return false;
     203  }
     204
     205  text_t last_modified_date;
     206  this->getLastModifiedDate(response.docInfo[0], last_modified_date);
     207
     208  // Check this record is not before the "from" value, if it exists
     209  if (from != "" && last_modified_date < from)
     210  {
     211    // Too early
     212    return false;
     213  }
     214
     215  // Check this record is not after the "until" value, if it exists
     216  if (until != "" && last_modified_date > until)
     217  {
     218    // Too late
     219    return false;
     220  }
     221
     222  // Just right
    231223  return true;
    232224}
    233 
    234 void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
    235                      const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
    236 {
    237   // metadata for this call
    238   FilterResponse_t response;
    239   text_tset        metadata;
    240   ofstream         logout("oai.log", ios::app);
    241   text_t           from = params["from"];
    242   text_t           until = params["until"];
    243   text_t           metadataPrefix = params["metadataPrefix"];
    244   //  ResumptionToken  resumptionToken(params["resumptionToken"]);
    245   int              startPos = 0;
    246 
    247   // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
    248   // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
    249   // is therefore not enough to check that the response object in the current iteration has no docs - we
    250   // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
    251   // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
    252   // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
    253   // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
    254   // throw the noRecordsMatch error.
    255  
    256   //  bool prevDocSeen = false;
    257 
    258   get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
    259 
    260   if (params["resumptionToken"] != "") {
    261     // if we're at a resumptionToken
    262     if (classifier == resumptionToken->getNode()) {
    263       startPos = resumptionToken->getPosition();
    264     }
    265     else {
    266       text_t fullNode = resumptionToken->getNode();
    267       text_t::iterator leafIter = fullNode.begin() + classifier.size();
    268 
    269       // if the next character isn't a dot, blow up!
    270       if (*leafIter != '.') {
    271     // fatal error;
    272     exit(1);
    273       }
    274      
    275       // get the first '.' after the current classifier point;
    276       text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
    277 
    278       // now, create a new subpath
    279       text_t nextNode = substr(fullNode.begin(), separator);
    280 
    281       // seek forward; TODO: improve performance of this
    282       for (int c = 0; c < response.numDocs; ++c) {
    283     if (response.docInfo[c].OID == nextNode) {
    284       startPos = c;
    285       break;
    286     }
    287       }
    288     }
    289 
    290     // We need to subtract one from the startPos value to turn it into an index value
    291     startPos--;
    292   }
    293  
    294   for (int c = startPos; c < response.numDocs; ++c) {
    295     text_t child = response.docInfo[c].OID;
    296    
    297     // distinguish classifiers and documents by checking whether OID
    298     // starts with CL or not
    299     text_t childHead;
    300     text_t::const_iterator start = child.begin();
    301     text_t::const_iterator here  = child.begin();
    302     here += 2;
    303     childHead = substr(start, here);
    304    
    305     // documents we output now
    306     if (childHead != "CL") {
    307       // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
    308       // within the required date range (if specified)
    309       if (this->inDateRange(from, until, collection, child, protocol, output)) {
    310     // TODO: check that the document can be disseminated in the required metadataPrefix
    311 
    312     if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
    313       this->prevDocSeen = true;
    314       ++this->outputDocs;
    315     }
    316       }
    317     }
    318     // children which are classifiers are recursed
    319     else {
    320       if (resumptionToken != NULL) {
    321     int depth = countchar(classifier.begin(), classifier.end(), '.');
    322     resumptionToken->setOffset(depth, c+2);
    323       }
    324       this->recurse_set(output, protocol, collection, child, params, resumptionToken);
    325     }
    326 
    327     if (this->outputDocs == this->configuration->resumeAfter()) {
    328       this->replyToken = new ResumptionToken(collection, params["set"], "");
    329       this->replyToken->setPosition(classifier, c+2);     
    330       break;
    331     }
    332   }
    333 }
    334 
    335 
    336 
    337 
  • gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.h

    r16712 r20590  
    88{
    99 public:
    10   abstractlistaction(const text_t &name) : oaiaction(name) {this->prevDocSeen = false;}
     10  abstractlistaction(const text_t &name) : oaiaction(name) { }
     11
    1112  virtual bool output_document(ostream &output, recptproto *protocol, const text_t &collection,
    1213                   const text_t &OID, const text_t &metadataPrefix) = 0;
     14
    1315  virtual bool output_content(ostream &output, recptproto *protocol, oaiargs &params);
    14   virtual void output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params);
    15   virtual void output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
    16                       ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params);
    17   virtual void recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
    18                const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken);
     16
     17  virtual bool output_content_for_set(ostream &output, recptproto *protocol, oaiargs &params, text_t collection_name, text_t gsdl_classifier_OID, text_t set_name);
     18
     19
    1920 protected:
    20   bool prevDocSeen;
    21   int  outputDocs;
    22   ResumptionToken *replyToken;
    23   bool check_classifier(recptproto *protocol, const text_t &collection, const text_t &classifier);
     21  int output_docs;
     22
     23  bool check_classifier(recptproto *protocol, const text_t &collection, const text_t &set_name);
     24
     25  bool in_date_range(ostream &output, recptproto *protocol, oaiargs &params,
     26             text_t& collection, text_t oai_OID, text_t from, text_t until);
    2427};
     28
    2529#endif
  • gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.cpp

    r20574 r20590  
    11#include "listsetsaction.h"
    22
    3 #if defined(GSDL_USE_STL_H)
    4 #include <fstream.h>
    5 #else
    6 #include <fstream>
    7 #endif
    8 
     3#include "resumptiontoken.h"
    94#include "recptprototools.h"
    105#include "oaitools.h"
     6
    117
    128bool listsetsaction::validateAction(recptproto *protocol, oaiargs &params)
     
    5652    // Check the resumption token is valid
    5753    ResumptionToken token(params["resumptionToken"]);
    58     if (true)  // TO DO: Fix this (the token.isValid() function is useless for ListSets)
     54    if (token.isValid())
    5955    {
    6056      // Everything is fine, and we don't continue further because this is an exclusive argument
     
    9086bool listsetsaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
    9187{
    92   // output the total list of classifier points
    93 
    94   // variables required
    95   text_t browseOID = "browse";
    96   FilterResponse_t response;
    97   comerror_t       err;
    98   text_tarray &    collections = this->configuration->getCollectionsList();
    99   text_tset        metadata;
    100   ofstream         logout("oai.log", ios::app);
    101  
    102   // get a list of the collections available
    103   //  protocol->get_collection_list(collections, err, output);
    104   if (collections.size() == 0) {
    105     logout << "Found *no* OAI collections - check main.cfg for oaicollection items and read the OAI documentation.\n";
    106   }
    107 
    108   // check resumption token
    109   int startSet = 0;
    110   if (params["resumptionToken"] != "") {
    111     ResumptionToken token(params["resumptionToken"]);
    112     startSet = token.getPosition() - 1; // first document is said to be 1..
    113   }
    114   this->replyToken = NULL;
    115 
    116   this->setNumber = 0;
     88  // Reset variables
    11789  this->setsOutput = 0;
    118   for(int current_col = 0; current_col < collections.size(); ++current_col) {
    119     // output the collection as a set, first, then its children
    120     text_t gsdlCollect = collections[current_col];
    121 
     90
     91  text_t collection = "";
     92
     93  // Process the resumptionToken if there is one
     94  if (params["resumptionToken"] != "")
     95  {
     96    ResumptionToken resumption_token(params["resumptionToken"]);
     97    collection = resumption_token.getSet();
     98  }
     99
     100  // Get a list of the collections available
     101  text_tarray& collections = this->configuration->getCollectionsList();
     102  if (collections.size() == 0)
     103  {
     104    return false;
     105  }
     106
     107  // Find the starting collection
     108  text_tarray::iterator collection_iterator = collections.begin();
     109  while (collection_iterator != collections.end())
     110  {
     111    if (collection == "" || collection == *collection_iterator)
     112    {
     113      break;
     114    }
     115
     116    collection_iterator++;
     117  }
     118
     119  // Now loop through the remaining collections
     120  while (collection_iterator != collections.end())
     121  {
     122    collection = (*collection_iterator);
     123
     124    // If we've output the desired number of records return a resumptionToken and we're done
    122125    if (this->setsOutput == this->configuration->resumeAfter())
    123126    {
    124       this->replyToken = new ResumptionToken("", "", "");
    125       this->replyToken->setPosition("", this->setNumber+1);
    126       break;
    127     }
    128 
    129     if (this->setNumber >= startSet)
    130     {
     127      // Get the buildDate from the build.cfg file
     128      ColInfoResponse_t cinfo;
     129      comerror_t err;
     130      protocol->get_collectinfo(collection, cinfo, err, cerr);
     131
     132      ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", "");
     133
     134      // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
     135      output << "  <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
     136      return true;
     137    }
     138
     139    // If output_content_for_col() returns false a resumption token has been output, so it's time to stop
     140    if (output_content_for_col(output, protocol, params, collection) == false)
     141    {
     142      return true;
     143    }
     144
     145    collection_iterator++;
     146  }
     147
     148  return true;
     149}
     150
     151
     152bool listsetsaction::output_content_for_col(ostream &output, recptproto *protocol, oaiargs &params, text_t collection)
     153{
     154  text_t position = "";
     155
     156  // Process the resumptionToken if there is one
     157  if (params["resumptionToken"] != "")
     158  {
     159    ResumptionToken resumption_token(params["resumptionToken"]);
     160    position = resumption_token.getPosition();
     161  }
     162
     163  // Get the list of sets in this collection
     164  // Collections should not contain too many sets otherwise this will use a lot of time and memory
     165  text_tset metadata;  // Must be empty for efficiency
     166  FilterResponse_t sets_response;
     167  get_children("browse", collection, "", metadata, false, protocol, sets_response, *this->logout);
     168
     169  // Find the starting position, if necessary
     170  ResultDocInfo_tarray::iterator set_iterator = sets_response.docInfo.begin();
     171  if (this->setsOutput == 0)
     172  {
     173    while (set_iterator != sets_response.docInfo.end())
     174    {
     175      if (position == "" || position == (*set_iterator).OID)
     176      {
     177    break;
     178      }
     179
     180      set_iterator++;
     181    }
     182  }
     183
     184  // Output the collection as a set
     185  if (position == "")
     186  {
     187    output << "  <set>" << endl;
     188    output << "    <setSpec>" << collection << "</setSpec>" << endl;
     189    output << "    <setName>" << collection << "</setName>" << endl;
     190    output << "  </set>" << endl;
     191    this->setsOutput++;
     192  }
     193
     194  // Now loop through displaying the next matching records
     195  while (set_iterator != sets_response.docInfo.end())
     196  {
     197    text_t set = (*set_iterator).OID;
     198
     199    // Only classifiers with supportsmemberof become OAI sets, for reasons I don't really understand
     200    text_tset set_metadata;
     201    set_metadata.insert("supportsmemberof");
     202    set_metadata.insert("Title");
     203    FilterResponse_t set_response;
     204    get_info(set, collection, "", set_metadata, false, protocol, set_response, *this->logout);
     205
     206    if (set_response.docInfo[0].metadata["supportsmemberof"].values.size() > 0 && set_response.docInfo[0].metadata["supportsmemberof"].values[0] == "true")
     207    {
     208      // If we've output the desired number of records, return a resumptionToken and we're done
     209      if (this->setsOutput == this->configuration->resumeAfter())
     210      {
     211    // Get the buildDate from the build.cfg file
     212    ColInfoResponse_t cinfo;
     213    comerror_t err;
     214    protocol->get_collectinfo(collection, cinfo, err, cerr);
     215
     216    ResumptionToken resumption_token(cinfo.buildDate, collection, "", "", "", set);
     217
     218    // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
     219    output << "  <resumptionToken>" << resumption_token.getResumptionTokenString() << "</resumptionToken>" << endl;
     220    return false;
     221      }
     222
     223      // Otherwise output this set and increment the count
     224      text_t set_title = set_response.docInfo[0].metadata["Title"].values[0];
    131225      output << "  <set>" << endl;
    132       output << "    <setSpec>" << gsdlCollect << "</setSpec>" << endl;;
    133       output << "    <setName>" << gsdlCollect << "</setName>" << endl;
     226      output << "    <setSpec>" << collection << ":" << set << "</setSpec>" << endl;
     227      output << "    <setName>" << collection << ":" << set_title << "</setName>" << endl;
    134228      output << "  </set>" << endl;
    135229      this->setsOutput++;
    136230    }
    137     setNumber++;
    138 
    139     // get all the children of the (relevant) classifier data structures
    140     get_children(browseOID, gsdlCollect, "", metadata, false, protocol, response, logout);
    141     // and send them to the "recurse_content" list
    142     for (int c = 0; c < response.numDocs; ++c) {
    143       this->recurse_content(output, protocol, gsdlCollect, response.docInfo[c].OID, gsdlCollect, startSet);
    144     }
    145   }
    146 
    147   // do a resumption token if required; errors cancel a token...
    148   if (this->replyToken != NULL && this->errorType == "") {
    149     // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
    150     output << "  <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
     231
     232    set_iterator++;
    151233  }
    152234
    153235  return true;
    154236}
    155 
    156 void listsetsaction::recurse_content(ostream &output, recptproto *protocol, text_t &collection,
    157                      const text_t &classifier, text_t setHierarchy, int startSet)
    158 {
    159   // metadata for this call
    160   FilterResponse_t response;
    161   text_tset        metadata;
    162   ofstream         logout("oai.log", ios::app);
    163 
    164   if (this->setsOutput == this->configuration->resumeAfter())
    165   {
    166     this->replyToken = new ResumptionToken("", "", "");
    167     this->replyToken->setPosition("", this->setNumber+1);
    168     return;
    169   }
    170 
    171   metadata.insert("contains");
    172   metadata.insert("Title");
    173   metadata.insert("supportsmemberof");
    174 
    175   // get the document information
    176   if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
    177     //cerr << "recurse content: Bad identifier or protocol " << classifier << endl;
    178     return;
    179   }
    180 
    181   // check for top-level classifiers, check if the set name includes a '.'; if
    182   // not, it is a top-level classifier: check for memberof support.  Those without
    183   // memberof support will not be supported on OAI
    184   if (findchar(classifier.begin(), classifier.end(), '.') == classifier.end()) {
    185     if (response.docInfo[0].metadata["supportsmemberof"].values.size() > 0) {
    186       text_t memberOf = response.docInfo[0].metadata["supportsmemberof"].values[0];
    187       if (memberOf != "true") {
    188     return;
    189       }
    190     }
    191     else {
    192       return;
    193     }
    194   }
    195 
    196   MetadataInfo_tmap::iterator here = response.docInfo[0].metadata.begin();
    197   MetadataInfo_tmap::iterator end  = response.docInfo[0].metadata.end();
    198   text_t title;
    199 
    200   while (here != end)
    201   {
    202     // Each set should only have one title - hence we only output one title here
    203     // (it is a set title, not a collection)
    204     if (here->first == "Title" && here->second.values.size() > 0) {
    205       title = here->second.values[0];
    206     }
    207 
    208     ++here;
    209   }
    210 
    211   // output the xml for this set; use the classifier id for the name
    212   // if the title is blank
    213   // curSet holds the colon-separated sequence of parent sets of the current set
    214   text_t curSet;
    215   if (this->setNumber >= startSet)
    216   {
    217     output << "  <set>" << endl;
    218     text_t oai_classifier = classifier;
    219     oaiclassifier::toOAI(collection, oai_classifier);
    220     output << "    <setSpec>" << oai_classifier << "</setSpec>" << endl;
    221     output << "    <setName>";
    222     if (!title.empty()) {
    223       curSet = setHierarchy + ":" + title;
    224     }
    225     else {
    226       curSet = classifier; // Pretty much never gets here (shouldn't, at least)
    227     }
    228     output << curSet;
    229     output << "</setName>" << endl;
    230     output << "  </set>" << endl;
    231     this->setsOutput++;
    232   }
    233   this->setNumber++;
    234 
    235   // get the children of this classifier and iterate them
    236   get_children(classifier, collection, "", metadata, false, protocol, response, logout);
    237   for (int c = 0; c < response.numDocs; ++c) {
    238     text_t child = response.docInfo[c].OID;
    239 
    240     if (child == classifier)
    241       continue;
    242 
    243     // check for non classifier items and exclude them
    244     text_t childHead;
    245     text_t::const_iterator start = child.begin();
    246     text_t::const_iterator here  = child.begin();
    247     here += 2;
    248     childHead = substr(start, here);
    249 
    250     if (childHead != "CL")
    251       continue;
    252 
    253     // Recurse for "proper" classifier children. Pass curSet, the colon-separated list of
    254     // parent sets. curSet is pass-by-value, so that as we step out of recursion we remember
    255     // old set hierarchies.
    256     this->recurse_content(output, protocol, collection, child, curSet, startSet);
    257   }
    258  
    259   return;
    260 }
  • gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.h

    r20574 r20590  
    66 public:
    77  listsetsaction() : oaiaction("ListSets") { };
     8
    89  virtual bool validateAction(recptproto *protocol, oaiargs &params);
    910
    1011 protected:
    11   int  setNumber;
    12   int  setsOutput;
    13   ResumptionToken *replyToken;
     12  int setsOutput;
     13
    1414  bool output_content(ostream &output, recptproto *protocol, oaiargs &params);
    15   void recurse_content(ostream &output, recptproto *protocol, text_t &collection, const text_t &classifier,
    16                text_t setHierarchy, int startSet);
     15
     16  bool output_content_for_col(ostream &output, recptproto *protocol, oaiargs &params, text_t collection);
    1717};
  • gsdl/trunk/runtime-src/src/oaiservr/resumptiontoken.cpp

    r15380 r20590  
    11#include "resumptiontoken.h"
    2 #include "oaitools.h"
    32
    4 /**
    5  *  Generate an initial resumption token from some basic details.
    6  *
    7  *  TODO: add optional argument to set the server name.
    8  */
    9 ResumptionToken::ResumptionToken(const text_t &collection, const text_t &node,
    10                  const text_t &buildDate)
    11 { this->collection = collection;
    12   this->browseNode = node;
    13   this->buildDate  = buildDate;
    14   this->startItem  = 0;
     3
     4ResumptionToken::ResumptionToken(const text_t &build_date, const text_t &set, const text_t &metadata_prefix,
     5                 const text_t &from, const text_t &until, const text_t &position)
     6{
     7  this->build_date = build_date;
     8  this->set = set;
     9  this->metadata_prefix = metadata_prefix;
     10  this->from = from;
     11  this->until = until;
     12  this->position = position;
     13  this->valid = true;
    1514}
    1615
    17 /**
    18  *  Generate a resumption token from a URN-style format.
    19  *
    20  *  See getToken() for details of the format.
    21  *
    22  *  TODO: support inclusion of an optional server name.
    23  */
    24 ResumptionToken::ResumptionToken(const text_t &URN)
    25 { text_t::const_iterator first = URN.begin();
    26   text_t::const_iterator last = URN.end();
    27   text_t::const_iterator second;
    2816
    29   this->collection = "";
    30   this->browseNode = "";
    31   this->startItem  = -1;
    32  
    33   text_t::const_iterator here = findchar(first, last, ':');
    34   if (here == first) {
    35     return;
     17ResumptionToken::ResumptionToken(const text_t &resumption_token_string)
     18{
     19  this->build_date = "";
     20  this->set = "";
     21  this->metadata_prefix = "";
     22  this->from = "";
     23  this->until = "";
     24  this->position = "";
     25
     26  // This uses custom code instead of the text_t splitchar() function because that is buggy
     27  text_tarray resumption_token_string_parts;
     28  text_t resumption_token_string_part;
     29  text_t::const_iterator resumption_token_string_iterator = resumption_token_string.begin();
     30  while (resumption_token_string_iterator != resumption_token_string.end())
     31  {
     32    if (*resumption_token_string_iterator == ',')
     33    {
     34      resumption_token_string_parts.push_back(resumption_token_string_part);
     35      resumption_token_string_part.clear();
     36    }
     37    else
     38    {
     39      resumption_token_string_part.push_back(*resumption_token_string_iterator);
     40    }
     41
     42    resumption_token_string_iterator++;
    3643  }
    37  
    38   text_t oainamespace = substr(first, here);
    39   if (oainamespace != "gsdloai") {
     44  resumption_token_string_parts.push_back(resumption_token_string_part);
     45
     46  if (resumption_token_string_parts.size() != 6)
     47  {
     48    // The resumption token is invalid -- there should be exactly 6 parts
     49    this->valid = false;
    4050    return;
    4151  }
    4252
    43   // increment past the first colon to get the location
    44   first = ++here; 
    45 
    46   // get the collection, browseNode
    47   here = findchar(first, last, ',');
    48   if (here == last) {
    49     return;
    50   }
    51 
    52   second = findchar(first, here,'.');
    53   this->collection = substr(first, second);
    54 
    55   //  cerr << "Collection " << this->collection << endl;
    56 
    57   if (second != here) {
    58     // get past the '.'
    59     ++second;
    60     this->browseNode = substr(second, here);
    61   }
    62   else {
    63     first = here;
    64   }
    65   // get past the ','
    66   first = ++here;
    67 
    68   // find the second ',' to delimit the position stack
    69   second = findchar(first, last, ',');
    70 
    71   // if not found, then get build and start item
    72   if (second != first) {
    73     // extract list and step past it
    74     text_t offsetList = substr(first, second);
    75     first = ++second;
    76    
    77     do {
    78       second = findchar(offsetList.begin(), offsetList.end(), '.');
    79       if (second == offsetList.end())
    80     break;
    81      
    82       // extract and push the next position
    83       text_t thisPos = substr(offsetList.begin(), second);
    84       this->browsePosition.push_back(thisPos.getint());
    85 
    86       // pop the position from the list
    87       offsetList = substr(++second, offsetList.end());
    88     } while (true);   
    89     this->browsePosition.push_back(offsetList.getint());
    90   }
    91   else {
    92     first ++;
    93   }
    94 
    95   // now find the build date marker
    96   here = findchar(first, last, '-');
    97   if (here == first) {
    98     this->startItem = substr(first, last).getint();
    99   }
    100   else {
    101     this->startItem = substr(first, here).getint();
    102     this->buildDate = substr(++here, last);
    103   }
     53  this->build_date = resumption_token_string_parts[0];
     54  this->set = resumption_token_string_parts[1];
     55  this->metadata_prefix = resumption_token_string_parts[2];
     56  this->from = resumption_token_string_parts[3];
     57  this->until = resumption_token_string_parts[4];
     58  this->position = resumption_token_string_parts[5];
     59  this->valid = true;
    10460}
    10561
    106 /**
    107  *  Get a resumption token in text_t format.
    108  *
    109  *  Resumption tokens are in the format:
    110  *
    111  *    gsdloai:<serverName>:collectionname.browseNode,startItem-BuildDate
    112  *
    113  *  The resumption token format does not currently implement the use of
    114  *  the optional <serverName> item; it is taken to default to the name of
    115  *  the receiving server.
    116  *
    117  *  TODO: add server identity as an optional argument; also change
    118  *        ResumptionToken(text_t &) accordingly.
    119  */
    120 text_t ResumptionToken::getToken()
    121 { text_t reply = "gsdloai:";
    122   reply = reply + this->collection;
    123   if (this->browseNode != "") {
    124     reply = reply + "." + this->browseNode;
    125   }
    126   reply = reply + ",";
    127   for (int i = 0; i < this->browsePosition.size(); i++) {
    128     if (i != 0) {
    129       reply.append(".");
    130     }
    131     reply.appendint(i);
    132   }
    133   reply = reply + ",";
    134   reply.append(this->startItem);
    135   reply = reply + "-" + buildDate;
    13662
    137   return reply;
     63text_t ResumptionToken::getResumptionTokenString()
     64{
     65  return this->build_date + "," + this->set + "," + this->metadata_prefix + "," + this->from + "," + this->until + "," + this->position;
    13866}
    13967
    140 /**
    141  *  Update the position of an existing resumption token
    142  */
    143 void ResumptionToken::setPosition(const text_t &node, int startItem)
    144 { this->browseNode = node;
    145   this->startItem  = startItem;
     68
     69bool ResumptionToken::isValid()
     70{
     71  return this->valid;
    14672}
    147 
    148 /**
    149  *  Check if the resumption token is valid - only a very primitive
    150  *  check is done here; one ought to check for an existing collection
    151  *  and valid browse Node, build date and startItem
    152  *
    153  *  TODO: implement improved validation checking.
    154  */
    155 bool ResumptionToken::isValid()
    156 { return this->collection != "";
    157 }
  • gsdl/trunk/runtime-src/src/oaiservr/resumptiontoken.h

    r11769 r20590  
    22#define _RESUMPTIONTOKEN_H_
    33
    4 #include <vector>
    5 
    64#include "text_t.h"
    75
    8 // use the standard namespace
    9 #if !defined (GSDL_NAMESPACE_BROKEN)
    10 #if defined(GSDL_USE_OBJECTSPACE)
    11 using namespace ospace::std;
    12 #else
    13 using namespace std;
    14 #endif
    15 #endif
    166
    177class ResumptionToken
    18 { private:
    19   text_t  collection;
    20   text_t  browseNode;
    21   vector<int> browsePosition;
    22   int     startItem;
    23   text_t  buildDate;
    24   // TODO: add a server name to the variables list; see getToken in resumptionToken.cpp
     8{
     9 private:
     10  text_t build_date;
     11  text_t set;
     12  text_t metadata_prefix;
     13  text_t from;
     14  text_t until;
     15  text_t position;
     16
     17  bool valid;
    2518
    2619 public:
    27   ResumptionToken(const text_t &collection, const text_t &rootNode, const text_t &buildDate);
    28   ResumptionToken(const text_t &URN);
    29   text_t  getToken();
     20  ResumptionToken(const text_t &build_date, const text_t &set, const text_t &metadata_prefix,
     21          const text_t &from, const text_t &until, const text_t &position);
     22  ResumptionToken(const text_t &resumption_token_string);
    3023
    31   void setPosition(const text_t &node, int startItem);
     24  text_t getBuildDate() { return build_date; }
     25  text_t getSet() { return set; }
     26  text_t getMetadataPrefix() { return metadata_prefix; }
     27  text_t getFrom() { return from; }
     28  text_t getUntil() { return until; }
     29  text_t getPosition() { return position; }
    3230
    33   text_t  getCollection() { return collection; }
    34   text_t  getNode() { return browseNode; }
    35   int     getPosition() { return startItem; }
    36   int     getOffsetDepth() { return this->browsePosition.size(); }
    37   void    setOffset(int depth, int position) { this->browsePosition[depth] = position; }
    38   int getOffset(int offset) { return this->browsePosition[offset]; }
     31  text_t getResumptionTokenString();
    3932  bool isValid();
    4033};
    4134
     35
    4236#endif
Note: See TracChangeset for help on using the changeset viewer.