root/gsdl/trunk/runtime-src/src/oaiservr/listsetsaction.cpp @ 16708

Revision 16708, 7.1 KB (checked in by mdewsnip, 12 years ago)

Changed the resumptionToken tags to not have any whitespace around the resumption tokens, because this confuses harvesters/validators

  • Property svn:keywords set to Author Date Id Revision
Line 
1#include "listsetsaction.h"
2
3#if defined(GSDL_USE_STL_H)
4#include <fstream.h>
5#else
6#include <fstream>
7#endif
8
9#include "recptprototools.h"
10#include "oaitools.h"
11
12bool listsetsaction::validateAction(recptproto *protocol, oaiargs &params)
13{
14  int params_size = params.getSize();
15
16  // Remove any parameters that aren't valid for this action
17  text_tmap::const_iterator param_iterator = params.begin();
18  while (param_iterator != params.end())
19  {
20    if (param_iterator->first != "verb" && param_iterator->first != "resumptionToken")
21    {
22      params.erase(param_iterator->first);
23    }
24
25    param_iterator++;
26  }
27
28  if (params_size != params.getSize()) {
29    this->errorType = "badArgument";
30    return false;
31  }
32
33  if (params["resumptionToken"] != "") {
34    ResumptionToken token(params["resumptionToken"]);
35    // TO DO: Resumption token validation checking (the token.isValid() function is useless for ListSets)
36    // if (!token.isValid()) {
37    //   this->errorType = "badResumptionToken";
38    //   return false;
39    // }
40  }
41
42  return true;
43}
44
45bool listsetsaction::output_content(ostream &output, recptproto *protocol, text_tset &collections, oaiargs &params)
46{
47  text_tset::iterator here = collections.begin();
48  text_tset::iterator end  = collections.end();
49  while (here != end) {
50    text_t collect = *here;
51    this->output_content(output, protocol, params);
52    ++here;
53  }
54  return true;
55}
56
57bool listsetsaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
58{
59  // output the total list of classifier points
60
61  // variables required
62  text_t browseOID = "browse";
63  FilterResponse_t response;
64  comerror_t       err;
65  text_tarray &    collections = this->configuration->getCollectionsList();
66  text_tset        metadata;
67  ofstream         logout("oai.log", ios::app);
68 
69  // get a list of the collections available
70  //  protocol->get_collection_list(collections, err, output);
71  if (collections.size() == 0) {
72    logout << "Found *no* OAI collections - check main.cfg for oaicollection items and read the OAI documentation.\n";
73  }
74
75  // check resumption token
76  int startSet = 0;
77  if (params["resumptionToken"] != "") {
78    ResumptionToken token(params["resumptionToken"]);
79    startSet = token.getPosition() - 1; // first document is said to be 1..
80  }
81  this->replyToken = NULL;
82
83  this->setNumber = 0;
84  this->setsOutput = 0;
85  for(int current_col = 0; current_col < collections.size(); ++current_col) {
86    // output the collection as a set, first, then its children
87    text_t gsdlCollect = collections[current_col];
88
89    if (this->setsOutput == this->configuration->resumeAfter())
90    {
91      this->replyToken = new ResumptionToken("", "", "");
92      this->replyToken->setPosition("", this->setNumber+1);
93      break;
94    }
95
96    if (this->setNumber >= startSet)
97    {
98      output << "  <set>" << endl;
99      output << "    <setSpec>" << gsdlCollect << "</setSpec>" << endl;;
100      output << "    <setName>" << gsdlCollect << "</setName>" << endl;
101      output << "  </set>" << endl;
102      this->setsOutput++;
103    }
104    setNumber++;
105
106    // get all the children of the (relevant) classifier data structures
107    get_children(browseOID, gsdlCollect, "", metadata, false, protocol, response, logout);
108    // and send them to the "recurse_content" list
109    for (int c = 0; c < response.numDocs; ++c) {
110      this->recurse_content(output, protocol, gsdlCollect, response.docInfo[c].OID, gsdlCollect, startSet);
111    }
112  }
113
114  // do a resumption token if required; errors cancel a token...
115  if (this->replyToken != NULL && this->errorType == "") {
116    // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
117    output << "  <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
118  }
119
120  return true;
121}
122
123void listsetsaction::recurse_content(ostream &output, recptproto *protocol, text_t &collection,
124                     const text_t &classifier, text_t setHierarchy, int startSet)
125{
126  // metadata for this call
127  FilterResponse_t response;
128  text_tset        metadata;
129  ofstream         logout("oai.log", ios::app);
130
131  if (this->setsOutput == this->configuration->resumeAfter())
132  {
133    this->replyToken = new ResumptionToken("", "", "");
134    this->replyToken->setPosition("", this->setNumber+1);
135    return;
136  }
137
138  metadata.insert("contains");
139  metadata.insert("Title");
140  metadata.insert("supportsmemberof");
141
142  // get the document information
143  if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
144    //cerr << "recurse content: Bad identifier or protocol " << classifier << endl;
145    return;
146  }
147
148  // check for top-level classifiers, check if the set name includes a '.'; if
149  // not, it is a top-level classifier: check for memberof support.  Those without
150  // memberof support will not be supported on OAI
151  if (findchar(classifier.begin(), classifier.end(), '.') == classifier.end()) {
152    if (response.docInfo[0].metadata["supportsmemberof"].values.size() > 0) {
153      text_t memberOf = response.docInfo[0].metadata["supportsmemberof"].values[0];
154      if (memberOf != "true") {
155    return;
156      }
157    }
158    else {
159      return;
160    }
161  }
162
163  MetadataInfo_tmap::iterator here = response.docInfo[0].metadata.begin();
164  MetadataInfo_tmap::iterator end  = response.docInfo[0].metadata.end();
165  text_t title;
166
167  while (here != end)
168  {
169    // Each set should only have one title - hence we only output one title here
170    // (it is a set title, not a collection)
171    if (here->first == "Title" && here->second.values.size() > 0) {
172      title = here->second.values[0];
173    }
174
175    ++here;
176  }
177
178  // output the xml for this set; use the classifier id for the name
179  // if the title is blank
180  // curSet holds the colon-separated sequence of parent sets of the current set
181  text_t curSet;
182  if (this->setNumber >= startSet)
183  {
184    output << "  <set>" << endl;
185    text_t oai_classifier = classifier;
186    oaiclassifier::toOAI(collection, oai_classifier);
187    output << "    <setSpec>" << oai_classifier << "</setSpec>" << endl;
188    output << "    <setName>";
189    if (!title.empty()) {
190      curSet = setHierarchy + ":" + title;
191    }
192    else {
193      curSet = classifier; // Pretty much never gets here (shouldn't, at least)
194    }
195    output << curSet;
196    output << "</setName>" << endl;
197    output << "  </set>" << endl;
198    this->setsOutput++;
199  }
200  this->setNumber++;
201
202  // get the children of this classifier and iterate them
203  get_children(classifier, collection, "", metadata, false, protocol, response, logout);
204  for (int c = 0; c < response.numDocs; ++c) {
205    text_t child = response.docInfo[c].OID;
206
207    if (child == classifier)
208      continue;
209
210    // check for non classifier items and exclude them
211    text_t childHead;
212    text_t::const_iterator start = child.begin();
213    text_t::const_iterator here  = child.begin();
214    here += 2;
215    childHead = substr(start, here);
216
217    if (childHead != "CL")
218      continue;
219
220    // Recurse for "proper" classifier children. Pass curSet, the colon-separated list of
221    // parent sets. curSet is pass-by-value, so that as we step out of recursion we remember
222    // old set hierarchies.
223    this->recurse_content(output, protocol, collection, child, curSet, startSet);
224  }
225 
226  return;
227}
Note: See TracBrowser for help on using the browser.