source: trunk/gsdl/src/oaiservr/abstractlistaction.cpp@ 11732

Last change on this file since 11732 was 11732, checked in by grbuchan, 18 years ago

Improved functionality in resumptiontoken; additional support of resumption
behaviour in configuration; better honouring of OAI settings on collections
(i.e. collections not explicitly listed could be revealed through OAI by
accident).

  • Property svn:keywords set to Author Date Id Revision
File size: 14.0 KB
Line 
1#include "abstractlistaction.h"
2#include "OIDtools.h"
3
4#include "oaitools.h"
5
6bool abstractlistaction::validateAction(recptproto *protocol, oaiargs &params, int &numArgs)
7{
8 text_t from = params["from"];
9 text_t until = params["until"];
10
11 // from date must be less than, or equal to, until date
12 if ((from != "") && (until != "") && !(from <= until)){
13 this->errorType = "badArgument";
14 return false;
15 }
16
17 if (from != ""){
18 // Must be in the form YYYY-MM-DD
19 if(from.size() != 10){
20 this->errorType = "badArgument";
21 return false;
22 }
23 else{
24 if(from[4] != '-' || from[7] != '-'){
25 this->errorType = "badArgument";
26 return false;
27 }
28 }
29 ++numArgs; // Increase valid args count
30 }
31
32 if (until != ""){
33 // Must be in the form YYYY-MM-DD
34 if(until.size() != 10){
35 this->errorType = "badArgument";
36 return false;
37 }
38 else{
39 if(until[4] != '-' || until[7] != '-'){
40 this->errorType = "badArgument";
41 return false;
42 }
43 }
44 ++numArgs; // Increase valid args count
45 }
46
47 if (params["set"] != "") {
48 text_t gsdlSet = params["set"];
49 text_t gsdlCollect = "";
50
51 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
52 // set specified after the name of the collection however, then gsdlSet is empty.
53 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
54
55 comerror_t err;
56 ColInfoResponse_t cinfo;
57
58 // check that the collection is accessible
59 protocol->get_collectinfo(gsdlCollect, cinfo, err, cerr);
60 if (err != noError) {
61 this->errorType = "badArgument";
62 return false;
63 }
64
65 // check the child set if it is given
66 if (gsdlSet != "") {
67 if (!this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
68 this->errorType = "badArgument";
69 return false;
70 }
71 }
72 ++numArgs;
73 }
74
75 if (params["resumptionToken"] != "") {
76 ResumptionToken token(params["resumptionToken"]);
77
78 if (!token.isValid()) {
79 this->errorType = "badResumptionToken";
80 return false;
81 }
82 ++numArgs;
83 }
84
85 this->errorType = "";
86 return true;
87}
88
89//--------------------------------------------------------------------------------------------------
90
91bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
92{
93 text_t from = params["from"];
94 text_t until = params["until"];
95 text_t metaFormat = params["metadataPrefix"];
96 bool prevDocSeen;
97 ResumptionToken *token = NULL;
98
99 // start the call; clear down the total number of output documents
100 this->outputDocs = 0;
101
102 // We don't actually handle resumptionTokens yet; if we get one, ignore it
103 if (params["resumptionToken"] != "") {
104 token = new ResumptionToken(params["resumptionToken"]);
105 }
106
107 this->replyToken = NULL;
108
109 // if we've been asked for a set, then use it!
110 if (params["set"] != "") {
111 // get the children of this set
112 text_t gsdlSet = params["set"];
113 text_t gsdlCollect = "";
114
115 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
116 // set specified after the name of the collection however, then gsdlSet is empty.
117 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
118
119 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
120 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
121 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
122 if(gsdlSet == ""){
123 ColInfoResponse_t cinfo;
124 comerror_t err;
125 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
126 }
127 else {
128 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
129 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
130 }
131 }
132 }
133 // output all records in all hierarchies
134 else {
135 this->output_content_for_all(output, protocol, params);
136 }
137
138 // If - regardless of set required - no documents have been seen, throw an error.
139 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
140 errorType = "noRecordsMatch";
141 this->output_error(output, errorType);
142
143 return false;
144 }
145
146 // do a resumption token if required; errors cancel a token...
147 if (this->replyToken != NULL && this->errorType == "") {
148 output << " <resumptionToken>" << endl;
149 output << " " << this->replyToken->getToken() << endl;
150 output << " </resumptionToken>" << endl;
151 }
152
153 return true;
154}
155
156//--------------------------------------------------------------------------------------------------
157
158void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
159 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
160{ int startDoc = 0;
161
162 // get the collection information
163 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
164
165 // check resumption token
166 if (params["resumptionToken"] != "") {
167 ResumptionToken token(params["resumptionToken"]);
168 if (token.getCollection() == gsdlCollect) {
169 startDoc = token.getPosition() - 1; // first document is said to be 1..
170 }
171 }
172
173 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
174 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
175 // been set to true by a previous collection that this won't overwrite it to be false).
176 if (cinfo.numDocs > 0) {
177 int errorCount = 0; // Count the number of errors found in the given collection
178 text_t from = params["from"];
179 text_t until = params["until"];
180
181 for (long i = startDoc; i < cinfo.numDocs; ++i) {
182 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
183 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
184 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
185 return;
186 }
187
188 text_t oai_id = "oai.";
189 oai_id += i;
190
191 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
192
193 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
194 ++errorCount; // increase error count
195 continue;
196 }
197
198
199 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
200 // the required date range (if specified).
201 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
202 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
203 // this should be an IF statement, where prevDocSeen is only set to true if the above
204 // function call returns true (indicating that the doc supported the metadata prefix) but
205 // for some reason this is always false. This means that if no doc in the requested set supports
206 // the metadata format, the "no records match" error that should be thrown won't be...
207 //
208 // GRB: the above comment is no longer true; proper checks are made
209 this->prevDocSeen = true;
210 ++this->outputDocs;
211 }
212 }
213
214 // if we've output the number of resumption documents; prepare a resumptionToken
215 if (this->outputDocs == this->configuration->resumeAfter()) {
216 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
217 this->replyToken->setPosition("", i+2);
218 break;
219 }
220 }
221
222 cinfo.clear(); // Clear for next collection to use (if there is one).
223 }
224}
225
226//--------------------------------------------------------------------------------------------
227// Returns true if at least one document record is found
228void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
229{
230 ColInfoResponse_t cinfo;
231 comerror_t err;
232 text_tarray collections;
233 text_t gsdlCollect = "";
234 ResumptionToken *token = NULL;
235
236 // get a list of the collections available
237 collections = this->configuration->getCollectionsList();
238 // protocol->get_collection_list(collections, err, output);
239
240 if (params["resumptionToken"] != "") {
241 token = new ResumptionToken(params["resumptionToken"]);
242 }
243
244 for(int current_col = 0; current_col < collections.size(); ++current_col){
245 gsdlCollect = collections[current_col];
246
247 // ignore all leading collections before the one that matches the resumptiontoken
248 if (token != NULL &&
249 token->getCollection() != gsdlCollect)
250 { continue;
251 }
252
253 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
254
255 // once we've output at least one collection, continue
256 // outputting all others until the resumption total hits
257 token = NULL;
258
259 if (this->outputDocs == this->configuration->resumeAfter()) {
260 break;
261 }
262 }
263}
264
265//-------------------------------------------------------------------------------------------------
266// Check that the requested from/until dates don't include a time, as this would be asking for too
267// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
268/*
269bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
270{
271 if (from != "" && from.){
272
273 }
274
275}
276*/
277//-------------------------------------------------------------------------------------------------
278
279bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
280 const text_t &classifier)
281{ text_t topClass;
282 FilterResponse_t response;
283 text_tset metadata;
284 ofstream logout("oai.log", ios::app);
285
286 // exclude false children of a top-level classifier immediately...
287 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
288 return false;
289 }
290
291 // now check the top-level parent
292 metadata.insert("supportsmemberof");
293
294 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
295 if (dot != classifier.end()) {
296 topClass = substr(classifier.begin(), dot);
297 }
298 else {
299 topClass = classifier;
300 }
301
302 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
303 return false;
304 }
305
306 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
307 return false;
308 }
309
310 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
311 return false;
312 }
313
314 return true;
315}
316
317void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
318 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
319{
320 // metadata for this call
321 FilterResponse_t response;
322 text_tset metadata;
323 ofstream logout("oai.log", ios::app);
324 text_t from = params["from"];
325 text_t until = params["until"];
326 text_t metadataPrefix = params["metadataPrefix"];
327 // ResumptionToken resumptionToken(params["resumptionToken"]);
328 int startPos = 0;
329
330 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
331 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
332 // is therefore not enough to check that the response object in the current iteration has no docs - we
333 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
334 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
335 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
336 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
337 // throw the noRecordsMatch error.
338
339 // bool prevDocSeen = false;
340
341 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
342
343 if (params["resumptionToken"] != "") {
344 // if we're at a resumptionToken
345 if (classifier == resumptionToken->getNode()) {
346 startPos = resumptionToken->getPosition();
347 }
348 else {
349 text_t fullNode = resumptionToken->getNode();
350 text_t::iterator leafIter = fullNode.begin() + classifier.size();
351
352 // if the next character isn't a dot, blow up!
353 if (*leafIter != '.') {
354 // fatal error;
355 exit(1);
356 }
357
358 // get the first '.' after the current classifier point;
359 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
360
361 // now, create a new subpath
362 text_t nextNode = substr(fullNode.begin(), separator);
363
364 // seek forward; TODO: improve performance of this
365 for (int c = 0; c < response.numDocs; ++c) {
366 if (response.docInfo[c].OID == nextNode) {
367 startPos = c;
368 break;
369 }
370 }
371 }
372 }
373
374 for (int c = startPos; c < response.numDocs; ++c) {
375 text_t child = response.docInfo[c].OID;
376
377 // distinguish classifiers and documents by checking whether OID
378 // starts with CL or not
379 text_t childHead;
380 text_t::const_iterator start = child.begin();
381 text_t::const_iterator here = child.begin();
382 here += 2;
383 childHead = substr(start, here);
384
385 // documents we output now
386 if (childHead != "CL") {
387 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
388 // within the required date range (if specified)
389 if (this->inDateRange(from, until, collection, child, protocol, output)) {
390 // TODO: check that the document can be disseminated in the required metadataPrefix
391
392 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
393 this->prevDocSeen = true;
394 ++this->outputDocs;
395 }
396 }
397 }
398 // children which are classifiers are recursed
399 else {
400 if (resumptionToken != NULL) {
401 int depth = countchar(classifier.begin(), classifier.end(), '.');
402 resumptionToken->setOffset(depth, c+2);
403 }
404 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
405 }
406
407 if (this->outputDocs == this->configuration->resumeAfter()) {
408 // this->replyToken = new ResumptionToken(collection, params["set"], "");
409 this->replyToken = resumptionToken;
410 this->replyToken->setPosition(classifier, c+2);
411 }
412 }
413}
414
415
416
417
Note: See TracBrowser for help on using the repository browser.