source: gsdl/trunk/runtime-src/src/oaiservr/abstractlistaction.cpp@ 16710

Last change on this file since 16710 was 16710, checked in by mdewsnip, 16 years ago

Completely tidied up abstractlistaction::validateAction() to handle the resumptionToken argument properly, and to structure the checking code more consistently.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.8 KB
Line 
1#include "abstractlistaction.h"
2#include "recptprototools.h"
3
4#include "oaitools.h"
5
6bool abstractlistaction::validateAction(recptproto *protocol, oaiargs &params)
7{
8 // ----------------------------------------------------------------------------
9 // 1. Check for invalid arguments
10 // ----------------------------------------------------------------------------
11 bool invalid_argument_supplied = false;
12 text_tmap::const_iterator param_iterator = params.begin();
13 while (param_iterator != params.end())
14 {
15 // Check for arguments that aren't valid for this action
16 if (param_iterator->first != "verb" &&
17 param_iterator->first != "from" &&
18 param_iterator->first != "until" &&
19 param_iterator->first != "set" &&
20 param_iterator->first != "resumptionToken" &&
21 param_iterator->first != "metadataPrefix")
22 {
23 // We've found an invalid argument
24 invalid_argument_supplied = true;
25
26 // Delete the invalid argument from the list so it doesn't end up in the <request> tag that is returned
27 params.erase(param_iterator->first);
28 }
29
30 // The metadataPrefix argument is not allowed in OAI v1.1
31 else if (param_iterator->first == "metadataPrefix" && this->configuration->getOAIVersion() <= 110)
32 {
33 // We've found an invalid argument
34 invalid_argument_supplied = true;
35
36 // Delete the invalid argument from the list so it doesn't end up in the <request> tag that is returned
37 params.erase(param_iterator->first);
38 }
39
40 param_iterator++;
41 }
42
43 // If we found an invalid argument it's an error, so don't go any further
44 if (invalid_argument_supplied)
45 {
46 this->errorType = "badArgument";
47 return false;
48 }
49
50 // ----------------------------------------------------------------------------
51 // 2. Handle any exclusive arguments
52 // ----------------------------------------------------------------------------
53
54 // The resumptionToken argument is exclusive
55 if (params["resumptionToken"] != "")
56 {
57 // This argument is exclusive, so no other arguments are allowed (except "verb" of course)
58 if (params.getSize() != 2)
59 {
60 this->errorType = "badArgument";
61 return false;
62 }
63
64 // Check the resumption token is valid
65 ResumptionToken token(params["resumptionToken"]);
66 if (token.isValid())
67 {
68 // Everything is fine, and we don't continue further because this is an exclusive argument
69 this->errorType = "";
70 return true;
71 }
72 else
73 {
74 // There was an error with the resumption token
75 this->errorType = "badResumptionToken";
76 return false;
77 }
78 }
79
80 // ----------------------------------------------------------------------------
81 // 3. Handle any required arguments
82 // ----------------------------------------------------------------------------
83
84 // OAI v2.0 requires metadataPrefix
85 if (this->configuration->getOAIVersion() > 110)
86 {
87 text_t metadataPrefix = params["metadataPrefix"];
88
89 // Check that the metadataPrefix argument exists
90 if (metadataPrefix == "")
91 {
92 this->errorType = "badArgument";
93 return false;
94 }
95 // Check that the metadataPrefix is a format we support
96 if (this->formatNotSupported(metadataPrefix))
97 {
98 this->errorType = "cannotDisseminateFormat";
99 return false;
100 }
101 }
102
103 // ----------------------------------------------------------------------------
104 // 4. Check any remaining arguments
105 // ----------------------------------------------------------------------------
106
107 // Check "from" and "until" arguments
108 if (params["from"] != "" || params["until"] != "")
109 {
110 text_t from = params["from"];
111 text_t until = params["until"];
112
113 // Check the from date is in the correct format: YYYY-MM-DD
114 if (from != "")
115 {
116 // Must be in the form YYYY-MM-DD
117 if (from.size() != 10 || from[4] != '-' || from[7] != '-')
118 {
119 this->errorType = "badArgument";
120 params.erase("from");
121 }
122 }
123 // Check the until date is in the correct format: YYYY-MM-DD
124 if (until != "")
125 {
126 // Must be in the form YYYY-MM-DD
127 if (until.size() != 10 || until[4] != '-' || until[7] != '-')
128 {
129 this->errorType = "badArgument";
130 params.erase("until");
131 }
132 }
133
134 if (this->errorType == "badArgument")
135 {
136 return false;
137 }
138
139 // If both arguments are supplied the from date must be less than or equal to the until date
140 if (from != "" && until != "" && !(from <= until))
141 {
142 this->errorType = "badArgument";
143 return false;
144 }
145 }
146
147 // Check "set" argument
148 if (params["set"] != "")
149 {
150 // Example set specification: "demo:CL2"
151 text_t set = params["set"];
152
153 // given 'demo:CL2', toGSDL returns 'demo' in collection and 'CL2' in set. If there is no further
154 // set specified after the name of the collection however, then set is empty.
155 text_t collection = "";
156 oaiclassifier::toGSDL(collection, set);
157
158 // Check that the collection is accessible
159 ColInfoResponse_t cinfo;
160 comerror_t err;
161 protocol->get_collectinfo(collection, cinfo, err, cerr);
162 if (err != noError)
163 {
164 this->errorType = "badArgument";
165 return false;
166 }
167
168 // Check the collection is one that is in the list in the oai.cfg file
169 text_tarray &collections = this->configuration->getCollectionsList();
170 bool collection_found = false;
171 for (int c = 0; c < collections.size(); c++)
172 {
173 if (collections[c] == collection)
174 {
175 collection_found = true;
176 break;
177 }
178 }
179
180 // The collection was not found
181 if (!collection_found)
182 {
183 this->errorType = "badArgument";
184 return false;
185 }
186
187 // Check the child set if it was given
188 if (set != "" && !this->check_classifier(protocol, collection, set))
189 {
190 this->errorType = "badArgument";
191 return false;
192 }
193 }
194
195 // If we've reached here everything must be fine
196 this->errorType = "";
197 return true;
198}
199
200//--------------------------------------------------------------------------------------------------
201
202bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
203{
204 text_t from = params["from"];
205 text_t until = params["until"];
206 text_t metaFormat = params["metadataPrefix"];
207 bool prevDocSeen;
208 ResumptionToken *token = NULL;
209
210 // start the call; clear down the total number of output documents
211 this->outputDocs = 0;
212
213 // We don't actually handle resumptionTokens yet; if we get one, ignore it
214 if (params["resumptionToken"] != "") {
215 token = new ResumptionToken(params["resumptionToken"]);
216 }
217
218 this->replyToken = NULL;
219
220 // if we've been asked for a set, then use it!
221 if (params["set"] != "") {
222 // get the children of this set
223 text_t gsdlSet = params["set"];
224 text_t gsdlCollect = "";
225
226 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
227 // set specified after the name of the collection however, then gsdlSet is empty.
228 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
229
230 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
231 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
232 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
233 if(gsdlSet == ""){
234 ColInfoResponse_t cinfo;
235 comerror_t err;
236 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
237 }
238 else {
239 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
240 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
241 }
242 }
243 }
244 // output all records in all hierarchies
245 else {
246 this->output_content_for_all(output, protocol, params);
247 }
248
249 // If - regardless of set required - no documents have been seen, throw an error.
250 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
251 errorType = "noRecordsMatch";
252 this->output_error(output, errorType);
253
254 return false;
255 }
256
257 // do a resumption token if required; errors cancel a token...
258 if (this->replyToken != NULL && this->errorType == "") {
259 // Don't add any whitespace around the resumption token as it can confuse harvesters/validators
260 output << " <resumptionToken>" << this->replyToken->getToken() << "</resumptionToken>" << endl;
261 }
262
263 return true;
264}
265
266//--------------------------------------------------------------------------------------------------
267
268void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
269 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
270{ int startDoc = 0;
271
272 // get the collection information
273 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
274
275 // check resumption token
276 if (params["resumptionToken"] != "") {
277 ResumptionToken token(params["resumptionToken"]);
278 if (token.getCollection() == gsdlCollect) {
279 startDoc = token.getPosition() - 1; // first document is said to be 1..
280 }
281 }
282
283 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
284 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
285 // been set to true by a previous collection that this won't overwrite it to be false).
286 if (cinfo.numDocs > 0) {
287 int errorCount = 0; // Count the number of errors found in the given collection
288 text_t from = params["from"];
289 text_t until = params["until"];
290
291 for (long i = startDoc; i < cinfo.numDocs; ++i) {
292 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
293 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
294 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
295 return;
296 }
297
298 text_t oai_id = "oai.";
299 oai_id += i;
300
301 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
302
303 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
304 ++errorCount; // increase error count
305 continue;
306 }
307
308
309 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
310 // the required date range (if specified).
311 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
312 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
313 // this should be an IF statement, where prevDocSeen is only set to true if the above
314 // function call returns true (indicating that the doc supported the metadata prefix) but
315 // for some reason this is always false. This means that if no doc in the requested set supports
316 // the metadata format, the "no records match" error that should be thrown won't be...
317 //
318 // GRB: the above comment is no longer true; proper checks are made
319 this->prevDocSeen = true;
320 ++this->outputDocs;
321 }
322 }
323
324 // if we've output the number of resumption documents; prepare a resumptionToken
325 if (this->outputDocs == this->configuration->resumeAfter()) {
326 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
327 this->replyToken->setPosition("", i+2);
328 break;
329 }
330 }
331
332 cinfo.clear(); // Clear for next collection to use (if there is one).
333 }
334}
335
336//--------------------------------------------------------------------------------------------
337// Returns true if at least one document record is found
338void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
339{
340 ColInfoResponse_t cinfo;
341 comerror_t err;
342 text_tarray collections;
343 text_t gsdlCollect = "";
344 ResumptionToken *token = NULL;
345
346 // get a list of the collections available
347 collections = this->configuration->getCollectionsList();
348 // protocol->get_collection_list(collections, err, output);
349
350 if (params["resumptionToken"] != "") {
351 token = new ResumptionToken(params["resumptionToken"]);
352 }
353
354 for(int current_col = 0; current_col < collections.size(); ++current_col){
355 gsdlCollect = collections[current_col];
356
357 // ignore all leading collections before the one that matches the resumptiontoken
358 if (token != NULL &&
359 token->getCollection() != gsdlCollect)
360 { continue;
361 }
362
363 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
364
365 // once we've output at least one collection, continue
366 // outputting all others until the resumption total hits
367 token = NULL;
368
369 if (this->outputDocs == this->configuration->resumeAfter()) {
370 break;
371 }
372 }
373}
374
375//-------------------------------------------------------------------------------------------------
376// Check that the requested from/until dates don't include a time, as this would be asking for too
377// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
378/*
379bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
380{
381 if (from != "" && from.){
382
383 }
384
385}
386*/
387//-------------------------------------------------------------------------------------------------
388
389bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
390 const text_t &classifier)
391{ text_t topClass;
392 FilterResponse_t response;
393 text_tset metadata;
394 ofstream logout("oai.log", ios::app);
395
396 // exclude false children of a top-level classifier immediately...
397 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
398 return false;
399 }
400
401 // now check the top-level parent
402 metadata.insert("supportsmemberof");
403
404 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
405 if (dot != classifier.end()) {
406 topClass = substr(classifier.begin(), dot);
407 }
408 else {
409 topClass = classifier;
410 }
411
412 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
413 return false;
414 }
415
416 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
417 return false;
418 }
419
420 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
421 return false;
422 }
423
424 return true;
425}
426
427void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
428 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
429{
430 // metadata for this call
431 FilterResponse_t response;
432 text_tset metadata;
433 ofstream logout("oai.log", ios::app);
434 text_t from = params["from"];
435 text_t until = params["until"];
436 text_t metadataPrefix = params["metadataPrefix"];
437 // ResumptionToken resumptionToken(params["resumptionToken"]);
438 int startPos = 0;
439
440 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
441 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
442 // is therefore not enough to check that the response object in the current iteration has no docs - we
443 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
444 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
445 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
446 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
447 // throw the noRecordsMatch error.
448
449 // bool prevDocSeen = false;
450
451 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
452
453 if (params["resumptionToken"] != "") {
454 // if we're at a resumptionToken
455 if (classifier == resumptionToken->getNode()) {
456 startPos = resumptionToken->getPosition();
457 }
458 else {
459 text_t fullNode = resumptionToken->getNode();
460 text_t::iterator leafIter = fullNode.begin() + classifier.size();
461
462 // if the next character isn't a dot, blow up!
463 if (*leafIter != '.') {
464 // fatal error;
465 exit(1);
466 }
467
468 // get the first '.' after the current classifier point;
469 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
470
471 // now, create a new subpath
472 text_t nextNode = substr(fullNode.begin(), separator);
473
474 // seek forward; TODO: improve performance of this
475 for (int c = 0; c < response.numDocs; ++c) {
476 if (response.docInfo[c].OID == nextNode) {
477 startPos = c;
478 break;
479 }
480 }
481 }
482
483 // We need to subtract one from the startPos value to turn it into an index value
484 startPos--;
485 }
486
487 for (int c = startPos; c < response.numDocs; ++c) {
488 text_t child = response.docInfo[c].OID;
489
490 // distinguish classifiers and documents by checking whether OID
491 // starts with CL or not
492 text_t childHead;
493 text_t::const_iterator start = child.begin();
494 text_t::const_iterator here = child.begin();
495 here += 2;
496 childHead = substr(start, here);
497
498 // documents we output now
499 if (childHead != "CL") {
500 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
501 // within the required date range (if specified)
502 if (this->inDateRange(from, until, collection, child, protocol, output)) {
503 // TODO: check that the document can be disseminated in the required metadataPrefix
504
505 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
506 this->prevDocSeen = true;
507 ++this->outputDocs;
508 }
509 }
510 }
511 // children which are classifiers are recursed
512 else {
513 if (resumptionToken != NULL) {
514 int depth = countchar(classifier.begin(), classifier.end(), '.');
515 resumptionToken->setOffset(depth, c+2);
516 }
517 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
518 }
519
520 if (this->outputDocs == this->configuration->resumeAfter()) {
521 this->replyToken = new ResumptionToken(collection, params["set"], "");
522 this->replyToken->setPosition(classifier, c+2);
523 break;
524 }
525 }
526}
527
528
529
530
Note: See TracBrowser for help on using the repository browser.