source: gsdl/trunk/src/oaiservr/abstractlistaction.cpp@ 15198

Last change on this file since 15198 was 15198, checked in by mdewsnip, 16 years ago

Now each action checks for invalid arguments in the params structure and deletes any that aren't valid, so they don't get into the "<request>" tag in the resulting XML and cause OAI validation errors. By DL Consulting Ltd.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.9 KB
Line 
1#include "abstractlistaction.h"
2#include "OIDtools.h"
3
4#include "oaitools.h"
5
6bool abstractlistaction::validateAction(recptproto *protocol, oaiargs &params, int &numArgs)
7{
8 // Remove any parameters that aren't valid for this action
9 text_tmap::const_iterator param_iterator = params.begin();
10 while (param_iterator != params.end())
11 {
12 if (param_iterator->first != "verb" &&
13 param_iterator->first != "from" &&
14 param_iterator->first != "until" &&
15 param_iterator->first != "set" &&
16 param_iterator->first != "resumptionToken" &&
17 param_iterator->first != "metadataPrefix")
18 {
19 params.erase(param_iterator->first);
20 }
21
22 param_iterator++;
23 }
24
25 text_t from = params["from"];
26 text_t until = params["until"];
27
28 // from date must be less than, or equal to, until date
29 if ((from != "") && (until != "") && !(from <= until)){
30 this->errorType = "badArgument";
31 return false;
32 }
33
34 if (from != ""){
35 // Must be in the form YYYY-MM-DD
36 if(from.size() != 10){
37 this->errorType = "badArgument";
38 params.erase("from");
39 }
40 else{
41 if(from[4] != '-' || from[7] != '-'){
42 this->errorType = "badArgument";
43 params.erase("from");
44 }
45 }
46 ++numArgs; // Increase valid args count
47 }
48
49 if (until != ""){
50 // Must be in the form YYYY-MM-DD
51 if(until.size() != 10){
52 this->errorType = "badArgument";
53 params.erase("until");
54 }
55 else{
56 if(until[4] != '-' || until[7] != '-'){
57 this->errorType = "badArgument";
58 params.erase("until");
59 }
60 }
61 ++numArgs; // Increase valid args count
62 }
63
64 if (this->errorType == "badArgument")
65 {
66 return false;
67 }
68
69 if (params["set"] != "") {
70 text_t gsdlSet = params["set"];
71 text_t gsdlCollect = "";
72
73 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
74 // set specified after the name of the collection however, then gsdlSet is empty.
75 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
76
77 comerror_t err;
78 ColInfoResponse_t cinfo;
79
80 // check that the collection is accessible
81 protocol->get_collectinfo(gsdlCollect, cinfo, err, cerr);
82 if (err != noError) {
83 this->errorType = "badArgument";
84 return false;
85 }
86
87 // exclude collections that are not listed in the configured OAI list
88 text_tarray &collections = this->configuration->getCollectionsList();
89 int c;
90 for (c = 0; c < collections.size(); c ++) {
91 if (collections[c] == gsdlCollect)
92 break;
93 }
94 if (c == collections.size()) {
95 this->errorType = "badArgument";
96 return false;
97 }
98
99 if (gsdlSet != "") {
100 // check the child set if it is given
101 if (!this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
102 this->errorType = "badArgument";
103 return false;
104 }
105 }
106 ++numArgs;
107 }
108
109 if (params["resumptionToken"] != "") {
110 ResumptionToken token(params["resumptionToken"]);
111
112 if (!token.isValid()) {
113 this->errorType = "badResumptionToken";
114 return false;
115 }
116 ++numArgs;
117 }
118
119 this->errorType = "";
120 return true;
121}
122
123//--------------------------------------------------------------------------------------------------
124
125bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
126{
127 text_t from = params["from"];
128 text_t until = params["until"];
129 text_t metaFormat = params["metadataPrefix"];
130 bool prevDocSeen;
131 ResumptionToken *token = NULL;
132
133 // start the call; clear down the total number of output documents
134 this->outputDocs = 0;
135
136 // We don't actually handle resumptionTokens yet; if we get one, ignore it
137 if (params["resumptionToken"] != "") {
138 token = new ResumptionToken(params["resumptionToken"]);
139 }
140
141 this->replyToken = NULL;
142
143 // if we've been asked for a set, then use it!
144 if (params["set"] != "") {
145 // get the children of this set
146 text_t gsdlSet = params["set"];
147 text_t gsdlCollect = "";
148
149 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
150 // set specified after the name of the collection however, then gsdlSet is empty.
151 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
152
153 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
154 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
155 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
156 if(gsdlSet == ""){
157 ColInfoResponse_t cinfo;
158 comerror_t err;
159 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
160 }
161 else {
162 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
163 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
164 }
165 }
166 }
167 // output all records in all hierarchies
168 else {
169 this->output_content_for_all(output, protocol, params);
170 }
171
172 // If - regardless of set required - no documents have been seen, throw an error.
173 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
174 errorType = "noRecordsMatch";
175 this->output_error(output, errorType);
176
177 return false;
178 }
179
180 // do a resumption token if required; errors cancel a token...
181 if (this->replyToken != NULL && this->errorType == "") {
182 output << " <resumptionToken>" << endl;
183 output << " " << this->replyToken->getToken() << endl;
184 output << " </resumptionToken>" << endl;
185 }
186
187 return true;
188}
189
190//--------------------------------------------------------------------------------------------------
191
192void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
193 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
194{ int startDoc = 0;
195
196 // get the collection information
197 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
198
199 // check resumption token
200 if (params["resumptionToken"] != "") {
201 ResumptionToken token(params["resumptionToken"]);
202 if (token.getCollection() == gsdlCollect) {
203 startDoc = token.getPosition() - 1; // first document is said to be 1..
204 }
205 }
206
207 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
208 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
209 // been set to true by a previous collection that this won't overwrite it to be false).
210 if (cinfo.numDocs > 0) {
211 int errorCount = 0; // Count the number of errors found in the given collection
212 text_t from = params["from"];
213 text_t until = params["until"];
214
215 for (long i = startDoc; i < cinfo.numDocs; ++i) {
216 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
217 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
218 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
219 return;
220 }
221
222 text_t oai_id = "oai.";
223 oai_id += i;
224
225 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
226
227 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
228 ++errorCount; // increase error count
229 continue;
230 }
231
232
233 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
234 // the required date range (if specified).
235 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
236 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
237 // this should be an IF statement, where prevDocSeen is only set to true if the above
238 // function call returns true (indicating that the doc supported the metadata prefix) but
239 // for some reason this is always false. This means that if no doc in the requested set supports
240 // the metadata format, the "no records match" error that should be thrown won't be...
241 //
242 // GRB: the above comment is no longer true; proper checks are made
243 this->prevDocSeen = true;
244 ++this->outputDocs;
245 }
246 }
247
248 // if we've output the number of resumption documents; prepare a resumptionToken
249 if (this->outputDocs == this->configuration->resumeAfter()) {
250 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
251 this->replyToken->setPosition("", i+2);
252 break;
253 }
254 }
255
256 cinfo.clear(); // Clear for next collection to use (if there is one).
257 }
258}
259
260//--------------------------------------------------------------------------------------------
261// Returns true if at least one document record is found
262void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
263{
264 ColInfoResponse_t cinfo;
265 comerror_t err;
266 text_tarray collections;
267 text_t gsdlCollect = "";
268 ResumptionToken *token = NULL;
269
270 // get a list of the collections available
271 collections = this->configuration->getCollectionsList();
272 // protocol->get_collection_list(collections, err, output);
273
274 if (params["resumptionToken"] != "") {
275 token = new ResumptionToken(params["resumptionToken"]);
276 }
277
278 for(int current_col = 0; current_col < collections.size(); ++current_col){
279 gsdlCollect = collections[current_col];
280
281 // ignore all leading collections before the one that matches the resumptiontoken
282 if (token != NULL &&
283 token->getCollection() != gsdlCollect)
284 { continue;
285 }
286
287 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
288
289 // once we've output at least one collection, continue
290 // outputting all others until the resumption total hits
291 token = NULL;
292
293 if (this->outputDocs == this->configuration->resumeAfter()) {
294 break;
295 }
296 }
297}
298
299//-------------------------------------------------------------------------------------------------
300// Check that the requested from/until dates don't include a time, as this would be asking for too
301// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
302/*
303bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
304{
305 if (from != "" && from.){
306
307 }
308
309}
310*/
311//-------------------------------------------------------------------------------------------------
312
313bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
314 const text_t &classifier)
315{ text_t topClass;
316 FilterResponse_t response;
317 text_tset metadata;
318 ofstream logout("oai.log", ios::app);
319
320 // exclude false children of a top-level classifier immediately...
321 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
322 return false;
323 }
324
325 // now check the top-level parent
326 metadata.insert("supportsmemberof");
327
328 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
329 if (dot != classifier.end()) {
330 topClass = substr(classifier.begin(), dot);
331 }
332 else {
333 topClass = classifier;
334 }
335
336 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
337 return false;
338 }
339
340 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
341 return false;
342 }
343
344 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
345 return false;
346 }
347
348 return true;
349}
350
351void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
352 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
353{
354 // metadata for this call
355 FilterResponse_t response;
356 text_tset metadata;
357 ofstream logout("oai.log", ios::app);
358 text_t from = params["from"];
359 text_t until = params["until"];
360 text_t metadataPrefix = params["metadataPrefix"];
361 // ResumptionToken resumptionToken(params["resumptionToken"]);
362 int startPos = 0;
363
364 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
365 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
366 // is therefore not enough to check that the response object in the current iteration has no docs - we
367 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
368 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
369 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
370 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
371 // throw the noRecordsMatch error.
372
373 // bool prevDocSeen = false;
374
375 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
376
377 if (params["resumptionToken"] != "") {
378 // if we're at a resumptionToken
379 if (classifier == resumptionToken->getNode()) {
380 startPos = resumptionToken->getPosition();
381 }
382 else {
383 text_t fullNode = resumptionToken->getNode();
384 text_t::iterator leafIter = fullNode.begin() + classifier.size();
385
386 // if the next character isn't a dot, blow up!
387 if (*leafIter != '.') {
388 // fatal error;
389 exit(1);
390 }
391
392 // get the first '.' after the current classifier point;
393 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
394
395 // now, create a new subpath
396 text_t nextNode = substr(fullNode.begin(), separator);
397
398 // seek forward; TODO: improve performance of this
399 for (int c = 0; c < response.numDocs; ++c) {
400 if (response.docInfo[c].OID == nextNode) {
401 startPos = c;
402 break;
403 }
404 }
405 }
406 }
407
408 for (int c = startPos; c < response.numDocs; ++c) {
409 text_t child = response.docInfo[c].OID;
410
411 // distinguish classifiers and documents by checking whether OID
412 // starts with CL or not
413 text_t childHead;
414 text_t::const_iterator start = child.begin();
415 text_t::const_iterator here = child.begin();
416 here += 2;
417 childHead = substr(start, here);
418
419 // documents we output now
420 if (childHead != "CL") {
421 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
422 // within the required date range (if specified)
423 if (this->inDateRange(from, until, collection, child, protocol, output)) {
424 // TODO: check that the document can be disseminated in the required metadataPrefix
425
426 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
427 this->prevDocSeen = true;
428 ++this->outputDocs;
429 }
430 }
431 }
432 // children which are classifiers are recursed
433 else {
434 if (resumptionToken != NULL) {
435 int depth = countchar(classifier.begin(), classifier.end(), '.');
436 resumptionToken->setOffset(depth, c+2);
437 }
438 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
439 }
440
441 if (this->outputDocs == this->configuration->resumeAfter()) {
442 // this->replyToken = new ResumptionToken(collection, params["set"], "");
443 this->replyToken = resumptionToken;
444 this->replyToken->setPosition(classifier, c+2);
445 }
446 }
447}
448
449
450
451
Note: See TracBrowser for help on using the repository browser.