source: gsdl/trunk/src/oaiservr/abstractlistaction.cpp@ 16310

Last change on this file since 16310 was 15428, checked in by mdewsnip, 16 years ago

Changed all the "OIDtools.h" to "recptprototools.h".

  • Property svn:keywords set to Author Date Id Revision
File size: 15.0 KB
Line 
1#include "abstractlistaction.h"
2#include "recptprototools.h"
3
4#include "oaitools.h"
5
6bool abstractlistaction::validateAction(recptproto *protocol, oaiargs &params, int &numArgs)
7{
8 // Remove any parameters that aren't valid for this action
9 text_tmap::const_iterator param_iterator = params.begin();
10 while (param_iterator != params.end())
11 {
12 if (param_iterator->first != "verb" &&
13 param_iterator->first != "from" &&
14 param_iterator->first != "until" &&
15 param_iterator->first != "set" &&
16 param_iterator->first != "resumptionToken" &&
17 param_iterator->first != "metadataPrefix")
18 {
19 params.erase(param_iterator->first);
20 }
21
22 param_iterator++;
23 }
24
25 text_t from = params["from"];
26 text_t until = params["until"];
27
28 // from date must be less than, or equal to, until date
29 if ((from != "") && (until != "") && !(from <= until)){
30 this->errorType = "badArgument";
31 return false;
32 }
33
34 if (from != ""){
35 // Must be in the form YYYY-MM-DD
36 if(from.size() != 10){
37 this->errorType = "badArgument";
38 params.erase("from");
39 }
40 else{
41 if(from[4] != '-' || from[7] != '-'){
42 this->errorType = "badArgument";
43 params.erase("from");
44 }
45 }
46 ++numArgs; // Increase valid args count
47 }
48
49 if (until != ""){
50 // Must be in the form YYYY-MM-DD
51 if(until.size() != 10){
52 this->errorType = "badArgument";
53 params.erase("until");
54 }
55 else{
56 if(until[4] != '-' || until[7] != '-'){
57 this->errorType = "badArgument";
58 params.erase("until");
59 }
60 }
61 ++numArgs; // Increase valid args count
62 }
63
64 if (this->errorType == "badArgument")
65 {
66 return false;
67 }
68
69 if (params["set"] != "") {
70 text_t gsdlSet = params["set"];
71 text_t gsdlCollect = "";
72
73 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
74 // set specified after the name of the collection however, then gsdlSet is empty.
75 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
76
77 comerror_t err;
78 ColInfoResponse_t cinfo;
79
80 // check that the collection is accessible
81 protocol->get_collectinfo(gsdlCollect, cinfo, err, cerr);
82 if (err != noError) {
83 this->errorType = "badArgument";
84 return false;
85 }
86
87 // exclude collections that are not listed in the configured OAI list
88 text_tarray &collections = this->configuration->getCollectionsList();
89 int c;
90 for (c = 0; c < collections.size(); c ++) {
91 if (collections[c] == gsdlCollect)
92 break;
93 }
94 if (c == collections.size()) {
95 this->errorType = "badArgument";
96 return false;
97 }
98
99 if (gsdlSet != "") {
100 // check the child set if it is given
101 if (!this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
102 this->errorType = "badArgument";
103 return false;
104 }
105 }
106 ++numArgs;
107 }
108
109 if (params["resumptionToken"] != "") {
110 ResumptionToken token(params["resumptionToken"]);
111
112 if (!token.isValid()) {
113 this->errorType = "badResumptionToken";
114 return false;
115 }
116 ++numArgs;
117 }
118
119 this->errorType = "";
120 return true;
121}
122
123//--------------------------------------------------------------------------------------------------
124
125bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
126{
127 text_t from = params["from"];
128 text_t until = params["until"];
129 text_t metaFormat = params["metadataPrefix"];
130 bool prevDocSeen;
131 ResumptionToken *token = NULL;
132
133 // start the call; clear down the total number of output documents
134 this->outputDocs = 0;
135
136 // We don't actually handle resumptionTokens yet; if we get one, ignore it
137 if (params["resumptionToken"] != "") {
138 token = new ResumptionToken(params["resumptionToken"]);
139 }
140
141 this->replyToken = NULL;
142
143 // if we've been asked for a set, then use it!
144 if (params["set"] != "") {
145 // get the children of this set
146 text_t gsdlSet = params["set"];
147 text_t gsdlCollect = "";
148
149 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
150 // set specified after the name of the collection however, then gsdlSet is empty.
151 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
152
153 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
154 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
155 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
156 if(gsdlSet == ""){
157 ColInfoResponse_t cinfo;
158 comerror_t err;
159 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
160 }
161 else {
162 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
163 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params, token);
164 }
165 }
166 }
167 // output all records in all hierarchies
168 else {
169 this->output_content_for_all(output, protocol, params);
170 }
171
172 // If - regardless of set required - no documents have been seen, throw an error.
173 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
174 errorType = "noRecordsMatch";
175 this->output_error(output, errorType);
176
177 return false;
178 }
179
180 // do a resumption token if required; errors cancel a token...
181 if (this->replyToken != NULL && this->errorType == "") {
182 output << " <resumptionToken>" << endl;
183 output << " " << this->replyToken->getToken() << endl;
184 output << " </resumptionToken>" << endl;
185 }
186
187 return true;
188}
189
190//--------------------------------------------------------------------------------------------------
191
192void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
193 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
194{ int startDoc = 0;
195
196 // get the collection information
197 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
198
199 // check resumption token
200 if (params["resumptionToken"] != "") {
201 ResumptionToken token(params["resumptionToken"]);
202 if (token.getCollection() == gsdlCollect) {
203 startDoc = token.getPosition() - 1; // first document is said to be 1..
204 }
205 }
206
207 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
208 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
209 // been set to true by a previous collection that this won't overwrite it to be false).
210 if (cinfo.numDocs > 0) {
211 int errorCount = 0; // Count the number of errors found in the given collection
212 text_t from = params["from"];
213 text_t until = params["until"];
214
215 for (long i = startDoc; i < cinfo.numDocs; ++i) {
216 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
217 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
218 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
219 return;
220 }
221
222 text_t oai_id = "oai.";
223 oai_id += i;
224
225 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
226
227 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
228 ++errorCount; // increase error count
229 continue;
230 }
231
232
233 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits within
234 // the required date range (if specified).
235 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
236 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
237 // this should be an IF statement, where prevDocSeen is only set to true if the above
238 // function call returns true (indicating that the doc supported the metadata prefix) but
239 // for some reason this is always false. This means that if no doc in the requested set supports
240 // the metadata format, the "no records match" error that should be thrown won't be...
241 //
242 // GRB: the above comment is no longer true; proper checks are made
243 this->prevDocSeen = true;
244 ++this->outputDocs;
245 }
246 }
247
248 // if we've output the number of resumption documents; prepare a resumptionToken
249 if (this->outputDocs == this->configuration->resumeAfter()) {
250 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
251 this->replyToken->setPosition("", i+2);
252 break;
253 }
254 }
255
256 cinfo.clear(); // Clear for next collection to use (if there is one).
257 }
258}
259
260//--------------------------------------------------------------------------------------------
261// Returns true if at least one document record is found
262void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
263{
264 ColInfoResponse_t cinfo;
265 comerror_t err;
266 text_tarray collections;
267 text_t gsdlCollect = "";
268 ResumptionToken *token = NULL;
269
270 // get a list of the collections available
271 collections = this->configuration->getCollectionsList();
272 // protocol->get_collection_list(collections, err, output);
273
274 if (params["resumptionToken"] != "") {
275 token = new ResumptionToken(params["resumptionToken"]);
276 }
277
278 for(int current_col = 0; current_col < collections.size(); ++current_col){
279 gsdlCollect = collections[current_col];
280
281 // ignore all leading collections before the one that matches the resumptiontoken
282 if (token != NULL &&
283 token->getCollection() != gsdlCollect)
284 { continue;
285 }
286
287 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
288
289 // once we've output at least one collection, continue
290 // outputting all others until the resumption total hits
291 token = NULL;
292
293 if (this->outputDocs == this->configuration->resumeAfter()) {
294 break;
295 }
296 }
297}
298
299//-------------------------------------------------------------------------------------------------
300// Check that the requested from/until dates don't include a time, as this would be asking for too
301// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
302/*
303bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
304{
305 if (from != "" && from.){
306
307 }
308
309}
310*/
311//-------------------------------------------------------------------------------------------------
312
313bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
314 const text_t &classifier)
315{ text_t topClass;
316 FilterResponse_t response;
317 text_tset metadata;
318 ofstream logout("oai.log", ios::app);
319
320 // exclude false children of a top-level classifier immediately...
321 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
322 return false;
323 }
324
325 // now check the top-level parent
326 metadata.insert("supportsmemberof");
327
328 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
329 if (dot != classifier.end()) {
330 topClass = substr(classifier.begin(), dot);
331 }
332 else {
333 topClass = classifier;
334 }
335
336 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
337 return false;
338 }
339
340 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
341 return false;
342 }
343
344 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
345 return false;
346 }
347
348 return true;
349}
350
351void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
352 const text_t &classifier, oaiargs &params, ResumptionToken *resumptionToken)
353{
354 // metadata for this call
355 FilterResponse_t response;
356 text_tset metadata;
357 ofstream logout("oai.log", ios::app);
358 text_t from = params["from"];
359 text_t until = params["until"];
360 text_t metadataPrefix = params["metadataPrefix"];
361 // ResumptionToken resumptionToken(params["resumptionToken"]);
362 int startPos = 0;
363
364 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
365 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
366 // is therefore not enough to check that the response object in the current iteration has no docs - we
367 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
368 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
369 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
370 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
371 // throw the noRecordsMatch error.
372
373 // bool prevDocSeen = false;
374
375 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
376
377 if (params["resumptionToken"] != "") {
378 // if we're at a resumptionToken
379 if (classifier == resumptionToken->getNode()) {
380 startPos = resumptionToken->getPosition();
381 }
382 else {
383 text_t fullNode = resumptionToken->getNode();
384 text_t::iterator leafIter = fullNode.begin() + classifier.size();
385
386 // if the next character isn't a dot, blow up!
387 if (*leafIter != '.') {
388 // fatal error;
389 exit(1);
390 }
391
392 // get the first '.' after the current classifier point;
393 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
394
395 // now, create a new subpath
396 text_t nextNode = substr(fullNode.begin(), separator);
397
398 // seek forward; TODO: improve performance of this
399 for (int c = 0; c < response.numDocs; ++c) {
400 if (response.docInfo[c].OID == nextNode) {
401 startPos = c;
402 break;
403 }
404 }
405 }
406
407 // We need to subtract one from the startPos value to turn it into an index value
408 startPos--;
409 }
410
411 for (int c = startPos; c < response.numDocs; ++c) {
412 text_t child = response.docInfo[c].OID;
413
414 // distinguish classifiers and documents by checking whether OID
415 // starts with CL or not
416 text_t childHead;
417 text_t::const_iterator start = child.begin();
418 text_t::const_iterator here = child.begin();
419 here += 2;
420 childHead = substr(start, here);
421
422 // documents we output now
423 if (childHead != "CL") {
424 // Check that the item with the 0ID 'gsdl_id' has a lastmodified field that fits
425 // within the required date range (if specified)
426 if (this->inDateRange(from, until, collection, child, protocol, output)) {
427 // TODO: check that the document can be disseminated in the required metadataPrefix
428
429 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
430 this->prevDocSeen = true;
431 ++this->outputDocs;
432 }
433 }
434 }
435 // children which are classifiers are recursed
436 else {
437 if (resumptionToken != NULL) {
438 int depth = countchar(classifier.begin(), classifier.end(), '.');
439 resumptionToken->setOffset(depth, c+2);
440 }
441 this->recurse_set(output, protocol, collection, child, params, resumptionToken);
442 }
443
444 if (this->outputDocs == this->configuration->resumeAfter()) {
445 this->replyToken = new ResumptionToken(collection, params["set"], "");
446 this->replyToken->setPosition(classifier, c+2);
447 break;
448 }
449 }
450}
451
452
453
454
Note: See TracBrowser for help on using the repository browser.