source: trunk/gsdl/src/oaiservr/abstractlistaction.cpp@ 9608

Last change on this file since 9608 was 9608, checked in by kjdon, 19 years ago

added in x++ -> ++x changes submitted by Emanuel Dejanu

  • Property svn:keywords set to Author Date Id Revision
File size: 14.1 KB
Line 
1#include "abstractlistaction.h"
2#include "OIDtools.h"
3
4#include "oaitools.h"
5
6// The following makes sure that we don't actually use ResumptionTokens
7#define MAXRECORDS -1
8
9bool abstractlistaction::validateAction(recptproto *protocol, oaiargs &params, int &numArgs)
10{
11 text_t from = params["from"];
12 text_t until = params["until"];
13
14 // from date must be less than, or equal to, until date
15 if ((from != "") && (until != "") && !(from <= until)){
16 this->errorType = "badArgument";
17 return false;
18 }
19
20 if (from != ""){
21 // Must be in the form YYYY-MM-DD
22 if(from.size() != 10){
23 this->errorType = "badArgument";
24 return false;
25 }
26 else{
27 if(from[4] != '-' || from[7] != '-'){
28 this->errorType = "badArgument";
29 return false;
30 }
31 }
32 ++numArgs; // Increase valid args count
33 }
34
35 if (until != ""){
36 // Must be in the form YYYY-MM-DD
37 if(until.size() != 10){
38 this->errorType = "badArgument";
39 return false;
40 }
41 else{
42 if(until[4] != '-' || until[7] != '-'){
43 this->errorType = "badArgument";
44 return false;
45 }
46 }
47 ++numArgs; // Increase valid args count
48 }
49
50 if (params["set"] != "") {
51 text_t gsdlSet = params["set"];
52 text_t gsdlCollect = "";
53
54 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
55 // set specified after the name of the collection however, then gsdlSet is empty.
56 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
57
58 comerror_t err;
59 ColInfoResponse_t cinfo;
60
61 // check that the collection is accessible
62 protocol->get_collectinfo(gsdlCollect, cinfo, err, cerr);
63 if (err != noError) {
64 this->errorType = "badArgument";
65 return false;
66 }
67
68 // check the child set if it is given
69 if (gsdlSet != "") {
70 if (!this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
71 this->errorType = "badArgument";
72 return false;
73 }
74 }
75 ++numArgs;
76 }
77
78 if (params["resumptionToken"] != "") {
79 ResumptionToken token(params["resumptionToken"]);
80
81 if (!token.isValid()) {
82 this->errorType = "badResumptionToken";
83 return false;
84 }
85 ++numArgs;
86 }
87
88 this->errorType = "";
89 return true;
90}
91
92//--------------------------------------------------------------------------------------------------
93
94bool abstractlistaction::output_content(ostream &output, recptproto *protocol, oaiargs &params)
95{
96 text_t from = params["from"];
97 text_t until = params["until"];
98 text_t metaFormat = params["metadataPrefix"];
99 bool prevDocSeen;
100 ResumptionToken *token = NULL;
101
102 // start the call; clear down the total number of output documents
103 this->outputDocs = 0;
104
105 // We don't actually handle resumptionTokens yet; if we get one, ignore it
106 if (params["resumptionToken"] != "") {
107 token = new ResumptionToken(params["resumptionToken"]);
108 }
109
110 // if we've been asked for a set, then use it!
111 if (params["set"] != "") {
112 // get the children of this set
113 text_t gsdlSet = params["set"];
114 text_t gsdlCollect = "";
115
116 // given 'demo:CL2', toGSDL returns 'demo' in gsdlCollect and 'CL2' in gsdlSet. If there is no further
117 // set specified after the name of the collection however, then gsdlSet is empty.
118 oaiclassifier::toGSDL(gsdlCollect, gsdlSet);
119
120 // If gsdlSet is empty, then the user is requesting all the identifiers for the collection, so
121 // we simply output all docs via their oai_id tag. But if a specific subset IS requested, then
122 // use recurse_set() to traverse any sub classifiers to find the relevant docs.
123 if(gsdlSet == ""){
124 ColInfoResponse_t cinfo;
125 comerror_t err;
126 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
127 }
128 else {
129 if (this->check_classifier(protocol, gsdlCollect, gsdlSet)) {
130 this->recurse_set(output, protocol, gsdlCollect, gsdlSet, params);
131 }
132 }
133 }
134 // output all records in all hierarchies
135 else {
136 this->output_content_for_all(output, protocol, params);
137 }
138
139 // If - regardless of set required - no documents have been seen, throw an error.
140 if (this->configuration->getOAIVersion() >= 200 && this->prevDocSeen == false) {
141 errorType = "noRecordsMatch";
142 this->output_error(output, errorType);
143
144 return false;
145 }
146
147 // do a resumption token if required; errors cancel a token...
148 if (this->replyToken != NULL && this->errorType != "") {
149 output << "<resumptionToken>";
150 output << this->replyToken->getToken();
151 output << "</resumptionToken>";
152 }
153
154 return true;
155}
156
157//--------------------------------------------------------------------------------------------------
158
159void abstractlistaction::output_content_for_col(ostream &output, recptproto *protocol, text_t &gsdlCollect,
160 ColInfoResponse_t &cinfo, comerror_t &err, oaiargs &params)
161{ int startDoc = 0;
162
163 // get the collection information
164 protocol->get_collectinfo(gsdlCollect, cinfo, err, *this->logout);
165
166 // check resumption token
167 if (params["resumptionToken"] != "") {
168 ResumptionToken token(params["resumptionToken"]);
169 if (token.getCollection() == gsdlCollect) {
170 startDoc = token.getPosition() - 1; // first document is said to be 1..
171 }
172 }
173
174 // If numDocs is 0, do nothing - this->prevDocSeen will stay false if this is the only collection
175 // looked at, or will keep whatever value it had prior to this col (ensures that if the flag has
176 // been set to true by a previous collection that this won't overwrite it to be false).
177 if (cinfo.numDocs > 0) {
178 int errorCount = 0; // Count the number of errors found in the given collection
179 text_t from = params["from"];
180 text_t until = params["until"];
181
182 for (long i = startDoc; i < cinfo.numDocs; ++i) {
183 if (errorCount > 3) { // If num errors reaches the cut-off value, bail.
184 cerr << "Error: too many records(" << errorCount << ") in the " << gsdlCollect
185 << " collection have invalid or non-existant oai_ids - skipping remainder of collection.\n";
186 return;
187 }
188
189 text_t oai_id = "oai.";
190 oai_id += i;
191
192 text_t gsdl_id = oaiclassifier::getGSDL_OID(gsdlCollect, oai_id, protocol, *this->logout);
193
194 if (gsdl_id == "") { // If the string is empty, then the document didn't have an oai_id, so
195 ++errorCount; // increase error count
196 continue;
197 }
198
199 // Check that the item with the HASH ID 'gsdl_id' has a lastmodified field that fits within
200 // the required date range (if specified).
201 if (this->inDateRange(params["from"], params["until"], gsdlCollect, gsdl_id, protocol, output)) {
202 if (this->output_document(output, protocol, gsdlCollect, gsdl_id, params["metadataPrefix"])) {
203 // this should be an IF statement, where prevDocSeen is only set to true if the above
204 // function call returns true (indicating that the doc supported the metadata prefix) but
205 // for some reason this is always false. This means that if no doc in the requested set supports
206 // the metadata format, the "no records match" error that should be thrown won't be...
207 //
208 // GRB: the above comment is no longer true; proper checks are made
209 this->prevDocSeen = true;
210 ++this->outputDocs;
211 }
212 }
213
214 // if we've output MAXRECORDS documents; prepare a resumptionToken
215 if (this->outputDocs == MAXRECORDS) {
216 this->replyToken = new ResumptionToken(gsdlCollect, "", "");
217 this->replyToken->setPosition("", i+2);
218 break;
219 }
220 }
221
222 cinfo.clear(); // Clear for next collection to use (if there is one).
223 }
224}
225
226//--------------------------------------------------------------------------------------------
227// Returns true if at least one document record is found
228void abstractlistaction::output_content_for_all(ostream &output, recptproto *protocol, oaiargs &params)
229{
230 ColInfoResponse_t cinfo;
231 comerror_t err;
232 text_tarray collections;
233 text_t gsdlCollect = "";
234
235 // get a list of the collections available
236 protocol->get_collection_list(collections, err, output);
237
238 for(int current_col = 0; current_col < collections.size(); ++current_col){
239 gsdlCollect = collections[current_col];
240 this->output_content_for_col(output, protocol, gsdlCollect, cinfo, err, params);
241 }
242}
243
244//-------------------------------------------------------------------------------------------------
245// Check that the requested from/until dates don't include a time, as this would be asking for too
246// fine a level of granularity, one that greenstone doesn't support. An OAI error must be thrown.
247/*
248bool abstractlistaction::granularityTooFine(text_t &from, text_t &until)
249{
250 if (from != "" && from.){
251
252 }
253
254}
255*/
256//-------------------------------------------------------------------------------------------------
257
258bool abstractlistaction::check_classifier(recptproto *protocol, const text_t &collection,
259 const text_t &classifier)
260{ text_t topClass;
261 FilterResponse_t response;
262 text_tset metadata;
263 ofstream logout("oai.log", ios::app);
264
265 // exclude false children of a top-level classifier immediately...
266 if (!get_info(classifier, collection, "", metadata, false, protocol, response, logout)) {
267 return false;
268 }
269
270 // now check the top-level parent
271 metadata.insert("supportsmemberof");
272
273 text_t::const_iterator dot = findchar(classifier.begin(), classifier.end(), '.');
274 if (dot != classifier.end()) {
275 topClass = substr(classifier.begin(), dot);
276 }
277 else {
278 topClass = classifier;
279 }
280
281 if (!get_info(topClass, collection, "", metadata, false, protocol, response, logout)) {
282 return false;
283 }
284
285 if (response.docInfo[0].metadata["supportsmemberof"].values.size() == 0) {
286 return false;
287 }
288
289 if (response.docInfo[0].metadata["supportsmemberof"].values[0] != "true") {
290 return false;
291 }
292
293 return true;
294}
295
296void abstractlistaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection,
297 const text_t &classifier, oaiargs &params)
298{
299 // metadata for this call
300 FilterResponse_t response;
301 text_tset metadata;
302 ofstream logout("oai.log", ios::app);
303 text_t from = params["from"];
304 text_t until = params["until"];
305 text_t metadataPrefix = params["metadataPrefix"];
306 ResumptionToken resumptionToken(params["resumptionToken"]);
307 int startPos = 0;
308
309 // This is a recursive function, and so just because the current set is empty doesn't mean we necessarily
310 // want to throw a 'noRecordsMatch' error; another set (parent/sibling/child) may have had documents. It
311 // is therefore not enough to check that the response object in the current iteration has no docs - we
312 // must also verify that NO OTHER set has had any documents. This is done with the 'prevDocSeen' flag.
313 // It is set to FALSE initially, but as soon as we see a set that isn't empty, it is set to TRUE. The
314 // 'noRecordsMatch' error will only be thrown if, after all appropriate sets have been recursed into,
315 // the 'prevDocSeen' flag is still FALSE. The function returns false if no docs were seen, allowing us to
316 // throw the noRecordsMatch error.
317
318 // bool prevDocSeen = false;
319
320 get_children(classifier, collection, "", metadata, false, protocol, response, *this->logout);
321
322 if (params["resumptionToken"] != "") {
323 // if we're at a resumptionToken
324 if (classifier == resumptionToken.getNode()) {
325 startPos = resumptionToken.getPosition();
326 }
327 else {
328 text_t fullNode = resumptionToken.getNode();
329 text_t::iterator leafIter = fullNode.begin() + classifier.size();
330
331 // if the next character isn't a dot, blow up!
332 if (*leafIter != '.') {
333 // fatal error;
334 exit(1);
335 }
336
337 // get the first '.' after the current classifier point;
338 text_t::iterator separator = findchar(leafIter + 1, fullNode.end(), '.');
339
340 // now, create a new subpath
341 text_t nextNode = substr(fullNode.begin(), separator);
342
343 // seek forward; TODO: improve performance of this
344 for (int c = 0; c < response.numDocs; ++c) {
345 if (response.docInfo[c].OID == nextNode) {
346 startPos = c;
347 break;
348 }
349 }
350 }
351 }
352
353 for (int c = startPos; c < response.numDocs; ++c) {
354 text_t child = response.docInfo[c].OID;
355
356 // check for HASH items and exclude them
357 text_t childHead;
358 text_t::const_iterator start = child.begin();
359 text_t::const_iterator here = child.begin();
360 here += 4;
361 childHead = substr(start, here);
362
363 // documents we output now
364 if (childHead == "HASH") {
365 // Check that the item with the HASH ID 'gsdl_id' has a lastmodified field that fits
366 // within the required date range (if specified)
367 if (this->inDateRange(from, until, collection, child, protocol, output)) {
368 // TODO: check that the document can be disseminated in the required metadataPrefix
369
370 if (this->output_document(output, protocol, collection, child, metadataPrefix)) {
371 this->prevDocSeen = true;
372 ++this->outputDocs;
373 }
374 }
375 }
376 // children which are classifiers are recursed
377 else {
378 this->recurse_set(output, protocol, collection, child, params);
379 }
380
381 if (this->outputDocs == MAXRECORDS) {
382 this->replyToken = new ResumptionToken(collection, params["set"], "");
383 this->replyToken->setPosition(classifier, c+2);
384 }
385 }
386}
387
388/*
389bool listrecsaction::recurse_set(ostream &output, recptproto *protocol, const text_t &collection, const text_t &classifier, const text_t &metadataPrefix)
390{
391 FilterResponse_t response;
392 text_tset metadata;
393 ofstream logout("oai.log", ios::app);
394 bool prevDocSeen = false;
395
396 get_children(classifier, collection, metadata, false, protocol, response, logout);
397
398 for (int c = 0; c < response.numDocs; ++c) {
399 text_t child = response.docInfo[c].OID;
400
401 // check for HASH items and exclude them
402 text_t childHead;
403 text_t::const_iterator start = child.begin();
404 text_t::const_iterator here = child.begin();
405 here += 4;
406 childHead = substr(start, here);
407
408 // documents we output now
409 if (childHead == "HASH") {
410 this->output_document(output, protocol, collection, child, metadataPrefix);
411 ++this->outputDocs;
412 prevDocSeen = true;
413 }
414 // children which are classifiers are recursed
415 else {
416 prevDocSeen = this->recurse_set(output, protocol, collection, child, metadataPrefix);
417 }
418 }
419 return prevDocSeen;
420}
421*/
422
423
424
425
Note: See TracBrowser for help on using the repository browser.