source: main/trunk/greenstone2/runtime-src/src/oaiservr/oaiaction.cpp@ 24412

Last change on this file since 24412 was 24412, checked in by ak19, 13 years ago

Fixed a new failure of the OAI validation test of GS2's OAI server: where list records is given an Until date that is earlier than the earliestDatestamp. Needs to return a noRecordsMatch. It does now. The calculation of the earliestDatestamp is now shifted to the oaiaction.cpp superclass and called by both identifyaction and listrecordsaction for the identify and listrecords OAI queries, since these tasks need to work with earliestDatestamp.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.7 KB
Line 
1/**********************************************************************
2 *
3 * oaiaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "oaiaction.h"
28#include "oaitools.h"
29#include "recptprototools.h"
30
31#if defined(GSDL_USE_IOS_H)
32# if defined(__WIN32__)
33# include <strstrea.h> // vc4
34# else
35# include <strstream.h>
36# endif
37#else
38# include <sstream>
39#endif
40
41#include <time.h>
42
43oaiaction::oaiaction(const text_t &name)
44{
45 this->logout = new ofstream("oai.log", ios::app);
46 this->configuration = NULL;
47 this->name = name;
48 this->mEarliestDatestamp = "";
49}
50
51//----------------------------------------------------------------------------------------------
52
53// Over-ridden by child classes
54bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
55{
56 this->errorType = "";
57 return true;
58}
59
60//----------------------------------------------------------------------------------------------
61
62/**********
63 * Compare the supplied metadataPrefix to all those that
64 * are supported. If there is NO match, return true. If
65 * it DOES match one, return false.
66 */
67bool oaiaction::formatNotSupported(text_t &metaFormat)
68{
69 // is it in our list?
70 if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true;
71 return false;
72}
73
74//----------------------------------------------------------------------------------------------
75
76/**********
77 * Function for outputting the appropriate error(s) with the (version 2.0) request.
78 * The error(s) MUST be included in the response, and take the form:
79 * <error code="errorType">Description of error</error>
80 */
81void oaiaction::output_error(ostream &output, text_t &errorType)
82{
83 text_t description = "";
84
85 if(errorType == "badArgument"){
86 description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax";
87 }
88 else if(errorType == "noRecordsMatch"){
89 description = "No record matches all the requested parameters";
90 }
91 else if(errorType == "cannotDisseminateFormat"){
92 description = "The metadata format specified is not supported by the item or by the repository";
93 }
94 else if(errorType == "idDoesNotExist"){
95 description = "The value of the identifier is unknown or illegal in this repository";
96 }
97 else if(errorType == "badVerb"){
98 description = "Value of the verb argument is illegal, missing, or repeated";
99 }
100 else if(errorType == "noMetadataFormats"){
101 description = "There are no metadata formats available for the item";
102 }
103 else if(errorType == "badResumptionToken"){
104 description = "The value of the resumptionToken argument is invalid or expired";
105 }
106 else if(errorType == "noSetHierarchy"){
107 description = "The repository does not support sets";
108 }
109
110 output << " <error code=\"" << errorType << "\">" << description << "</error>\n";
111}
112
113//----------------------------------------------------------------------------------------------
114
115text_t oaiaction::getName()
116{
117 return this->name;
118}
119
120//----------------------------------------------------------------------------------------------
121
122/**********
123 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
124 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
125 * systems) and converts it to YYYY-MM-DD format.
126 */
127text_t oaiaction::parseDatestamp(time_t &rawtime)
128{
129 text_t year, month, day, lastModified;
130 tm *ptm;
131 ptm = gmtime(&rawtime);
132 int raw_month = ptm->tm_mon + 1;
133 int raw_day = ptm->tm_mday;
134
135 year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format
136
137 // Need the month in MM format, so if month is 1..9, add a 0 to the front
138 if(raw_month < 10){
139 month = "0";
140 month += raw_month;
141 }
142 else month = raw_month;
143
144 if(raw_day < 10){
145 day = "0";
146 day += raw_day;
147 }
148 else day = raw_day;
149
150 lastModified = year + "-" + month + "-" + day;
151
152 return lastModified;
153}
154
155//----------------------------------------------------------------------------------------------
156/**********
157 * Used by both versions to get the date & time of the client's request. The <responseDate> tag is
158 * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from
159 * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time
160 * is expected to be in UTC, and Z should simply be the character 'Z'.
161 */
162void oaiaction::getResponseDate(text_t &date)
163{
164 time_t rawtime;
165 tm *ptm;
166
167 time(&rawtime); // Get the epoch time
168
169 ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object
170
171 text_t month, day, hour, minute, second;
172 int raw_month = ptm->tm_mon + 1; // Note Jan = 0 ... Dec = 11, so add 1
173 int raw_day = ptm->tm_mday;
174 int raw_hour = ptm->tm_hour;
175 int raw_minute = ptm->tm_min;
176 int raw_second = ptm->tm_sec;
177
178 // Need the month in MM format, so if month is 1..9, add a 0 to the front
179 if(raw_month < 10){
180 month = "0";
181 }
182 month += raw_month;
183
184 // Same for days, hours, minutes and seconds
185 if(raw_day < 10){
186 day = "0";
187 }
188 day += raw_day;
189
190 if(raw_hour < 10){
191 hour = "0";
192 }
193 hour += raw_hour;
194
195 if(raw_minute < 10){
196 minute = "0";
197 }
198 minute += raw_minute;
199
200 if(raw_second < 10){
201 second = "0";
202 }
203 second += raw_second;
204
205 // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime
206 date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format
207 date += "-";
208 date += month;
209 date += "-";
210 date += day;
211 date += "T";
212 date += hour;
213 date += ":";
214 date += minute;
215 date += ":";
216 date += second;
217 // If we're using v1.1, then tack on local time offset, otherwise don't
218 if(this->configuration->getOAIVersion() == 110){
219 date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from
220 // UTC (GMT), e.g. "+8:00", "-5:00"
221 }
222 else
223 date += "Z"; // If v2.0, we put 'Z' on the end rather than the localtime offset
224}
225
226//----------------------------------------------------------------------------------------------
227/**********
228 * Does different request tags depending on the version of the OAI protocol running
229 */
230void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
231{
232 // Iterators for moving through the list of parameters (keys) specified
233 text_tmap::const_iterator here;
234 text_tmap::const_iterator end;
235 int numArgs = params.getSize();
236
237 here = params.begin();
238 end = params.end();
239
240 text_t baseURL = this->configuration->getBaseURL();
241
242 int version = this->configuration->getOAIVersion();
243
244 switch(version){
245 case 110:
246 /* Takes the form:
247 * <requestURL>http://baseURL.com/oaimain?verb="someVerb"&amp;key=value</requestURL>
248 */
249 requestURL = " <requestURL>" + baseURL;
250
251 if(numArgs == 0) break; // If no args, all done - the error will be picked up later
252
253 // The following lines will give us the "label=value" syntax
254 requestURL += "?";
255 requestURL += here->first;
256 requestURL += "=";
257 requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant
258 ++here;
259
260 while(here != end){
261 requestURL +="&amp;"; // Stick in the ampersand in URL encoding
262 requestURL += (here->first + "=" + html_safe(here->second));
263 ++here;
264 }
265 requestURL += "</requestURL>\n";
266 break;
267
268 case 200:
269 default:
270 /* Takes the form:
271 * <request verb="someVerb" key="value" key="value">
272 * http://baseURL.com/oaimain</request>
273 */
274 if(numArgs == 0) {
275 requestURL = " <request>" + baseURL + "</request>\n";
276 break;
277 }
278 requestURL = " <request " + here->first + "=\"" + html_safe(here->second) + "\"";
279 ++here;
280 while(here != end){
281 requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");
282 ++here;
283 }
284 requestURL += ">\n " + baseURL + "</request>\n";
285 break;
286 }
287}
288
289//----------------------------------------------------------------------------------------------
290/**********
291 * Send the (OAI version-dependent) response text to the output stream
292 */
293void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
294{
295 bool error;
296 text_t date, requestURL;
297
298 // Write the response date & time into 'date'
299 this->getResponseDate(date);
300 int version = this->configuration->getOAIVersion();
301
302 // validate the action
303 error = !this->validateAction(protocol, params);
304
305 // raise an error for duplicated arguments and set the
306 // error type "manually" here...
307 if (params.hasDuplicateArg() && !error) {
308 this->errorType = "badArgument";
309 error = true;
310 }
311
312 // start with the required http header
313 if (version <= 110 && error){
314 output << "Status: 400 " << this->errorType << "\n";
315 output << "Content-Type: text/xml\n\n";
316 return;
317 }
318
319 output << "Status: 200\n";
320 output << "Content-Type: text/xml\n\n";
321
322 // output xml header parts
323 output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
324
325 if(version <= 110){
326 // output OAI v1.1 action header tag
327 output << "<" << this->name;
328 output << "\n xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
329 output << "\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
330 output << "\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
331 output << "\n http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
332 }
333 else {
334 text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
335 output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
336 // output OAI v2.0 action header tag
337 output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
338 << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
339 << " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
340 << " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
341 }
342 // output current time for response
343 output << " <responseDate>" << date << "</responseDate>\n";
344
345 // output request URL. This differs depending on the OAI protocol version currently running, so
346 // the entire field - including tags - must be put into the text_t variable by getRequestURL()
347 this->getRequestURL(params, requestURL);
348
349 output << requestURL ;
350
351 if (error == false) {
352 // a string stream to write the content of the action to; this is done so that we can
353 // avoid outputting the action tag if the action's body is unsuccessful, in which
354 // case the leading tag must be suppressed
355#if defined(GSDL_USE_IOS_H)
356 ostrstream outstream;
357#else
358 ostringstream outstream;
359#endif
360
361 // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
362 //
363 // An action that outputs no content should raise an error state to suppress the
364 // matching opening and close tags if it outputs no content in OAI 2.0
365 error = !this->output_content(outstream, protocol, params);
366
367 // output the leading tag if no error occurred
368 if (error == false) {
369 if (version >= 200) {
370 this->output_action_tag(output, true);
371 }
372 }
373
374 // now output the body of the action content
375#if defined(GSDL_USE_IOS_H)
376 outstream << ends; // Ensure outstream is null-terminated correctly
377#endif
378 output << outstream.str();
379 }
380 else {
381 if (version >= 200) {
382 this->output_error(output, this->errorType);
383 }
384 }
385
386 // close out our response - both versions need this line, but v2.0 only needs it if there was no error
387 if((version == 110) || (version >= 200 && error == false)){
388 this->output_action_tag(output, false);
389 }
390 if(version >= 200){
391 output << "</OAI-PMH>\n";
392 }
393}
394
395void oaiaction::output_action_tag(ostream &output, bool openTag)
396{
397 output << " <";
398 if (!openTag) {
399 output << "/";
400 }
401 output << this->name << ">" << endl;
402}
403
404void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified,
405 const text_tarray &memberOf, int oaiVersion)
406{
407 output << " <header>" << endl;
408 output << " <identifier>" << oaiLabel << "</identifier>" << endl;
409 output << " <datestamp>" << lastModified << "</datestamp>" << endl;
410
411 text_t collection_id;
412 // Find the collection id from oai:repos-id:collection:doc
413 oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);
414 if(oaiVersion >= 200){
415 text_tarray::const_iterator member = memberOf.begin();
416 text_tarray::const_iterator memEnd = memberOf.end();
417
418 // As well as all the subsets that a doc appears in, it is also a member of the 'collection' set
419 output << " <setSpec>" << collection_id << "</setSpec>" << endl;
420 while (member != memEnd) {
421 text_t oaiSet = *member;
422 oaiclassifier::toOAI(collection_id, oaiSet);
423 output << " <setSpec>" << oaiSet << "</setSpec>" << endl;
424 ++member;
425 }
426 }
427 output << " </header>" << endl;
428}
429
430void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
431{
432 text_t temp;
433
434
435 MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
436 MetadataInfo_tmap::iterator end = doc_info.metadata.end();
437
438 while(current != end){
439 temp = current->first;
440 lc(temp); // lowercased for string comparison below
441 if(temp == "gs.oaidatestamp" && current->second.values[0] != "") { // user specified a (non-empty) oaidatestamp as gs metadata
442 // assume it is correct format
443 lastModified = current->second.values[0];
444 return;
445 }
446 else{
447 if (temp == "oailastmodified" && lastModified == "" && current->second.values.size() >= 1) {
448 lastModified = current->second.values[0];
449 time_t raw_time = (time_t)lastModified.getint();
450 lastModified = this->parseDatestamp(raw_time);
451 }
452 }
453 ++current;
454 }
455}
456
457bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection,
458 const text_t &OID, recptproto *protocol, ostream &logout)
459{
460 FilterResponse_t response;
461 text_tset metadata;
462 bool status_ok = get_info(OID, collection, "", metadata, false, protocol, response, logout);
463 bool not_too_early = false, not_too_recent = false;
464
465 if(status_ok) {
466 ResultDocInfo_t doc_info = response.docInfo[0];
467 text_t lastModDate;
468 this->getLastModifiedDate(doc_info, lastModDate);
469
470 // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison
471 if(from != ""){
472 if(from <= lastModDate)
473 not_too_early = true;
474 }
475 else
476 not_too_early = true; // If there's no FROM field, then the record can't be too early
477
478 if(until != ""){
479 if(lastModDate <= until)
480 not_too_recent = true;
481 }
482 else
483 not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent
484
485 if(not_too_early && not_too_recent)
486 return true;
487 else
488 return false;
489 }
490 else
491 return false;
492}
493
494text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs &params) {
495
496 text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet
497
498 //text_t version = (this->configuration->getOAIVersion() <= 110) ? (text_t)"1.1":(text_t)"2.0";
499 //if(version == "2.0"){
500
501 // earliestDatestamp *should* be the YYYY-MM-DD format of the oldest lastmodified record in the
502 // repository, but we're just setting it to be the default oldest possible date - ugly, but judged
503 // not to be worth the effort of trolling through all the lastmodified dates (by others with more
504 // say than me)
505
506 // The above was before. However, now we mirror GS3 way of dealing with
507 // earliestDatestamp by going through the earliestDatestamp field of each OAI
508 // collection's build.cfg in order to work out earliestdatestamp of this Repository:
509 // by going through all the collections and getting the earliest among the
510 // "earliestDatestamp" values stored for each collection in its build.cfg
511 // (the earliestDatestamp for a collection has already been extracted from
512 // their build.cfg file at this point by collectserver::configure. The field
513 // is declared in comtypes.h)
514
515
516 // Get a list of the OAI-enabled collections available
517 text_tarray& collections = this->configuration->getCollectionsList();
518 if (collections.size() > 0)
519 {
520 // get the identifier from the params
521 text_t identifier = params["identifier"];
522 text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":";
523 identifier.replace(oai_OID_prefix, "");
524
525 // Get the current collection from the identifier
526 text_t collection_name = "";
527 oaiclassifier::toGSDL(collection_name, identifier);
528
529 // Find the starting collection
530 text_tarray::iterator collection_iterator = collections.begin();
531 while (collection_iterator != collections.end())
532 {
533 if (collection_name == "" || collection_name == *collection_iterator)
534 {
535 break;
536 }
537
538 collection_iterator++;
539 }
540
541 // Now loop through the remaining collections
542 // to work out the earliest datestamp
543 while (collection_iterator != collections.end())
544 {
545 collection_name = (*collection_iterator);
546
547 ColInfoResponse_t cinfo;
548 comerror_t err;
549 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
550 if (err == noError) {
551 text_t eDatestamp = cinfo.earliestDatestamp;
552 time_t raw_time = (time_t)eDatestamp.getint();
553 eDatestamp = this->parseDatestamp(raw_time);
554
555 if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection
556 earliestDatestamp = eDatestamp;
557 } else if(eDatestamp < earliestDatestamp) {
558 earliestDatestamp = eDatestamp;
559 }
560 }
561 collection_iterator++;
562
563 }
564 }
565
566 //}
567
568 // if repository's earliestDatestamp is still unset, default to unix epoch time
569 if(earliestDatestamp == "") {
570 earliestDatestamp = "1970-01-01";
571 }
572
573 this->mEarliestDatestamp = earliestDatestamp;
574 return mEarliestDatestamp;
575}
Note: See TracBrowser for help on using the repository browser.