source: main/trunk/greenstone2/runtime-src/src/oaiservr/oaiaction.cpp

Last change on this file was 31903, checked in by ak19, 7 years ago

I hope these are all the changes necessary on the runtime side of GS2 to get the OAI server validation working for GS2: instead of working out the earliest datetime stamp of the OAI repository by comparing the builddate in index/build.cfg of each OAI collection and selecting the earliest, the oai-inf.db is now storing the special earliesttimestamp record. The timestamp of this record represents its collection's earliest timestamp. And the earliest of these among all OAI collections is now the earliest datetime of the OAI repository.

  • Property svn:keywords set to Author Date Id Revision
File size: 23.0 KB
Line 
1/**********************************************************************
2 *
3 * oaiaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "oaiaction.h"
28#include "oaitools.h"
29#include "recptprototools.h"
30
31#if defined(GSDL_USE_IOS_H)
32# if defined(__WIN32__)
33# include <strstrea.h> // vc4
34# else
35# include <strstream.h>
36# endif
37#else
38# include <sstream>
39#endif
40
41#include <time.h>
42
43oaiaction::oaiaction(const text_t &name)
44{
45 this->logout = new ofstream("oai.log", ios::app);
46 this->configuration = NULL;
47 this->name = name;
48 this->mEarliestDatestamp = "";
49}
50
51//----------------------------------------------------------------------------------------------
52
53// Over-ridden by child classes
54bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
55{
56 this->errorType = "";
57 return true;
58}
59
60//----------------------------------------------------------------------------------------------
61
62/**********
63 * Compare the supplied metadataPrefix to all those that
64 * are supported. If there is NO match, return true. If
65 * it DOES match one, return false.
66 */
67bool oaiaction::formatNotSupported(text_t &metaFormat)
68{
69 // is it in our list?
70 if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true;
71 return false;
72}
73
74//----------------------------------------------------------------------------------------------
75
76/**********
77 * Function for outputting the appropriate error(s) with the (version 2.0) request.
78 * The error(s) MUST be included in the response, and take the form:
79 * <error code="errorType">Description of error</error>
80 */
81void oaiaction::output_error(ostream &output, text_t &errorType)
82{
83 text_t description = "";
84
85 if(errorType == "badArgument"){
86 description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax";
87 }
88 else if(errorType == "noRecordsMatch"){
89 description = "No record matches all the requested parameters";
90 }
91 else if(errorType == "cannotDisseminateFormat"){
92 description = "The metadata format specified is not supported by the item or by the repository";
93 }
94 else if(errorType == "idDoesNotExist"){
95 description = "The value of the identifier is unknown or illegal in this repository";
96 }
97 else if(errorType == "badVerb"){
98 description = "Value of the verb argument is illegal, missing, or repeated";
99 }
100 else if(errorType == "noMetadataFormats"){
101 description = "There are no metadata formats available for the item";
102 }
103 else if(errorType == "badResumptionToken"){
104 description = "The value of the resumptionToken argument is invalid or expired";
105 }
106 else if(errorType == "noSetHierarchy"){
107 description = "The repository does not support sets";
108 }
109
110 output << " <error code=\"" << errorType << "\">" << description << "</error>\n";
111}
112
113//----------------------------------------------------------------------------------------------
114
115text_t oaiaction::getName()
116{
117 return this->name;
118}
119
120//----------------------------------------------------------------------------------------------
121
122/**********
123 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
124 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
125 * systems) and converts it to YYYY-MM-DD format.
126 */
127text_t oaiaction::parseDatestamp(time_t &rawtime)
128{
129 text_t year, month, day, lastModified;
130 tm *ptm;
131 ptm = gmtime(&rawtime);
132 int raw_month = ptm->tm_mon + 1;
133 int raw_day = ptm->tm_mday;
134
135 year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format
136
137 // Need the month in MM format, so if month is 1..9, add a 0 to the front
138 if(raw_month < 10){
139 month = "0";
140 month += raw_month;
141 }
142 else month = raw_month;
143
144 if(raw_day < 10){
145 day = "0";
146 day += raw_day;
147 }
148 else day = raw_day;
149
150 lastModified = year + "-" + month + "-" + day;
151
152 return lastModified;
153}
154
155//----------------------------------------------------------------------------------------------
156/**********
157 * Used by both versions to get the date & time of the client's request. The <responseDate> tag is
158 * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from
159 * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time
160 * is expected to be in UTC, and Z should simply be the character 'Z'.
161 */
162void oaiaction::getResponseDate(text_t &date)
163{
164 time_t rawtime;
165 tm *ptm;
166
167 time(&rawtime); // Get the epoch time
168
169 ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object
170
171 text_t month, day, hour, minute, second;
172 int raw_month = ptm->tm_mon + 1; // Note Jan = 0 ... Dec = 11, so add 1
173 int raw_day = ptm->tm_mday;
174 int raw_hour = ptm->tm_hour;
175 int raw_minute = ptm->tm_min;
176 int raw_second = ptm->tm_sec;
177
178 // Need the month in MM format, so if month is 1..9, add a 0 to the front
179 if(raw_month < 10){
180 month = "0";
181 }
182 month += raw_month;
183
184 // Same for days, hours, minutes and seconds
185 if(raw_day < 10){
186 day = "0";
187 }
188 day += raw_day;
189
190 if(raw_hour < 10){
191 hour = "0";
192 }
193 hour += raw_hour;
194
195 if(raw_minute < 10){
196 minute = "0";
197 }
198 minute += raw_minute;
199
200 if(raw_second < 10){
201 second = "0";
202 }
203 second += raw_second;
204
205 // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime
206 date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format
207 date += "-";
208 date += month;
209 date += "-";
210 date += day;
211 date += "T";
212 date += hour;
213 date += ":";
214 date += minute;
215 date += ":";
216 date += second;
217 // If we're using v1.1, then tack on local time offset, otherwise don't
218 if(this->configuration->getOAIVersion() == 110){
219 date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from
220 // UTC (GMT), e.g. "+8:00", "-5:00"
221 }
222 else
223 date += "Z"; // If v2.0, we put 'Z' on the end rather than the localtime offset
224}
225
226//----------------------------------------------------------------------------------------------
227/**********
228 * Does different request tags depending on the version of the OAI protocol running
229 */
230void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
231{
232 // Iterators for moving through the list of parameters (keys) specified
233 text_tmap::const_iterator here;
234 text_tmap::const_iterator end;
235 int numArgs = params.getSize();
236
237 here = params.begin();
238 end = params.end();
239
240 text_t baseURL = this->configuration->getBaseURL();
241
242 int version = this->configuration->getOAIVersion();
243
244 switch(version){
245 case 110:
246 /* Takes the form:
247 * <requestURL>http://baseURL.com/oaimain?verb="someVerb"&amp;key=value</requestURL>
248 */
249 requestURL = " <requestURL>" + baseURL;
250
251 if(numArgs == 0) break; // If no args, all done - the error will be picked up later
252
253 // The following lines will give us the "label=value" syntax
254 requestURL += "?";
255 requestURL += here->first;
256 requestURL += "=";
257 requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant
258 ++here;
259
260 while(here != end){
261 requestURL +="&amp;"; // Stick in the ampersand in URL encoding
262 requestURL += (here->first + "=" + html_safe(here->second));
263 ++here;
264 }
265 requestURL += "</requestURL>\n";
266 break;
267
268 case 200:
269 default:
270 /* Takes the form:
271 * <request verb="someVerb" key="value" key="value">
272 * http://baseURL.com/oaimain</request>
273 */
274 if(numArgs == 0) {
275 requestURL = " <request>" + baseURL + "</request>\n";
276 break;
277 }
278 requestURL = " <request " + here->first + "=\"" + html_safe(here->second) + "\"";
279 ++here;
280 while(here != end){
281 requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");
282 ++here;
283 }
284 requestURL += ">\n " + baseURL + "</request>\n";
285 break;
286 }
287}
288
289//----------------------------------------------------------------------------------------------
290/**********
291 * Send the (OAI version-dependent) response text to the output stream
292 */
293void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
294{
295 bool error;
296 text_t date, requestURL;
297
298 // Write the response date & time into 'date'
299 this->getResponseDate(date);
300 int version = this->configuration->getOAIVersion();
301
302 // validate the action
303 error = !this->validateAction(protocol, params);
304
305 // raise an error for duplicated arguments and set the
306 // error type "manually" here...
307 if (params.hasDuplicateArg() && !error) {
308 this->errorType = "badArgument";
309 error = true;
310 }
311
312 // start with the required http header
313 if (version <= 110 && error){
314 output << "Status: 400 " << this->errorType << "\n";
315 output << "Content-Type: text/xml\n\n";
316 return;
317 }
318
319 output << "Status: 200\n";
320 output << "Content-Type: text/xml\n\n";
321
322 // output xml header parts
323 output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
324
325 if(version <= 110){
326 // output OAI v1.1 action header tag
327 output << "<" << this->name;
328 output << "\n xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
329 output << "\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
330 output << "\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
331 output << "\n http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
332 }
333 else {
334 text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
335 output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
336 // output OAI v2.0 action header tag
337 output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
338 << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
339 << " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
340 << " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
341 }
342 // output current time for response
343 output << " <responseDate>" << date << "</responseDate>\n";
344
345 // output request URL. This differs depending on the OAI protocol version currently running, so
346 // the entire field - including tags - must be put into the text_t variable by getRequestURL()
347 this->getRequestURL(params, requestURL);
348
349 output << requestURL ;
350
351 if (error == false) {
352 // a string stream to write the content of the action to; this is done so that we can
353 // avoid outputting the action tag if the action's body is unsuccessful, in which
354 // case the leading tag must be suppressed
355#if defined(GSDL_USE_IOS_H)
356 ostrstream outstream;
357#else
358 ostringstream outstream;
359#endif
360
361 // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
362 //
363 // An action that outputs no content should raise an error state to suppress the
364 // matching opening and close tags if it outputs no content in OAI 2.0
365 error = !this->output_content(outstream, protocol, params);
366
367 // output the leading tag if no error occurred
368 if (error == false) {
369 if (version >= 200) {
370 this->output_action_tag(output, true);
371 }
372 }
373
374 // now output the body of the action content
375#if defined(GSDL_USE_IOS_H)
376 outstream << ends; // Ensure outstream is null-terminated correctly
377#endif
378 output << outstream.str();
379 }
380 else {
381 if (version >= 200) {
382 this->output_error(output, this->errorType);
383 }
384 }
385
386 // close out our response - both versions need this line, but v2.0 only needs it if there was no error
387 if((version == 110) || (version >= 200 && error == false)){
388 this->output_action_tag(output, false);
389 }
390 if(version >= 200){
391 output << "</OAI-PMH>\n";
392 }
393}
394
395void oaiaction::output_action_tag(ostream &output, bool openTag)
396{
397 output << " <";
398 if (!openTag) {
399 output << "/";
400 }
401 output << this->name << ">" << endl;
402}
403
404void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified,
405 const text_t &deleted_status, const text_tarray &memberOf, int oaiVersion)
406{
407 if(deleted_status == "D") { // record marked as deleted
408 output << " <header status=\"deleted\">" << endl;
409 } else { // deleted_status is "" or E for exists
410 output << " <header>" << endl;
411 }
412 output << " <identifier>" << oaiLabel << "</identifier>" << endl;
413 output << " <datestamp>" << lastModified << "</datestamp>" << endl;
414
415 text_t collection_id;
416 // Find the collection id from oai:repos-id:collection:doc
417 oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);
418 if(oaiVersion >= 200){
419
420 // A doc is only a member of its collection set if hte collection is valid
421 // Can have super collecitons containging collecitons that are not individually valid
422 if (this->configuration->isValidCollection(collection_id)) {
423 output << " <setSpec>" << collection_id << "</setSpec>" << endl;
424 }
425 // are we part of a super collection?
426 text_tarray super_colls = this->configuration->getSuperCollectionsForThisCollection(collection_id);
427 if (super_colls.size()!=0) {
428 text_tarray::const_iterator super_start = super_colls.begin();
429 text_tarray::const_iterator super_end = super_colls.end();
430 while(super_start != super_end) {
431 output << " <setSpec>" << *super_start << "</setSpec>" << endl;
432 ++ super_start;
433 }
434 }
435
436 // Do we have subsets?
437 text_tarray::const_iterator member = memberOf.begin();
438 text_tarray::const_iterator memEnd = memberOf.end();
439 while (member != memEnd) {
440 text_t oaiSet = *member;
441 oaiclassifier::toOAI(collection_id, oaiSet);
442 output << " <setSpec>" << oaiSet << "</setSpec>" << endl;
443 ++member;
444 }
445 }
446 output << " </header>" << endl;
447}
448
449// Method that looks for the requested metaname in the doc_info, and if found, sets the value
450// in the argument 'metavalue'.
451bool oaiaction::getMeta(ResultDocInfo_t &doc_info, const text_t &metaname, text_t &metavalue)
452{
453 //ofstream logout("oai.log", ios::app);
454 //logout << "oaiaction::getMeta(): Looking for metaname = " << metaname << endl;
455
456 // use map::find, rather than testing map["array-index"], as the latter will create that index and
457 // insert an empty value into it, when we just want to test for it.
458 // See http://www.cplusplus.com/reference/map/map/operator[]/
459 // See also http://stackoverflow.com/questions/1939953/how-to-find-if-a-given-key-exists-in-a-c-stdmap
460
461 if(doc_info.metadata.find(metaname) == doc_info.metadata.end()) {
462 //logout << "\t\t\t Could not find meta " << metaname << endl;
463 //logout.close();
464 return false;
465 } else { // found meta
466 metavalue = doc_info.metadata[metaname].values[0];
467 //logout << "\t\t\t Found value = " << metavalue << endl;
468 //logout.close();
469 return true;
470 }
471
472}
473
474void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
475{
476 text_t temp;
477
478
479 MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
480 MetadataInfo_tmap::iterator end = doc_info.metadata.end();
481
482 while(current != end){
483 temp = current->first;
484 lc(temp); // lowercased for string comparison below
485 if(temp == "gs.oaidatestamp" && current->second.values[0] != "") { // user specified a (non-empty) oaidatestamp as gs metadata
486 // assume it is correct format
487 lastModified = current->second.values[0];
488 return;
489 }
490 else{
491 if(lastModified == "" && current->second.values.size() >= 1) {
492
493 if (temp == "oaiinf.timestamp") { // new way is to store oai timestamp in oai db and get it from there
494 // check if there was a timestamp for the doc in the etc/oai-inf database
495 lastModified = current->second.values[0];
496
497 } else if (temp == "oailastmodified") { // old way, being phased out
498 // check if there was an oailastmodified timestamp for the doc in the collection index db
499 lastModified = current->second.values[0];
500 }
501
502 if(lastModified != "") { // if we've now set the lastModified value, convert it for display and return
503 time_t raw_time = (time_t)lastModified.getint();
504 lastModified = this->parseDatestamp(raw_time);
505
506 return;
507 }
508 }
509 } // else keep looking for oai timestamp
510 ++current;
511 }
512
513}
514
515bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection,
516 const text_t &OID, recptproto *protocol, ostream &logout)
517{
518 FilterResponse_t response;
519 text_tset metadata;
520 bool status_ok = get_oai_info(OID, collection, "", metadata, false, protocol, response, logout);
521 // get timestamp from etc/oai-inf.<db> now, no longer from index.db
522 bool not_too_early = false, not_too_recent = false;
523
524 if(status_ok) {
525 ResultDocInfo_t doc_info = response.docInfo[0];
526 text_t lastModDate;
527 this->getLastModifiedDate(doc_info, lastModDate);
528
529 // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison
530 if(from != ""){
531 if(from <= lastModDate)
532 not_too_early = true;
533 }
534 else
535 not_too_early = true; // If there's no FROM field, then the record can't be too early
536
537 if(until != ""){
538 if(lastModDate <= until)
539 not_too_recent = true;
540 }
541 else
542 not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent
543
544 if(not_too_early && not_too_recent)
545 return true;
546 else
547 return false;
548 }
549 else
550 return false;
551}
552
553// works out the earliest date/timestamp of the OAI repository,
554// which compares and then selects the earliest timestamp value among all collections.
555// In the past, the earliest date/timestamp used to be the unix epoch,
556// later it became the earliest among each collection's builddate in build.cfg,
557// and now, after implementing the OAI deletion policy, it needed to become the earliest
558// among each collection's OAI_EARLIESTTIMESTAMP_OID record's timestamp value.
559
560text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs &params) {
561
562 text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet
563
564 //text_t version = (this->configuration->getOAIVersion() <= 110) ? (text_t)"1.1":(text_t)"2.0";
565 //if(version == "2.0"){
566
567 // earliestDatestamp *should* be the YYYY-MM-DD format of the oldest lastmodified record in the
568 // repository, but we're just setting it to be the default oldest possible date - ugly, but judged
569 // not to be worth the effort of trolling through all the lastmodified dates (by others with more
570 // say than me)
571
572 // The above was before. However, now we mirror GS3 way of dealing with
573 // earliestDatestamp by going through the earliestDatestamp field of each OAI
574 // collection's build.cfg in order to work out earliestdatestamp of this Repository:
575 // by going through all the collections and getting the earliest among the
576 // "earliestDatestamp" values stored for each collection in its build.cfg
577 // (the earliestDatestamp for a collection has already been extracted from
578 // their build.cfg file at this point by collectserver::configure. The field
579 // is declared in comtypes.h)
580
581 // We've changed this again to grab the timestamp field of the OAI_EARLIESTTIMESTAMP_OID
582 // record of each collection's oai-inf.db (representing the time that collection was
583 // built for the very first time) and choosing the earliest among all of them.
584
585 // Get a list of the OAI-enabled collections available
586 text_tarray& collections = this->configuration->getCollectionsList();
587 if (collections.size() > 0)
588 {
589 // get the identifier from the params
590 text_t identifier = params["identifier"];
591 text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":";
592 identifier.replace(oai_OID_prefix, "");
593
594 // Get the current collection from the identifier
595 text_t collection_name = "";
596 oaiclassifier::toGSDL(collection_name, identifier);
597
598 // Find the starting collection
599 text_tarray::iterator collection_iterator = collections.begin();
600 while (collection_iterator != collections.end())
601 {
602 if (collection_name == "" || collection_name == *collection_iterator)
603 {
604 break;
605 }
606
607 collection_iterator++;
608 }
609
610 ofstream logout("oai.log", ios::app);
611
612 // Now loop through the remaining collections
613 // to work out the earliest datestamp
614 while (collection_iterator != collections.end())
615 {
616 collection_name = (*collection_iterator);
617
618 text_tset metadata;
619 FilterResponse_t response;
620
621 // get timestamps from etc/oai-inf.<db> now, no longer from build.cfg
622 // request the special record with OID=OAI_EARLIESTTIMESTAMP_OID for the collection
623 bool status_ok = get_oai_info(OAI_EARLIESTTIMESTAMP_OID, collection_name, "", metadata, false, protocol, response, logout);
624
625 if(status_ok) {
626 ResultDocInfo_t doc_info = response.docInfo[0];
627 text_t collEarliestTimestamp;
628 // the timestamp we want lives in the "lastmodified" field of the OAI_EARLIESTTIMESTAMP_OID record
629 this->getLastModifiedDate(doc_info, collEarliestTimestamp);
630
631 if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection
632 earliestDatestamp = collEarliestTimestamp;
633 } else if(collEarliestTimestamp < earliestDatestamp) {
634 earliestDatestamp = collEarliestTimestamp;
635 }
636 } /*else { // collection may not have oai-inf.db (yet), check build.cfg?
637 // No: no need to use the old way as fallback
638
639 ColInfoResponse_t cinfo;
640 comerror_t err;
641 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
642 if (err == noError) {
643 text_t eDatestamp = cinfo.earliestDatestamp;
644 time_t raw_time = (time_t)eDatestamp.getint();
645 eDatestamp = this->parseDatestamp(raw_time);
646
647 if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection
648 earliestDatestamp = eDatestamp;
649 } else if(eDatestamp < earliestDatestamp) {
650 earliestDatestamp = eDatestamp;
651 }
652 }
653 }
654 */
655 collection_iterator++;
656
657 }
658 logout.close();
659 }
660
661 //}
662
663 // if repository's earliestDatestamp is still unset, default to unix epoch time
664 if(earliestDatestamp == "") {
665 earliestDatestamp = "1970-01-01";
666 }
667
668 this->mEarliestDatestamp = earliestDatestamp;
669 return mEarliestDatestamp;
670}
Note: See TracBrowser for help on using the repository browser.