source: main/trunk/greenstone2/runtime-src/src/oaiservr/oaiaction.cpp@ 31387

Last change on this file since 31387 was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:keywords set to Author Date Id Revision
File size: 21.0 KB
Line 
1/**********************************************************************
2 *
3 * oaiaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "oaiaction.h"
28#include "oaitools.h"
29#include "recptprototools.h"
30
31#if defined(GSDL_USE_IOS_H)
32# if defined(__WIN32__)
33# include <strstrea.h> // vc4
34# else
35# include <strstream.h>
36# endif
37#else
38# include <sstream>
39#endif
40
41#include <time.h>
42
43oaiaction::oaiaction(const text_t &name)
44{
45 this->logout = new ofstream("oai.log", ios::app);
46 this->configuration = NULL;
47 this->name = name;
48 this->mEarliestDatestamp = "";
49}
50
51//----------------------------------------------------------------------------------------------
52
53// Over-ridden by child classes
54bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
55{
56 this->errorType = "";
57 return true;
58}
59
60//----------------------------------------------------------------------------------------------
61
62/**********
63 * Compare the supplied metadataPrefix to all those that
64 * are supported. If there is NO match, return true. If
65 * it DOES match one, return false.
66 */
67bool oaiaction::formatNotSupported(text_t &metaFormat)
68{
69 // is it in our list?
70 if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true;
71 return false;
72}
73
74//----------------------------------------------------------------------------------------------
75
76/**********
77 * Function for outputting the appropriate error(s) with the (version 2.0) request.
78 * The error(s) MUST be included in the response, and take the form:
79 * <error code="errorType">Description of error</error>
80 */
81void oaiaction::output_error(ostream &output, text_t &errorType)
82{
83 text_t description = "";
84
85 if(errorType == "badArgument"){
86 description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax";
87 }
88 else if(errorType == "noRecordsMatch"){
89 description = "No record matches all the requested parameters";
90 }
91 else if(errorType == "cannotDisseminateFormat"){
92 description = "The metadata format specified is not supported by the item or by the repository";
93 }
94 else if(errorType == "idDoesNotExist"){
95 description = "The value of the identifier is unknown or illegal in this repository";
96 }
97 else if(errorType == "badVerb"){
98 description = "Value of the verb argument is illegal, missing, or repeated";
99 }
100 else if(errorType == "noMetadataFormats"){
101 description = "There are no metadata formats available for the item";
102 }
103 else if(errorType == "badResumptionToken"){
104 description = "The value of the resumptionToken argument is invalid or expired";
105 }
106 else if(errorType == "noSetHierarchy"){
107 description = "The repository does not support sets";
108 }
109
110 output << " <error code=\"" << errorType << "\">" << description << "</error>\n";
111}
112
113//----------------------------------------------------------------------------------------------
114
115text_t oaiaction::getName()
116{
117 return this->name;
118}
119
120//----------------------------------------------------------------------------------------------
121
122/**********
123 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
124 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
125 * systems) and converts it to YYYY-MM-DD format.
126 */
127text_t oaiaction::parseDatestamp(time_t &rawtime)
128{
129 text_t year, month, day, lastModified;
130 tm *ptm;
131 ptm = gmtime(&rawtime);
132 int raw_month = ptm->tm_mon + 1;
133 int raw_day = ptm->tm_mday;
134
135 year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format
136
137 // Need the month in MM format, so if month is 1..9, add a 0 to the front
138 if(raw_month < 10){
139 month = "0";
140 month += raw_month;
141 }
142 else month = raw_month;
143
144 if(raw_day < 10){
145 day = "0";
146 day += raw_day;
147 }
148 else day = raw_day;
149
150 lastModified = year + "-" + month + "-" + day;
151
152 return lastModified;
153}
154
155//----------------------------------------------------------------------------------------------
156/**********
157 * Used by both versions to get the date & time of the client's request. The <responseDate> tag is
158 * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from
159 * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time
160 * is expected to be in UTC, and Z should simply be the character 'Z'.
161 */
162void oaiaction::getResponseDate(text_t &date)
163{
164 time_t rawtime;
165 tm *ptm;
166
167 time(&rawtime); // Get the epoch time
168
169 ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object
170
171 text_t month, day, hour, minute, second;
172 int raw_month = ptm->tm_mon + 1; // Note Jan = 0 ... Dec = 11, so add 1
173 int raw_day = ptm->tm_mday;
174 int raw_hour = ptm->tm_hour;
175 int raw_minute = ptm->tm_min;
176 int raw_second = ptm->tm_sec;
177
178 // Need the month in MM format, so if month is 1..9, add a 0 to the front
179 if(raw_month < 10){
180 month = "0";
181 }
182 month += raw_month;
183
184 // Same for days, hours, minutes and seconds
185 if(raw_day < 10){
186 day = "0";
187 }
188 day += raw_day;
189
190 if(raw_hour < 10){
191 hour = "0";
192 }
193 hour += raw_hour;
194
195 if(raw_minute < 10){
196 minute = "0";
197 }
198 minute += raw_minute;
199
200 if(raw_second < 10){
201 second = "0";
202 }
203 second += raw_second;
204
205 // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime
206 date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format
207 date += "-";
208 date += month;
209 date += "-";
210 date += day;
211 date += "T";
212 date += hour;
213 date += ":";
214 date += minute;
215 date += ":";
216 date += second;
217 // If we're using v1.1, then tack on local time offset, otherwise don't
218 if(this->configuration->getOAIVersion() == 110){
219 date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from
220 // UTC (GMT), e.g. "+8:00", "-5:00"
221 }
222 else
223 date += "Z"; // If v2.0, we put 'Z' on the end rather than the localtime offset
224}
225
226//----------------------------------------------------------------------------------------------
227/**********
228 * Does different request tags depending on the version of the OAI protocol running
229 */
230void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
231{
232 // Iterators for moving through the list of parameters (keys) specified
233 text_tmap::const_iterator here;
234 text_tmap::const_iterator end;
235 int numArgs = params.getSize();
236
237 here = params.begin();
238 end = params.end();
239
240 text_t baseURL = this->configuration->getBaseURL();
241
242 int version = this->configuration->getOAIVersion();
243
244 switch(version){
245 case 110:
246 /* Takes the form:
247 * <requestURL>http://baseURL.com/oaimain?verb="someVerb"&amp;key=value</requestURL>
248 */
249 requestURL = " <requestURL>" + baseURL;
250
251 if(numArgs == 0) break; // If no args, all done - the error will be picked up later
252
253 // The following lines will give us the "label=value" syntax
254 requestURL += "?";
255 requestURL += here->first;
256 requestURL += "=";
257 requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant
258 ++here;
259
260 while(here != end){
261 requestURL +="&amp;"; // Stick in the ampersand in URL encoding
262 requestURL += (here->first + "=" + html_safe(here->second));
263 ++here;
264 }
265 requestURL += "</requestURL>\n";
266 break;
267
268 case 200:
269 default:
270 /* Takes the form:
271 * <request verb="someVerb" key="value" key="value">
272 * http://baseURL.com/oaimain</request>
273 */
274 if(numArgs == 0) {
275 requestURL = " <request>" + baseURL + "</request>\n";
276 break;
277 }
278 requestURL = " <request " + here->first + "=\"" + html_safe(here->second) + "\"";
279 ++here;
280 while(here != end){
281 requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");
282 ++here;
283 }
284 requestURL += ">\n " + baseURL + "</request>\n";
285 break;
286 }
287}
288
289//----------------------------------------------------------------------------------------------
290/**********
291 * Send the (OAI version-dependent) response text to the output stream
292 */
293void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
294{
295 bool error;
296 text_t date, requestURL;
297
298 // Write the response date & time into 'date'
299 this->getResponseDate(date);
300 int version = this->configuration->getOAIVersion();
301
302 // validate the action
303 error = !this->validateAction(protocol, params);
304
305 // raise an error for duplicated arguments and set the
306 // error type "manually" here...
307 if (params.hasDuplicateArg() && !error) {
308 this->errorType = "badArgument";
309 error = true;
310 }
311
312 // start with the required http header
313 if (version <= 110 && error){
314 output << "Status: 400 " << this->errorType << "\n";
315 output << "Content-Type: text/xml\n\n";
316 return;
317 }
318
319 output << "Status: 200\n";
320 output << "Content-Type: text/xml\n\n";
321
322 // output xml header parts
323 output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
324
325 if(version <= 110){
326 // output OAI v1.1 action header tag
327 output << "<" << this->name;
328 output << "\n xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
329 output << "\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
330 output << "\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
331 output << "\n http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
332 }
333 else {
334 text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
335 output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
336 // output OAI v2.0 action header tag
337 output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
338 << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
339 << " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
340 << " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
341 }
342 // output current time for response
343 output << " <responseDate>" << date << "</responseDate>\n";
344
345 // output request URL. This differs depending on the OAI protocol version currently running, so
346 // the entire field - including tags - must be put into the text_t variable by getRequestURL()
347 this->getRequestURL(params, requestURL);
348
349 output << requestURL ;
350
351 if (error == false) {
352 // a string stream to write the content of the action to; this is done so that we can
353 // avoid outputting the action tag if the action's body is unsuccessful, in which
354 // case the leading tag must be suppressed
355#if defined(GSDL_USE_IOS_H)
356 ostrstream outstream;
357#else
358 ostringstream outstream;
359#endif
360
361 // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
362 //
363 // An action that outputs no content should raise an error state to suppress the
364 // matching opening and close tags if it outputs no content in OAI 2.0
365 error = !this->output_content(outstream, protocol, params);
366
367 // output the leading tag if no error occurred
368 if (error == false) {
369 if (version >= 200) {
370 this->output_action_tag(output, true);
371 }
372 }
373
374 // now output the body of the action content
375#if defined(GSDL_USE_IOS_H)
376 outstream << ends; // Ensure outstream is null-terminated correctly
377#endif
378 output << outstream.str();
379 }
380 else {
381 if (version >= 200) {
382 this->output_error(output, this->errorType);
383 }
384 }
385
386 // close out our response - both versions need this line, but v2.0 only needs it if there was no error
387 if((version == 110) || (version >= 200 && error == false)){
388 this->output_action_tag(output, false);
389 }
390 if(version >= 200){
391 output << "</OAI-PMH>\n";
392 }
393}
394
395void oaiaction::output_action_tag(ostream &output, bool openTag)
396{
397 output << " <";
398 if (!openTag) {
399 output << "/";
400 }
401 output << this->name << ">" << endl;
402}
403
404void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified,
405 const text_t &deleted_status, const text_tarray &memberOf, int oaiVersion)
406{
407 if(deleted_status == "D") { // record marked as deleted
408 output << " <header status=\"deleted\">" << endl;
409 } else { // deleted_status is "" or E for exists
410 output << " <header>" << endl;
411 }
412 output << " <identifier>" << oaiLabel << "</identifier>" << endl;
413 output << " <datestamp>" << lastModified << "</datestamp>" << endl;
414
415 text_t collection_id;
416 // Find the collection id from oai:repos-id:collection:doc
417 oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);
418 if(oaiVersion >= 200){
419
420 // A doc is only a member of its collection set if hte collection is valid
421 // Can have super collecitons containging collecitons that are not individually valid
422 if (this->configuration->isValidCollection(collection_id)) {
423 output << " <setSpec>" << collection_id << "</setSpec>" << endl;
424 }
425 // are we part of a super collection?
426 text_tarray super_colls = this->configuration->getSuperCollectionsForThisCollection(collection_id);
427 if (super_colls.size()!=0) {
428 text_tarray::const_iterator super_start = super_colls.begin();
429 text_tarray::const_iterator super_end = super_colls.end();
430 while(super_start != super_end) {
431 output << " <setSpec>" << *super_start << "</setSpec>" << endl;
432 ++ super_start;
433 }
434 }
435
436 // Do we have subsets?
437 text_tarray::const_iterator member = memberOf.begin();
438 text_tarray::const_iterator memEnd = memberOf.end();
439 while (member != memEnd) {
440 text_t oaiSet = *member;
441 oaiclassifier::toOAI(collection_id, oaiSet);
442 output << " <setSpec>" << oaiSet << "</setSpec>" << endl;
443 ++member;
444 }
445 }
446 output << " </header>" << endl;
447}
448
449// Method that looks for the requested metaname in the doc_info, and if found, sets the value
450// in the argument 'metavalue'.
451void oaiaction::getMeta(ResultDocInfo_t &doc_info, const text_t &metaname, text_t &metavalue)
452{
453 text_t current_metaname;
454
455 //ofstream logout("oai.log", ios::app);
456
457 MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
458 MetadataInfo_tmap::iterator end = doc_info.metadata.end();
459
460 while(current != end){
461 current_metaname = current->first;
462 lc(current_metaname); // lowercased for string comparison below
463
464 if (current_metaname == metaname && current->second.values.size() >= 1) { // found match with a value
465 metavalue = current->second.values[0];
466
467 //logout << "Looking for metaname = " << current_metaname << endl;
468 //logout << "\t\t\t Found value = " << metavalue << endl;
469 return; // done
470 }
471
472 ++current;
473 }
474
475 //logout.close();
476}
477
478void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
479{
480 text_t temp;
481
482
483 MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
484 MetadataInfo_tmap::iterator end = doc_info.metadata.end();
485
486 while(current != end){
487 temp = current->first;
488 lc(temp); // lowercased for string comparison below
489 if(temp == "gs.oaidatestamp" && current->second.values[0] != "") { // user specified a (non-empty) oaidatestamp as gs metadata
490 // assume it is correct format
491 lastModified = current->second.values[0];
492 return;
493 }
494 else{
495 if(lastModified == "" && current->second.values.size() >= 1) {
496
497 if (temp == "oaiinf.timestamp") { // new way is to store oai timestamp in oai db and get it from there
498 // check if there was a timestamp for the doc in the etc/oai-inf database
499 lastModified = current->second.values[0];
500
501 } else if (temp == "oailastmodified") { // old way, being phased out
502 // check if there was an oailastmodified timestamp for the doc in the collection index db
503 lastModified = current->second.values[0];
504 }
505
506 if(lastModified != "") { // if we've now set the lastModified value, convert it for display and return
507 time_t raw_time = (time_t)lastModified.getint();
508 lastModified = this->parseDatestamp(raw_time);
509
510 return;
511 }
512 }
513 } // else keep looking for oai timestamp
514 ++current;
515 }
516
517}
518
519bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection,
520 const text_t &OID, recptproto *protocol, ostream &logout)
521{
522 FilterResponse_t response;
523 text_tset metadata;
524 bool status_ok = get_oai_info(OID, collection, "", metadata, false, protocol, response, logout);
525 // get timestamp from etc/oai-inf.<db> now, no longer from index.db
526 bool not_too_early = false, not_too_recent = false;
527
528 if(status_ok) {
529 ResultDocInfo_t doc_info = response.docInfo[0];
530 text_t lastModDate;
531 this->getLastModifiedDate(doc_info, lastModDate);
532
533 // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison
534 if(from != ""){
535 if(from <= lastModDate)
536 not_too_early = true;
537 }
538 else
539 not_too_early = true; // If there's no FROM field, then the record can't be too early
540
541 if(until != ""){
542 if(lastModDate <= until)
543 not_too_recent = true;
544 }
545 else
546 not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent
547
548 if(not_too_early && not_too_recent)
549 return true;
550 else
551 return false;
552 }
553 else
554 return false;
555}
556
557text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs &params) {
558
559 text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet
560
561 //text_t version = (this->configuration->getOAIVersion() <= 110) ? (text_t)"1.1":(text_t)"2.0";
562 //if(version == "2.0"){
563
564 // earliestDatestamp *should* be the YYYY-MM-DD format of the oldest lastmodified record in the
565 // repository, but we're just setting it to be the default oldest possible date - ugly, but judged
566 // not to be worth the effort of trolling through all the lastmodified dates (by others with more
567 // say than me)
568
569 // The above was before. However, now we mirror GS3 way of dealing with
570 // earliestDatestamp by going through the earliestDatestamp field of each OAI
571 // collection's build.cfg in order to work out earliestdatestamp of this Repository:
572 // by going through all the collections and getting the earliest among the
573 // "earliestDatestamp" values stored for each collection in its build.cfg
574 // (the earliestDatestamp for a collection has already been extracted from
575 // their build.cfg file at this point by collectserver::configure. The field
576 // is declared in comtypes.h)
577
578
579 // Get a list of the OAI-enabled collections available
580 text_tarray& collections = this->configuration->getCollectionsList();
581 if (collections.size() > 0)
582 {
583 // get the identifier from the params
584 text_t identifier = params["identifier"];
585 text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":";
586 identifier.replace(oai_OID_prefix, "");
587
588 // Get the current collection from the identifier
589 text_t collection_name = "";
590 oaiclassifier::toGSDL(collection_name, identifier);
591
592 // Find the starting collection
593 text_tarray::iterator collection_iterator = collections.begin();
594 while (collection_iterator != collections.end())
595 {
596 if (collection_name == "" || collection_name == *collection_iterator)
597 {
598 break;
599 }
600
601 collection_iterator++;
602 }
603
604 // Now loop through the remaining collections
605 // to work out the earliest datestamp
606 while (collection_iterator != collections.end())
607 {
608 collection_name = (*collection_iterator);
609
610 ColInfoResponse_t cinfo;
611 comerror_t err;
612 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
613 if (err == noError) {
614 text_t eDatestamp = cinfo.earliestDatestamp;
615 time_t raw_time = (time_t)eDatestamp.getint();
616 eDatestamp = this->parseDatestamp(raw_time);
617
618 if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection
619 earliestDatestamp = eDatestamp;
620 } else if(eDatestamp < earliestDatestamp) {
621 earliestDatestamp = eDatestamp;
622 }
623 }
624 collection_iterator++;
625
626 }
627 }
628
629 //}
630
631 // if repository's earliestDatestamp is still unset, default to unix epoch time
632 if(earliestDatestamp == "") {
633 earliestDatestamp = "1970-01-01";
634 }
635
636 this->mEarliestDatestamp = earliestDatestamp;
637 return mEarliestDatestamp;
638}
Note: See TracBrowser for help on using the repository browser.