source: main/trunk/greenstone2/runtime-src/src/oaiservr/oaiaction.cpp@ 31390

Last change on this file since 31390 was 31390, checked in by ak19, 7 years ago

Improved newly added oaiaction::getMeta(). Now return type is a bool and it now uses map.find() to locate requested meta.

  • Property svn:keywords set to Author Date Id Revision
File size: 21.2 KB
RevLine 
[22739]1/**********************************************************************
2 *
3 * oaiaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
[8182]27#include "oaiaction.h"
28#include "oaitools.h"
[15428]29#include "recptprototools.h"
[8182]30
[8241]31#if defined(GSDL_USE_IOS_H)
32# if defined(__WIN32__)
33# include <strstrea.h> // vc4
34# else
35# include <strstream.h>
36# endif
37#else
38# include <sstream>
39#endif
[8182]40
41#include <time.h>
42
43oaiaction::oaiaction(const text_t &name)
44{
45 this->logout = new ofstream("oai.log", ios::app);
46 this->configuration = NULL;
47 this->name = name;
[24412]48 this->mEarliestDatestamp = "";
[8182]49}
50
51//----------------------------------------------------------------------------------------------
52
53// Over-ridden by child classes
54bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
55{
56 this->errorType = "";
57 return true;
58}
59
60//----------------------------------------------------------------------------------------------
61
62/**********
63 * Compare the supplied metadataPrefix to all those that
64 * are supported. If there is NO match, return true. If
65 * it DOES match one, return false.
66 */
67bool oaiaction::formatNotSupported(text_t &metaFormat)
68{
[18893]69 // is it in our list?
70 if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true;
71 return false;
[8182]72}
73
74//----------------------------------------------------------------------------------------------
75
76/**********
77 * Function for outputting the appropriate error(s) with the (version 2.0) request.
78 * The error(s) MUST be included in the response, and take the form:
79 * <error code="errorType">Description of error</error>
80 */
81void oaiaction::output_error(ostream &output, text_t &errorType)
82{
83 text_t description = "";
84
85 if(errorType == "badArgument"){
86 description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax";
87 }
88 else if(errorType == "noRecordsMatch"){
89 description = "No record matches all the requested parameters";
90 }
91 else if(errorType == "cannotDisseminateFormat"){
92 description = "The metadata format specified is not supported by the item or by the repository";
93 }
94 else if(errorType == "idDoesNotExist"){
95 description = "The value of the identifier is unknown or illegal in this repository";
96 }
97 else if(errorType == "badVerb"){
98 description = "Value of the verb argument is illegal, missing, or repeated";
99 }
100 else if(errorType == "noMetadataFormats"){
101 description = "There are no metadata formats available for the item";
102 }
103 else if(errorType == "badResumptionToken"){
104 description = "The value of the resumptionToken argument is invalid or expired";
105 }
106 else if(errorType == "noSetHierarchy"){
107 description = "The repository does not support sets";
108 }
109
110 output << " <error code=\"" << errorType << "\">" << description << "</error>\n";
111}
112
113//----------------------------------------------------------------------------------------------
114
115text_t oaiaction::getName()
116{
117 return this->name;
118}
119
120//----------------------------------------------------------------------------------------------
121
122/**********
123 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
124 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
125 * systems) and converts it to YYYY-MM-DD format.
126 */
127text_t oaiaction::parseDatestamp(time_t &rawtime)
128{
129 text_t year, month, day, lastModified;
130 tm *ptm;
131 ptm = gmtime(&rawtime);
132 int raw_month = ptm->tm_mon + 1;
133 int raw_day = ptm->tm_mday;
134
135 year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format
136
137 // Need the month in MM format, so if month is 1..9, add a 0 to the front
138 if(raw_month < 10){
139 month = "0";
140 month += raw_month;
141 }
142 else month = raw_month;
143
144 if(raw_day < 10){
145 day = "0";
146 day += raw_day;
147 }
148 else day = raw_day;
149
150 lastModified = year + "-" + month + "-" + day;
151
152 return lastModified;
153}
154
155//----------------------------------------------------------------------------------------------
156/**********
157 * Used by both versions to get the date & time of the client's request. The <responseDate> tag is
158 * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from
159 * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time
160 * is expected to be in UTC, and Z should simply be the character 'Z'.
161 */
162void oaiaction::getResponseDate(text_t &date)
163{
164 time_t rawtime;
165 tm *ptm;
166
167 time(&rawtime); // Get the epoch time
168
169 ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object
170
171 text_t month, day, hour, minute, second;
172 int raw_month = ptm->tm_mon + 1; // Note Jan = 0 ... Dec = 11, so add 1
173 int raw_day = ptm->tm_mday;
174 int raw_hour = ptm->tm_hour;
175 int raw_minute = ptm->tm_min;
176 int raw_second = ptm->tm_sec;
177
178 // Need the month in MM format, so if month is 1..9, add a 0 to the front
179 if(raw_month < 10){
180 month = "0";
181 }
182 month += raw_month;
183
184 // Same for days, hours, minutes and seconds
185 if(raw_day < 10){
186 day = "0";
187 }
188 day += raw_day;
189
190 if(raw_hour < 10){
191 hour = "0";
192 }
193 hour += raw_hour;
194
195 if(raw_minute < 10){
196 minute = "0";
197 }
198 minute += raw_minute;
199
200 if(raw_second < 10){
201 second = "0";
202 }
203 second += raw_second;
204
205 // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime
206 date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format
207 date += "-";
208 date += month;
209 date += "-";
210 date += day;
211 date += "T";
212 date += hour;
213 date += ":";
214 date += minute;
215 date += ":";
216 date += second;
217 // If we're using v1.1, then tack on local time offset, otherwise don't
218 if(this->configuration->getOAIVersion() == 110){
219 date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from
220 // UTC (GMT), e.g. "+8:00", "-5:00"
221 }
222 else
223 date += "Z"; // If v2.0, we put 'Z' on the end rather than the localtime offset
224}
225
226//----------------------------------------------------------------------------------------------
227/**********
228 * Does different request tags depending on the version of the OAI protocol running
229 */
230void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
231{
232 // Iterators for moving through the list of parameters (keys) specified
233 text_tmap::const_iterator here;
234 text_tmap::const_iterator end;
235 int numArgs = params.getSize();
236
237 here = params.begin();
238 end = params.end();
239
[22213]240 text_t baseURL = this->configuration->getBaseURL();
[8182]241
242 int version = this->configuration->getOAIVersion();
243
244 switch(version){
245 case 110:
246 /* Takes the form:
247 * <requestURL>http://baseURL.com/oaimain?verb="someVerb"&amp;key=value</requestURL>
248 */
[14282]249 requestURL = " <requestURL>" + baseURL;
[8182]250
251 if(numArgs == 0) break; // If no args, all done - the error will be picked up later
252
253 // The following lines will give us the "label=value" syntax
254 requestURL += "?";
255 requestURL += here->first;
256 requestURL += "=";
257 requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant
[9608]258 ++here;
[8182]259
260 while(here != end){
261 requestURL +="&amp;"; // Stick in the ampersand in URL encoding
262 requestURL += (here->first + "=" + html_safe(here->second));
[9608]263 ++here;
[8182]264 }
265 requestURL += "</requestURL>\n";
266 break;
267
268 case 200:
269 default:
270 /* Takes the form:
271 * <request verb="someVerb" key="value" key="value">
272 * http://baseURL.com/oaimain</request>
273 */
274 if(numArgs == 0) {
[14282]275 requestURL = " <request>" + baseURL + "</request>\n";
[8182]276 break;
277 }
[15196]278 requestURL = " <request " + here->first + "=\"" + html_safe(here->second) + "\"";
[9608]279 ++here;
[8182]280 while(here != end){
[15196]281 requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");
[9608]282 ++here;
[8182]283 }
[14282]284 requestURL += ">\n " + baseURL + "</request>\n";
[8182]285 break;
286 }
287}
288
289//----------------------------------------------------------------------------------------------
290/**********
291 * Send the (OAI version-dependent) response text to the output stream
292 */
293void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
294{
295 bool error;
296 text_t date, requestURL;
297
298 // Write the response date & time into 'date'
299 this->getResponseDate(date);
300 int version = this->configuration->getOAIVersion();
301
302 // validate the action
303 error = !this->validateAction(protocol, params);
304
305 // raise an error for duplicated arguments and set the
306 // error type "manually" here...
307 if (params.hasDuplicateArg() && !error) {
308 this->errorType = "badArgument";
309 error = true;
310 }
311
312 // start with the required http header
313 if (version <= 110 && error){
314 output << "Status: 400 " << this->errorType << "\n";
315 output << "Content-Type: text/xml\n\n";
316 return;
317 }
318
319 output << "Status: 200\n";
320 output << "Content-Type: text/xml\n\n";
321
322 // output xml header parts
323 output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
324
325 if(version <= 110){
326 // output OAI v1.1 action header tag
327 output << "<" << this->name;
[15192]328 output << "\n xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
[8182]329 output << "\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
330 output << "\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
331 output << "\n http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
332 }
333 else {
[23234]334 text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
[19124]335 output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
[8182]336 // output OAI v2.0 action header tag
[15192]337 output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
[8182]338 << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
339 << " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
340 << " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
341 }
342 // output current time for response
343 output << " <responseDate>" << date << "</responseDate>\n";
344
345 // output request URL. This differs depending on the OAI protocol version currently running, so
346 // the entire field - including tags - must be put into the text_t variable by getRequestURL()
347 this->getRequestURL(params, requestURL);
348
349 output << requestURL ;
350
351 if (error == false) {
352 // a string stream to write the content of the action to; this is done so that we can
353 // avoid outputting the action tag if the action's body is unsuccessful, in which
354 // case the leading tag must be suppressed
[8306]355#if defined(GSDL_USE_IOS_H)
356 ostrstream outstream;
357#else
[8182]358 ostringstream outstream;
[8306]359#endif
[8182]360
361 // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
362 //
363 // An action that outputs no content should raise an error state to suppress the
364 // matching opening and close tags if it outputs no content in OAI 2.0
365 error = !this->output_content(outstream, protocol, params);
366
367 // output the leading tag if no error occurred
368 if (error == false) {
369 if (version >= 200) {
370 this->output_action_tag(output, true);
371 }
372 }
373
374 // now output the body of the action content
[8316]375#if defined(GSDL_USE_IOS_H)
[8308]376 outstream << ends; // Ensure outstream is null-terminated correctly
[8316]377#endif
[8182]378 output << outstream.str();
379 }
380 else {
381 if (version >= 200) {
382 this->output_error(output, this->errorType);
383 }
384 }
385
386 // close out our response - both versions need this line, but v2.0 only needs it if there was no error
387 if((version == 110) || (version >= 200 && error == false)){
388 this->output_action_tag(output, false);
389 }
390 if(version >= 200){
391 output << "</OAI-PMH>\n";
392 }
393}
394
395void oaiaction::output_action_tag(ostream &output, bool openTag)
396{
397 output << " <";
398 if (!openTag) {
399 output << "/";
400 }
401 output << this->name << ">" << endl;
402}
403
404void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified,
[31387]405 const text_t &deleted_status, const text_tarray &memberOf, int oaiVersion)
[8182]406{
[31387]407 if(deleted_status == "D") { // record marked as deleted
408 output << " <header status=\"deleted\">" << endl;
409 } else { // deleted_status is "" or E for exists
[8182]410 output << " <header>" << endl;
[31387]411 }
[8182]412 output << " <identifier>" << oaiLabel << "</identifier>" << endl;
413 output << " <datestamp>" << lastModified << "</datestamp>" << endl;
414
[22289]415 text_t collection_id;
416 // Find the collection id from oai:repos-id:collection:doc
417 oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);
[8182]418 if(oaiVersion >= 200){
[27528]419
[27534]420 // A doc is only a member of its collection set if hte collection is valid
421 // Can have super collecitons containging collecitons that are not individually valid
422 if (this->configuration->isValidCollection(collection_id)) {
423 output << " <setSpec>" << collection_id << "</setSpec>" << endl;
424 }
[27528]425 // are we part of a super collection?
426 text_tarray super_colls = this->configuration->getSuperCollectionsForThisCollection(collection_id);
427 if (super_colls.size()!=0) {
428 text_tarray::const_iterator super_start = super_colls.begin();
429 text_tarray::const_iterator super_end = super_colls.end();
430 while(super_start != super_end) {
431 output << " <setSpec>" << *super_start << "</setSpec>" << endl;
432 ++ super_start;
433 }
434 }
435
436 // Do we have subsets?
[8182]437 text_tarray::const_iterator member = memberOf.begin();
438 text_tarray::const_iterator memEnd = memberOf.end();
439 while (member != memEnd) {
440 text_t oaiSet = *member;
[22289]441 oaiclassifier::toOAI(collection_id, oaiSet);
[8182]442 output << " <setSpec>" << oaiSet << "</setSpec>" << endl;
[9608]443 ++member;
[8182]444 }
445 }
446 output << " </header>" << endl;
447}
448
[31387]449// Method that looks for the requested metaname in the doc_info, and if found, sets the value
450// in the argument 'metavalue'.
[31390]451bool oaiaction::getMeta(ResultDocInfo_t &doc_info, const text_t &metaname, text_t &metavalue)
[31387]452{
453 //ofstream logout("oai.log", ios::app);
[31390]454 //logout << "oaiaction::getMeta(): Looking for metaname = " << metaname << endl;
[31387]455
[31390]456 // use map::find, rather than testing map["array-index"], as the latter will create that index and
457 // insert an empty value into it, when we just want to test for it.
458 // See http://www.cplusplus.com/reference/map/map/operator[]/
459 // See also http://stackoverflow.com/questions/1939953/how-to-find-if-a-given-key-exists-in-a-c-stdmap
[31387]460
[31390]461 if(doc_info.metadata.find(metaname) == doc_info.metadata.end()) {
462 //logout << "\t\t\t Could not find meta " << metaname << endl;
463 //logout.close();
464 return false;
465 } else { // found meta
466 metavalue = doc_info.metadata[metaname].values[0];
467 //logout << "\t\t\t Found value = " << metavalue << endl;
468 //logout.close();
469 return true;
[31387]470 }
[31390]471
[31387]472}
473
[8182]474void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
475{
476 text_t temp;
477
478
479 MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
480 MetadataInfo_tmap::iterator end = doc_info.metadata.end();
481
482 while(current != end){
483 temp = current->first;
[24109]484 lc(temp); // lowercased for string comparison below
485 if(temp == "gs.oaidatestamp" && current->second.values[0] != "") { // user specified a (non-empty) oaidatestamp as gs metadata
486 // assume it is correct format
487 lastModified = current->second.values[0];
488 return;
[8182]489 }
490 else{
[31387]491 if(lastModified == "" && current->second.values.size() >= 1) {
492
493 if (temp == "oaiinf.timestamp") { // new way is to store oai timestamp in oai db and get it from there
494 // check if there was a timestamp for the doc in the etc/oai-inf database
495 lastModified = current->second.values[0];
496
497 } else if (temp == "oailastmodified") { // old way, being phased out
498 // check if there was an oailastmodified timestamp for the doc in the collection index db
499 lastModified = current->second.values[0];
500 }
501
502 if(lastModified != "") { // if we've now set the lastModified value, convert it for display and return
503 time_t raw_time = (time_t)lastModified.getint();
504 lastModified = this->parseDatestamp(raw_time);
505
506 return;
507 }
[8182]508 }
[31387]509 } // else keep looking for oai timestamp
[9608]510 ++current;
[8182]511 }
[31387]512
[8182]513}
514
515bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection,
516 const text_t &OID, recptproto *protocol, ostream &logout)
517{
518 FilterResponse_t response;
519 text_tset metadata;
[31387]520 bool status_ok = get_oai_info(OID, collection, "", metadata, false, protocol, response, logout);
521 // get timestamp from etc/oai-inf.<db> now, no longer from index.db
[8182]522 bool not_too_early = false, not_too_recent = false;
523
524 if(status_ok) {
525 ResultDocInfo_t doc_info = response.docInfo[0];
526 text_t lastModDate;
527 this->getLastModifiedDate(doc_info, lastModDate);
528
529 // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison
530 if(from != ""){
531 if(from <= lastModDate)
532 not_too_early = true;
533 }
534 else
535 not_too_early = true; // If there's no FROM field, then the record can't be too early
536
537 if(until != ""){
538 if(lastModDate <= until)
539 not_too_recent = true;
540 }
541 else
542 not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent
543
544 if(not_too_early && not_too_recent)
545 return true;
546 else
547 return false;
548 }
549 else
550 return false;
551}
[24412]552
553text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs &params) {
554
555 text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet
556
557 //text_t version = (this->configuration->getOAIVersion() <= 110) ? (text_t)"1.1":(text_t)"2.0";
558 //if(version == "2.0"){
559
560 // earliestDatestamp *should* be the YYYY-MM-DD format of the oldest lastmodified record in the
561 // repository, but we're just setting it to be the default oldest possible date - ugly, but judged
562 // not to be worth the effort of trolling through all the lastmodified dates (by others with more
563 // say than me)
564
565 // The above was before. However, now we mirror GS3 way of dealing with
566 // earliestDatestamp by going through the earliestDatestamp field of each OAI
567 // collection's build.cfg in order to work out earliestdatestamp of this Repository:
568 // by going through all the collections and getting the earliest among the
569 // "earliestDatestamp" values stored for each collection in its build.cfg
570 // (the earliestDatestamp for a collection has already been extracted from
571 // their build.cfg file at this point by collectserver::configure. The field
572 // is declared in comtypes.h)
573
574
575 // Get a list of the OAI-enabled collections available
576 text_tarray& collections = this->configuration->getCollectionsList();
577 if (collections.size() > 0)
578 {
579 // get the identifier from the params
580 text_t identifier = params["identifier"];
581 text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":";
582 identifier.replace(oai_OID_prefix, "");
583
584 // Get the current collection from the identifier
585 text_t collection_name = "";
586 oaiclassifier::toGSDL(collection_name, identifier);
587
588 // Find the starting collection
589 text_tarray::iterator collection_iterator = collections.begin();
590 while (collection_iterator != collections.end())
591 {
592 if (collection_name == "" || collection_name == *collection_iterator)
593 {
594 break;
595 }
596
597 collection_iterator++;
598 }
599
600 // Now loop through the remaining collections
601 // to work out the earliest datestamp
602 while (collection_iterator != collections.end())
603 {
604 collection_name = (*collection_iterator);
605
606 ColInfoResponse_t cinfo;
607 comerror_t err;
608 protocol->get_collectinfo(collection_name, cinfo, err, cerr);
609 if (err == noError) {
610 text_t eDatestamp = cinfo.earliestDatestamp;
611 time_t raw_time = (time_t)eDatestamp.getint();
612 eDatestamp = this->parseDatestamp(raw_time);
613
614 if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection
615 earliestDatestamp = eDatestamp;
616 } else if(eDatestamp < earliestDatestamp) {
617 earliestDatestamp = eDatestamp;
618 }
619 }
620 collection_iterator++;
621
622 }
623 }
624
625 //}
626
627 // if repository's earliestDatestamp is still unset, default to unix epoch time
628 if(earliestDatestamp == "") {
629 earliestDatestamp = "1970-01-01";
630 }
631
632 this->mEarliestDatestamp = earliestDatestamp;
633 return mEarliestDatestamp;
[24871]634}
Note: See TracBrowser for help on using the repository browser.