root/main/trunk/greenstone2/runtime-src/src/oaiservr/oaiaction.cpp @ 24412

Revision 24412, 18.7 KB (checked in by ak19, 9 years ago)

Fixed a new failure of the OAI validation test of GS2's OAI server: where list records is given an Until date that is earlier than the earliestDatestamp. Needs to return a noRecordsMatch. It does now. The calculation of the earliestDatestamp is now shifted to the oaiaction.cpp superclass and called by both identifyaction and listrecordsaction for the identify and listrecords OAI queries, since these tasks need to work with earliestDatestamp.

  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * oaiaction.cpp --
4 *
5 * Copyright (C) 2004-2010  The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "oaiaction.h"
28#include "oaitools.h"
29#include "recptprototools.h"
30
31#if defined(GSDL_USE_IOS_H)
32#  if defined(__WIN32__)
33#    include <strstrea.h> // vc4
34#  else
35#    include <strstream.h>
36#  endif
37#else
38#  include <sstream>
39#endif
40
41#include <time.h>
42
43oaiaction::oaiaction(const text_t &name)
44{
45  this->logout = new ofstream("oai.log", ios::app);
46  this->configuration = NULL;
47  this->name = name;
48  this->mEarliestDatestamp = "";
49}
50
51//----------------------------------------------------------------------------------------------
52
53// Over-ridden by child classes
54bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
55{
56  this->errorType = "";
57  return true;
58}
59
60//----------------------------------------------------------------------------------------------
61
62/**********
63 * Compare the supplied metadataPrefix to all those that
64 * are supported. If there is NO match, return true. If
65 * it DOES match one, return false.
66 */
67bool oaiaction::formatNotSupported(text_t &metaFormat)
68{
69  // is it in our list?
70  if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true;
71  return false;
72}
73
74//----------------------------------------------------------------------------------------------
75
76/**********
77 * Function for outputting the appropriate error(s) with the (version 2.0) request.
78 * The error(s) MUST be included in the response, and take the form:
79 * <error code="errorType">Description of error</error>
80 */
81void oaiaction::output_error(ostream &output, text_t &errorType)
82{
83  text_t description = "";
84
85  if(errorType == "badArgument"){
86    description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax";
87  }
88  else if(errorType == "noRecordsMatch"){
89    description = "No record matches all the requested parameters";
90  }
91  else if(errorType == "cannotDisseminateFormat"){
92    description = "The metadata format specified is not supported by the item or by the repository";
93  }
94  else if(errorType == "idDoesNotExist"){
95    description = "The value of the identifier is unknown or illegal in this repository";
96  }
97  else if(errorType == "badVerb"){
98    description = "Value of the verb argument is illegal, missing, or repeated";
99  }
100  else if(errorType == "noMetadataFormats"){
101    description = "There are no metadata formats available for the item";
102  }
103  else if(errorType == "badResumptionToken"){
104    description = "The value of the resumptionToken argument is invalid or expired";
105  }
106  else if(errorType == "noSetHierarchy"){
107    description = "The repository does not support sets";
108  }
109
110  output << "  <error code=\"" << errorType << "\">" << description << "</error>\n";
111}
112
113//----------------------------------------------------------------------------------------------
114
115text_t oaiaction::getName()
116{
117  return this->name;
118}
119
120//----------------------------------------------------------------------------------------------
121
122/**********
123 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
124 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
125 * systems) and converts it to YYYY-MM-DD format.
126 */
127text_t oaiaction::parseDatestamp(time_t &rawtime)
128{
129  text_t year, month, day, lastModified;
130  tm *ptm;
131  ptm = gmtime(&rawtime);
132  int raw_month = ptm->tm_mon + 1;
133  int raw_day = ptm->tm_mday;
134
135  year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format
136
137  // Need the month in MM format, so if month is 1..9, add a 0 to the front
138  if(raw_month < 10){   
139    month = "0";
140    month += raw_month;
141  }
142  else month = raw_month;
143
144  if(raw_day < 10){   
145    day  = "0";
146    day += raw_day;
147  }
148  else day = raw_day;
149 
150  lastModified = year + "-" + month + "-" + day;
151
152  return lastModified;
153}
154
155//----------------------------------------------------------------------------------------------
156/**********
157 * Used by both versions to get the date & time of the client's request. The <responseDate> tag is
158 * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from
159 * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time
160 * is expected to be in UTC, and Z should simply be the character 'Z'.
161 */
162void oaiaction::getResponseDate(text_t &date)
163{
164  time_t rawtime;
165  tm    *ptm;
166
167  time(&rawtime);         // Get the epoch time
168
169  ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object
170
171  text_t month, day, hour, minute, second;
172  int    raw_month  = ptm->tm_mon + 1;  // Note Jan = 0 ... Dec = 11, so add 1
173  int    raw_day    = ptm->tm_mday;
174  int    raw_hour   = ptm->tm_hour;
175  int    raw_minute = ptm->tm_min;
176  int    raw_second = ptm->tm_sec;
177
178  // Need the month in MM format, so if month is 1..9, add a 0 to the front
179  if(raw_month < 10){   
180    month = "0";
181  }
182  month += raw_month;
183 
184  // Same for days, hours, minutes and seconds
185  if(raw_day < 10){   
186    day = "0";
187  }
188  day += raw_day;
189
190  if(raw_hour < 10){   
191    hour = "0";
192  }
193  hour += raw_hour;
194
195  if(raw_minute < 10){   
196    minute = "0";
197  }
198  minute += raw_minute;
199 
200  if(raw_second < 10){   
201    second = "0";
202  }
203  second += raw_second;
204 
205  // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime
206  date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format
207  date += "-";
208  date += month;
209  date += "-";
210  date += day;
211  date += "T";
212  date += hour;
213  date += ":";
214  date += minute;
215  date += ":";
216  date += second;
217  // If we're using v1.1, then tack on local time offset, otherwise don't
218  if(this->configuration->getOAIVersion() == 110){
219    date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from
220                         // UTC (GMT), e.g. "+8:00", "-5:00"
221  }
222  else
223    date += "Z";         // If v2.0, we put 'Z' on the end rather than the localtime offset
224}
225
226//----------------------------------------------------------------------------------------------
227/**********
228 * Does different request tags depending on the version of the OAI protocol running
229 */
230void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
231{
232  // Iterators for moving through the list of parameters (keys) specified
233  text_tmap::const_iterator here;
234  text_tmap::const_iterator end;
235  int numArgs = params.getSize();
236
237  here = params.begin();
238  end  = params.end();
239 
240  text_t baseURL = this->configuration->getBaseURL();
241
242  int version = this->configuration->getOAIVersion();
243 
244  switch(version){
245  case 110:
246    /* Takes the form:
247     * <requestURL>http://baseURL.com/oaimain?verb="someVerb"&amp;key=value</requestURL>
248     */
249    requestURL = "  <requestURL>" + baseURL;
250   
251    if(numArgs == 0) break; // If no args, all done - the error will be picked up later
252   
253    // The following lines will give us the "label=value" syntax
254    requestURL += "?";
255    requestURL += here->first;
256    requestURL += "=";
257    requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant
258    ++here;
259   
260    while(here != end){
261      requestURL +="&amp;"; // Stick in the ampersand in URL encoding
262      requestURL += (here->first + "=" + html_safe(here->second));
263      ++here;
264    }
265    requestURL += "</requestURL>\n";
266    break;
267 
268  case 200:
269  default:
270    /* Takes the form:
271     * <request verb="someVerb" key="value" key="value">
272     *          http://baseURL.com/oaimain</request>
273     */
274    if(numArgs == 0) {
275      requestURL = "  <request>" + baseURL + "</request>\n";
276      break;
277    }
278    requestURL = "  <request " + here->first + "=\"" + html_safe(here->second) + "\"";
279    ++here;
280    while(here != end){
281      requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");     
282      ++here;
283    }
284    requestURL += ">\n           " + baseURL + "</request>\n";
285    break;
286  }
287}
288
289//----------------------------------------------------------------------------------------------
290/**********
291 * Send the (OAI version-dependent) response text to the output stream
292 */
293void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
294{
295  bool   error;
296  text_t date, requestURL;
297
298  // Write the response date & time into 'date'
299  this->getResponseDate(date);
300  int version = this->configuration->getOAIVersion();
301
302  // validate the action
303  error = !this->validateAction(protocol, params);
304
305  // raise an error for duplicated arguments and set the
306  // error type "manually" here...
307  if (params.hasDuplicateArg() && !error) {
308    this->errorType = "badArgument";
309    error = true;
310  }
311
312  // start with the required http header
313  if (version <= 110 && error){
314    output << "Status: 400 " << this->errorType << "\n";
315    output << "Content-Type: text/xml\n\n";
316    return;
317  }
318 
319  output << "Status: 200\n";
320  output << "Content-Type: text/xml\n\n";
321 
322  // output xml header parts
323  output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
324
325  if(version <= 110){
326    // output OAI v1.1 action header tag
327    output << "<" << this->name;
328    output << "\n       xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
329    output << "\n       xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
330    output << "\n       xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
331    output << "\n           http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
332  }
333  else {
334    text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
335    output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
336    // output OAI v2.0 action header tag
337    output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
338       << "         xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
339       << "         xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
340       << "             http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
341  }
342  // output current time for response
343  output << "  <responseDate>" << date << "</responseDate>\n";
344 
345  // output request URL. This differs depending on the OAI protocol version currently running, so
346  // the entire field - including tags - must be put into the text_t variable by getRequestURL()
347  this->getRequestURL(params, requestURL);
348
349  output << requestURL ;
350
351  if (error == false) {
352    // a string stream to write the content of the action to; this is done so that we can
353    // avoid outputting the action tag if the action's body is unsuccessful, in which
354    // case the leading tag must be suppressed
355#if defined(GSDL_USE_IOS_H)
356    ostrstream outstream;
357#else
358    ostringstream outstream;
359#endif
360
361    // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
362    //
363    // An action that outputs no content should raise an error state to suppress the
364    // matching opening and close tags if it outputs no content in OAI 2.0
365    error = !this->output_content(outstream, protocol, params);
366
367    // output the leading tag if no error occurred
368    if (error == false) {
369      if (version >= 200) {
370    this->output_action_tag(output, true);
371      }
372    }
373
374    // now output the body of the action content
375#if defined(GSDL_USE_IOS_H)
376    outstream << ends;  // Ensure outstream is null-terminated correctly
377#endif
378    output << outstream.str();
379  }
380  else {
381    if (version >= 200) {
382      this->output_error(output, this->errorType);
383    }
384  }
385
386  // close out our response - both versions need this line, but v2.0 only needs it if there was no error
387  if((version == 110) || (version >= 200 && error == false)){
388    this->output_action_tag(output, false);
389  }
390  if(version >= 200){
391    output << "</OAI-PMH>\n";
392  }
393}
394
395void oaiaction::output_action_tag(ostream &output, bool openTag)
396{
397  output << " <";
398  if (!openTag) {
399    output << "/";
400  }
401  output << this->name << ">" << endl;
402}
403
404void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified,
405                     const text_tarray &memberOf, int oaiVersion)
406{
407    output << "    <header>" << endl;
408    output << "      <identifier>" << oaiLabel     << "</identifier>" << endl;
409    output << "      <datestamp>"  << lastModified << "</datestamp>" << endl;
410   
411    text_t collection_id;
412    // Find the collection id from oai:repos-id:collection:doc
413    oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);
414    if(oaiVersion >= 200){
415      text_tarray::const_iterator member = memberOf.begin();
416      text_tarray::const_iterator memEnd = memberOf.end();
417
418      // As well as all the subsets that a doc appears in, it is also a member of the 'collection' set
419      output << "      <setSpec>" << collection_id << "</setSpec>" << endl;
420      while (member != memEnd) {
421    text_t oaiSet = *member;
422    oaiclassifier::toOAI(collection_id, oaiSet);
423    output << "      <setSpec>" << oaiSet << "</setSpec>" << endl;
424    ++member;
425      }
426    }
427    output << "    </header>" << endl;
428}
429
430void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
431{
432  text_t temp;
433
434 
435  MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
436  MetadataInfo_tmap::iterator end = doc_info.metadata.end();
437
438  while(current != end){
439    temp = current->first;
440    lc(temp); // lowercased for string comparison below
441    if(temp == "gs.oaidatestamp" && current->second.values[0] != "") { // user specified a (non-empty) oaidatestamp as gs metadata
442        // assume it is correct format
443        lastModified = current->second.values[0];
444        return;
445    }
446    else{
447      if (temp == "oailastmodified" && lastModified == "" && current->second.values.size() >= 1) {
448    lastModified = current->second.values[0];
449    time_t raw_time = (time_t)lastModified.getint();
450    lastModified = this->parseDatestamp(raw_time);
451      }
452    }
453    ++current;
454  } 
455}
456
457bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection,
458                const text_t &OID, recptproto *protocol, ostream &logout)
459{
460  FilterResponse_t response;
461  text_tset        metadata;
462  bool status_ok = get_info(OID, collection, "", metadata, false, protocol, response, logout);
463  bool not_too_early = false, not_too_recent = false;
464
465  if(status_ok) {
466    ResultDocInfo_t doc_info = response.docInfo[0];
467    text_t lastModDate;
468    this->getLastModifiedDate(doc_info, lastModDate);
469   
470    // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison
471    if(from != ""){
472      if(from <= lastModDate)
473    not_too_early = true;
474    }
475    else
476      not_too_early = true; // If there's no FROM field, then the record can't be too early
477
478    if(until != ""){
479      if(lastModDate <= until)
480    not_too_recent = true;
481    }
482    else
483      not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent   
484   
485    if(not_too_early && not_too_recent)
486      return true;
487    else
488      return false;
489  }
490  else
491    return false;
492}
493
494text_t oaiaction::calcEarliestDatestamp(recptproto *protocol, oaiargs &params) {
495   
496    text_t earliestDatestamp = ""; // do not set default to unix epoch time "1970-01-01" yet
497   
498    //text_t version = (this->configuration->getOAIVersion() <= 110) ? (text_t)"1.1":(text_t)"2.0";
499    //if(version == "2.0"){
500   
501    // earliestDatestamp *should* be the YYYY-MM-DD format of the oldest lastmodified record in the
502    // repository, but we're just setting it to be the default oldest possible date - ugly, but judged
503    // not to be worth the effort of trolling through all the lastmodified dates (by others with more
504    // say than me)
505   
506    // The above was before. However, now we mirror GS3 way of dealing with
507    // earliestDatestamp by going through the earliestDatestamp field of each OAI
508    // collection's build.cfg in order to work out earliestdatestamp of this Repository:
509    // by going through all the collections and getting the earliest among the
510    // "earliestDatestamp" values stored for each collection in its build.cfg
511    // (the earliestDatestamp for a collection has already been extracted from
512    // their build.cfg file at this point by collectserver::configure. The field
513    // is declared in comtypes.h)   
514   
515   
516    // Get a list of the OAI-enabled collections available
517    text_tarray& collections = this->configuration->getCollectionsList();
518    if (collections.size() > 0)
519    {   
520        // get the identifier from the params
521        text_t identifier = params["identifier"];
522        text_t oai_OID_prefix = "oai:"+this->configuration->getRepositoryId()+":";
523        identifier.replace(oai_OID_prefix, "");
524
525        // Get the current collection from the identifier
526        text_t collection_name = "";
527        oaiclassifier::toGSDL(collection_name, identifier);
528
529        // Find the starting collection
530        text_tarray::iterator collection_iterator = collections.begin();
531        while (collection_iterator != collections.end())
532        {
533            if (collection_name == "" || collection_name == *collection_iterator)
534            {
535              break;
536            }
537
538            collection_iterator++;
539        }
540       
541        // Now loop through the remaining collections
542        // to work out the earliest datestamp
543        while (collection_iterator != collections.end())
544        {
545            collection_name = (*collection_iterator);
546           
547            ColInfoResponse_t cinfo;
548            comerror_t err;
549            protocol->get_collectinfo(collection_name, cinfo, err, cerr);
550            if (err == noError) {               
551                text_t eDatestamp = cinfo.earliestDatestamp;
552                time_t raw_time = (time_t)eDatestamp.getint();
553                eDatestamp = this->parseDatestamp(raw_time);
554               
555                if(earliestDatestamp == "") { // first earliestdatestamp we've seen for an oai collection
556                    earliestDatestamp = eDatestamp;
557                } else if(eDatestamp < earliestDatestamp) {
558                    earliestDatestamp = eDatestamp;
559                }
560            }
561            collection_iterator++;
562           
563        }       
564    }
565       
566    //}
567   
568    // if repository's earliestDatestamp is still unset, default to unix epoch time
569    if(earliestDatestamp == "") {       
570        earliestDatestamp = "1970-01-01";
571    }
572   
573    this->mEarliestDatestamp = earliestDatestamp;
574    return mEarliestDatestamp;
575}
Note: See TracBrowser for help on using the browser.