source: main/trunk/greenstone2/runtime-src/src/oaiservr/oaiaction.cpp@ 24108

Last change on this file since 24108 was 24108, checked in by ak19, 13 years ago

Preliminary changes that were needed to get the GS2 OAIserver validated again: 1. IDs were OAI-IDs that needed their OAI prefixes removed before we could do a lookup in Greenstone for those IDs; 2. the lastmodified date was never retrieved, since gs.oaidatestamp was retrieved even when it was empty, and so retrieval of date ranges was failing. (To get their OAI server validated, users need to change etc\oai.cfg to set repositoryId and optionally repositoryName, provide emails for the creator and maintainer fields in the collect.cfg files of those of their collections that are to be visible over OAI, and use the GSI app to allow external connections to get it validated.) With the current commit, GS2's OAIserver validates successfully against the official online validator when the validator is given the full URL (containing the full domain) to a running GS server's oaiserver.cgi page. In the next commits, I will be using oailastmodified(date) and working out the earliestOAIDateStamp, as GS3 has been made to do this now.

  • Property svn:keywords set to Author Date Id Revision
File size: 15.7 KB
Line 
1/**********************************************************************
2 *
3 * oaiaction.cpp --
4 *
5 * Copyright (C) 2004-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "oaiaction.h"
28#include "oaitools.h"
29#include "recptprototools.h"
30
31#if defined(GSDL_USE_IOS_H)
32# if defined(__WIN32__)
33# include <strstrea.h> // vc4
34# else
35# include <strstream.h>
36# endif
37#else
38# include <sstream>
39#endif
40
41#include <time.h>
42
43
44oaiaction::oaiaction(const text_t &name)
45{
46 this->logout = new ofstream("oai.log", ios::app);
47 this->configuration = NULL;
48 this->name = name;
49}
50
51//----------------------------------------------------------------------------------------------
52
53// Over-ridden by child classes
54bool oaiaction::validateAction(recptproto *protocol, oaiargs &params)
55{
56 this->errorType = "";
57 return true;
58}
59
60//----------------------------------------------------------------------------------------------
61
62/**********
63 * Compare the supplied metadataPrefix to all those that
64 * are supported. If there is NO match, return true. If
65 * it DOES match one, return false.
66 */
67bool oaiaction::formatNotSupported(text_t &metaFormat)
68{
69 // is it in our list?
70 if (this->configuration->getMetadataSet().count(metaFormat) == 0) return true;
71 return false;
72}
73
74//----------------------------------------------------------------------------------------------
75
76/**********
77 * Function for outputting the appropriate error(s) with the (version 2.0) request.
78 * The error(s) MUST be included in the response, and take the form:
79 * <error code="errorType">Description of error</error>
80 */
81void oaiaction::output_error(ostream &output, text_t &errorType)
82{
83 text_t description = "";
84
85 if(errorType == "badArgument"){
86 description = "The request includes illegal arguments, is missing required arguments, repeats arguments or the value for an argument has an illegal syntax";
87 }
88 else if(errorType == "noRecordsMatch"){
89 description = "No record matches all the requested parameters";
90 }
91 else if(errorType == "cannotDisseminateFormat"){
92 description = "The metadata format specified is not supported by the item or by the repository";
93 }
94 else if(errorType == "idDoesNotExist"){
95 description = "The value of the identifier is unknown or illegal in this repository";
96 }
97 else if(errorType == "badVerb"){
98 description = "Value of the verb argument is illegal, missing, or repeated";
99 }
100 else if(errorType == "noMetadataFormats"){
101 description = "There are no metadata formats available for the item";
102 }
103 else if(errorType == "badResumptionToken"){
104 description = "The value of the resumptionToken argument is invalid or expired";
105 }
106 else if(errorType == "noSetHierarchy"){
107 description = "The repository does not support sets";
108 }
109
110 output << " <error code=\"" << errorType << "\">" << description << "</error>\n";
111}
112
113//----------------------------------------------------------------------------------------------
114
115text_t oaiaction::getName()
116{
117 return this->name;
118}
119
120//----------------------------------------------------------------------------------------------
121
122/**********
123 * Used in version 2.0 to provide the <datestamp> tag for each document. The function is passed
124 * the 'last modified' date of the file in epoch time (time in seconds since 1970-01-01 on Unix
125 * systems) and converts it to YYYY-MM-DD format.
126 */
127text_t oaiaction::parseDatestamp(time_t &rawtime)
128{
129 text_t year, month, day, lastModified;
130 tm *ptm;
131 ptm = gmtime(&rawtime);
132 int raw_month = ptm->tm_mon + 1;
133 int raw_day = ptm->tm_mday;
134
135 year = (ptm->tm_year + 1900); // Takes off 1900 for year by default, so replace it to get YYYY format
136
137 // Need the month in MM format, so if month is 1..9, add a 0 to the front
138 if(raw_month < 10){
139 month = "0";
140 month += raw_month;
141 }
142 else month = raw_month;
143
144 if(raw_day < 10){
145 day = "0";
146 day += raw_day;
147 }
148 else day = raw_day;
149
150 lastModified = year + "-" + month + "-" + day;
151
152 return lastModified;
153}
154
155//----------------------------------------------------------------------------------------------
156/**********
157 * Used by both versions to get the date & time of the client's request. The <responseDate> tag is
158 * in the format YYYY-MM-DDTHH:MM:SSZ, where T is simply the letter 'T' and Z is the local offset from
159 * GMT (now UTC). Examples of Z are +12:00 (NZ) or -03:00 for version 1.1. In version 2.0, the time
160 * is expected to be in UTC, and Z should simply be the character 'Z'.
161 */
162void oaiaction::getResponseDate(text_t &date)
163{
164 time_t rawtime;
165 tm *ptm;
166
167 time(&rawtime); // Get the epoch time
168
169 ptm = gmtime(&rawtime); // Convert to UTC-time formatted tm object
170
171 text_t month, day, hour, minute, second;
172 int raw_month = ptm->tm_mon + 1; // Note Jan = 0 ... Dec = 11, so add 1
173 int raw_day = ptm->tm_mday;
174 int raw_hour = ptm->tm_hour;
175 int raw_minute = ptm->tm_min;
176 int raw_second = ptm->tm_sec;
177
178 // Need the month in MM format, so if month is 1..9, add a 0 to the front
179 if(raw_month < 10){
180 month = "0";
181 }
182 month += raw_month;
183
184 // Same for days, hours, minutes and seconds
185 if(raw_day < 10){
186 day = "0";
187 }
188 day += raw_day;
189
190 if(raw_hour < 10){
191 hour = "0";
192 }
193 hour += raw_hour;
194
195 if(raw_minute < 10){
196 minute = "0";
197 }
198 minute += raw_minute;
199
200 if(raw_second < 10){
201 second = "0";
202 }
203 second += raw_second;
204
205 // Want YYYY-MM-DDTHH:MM:SS+UTC, where UTC is num hours from UTC(GMT) to localtime
206 date += (ptm->tm_year + 1900); // Takes off 1900 for year, so replace it to get YYYY format
207 date += "-";
208 date += month;
209 date += "-";
210 date += day;
211 date += "T";
212 date += hour;
213 date += ":";
214 date += minute;
215 date += ":";
216 date += second;
217 // If we're using v1.1, then tack on local time offset, otherwise don't
218 if(this->configuration->getOAIVersion() == 110){
219 date += _LOCALTIME_; // Defined in oaiaction.h. States how many hours localtime is from
220 // UTC (GMT), e.g. "+8:00", "-5:00"
221 }
222 else
223 date += "Z"; // If v2.0, we put 'Z' on the end rather than the localtime offset
224}
225
226//----------------------------------------------------------------------------------------------
227/**********
228 * Does different request tags depending on the version of the OAI protocol running
229 */
230void oaiaction::getRequestURL(oaiargs &params, text_t &requestURL)
231{
232 // Iterators for moving through the list of parameters (keys) specified
233 text_tmap::const_iterator here;
234 text_tmap::const_iterator end;
235 int numArgs = params.getSize();
236
237 here = params.begin();
238 end = params.end();
239
240 text_t baseURL = this->configuration->getBaseURL();
241
242 int version = this->configuration->getOAIVersion();
243
244 switch(version){
245 case 110:
246 /* Takes the form:
247 * <requestURL>http://baseURL.com/oaimain?verb="someVerb"&amp;key=value</requestURL>
248 */
249 requestURL = " <requestURL>" + baseURL;
250
251 if(numArgs == 0) break; // If no args, all done - the error will be picked up later
252
253 // The following lines will give us the "label=value" syntax
254 requestURL += "?";
255 requestURL += here->first;
256 requestURL += "=";
257 requestURL += html_safe(here->second); // parse the argument to ensure it is URL-encoding compliant
258 ++here;
259
260 while(here != end){
261 requestURL +="&amp;"; // Stick in the ampersand in URL encoding
262 requestURL += (here->first + "=" + html_safe(here->second));
263 ++here;
264 }
265 requestURL += "</requestURL>\n";
266 break;
267
268 case 200:
269 default:
270 /* Takes the form:
271 * <request verb="someVerb" key="value" key="value">
272 * http://baseURL.com/oaimain</request>
273 */
274 if(numArgs == 0) {
275 requestURL = " <request>" + baseURL + "</request>\n";
276 break;
277 }
278 requestURL = " <request " + here->first + "=\"" + html_safe(here->second) + "\"";
279 ++here;
280 while(here != end){
281 requestURL += (" " + here->first + "=\"" + html_safe(here->second) + "\"");
282 ++here;
283 }
284 requestURL += ">\n " + baseURL + "</request>\n";
285 break;
286 }
287}
288
289//----------------------------------------------------------------------------------------------
290/**********
291 * Send the (OAI version-dependent) response text to the output stream
292 */
293void oaiaction::getResponse(ostream &output, recptproto *protocol, oaiargs &params)
294{
295 bool error;
296 text_t date, requestURL;
297
298 // Write the response date & time into 'date'
299 this->getResponseDate(date);
300 int version = this->configuration->getOAIVersion();
301
302 // validate the action
303 error = !this->validateAction(protocol, params);
304
305 // raise an error for duplicated arguments and set the
306 // error type "manually" here...
307 if (params.hasDuplicateArg() && !error) {
308 this->errorType = "badArgument";
309 error = true;
310 }
311
312 // start with the required http header
313 if (version <= 110 && error){
314 output << "Status: 400 " << this->errorType << "\n";
315 output << "Content-Type: text/xml\n\n";
316 return;
317 }
318
319 output << "Status: 200\n";
320 output << "Content-Type: text/xml\n\n";
321
322 // output xml header parts
323 output << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n";
324
325 if(version <= 110){
326 // output OAI v1.1 action header tag
327 output << "<" << this->name;
328 output << "\n xmlns=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name << "\" ";
329 output << "\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ";
330 output << "\n xsi:schemaLocation=\"http://www.openarchives.org/OAI/1.1/OAI_" << this->name;
331 output << "\n http://www.openarchives.org/OAI/1.1/OAI_" << this->name << ".xsd\">\n";
332 }
333 else {
334 text_t baseDocRoot = this->configuration->getRelativeBaseDocRoot();
335 output << "<?xml-stylesheet type=\"text/xsl\" href=\""<<baseDocRoot<<"/web/style/oai2.xsl\" ?>\n";
336 // output OAI v2.0 action header tag
337 output << "<OAI-PMH xmlns=\"http://www.openarchives.org/OAI/2.0/\"\n"
338 << " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
339 << " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/\n"
340 << " http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd\">\n";
341 }
342 // output current time for response
343 output << " <responseDate>" << date << "</responseDate>\n";
344
345 // output request URL. This differs depending on the OAI protocol version currently running, so
346 // the entire field - including tags - must be put into the text_t variable by getRequestURL()
347 this->getRequestURL(params, requestURL);
348
349 output << requestURL ;
350
351 if (error == false) {
352 // a string stream to write the content of the action to; this is done so that we can
353 // avoid outputting the action tag if the action's body is unsuccessful, in which
354 // case the leading tag must be suppressed
355#if defined(GSDL_USE_IOS_H)
356 ostrstream outstream;
357#else
358 ostringstream outstream;
359#endif
360
361 // Version 2.0 needs an <Identify>, etc. tag after the OAI-PMH header, IF there is no error
362 //
363 // An action that outputs no content should raise an error state to suppress the
364 // matching opening and close tags if it outputs no content in OAI 2.0
365 error = !this->output_content(outstream, protocol, params);
366
367 // output the leading tag if no error occurred
368 if (error == false) {
369 if (version >= 200) {
370 this->output_action_tag(output, true);
371 }
372 }
373
374 // now output the body of the action content
375#if defined(GSDL_USE_IOS_H)
376 outstream << ends; // Ensure outstream is null-terminated correctly
377#endif
378 output << outstream.str();
379 }
380 else {
381 if (version >= 200) {
382 this->output_error(output, this->errorType);
383 }
384 }
385
386 // close out our response - both versions need this line, but v2.0 only needs it if there was no error
387 if((version == 110) || (version >= 200 && error == false)){
388 this->output_action_tag(output, false);
389 }
390 if(version >= 200){
391 output << "</OAI-PMH>\n";
392 }
393}
394
395void oaiaction::output_action_tag(ostream &output, bool openTag)
396{
397 output << " <";
398 if (!openTag) {
399 output << "/";
400 }
401 output << this->name << ">" << endl;
402}
403
404void oaiaction::output_record_header(ostream &output, const text_t &oaiLabel, const text_t &lastModified,
405 const text_tarray &memberOf, int oaiVersion)
406{
407 output << " <header>" << endl;
408 output << " <identifier>" << oaiLabel << "</identifier>" << endl;
409 output << " <datestamp>" << lastModified << "</datestamp>" << endl;
410
411 text_t collection_id;
412 // Find the collection id from oai:repos-id:collection:doc
413 oaiclassifier::getCollectionFromOAIID(oaiLabel, collection_id);
414 if(oaiVersion >= 200){
415 text_tarray::const_iterator member = memberOf.begin();
416 text_tarray::const_iterator memEnd = memberOf.end();
417
418 // As well as all the subsets that a doc appears in, it is also a member of the 'collection' set
419 output << " <setSpec>" << collection_id << "</setSpec>" << endl;
420 while (member != memEnd) {
421 text_t oaiSet = *member;
422 oaiclassifier::toOAI(collection_id, oaiSet);
423 output << " <setSpec>" << oaiSet << "</setSpec>" << endl;
424 ++member;
425 }
426 }
427 output << " </header>" << endl;
428}
429
430void oaiaction::getLastModifiedDate(ResultDocInfo_t &doc_info, text_t &lastModified)
431{
432 text_t temp;
433
434
435 MetadataInfo_tmap::iterator current = doc_info.metadata.begin();
436 MetadataInfo_tmap::iterator end = doc_info.metadata.end();
437
438 while(current != end){
439 temp = current->first;
440 lc(temp);
441 if(temp == "gs.oaidatestamp"){
442 // assume it is correct format
443 lastModified = current->second.values[0];
444 if(lastModified != "") {
445 return;
446 }
447 }
448 else{
449 if (temp == "lastmodified" && lastModified == "" && current->second.values.size() >= 1) {
450 lastModified = current->second.values[0];
451 time_t raw_time = (time_t)lastModified.getint();
452 lastModified = this->parseDatestamp(raw_time);
453 }
454 }
455 ++current;
456 }
457}
458
459bool oaiaction::inDateRange(const text_t &from, const text_t &until, const text_t &collection,
460 const text_t &OID, recptproto *protocol, ostream &logout)
461{
462 FilterResponse_t response;
463 text_tset metadata;
464 bool status_ok = get_info(OID, collection, "", metadata, false, protocol, response, logout);
465 bool not_too_early = false, not_too_recent = false;
466
467 if(status_ok) {
468 ResultDocInfo_t doc_info = response.docInfo[0];
469 text_t lastModDate;
470 this->getLastModifiedDate(doc_info, lastModDate);
471
472 // All date fields should be in the form YYYY-MM-DD, which allows for a direct comparison
473 if(from != ""){
474 if(from <= lastModDate)
475 not_too_early = true;
476 }
477 else
478 not_too_early = true; // If there's no FROM field, then the record can't be too early
479
480 if(until != ""){
481 if(lastModDate <= until)
482 not_too_recent = true;
483 }
484 else
485 not_too_recent = true; // If there's no UNTIL field, then the record can't be too recent
486
487 if(not_too_early && not_too_recent)
488 return true;
489 else
490 return false;
491 }
492 else
493 return false;
494}
Note: See TracBrowser for help on using the repository browser.