[22739] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
| 3 | * oaiconfig.cpp --
|
---|
| 4 | *
|
---|
| 5 | * Copyright (C) 2004-2010 The New Zealand Digital Library Project
|
---|
| 6 | *
|
---|
| 7 | * A component of the Greenstone digital library software
|
---|
| 8 | * from the New Zealand Digital Library Project at the
|
---|
| 9 | * University of Waikato, New Zealand.
|
---|
| 10 | *
|
---|
| 11 | * This program is free software; you can redistribute it and/or modify
|
---|
| 12 | * it under the terms of the GNU General Public License as published by
|
---|
| 13 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 14 | * (at your option) any later version.
|
---|
| 15 | *
|
---|
| 16 | * This program is distributed in the hope that it will be useful,
|
---|
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 19 | * GNU General Public License for more details.
|
---|
| 20 | *
|
---|
| 21 | * You should have received a copy of the GNU General Public License
|
---|
| 22 | * along with this program; if not, write to the Free Software
|
---|
| 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 24 | *
|
---|
| 25 | *********************************************************************/
|
---|
| 26 |
|
---|
[8182] | 27 | #include "oaiconfig.h"
|
---|
| 28 | #include <iostream>
|
---|
[20827] | 29 | #include <stdlib.h>
|
---|
[8182] | 30 | #include "fileutil.h"
|
---|
| 31 |
|
---|
[20589] | 32 |
|
---|
[8182] | 33 | /**
|
---|
[18885] | 34 | * The mapping works as follows in the oai.cfg or collect.cfg file.
|
---|
[8182] | 35 | *
|
---|
| 36 | * A line is in the format oaimapping <collection field> <oai field>
|
---|
| 37 | *
|
---|
| 38 | * The map here is keyed by the "Greenstone" (collection) field name and
|
---|
| 39 | * holds the OAI field name it is mapped to, i.e. the same direction as the
|
---|
| 40 | * oaimapping line in the config file. The oairecordaction class instance which
|
---|
| 41 | * produces output for an OAI record information request thus uses the map
|
---|
| 42 | * to work out, from the field in the collection it has on hand, which OAI
|
---|
| 43 | * record name it should use instead.
|
---|
| 44 | *
|
---|
| 45 | * An extension is to be used for this in which the OAI field name in the
|
---|
| 46 | * collect.cfg file can be made specific for a particular record format.
|
---|
| 47 | * This is done using the OAI field name in the format of
|
---|
[18885] | 48 | * <OAI format>.<OAI field name>
|
---|
| 49 | * Thus, an rfc1807 Title field would be referred to as rfc1807.Title
|
---|
[8182] | 50 | *
|
---|
[18885] | 51 | * A collection-level mapping can be provided in oai.cfg by prepending
|
---|
| 52 | * collname to collection field:
|
---|
| 53 | * <collname:field> <oai field>
|
---|
| 54 |
|
---|
[8182] | 55 | * In the absence of a particular format name, the mapping is taken to be
|
---|
| 56 | * universal.
|
---|
| 57 | */
|
---|
| 58 |
|
---|
| 59 | oaiconfig::oaiconfig() : configurable () {
|
---|
[11732] | 60 | this->resumptionSize = -1; // Default = do not use resumption tokens
|
---|
[8182] | 61 | }
|
---|
| 62 |
|
---|
| 63 | oaiconfig::oaiconfig(text_t &gsdlhome, text_t &gsdlcollect)
|
---|
| 64 | {
|
---|
[18885] | 65 | // read main configuration file (oai.cfg) to get oai collections
|
---|
[8311] | 66 | text_t mainconfig = filename_cat(gsdlhome, "etc", "oai.cfg");
|
---|
[8182] | 67 | this->collection = "";
|
---|
[11732] | 68 | this->resumptionSize = -1;
|
---|
[8182] | 69 | this->read_configfile(mainconfig);
|
---|
[11732] | 70 |
|
---|
[8182] | 71 | // then if we've not got a specified collection in the gsdlcollect
|
---|
| 72 | // parameter, read in all the collection's individual configurations
|
---|
| 73 | if (gsdlcollect == "") {
|
---|
[18903] | 74 | text_tarray::iterator start = this->collectList.begin();
|
---|
| 75 | text_tarray::iterator here = this->collectList.end()-1;
|
---|
| 76 | while (here != start) {
|
---|
| 77 | if (!this->configureCollection(gsdlhome, *here)) {
|
---|
| 78 | this->collectList.erase(here);
|
---|
| 79 | }
|
---|
| 80 | --here;
|
---|
[8182] | 81 | }
|
---|
[18903] | 82 | // and do the first one
|
---|
| 83 | if (!this->configureCollection(gsdlhome, *here)) {
|
---|
| 84 | this->collectList.erase(here);
|
---|
[24114] | 85 | }
|
---|
[8182] | 86 | }
|
---|
| 87 | else {
|
---|
[18903] | 88 | // what do we do if this fails?
|
---|
[8182] | 89 | this->configureCollection(gsdlhome, gsdlcollect);
|
---|
| 90 | }
|
---|
| 91 | }
|
---|
| 92 |
|
---|
| 93 | oaiconfig::~oaiconfig()
|
---|
| 94 | {
|
---|
| 95 | oaicollectmap::iterator here = this->collectMap.begin();
|
---|
| 96 | oaicollectmap::iterator end = this->collectMap.end();
|
---|
| 97 | while (here != end) {
|
---|
| 98 | delete here->second;
|
---|
[9608] | 99 | ++here;
|
---|
[8182] | 100 | }
|
---|
| 101 | }
|
---|
| 102 |
|
---|
[11732] | 103 | int oaiconfig::resumeAfter()
|
---|
| 104 | { return this->resumptionSize;
|
---|
| 105 | }
|
---|
| 106 |
|
---|
[8182] | 107 | int oaiconfig::getOAIVersion()
|
---|
| 108 | {
|
---|
| 109 | if (this->oaiVersion == "1.1") {
|
---|
| 110 | return 110;
|
---|
| 111 | }
|
---|
| 112 | return 200;
|
---|
| 113 | }
|
---|
| 114 |
|
---|
[18903] | 115 | bool oaiconfig::configureCollection(const text_t &gsdlhome, const text_t &gsdlcollect)
|
---|
[8182] | 116 | {
|
---|
| 117 | text_t cnfgfile = filename_cat(gsdlhome, "collect", gsdlcollect, "etc", "collect.cfg");
|
---|
[18903] | 118 | if (!file_exists(cnfgfile)) {
|
---|
| 119 | return false;
|
---|
[24114] | 120 | }
|
---|
| 121 |
|
---|
[8182] | 122 | this->collection = gsdlcollect;
|
---|
| 123 | this->read_configfile(cnfgfile);
|
---|
[24114] | 124 |
|
---|
| 125 | // to work out the earliestDatestamp for the repository, need to
|
---|
| 126 | // to read in the build.cfg file of each OAI collection, in order to
|
---|
| 127 | // find the oldest earliestDatestamp field of all the OAI collections
|
---|
| 128 | text_t buildcnfgfile = filename_cat(gsdlhome, "collect", gsdlcollect, "index", "build.cfg");
|
---|
| 129 | if (file_exists(buildcnfgfile)) {
|
---|
| 130 | this->read_configfile(buildcnfgfile);
|
---|
| 131 | }
|
---|
| 132 |
|
---|
[18903] | 133 | return true;
|
---|
[8182] | 134 | }
|
---|
| 135 | void oaiconfig::configure (const text_t &key, const text_tarray &cfgline)
|
---|
| 136 | {
|
---|
| 137 | // we've got an oai mapping item, and at least two fields
|
---|
| 138 | if (key == "oaimapping" && cfgline.size() > 1) {
|
---|
| 139 | text_t::const_iterator colonAt;
|
---|
[8303] | 140 | text_t index, name, configCollection;
|
---|
| 141 |
|
---|
| 142 | // Take a default collection as being whatever the collection being configured is...
|
---|
| 143 | configCollection = this->collection;
|
---|
[8182] | 144 |
|
---|
| 145 | // get the name of the (collection) field to map; this may actually
|
---|
| 146 | // be in a colon separated format of the type
|
---|
| 147 | // <collection name>:<field name>
|
---|
| 148 | index = cfgline[0];
|
---|
[20607] | 149 | if ((colonAt = findchar(index.begin(), index.end(), ':')) != index.end()) {
|
---|
[8303] | 150 | configCollection = substr(index.begin(), colonAt);
|
---|
| 151 |
|
---|
| 152 | if (this->collection != "" && configCollection != this->collection) {
|
---|
| 153 | cerr << "Attempt to configure OAI mappings for " << configCollection << " in " << this->collection << endl;
|
---|
| 154 | }
|
---|
| 155 |
|
---|
| 156 | colonAt += 1;
|
---|
| 157 | index = substr(colonAt, index.end());
|
---|
| 158 | }
|
---|
[8182] | 159 |
|
---|
| 160 | // the second parameter is the metadata field to map the collection
|
---|
| 161 | // field onto. It may be provided with a metadata protocol (which
|
---|
[8276] | 162 | // will be given first and separated by a period or full stop). In
|
---|
| 163 | // the case of format.field name, the splitting is done here.
|
---|
[20607] | 164 | if ((colonAt = findchar(cfgline[1].begin(), cfgline[1].end(), '.')) != cfgline[1].end()) {
|
---|
[8182] | 165 | text_t stub = substr(cfgline[1].begin(), colonAt);
|
---|
| 166 | colonAt += 1;
|
---|
| 167 | name = substr(colonAt, cfgline[1].end());
|
---|
| 168 | index.append(":");
|
---|
| 169 | index.append(stub);
|
---|
| 170 | }
|
---|
| 171 | else {
|
---|
| 172 | name = cfgline[1];
|
---|
| 173 | }
|
---|
| 174 |
|
---|
[8303] | 175 | // now 'index' is in the form <collectionfield>:(formatname)
|
---|
| 176 | // 'name' is simply the fieldname within the format
|
---|
| 177 | // 'configCollection' is the collection to be configured
|
---|
[8182] | 178 |
|
---|
| 179 | // now simply map the field name (index) onto the collection name (name)
|
---|
[8303] | 180 | if (this->collectMap[configCollection] == NULL) {
|
---|
| 181 | this->collectMap[configCollection] = new oaicollectconfig(configCollection);
|
---|
[8182] | 182 | }
|
---|
[8303] | 183 | this->collectMap[configCollection]->fieldMap[index] = name;
|
---|
[8182] | 184 |
|
---|
[8303] | 185 | // cerr << "Mapping " << index << " to " << name << " in " << configCollection << endl;
|
---|
[8182] | 186 |
|
---|
| 187 | // TODO: check that the mapped field is actually in use
|
---|
| 188 | }
|
---|
[8303] | 189 | else if (key == "oaicollection" && cfgline.size() >= 1) {
|
---|
[8182] | 190 | // Configure a collection to be used as part of the OAI archive.
|
---|
| 191 | // This line should read:
|
---|
| 192 | //
|
---|
| 193 | // oaicollection <collectionname>
|
---|
| 194 | //
|
---|
| 195 | // Where <collectionname> is the name of the directory inside the
|
---|
| 196 | // gsdl/collect folder which contains the collection.
|
---|
| 197 | //
|
---|
| 198 | // To configure several collections, merely repeat this line,
|
---|
| 199 | // or alternatively use additional collection names after the
|
---|
| 200 | // first one.
|
---|
| 201 | //
|
---|
[18892] | 202 | // This configuration should only appear in oai.cfg
|
---|
[8182] | 203 | //
|
---|
| 204 | if (this->collection != "") {
|
---|
[18892] | 205 | cerr << "Attempt to configure an oai collection outside of oai.cfg" << endl;
|
---|
[8182] | 206 | cerr << "Configuration attempted in " << this->collection << " collection." << endl;
|
---|
| 207 | exit(1);
|
---|
| 208 | }
|
---|
[9608] | 209 | for (int c = 0; c < cfgline.size(); ++c) {
|
---|
[8303] | 210 | this->collectList.push_back(cfgline[c]);
|
---|
[8182] | 211 | }
|
---|
| 212 | }
|
---|
[18892] | 213 | else if (key == "oaimetadata" && cfgline.size() >= 1) {
|
---|
| 214 | // List of metadata prefixes to suuport
|
---|
| 215 | // This line should read:
|
---|
| 216 | //
|
---|
| 217 | // oaicollection <metadataname> <metadataname>...
|
---|
| 218 | //
|
---|
| 219 | //
|
---|
| 220 | // This configuration should only appear in oai.cfg
|
---|
| 221 | //
|
---|
| 222 | if (this->collection != "") {
|
---|
| 223 | cerr << "Attempt to configure oai metadata outside of oai.cfg" << endl;
|
---|
| 224 | cerr << "Configuration attempted in " << this->collection << " collection." << endl;
|
---|
| 225 | exit(1);
|
---|
| 226 | }
|
---|
| 227 | for (int c = 0; c < cfgline.size(); ++c) {
|
---|
| 228 | // todo: check that the set name is valid
|
---|
| 229 | this->metadataSet.insert(cfgline[c]);
|
---|
| 230 | }
|
---|
| 231 | }
|
---|
[8182] | 232 | else if (key == "oaiinfo" && cfgline.size() >= 1) {
|
---|
| 233 | // Get a piece of information for the oai repository information
|
---|
| 234 | // request. The line should read:
|
---|
| 235 | //
|
---|
| 236 | // oaiinfo <information field name> <value>
|
---|
| 237 | //
|
---|
[18892] | 238 | // This configuration should only be attempted in oai.cfg
|
---|
[8182] | 239 | //
|
---|
| 240 | if (this->collection != "") {
|
---|
[18892] | 241 | cerr << "Attempt to set oai information outside of oai.cfg" << endl;
|
---|
[8182] | 242 | cerr << "Configuration attempted in " << this->collection << " collection." << endl;
|
---|
| 243 | exit(1);
|
---|
| 244 | }
|
---|
| 245 |
|
---|
| 246 | // if no second parameter is given, then the first parameter
|
---|
| 247 | if (cfgline.size() == 1) {
|
---|
| 248 | this->infoMap[cfgline[0]] = cfgline[0];
|
---|
| 249 | }
|
---|
| 250 | else {
|
---|
| 251 | this->infoMap[cfgline[0]] = cfgline[1];
|
---|
| 252 | }
|
---|
| 253 | }
|
---|
[22212] | 254 | else if ( key == "oaisetname" || key == "oaisetdescription") {
|
---|
| 255 | text_t coll_name;
|
---|
| 256 | text_t value = "";
|
---|
| 257 | if (this->collection != "") {
|
---|
| 258 | // we are in collect.cfg
|
---|
| 259 | coll_name = this->collection;
|
---|
| 260 | if (cfgline.size() == 1) {
|
---|
| 261 | // just the collection value
|
---|
| 262 | value = cfgline[0];
|
---|
[8182] | 263 | }
|
---|
[22212] | 264 | else if (cfgline.size() == 2) {
|
---|
| 265 | // we have a subset name (eg for classifier)
|
---|
| 266 | coll_name.append(":");
|
---|
| 267 | coll_name.append(cfgline[0]);
|
---|
| 268 | value = cfgline[1];
|
---|
[8182] | 269 | }
|
---|
[22212] | 270 | } else if (cfgline.size() == 2) {
|
---|
| 271 | // oai.cfg, line should be collname, setName
|
---|
| 272 | coll_name = cfgline[0];
|
---|
| 273 | value = cfgline[1];
|
---|
[8182] | 274 | }
|
---|
[22212] | 275 | if (value != "") {
|
---|
| 276 | if (this->collectMap[coll_name] == NULL) {
|
---|
| 277 | this->collectMap[coll_name] = new oaicollectconfig(coll_name);
|
---|
[8182] | 278 | }
|
---|
[22212] | 279 | if (key == "oaisetname") {
|
---|
| 280 | this->collectMap[coll_name]->setName = value;
|
---|
| 281 | } else if (key == "oaisetdescription") {
|
---|
| 282 | this->collectMap[coll_name]->setDescription = value;
|
---|
[8182] | 283 | }
|
---|
| 284 | }
|
---|
| 285 | }
|
---|
[22212] | 286 |
|
---|
| 287 | else if (key == "resumeafter" && cfgline.size() >= 1) {
|
---|
| 288 | this->resumptionSize = cfgline[0].getint();
|
---|
[14284] | 289 | }
|
---|
[8182] | 290 |
|
---|
[22212] | 291 | else if (key == "maintainer") {
|
---|
| 292 | this->maintainer = cfgline[0];
|
---|
[8182] | 293 | }
|
---|
[22212] | 294 | else if (key == "repositoryName") {
|
---|
| 295 | this->repositoryName = cfgline[0];
|
---|
[8182] | 296 | }
|
---|
[22284] | 297 | else if (key == "repositoryId") {
|
---|
| 298 | this->repositoryId = cfgline[0];
|
---|
| 299 | }
|
---|
| 300 | else if (key == "repositoryIdVersion") {
|
---|
| 301 | this->repositoryIdVersion = cfgline[0];
|
---|
| 302 | }
|
---|
[23233] | 303 | else if (key == "baseServerURL") {
|
---|
| 304 | this->baseServerURL = cfgline[0];
|
---|
[8182] | 305 | }
|
---|
[23233] | 306 | else if (key == "oaiserverPath") {
|
---|
| 307 | this->oaiserverPath = cfgline[0];
|
---|
[14284] | 308 | }
|
---|
[23233] | 309 | else if (key == "libraryPath") {
|
---|
| 310 | this->libraryPath = cfgline[0];
|
---|
| 311 | }
|
---|
| 312 | else if (key == "docRootPath") {
|
---|
| 313 | this->docRootPath = cfgline[0];
|
---|
| 314 | }
|
---|
| 315 |
|
---|
[22284] | 316 | else if (key == "oaiversion") {
|
---|
| 317 | this->oaiVersion = cfgline[0];
|
---|
| 318 | }
|
---|
[22212] | 319 | }
|
---|
[14284] | 320 |
|
---|
[24114] | 321 |
|
---|
[23233] | 322 | text_t oaiconfig::generateBaseServerURL() {
|
---|
| 323 | char *server_name = getenv("SERVER_NAME");
|
---|
| 324 | char *server_port = getenv("SERVER_PORT");
|
---|
| 325 | text_t url = "http://";
|
---|
| 326 | url.append(server_name);
|
---|
| 327 | url.append(":");
|
---|
| 328 | url.append(server_port);
|
---|
| 329 | return url;
|
---|
| 330 | }
|
---|
| 331 |
|
---|
[8182] | 332 | text_t oaiconfig::getMapping(const text_t &collection, const text_t &collectfield)
|
---|
| 333 | {
|
---|
| 334 | if (this->collectMap[collection] == NULL) {
|
---|
| 335 | return "";
|
---|
| 336 | }
|
---|
| 337 | return this->collectMap[collection]->fieldMap[collectfield];
|
---|
| 338 | }
|
---|
| 339 |
|
---|
| 340 | /**
|
---|
| 341 | * Get the mapping for a field in a given collection; if no mapping
|
---|
| 342 | * exists, the result will be a blank string.
|
---|
| 343 | */
|
---|
| 344 | text_t oaiconfig::getMapping(const text_t &collection, const text_t &collectfield, const text_t &formatname)
|
---|
| 345 | {
|
---|
| 346 | text_t fullName = collectfield;
|
---|
| 347 | fullName.append(":");
|
---|
| 348 | fullName.append(formatname);
|
---|
| 349 |
|
---|
| 350 | // try the collection-specific options first
|
---|
| 351 | if (this->collectMap[collection] != NULL) {
|
---|
| 352 | // first try the most specific item - this collection, and given that protocol
|
---|
| 353 | if (this->collectMap[collection]->fieldMap.count(fullName) >= 1) {
|
---|
| 354 | return this->collectMap[collection]->fieldMap[fullName];
|
---|
| 355 | }
|
---|
| 356 | // otherwise, fall back to this collection, and all protocols
|
---|
| 357 | else if (this->collectMap[collection]->fieldMap.count(collectfield) >= 1) {
|
---|
| 358 | return this->collectMap[collection]->fieldMap[collectfield];
|
---|
| 359 | }
|
---|
| 360 | }
|
---|
| 361 |
|
---|
| 362 | // if no mappings exist, return an empty item
|
---|
| 363 | if (this->collectMap[""] == NULL) {
|
---|
| 364 | return "";
|
---|
| 365 | }
|
---|
| 366 |
|
---|
| 367 | // then try generic rules
|
---|
| 368 | if (this->collectMap[""]->fieldMap.count(fullName) >= 1) {
|
---|
| 369 | return this->collectMap[""]->fieldMap[fullName];
|
---|
| 370 | }
|
---|
| 371 | else {
|
---|
| 372 | return this->collectMap[""]->fieldMap[collectfield];
|
---|
| 373 | }
|
---|
| 374 | }
|
---|
[20629] | 375 |
|
---|
[22212] | 376 | text_t oaiconfig::getBaseURL()
|
---|
| 377 | {
|
---|
[23233] | 378 | if (this->baseServerURL.empty()) {
|
---|
| 379 | this->baseServerURL = generateBaseServerURL();
|
---|
| 380 | }
|
---|
| 381 | return this->baseServerURL + this->oaiserverPath;
|
---|
[22212] | 382 | }
|
---|
| 383 | text_t oaiconfig::getBaseLibraryURL()
|
---|
| 384 | {
|
---|
[23233] | 385 | if (this->baseServerURL.empty()) {
|
---|
| 386 | this->baseServerURL = generateBaseServerURL();
|
---|
| 387 | }
|
---|
| 388 | return this->baseServerURL + this->libraryPath;
|
---|
[22212] | 389 | }
|
---|
| 390 | text_t oaiconfig::getBaseDocRoot()
|
---|
| 391 | {
|
---|
[23233] | 392 | if (this->baseServerURL.empty()) {
|
---|
| 393 | this->baseServerURL = generateBaseServerURL();
|
---|
| 394 | }
|
---|
| 395 | return this->baseServerURL + this->docRootPath;
|
---|
[22212] | 396 | }
|
---|
[23233] | 397 |
|
---|
| 398 | text_t oaiconfig::getRelativeBaseDocRoot()
|
---|
| 399 | {
|
---|
| 400 | return this->docRootPath;
|
---|
| 401 | }
|
---|
[22212] | 402 | text_t oaiconfig::getRepositoryName()
|
---|
| 403 | {
|
---|
| 404 | return this->repositoryName;
|
---|
| 405 | }
|
---|
[22284] | 406 | text_t oaiconfig::getRepositoryId()
|
---|
| 407 | {
|
---|
| 408 | return this->repositoryId;
|
---|
| 409 | }
|
---|
| 410 | text_t oaiconfig::getRepositoryIdVersion()
|
---|
| 411 | {
|
---|
| 412 | return this->repositoryIdVersion;
|
---|
| 413 | }
|
---|
[22212] | 414 | text_t oaiconfig::getMaintainer()
|
---|
| 415 | {
|
---|
| 416 | return this->maintainer;
|
---|
| 417 | }
|
---|
[20629] | 418 | text_t oaiconfig::getSetName(const text_t &setSpec)
|
---|
| 419 | {
|
---|
[22212] | 420 | if (this->collectMap[setSpec] == NULL) {
|
---|
| 421 | return "" ;
|
---|
[20629] | 422 | }
|
---|
[22212] | 423 |
|
---|
| 424 | return this->collectMap[setSpec]->setName;
|
---|
| 425 |
|
---|
[20629] | 426 | }
|
---|
| 427 |
|
---|
| 428 | text_t oaiconfig::getSetDescription(const text_t &setSpec)
|
---|
| 429 | {
|
---|
[22212] | 430 | if (this->collectMap[setSpec] == NULL) {
|
---|
| 431 | return "" ;
|
---|
[20629] | 432 | }
|
---|
[22212] | 433 |
|
---|
| 434 | return this->collectMap[setSpec]->setDescription;
|
---|
| 435 | }
|
---|
[20629] | 436 |
|
---|