[8182] | 1 | #include "oaiconfig.h"
|
---|
| 2 |
|
---|
| 3 | #include <algorithm>
|
---|
| 4 | #include <iostream>
|
---|
| 5 |
|
---|
| 6 | #include "fileutil.h"
|
---|
| 7 |
|
---|
/**
 * The mapping works as follows in the oai.cfg or collect.cfg file.
 *
 * A line is in the format oaimapping <collection field> <oai field>
 *
 * The map here is used to look up the "Greenstone" name which is mapped from
 * a given OAI field name, the reverse direction to that given in the
 * Greenstone collect.cfg file.  The oairecordaction class instance which
 * produces output for an OAI record information request thus uses the map
 * to work out, from the field in the collection it has on hand, which OAI
 * record name it should use instead.
 *
 * An extension is to be used for this in which the OAI field name in the
 * collect.cfg file can be made specific for a particular record format.
 * This is done using the OAI field name in the format of
 *   <OAI format>.<OAI field name>
 * Thus, an rfc1807 Title field would be referred to as rfc1807.Title
 *
 * A collection-level mapping can be provided in oai.cfg by prepending
 * the collection name to the collection field:
 *   <collname:field> <oai field>
 *
 * In the absence of a particular format name, the mapping is taken to be
 * universal.
 */
|
---|
| 33 |
|
---|
| 34 | oaiconfig::oaiconfig() : configurable () {
|
---|
[11732] | 35 | this->resumptionSize = -1; // Default = do not use resumption tokens
|
---|
[8182] | 36 | }
|
---|
| 37 |
|
---|
| 38 | oaiconfig::oaiconfig(text_t &gsdlhome, text_t &gsdlcollect)
|
---|
| 39 | {
|
---|
[18885] | 40 | // read main configuration file (oai.cfg) to get oai collections
|
---|
[8311] | 41 | text_t mainconfig = filename_cat(gsdlhome, "etc", "oai.cfg");
|
---|
[8182] | 42 | this->collection = "";
|
---|
[11732] | 43 | this->resumptionSize = -1;
|
---|
[8182] | 44 | this->read_configfile(mainconfig);
|
---|
[11732] | 45 |
|
---|
[8182] | 46 | // then if we've not got a specified collection in the gsdlcollect
|
---|
| 47 | // parameter, read in all the collection's individual configurations
|
---|
| 48 | if (gsdlcollect == "") {
|
---|
[8303] | 49 | text_tarray::iterator here = this->collectList.begin();
|
---|
| 50 | text_tarray::iterator end = this->collectList.end();
|
---|
[8182] | 51 | while (here != end) {
|
---|
| 52 | this->configureCollection(gsdlhome, *here);
|
---|
[9608] | 53 | ++here;
|
---|
[8182] | 54 | }
|
---|
| 55 | }
|
---|
| 56 | else {
|
---|
| 57 | this->configureCollection(gsdlhome, gsdlcollect);
|
---|
| 58 | }
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 | oaiconfig::~oaiconfig()
|
---|
| 62 | {
|
---|
| 63 | oaicollectmap::iterator here = this->collectMap.begin();
|
---|
| 64 | oaicollectmap::iterator end = this->collectMap.end();
|
---|
| 65 | while (here != end) {
|
---|
| 66 | delete here->second;
|
---|
[9608] | 67 | ++here;
|
---|
[8182] | 68 | }
|
---|
| 69 | }
|
---|
| 70 |
|
---|
[11732] | 71 | int oaiconfig::resumeAfter()
|
---|
| 72 | { return this->resumptionSize;
|
---|
| 73 | }
|
---|
| 74 |
|
---|
[8182] | 75 | int oaiconfig::getOAIVersion()
|
---|
| 76 | {
|
---|
| 77 | if (this->oaiVersion == "1.1") {
|
---|
| 78 | return 110;
|
---|
| 79 | }
|
---|
| 80 | return 200;
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 | void oaiconfig::configureCollection(const text_t &gsdlhome, const text_t &gsdlcollect)
|
---|
| 84 | {
|
---|
| 85 | text_t cnfgfile = filename_cat(gsdlhome, "collect", gsdlcollect, "etc", "collect.cfg");
|
---|
| 86 | this->collection = gsdlcollect;
|
---|
| 87 | this->read_configfile(cnfgfile);
|
---|
| 88 | }
|
---|
| 89 |
|
---|
| 90 | void oaiconfig::configure (const text_t &key, const text_tarray &cfgline)
|
---|
| 91 | {
|
---|
| 92 | // we've got an oai mapping item, and at least two fields
|
---|
| 93 | if (key == "oaimapping" && cfgline.size() > 1) {
|
---|
| 94 | text_t::const_iterator colonAt;
|
---|
[8303] | 95 | text_t index, name, configCollection;
|
---|
| 96 |
|
---|
| 97 | // Take a default collection as being whatever the collection being configured is...
|
---|
| 98 | configCollection = this->collection;
|
---|
[8182] | 99 |
|
---|
| 100 | // get the name of the (collection) field to map; this may actually
|
---|
| 101 | // be in a colon separated format of the type
|
---|
| 102 | // <collection name>:<field name>
|
---|
| 103 | index = cfgline[0];
|
---|
[8303] | 104 | if ((colonAt = find(index.begin(), index.end(), ':')) != index.end()) {
|
---|
| 105 | configCollection = substr(index.begin(), colonAt);
|
---|
| 106 |
|
---|
| 107 | if (this->collection != "" && configCollection != this->collection) {
|
---|
| 108 | cerr << "Attempt to configure OAI mappings for " << configCollection << " in " << this->collection << endl;
|
---|
| 109 | }
|
---|
| 110 |
|
---|
| 111 | colonAt += 1;
|
---|
| 112 | index = substr(colonAt, index.end());
|
---|
| 113 | }
|
---|
[8182] | 114 |
|
---|
| 115 | // the second parameter is the metadata field to map the collection
|
---|
| 116 | // field onto. It may be provided with a metadata protocol (which
|
---|
[8276] | 117 | // will be given first and separated by a period or full stop). In
|
---|
| 118 | // the case of format.field name, the splitting is done here.
|
---|
| 119 | if ((colonAt = find(cfgline[1].begin(), cfgline[1].end(), '.')) != cfgline[1].end()) {
|
---|
[8182] | 120 | text_t stub = substr(cfgline[1].begin(), colonAt);
|
---|
| 121 | colonAt += 1;
|
---|
| 122 | name = substr(colonAt, cfgline[1].end());
|
---|
| 123 | index.append(":");
|
---|
| 124 | index.append(stub);
|
---|
| 125 | }
|
---|
| 126 | else {
|
---|
| 127 | name = cfgline[1];
|
---|
| 128 | }
|
---|
| 129 |
|
---|
[8303] | 130 | // now 'index' is in the form <collectionfield>:(formatname)
|
---|
| 131 | // 'name' is simply the fieldname within the format
|
---|
| 132 | // 'configCollection' is the collection to be configured
|
---|
[8182] | 133 |
|
---|
| 134 | // now simply map the field name (index) onto the collection name (name)
|
---|
[8303] | 135 | if (this->collectMap[configCollection] == NULL) {
|
---|
| 136 | this->collectMap[configCollection] = new oaicollectconfig(configCollection);
|
---|
[8182] | 137 | }
|
---|
[8303] | 138 | this->collectMap[configCollection]->fieldMap[index] = name;
|
---|
[8182] | 139 |
|
---|
[8303] | 140 | // cerr << "Mapping " << index << " to " << name << " in " << configCollection << endl;
|
---|
[8182] | 141 |
|
---|
| 142 | // TODO: check that the mapped field is actually in use
|
---|
| 143 | }
|
---|
[8303] | 144 | else if (key == "oaicollection" && cfgline.size() >= 1) {
|
---|
[8182] | 145 | // Configure a collection to be used as part of the OAI archive.
|
---|
| 146 | // This line should read:
|
---|
| 147 | //
|
---|
| 148 | // oaicollection <collectionname>
|
---|
| 149 | //
|
---|
| 150 | // Where <collectionname> is the name of the directory inside the
|
---|
| 151 | // gsdl/collect folder which contains the collection.
|
---|
| 152 | //
|
---|
| 153 | // To configure several collections, merely repeat this line,
|
---|
| 154 | // or alternatively use additional collection names after the
|
---|
| 155 | // first one.
|
---|
| 156 | //
|
---|
[18892] | 157 | // This configuration should only appear in oai.cfg
|
---|
[8182] | 158 | //
|
---|
| 159 | if (this->collection != "") {
|
---|
[18892] | 160 | cerr << "Attempt to configure an oai collection outside of oai.cfg" << endl;
|
---|
[8182] | 161 | cerr << "Configuration attempted in " << this->collection << " collection." << endl;
|
---|
| 162 | exit(1);
|
---|
| 163 | }
|
---|
[9608] | 164 | for (int c = 0; c < cfgline.size(); ++c) {
|
---|
[8303] | 165 | this->collectList.push_back(cfgline[c]);
|
---|
[8182] | 166 | }
|
---|
| 167 | }
|
---|
[18892] | 168 | else if (key == "oaimetadata" && cfgline.size() >= 1) {
|
---|
| 169 | // List of metadata prefixes to suuport
|
---|
| 170 | // This line should read:
|
---|
| 171 | //
|
---|
| 172 | // oaicollection <metadataname> <metadataname>...
|
---|
| 173 | //
|
---|
| 174 | //
|
---|
| 175 | // This configuration should only appear in oai.cfg
|
---|
| 176 | //
|
---|
| 177 | if (this->collection != "") {
|
---|
| 178 | cerr << "Attempt to configure oai metadata outside of oai.cfg" << endl;
|
---|
| 179 | cerr << "Configuration attempted in " << this->collection << " collection." << endl;
|
---|
| 180 | exit(1);
|
---|
| 181 | }
|
---|
| 182 | for (int c = 0; c < cfgline.size(); ++c) {
|
---|
| 183 | // todo: check that the set name is valid
|
---|
| 184 | this->metadataSet.insert(cfgline[c]);
|
---|
| 185 | }
|
---|
| 186 | }
|
---|
[8182] | 187 | else if (key == "oaiinfo" && cfgline.size() >= 1) {
|
---|
| 188 | // Get a piece of information for the oai repository information
|
---|
| 189 | // request. The line should read:
|
---|
| 190 | //
|
---|
| 191 | // oaiinfo <information field name> <value>
|
---|
| 192 | //
|
---|
[18892] | 193 | // This configuration should only be attempted in oai.cfg
|
---|
[8182] | 194 | //
|
---|
| 195 | if (this->collection != "") {
|
---|
[18892] | 196 | cerr << "Attempt to set oai information outside of oai.cfg" << endl;
|
---|
[8182] | 197 | cerr << "Configuration attempted in " << this->collection << " collection." << endl;
|
---|
| 198 | exit(1);
|
---|
| 199 | }
|
---|
| 200 |
|
---|
| 201 | // if no second parameter is given, then the first parameter
|
---|
| 202 | if (cfgline.size() == 1) {
|
---|
| 203 | this->infoMap[cfgline[0]] = cfgline[0];
|
---|
| 204 | }
|
---|
| 205 | else {
|
---|
| 206 | this->infoMap[cfgline[0]] = cfgline[1];
|
---|
| 207 | }
|
---|
| 208 | }
|
---|
| 209 | else if (key == "oaiversion" && cfgline.size() >= 1) {
|
---|
| 210 | this->oaiVersion = cfgline[0];
|
---|
| 211 | }
|
---|
[11732] | 212 | else if (key == "resumeafter" && cfgline.size() >= 1) {
|
---|
| 213 | this->resumptionSize = cfgline[0].getint();
|
---|
| 214 | }
|
---|
[8182] | 215 | // get and note a maintainer item to support the Identify Verb of OAI
|
---|
| 216 | else if (key == "maintainer" && cfgline.size() >= 1) {
|
---|
| 217 | int line = 0;
|
---|
| 218 |
|
---|
| 219 | // TODO: exhaustive checks for empty or default values of maintainer
|
---|
| 220 | while (line < cfgline.size()) {
|
---|
| 221 | if (cfgline[line] != "NULL" &&
|
---|
| 222 | cfgline[line] != "") {
|
---|
| 223 | // do something
|
---|
| 224 | break;
|
---|
| 225 | }
|
---|
| 226 | else {
|
---|
[9608] | 227 | ++line;
|
---|
[8182] | 228 | }
|
---|
| 229 | }
|
---|
| 230 |
|
---|
| 231 | // Only try to set the configuration if we have a legitimate value ...
|
---|
| 232 | if (line < cfgline.size()) {
|
---|
| 233 | // ensure we have a map to write to
|
---|
| 234 | if (this->collectMap[this->collection] == NULL) {
|
---|
| 235 | this->collectMap[this->collection] = new oaicollectconfig(this->collection);
|
---|
| 236 | }
|
---|
| 237 | this->collectMap[this->collection]->maintainer = cfgline[line];
|
---|
| 238 | }
|
---|
| 239 | }
|
---|
| 240 | else if (key == "repositoryName" && cfgline.size() >= 1) {
|
---|
| 241 | int line = 0;
|
---|
| 242 |
|
---|
| 243 | // TODO: exhaustive checks for empty or default values of repositoryName
|
---|
| 244 | while (line < cfgline.size()) {
|
---|
| 245 | if (cfgline[line] != "NULL" &&
|
---|
| 246 | cfgline[line] != "") {
|
---|
| 247 | // do something
|
---|
| 248 | break;
|
---|
| 249 | }
|
---|
| 250 | else {
|
---|
[9608] | 251 | ++line;
|
---|
[8182] | 252 | }
|
---|
| 253 | }
|
---|
| 254 |
|
---|
| 255 | // Only try to set the configuration if we have a legitimate value ...
|
---|
| 256 | if (line < cfgline.size()) {
|
---|
| 257 | // ensure we have a map to write to
|
---|
| 258 | if (this->collectMap[this->collection] == NULL) {
|
---|
| 259 | this->collectMap[this->collection] = new oaicollectconfig(this->collection);
|
---|
| 260 | }
|
---|
| 261 | this->collectMap[this->collection]->repositoryName = cfgline[line];
|
---|
| 262 | }
|
---|
| 263 | }
|
---|
[14284] | 264 | else if (key == "baseURL" && cfgline.size() >= 1) {
|
---|
| 265 | int line = 0;
|
---|
| 266 |
|
---|
| 267 | while (line < cfgline.size()) {
|
---|
| 268 | if (cfgline[line] != "NULL" &&
|
---|
| 269 | cfgline[line] != "") {
|
---|
| 270 | // do something
|
---|
| 271 | break;
|
---|
| 272 | }
|
---|
| 273 | else {
|
---|
| 274 | ++line;
|
---|
| 275 | }
|
---|
| 276 | }
|
---|
| 277 |
|
---|
| 278 | // Only try to set the configuration if we have a legitimate value ...
|
---|
| 279 | if (line < cfgline.size()) {
|
---|
| 280 | // ensure we have a map to write to
|
---|
| 281 | if (this->collectMap[this->collection] == NULL) {
|
---|
| 282 | this->collectMap[this->collection] = new oaicollectconfig(this->collection);
|
---|
| 283 | }
|
---|
| 284 | this->collectMap[this->collection]->baseURL = cfgline[line];
|
---|
| 285 | }
|
---|
| 286 | }
|
---|
[18860] | 287 | else if (key == "baseDocRoot" && cfgline.size() >= 1) {
|
---|
| 288 | int line = 0;
|
---|
| 289 |
|
---|
| 290 | while (line < cfgline.size()) {
|
---|
| 291 | if (cfgline[line] != "NULL" &&
|
---|
| 292 | cfgline[line] != "") {
|
---|
| 293 | // do something
|
---|
| 294 | break;
|
---|
| 295 | }
|
---|
| 296 | else {
|
---|
| 297 | ++line;
|
---|
| 298 | }
|
---|
| 299 | }
|
---|
| 300 |
|
---|
| 301 | // Only try to set the configuration if we have a legitimate value ...
|
---|
| 302 | if (line < cfgline.size()) {
|
---|
| 303 | // ensure we have a map to write to
|
---|
| 304 | if (this->collectMap[this->collection] == NULL) {
|
---|
| 305 | this->collectMap[this->collection] = new oaicollectconfig(this->collection);
|
---|
| 306 | }
|
---|
| 307 | this->collectMap[this->collection]->baseDocRoot = cfgline[line];
|
---|
| 308 | }
|
---|
| 309 | }
|
---|
[8182] | 310 | }
|
---|
| 311 |
|
---|
| 312 | /**
|
---|
| 313 | * TODO: store all field values in a map per collection
|
---|
| 314 | */
|
---|
| 315 | text_t oaiconfig::getCollectionConfig(const text_t &collection, const text_t &field)
|
---|
| 316 | {
|
---|
| 317 | if (this->collectMap[collection] == NULL) {
|
---|
| 318 | return "";
|
---|
| 319 | }
|
---|
| 320 | if (field == "maintainer") {
|
---|
| 321 | return this->collectMap[collection]->maintainer;
|
---|
| 322 | }
|
---|
| 323 |
|
---|
| 324 | if (field == "repositoryName") {
|
---|
| 325 | return this->collectMap[collection]->repositoryName;
|
---|
| 326 | }
|
---|
| 327 |
|
---|
[14284] | 328 | if (field == "baseURL") {
|
---|
| 329 | return this->collectMap[collection]->baseURL;
|
---|
| 330 | }
|
---|
| 331 |
|
---|
[18860] | 332 | if (field == "baseDocRoot") {
|
---|
| 333 | return this->collectMap[collection]->baseDocRoot;
|
---|
| 334 | }
|
---|
| 335 |
|
---|
[8182] | 336 | return "";
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 | text_t oaiconfig::getMapping(const text_t &collection, const text_t &collectfield)
|
---|
| 340 | {
|
---|
| 341 | if (this->collectMap[collection] == NULL) {
|
---|
| 342 | return "";
|
---|
| 343 | }
|
---|
| 344 | return this->collectMap[collection]->fieldMap[collectfield];
|
---|
| 345 | }
|
---|
| 346 |
|
---|
| 347 | /**
|
---|
| 348 | * Get the mapping for a field in a given collection; if no mapping
|
---|
| 349 | * exists, the result will be a blank string.
|
---|
| 350 | */
|
---|
| 351 | text_t oaiconfig::getMapping(const text_t &collection, const text_t &collectfield, const text_t &formatname)
|
---|
| 352 | {
|
---|
| 353 | text_t fullName = collectfield;
|
---|
| 354 | fullName.append(":");
|
---|
| 355 | fullName.append(formatname);
|
---|
| 356 |
|
---|
| 357 | // try the collection-specific options first
|
---|
| 358 | if (this->collectMap[collection] != NULL) {
|
---|
| 359 | // first try the most specific item - this collection, and given that protocol
|
---|
| 360 | if (this->collectMap[collection]->fieldMap.count(fullName) >= 1) {
|
---|
| 361 | return this->collectMap[collection]->fieldMap[fullName];
|
---|
| 362 | }
|
---|
| 363 | // otherwise, fall back to this collection, and all protocols
|
---|
| 364 | else if (this->collectMap[collection]->fieldMap.count(collectfield) >= 1) {
|
---|
| 365 | return this->collectMap[collection]->fieldMap[collectfield];
|
---|
| 366 | }
|
---|
| 367 | }
|
---|
| 368 |
|
---|
| 369 | // if no mappings exist, return an empty item
|
---|
| 370 | if (this->collectMap[""] == NULL) {
|
---|
| 371 | return "";
|
---|
| 372 | }
|
---|
| 373 |
|
---|
| 374 | // then try generic rules
|
---|
| 375 | if (this->collectMap[""]->fieldMap.count(fullName) >= 1) {
|
---|
| 376 | return this->collectMap[""]->fieldMap[fullName];
|
---|
| 377 | }
|
---|
| 378 | else {
|
---|
| 379 | return this->collectMap[""]->fieldMap[collectfield];
|
---|
| 380 | }
|
---|
| 381 | }
|
---|