source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 31911

Last change on this file since 31911 was 31911, checked in by ak19, 7 years ago

First part of commit after using earliestDatestamp from oai-inf.db rather than build conf: 1. Skip output of OAI records for the internal 'OID' stored in oai-inf.db, whose entry represents the collection's earliest timestamp info. 2. Now prefer a doc's lastmod date from oai-inf.db to the doc's oailastmod date in indexdb. This applies to all records, not just deleted records that only have entries in oaiinfdb and not in indexdb.

File size: 35.4 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String site_name = "";
63 protected String coll_name = "";
64
65 // set this up during configure
66 protected Element list_sets_response = null;
67
68 protected Element meta_formats_definition = null;
69 protected HashMap<String, HashSet<String>> format_elements_map = null;
70 protected HashMap<String, Element> format_response_map = null;
71 /** constructor */
72 public OAIPMH() {
73
74 }
75
76 public void cleanUp() {
77 super.cleanUp();//??
78 this.coll_db.closeDatabase();
79 if (this.oaiinf_db != null){
80 this.oaiinf_db.closeDatabase();
81 }
82
83 }
84 /** configure this service
85 info is the OAIPMH service rack from collectionConfig.xml, and
86 extra_info is buildConfig.xml */
87 public boolean configure(Element info, Element extra_info) {
88 if (!super.configure(info, extra_info)){
89 logger.info("Configuring ServiceRack.java returns false.");
90 return false;
91 }
92
93 //get the names from ServiceRack.java
94 this.site_name = this.router.getSiteName();
95 this.coll_name = this.cluster_name;
96
97 logger.info("Configuring OAIPMH...");
98
99 this.config_info = info;
100
101 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
102 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
103 String index_stem = "";
104 String infodb_type = "";
105 if (metadata_list != null) {
106
107 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
108
109 if (index_stem_elem != null) {
110 index_stem = GSXML.getNodeText(index_stem_elem);
111 }
112
113 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
114 if (infodb_type_elem != null) {
115 infodb_type = GSXML.getNodeText(infodb_type_elem);
116 }
117
118 }
119
120 if (index_stem == null || index_stem.equals("")) {
121 index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
122 }
123 if (infodb_type == null || infodb_type.equals("")) {
124 infodb_type = "gdbm"; // the default
125 }
126
127 coll_db = new SimpleCollectionDatabase(infodb_type);
128 if (!coll_db.databaseOK()) {
129 logger.error("Couldn't create the collection database of type "+infodb_type);
130 return false;
131 }
132
133 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
134 if (!oaiinf_db.databaseOK()) {
135 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
136 return false;
137 }
138
139
140 // Open databases for querying
141 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
142 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
143 logger.error("Could not open collection database!");
144 return false;
145 }
146 // the oaiinf_db is called oai-inf.<infodb_type_extension>
147 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
148 File oaiinfFile = new File(oaiinf_db_file);
149
150 if(!oaiinfFile.exists()) {
151 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
152 oaiinf_db = null;
153 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
154 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
155 oaiinf_db = null;
156 }
157
158 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
159 configureSetInfo();
160 // the short_service_info is used by the message router to find the method names,
161
162 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
163 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
164 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
165 this.short_service_info.appendChild(list_records);
166
167 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
168 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
169 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
170 this.short_service_info.appendChild(list_identifiers);
171
172 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
173 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
174 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
175 this.short_service_info.appendChild(list_sets);
176
177 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
178 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
179 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
180 this.short_service_info.appendChild(list_metadata_formats);
181
182 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
183 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
184 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
185 this.short_service_info.appendChild(get_record);
186
187 return true;
188 }
189
190 public boolean configureOAI(Element oai_config_elem) {
191 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
192 this.format_response_map = new HashMap<String, Element>();
193 this.format_elements_map = new HashMap<String, HashSet<String>>();
194
195 // for now, all we want is the metadata prefix description and the mapping list
196 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
197 if (main_lmf_elem == null) {
198 logger.error("No listMetadataFormats element found in OAIConfig.xml");
199 return false;
200 }
201 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
202 if (meta_formats_list.getLength() == 0) {
203 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
204 return false;
205 }
206 boolean found_meta_format = false;
207 for(int i=0; i<meta_formats_list.getLength(); i++) {
208 Element mf = (Element) meta_formats_list.item(i);
209 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
210 if (prefix.equals("")) {
211 logger.error("metadataFormat element had no metadataPrefix attribute");
212 continue;
213 }
214 // get the right format from OAICOnfig
215 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
216 if (meta_format == null) {
217 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
218 continue;
219 }
220 // copy the format definition into our stored Element
221 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
222 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
223 this.meta_formats_definition.appendChild(collection_version_format);
224 // set up the response element for this format
225 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
226 // add in collection specific mappings
227 addCollectionMappings(collection_version_format, mf);
228 // now set up a list of all collection elements for reverse lookup of the mapping
229 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
230
231 }
232 return true;
233 }
234
235 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
236 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
237 for (int i=0; i<formats.getLength(); i++) {
238 Element format = (Element)formats.item(i);
239 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
240 if (prefix.equals(meta_name)) {
241 return format;
242 }
243 }
244 return null;
245 }
246
247 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
248 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
249
250 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
251 Document doc = element_list.getOwnerDocument();
252 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
253 if (coll_elements.getLength()==0) {
254 // no mappings to include
255 return;
256 }
257 for (int i=0; i<coll_elements.getLength(); i++) {
258 Element e = (Element)coll_elements.item(i);
259 String elem_name = e.getAttribute(GSXML.NAME_ATT);
260 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
261 if (main_elem == null) {
262 logger.error(elem_name+" not found in meta format, not using it");
263 } else {
264 element_list.replaceChild(doc.importNode(e, true),main_elem );
265 }
266 }
267 }
268
269 /** goes through all the mappings and makes a set of all collection
270 metadata names that could become an oai meta element - acts as
271 a reverse lookup for the mappings */
272 protected HashSet<String> getAllCollectionElements(Element meta_format) {
273 HashSet<String> meta_name_set = new HashSet<String>();
274 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
275 for (int i=0; i<elements.getLength(); i++) {
276 Element e = (Element)elements.item(i);
277 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
278 if (map == null) {
279 // there is no mapping, just use the element name
280 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
281 } else {
282 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
283 String[] name_array = list_of_names.split(",");
284 for (int j=0; j<name_array.length; j++) {
285 meta_name_set.add(name_array[j]);
286 }
287 }
288 }
289 return meta_name_set;
290 }
291
292 /** returns a specific service description */
293 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
294
295 if (service_id.equals(OAIXML.LIST_RECORDS)) {
296 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
297 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
298 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
299 return list_records;
300 }
301
302 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
303 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
304 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
305 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
306 return list_identifiers;
307 }
308 if (service_id.equals(OAIXML.LIST_SETS)) {
309 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
310 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
311 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
312 return list_sets;
313 }
314 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
315 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
316 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
317 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
318 return list_metadata_formats;
319 }
320
321 if (service_id.equals(OAIXML.GET_RECORD)) {
322 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
323 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
324 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
325 return get_record;
326 }
327
328 return null;
329 }
330
331 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
332 protected Element processListSets(Element req) {
333 return list_sets_response;
334 }
335 /** returns the actual record element used in the OAI GetRecord response */
336 protected Element processGetRecord(Element req) {
337 /** arguments:
338 identifier: required
339 metadataPrefix: required
340 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
341 */
342 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
343 HashMap<String, String> param_map = GSXML.getParamMap(params);
344
345 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
346 if (prefix == null || prefix.equals("")) {
347 //Just a double-check
348 logger.error("the value of metadataPrefix att is not present in the request.");
349 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
350 }
351
352 // check that we support this format
353 if (!format_response_map.containsKey(prefix)) {
354 logger.error("metadata prefix is not supported for collection "+this.coll_name);
355 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
356 }
357
358 Document doc = XMLConverter.newDOM();
359
360 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
361 boolean OID_is_deleted = false;
362 long millis = -1;
363
364 DBInfo oai_info = null;
365 if(oaiinf_db != null) {
366 oai_info = this.oaiinf_db.getInfo(oid);
367 if (oai_info == null) {
368 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
369 } else {
370
371 // indexdb doesn't have info on deleted docs, only oaiinf db does.
372 // So only oaiinfdb has timestamps for deleted docs
373 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
374 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
375 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
376 millis = Long.parseLong(timestamp)*1000; // in milliseconds
377
378 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
379 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
380 OID_is_deleted = true;
381 }
382 }
383 }
384
385 //get a DBInfo object of the identifier; if this identifier is not present in the database,
386 // null is returned.
387 DBInfo info = this.coll_db.getInfo(oid);
388 if (info == null) {
389 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
390 logger.error("OID: " + oid + " is not present in the collection index database.");
391 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
392 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
393 }
394 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
395 millis = getDateStampMillis(info);
396 }
397 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
398
399
400 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
401 Element get_record = doc.createElement(OAIXML.GET_RECORD);
402 get_record_response.appendChild(get_record);
403 Element record = doc.createElement(OAIXML.RECORD);
404 //compose the header element
405 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
406 if(!OID_is_deleted) {
407 //compose the metadata element
408 record.appendChild(createMetadataElement(doc, prefix, info));
409 }
410 get_record.appendChild(record);
411 return get_record_response;
412 }
413
414 /** return a list of records in specified set, containing metadata from specified prefix*/
415 protected Element processListRecords(Element req) {
416 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
417 }
418
419 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
420 protected Element processListIdentifiers(Element req) {
421 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
422 }
423
424 // Get a list of records/identifiers that match the parameters.
425 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
426 /** arguments:
427 metadataPrefix: required
428 * from: optional
429 * until: optional
430 * set: optional
431 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
432 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
433 */
434 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
435
436 if(params.getLength() == 0) {
437 logger.error("must at least have the metadataPrefix parameter, can't be none");
438 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
439 }
440
441 HashMap<String, String> param_map = GSXML.getParamMap(params);
442
443 String prefix = "";
444 Date from_date = null;
445 Date until_date = null;
446
447 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
448 //Just a double-check
449 logger.error("A param element containing the metadataPrefix is not present.");
450 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
451 }
452 prefix = param_map.get(OAIXML.METADATA_PREFIX);
453 if (prefix == null || prefix.equals("")) {
454 //Just a double-check
455 logger.error("the value of metadataPrefix att is not present in the request.");
456 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
457 }
458
459 if(param_map.containsKey(OAIXML.FROM)) {
460 String from = param_map.get(OAIXML.FROM);
461 from_date = OAIXML.getDate(from);
462 }
463 if(param_map.containsKey(OAIXML.UNTIL)) {
464 String until = param_map.get(OAIXML.UNTIL);
465 until_date = OAIXML.getDate(until);
466 }
467
468 if (!format_response_map.containsKey(prefix)) {
469 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
470 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
471 }
472
473 // get list of oids
474 ArrayList<String> oid_list = null;
475 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
476 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
477
478 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
479 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
480 oid_list = getChildrenIds(OAIXML.BROWSELIST);
481 }
482 }
483
484 if (oid_list == null) {
485 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
486 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
487 }
488 // all validation is done
489
490 // get the list of elements that are in this metadata prefix
491 HashSet<String> set_of_elems = format_elements_map.get(prefix);
492
493 Document doc = XMLConverter.newDOM();
494 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
495 Element list_items = doc.createElement(response_name);
496 list_items_response.appendChild(list_items);
497
498 for(int i=0; i<oid_list.size(); i++) {
499 String oid = oid_list.get(i);
500
501 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
502 continue;
503 }
504
505 boolean OID_is_deleted = false;
506 long millis = -1;
507
508 DBInfo oai_info = null;
509 if(oaiinf_db != null) {
510 oai_info = this.oaiinf_db.getInfo(oid);
511 if (oai_info == null) {
512 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
513 } else {
514
515 // indexdb doesn't have info on deleted docs, only oaiinf db does.
516 // So only oaiinfdb has timestamps for deleted docs
517 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
518 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
519 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
520 millis = Long.parseLong(timestamp)*1000; // in milliseconds
521
522 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
523 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
524 OID_is_deleted = true;
525 }
526 }
527 }
528 DBInfo info = this.coll_db.getInfo(oid);
529 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
530 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
531 logger.error("Collection database does not contain information about oid: " +oid);
532 }
533 }
534 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
535
536 millis = getDateStampMillis(info);
537 }
538
539 Date this_date = null;
540 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
541
542 if (millis == -1) {
543 if (from_date != null || until_date !=null) {
544 continue; // if this doc doesn't have a date for some reason, and
545 // we are doing a date range, then don't include it.
546 }
547 } else {
548 this_date = new Date(millis);
549 if (from_date != null) {
550 if(this_date.before(from_date)) {
551 continue;
552 }
553 }
554 if (until_date != null) {
555 if (this_date.after(until_date)) {
556 continue;
557 }
558 }
559 }
560
561 //compose the header element, which we'll be appending no matter what
562 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
563
564 if (include_metadata) { // doing ListRecords
565 // compose a record for adding header and metadata
566 Element record = doc.createElement(OAIXML.RECORD);
567 list_items.appendChild(record);
568 //insert the header element
569 record.appendChild(header);
570 //Now check that this id has metadata for the required prefix.
571 if (info != null && documentContainsMetadata(info, set_of_elems)) {
572 // YES, it does have some metadata for this prefix
573 //compose the metadata element
574 record.appendChild(createMetadataElement(doc, prefix, info));
575 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
576 } else { // doing ListIdentifiers
577 //append the header element
578 list_items.appendChild(header);
579 }
580
581 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
582
583 return list_items_response;
584
585 }
586
587
588 // have implemented setDescription as an element, instead of a container containing metadata
589 private boolean configureSetInfo() {
590
591 Document doc = XMLConverter.newDOM();
592 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
593 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
594 this.list_sets_response.appendChild(list_sets_elem);
595 String set_name = this.coll_name;
596 String set_description = null;
597 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
598 if (name_elem!=null) {
599 set_name = GSXML.getNodeText(name_elem);
600 if (set_name.equals("")) {
601 set_name = this.coll_name; // default to coll name if can't find one
602 }
603 }
604 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
605 if (description_elem!=null) {
606 set_description = GSXML.getNodeText(description_elem);
607 if (set_description.equals("")) {
608 set_description = null;
609 }
610 }
611 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
612 list_sets_elem.appendChild(coll_set);
613
614 // are we part of any super sets?
615 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
616 for (int i=0; i<super_set_list.getLength(); i++) {
617 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
618 if (super_name != null && !super_name.equals("")) {
619 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
620 }
621 }
622 return true;
623 }
624
625 /** create the metadata element used when processing ListRecords/GetRecord requests
626 */
627 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
628 // the <metadata> element
629 Element metadata = doc.createElement(OAIXML.METADATA);
630 // the <oai:dc namespace...> element
631 Element prfx_str_elem = OAIXML.getMetadataPrefixElement(doc, prefix, OAIXML.oai_version);
632 metadata.appendChild(prfx_str_elem);
633
634 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
635 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
636 // for each element in the definition
637 for (int i=0; i<elements.getLength(); i++) {
638 Element e = (Element)elements.item(i);
639 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
640 if (map == null) {
641 // look up the element name
642 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
643 } else {
644 // we go though the list of names in the mapping
645 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
646 }
647 }
648 // output any metadata that is not just a simple mapping
649 addCustomMetadata(prfx_str_elem, prefix, info);
650 return metadata;
651 }
652
653 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
654 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
655 Vector<String> values = info.getMultiInfo(meta_name);
656 if (values != null && values.size()!=0) {
657 for (int i=0; i<values.size(); i++) {
658 addMetadataElement(meta_list_elem, meta_name, values.get(i));
659 }
660 }
661 }
662
663 /** more complicated addMetadata - can add multiple items. */
664 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
665 String[] names = name_list.split(",");
666 for (int i=0; i<names.length; i++) {
667 Vector<String> values = info.getMultiInfo(names[i]);
668 if (values == null || values.size()==0) {
669 continue;
670 }
671 for (int j=0; j<values.size(); j++) {
672 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
673 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
674 return; // only want to add one value
675 }
676 }
677 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
678 return; // we have added all values of this meta elem
679 }
680 // otherwise, we will keep going through the list and add them all.
681 }
682 }
683
684 // specific metadata formats might need to do some custom metadata that is not
685 //just a standard mapping. eg oai_dc outputting an identifier that is a link
686 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
687
688 if (prefix.equals(OAIXML.META_FORMAT_DC)) {
689 // we want to add in another dc:identifier element with a link to the resource if possible
690 // try gs.OAIResourceURL first, then srclinkFile, then GS version of documnet
691 String gsURL = info.getInfo(OAIXML.GS_OAI_RESOURCE_URL);
692 if (gsURL.equals("")) {
693 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
694 // try srclinkFile
695 gsURL = info.getInfo("srclinkFile");
696 if (!gsURL.equals("")) {
697 // make up the link to the file
698 gsURL = base_url.replace("oaiserver", "") + "sites/" + this.site_name
699 + "/collect/" + this.coll_name + "/index/assoc/"
700 + info.getInfo("assocfilepath") + "/" + gsURL;
701 } else {
702 // no srclink file, lets provide a link to the greenstone doc
703 gsURL = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
704 }
705 }
706 // now we have the url link, add as metadata
707 addMetadataElement(meta_list_elem, "dc:identifier", gsURL);
708 }
709 }
710
711 /** create the actual metadata element for the list */
712 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
713
714 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
715 meta_list_elem.appendChild(meta);
716 }
717
718
719 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
720 */
721 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
722
723 Element header = doc.createElement(OAIXML.HEADER);
724
725 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
726 if(deleted) {
727 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
728 // then the timestamp for deletion will be from oai-inf database
729 }
730
731 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
732 GSXML.setNodeText(identifier, coll_name + ":" + oid);
733 header.appendChild(identifier);
734 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
735 GSXML.setNodeText(set_spec, coll_name);
736 header.appendChild(set_spec);
737 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
738 GSXML.setNodeText(datestamp, oailastmodified);
739 header.appendChild(datestamp);
740 return header;
741 }
742
743 /** return the metadata information */
744 protected Element processListMetadataFormats(Element req) {
745 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
746 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
747 if (param == null) {
748 logger.error("An element containing the OID attribute not is present.");
749 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
750 }
751 String oid = param.getAttribute(GSXML.VALUE_ATT);
752 if (oid == null || oid.equals("")) {
753 logger.error("No OID is present in the request.");
754 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
755 }
756
757 /*
758 ArrayList<String> oid_list = null;
759 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
760 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
761
762 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
763 oid_list = getChildrenIds(OAIXML.BROWSELIST);
764 }
765 }
766 */
767 // assume meta formats are only for OIDs that have not been deleted
768 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
769 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
770 if (oid_list == null || oid_list.contains(oid) == false) {
771 logger.error("OID: " + oid + " is not present in the database.");
772 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
773// logger.error((new XMLConverter()).getPrettyString (e));
774 return e;
775 }
776
777 DBInfo info = null;
778 info = this.coll_db.getInfo(oid);
779 if (info == null) { //just double check
780 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
781 }
782
783 Document doc = XMLConverter.newDOM();
784 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
785
786 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
787 list_metadata_formats_response.appendChild(list_metadata_formats);
788 boolean has_meta_format = false;
789
790 // for each format in format_elements_map
791 Iterator<String> it = format_elements_map.keySet().iterator();
792 while (it.hasNext()) {
793 String format = it.next();
794 HashSet<String> set_of_elems = format_elements_map.get(format);
795 if (documentContainsMetadata(info, set_of_elems)) {
796 // add this format into the response
797 has_meta_format = true;
798 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
799 }
800 }
801
802 if (has_meta_format == false) {
803 logger.error("Specified metadata names are not contained in the database.");
804 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
805 } else {
806 return list_metadata_formats_response;
807 }
808 }
809
810 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
811 if (set_of_elems.size() == 0) {
812 return false;
813 }
814 Iterator<String> i = set_of_elems.iterator();
815 while (i.hasNext()) {
816 if (!info.getInfo(i.next()).equals("")) {
817 return true;
818 }
819 }
820 return false;
821 }
822
823 /** returns a list of the child ids in order, null if no children */
824 protected ArrayList<String> getChildrenIds(String node_id) {
825 DBInfo info = this.coll_db.getInfo(node_id);
826 if (info == null) {
827 return null;
828 }
829
830 String contains = info.getInfo("contains");
831 if (contains.equals("")) {
832 return null;
833 }
834 ArrayList<String> children = new ArrayList<String>();
835 StringTokenizer st = new StringTokenizer(contains, ";");
836 while (st.hasMoreTokens()) {
837 String child_id = st.nextToken().replaceAll("\"", node_id);
838 children.add(child_id);
839 }
840 return children;
841 }
842 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
843 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
844 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
845 * name which is mandatory.
846 */
847 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
848 if (metadata_names == null) return false;
849 logger.info("checking metadata names in db.");
850 for(int i=0; i<metadata_names.length; i++) {
851 int index = metadata_names[i].indexOf(",");
852 String meta_name = (index == -1) ? metadata_names[i] :
853 metadata_names[i].substring(index + 1);
854
855 if(info.getInfo(meta_name).equals("") == false) {
856 return true;
857 }
858 }
859 return false;
860 }
861
862 protected long getDateStampMillis(DBInfo info) {
863 // gs.OAIDateStamp is in YYYY-MM-DD
864 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
865 long millis = -1;
866 if (!time_stamp.equals("")) {
867 millis = OAIXML.getTime(time_stamp);
868 }
869 if (millis == -1) {
870 // oailastmodified is in seconds
871 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
872 if (!time_stamp.equals("")) {
873 millis = Long.parseLong(time_stamp)*1000;
874 }
875 }
876 return millis;
877
878
879 }
880}
881
882
Note: See TracBrowser for help on using the repository browser.