source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 31912

Last change on this file since 31912 was 31912, checked in by ak19, 7 years ago

Now GS3 tries to obtain the _earliesttimestamp entry of the oai-inf.db for each collection, in order to work out the earliest among them, which becomes the earliest timestamp of the repository. For each collection, if there is no such entry or the oai-inf db doesn't exist or can't be accessed, the collection falls back to using the value in the build config file as before. Also as before, if that doesn't exist either, it uses the lastmod date of the collection (I think also taken from build config) as the next fallback value.

File size: 36.3 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String site_name = "";
63 protected String coll_name = "";
64
65 // set this up during configure
66 protected Element list_sets_response = null;
67
68 protected Element meta_formats_definition = null;
69 protected HashMap<String, HashSet<String>> format_elements_map = null;
70 protected HashMap<String, Element> format_response_map = null;
/** Creates an unconfigured service rack; the fields are populated later by configure(). */
public OAIPMH() {
    super();
}
75
76 public void cleanUp() {
77 super.cleanUp();//??
78 this.coll_db.closeDatabase();
79 if (this.oaiinf_db != null){
80 this.oaiinf_db.closeDatabase();
81 }
82
83 }
84 /** configure this service
85 info is the OAIPMH service rack from collectionConfig.xml, and
86 extra_info is buildConfig.xml */
87 public boolean configure(Element info, Element extra_info) {
88 if (!super.configure(info, extra_info)){
89 logger.info("Configuring ServiceRack.java returns false.");
90 return false;
91 }
92
93 //get the names from ServiceRack.java
94 this.site_name = this.router.getSiteName();
95 this.coll_name = this.cluster_name;
96
97 logger.info("Configuring OAIPMH...");
98
99 this.config_info = info;
100
101 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
102 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
103 String index_stem = "";
104 String infodb_type = "";
105 if (metadata_list != null) {
106
107 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
108
109 if (index_stem_elem != null) {
110 index_stem = GSXML.getNodeText(index_stem_elem);
111 }
112
113 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
114 if (infodb_type_elem != null) {
115 infodb_type = GSXML.getNodeText(infodb_type_elem);
116 }
117
118 }
119
120 if (index_stem == null || index_stem.equals("")) {
121 index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
122 }
123 if (infodb_type == null || infodb_type.equals("")) {
124 infodb_type = "gdbm"; // the default
125 }
126
127 coll_db = new SimpleCollectionDatabase(infodb_type);
128 if (!coll_db.databaseOK()) {
129 logger.error("Couldn't create the collection database of type "+infodb_type);
130 return false;
131 }
132
133 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
134 if (!oaiinf_db.databaseOK()) {
135 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
136 return false;
137 }
138
139
140 // Open databases for querying
141 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
142 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
143 logger.error("Could not open collection database!");
144 return false;
145 }
146 // the oaiinf_db is called oai-inf.<infodb_type_extension>
147 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
148 File oaiinfFile = new File(oaiinf_db_file);
149
150 if(!oaiinfFile.exists()) {
151 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
152 oaiinf_db = null;
153 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
154 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
155 oaiinf_db = null;
156 }
157
158 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
159 configureSetInfo();
160 // the short_service_info is used by the message router to find the method names,
161
162 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
163 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
164 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
165 this.short_service_info.appendChild(list_records);
166
167 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
168 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
169 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
170 this.short_service_info.appendChild(list_identifiers);
171
172 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
173 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
174 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
175 this.short_service_info.appendChild(list_sets);
176
177 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
178 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
179 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
180 this.short_service_info.appendChild(list_metadata_formats);
181
182 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
183 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
184 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
185 this.short_service_info.appendChild(get_record);
186
187 return true;
188 }
189
190 public boolean configureOAI(Element oai_config_elem) {
191 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
192 this.format_response_map = new HashMap<String, Element>();
193 this.format_elements_map = new HashMap<String, HashSet<String>>();
194
195 // for now, all we want is the metadata prefix description and the mapping list
196 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
197 if (main_lmf_elem == null) {
198 logger.error("No listMetadataFormats element found in OAIConfig.xml");
199 return false;
200 }
201 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
202 if (meta_formats_list.getLength() == 0) {
203 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
204 return false;
205 }
206 boolean found_meta_format = false;
207 for(int i=0; i<meta_formats_list.getLength(); i++) {
208 Element mf = (Element) meta_formats_list.item(i);
209 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
210 if (prefix.equals("")) {
211 logger.error("metadataFormat element had no metadataPrefix attribute");
212 continue;
213 }
214 // get the right format from OAICOnfig
215 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
216 if (meta_format == null) {
217 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
218 continue;
219 }
220 // copy the format definition into our stored Element
221 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
222 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
223 this.meta_formats_definition.appendChild(collection_version_format);
224 // set up the response element for this format
225 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
226 // add in collection specific mappings
227 addCollectionMappings(collection_version_format, mf);
228 // now set up a list of all collection elements for reverse lookup of the mapping
229 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
230
231 }
232 return true;
233 }
234
235 /**
236 * @return the associated OAICollection's OAI_EARLIEST_TIMESTAMP_OID record's
237 * OAI_INF_TIMESTAMP field from the collection's oai-inf.db IN MILLISECONDS
238 */
239 public long getEarliestTimestamp() {
240 long timestamp = -1;
241
242 DBInfo oai_info = null;
243 if(oaiinf_db != null) {
244 // get internal record containing the earliest timestamp of the collection
245 oai_info = this.oaiinf_db.getInfo(OAIXML.OAI_EARLIEST_TIMESTAMP_OID);
246 if (oai_info == null) {
247 logger.warn("Can't get collection " + this.cluster_name + "'s earliest timestamp from oai-inf db. No entry for 'OID' " + OAIXML.OAI_EARLIEST_TIMESTAMP_OID + " in the db.");
248 } else {
249 timestamp = Long.parseLong(oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP)) * 1000; // stored in seconds, so x1000 to convert to milliseconds
250 //logger.info("@@@ found earliest OAI timestamp for collection " + this.coll_name + ": " + timestamp + " (ms)");
251 }
252 }
253 return timestamp;
254 }
255
256 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
257 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
258 for (int i=0; i<formats.getLength(); i++) {
259 Element format = (Element)formats.item(i);
260 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
261 if (prefix.equals(meta_name)) {
262 return format;
263 }
264 }
265 return null;
266 }
267
268 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
269 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
270
271 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
272 Document doc = element_list.getOwnerDocument();
273 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
274 if (coll_elements.getLength()==0) {
275 // no mappings to include
276 return;
277 }
278 for (int i=0; i<coll_elements.getLength(); i++) {
279 Element e = (Element)coll_elements.item(i);
280 String elem_name = e.getAttribute(GSXML.NAME_ATT);
281 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
282 if (main_elem == null) {
283 logger.error(elem_name+" not found in meta format, not using it");
284 } else {
285 element_list.replaceChild(doc.importNode(e, true),main_elem );
286 }
287 }
288 }
289
290 /** goes through all the mappings and makes a set of all collection
291 metadata names that could become an oai meta element - acts as
292 a reverse lookup for the mappings */
293 protected HashSet<String> getAllCollectionElements(Element meta_format) {
294 HashSet<String> meta_name_set = new HashSet<String>();
295 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
296 for (int i=0; i<elements.getLength(); i++) {
297 Element e = (Element)elements.item(i);
298 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
299 if (map == null) {
300 // there is no mapping, just use the element name
301 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
302 } else {
303 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
304 String[] name_array = list_of_names.split(",");
305 for (int j=0; j<name_array.length; j++) {
306 meta_name_set.add(name_array[j]);
307 }
308 }
309 }
310 return meta_name_set;
311 }
312
313 /** returns a specific service description */
314 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
315
316 if (service_id.equals(OAIXML.LIST_RECORDS)) {
317 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
318 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
319 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
320 return list_records;
321 }
322
323 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
324 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
325 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
326 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
327 return list_identifiers;
328 }
329 if (service_id.equals(OAIXML.LIST_SETS)) {
330 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
331 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
332 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
333 return list_sets;
334 }
335 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
336 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
337 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
338 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
339 return list_metadata_formats;
340 }
341
342 if (service_id.equals(OAIXML.GET_RECORD)) {
343 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
344 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
345 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
346 return get_record;
347 }
348
349 return null;
350 }
351
352 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
353 protected Element processListSets(Element req) {
354 return list_sets_response;
355 }
356 /** returns the actual record element used in the OAI GetRecord response */
357 protected Element processGetRecord(Element req) {
358 /** arguments:
359 identifier: required
360 metadataPrefix: required
361 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
362 */
363 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
364 HashMap<String, String> param_map = GSXML.getParamMap(params);
365
366 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
367 if (prefix == null || prefix.equals("")) {
368 //Just a double-check
369 logger.error("the value of metadataPrefix att is not present in the request.");
370 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
371 }
372
373 // check that we support this format
374 if (!format_response_map.containsKey(prefix)) {
375 logger.error("metadata prefix is not supported for collection "+this.coll_name);
376 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
377 }
378
379 Document doc = XMLConverter.newDOM();
380
381 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
382 boolean OID_is_deleted = false;
383 long millis = -1;
384
385 DBInfo oai_info = null;
386 if(oaiinf_db != null) {
387 oai_info = this.oaiinf_db.getInfo(oid);
388 if (oai_info == null) {
389 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
390 } else {
391
392 // indexdb doesn't have info on deleted docs, only oaiinf db does.
393 // So only oaiinfdb has timestamps for deleted docs
394 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
395 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
396 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
397 millis = Long.parseLong(timestamp)*1000; // in milliseconds
398
399 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
400 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
401 OID_is_deleted = true;
402 }
403 }
404 }
405
406 //get a DBInfo object of the identifier; if this identifier is not present in the database,
407 // null is returned.
408 DBInfo info = this.coll_db.getInfo(oid);
409 if (info == null) {
410 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
411 logger.error("OID: " + oid + " is not present in the collection index database.");
412 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
413 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
414 }
415 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
416 millis = getDateStampMillis(info);
417 }
418 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
419
420
421 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
422 Element get_record = doc.createElement(OAIXML.GET_RECORD);
423 get_record_response.appendChild(get_record);
424 Element record = doc.createElement(OAIXML.RECORD);
425 //compose the header element
426 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
427 if(!OID_is_deleted) {
428 //compose the metadata element
429 record.appendChild(createMetadataElement(doc, prefix, info));
430 }
431 get_record.appendChild(record);
432 return get_record_response;
433 }
434
435 /** return a list of records in specified set, containing metadata from specified prefix*/
436 protected Element processListRecords(Element req) {
437 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
438 }
439
440 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
441 protected Element processListIdentifiers(Element req) {
442 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
443 }
444
445 // Get a list of records/identifiers that match the parameters.
446 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
447 /** arguments:
448 metadataPrefix: required
449 * from: optional
450 * until: optional
451 * set: optional
452 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
453 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
454 */
455 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
456
457 if(params.getLength() == 0) {
458 logger.error("must at least have the metadataPrefix parameter, can't be none");
459 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
460 }
461
462 HashMap<String, String> param_map = GSXML.getParamMap(params);
463
464 String prefix = "";
465 Date from_date = null;
466 Date until_date = null;
467
468 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
469 //Just a double-check
470 logger.error("A param element containing the metadataPrefix is not present.");
471 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
472 }
473 prefix = param_map.get(OAIXML.METADATA_PREFIX);
474 if (prefix == null || prefix.equals("")) {
475 //Just a double-check
476 logger.error("the value of metadataPrefix att is not present in the request.");
477 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
478 }
479
480 if(param_map.containsKey(OAIXML.FROM)) {
481 String from = param_map.get(OAIXML.FROM);
482 from_date = OAIXML.getDate(from);
483 }
484 if(param_map.containsKey(OAIXML.UNTIL)) {
485 String until = param_map.get(OAIXML.UNTIL);
486 until_date = OAIXML.getDate(until);
487 }
488
489 if (!format_response_map.containsKey(prefix)) {
490 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
491 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
492 }
493
494 // get list of oids
495 ArrayList<String> oid_list = null;
496 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
497 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
498
499 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
500 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
501 oid_list = getChildrenIds(OAIXML.BROWSELIST);
502 }
503 }
504
505 if (oid_list == null) {
506 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
507 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
508 }
509 // all validation is done
510
511 // get the list of elements that are in this metadata prefix
512 HashSet<String> set_of_elems = format_elements_map.get(prefix);
513
514 Document doc = XMLConverter.newDOM();
515 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
516 Element list_items = doc.createElement(response_name);
517 list_items_response.appendChild(list_items);
518
519 for(int i=0; i<oid_list.size(); i++) {
520 String oid = oid_list.get(i);
521
522 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
523 continue;
524 }
525
526 boolean OID_is_deleted = false;
527 long millis = -1;
528
529 DBInfo oai_info = null;
530 if(oaiinf_db != null) {
531 oai_info = this.oaiinf_db.getInfo(oid);
532 if (oai_info == null) {
533 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
534 } else {
535
536 // indexdb doesn't have info on deleted docs, only oaiinf db does.
537 // So only oaiinfdb has timestamps for deleted docs
538 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
539 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
540 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
541 millis = Long.parseLong(timestamp)*1000; // in milliseconds
542
543 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
544 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
545 OID_is_deleted = true;
546 }
547 }
548 }
549 DBInfo info = this.coll_db.getInfo(oid);
550 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
551 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
552 logger.error("Collection database does not contain information about oid: " +oid);
553 }
554 }
555 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
556
557 millis = getDateStampMillis(info);
558 }
559
560 Date this_date = null;
561 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
562
563 if (millis == -1) {
564 if (from_date != null || until_date !=null) {
565 continue; // if this doc doesn't have a date for some reason, and
566 // we are doing a date range, then don't include it.
567 }
568 } else {
569 this_date = new Date(millis);
570 if (from_date != null) {
571 if(this_date.before(from_date)) {
572 continue;
573 }
574 }
575 if (until_date != null) {
576 if (this_date.after(until_date)) {
577 continue;
578 }
579 }
580 }
581
582 //compose the header element, which we'll be appending no matter what
583 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
584
585 if (include_metadata) { // doing ListRecords
586 // compose a record for adding header and metadata
587 Element record = doc.createElement(OAIXML.RECORD);
588 list_items.appendChild(record);
589 //insert the header element
590 record.appendChild(header);
591 //Now check that this id has metadata for the required prefix.
592 if (info != null && documentContainsMetadata(info, set_of_elems)) {
593 // YES, it does have some metadata for this prefix
594 //compose the metadata element
595 record.appendChild(createMetadataElement(doc, prefix, info));
596 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
597 } else { // doing ListIdentifiers
598 //append the header element
599 list_items.appendChild(header);
600 }
601
602 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
603
604 return list_items_response;
605
606 }
607
608
609 // have implemented setDescription as an element, instead of a container containing metadata
610 private boolean configureSetInfo() {
611
612 Document doc = XMLConverter.newDOM();
613 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
614 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
615 this.list_sets_response.appendChild(list_sets_elem);
616 String set_name = this.coll_name;
617 String set_description = null;
618 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
619 if (name_elem!=null) {
620 set_name = GSXML.getNodeText(name_elem);
621 if (set_name.equals("")) {
622 set_name = this.coll_name; // default to coll name if can't find one
623 }
624 }
625 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
626 if (description_elem!=null) {
627 set_description = GSXML.getNodeText(description_elem);
628 if (set_description.equals("")) {
629 set_description = null;
630 }
631 }
632 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
633 list_sets_elem.appendChild(coll_set);
634
635 // are we part of any super sets?
636 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
637 for (int i=0; i<super_set_list.getLength(); i++) {
638 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
639 if (super_name != null && !super_name.equals("")) {
640 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
641 }
642 }
643 return true;
644 }
645
646 /** create the metadata element used when processing ListRecords/GetRecord requests
647 */
648 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
649 // the <metadata> element
650 Element metadata = doc.createElement(OAIXML.METADATA);
651 // the <oai:dc namespace...> element
652 Element prfx_str_elem = OAIXML.getMetadataPrefixElement(doc, prefix, OAIXML.oai_version);
653 metadata.appendChild(prfx_str_elem);
654
655 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
656 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
657 // for each element in the definition
658 for (int i=0; i<elements.getLength(); i++) {
659 Element e = (Element)elements.item(i);
660 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
661 if (map == null) {
662 // look up the element name
663 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
664 } else {
665 // we go though the list of names in the mapping
666 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
667 }
668 }
669 // output any metadata that is not just a simple mapping
670 addCustomMetadata(prfx_str_elem, prefix, info);
671 return metadata;
672 }
673
674 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
675 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
676 Vector<String> values = info.getMultiInfo(meta_name);
677 if (values != null && values.size()!=0) {
678 for (int i=0; i<values.size(); i++) {
679 addMetadataElement(meta_list_elem, meta_name, values.get(i));
680 }
681 }
682 }
683
684 /** more complicated addMetadata - can add multiple items. */
685 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
686 String[] names = name_list.split(",");
687 for (int i=0; i<names.length; i++) {
688 Vector<String> values = info.getMultiInfo(names[i]);
689 if (values == null || values.size()==0) {
690 continue;
691 }
692 for (int j=0; j<values.size(); j++) {
693 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
694 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
695 return; // only want to add one value
696 }
697 }
698 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
699 return; // we have added all values of this meta elem
700 }
701 // otherwise, we will keep going through the list and add them all.
702 }
703 }
704
705 // specific metadata formats might need to do some custom metadata that is not
706 //just a standard mapping. eg oai_dc outputting an identifier that is a link
707 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
708
709 if (prefix.equals(OAIXML.META_FORMAT_DC)) {
710 // we want to add in another dc:identifier element with a link to the resource if possible
711 // try gs.OAIResourceURL first, then srclinkFile, then GS version of documnet
712 String gsURL = info.getInfo(OAIXML.GS_OAI_RESOURCE_URL);
713 if (gsURL.equals("")) {
714 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
715 // try srclinkFile
716 gsURL = info.getInfo("srclinkFile");
717 if (!gsURL.equals("")) {
718 // make up the link to the file
719 gsURL = base_url.replace("oaiserver", "") + "sites/" + this.site_name
720 + "/collect/" + this.coll_name + "/index/assoc/"
721 + info.getInfo("assocfilepath") + "/" + gsURL;
722 } else {
723 // no srclink file, lets provide a link to the greenstone doc
724 gsURL = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
725 }
726 }
727 // now we have the url link, add as metadata
728 addMetadataElement(meta_list_elem, "dc:identifier", gsURL);
729 }
730 }
731
732 /** create the actual metadata element for the list */
733 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
734
735 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
736 meta_list_elem.appendChild(meta);
737 }
738
739
740 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
741 */
742 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
743
744 Element header = doc.createElement(OAIXML.HEADER);
745
746 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
747 if(deleted) {
748 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
749 // then the timestamp for deletion will be from oai-inf database
750 }
751
752 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
753 GSXML.setNodeText(identifier, coll_name + ":" + oid);
754 header.appendChild(identifier);
755 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
756 GSXML.setNodeText(set_spec, coll_name);
757 header.appendChild(set_spec);
758 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
759 GSXML.setNodeText(datestamp, oailastmodified);
760 header.appendChild(datestamp);
761 return header;
762 }
763
764 /** return the metadata information */
765 protected Element processListMetadataFormats(Element req) {
766 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
767 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
768 if (param == null) {
769 logger.error("An element containing the OID attribute not is present.");
770 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
771 }
772 String oid = param.getAttribute(GSXML.VALUE_ATT);
773 if (oid == null || oid.equals("")) {
774 logger.error("No OID is present in the request.");
775 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
776 }
777
778 /*
779 ArrayList<String> oid_list = null;
780 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
781 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
782
783 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
784 oid_list = getChildrenIds(OAIXML.BROWSELIST);
785 }
786 }
787 */
788 // assume meta formats are only for OIDs that have not been deleted
789 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
790 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
791 if (oid_list == null || oid_list.contains(oid) == false) {
792 logger.error("OID: " + oid + " is not present in the database.");
793 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
794// logger.error((new XMLConverter()).getPrettyString (e));
795 return e;
796 }
797
798 DBInfo info = null;
799 info = this.coll_db.getInfo(oid);
800 if (info == null) { //just double check
801 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
802 }
803
804 Document doc = XMLConverter.newDOM();
805 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
806
807 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
808 list_metadata_formats_response.appendChild(list_metadata_formats);
809 boolean has_meta_format = false;
810
811 // for each format in format_elements_map
812 Iterator<String> it = format_elements_map.keySet().iterator();
813 while (it.hasNext()) {
814 String format = it.next();
815 HashSet<String> set_of_elems = format_elements_map.get(format);
816 if (documentContainsMetadata(info, set_of_elems)) {
817 // add this format into the response
818 has_meta_format = true;
819 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
820 }
821 }
822
823 if (has_meta_format == false) {
824 logger.error("Specified metadata names are not contained in the database.");
825 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
826 } else {
827 return list_metadata_formats_response;
828 }
829 }
830
831 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
832 if (set_of_elems.size() == 0) {
833 return false;
834 }
835 Iterator<String> i = set_of_elems.iterator();
836 while (i.hasNext()) {
837 if (!info.getInfo(i.next()).equals("")) {
838 return true;
839 }
840 }
841 return false;
842 }
843
844 /** returns a list of the child ids in order, null if no children */
845 protected ArrayList<String> getChildrenIds(String node_id) {
846 DBInfo info = this.coll_db.getInfo(node_id);
847 if (info == null) {
848 return null;
849 }
850
851 String contains = info.getInfo("contains");
852 if (contains.equals("")) {
853 return null;
854 }
855 ArrayList<String> children = new ArrayList<String>();
856 StringTokenizer st = new StringTokenizer(contains, ";");
857 while (st.hasMoreTokens()) {
858 String child_id = st.nextToken().replaceAll("\"", node_id);
859 children.add(child_id);
860 }
861 return children;
862 }
863 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
864 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
865 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
866 * name which is mandatory.
867 */
868 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
869 if (metadata_names == null) return false;
870 logger.info("checking metadata names in db.");
871 for(int i=0; i<metadata_names.length; i++) {
872 int index = metadata_names[i].indexOf(",");
873 String meta_name = (index == -1) ? metadata_names[i] :
874 metadata_names[i].substring(index + 1);
875
876 if(info.getInfo(meta_name).equals("") == false) {
877 return true;
878 }
879 }
880 return false;
881 }
882
883 protected long getDateStampMillis(DBInfo info) {
884 // gs.OAIDateStamp is in YYYY-MM-DD
885 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
886 long millis = -1;
887 if (!time_stamp.equals("")) {
888 millis = OAIXML.getTime(time_stamp);
889 }
890 if (millis == -1) {
891 // oailastmodified is in seconds
892 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
893 if (!time_stamp.equals("")) {
894 millis = Long.parseLong(time_stamp)*1000;
895 }
896 }
897 return millis;
898
899
900 }
901}
902
903
Note: See TracBrowser for help on using the repository browser.