source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java@ 31966

Last change on this file since 31966 was 31966, checked in by kjdon, 7 years ago

Metadata prefix elements don't change, so we don't need to create them each time we are asked for metadata. Create them once initially, store them in a map keyed by prefix, and then just reuse them.

File size: 36.6 KB
Line 
1/*
2 * OAIPMH.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSXML;
24import org.greenstone.gsdl3.util.OAIXML;
25import org.greenstone.gsdl3.util.OID;
26import org.greenstone.gsdl3.util.GSFile;
27import org.greenstone.gsdl3.util.XMLConverter;
28
29import org.greenstone.gsdl3.util.SimpleCollectionDatabase;
30import org.greenstone.gsdl3.util.DBInfo;
31// XML classes
32import org.w3c.dom.Document;
33import org.w3c.dom.Element;
34import org.w3c.dom.NodeList;
35
36// General Java classes
37import java.io.File;
38import java.util.StringTokenizer;
39import java.util.Vector;
40import java.util.Set;
41import java.util.Iterator;
42import java.util.ArrayList;
43import java.util.Date;
44import java.util.HashMap;
45import java.util.HashSet;
46import java.util.Map.Entry;
47
48import org.apache.log4j.Logger;
49
50/** Implements the oai metadata retrieval service for GS3 collections.
51 * Dig into each collection's database and retrieve the metadata
52 *
53 */
54
55public class OAIPMH extends ServiceRack {
56
57 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.OAIPMH.class.getName());
58
59 protected SimpleCollectionDatabase coll_db = null;
60 protected SimpleCollectionDatabase oaiinf_db = null;
61
62 protected String site_name = "";
63 protected String coll_name = "";
64
65 // set this up during configure
66 protected Element list_sets_response = null;
67
68 protected Element meta_formats_definition = null;
69 protected HashMap<String, HashSet<String>> format_elements_map = null;
70 protected HashMap<String, Element> format_response_map = null;
71 protected HashMap<String, Element> format_meta_elem_map = null;
72
/**
 * Default constructor. Nothing is initialised here; the databases, names
 * and cached responses are filled in later by configure() and
 * configureOAI().
 */
public OAIPMH() {
    super();
}
77
78 public void cleanUp() {
79 super.cleanUp();//??
80 this.coll_db.closeDatabase();
81 if (this.oaiinf_db != null){
82 this.oaiinf_db.closeDatabase();
83 }
84
85 }
86 /** configure this service
87 info is the OAIPMH service rack from collectionConfig.xml, and
88 extra_info is buildConfig.xml */
89 public boolean configure(Element info, Element extra_info) {
90 if (!super.configure(info, extra_info)){
91 logger.info("Configuring ServiceRack.java returns false.");
92 return false;
93 }
94
95 //get the names from ServiceRack.java
96 this.site_name = this.router.getSiteName();
97 this.coll_name = this.cluster_name;
98
99 logger.info("Configuring OAIPMH...");
100
101 this.config_info = info;
102
103 // the index stem is either specified in the buildConfig.xml file (extra_info) or uses the collection name
104 Element metadata_list = (Element) GSXML.getChildByTagName(extra_info, GSXML.METADATA_ELEM+GSXML.LIST_MODIFIER);
105 String index_stem = "";
106 String infodb_type = "";
107 if (metadata_list != null) {
108
109 Element index_stem_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "indexStem");
110
111 if (index_stem_elem != null) {
112 index_stem = GSXML.getNodeText(index_stem_elem);
113 }
114
115 Element infodb_type_elem = (Element) GSXML.getNamedElement(metadata_list, GSXML.METADATA_ELEM, GSXML.NAME_ATT, "infodbType");
116 if (infodb_type_elem != null) {
117 infodb_type = GSXML.getNodeText(infodb_type_elem);
118 }
119
120 }
121
122 if (index_stem == null || index_stem.equals("")) {
123 index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db>
124 }
125 if (infodb_type == null || infodb_type.equals("")) {
126 infodb_type = "gdbm"; // the default
127 }
128
129 coll_db = new SimpleCollectionDatabase(infodb_type);
130 if (!coll_db.databaseOK()) {
131 logger.error("Couldn't create the collection database of type "+infodb_type);
132 return false;
133 }
134
135 oaiinf_db = new SimpleCollectionDatabase(infodb_type);
136 if (!oaiinf_db.databaseOK()) {
137 logger.error("Couldn't create the oai-inf database of type "+infodb_type);
138 return false;
139 }
140
141
142 // Open databases for querying
143 String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type);
144 if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) {
145 logger.error("Could not open collection database!");
146 return false;
147 }
148 // the oaiinf_db is called oai-inf.<infodb_type_extension>
149 String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type);
150 File oaiinfFile = new File(oaiinf_db_file);
151
152 if(!oaiinfFile.exists()) {
153 logger.warn("oai-inf database for collection + " + this.cluster_name + " does not exist.");
154 oaiinf_db = null;
155 } else if (!this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) {
156 logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!");
157 oaiinf_db = null;
158 }
159
160 // work out what sets this collection has. Will usually contain the collection itself, optional super collection, and maybe subcolls if appropriate classifiers are present.
161 configureSetInfo();
162 // the short_service_info is used by the message router to find the method names,
163
164 Element list_records = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
165 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
166 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
167 this.short_service_info.appendChild(list_records);
168
169 Element list_identifiers = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
170 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
171 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
172 this.short_service_info.appendChild(list_identifiers);
173
174 Element list_sets = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
175 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
176 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
177 this.short_service_info.appendChild(list_sets);
178
179 Element list_metadata_formats = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
180 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
181 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
182 this.short_service_info.appendChild(list_metadata_formats);
183
184 Element get_record = this.desc_doc.createElement(GSXML.SERVICE_ELEM);
185 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
186 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
187 this.short_service_info.appendChild(get_record);
188
189 return true;
190 }
191
192 public boolean configureOAI(Element oai_config_elem) {
193 this.meta_formats_definition = this.desc_doc.createElement(OAIXML.LIST_METADATA_FORMATS);
194 this.format_response_map = new HashMap<String, Element>();
195 this.format_elements_map = new HashMap<String, HashSet<String>>();
196 this.format_meta_elem_map = new HashMap<String, Element>();
197
198 // for now, all we want is the metadata prefix description and the mapping list
199 Element main_lmf_elem = (Element) GSXML.getChildByTagName(oai_config_elem, OAIXML.LIST_METADATA_FORMATS);
200 if (main_lmf_elem == null) {
201 logger.error("No listMetadataFormats element found in OAIConfig.xml");
202 return false;
203 }
204 NodeList meta_formats_list = this.config_info.getElementsByTagName(OAIXML.METADATA_FORMAT);
205 if (meta_formats_list.getLength() == 0) {
206 logger.error("no metadataFormat elements found in OAIPMH serviceRack element");
207 return false;
208 }
209
210 boolean found_meta_format = false;
211 for(int i=0; i<meta_formats_list.getLength(); i++) {
212 Element mf = (Element) meta_formats_list.item(i);
213 String prefix = mf.getAttribute(OAIXML.METADATA_PREFIX);
214 if (prefix.equals("")) {
215 logger.error("metadataFormat element had no metadataPrefix attribute");
216 continue;
217 }
218 // get the right format from OAICOnfig
219 Element meta_format = findNamedMetadataFormat(main_lmf_elem, prefix);
220 if (meta_format == null) {
221 logger.error("Couldn't find metadataFormat named "+prefix+" in OAIConfig.xml");
222 continue;
223 }
224
225 // copy the format definition into our stored Element
226 Element collection_version_format = (Element) this.desc_doc.importNode(meta_format, true);
227 collection_version_format.setAttribute(GSXML.NAME_ATT, prefix); // for convenience
228 this.meta_formats_definition.appendChild(collection_version_format);
229 // set up the response element for this format
230 format_response_map.put(prefix, OAIXML.getMetadataFormatShort(this.desc_doc, collection_version_format));
231 // add in collection specific mappings
232 addCollectionMappings(collection_version_format, mf);
233 // now set up a list of all collection elements for reverse lookup of the mapping
234 format_elements_map.put(prefix, getAllCollectionElements(collection_version_format));
235 format_meta_elem_map.put(prefix, OAIXML.getMetadataPrefixElement(this.desc_doc, prefix, collection_version_format));
236
237 }
238 return true;
239 }
240
241 /**
242 * @return the associated OAICollection's OAI_EARLIEST_TIMESTAMP_OID record's
243 * OAI_INF_TIMESTAMP field from the collection's oai-inf.db IN MILLISECONDS
244 */
245 public long getEarliestTimestamp() {
246 long timestamp = -1;
247
248 DBInfo oai_info = null;
249 if(oaiinf_db != null) {
250 // get internal record containing the earliest timestamp of the collection
251 oai_info = this.oaiinf_db.getInfo(OAIXML.OAI_EARLIEST_TIMESTAMP_OID);
252 if (oai_info == null) {
253 logger.warn("Can't get collection " + this.cluster_name + "'s earliest timestamp from oai-inf db. No entry for 'OID' " + OAIXML.OAI_EARLIEST_TIMESTAMP_OID + " in the db.");
254 } else {
255 timestamp = Long.parseLong(oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP)) * 1000; // stored in seconds, so x1000 to convert to milliseconds
256 //logger.info("@@@ found earliest OAI timestamp for collection " + this.coll_name + ": " + timestamp + " (ms)");
257 }
258 }
259 return timestamp;
260 }
261
262 protected Element findNamedMetadataFormat(Element list_meta_formats, String prefix) {
263 NodeList formats = list_meta_formats.getElementsByTagName(OAIXML.METADATA_FORMAT);
264 for (int i=0; i<formats.getLength(); i++) {
265 Element format = (Element)formats.item(i);
266 String meta_name = GSXML.getNodeText((Element)GSXML.getChildByTagName(format, OAIXML.METADATA_PREFIX));
267 if (prefix.equals(meta_name)) {
268 return format;
269 }
270 }
271 return null;
272 }
273
274 /** goes through the mappings from the collection one, and replaces existing ones in the main one */
275 protected void addCollectionMappings(Element main_meta_format, Element coll_meta_format) {
276
277 Element element_list = (Element)GSXML.getChildByTagName(main_meta_format, OAIXML.ELEMENT+GSXML.LIST_MODIFIER);
278 Document doc = element_list.getOwnerDocument();
279 NodeList coll_elements = coll_meta_format.getElementsByTagName(OAIXML.ELEMENT);
280 if (coll_elements.getLength()==0) {
281 // no mappings to include
282 return;
283 }
284 for (int i=0; i<coll_elements.getLength(); i++) {
285 Element e = (Element)coll_elements.item(i);
286 String elem_name = e.getAttribute(GSXML.NAME_ATT);
287 Element main_elem = GSXML.getNamedElement(element_list, OAIXML.ELEMENT, GSXML.NAME_ATT, elem_name);
288 if (main_elem == null) {
289 logger.error(elem_name+" not found in meta format, not using it");
290 } else {
291 element_list.replaceChild(doc.importNode(e, true),main_elem );
292 }
293 }
294 }
295
296 /** goes through all the mappings and makes a set of all collection
297 metadata names that could become an oai meta element - acts as
298 a reverse lookup for the mappings */
299 protected HashSet<String> getAllCollectionElements(Element meta_format) {
300 HashSet<String> meta_name_set = new HashSet<String>();
301 NodeList elements = meta_format.getElementsByTagName(OAIXML.ELEMENT);
302 for (int i=0; i<elements.getLength(); i++) {
303 Element e = (Element)elements.item(i);
304 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
305 if (map == null) {
306 // there is no mapping, just use the element name
307 meta_name_set.add(e.getAttribute(GSXML.NAME_ATT));
308 } else {
309 String list_of_names = map.getAttribute(OAIXML.ELEMENTS);
310 String[] name_array = list_of_names.split(",");
311 for (int j=0; j<name_array.length; j++) {
312 meta_name_set.add(name_array[j]);
313 }
314 }
315 }
316 return meta_name_set;
317 }
318
319 /** returns a specific service description */
320 public Element getServiceDescription(Document doc, String service_id, String lang, String subset) {
321
322 if (service_id.equals(OAIXML.LIST_RECORDS)) {
323 Element list_records = doc.createElement(GSXML.SERVICE_ELEM);
324 list_records.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_RECORDS);
325 list_records.setAttribute(GSXML.TYPE_ATT, "oai");
326 return list_records;
327 }
328
329 if (service_id.equals(OAIXML.LIST_IDENTIFIERS)) {
330 Element list_identifiers = doc.createElement(GSXML.SERVICE_ELEM);
331 list_identifiers.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_IDENTIFIERS);
332 list_identifiers.setAttribute(GSXML.TYPE_ATT, "oai");
333 return list_identifiers;
334 }
335 if (service_id.equals(OAIXML.LIST_SETS)) {
336 Element list_sets = doc.createElement(GSXML.SERVICE_ELEM);
337 list_sets.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_SETS);
338 list_sets.setAttribute(GSXML.TYPE_ATT, "oai");
339 return list_sets;
340 }
341 if (service_id.equals(OAIXML.LIST_METADATA_FORMATS)) {
342 Element list_metadata_formats = doc.createElement(GSXML.SERVICE_ELEM);
343 list_metadata_formats.setAttribute(GSXML.NAME_ATT, OAIXML.LIST_METADATA_FORMATS);
344 list_metadata_formats.setAttribute(GSXML.TYPE_ATT, "oai");
345 return list_metadata_formats;
346 }
347
348 if (service_id.equals(OAIXML.GET_RECORD)) {
349 Element get_record = doc.createElement(GSXML.SERVICE_ELEM);
350 get_record.setAttribute(GSXML.NAME_ATT, OAIXML.GET_RECORD);
351 get_record.setAttribute(GSXML.TYPE_ATT, "oai");
352 return get_record;
353 }
354
355 return null;
356 }
357
358 /** The list sets service returns all the sets that this collection is/is part of/contains. This is gathered by Receptionist from all collections to answer the OAI ListSets request. */
359 protected Element processListSets(Element req) {
360 return list_sets_response;
361 }
362 /** returns the actual record element used in the OAI GetRecord response */
363 protected Element processGetRecord(Element req) {
364 /** arguments:
365 identifier: required
366 metadataPrefix: required
367 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
368 */
369 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
370 HashMap<String, String> param_map = GSXML.getParamMap(params);
371
372 String prefix = param_map.get(OAIXML.METADATA_PREFIX);
373 if (prefix == null || prefix.equals("")) {
374 //Just a double-check
375 logger.error("the value of metadataPrefix att is not present in the request.");
376 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
377 }
378
379 // check that we support this format
380 if (!format_response_map.containsKey(prefix)) {
381 logger.error("metadata prefix is not supported for collection "+this.coll_name);
382 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
383 }
384
385 Document doc = XMLConverter.newDOM();
386
387 String oid = param_map.get(OAIXML.OID); // TODO should this be identifier???
388 boolean OID_is_deleted = false;
389 long millis = -1;
390
391 DBInfo oai_info = null;
392 if(oaiinf_db != null) {
393 oai_info = this.oaiinf_db.getInfo(oid);
394 if (oai_info == null) {
395 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
396 } else {
397
398 // indexdb doesn't have info on deleted docs, only oaiinf db does.
399 // So only oaiinfdb has timestamps for deleted docs
400 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
401 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
402 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds, like oailastmodified in the collection index db
403 millis = Long.parseLong(timestamp)*1000; // in milliseconds
404
405 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
406 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
407 OID_is_deleted = true;
408 }
409 }
410 }
411
412 //get a DBInfo object of the identifier; if this identifier is not present in the database,
413 // null is returned.
414 DBInfo info = this.coll_db.getInfo(oid);
415 if (info == null) {
416 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
417 logger.error("OID: " + oid + " is not present in the collection index database.");
418 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
419 } // if doc deleted, id missing in indexdb is not an error: doc id would exist only in oai-inf db, marked as deleted 'D'
420 }
421 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
422 millis = getDateStampMillis(info);
423 }
424 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
425
426
427 Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM);
428 Element get_record = doc.createElement(OAIXML.GET_RECORD);
429 get_record_response.appendChild(get_record);
430 Element record = doc.createElement(OAIXML.RECORD);
431 //compose the header element
432 record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));
433 if(!OID_is_deleted) {
434 //compose the metadata element
435 record.appendChild(createMetadataElement(doc, prefix, info));
436 }
437 get_record.appendChild(record);
438 return get_record_response;
439 }
440
441 /** return a list of records in specified set, containing metadata from specified prefix*/
442 protected Element processListRecords(Element req) {
443 return processListIdentifiersOrRecords(req, OAIXML.LIST_RECORDS, true);
444 }
445
446 /** return a list of identifiers in specified set that contain metadata belonging to specified prefix. */
447 protected Element processListIdentifiers(Element req) {
448 return processListIdentifiersOrRecords(req, OAIXML.LIST_IDENTIFIERS, false);
449 }
450
451 // Get a list of records/identifiers that match the parameters.
452 protected Element processListIdentifiersOrRecords(Element req, String response_name, boolean include_metadata) {
453 /** arguments:
454 metadataPrefix: required
455 * from: optional
456 * until: optional
457 * set: optional
458 * resumptionToken: exclusive and optional (ignored as it has been handled by OAIReceptionist)
459 * Exceptions: badArgument; cannotDisseminateFormat; idDoesNotExist
460 */
461 NodeList params = GSXML.getChildrenByTagName(req, GSXML.PARAM_ELEM);
462
463 if(params.getLength() == 0) {
464 logger.error("must at least have the metadataPrefix parameter, can't be none");
465 return OAIXML.createErrorResponse(OAIXML.BAD_ARGUMENT, "");
466 }
467
468 HashMap<String, String> param_map = GSXML.getParamMap(params);
469
470 String prefix = "";
471 Date from_date = null;
472 Date until_date = null;
473
474 if(param_map.containsKey(OAIXML.METADATA_PREFIX) == false) {
475 //Just a double-check
476 logger.error("A param element containing the metadataPrefix is not present.");
477 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
478 }
479 prefix = param_map.get(OAIXML.METADATA_PREFIX);
480 if (prefix == null || prefix.equals("")) {
481 //Just a double-check
482 logger.error("the value of metadataPrefix att is not present in the request.");
483 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
484 }
485
486 if(param_map.containsKey(OAIXML.FROM)) {
487 String from = param_map.get(OAIXML.FROM);
488 from_date = OAIXML.getDate(from);
489 }
490 if(param_map.containsKey(OAIXML.UNTIL)) {
491 String until = param_map.get(OAIXML.UNTIL);
492 until_date = OAIXML.getDate(until);
493 }
494
495 if (!format_response_map.containsKey(prefix)) {
496 logger.error(prefix + " metadata prefix is not supported for collection "+this.coll_name);
497 return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, "");
498 }
499
500 // get list of oids
501 ArrayList<String> oid_list = null;
502 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
503 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
504
505 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
506 logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name);
507 oid_list = getChildrenIds(OAIXML.BROWSELIST);
508 }
509 }
510
511 if (oid_list == null) {
512 logger.error("No matched records found in collection: oai-inf and index db's browselist are empty");
513 return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, "");
514 }
515 // all validation is done
516
517 // get the list of elements that are in this metadata prefix
518 HashSet<String> set_of_elems = format_elements_map.get(prefix);
519
520 Document doc = XMLConverter.newDOM();
521 Element list_items_response = doc.createElement(GSXML.RESPONSE_ELEM);
522 Element list_items = doc.createElement(response_name);
523 list_items_response.appendChild(list_items);
524
525 for(int i=0; i<oid_list.size(); i++) {
526 String oid = oid_list.get(i);
527
528 if(oid.equals(OAIXML.OAI_EARLIEST_TIMESTAMP_OID)) { // internal id not doc id, so skip
529 continue;
530 }
531
532 boolean OID_is_deleted = false;
533 long millis = -1;
534
535 DBInfo oai_info = null;
536 if(oaiinf_db != null) {
537 oai_info = this.oaiinf_db.getInfo(oid);
538 if (oai_info == null) {
539 logger.warn("OID: " + oid + " is not present in the collection's oai-inf database.");
540 } else {
541
542 // indexdb doesn't have info on deleted docs, only oaiinf db does.
543 // So only oaiinfdb has timestamps for deleted docs
544 // For non-deleted doc ids: also obtain timestamp from oaiinf db,
545 // but if the oaiinf db doesn't exist, resort to oailastmodified fields of indexdb.
546 String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // stored in seconds like oailastmodified in the collection index db
547 millis = Long.parseLong(timestamp)*1000; // in milliseconds
548
549 String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS);
550 if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) {
551 OID_is_deleted = true;
552 }
553 }
554 }
555 DBInfo info = this.coll_db.getInfo(oid);
556 if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of the oid
557 if(!OID_is_deleted) { // we don't expect to find entries for deleted docs in index db.
558 logger.error("Collection database does not contain information about oid: " +oid);
559 }
560 }
561 else if (millis == -1) { // so couldn't get doc lastmod from oaiinf db, get oailastmodified from collection's index db
562
563 millis = getDateStampMillis(info);
564 }
565
566 Date this_date = null;
567 String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis);
568
569 if (millis == -1) {
570 if (from_date != null || until_date !=null) {
571 continue; // if this doc doesn't have a date for some reason, and
572 // we are doing a date range, then don't include it.
573 }
574 } else {
575 this_date = new Date(millis);
576 if (from_date != null) {
577 if(this_date.before(from_date)) {
578 continue;
579 }
580 }
581 if (until_date != null) {
582 if (this_date.after(until_date)) {
583 continue;
584 }
585 }
586 }
587
588 //compose the header element, which we'll be appending no matter what
589 Element header = createHeaderElement(doc, oid, oailastmodified, OID_is_deleted);
590
591 if (include_metadata) { // doing ListRecords
592 // compose a record for adding header and metadata
593 Element record = doc.createElement(OAIXML.RECORD);
594 list_items.appendChild(record);
595 //insert the header element
596 record.appendChild(header);
597 //Now check that this id has metadata for the required prefix.
598 if (info != null && documentContainsMetadata(info, set_of_elems)) {
599 // YES, it does have some metadata for this prefix
600 //compose the metadata element
601 record.appendChild(createMetadataElement(doc, prefix, info));
602 } // otherwise the oid was 'deleted' and only in the oai-inf db and not in the info (collection index) db
603 } else { // doing ListIdentifiers
604 //append the header element
605 list_items.appendChild(header);
606 }
607
608 }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record
609
610 return list_items_response;
611
612 }
613
614
615 // have implemented setDescription as an element, instead of a container containing metadata
616 private boolean configureSetInfo() {
617
618 Document doc = XMLConverter.newDOM();
619 this.list_sets_response = doc.createElement(GSXML.RESPONSE_ELEM);
620 Element list_sets_elem = doc.createElement(OAIXML.LIST_SETS);
621 this.list_sets_response.appendChild(list_sets_elem);
622 String set_name = this.coll_name;
623 String set_description = null;
624 Element name_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_NAME);
625 if (name_elem!=null) {
626 set_name = GSXML.getNodeText(name_elem);
627 if (set_name.equals("")) {
628 set_name = this.coll_name; // default to coll name if can't find one
629 }
630 }
631 Element description_elem = (Element)GSXML.getChildByTagName(this.config_info, OAIXML.SET_DESCRIPTION);
632 if (description_elem!=null) {
633 set_description = GSXML.getNodeText(description_elem);
634 if (set_description.equals("")) {
635 set_description = null;
636 }
637 }
638 Element coll_set = OAIXML.createSet(doc, this.coll_name, set_name, set_description);
639 list_sets_elem.appendChild(coll_set);
640
641 // are we part of any super sets?
642 NodeList super_set_list = GSXML.getChildrenByTagName(this.config_info, OAIXML.OAI_SUPER_SET);
643 for (int i=0; i<super_set_list.getLength(); i++) {
644 String super_name = ((Element)super_set_list.item(i)).getAttribute(GSXML.NAME_ATT);
645 if (super_name != null && !super_name.equals("")) {
646 list_sets_elem.appendChild(OAIXML.createSet(doc, super_name, super_name, null));
647 }
648 }
649 return true;
650 }
651
652 /** create the metadata element used when processing ListRecords/GetRecord requests
653 */
654 protected Element createMetadataElement(Document doc, String prefix, DBInfo info) {
655 // the <metadata> element
656 Element metadata = doc.createElement(OAIXML.METADATA);
657 // the <oai:dc namespace...> element
658 Element prfx_str_elem = (Element)doc.importNode(this.format_meta_elem_map.get(prefix), true);
659 metadata.appendChild(prfx_str_elem);
660
661 Element meta_format_element = GSXML.getNamedElement(this.meta_formats_definition, OAIXML.METADATA_FORMAT, GSXML.NAME_ATT, prefix);
662 NodeList elements = meta_format_element.getElementsByTagName(OAIXML.ELEMENT);
663 // for each element in the definition
664 for (int i=0; i<elements.getLength(); i++) {
665 Element e = (Element)elements.item(i);
666 Element map = (Element)GSXML.getChildByTagName(e, OAIXML.MAPPING);
667 if (map == null) {
668 // look up the element name
669 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), info);
670 } else {
671 // we go though the list of names in the mapping
672 addMetadata(prfx_str_elem, e.getAttribute(GSXML.NAME_ATT), map.getAttribute(OAIXML.SELECT), map.getAttribute(OAIXML.ELEMENTS), info);
673 }
674 }
675 // output any metadata that is not just a simple mapping
676 addCustomMetadata(prfx_str_elem, prefix, info);
677 return metadata;
678 }
679
680 /** a simple addMetadata where we look for meta_name metadata, and add as that name*/
681 protected void addMetadata(Element meta_list_elem, String meta_name, DBInfo info) {
682 Vector<String> values = info.getMultiInfo(meta_name);
683 if (values != null && values.size()!=0) {
684 for (int i=0; i<values.size(); i++) {
685 addMetadataElement(meta_list_elem, meta_name, values.get(i));
686 }
687 }
688 }
689
690 /** more complicated addMetadata - can add multiple items. */
691 protected void addMetadata(Element meta_list_elem, String new_meta_name, String select_type, String name_list, DBInfo info) {
692 String[] names = name_list.split(",");
693 for (int i=0; i<names.length; i++) {
694 Vector<String> values = info.getMultiInfo(names[i]);
695 if (values == null || values.size()==0) {
696 continue;
697 }
698 for (int j=0; j<values.size(); j++) {
699 addMetadataElement(meta_list_elem, new_meta_name, values.get(j));
700 if (select_type.equals(OAIXML.SELECT_SINGLE_VALUE)) {
701 return; // only want to add one value
702 }
703 }
704 if (select_type.equals(OAIXML.SELECT_FIRST_VALID_META)) {
705 return; // we have added all values of this meta elem
706 }
707 // otherwise, we will keep going through the list and add them all.
708 }
709 }
710
711 // specific metadata formats might need to do some custom metadata that is not
712 //just a standard mapping. eg oai_dc outputting an identifier that is a link
713 protected void addCustomMetadata(Element meta_list_elem, String prefix, DBInfo info) {
714
715 if (prefix.equals(OAIXML.META_FORMAT_DC)) {
716 // we want to add in another dc:identifier element with a link to the resource if possible
717 // try gs.OAIResourceURL first, then srclinkFile, then GS version of documnet
718 String gsURL = info.getInfo(OAIXML.GS_OAI_RESOURCE_URL);
719 if (gsURL.equals("")) {
720 String base_url = OAIXML.getBaseURL(); // e.g. e.g. http://host:port/greenstone3/oaiserver
721 // try srclinkFile
722 gsURL = info.getInfo("srclinkFile");
723 if (!gsURL.equals("")) {
724 // make up the link to the file
725 gsURL = base_url.replace("oaiserver", "") + "sites/" + this.site_name
726 + "/collect/" + this.coll_name + "/index/assoc/"
727 + info.getInfo("assocfilepath") + "/" + gsURL;
728 } else {
729 // no srclink file, lets provide a link to the greenstone doc
730 gsURL = base_url.replace("oaiserver", "library") + "/collection/" + this.coll_name + "/document/" + info.getInfo("Identifier");
731 }
732 }
733 // now we have the url link, add as metadata
734 addMetadataElement(meta_list_elem, "dc:identifier", gsURL);
735 }
736 }
737
738 /** create the actual metadata element for the list */
739 protected void addMetadataElement(Element meta_list_elem, String name, String value) {
740
741 Element meta = GSXML.createTextElement(meta_list_elem.getOwnerDocument(), name, value);
742 meta_list_elem.appendChild(meta);
743 }
744
745
746 /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers
747 */
748 protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) {
749
750 Element header = doc.createElement(OAIXML.HEADER);
751
752 // if deleted, get the date and change oailastmodified to timestamp in oaiinfo
753 if(deleted) {
754 header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted
755 // then the timestamp for deletion will be from oai-inf database
756 }
757
758 Element identifier = doc.createElement(OAIXML.IDENTIFIER);
759 GSXML.setNodeText(identifier, coll_name + ":" + oid);
760 header.appendChild(identifier);
761 Element set_spec = doc.createElement(OAIXML.SET_SPEC);
762 GSXML.setNodeText(set_spec, coll_name);
763 header.appendChild(set_spec);
764 Element datestamp = doc.createElement(OAIXML.DATESTAMP);
765 GSXML.setNodeText(datestamp, oailastmodified);
766 header.appendChild(datestamp);
767 return header;
768 }
769
770 /** return the metadata information */
771 protected Element processListMetadataFormats(Element req) {
772 // the request sent here must contain an OID. see doListMetadataFormats() in OAIReceptionist
773 Element param = GSXML.getNamedElement(req, GSXML.PARAM_ELEM, GSXML.NAME_ATT, OAIXML.OID);
774 if (param == null) {
775 logger.error("An element containing the OID attribute not is present.");
776 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
777 }
778 String oid = param.getAttribute(GSXML.VALUE_ATT);
779 if (oid == null || oid.equals("")) {
780 logger.error("No OID is present in the request.");
781 return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
782 }
783
784 /*
785 ArrayList<String> oid_list = null;
786 if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db
787 oid_list = new ArrayList<String>(oaiinf_db.getAllKeys());
788
789 if(oid_list == null) { // try getting the OIDs from the oai entries in the index db
790 oid_list = getChildrenIds(OAIXML.BROWSELIST);
791 }
792 }
793 */
794 // assume meta formats are only for OIDs that have not been deleted
795 // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs
796 ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST);
797 if (oid_list == null || oid_list.contains(oid) == false) {
798 logger.error("OID: " + oid + " is not present in the database.");
799 Element e= OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, "");
800// logger.error((new XMLConverter()).getPrettyString (e));
801 return e;
802 }
803
804 DBInfo info = null;
805 info = this.coll_db.getInfo(oid);
806 if (info == null) { //just double check
807 return OAIXML.createErrorResponse(OAIXML.OAI_SERVICE_UNAVAILABLE, "");
808 }
809
810 Document doc = XMLConverter.newDOM();
811 Element list_metadata_formats_response = doc.createElement(GSXML.RESPONSE_ELEM);
812
813 Element list_metadata_formats = doc.createElement(OAIXML.LIST_METADATA_FORMATS);
814 list_metadata_formats_response.appendChild(list_metadata_formats);
815 boolean has_meta_format = false;
816
817 // for each format in format_elements_map
818 Iterator<String> it = format_elements_map.keySet().iterator();
819 while (it.hasNext()) {
820 String format = it.next();
821 HashSet<String> set_of_elems = format_elements_map.get(format);
822 if (documentContainsMetadata(info, set_of_elems)) {
823 // add this format into the response
824 has_meta_format = true;
825 list_metadata_formats.appendChild(doc.importNode(format_response_map.get(format), true));
826 }
827 }
828
829 if (has_meta_format == false) {
830 logger.error("Specified metadata names are not contained in the database.");
831 return OAIXML.createErrorResponse(OAIXML.NO_METADATA_FORMATS, "");
832 } else {
833 return list_metadata_formats_response;
834 }
835 }
836
837 protected boolean documentContainsMetadata(DBInfo info, HashSet<String> set_of_elems) {
838 if (set_of_elems.size() == 0) {
839 return false;
840 }
841 Iterator<String> i = set_of_elems.iterator();
842 while (i.hasNext()) {
843 if (!info.getInfo(i.next()).equals("")) {
844 return true;
845 }
846 }
847 return false;
848 }
849
850 /** returns a list of the child ids in order, null if no children */
851 protected ArrayList<String> getChildrenIds(String node_id) {
852 DBInfo info = this.coll_db.getInfo(node_id);
853 if (info == null) {
854 return null;
855 }
856
857 String contains = info.getInfo("contains");
858 if (contains.equals("")) {
859 return null;
860 }
861 ArrayList<String> children = new ArrayList<String>();
862 StringTokenizer st = new StringTokenizer(contains, ";");
863 while (st.hasMoreTokens()) {
864 String child_id = st.nextToken().replaceAll("\"", node_id);
865 children.add(child_id);
866 }
867 return children;
868 }
869 /**method to check whether any of the 'metadata_names' is contained in the 'info'.
870 * The name may be in the form: <name>,<mapped name>, in which the mapped name is
871 * optional. The mapped name is looked up in the DBInfo; if not present, use the first
872 * name which is mandatory.
873 */
874 protected boolean containsMetadata(DBInfo info, String[] metadata_names) {
875 if (metadata_names == null) return false;
876 logger.info("checking metadata names in db.");
877 for(int i=0; i<metadata_names.length; i++) {
878 int index = metadata_names[i].indexOf(",");
879 String meta_name = (index == -1) ? metadata_names[i] :
880 metadata_names[i].substring(index + 1);
881
882 if(info.getInfo(meta_name).equals("") == false) {
883 return true;
884 }
885 }
886 return false;
887 }
888
889 protected long getDateStampMillis(DBInfo info) {
890 // gs.OAIDateStamp is in YYYY-MM-DD
891 String time_stamp = info.getInfo(OAIXML.GS_OAI_DATE_STAMP);
892 long millis = -1;
893 if (!time_stamp.equals("")) {
894 millis = OAIXML.getTime(time_stamp);
895 }
896 if (millis == -1) {
897 // oailastmodified is in seconds
898 time_stamp = info.getInfo(OAIXML.OAI_LASTMODIFIED);
899 if (!time_stamp.equals("")) {
900 millis = Long.parseLong(time_stamp)*1000;
901 }
902 }
903 return millis;
904
905
906 }
907}
908
909
Note: See TracBrowser for help on using the repository browser.